FIX: Canonical URLs may be relative (#14825)

FinalDestination's follow_canonical mode used for embedded topics should work when canonical URLs are relative, as specified in [RFC 6596](https://datatracker.ietf.org/doc/html/rfc6596)
This commit is contained in:
Roman Rizzi 2021-11-05 14:20:14 -03:00 committed by GitHub
parent f444e3e3f7
commit 53abcd825d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 36 additions and 4 deletions

View File

@ -225,7 +225,7 @@ class FinalDestination
end
if @follow_canonical
next_url = uri(fetch_canonical_url(response.body))
next_url = fetch_canonical_url(response.body)
if next_url.to_s.present? && next_url != @uri
@follow_canonical = false
@ -481,10 +481,17 @@ class FinalDestination
def fetch_canonical_url(body)
return if body.blank?
canonical_link = Nokogiri::HTML5(body).at("link[rel='canonical']")
return if canonical_link.nil?
canonical_element = Nokogiri::HTML5(body).at("link[rel='canonical']")
return if canonical_element.nil?
canonical_uri = uri(canonical_element['href'])
return if canonical_uri.blank?
canonical_link['href']
return canonical_uri if canonical_uri.host.present?
parts = [@uri.host, canonical_uri.to_s]
complete_url = canonical_uri.to_s.starts_with?('/') ? parts.join('') : parts.join('/')
complete_url = "#{@uri.scheme}://#{complete_url}" if @uri.scheme
uri(complete_url)
end
end

View File

@ -194,6 +194,31 @@ describe FinalDestination do
expect(final.status).to eq(:resolved)
end
it 'resolves the canonical link when the URL is relative' do
host = "https://codinghorror.com"
canonical_follow("#{host}/blog", "/blog/canonical")
stub_request(:head, "#{host}/blog/canonical").to_return(doc_response)
final = FinalDestination.new("#{host}/blog", opts.merge(follow_canonical: true))
expect(final.resolve.to_s).to eq("#{host}/blog/canonical")
expect(final.redirected?).to eq(false)
expect(final.status).to eq(:resolved)
end
it 'resolves the canonical link when the URL is relative and does not start with the / symbol' do
host = "https://codinghorror.com"
canonical_follow("#{host}/blog", "blog/canonical")
stub_request(:head, "#{host}/blog/canonical").to_return(doc_response)
final = FinalDestination.new("#{host}/blog", opts.merge(follow_canonical: true))
expect(final.resolve.to_s).to eq("#{host}/blog/canonical")
expect(final.redirected?).to eq(false)
expect(final.status).to eq(:resolved)
end
it "does not follow the canonical link if it's the same as the current URL" do
canonical_follow("https://eviltrout.com", "https://eviltrout.com")