FIX: Canonical URLs may be relative (#14825)

FinalDestination's follow_canonical mode used for embedded topics should work when canonical URLs are relative, as specified in [RFC 6596](https://datatracker.ietf.org/doc/html/rfc6596)
This commit is contained in:
Roman Rizzi 2021-11-05 14:20:14 -03:00 committed by GitHub
parent f444e3e3f7
commit 53abcd825d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 36 additions and 4 deletions

View File

@ -225,7 +225,7 @@ class FinalDestination
end end
if @follow_canonical if @follow_canonical
next_url = uri(fetch_canonical_url(response.body)) next_url = fetch_canonical_url(response.body)
if next_url.to_s.present? && next_url != @uri if next_url.to_s.present? && next_url != @uri
@follow_canonical = false @follow_canonical = false
@ -481,10 +481,17 @@ class FinalDestination
def fetch_canonical_url(body) def fetch_canonical_url(body)
return if body.blank? return if body.blank?
canonical_link = Nokogiri::HTML5(body).at("link[rel='canonical']")
return if canonical_link.nil? canonical_element = Nokogiri::HTML5(body).at("link[rel='canonical']")
return if canonical_element.nil?
canonical_uri = uri(canonical_element['href'])
return if canonical_uri.blank?
canonical_link['href'] return canonical_uri if canonical_uri.host.present?
parts = [@uri.host, canonical_uri.to_s]
complete_url = canonical_uri.to_s.starts_with?('/') ? parts.join('') : parts.join('/')
complete_url = "#{@uri.scheme}://#{complete_url}" if @uri.scheme
uri(complete_url)
end end
end end

View File

@ -194,6 +194,31 @@ describe FinalDestination do
expect(final.status).to eq(:resolved) expect(final.status).to eq(:resolved)
end end
it 'resolves the canonical link when the URL is relative' do
host = "https://codinghorror.com"
canonical_follow("#{host}/blog", "/blog/canonical")
stub_request(:head, "#{host}/blog/canonical").to_return(doc_response)
final = FinalDestination.new("#{host}/blog", opts.merge(follow_canonical: true))
expect(final.resolve.to_s).to eq("#{host}/blog/canonical")
expect(final.redirected?).to eq(false)
expect(final.status).to eq(:resolved)
end
it 'resolves the canonical link when the URL is relative and does not start with the / symbol' do
host = "https://codinghorror.com"
canonical_follow("#{host}/blog", "blog/canonical")
stub_request(:head, "#{host}/blog/canonical").to_return(doc_response)
final = FinalDestination.new("#{host}/blog", opts.merge(follow_canonical: true))
expect(final.resolve.to_s).to eq("#{host}/blog/canonical")
expect(final.redirected?).to eq(false)
expect(final.status).to eq(:resolved)
end
it "does not follow the canonical link if it's the same as the current URL" do it "does not follow the canonical link if it's the same as the current URL" do
canonical_follow("https://eviltrout.com", "https://eviltrout.com") canonical_follow("https://eviltrout.com", "https://eviltrout.com")