From 53abcd825d2abbd2065796c974314f0952b26041 Mon Sep 17 00:00:00 2001 From: Roman Rizzi Date: Fri, 5 Nov 2021 14:20:14 -0300 Subject: [PATCH] FIX: Canonical URLs may be relative (#14825) FinalDestination's follow_canonical mode used for embedded topics should work when canonical URLs are relative, as specified in [RFC 6596](https://datatracker.ietf.org/doc/html/rfc6596) --- lib/final_destination.rb | 15 ++++++++++---- spec/components/final_destination_spec.rb | 25 +++++++++++++++++++++++ 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/lib/final_destination.rb b/lib/final_destination.rb index 6ab994fb83f..09f4d0c58eb 100644 --- a/lib/final_destination.rb +++ b/lib/final_destination.rb @@ -225,7 +225,7 @@ class FinalDestination end if @follow_canonical - next_url = uri(fetch_canonical_url(response.body)) + next_url = fetch_canonical_url(response.body) if next_url.to_s.present? && next_url != @uri @follow_canonical = false @@ -481,10 +481,17 @@ class FinalDestination def fetch_canonical_url(body) return if body.blank? - canonical_link = Nokogiri::HTML5(body).at("link[rel='canonical']") - return if canonical_link.nil? + canonical_element = Nokogiri::HTML5(body).at("link[rel='canonical']") + return if canonical_element.nil? + canonical_uri = uri(canonical_element['href']) + return if canonical_uri.blank? - canonical_link['href'] + return canonical_uri if canonical_uri.host.present? + parts = [@uri.host, canonical_uri.to_s] + complete_url = canonical_uri.to_s.starts_with?('/') ? parts.join('') : parts.join('/') + complete_url = "#{@uri.scheme}://#{complete_url}" if @uri.scheme + + uri(complete_url) end end diff --git a/spec/components/final_destination_spec.rb b/spec/components/final_destination_spec.rb index 8b781324bd5..e9548ead0b9 100644 --- a/spec/components/final_destination_spec.rb +++ b/spec/components/final_destination_spec.rb @@ -194,6 +194,31 @@ describe FinalDestination do expect(final.status).to eq(:resolved) end + it 'resolves the canonical link when the URL is relative' do + host = "https://codinghorror.com" + + canonical_follow("#{host}/blog", "/blog/canonical") + stub_request(:head, "#{host}/blog/canonical").to_return(doc_response) + + final = FinalDestination.new("#{host}/blog", opts.merge(follow_canonical: true)) + + expect(final.resolve.to_s).to eq("#{host}/blog/canonical") + expect(final.redirected?).to eq(false) + expect(final.status).to eq(:resolved) + end + + it 'resolves the canonical link when the URL is relative and does not start with the / symbol' do + host = "https://codinghorror.com" + canonical_follow("#{host}/blog", "blog/canonical") + stub_request(:head, "#{host}/blog/canonical").to_return(doc_response) + + final = FinalDestination.new("#{host}/blog", opts.merge(follow_canonical: true)) + + expect(final.resolve.to_s).to eq("#{host}/blog/canonical") + expect(final.redirected?).to eq(false) + expect(final.status).to eq(:resolved) + end + it "does not follow the canonical link if it's the same as the current URL" do canonical_follow("https://eviltrout.com", "https://eviltrout.com")