From 05bdbd9f97b1cc039ffe3437f1e7ac17d9a4716f Mon Sep 17 00:00:00 2001 From: Arpit Jalan Date: Thu, 1 Jul 2021 20:09:29 +0530 Subject: [PATCH] SECURITY: Onebox canonical links bypassing FinalDestination checks (#13605) --- lib/onebox/helpers.rb | 9 ++-- lib/oneboxer.rb | 52 +++++++++++-------- .../engine/allowlisted_generic_onebox_spec.rb | 5 +- .../engine/google_photos_onebox_spec.rb | 2 + .../engine/twitter_status_onebox_spec.rb | 2 +- spec/lib/onebox/helpers_spec.rb | 1 + 6 files changed, 45 insertions(+), 26 deletions(-) diff --git a/lib/onebox/helpers.rb b/lib/onebox/helpers.rb index 10dd9fa0e63..09383d5ed44 100644 --- a/lib/onebox/helpers.rb +++ b/lib/onebox/helpers.rb @@ -36,9 +36,12 @@ module Onebox # prefer canonical link canonical_link = doc.at('//link[@rel="canonical"]/@href') canonical_uri = Addressable::URI.parse(canonical_link) - if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}" && canonical_uri.host != "localhost" - response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil) - doc = Nokogiri::HTML(response) if response + if canonical_link && canonical_uri && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}" + uri = FinalDestination.new(canonical_link, Oneboxer.get_final_destination_options(canonical_link)).resolve + if uri.present? + response = (fetch_response(uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil) + doc = Nokogiri::HTML(response) if response + end end end diff --git a/lib/oneboxer.rb b/lib/oneboxer.rb index a1d1817d11f..845fc2c4bb2 100644 --- a/lib/oneboxer.rb +++ b/lib/oneboxer.rb @@ -397,31 +397,11 @@ module Oneboxer def self.external_onebox(url, available_strategies = nil) Discourse.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do - uri = URI(url) available_strategies ||= Oneboxer.ordered_strategies(uri.hostname) strategy = available_strategies.shift - fd_options = { - ignore_redirects: ignore_redirects, - ignore_hostnames: blocked_domains, - force_get_hosts: force_get_hosts, - force_custom_user_agent_hosts: force_custom_user_agent_hosts, - preserve_fragment_url_hosts: preserve_fragment_url_hosts, - timeout: 5 - } - - if strategy && Oneboxer.strategies[strategy][:force_get_host] - fd_options[:force_get_hosts] = ["https://#{uri.hostname}"] - end - if strategy && Oneboxer.strategies[strategy][:force_custom_user_agent_host] - fd_options[:force_custom_user_agent_hosts] = ["https://#{uri.hostname}"] - end - - user_agent_override = SiteSetting.cache_onebox_user_agent if Oneboxer.cache_response_body?(url) && SiteSetting.cache_onebox_user_agent.present? - fd_options[:default_user_agent] = user_agent_override if user_agent_override - - fd = FinalDestination.new(url, fd_options) + fd = FinalDestination.new(url, get_final_destination_options(url, strategy)) uri = fd.resolve if fd.status != :resolved @@ -453,6 +433,8 @@ module Oneboxer } onebox_options[:cookie] = fd.cookie if fd.cookie + + user_agent_override = SiteSetting.cache_onebox_user_agent if Oneboxer.cache_response_body?(url) && SiteSetting.cache_onebox_user_agent.present? onebox_options[:user_agent] = user_agent_override if user_agent_override r = Onebox.preview(uri.to_s, onebox_options) @@ -552,4 +534,32 @@ module Oneboxer "ONEBOXER_STRATEGY_#{hostname}" end + def self.get_final_destination_options(url, strategy = nil) + fd_options = { + ignore_redirects: ignore_redirects, + ignore_hostnames: blocked_domains, + force_get_hosts: force_get_hosts, + force_custom_user_agent_hosts: force_custom_user_agent_hosts, + preserve_fragment_url_hosts: preserve_fragment_url_hosts, + timeout: 5 + } + + uri = URI(url) + + if strategy.blank? + strategy = Oneboxer.ordered_strategies(uri.hostname).shift + end + + if strategy && Oneboxer.strategies[strategy][:force_get_host] + fd_options[:force_get_hosts] = ["https://#{uri.hostname}"] + end + if strategy && Oneboxer.strategies[strategy][:force_custom_user_agent_host] + fd_options[:force_custom_user_agent_hosts] = ["https://#{uri.hostname}"] + end + + user_agent_override = SiteSetting.cache_onebox_user_agent if Oneboxer.cache_response_body?(url) && SiteSetting.cache_onebox_user_agent.present? + fd_options[:default_user_agent] = user_agent_override if user_agent_override + + fd_options + end end diff --git a/spec/lib/onebox/engine/allowlisted_generic_onebox_spec.rb b/spec/lib/onebox/engine/allowlisted_generic_onebox_spec.rb index 46991aa74a9..14cadc152c6 100644 --- a/spec/lib/onebox/engine/allowlisted_generic_onebox_spec.rb +++ b/spec/lib/onebox/engine/allowlisted_generic_onebox_spec.rb @@ -98,6 +98,7 @@ describe Onebox::Engine::AllowlistedGenericOnebox do before do stub_request(:get, mobile_url).to_return(status: 200, body: onebox_response('etsy_mobile')) stub_request(:get, canonical_url).to_return(status: 200, body: onebox_response('etsy')) + stub_request(:head, canonical_url).to_return(status: 200, body: "") end it 'fetches opengraph data and price from canonical link' do @@ -142,6 +143,7 @@ describe Onebox::Engine::AllowlistedGenericOnebox do } ) stub_request(:get, redirect_link).to_return(status: 200, body: onebox_response('dailymail')) + stub_request(:head, redirect_link).to_return(status: 200, body: "") end around do |example| @@ -168,9 +170,10 @@ describe Onebox::Engine::AllowlistedGenericOnebox do before do stub_request(:get, "https://edition.cnn.com/2020/05/15/health/gallery/coronavirus-people-adopting-pets-photos/index.html") .to_return(status: 200, body: onebox_response('cnn')) - stub_request(:get, "https://www.cnn.com/2020/05/15/health/gallery/coronavirus-people-adopting-pets-photos/index.html") .to_return(status: 200, body: onebox_response('cnn')) + stub_request(:head, "https://www.cnn.com/2020/05/15/health/gallery/coronavirus-people-adopting-pets-photos/index.html") + .to_return(status: 200, body: "") end it 'shows basic onebox' do diff --git a/spec/lib/onebox/engine/google_photos_onebox_spec.rb b/spec/lib/onebox/engine/google_photos_onebox_spec.rb index ba4888b87d3..ffb1327468b 100644 --- a/spec/lib/onebox/engine/google_photos_onebox_spec.rb +++ b/spec/lib/onebox/engine/google_photos_onebox_spec.rb @@ -10,6 +10,8 @@ describe Onebox::Engine::GooglePhotosOnebox do stub_request(:get, link).to_return(status: 200, body: onebox_response("googlephotos")) stub_request(:get, "https://photos.google.com/share/AF1QipOV3gcu_edA8lyjJEpS9sC1g3AeCUtaZox11ylYZId7wJ7cthZ8M1kZXeAp5vhEPg?key=QktmUFNvdWpNVktERU5zWmVRZlZubzRRc0ttWWN3") .to_return(status: 200, body: onebox_response("googlephotos")) + stub_request(:head, "https://photos.google.com/share/AF1QipOV3gcu_edA8lyjJEpS9sC1g3AeCUtaZox11ylYZId7wJ7cthZ8M1kZXeAp5vhEPg?key=QktmUFNvdWpNVktERU5zWmVRZlZubzRRc0ttWWN3") + .to_return(status: 200, body: "") end it "includes album title" do diff --git a/spec/lib/onebox/engine/twitter_status_onebox_spec.rb b/spec/lib/onebox/engine/twitter_status_onebox_spec.rb index 9b0085ee8ef..daa6f36cf9d 100644 --- a/spec/lib/onebox/engine/twitter_status_onebox_spec.rb +++ b/spec/lib/onebox/engine/twitter_status_onebox_spec.rb @@ -59,7 +59,7 @@ describe Onebox::Engine::TwitterStatusOnebox do shared_context "quoted tweet info" do before do - @link = "https://twitter.com/Metallica/status/1128068672289890305" + @link = "https://twitter.com/metallica/status/1128068672289890305" @onebox_fixture = "twitterstatus_quoted" stub_request(:get, @link.downcase).to_return(status: 200, body: onebox_response(@onebox_fixture)) diff --git a/spec/lib/onebox/helpers_spec.rb b/spec/lib/onebox/helpers_spec.rb index f24719ff20f..42543cae0c5 100644 --- a/spec/lib/onebox/helpers_spec.rb +++ b/spec/lib/onebox/helpers_spec.rb @@ -58,6 +58,7 @@ RSpec.describe Onebox::Helpers do uri = 'https://www.example.com' stub_request(:get, uri).to_return(status: 200, body: "

invalid

") stub_request(:get, 'http://foobar.com').to_return(status: 200, body: "

success

") + stub_request(:head, 'http://foobar.com').to_return(status: 200, body: "") expect(described_class.fetch_html_doc(uri).to_s).to match("success") end