From f369db5ae9a29a23299dff5f14768167230b0b79 Mon Sep 17 00:00:00 2001 From: Gary Pendergast Date: Thu, 9 Jan 2025 09:08:27 +1100 Subject: [PATCH] FIX: Ensure Onebox requests ask for the correct language. (#30637) Onebox embeds currently default to accepting any language response from the destination, which can lead to some surprising behaviour. For example, the `curl` equivalent of what Onebox does: ``` % curl -si -H "Accept-Language: *" 'https://developer.android.com/studio' | grep location: location: /studio?hl=hi ``` This PR uses the value of `SiteSetting.default_locale` to populate the `Accept-Language` header, falling back to English if that isn't available, then finally accepting whatever language the destination makes available. --- lib/onebox/helpers.rb | 18 +++++++++++++----- lib/oneboxer.rb | 2 +- spec/lib/oneboxer_spec.rb | 7 +++++++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/lib/onebox/helpers.rb b/lib/onebox/helpers.rb index f80cca5fba3..eea070c2591 100644 --- a/lib/onebox/helpers.rb +++ b/lib/onebox/helpers.rb @@ -41,11 +41,10 @@ module Onebox canonical_uri = Addressable::URI.parse(canonical_link) if canonical_link && canonical_uri && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}" - uri = - FinalDestination.new( - canonical_link, - Oneboxer.get_final_destination_options(canonical_link), - ).resolve + canonical_options = Oneboxer.get_final_destination_options(canonical_link) + canonical_options["extra_headers"] = { "Accept-Language" => accept_language } + + uri = FinalDestination.new(canonical_link, canonical_options).resolve if uri.present? 
response = ( @@ -105,6 +104,7 @@ module Onebox headers ||= {} headers["User-Agent"] ||= user_agent if user_agent + headers["Accept-Language"] ||= accept_language if accept_language request = Net::HTTP::Get.new(uri.request_uri, headers) start_time = Time.now @@ -236,6 +236,14 @@ module Onebox user_agent end + def self.accept_language + if SiteSetting.default_locale == "en" + "en;q=0.9, *;q=0.5" + else + "#{SiteSetting.default_locale.gsub(/_/, "-")};q=0.9, en;q=0.8, *;q=0.5" + end + end + # Percent-encodes a URI string per RFC3986 - https://tools.ietf.org/html/rfc3986 def self.uri_encode(url) return "" unless url diff --git a/lib/oneboxer.rb b/lib/oneboxer.rb index 422cc9603e5..a124e69023c 100644 --- a/lib/oneboxer.rb +++ b/lib/oneboxer.rb @@ -152,7 +152,7 @@ module Oneboxer end def self.redis_cached_response_body_key(uri) - "CACHED_RESPONSE_#{uri}" + "CACHED_RESPONSE_#{SiteSetting.default_locale}_#{uri}" end # Parse URLs out of HTML, returning the document when finished. diff --git a/spec/lib/oneboxer_spec.rb b/spec/lib/oneboxer_spec.rb index a2cde501cda..96f38fdc5f5 100644 --- a/spec/lib/oneboxer_spec.rb +++ b/spec/lib/oneboxer_spec.rb @@ -955,6 +955,13 @@ RSpec.describe Oneboxer do preview = Oneboxer.preview(url2, invalidate_oneboxes: true) expect(Oneboxer.cached_response_body_exists?(url2)).to eq(false) end + + it "separates cache by default_locale" do + preview = Oneboxer.preview(url, invalidate_oneboxes: true) + expect(Oneboxer.cached_response_body_exists?(url)).to eq(true) + SiteSetting.default_locale = "fr" + expect(Oneboxer.cached_response_body_exists?(url)).to eq(false) + end end describe "register_local_handler" do