FIX: Ensure Onebox requests ask for the correct language. (#30637)

Onebox embeds currently default to accepting any language response from the destination, which can lead to surprising behaviour. For example, this is the `curl` equivalent of what Onebox does:

```
% curl -si -H "Accept-Language: *" 'https://developer.android.com/studio' | grep location:
location: /studio?hl=hi
```

This PR uses the value of `SiteSetting.default_locale` to populate the `Accept-Language` header, falling back to English if that isn't available, then finally accepting whatever language the destination makes available.
This commit is contained in:
Gary Pendergast 2025-01-09 09:08:27 +11:00 committed by GitHub
parent a29b964329
commit f369db5ae9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 21 additions and 6 deletions

View File

@ -41,11 +41,10 @@ module Onebox
canonical_uri = Addressable::URI.parse(canonical_link) canonical_uri = Addressable::URI.parse(canonical_link)
if canonical_link && canonical_uri && if canonical_link && canonical_uri &&
"#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}" "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
uri = canonical_options = Oneboxer.get_final_destination_options(canonical_link)
FinalDestination.new( canonical_options["extra_headers"] = { "Accept-Language" => accept_language }
canonical_link,
Oneboxer.get_final_destination_options(canonical_link), uri = FinalDestination.new(canonical_link, canonical_options).resolve
).resolve
if uri.present? if uri.present?
response = response =
( (
@ -105,6 +104,7 @@ module Onebox
headers ||= {} headers ||= {}
headers["User-Agent"] ||= user_agent if user_agent headers["User-Agent"] ||= user_agent if user_agent
headers["Accept-Language"] ||= accept_language if accept_language
request = Net::HTTP::Get.new(uri.request_uri, headers) request = Net::HTTP::Get.new(uri.request_uri, headers)
start_time = Time.now start_time = Time.now
@ -236,6 +236,14 @@ module Onebox
user_agent user_agent
end end
# Builds the value used for the `Accept-Language` request header.
#
# The site's default locale is listed first, with underscores turned into
# hyphens (e.g. `pt_BR` becomes `pt-BR`). English follows as the second
# choice unless the default locale is already "en", and the `*` wildcard
# comes last so any other language is still accepted.
#
# @return [String] a weighted language list
def self.accept_language
  locale = SiteSetting.default_locale
  return "en;q=0.9, *;q=0.5" if locale == "en"

  "#{locale.tr("_", "-")};q=0.9, en;q=0.8, *;q=0.5"
end
# Percent-encodes a URI string per RFC3986 - https://tools.ietf.org/html/rfc3986 # Percent-encodes a URI string per RFC3986 - https://tools.ietf.org/html/rfc3986
def self.uri_encode(url) def self.uri_encode(url)
return "" unless url return "" unless url

View File

@ -152,7 +152,7 @@ module Oneboxer
end end
def self.redis_cached_response_body_key(uri) def self.redis_cached_response_body_key(uri)
"CACHED_RESPONSE_#{uri}" "CACHED_RESPONSE_#{SiteSetting.default_locale}_#{uri}"
end end
# Parse URLs out of HTML, returning the document when finished. # Parse URLs out of HTML, returning the document when finished.

View File

@ -955,6 +955,13 @@ RSpec.describe Oneboxer do
preview = Oneboxer.preview(url2, invalidate_oneboxes: true) preview = Oneboxer.preview(url2, invalidate_oneboxes: true)
expect(Oneboxer.cached_response_body_exists?(url2)).to eq(false) expect(Oneboxer.cached_response_body_exists?(url2)).to eq(false)
end end
it "separates cache by default_locale" do
  # Only the caching side effect of the preview matters here, so its return
  # value is intentionally not captured.
  Oneboxer.preview(url, invalidate_oneboxes: true)
  expect(Oneboxer.cached_response_body_exists?(url)).to eq(true)

  # After the site's default locale changes, the same URL is expected to
  # miss the cache.
  SiteSetting.default_locale = "fr"
  expect(Oneboxer.cached_response_body_exists?(url)).to eq(false)
end
end end
describe "register_local_handler" do describe "register_local_handler" do