mirror of
https://github.com/discourse/discourse.git
synced 2024-11-22 11:23:25 +08:00
FIX: Attempt to onebox even if response body exceeds max_download_kb
(#26929)
In 95a82d608d
, we lowered the default for
`Onebox.options.max_download_kb` from 10mb to 2mb for security hardening
purposes. However, this resulted in multiple bug reports where seemingly
nomral URLs stopped being oneboxed. It turns out that lowering
`Onebox.options.max_download_kb` resulted in `Onebox::Helpers::DownloadTooLarge` being raised
more often for more URLs in `Onebox::Helpers.fetch_response` which
`Onebox::Helpers.fetch_html_doc` relies on. When
`Onebox::Helpers::DownloadTooLarge` is raised in
`Onebox::Helpers.fetch_response`, we throw away whatever response body
which we have already downloaded at that point. This is not ideal
because Nokogiri can parse incomplete HTML documents and there is a
really high chance that the incomplete HTML document still contains the
information which we need for oneboxing.
Therefore, this commit updates `Onebox::Helpers.fetch_html_doc` to not
throw away the response body when the size of the response body exceeds
`Onebox.options.max_download_size`. Instead, we just take whatever
response which we have and get Nokogiri to parse it.
This commit is contained in:
parent
13334a3da0
commit
c8da2a33e8
|
@ -13,15 +13,20 @@ module Onebox
|
|||
html.gsub(/<[^>]+>/, " ").gsub(/\n/, "")
|
||||
end
|
||||
|
||||
# Fetches the HTML response body for a URL.
|
||||
#
|
||||
# Note that the size of the response body is capped at `Onebox.options.max_download_kb`. When the limit has been reached,
|
||||
# this method will return the response body that has been downloaded up to the limit.
|
||||
def self.fetch_html_doc(url, headers = nil, body_cacher = nil)
|
||||
response =
|
||||
(
|
||||
begin
|
||||
fetch_response(url, headers: headers, body_cacher: body_cacher)
|
||||
fetch_response(url, headers:, body_cacher:, raise_error_when_response_too_large: false)
|
||||
rescue StandardError
|
||||
nil
|
||||
end
|
||||
)
|
||||
|
||||
doc = Nokogiri.HTML(response)
|
||||
uri = Addressable::URI.parse(url)
|
||||
|
||||
|
@ -45,7 +50,12 @@ module Onebox
|
|||
response =
|
||||
(
|
||||
begin
|
||||
fetch_response(uri.to_s, headers: headers, body_cacher: body_cacher)
|
||||
fetch_response(
|
||||
uri.to_s,
|
||||
headers:,
|
||||
body_cacher:,
|
||||
raise_error_when_response_too_large: false,
|
||||
)
|
||||
rescue StandardError
|
||||
nil
|
||||
end
|
||||
|
@ -63,7 +73,8 @@ module Onebox
|
|||
redirect_limit: 5,
|
||||
domain: nil,
|
||||
headers: nil,
|
||||
body_cacher: nil
|
||||
body_cacher: nil,
|
||||
raise_error_when_response_too_large: true
|
||||
)
|
||||
redirect_limit = Onebox.options.redirect_limit if redirect_limit >
|
||||
Onebox.options.redirect_limit
|
||||
|
@ -125,7 +136,11 @@ module Onebox
|
|||
|
||||
response.read_body do |chunk|
|
||||
result.write(chunk)
|
||||
raise DownloadTooLarge.new if result.size > size_bytes
|
||||
|
||||
if result.size > size_bytes
|
||||
raise_error_when_response_too_large ? raise(DownloadTooLarge.new) : break
|
||||
end
|
||||
|
||||
raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
|
||||
end
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@ RSpec.describe Onebox::Helpers do
|
|||
around do |example|
|
||||
previous_options = Onebox.options.to_h
|
||||
Onebox.options = { max_download_kb: 1 }
|
||||
|
||||
stub_request(:get, "http://example.com/large-file").to_return(
|
||||
status: 200,
|
||||
body: onebox_response("slides"),
|
||||
|
@ -32,6 +33,15 @@ RSpec.describe Onebox::Helpers do
|
|||
)
|
||||
end
|
||||
|
||||
it "returns the body of the response when size of response body exceeds the limit and `raise_error_when_response_too_large` has been set to `false`" do
|
||||
expect(
|
||||
described_class.fetch_response(
|
||||
"http://example.com/large-file",
|
||||
raise_error_when_response_too_large: false,
|
||||
),
|
||||
).to eq(onebox_response("slides"))
|
||||
end
|
||||
|
||||
it "raises an exception when private url requested" do
|
||||
FinalDestination::TestHelper.stub_to_fail do
|
||||
expect { described_class.fetch_response("http://example.com/large-file") }.to raise_error(
|
||||
|
@ -49,6 +59,22 @@ RSpec.describe Onebox::Helpers do
|
|||
expect(described_class.fetch_html_doc(uri).to_s).to match("success")
|
||||
end
|
||||
|
||||
it "does not raise an error when response body exceeds Onebox's `max_download_kb` limit" do
|
||||
previous_options = Onebox.options.to_h
|
||||
Onebox.options = previous_options.merge(max_download_kb: 1)
|
||||
|
||||
stub_request(:get, "http://example.com/large-file").to_return(
|
||||
status: 200,
|
||||
body: onebox_response("slides"),
|
||||
)
|
||||
|
||||
expect(described_class.fetch_html_doc("http://example.com/large-file").to_s).to include(
|
||||
"ECMAScript 2015 by David Leonard",
|
||||
)
|
||||
ensure
|
||||
Onebox.options = previous_options
|
||||
end
|
||||
|
||||
context "with canonical link" do
|
||||
it "follows canonical link" do
|
||||
uri = "https://www.example.com"
|
||||
|
|
Loading…
Reference in New Issue
Block a user