FIX: don't double request when downloading a file

This commit is contained in:
Régis Hanol 2018-02-24 12:35:57 +01:00
parent b731d5d9b5
commit 0559a4736a
17 changed files with 80 additions and 132 deletions

View File

@ -94,6 +94,8 @@ class StaticController < ApplicationController
redirect_to destination
end
FAVICON ||= -"favicon"
# We need to be able to draw our favicon on a canvas
# and pull it off the canvas into a data uri
# This can work by ensuring people set all the right CORS
@ -101,16 +103,16 @@ class StaticController < ApplicationController
# instead we cache the favicon in redis and serve it out real quick with
# a huge expiry, we also cache these assets in nginx so it bypassed if needed
def favicon
hijack do
data = DistributedMemoizer.memoize('favicon' + SiteSetting.favicon_url, 60 * 30) do
data = DistributedMemoizer.memoize(FAVICON + SiteSetting.favicon_url, 60 * 30) do
begin
file = FileHelper.download(
SiteSetting.favicon_url,
max_file_size: 50.kilobytes,
tmp_file_name: "favicon.png",
tmp_file_name: FAVICON,
follow_redirect: true
)
file ||= Tempfile.new([FAVICON, ".png"])
data = file.read
file.unlink
data

View File

@ -25,57 +25,55 @@ class FileHelper
follow_redirect: false,
read_timeout: 5,
skip_rate_limit: false,
verbose: nil)
# verbose logging is default while debugging onebox
verbose = verbose.nil? ? true : verbose
verbose: false)
url = "https:" + url if url.start_with?("//")
raise Discourse::InvalidParameters.new(:url) unless url =~ /^https?:\/\//
dest = FinalDestination.new(
tmp = nil
fd = FinalDestination.new(
url,
max_redirects: follow_redirect ? 5 : 1,
skip_rate_limit: skip_rate_limit,
verbose: verbose
)
uri = dest.resolve
if !uri && dest.status_code.to_i >= 400
# attempt error API compatability
io = FakeIO.new
io.status = [dest.status_code.to_s, ""]
fd.get do |response, chunk, uri|
if tmp.nil?
# error handling
if uri.blank?
if response.code.to_i >= 400
# attempt error API compatibility
io = FakeIO.new
io.status = [response.code, ""]
raise OpenURI::HTTPError.new("#{response.code} Error", io)
else
log(:error, "FinalDestination did not work for: #{url}") if verbose
throw :done
end
end
# TODO perhaps translate and add Discourse::DownloadError
raise OpenURI::HTTPError.new("#{dest.status_code} Error", io)
end
# first run
tmp_file_ext = File.extname(uri.path)
unless uri
log(:error, "FinalDestination did not work for: #{url}") if verbose
return
end
if tmp_file_ext.blank? && response.content_type.present?
ext = MiniMime.lookup_by_content_type(response.content_type)&.extension
ext = "jpg" if ext == "jpe"
tmp_file_ext = "." + ext if ext.present?
end
downloaded = uri.open("rb", read_timeout: read_timeout)
extension = File.extname(uri.path)
if extension.blank? && downloaded.content_type.present?
ext = MiniMime.lookup_by_content_type(downloaded.content_type)&.extension
ext = "jpg" if ext == "jpe"
extension = "." + ext if ext.present?
end
tmp = Tempfile.new([tmp_file_name, extension])
File.open(tmp.path, "wb") do |f|
while f.size <= max_file_size && data = downloaded.read(512.kilobytes)
f.write(data)
tmp = Tempfile.new([tmp_file_name, tmp_file_ext])
tmp.binmode
end
tmp.write(chunk)
throw :done if tmp.size > max_file_size
end
tmp&.rewind
tmp
ensure
downloaded&.close
end
def self.optimize_image!(filename)

View File

@ -96,9 +96,7 @@ class FinalDestination
uri = URI(uri.to_s)
end
unless validate_uri
return nil
end
return nil unless validate_uri
result, (location, cookie) = safe_get(uri, &blk)
@ -120,9 +118,7 @@ class FinalDestination
return nil if !uri
extra = nil
if cookie
extra = { 'Cookie' => cookie }
end
extra = { 'Cookie' => cookie } if cookie
get(uri, redirects - 1, extra_headers: extra, &blk)
elsif result == :ok
@ -251,7 +247,6 @@ class FinalDestination
end
def is_dest_valid?
return false unless @uri && @uri.host
# Whitelisted hosts
@ -260,9 +255,7 @@ class FinalDestination
hostname_matches?(Discourse.base_url_no_prefix)
if SiteSetting.whitelist_internal_hosts.present?
SiteSetting.whitelist_internal_hosts.split('|').each do |h|
return true if @uri.hostname.downcase == h.downcase
end
return true if SiteSetting.whitelist_internal_hosts.split("|").any? { |h| h.downcase == @uri.hostname.downcase }
end
address_s = @opts[:lookup_ip].call(@uri.hostname)
@ -331,12 +324,10 @@ class FinalDestination
protected
def safe_get(uri)
result = nil
unsafe_close = false
safe_session(uri) do |http|
headers = request_headers.merge(
'Accept-Encoding' => 'gzip',
'Host' => uri.host
@ -350,9 +341,8 @@ class FinalDestination
end
if Net::HTTPSuccess === resp
resp.decode_content = true
resp.read_body { |chunk|
resp.read_body do |chunk|
read_next = true
catch(:done) do
@ -370,19 +360,19 @@ class FinalDestination
http.finish
raise StandardError
end
}
end
result = :ok
else
catch(:done) do
yield resp, nil, nil
end
end
end
end
result
rescue StandardError
if unsafe_close
:ok
else
raise
end
unsafe_close ? :ok : raise
end
def safe_session(uri)

View File

@ -507,8 +507,8 @@ describe CookedPostProcessor do
</html>
HTML
stub_request(:head, url).to_return(status: 200)
stub_request(:get , url).to_return(status: 200, body: body)
stub_request(:head, url)
stub_request(:get , url).to_return(body: body)
FinalDestination.stubs(:lookup_ip).returns('1.2.3.4')
# not an ideal stub but shipping the whole image to fast image can add

View File

@ -15,7 +15,6 @@ describe FileHelper do
it "correctly raises an OpenURI HTTP error if it gets a 404 even with redirect" do
url = "http://fourohfour.com/404"
stub_request(:head, url).to_return(status: 404, body: "404")
stub_request(:get, url).to_return(status: 404, body: "404")
expect do
@ -36,7 +35,6 @@ describe FileHelper do
it "correctly raises an OpenURI HTTP error if it gets a 404" do
url = "http://fourohfour.com/404"
stub_request(:head, url).to_return(status: 404, body: "404")
stub_request(:get, url).to_return(status: 404, body: "404")
expect do

View File

@ -176,7 +176,7 @@ describe FinalDestination do
'Set-Cookie' => 'evil=trout'
}
)
stub_request(:head, 'https://discourse.org').to_return(status: 200)
stub_request(:head, 'https://discourse.org')
end
context "when the status code is 405" do
@ -218,7 +218,6 @@ describe FinalDestination do
stub_request(:head, "https://eviltrout.com")
.with(headers: { "Cookie" => "bar=1; baz=2; foo=219ffwef9w0f" })
.to_return(status: 200, body: "")
final = FinalDestination.new("https://eviltrout.com", opts)
expect(final.resolve.to_s).to eq("https://eviltrout.com")
@ -229,14 +228,13 @@ describe FinalDestination do
it "should use the correct format for cookies when there is only one cookie" do
stub_request(:head, "https://eviltrout.com")
.to_return(status: 302, body: "" , headers: {
.to_return(status: 302, headers: {
"Location" => "https://eviltrout.com",
"Set-Cookie" => "foo=219ffwef9w0f; expires=Mon, 19-Feb-2018 10:44:24 GMT; path=/; domain=eviltrout.com"
})
stub_request(:head, "https://eviltrout.com")
.with(headers: { "Cookie" => "foo=219ffwef9w0f" })
.to_return(status: 200, body: "")
final = FinalDestination.new("https://eviltrout.com", opts)
expect(final.resolve.to_s).to eq("https://eviltrout.com")
@ -246,7 +244,7 @@ describe FinalDestination do
it "should use the correct format for cookies when there are multiple cookies" do
stub_request(:head, "https://eviltrout.com")
.to_return(status: 302, body: "" , headers: {
.to_return(status: 302, headers: {
"Location" => "https://eviltrout.com",
"Set-Cookie" => ["foo=219ffwef9w0f; expires=Mon, 19-Feb-2018 10:44:24 GMT; path=/; domain=eviltrout.com",
"bar=1",
@ -255,7 +253,6 @@ describe FinalDestination do
stub_request(:head, "https://eviltrout.com")
.with(headers: { "Cookie" => "bar=1; baz=2; foo=219ffwef9w0f" })
.to_return(status: 200, body: "")
final = FinalDestination.new("https://eviltrout.com", opts)
expect(final.resolve.to_s).to eq("https://eviltrout.com")
@ -267,7 +264,6 @@ describe FinalDestination do
describe '.get' do
it "can correctly stream with a redirect" do
FinalDestination.clear_https_cache!("wikipedia.com")
stub_request(:get, "http://wikipedia.com/").
@ -399,13 +395,12 @@ describe FinalDestination do
stub_request(:head, "http://wikipedia.com/image.png")
.to_return(status: 302, body: "", headers: { location: 'https://wikipedia.com/image.png' })
stub_request(:head, "https://wikipedia.com/image.png")
.to_return(status: 200, body: "", headers: [])
fd('http://wikipedia.com/image.png').resolve
stub_request(:head, "https://wikipedia.com/image2.png")
.to_return(status: 200, body: "", headers: [])
fd('http://wikipedia.com/image2.png').resolve
end

View File

@ -71,11 +71,8 @@ describe InlineOneboxer do
it "will crawl anything if allowed to" do
SiteSetting.enable_inline_onebox_on_all_domains = true
# Final destination does a HEAD and a GET
stub_request(:head, "https://eviltrout.com/some-path").to_return(status: 200)
stub_request(:get, "https://eviltrout.com/some-path").
to_return(status: 200, body: "<html><head><title>a blog</title></head></html>", headers: {})
to_return(status: 200, body: "<html><head><title>a blog</title></head></html>")
onebox = InlineOneboxer.lookup(
"https://eviltrout.com/some-path",
@ -90,11 +87,8 @@ describe InlineOneboxer do
it "will not return a onebox if it does not meet minimal length" do
SiteSetting.enable_inline_onebox_on_all_domains = true
# Final destination does a HEAD and a GET
stub_request(:head, "https://eviltrout.com/some-path").to_return(status: 200)
stub_request(:get, "https://eviltrout.com/some-path").
to_return(status: 200, body: "<html><head><title>a</title></head></html>", headers: {})
to_return(status: 200, body: "<html><head><title>a</title></head></html>")
onebox = InlineOneboxer.lookup(
"https://eviltrout.com/some-path",

View File

@ -4,8 +4,8 @@ require_dependency 'oneboxer'
describe Oneboxer do
it "returns blank string for an invalid onebox" do
stub_request(:head, "http://boom.com")
stub_request(:get, "http://boom.com").to_return(body: "")
stub_request(:head, "http://boom.com").to_return(body: "")
expect(Oneboxer.preview("http://boom.com")).to eq("")
expect(Oneboxer.onebox("http://boom.com")).to eq("")

View File

@ -952,10 +952,8 @@ HTML
SiteSetting.enable_inline_onebox_on_all_domains = true
InlineOneboxer.purge("http://cnn.com/a")
stub_request(:head, "http://cnn.com/a").to_return(status: 200)
stub_request(:get, "http://cnn.com/a").
to_return(status: 200, body: "<html><head><title>news</title></head></html>", headers: {})
to_return(status: 200, body: "<html><head><title>news</title></head></html>")
expect(PrettyText.cook("- http://cnn.com/a\n- a http://cnn.com/a").split("news").length).to eq(3)
expect(PrettyText.cook("- http://cnn.com/a\n - a http://cnn.com/a").split("news").length).to eq(3)
@ -965,10 +963,8 @@ HTML
SiteSetting.enable_inline_onebox_on_all_domains = true
InlineOneboxer.purge("http://cnn.com?a")
stub_request(:head, "http://cnn.com?a").to_return(status: 200)
stub_request(:get, "http://cnn.com?a").
to_return(status: 200, body: "<html><head><title>news</title></head></html>", headers: {})
to_return(status: 200, body: "<html><head><title>news</title></head></html>")
expect(PrettyText.cook("- http://cnn.com?a\n- a http://cnn.com?a").split("news").length).to eq(3)
expect(PrettyText.cook("- http://cnn.com?a\n - a http://cnn.com?a").split("news").length).to eq(3)
@ -981,9 +977,8 @@ HTML
SiteSetting.enable_inline_onebox_on_all_domains = true
InlineOneboxer.purge("http://cnn.com/")
stub_request(:head, "http://cnn.com/").to_return(status: 200)
stub_request(:get, "http://cnn.com/").
to_return(status: 200, body: "<html><head><title>news</title></head></html>", headers: {})
to_return(status: 200, body: "<html><head><title>news</title></head></html>")
expect(PrettyText.cook("- http://cnn.com/\n- a http://cnn.com/").split("news").length).to eq(1)
expect(PrettyText.cook("- cnn.com\n - a http://cnn.com/").split("news").length).to eq(1)

View File

@ -35,11 +35,8 @@ describe OneboxController do
url = "http://noodle.com/"
stub_request(:head, url).
to_return(status: 200, body: "", headers: {}).then.to_raise
stub_request(:get, url)
.to_return(status: 200, headers: {}, body: onebox_html).then.to_raise
stub_request(:head, url)
stub_request(:get, url).to_return(body: onebox_html).then.to_raise
get :show, params: { url: url, refresh: "true" }, format: :json

View File

@ -59,21 +59,20 @@ describe UploadsController do
expect(id).to be
end
it 'is successful with synchronous api' do
it 'is successful with api' do
SiteSetting.authorized_extensions = "*"
controller.stubs(:is_api?).returns(true)
FinalDestination.stubs(:lookup_ip).returns("1.2.3.4")
Jobs.expects(:enqueue).with(:create_avatar_thumbnails, anything)
stub_request(:head, 'http://example.com/image.png')
stub_request(:get, "http://example.com/image.png").to_return(body: File.read('spec/fixtures/images/logo.png'))
url = "http://example.com/image.png"
png = File.read(Rails.root + "spec/fixtures/images/logo.png")
post :create, params: {
url: 'http://example.com/image.png',
type: "avatar",
synchronous: true,
format: :json
}
stub_request(:get, url).to_return(status: 200, body: png)
post :create, params: { url: url, type: "avatar", format: :json }
json = ::JSON.parse(response.body)

View File

@ -31,7 +31,6 @@ describe UserAvatarsController do
SiteSetting.s3_upload_bucket = "test"
SiteSetting.s3_cdn_url = "http://cdn.com"
stub_request(:head, "http://cdn.com/something/else")
stub_request(:get, "http://cdn.com/something/else").to_return(body: 'image')
GlobalSetting.expects(:cdn_url).returns("http://awesome.com/boom")

View File

@ -99,9 +99,8 @@ describe Jobs::PollFeed do
SiteSetting.feed_polling_url = 'https://blog.discourse.org/feed/'
SiteSetting.embed_by_username = embed_by_username
stub_request(:head, SiteSetting.feed_polling_url).to_return(status: 200)
stub_request(:head, SiteSetting.feed_polling_url)
stub_request(:get, SiteSetting.feed_polling_url).to_return(
status: 200,
body: file_from_fixtures('feed.rss', 'feed').read,
headers: { "Content-Type" => "application/rss+xml" }
)
@ -116,9 +115,8 @@ describe Jobs::PollFeed do
SiteSetting.feed_polling_url = 'https://blog.discourse.org/feed/atom/'
SiteSetting.embed_by_username = embed_by_username
stub_request(:head, SiteSetting.feed_polling_url).to_return(status: 200)
stub_request(:head, SiteSetting.feed_polling_url)
stub_request(:get, SiteSetting.feed_polling_url).to_return(
status: 200,
body: file_from_fixtures('feed.atom', 'feed').read,
headers: { "Content-Type" => "application/atom+xml" }
)

View File

@ -11,10 +11,8 @@ describe Jobs::PullHotlinkedImages do
before do
stub_request(:get, image_url).to_return(body: png, headers: { "Content-Type" => "image/png" })
stub_request(:head, image_url)
stub_request(:head, broken_image_url).to_return(status: 404)
stub_request(:get, broken_image_url).to_return(status: 404)
stub_request(:get, large_image_url).to_return(body: large_png, headers: { "Content-Type" => "image/png" })
stub_request(:head, large_image_url)
SiteSetting.crawl_images = true
SiteSetting.download_remote_images_to_local = true
SiteSetting.max_image_size_kb = 2
@ -59,7 +57,6 @@ describe Jobs::PullHotlinkedImages do
it 'replaces images without extension' do
url = image_url.sub(/\.[a-zA-Z0-9]+$/, '')
stub_request(:get, url).to_return(body: png, headers: { "Content-Type" => "image/png" })
stub_request(:head, url)
post = Fabricate(:post, raw: "<img src='#{url}'>")
Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
@ -75,8 +72,8 @@ describe Jobs::PullHotlinkedImages do
before do
SiteSetting.queue_jobs = true
stub_request(:get, url).to_return(body: '')
stub_request(:head, url)
stub_request(:get, url).to_return(body: '')
stub_request(:get, api_url).to_return(body: "{
\"query\": {
\"pages\": {
@ -91,7 +88,6 @@ describe Jobs::PullHotlinkedImages do
}
}
}")
stub_request(:head, api_url)
end
it 'replaces image src' do

View File

@ -9,7 +9,6 @@ describe Jobs::UpdateGravatar do
png = Base64.decode64("R0lGODlhAQABALMAAAAAAIAAAACAAICAAAAAgIAAgACAgMDAwICAgP8AAAD/AP//AAAA//8A/wD//wBiZCH5BAEAAA8ALAAAAAABAAEAAAQC8EUAOw==")
url = "https://www.gravatar.com/avatar/d10ca8d11301c2f4993ac2279ce4b930.png?s=360&d=404"
stub_request(:head, url).to_return(status: 200)
stub_request(:get, url).to_return(body: png)
SiteSetting.automatically_download_gravatars = true

View File

@ -47,7 +47,7 @@ describe UserAvatar do
describe 'when avatar url returns an invalid status code' do
it 'should not do anything' do
url = "http://thisfakesomething.something.com/"
stub_request(:head, url).to_return(status: 404)
UserAvatar.import_url_for_user(url, user)
user.reload

View File

@ -3,20 +3,14 @@ require 'rails_helper'
describe StaticController do
context '#favicon' do
before do
# this is a mess in test, will fix in a future commit
FinalDestination.stubs(:lookup_ip).returns('1.2.3.4')
end
let(:png) { Base64.decode64("R0lGODlhAQABALMAAAAAAIAAAACAAICAAAAAgIAAgACAgMDAwICAgP8AAAD/AP//AAAA//8A/wD//wBiZCH5BAEAAA8ALAAAAAABAAEAAAQC8EUAOw==") }
before { FinalDestination.stubs(:lookup_ip).returns("1.2.3.4") }
it 'returns the default favicon for a missing download' do
url = "https://fav.icon/#{SecureRandom.hex}.png"
url = "https://somewhere1.over.rainbow/#{SecureRandom.hex}.png"
stub_request(:head, url).
with(headers: { 'Host' => 'somewhere1.over.rainbow' }).
to_return(status: 404, body: "", headers: {})
stub_request(:get, url).to_return(status: 404)
SiteSetting.favicon_url = url
@ -30,14 +24,9 @@ describe StaticController do
end
it 'can proxy a favicon correctly' do
url = "https://somewhere.over.rainbow/#{SecureRandom.hex}.png"
url = "https://fav.icon/#{SecureRandom.hex}.png"
stub_request(:head, url).
with(headers: { 'Host' => 'somewhere.over.rainbow' }).
to_return(status: 200, body: "", headers: {})
stub_request(:get, url).
to_return(status: 200, body: png, headers: {})
stub_request(:get, url).to_return(status: 200, body: png)
SiteSetting.favicon_url = url
@ -51,7 +40,6 @@ describe StaticController do
context '#brotli_asset' do
it 'returns a non brotli encoded 404 if asset is missing' do
get "/brotli_asset/missing.js"
expect(response.status).to eq(404)