From 8ecf313a81242ade17d5369357937278c27cd0e6 Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 28 Sep 2017 16:35:27 +1000 Subject: [PATCH] FIX: correctly raise errors when downloads fail This corrects an issue where we are hitting Gravatar for 404 over and over Also ensures file download properly reports errors --- app/jobs/regular/pull_hotlinked_images.rb | 2 ++ app/models/user_avatar.rb | 8 +++-- lib/file_helper.rb | 22 ++++++++++-- lib/final_destination.rb | 39 +++++++++++++++------ spec/components/file_helper_spec.rb | 42 +++++++++++++++++++++++ spec/jobs/update_gravatar_spec.rb | 2 +- 6 files changed, 99 insertions(+), 16 deletions(-) diff --git a/app/jobs/regular/pull_hotlinked_images.rb b/app/jobs/regular/pull_hotlinked_images.rb index 939d7bc50c2..0a1d5d6eba4 100644 --- a/app/jobs/regular/pull_hotlinked_images.rb +++ b/app/jobs/regular/pull_hotlinked_images.rb @@ -45,6 +45,8 @@ module Jobs ) rescue Discourse::InvalidParameters log(:error, "InvalidParameters while downloading hotlinked image (#{src}) for post: #{post_id}") + rescue => e + log(:error, "Failed to download image #{e}") end if hotlinked if File.size(hotlinked.path) <= @max_size diff --git a/app/models/user_avatar.rb b/app/models/user_avatar.rb index bb3146970f0..a247bb47529 100644 --- a/app/models/user_avatar.rb +++ b/app/models/user_avatar.rb @@ -19,14 +19,18 @@ class UserAvatar < ActiveRecord::Base self.last_gravatar_download_attempt = Time.new max = Discourse.avatar_sizes.max - gravatar_url = "http://www.gravatar.com/avatar/#{email_hash}.png?s=#{max}&d=404" + gravatar_url = "https://www.gravatar.com/avatar/#{email_hash}.png?s=#{max}&d=404" + + # follow redirects in case gravatar change rules on us tempfile = FileHelper.download( gravatar_url, max_file_size: SiteSetting.max_image_size_kb.kilobytes, tmp_file_name: "gravatar", skip_rate_limit: true, - verbose: false + verbose: false, + follow_redirect: true ) + if tempfile upload = UploadCreator.new(tempfile, 'gravatar.png', origin: gravatar_url, type: "avatar").create_for(user_id) diff --git a/lib/file_helper.rb b/lib/file_helper.rb index f008ac72c45..6de82ad7f29 100644 --- a/lib/file_helper.rb +++ b/lib/file_helper.rb @@ -15,6 +15,10 @@ class FileHelper filename =~ images_regexp end + class FakeIO + attr_accessor :status + end + def self.download(url, max_file_size:, tmp_file_name:, @@ -29,13 +33,25 @@ class FileHelper url = "https:" + url if url.start_with?("//") raise Discourse::InvalidParameters.new(:url) unless url =~ /^https?:\/\// - uri = FinalDestination.new( + uri = + + dest = FinalDestination.new( url, max_redirects: follow_redirect ? 5 : 1, skip_rate_limit: skip_rate_limit - ).resolve + ) + uri = dest.resolve - unless uri.present? + if !uri && dest.status_code.to_i >= 400 + # attempt error API compatability + io = FakeIO.new + io.status = [dest.status_code.to_s, ""] + + # TODO perhaps translate and add Discourse::DownloadError + raise OpenURI::HTTPError.new("#{dest.status_code} Error", io) + end + + unless uri log(:error, "FinalDestination did not work for: #{url}") if verbose return end diff --git a/lib/final_destination.rb b/lib/final_destination.rb index 1a732a952a2..e4223e7a558 100644 --- a/lib/final_destination.rb +++ b/lib/final_destination.rb @@ -6,8 +6,7 @@ require 'rate_limiter' # Determine the final endpoint for a Web URI, following redirects class FinalDestination - attr_reader :status - attr_reader :cookie + attr_reader :status, :cookie, :status_code def initialize(url, opts = nil) @url = url @@ -88,6 +87,10 @@ class FinalDestination ) location = nil + headers = nil + + response_status = response.status.to_i + case response.status when 200 @status = :resolved @@ -95,27 +98,39 @@ class FinalDestination when 405, 409, 501 get_response = small_get(headers) - if get_response.code.to_i == 200 + response_status = get_response.code.to_i + if response_status == 200 @status = :resolved return @uri end + headers = {} if cookie_val = get_response.get_fields('set-cookie') - @cookie = cookie_val.join + headers['set-cookie'] = cookie_val.join end + # TODO this is confusing why grap location for anything not + # between 300-400 ? if location_val = get_response.get_fields('location') - location = location_val.join + headers['location'] = location_val.join end - else + end + + unless headers + headers = {} response.headers.each do |k, v| - case k.downcase - when 'set-cookie' then @cookie = v - when 'location' then location = v - end + headers[k.to_s.downcase] = v end end + if (300..399).include?(response_status) + location = headers["location"] + end + + if set_cookie = headers["set-cookie"] + @cookie = set_cookie + end + if location location = "#{@uri.scheme}://#{@uri.host}#{location}" if location[0] == "/" @uri = URI(location) rescue nil @@ -123,6 +138,10 @@ class FinalDestination return resolve end + # this is weird an exception seems better + @status = :failure + @status_code = response.status + nil rescue Excon::Errors::Timeout nil diff --git a/spec/components/file_helper_spec.rb b/spec/components/file_helper_spec.rb index 4f085de7b2c..d5d0ccd86bb 100644 --- a/spec/components/file_helper_spec.rb +++ b/spec/components/file_helper_spec.rb @@ -12,6 +12,48 @@ describe FileHelper do end describe "download" do + + it "correctly raises an OpenURI HTTP error if it gets a 404 even with redirect" do + url = "http://fourohfour.com/404" + stub_request(:head, url).to_return(status: 404, body: "404") + stub_request(:get, url).to_return(status: 404, body: "404") + + expect do + begin + FileHelper.download( + url, + max_file_size: 10000, + tmp_file_name: 'trouttmp', + follow_redirect: true + ) + rescue => e + expect(e.io.status[0]).to eq("404") + raise + end + end.to raise_error(OpenURI::HTTPError) + end + + it "correctly raises an OpenURI HTTP error if it gets a 404" do + url = "http://fourohfour.com/404" + + stub_request(:head, url).to_return(status: 404, body: "404") + stub_request(:get, url).to_return(status: 404, body: "404") + + expect do + begin + FileHelper.download( + url, + max_file_size: 10000, + tmp_file_name: 'trouttmp', + follow_redirect: false + ) + rescue => e + expect(e.io.status[0]).to eq("404") + raise + end + end.to raise_error(OpenURI::HTTPError) + end + it "returns a file with the image" do tmpfile = FileHelper.download( url, diff --git a/spec/jobs/update_gravatar_spec.rb b/spec/jobs/update_gravatar_spec.rb index 403de9a3604..096a4d6b3c3 100644 --- a/spec/jobs/update_gravatar_spec.rb +++ b/spec/jobs/update_gravatar_spec.rb @@ -8,7 +8,7 @@ describe Jobs::UpdateGravatar do expect(user.user_avatar.gravatar_upload_id).to eq(nil) png = Base64.decode64("R0lGODlhAQABALMAAAAAAIAAAACAAICAAAAAgIAAgACAgMDAwICAgP8AAAD/AP//AAAA//8A/wD//wBiZCH5BAEAAA8ALAAAAAABAAEAAAQC8EUAOw==") - url = "http://www.gravatar.com/avatar/d10ca8d11301c2f4993ac2279ce4b930.png?s=360&d=404" + url = "https://www.gravatar.com/avatar/d10ca8d11301c2f4993ac2279ce4b930.png?s=360&d=404" stub_request(:head, url).to_return(status: 200) stub_request(:get, url).to_return(body: png)