From 502bca2c0d23bdf72b09fa9ed6b33c20973c9645 Mon Sep 17 00:00:00 2001 From: Robin Ward Date: Tue, 6 Jun 2017 13:53:49 -0400 Subject: [PATCH] FIX: If HEAD is not supported, try GET. Also set cookies --- lib/final_destination.rb | 71 ++++++++++++++++++----- spec/components/final_destination_spec.rb | 39 +++++++++++++ 2 files changed, 95 insertions(+), 15 deletions(-) diff --git a/lib/final_destination.rb b/lib/final_destination.rb index 8ce054d19b7..6fb96b49e55 100644 --- a/lib/final_destination.rb +++ b/lib/final_destination.rb @@ -7,6 +7,7 @@ require 'rate_limiter' class FinalDestination attr_reader :status + attr_reader :cookie def initialize(url, opts=nil) @uri = URI(url) rescue nil @@ -21,6 +22,11 @@ class FinalDestination end @limit = @opts[:max_redirects] @status = :ready + @cookie = nil + end + + def self.connection_timeout + 20 end def redirected? @@ -28,9 +34,27 @@ class FinalDestination end def request_headers - { "User-Agent" => "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36", + result = { + "User-Agent" => "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36", "Accept" => "text/html", - "Host" => @uri.hostname } + "Host" => @uri.hostname + } + + result['cookie'] = @cookie if @cookie + + result + end + + def small_get(headers) + Net::HTTP.start(@uri.host, @uri.port, use_ssl: @uri.is_a?(URI::HTTPS)) do |http| + http.open_timeout = FinalDestination.connection_timeout + http.read_timeout = FinalDestination.connection_timeout + + request = Net::HTTP::Get.new(@uri.request_uri, headers) + http.request(request) do |response| + return response + end + end end def resolve @@ -41,17 +65,41 @@ class FinalDestination return nil unless validate_uri headers = request_headers - head = Excon.head(@uri.to_s, read_timeout: 20, headers: headers) + response = Excon.head( + @uri.to_s, + read_timeout: FinalDestination.connection_timeout, + headers: headers + ) - # If the site does not allow HEAD, just try the url - return @uri if head.status == 405 - - if head.status == 200 + location = nil + case response.status + when 200 @status = :resolved return @uri + when 405, 501 + get_response = small_get(headers) + + if get_response.code.to_i == 200 + @status = :resolved + return @uri + end + + if cookie_val = get_response.get_fields('set-cookie') + @cookie = cookie_val.join + end + + if location_val = get_response.get_fields('location') + location = location_val.join + end + else + response.headers.each do |k, v| + case k.downcase + when 'set-cookie' then @cookie = v + when 'location' then location = v + end + end end - location = FinalDestination.header_for(head, 'location') if location location = "#{@uri.scheme}://#{@uri.host}#{location}" if location[0] == "/" @uri = URI(location) rescue nil @@ -127,11 +175,4 @@ class FinalDestination IPSocket::getaddress(host) end - def self.header_for(head, name) - header = head.headers.detect do |k, _| - name == k.downcase - end - header[1] if header - end - end diff --git a/spec/components/final_destination_spec.rb b/spec/components/final_destination_spec.rb index 5956ceb1e15..b3b263c8262 100644 --- a/spec/components/final_destination_spec.rb +++ b/spec/components/final_destination_spec.rb @@ -120,6 +120,45 @@ describe FinalDestination do expect(final.status).to eq(:invalid_address) end end + + context "HEAD not supported" do + before do + stub_request(:get, 'https://eviltrout.com').to_return( + status: 301, + headers: { + "Location" => 'https://discourse.org', + 'Set-Cookie' => 'evil=trout' + } + ) + stub_request(:head, 'https://discourse.org').to_return(status: 200) + end + + context "when the status code is 405" do + before do + stub_request(:head, 'https://eviltrout.com').to_return(status: 405) + end + + it "will try a GET" do + final = FinalDestination.new('https://eviltrout.com', opts) + expect(final.resolve.to_s).to eq('https://discourse.org') + expect(final.status).to eq(:resolved) + expect(final.cookie).to eq('evil=trout') + end + end + + context "when the status code is 501" do + before do + stub_request(:head, 'https://eviltrout.com').to_return(status: 501) + end + + it "will try a GET" do + final = FinalDestination.new('https://eviltrout.com', opts) + expect(final.resolve.to_s).to eq('https://discourse.org') + expect(final.status).to eq(:resolved) + expect(final.cookie).to eq('evil=trout') + end + end + end end describe '.validate_uri' do