FIX: Do not raise if title cannot be crawled (#16247)

If the crawled page returned an error, `FinalDestination#safe_get`
yielded `nil` for the `uri` and `chunk` arguments, which the
`RetrieveTitle` crawler did not guard against. Another problem was that
`get` did not handle the case where `safe_get` failed and did not
return the `location` and `set_cookie` headers.
Dan Ungureanu, 2022-03-22 20:13:27 +02:00 (committed by GitHub)
parent 3f98af73ce
commit 8e9cbe9db4
3 changed files with 10 additions and 0 deletions
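
To make the failure mode concrete, here is a minimal standalone sketch of
the two guards (this is not the actual Discourse code; the `fake_*` methods
are invented names that only mirror the calling shape of
`FinalDestination#safe_get`, `FinalDestination#get` and `RetrieveTitle.crawl`):

require "ostruct"

# Stands in for safe_get: on failure it yields nils to the block and
# returns a falsy result with no headers.
def fake_safe_get(ok:, &blk)
  if ok
    blk.call("200 OK", "<title>hello</title>", "https://example.com")
    [true, OpenStruct.new(set_cookie: nil, location: nil)]
  else
    blk.call(nil, nil, nil)
    [nil, nil]
  end
end

# Stands in for get: without the early return, `headers.set_cookie` below
# would raise NoMethodError when headers is nil.
def fake_get(ok:, &blk)
  result, headers = fake_safe_get(ok: ok, &blk)
  return nil if !result

  [headers.set_cookie, headers.location]
end

# Stands in for the title crawler: without the `throw :done` guard,
# `buffer << chunk` would raise TypeError when chunk is nil.
def fake_crawl(ok:)
  buffer = +""
  catch(:done) do
    fake_get(ok: ok) do |_response, chunk, uri|
      throw :done if uri.nil?
      buffer << chunk
    end
  end
  buffer[%r{<title>(.*)</title>}, 1]
end

p fake_crawl(ok: true)   # => "hello"
p fake_crawl(ok: false)  # => nil instead of raising

With both guards in place a failed crawl simply yields no title, which is
what the new spec below asserts.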

@@ -145,6 +145,8 @@ class FinalDestination
     return if @stop_at_blocked_pages && blocked_domain?(@uri)
     result, headers_subset = safe_get(@uri, &blk)
+    return nil if !result
+
     cookie = headers_subset.set_cookie
     location = headers_subset.location

@@ -62,6 +62,8 @@ module RetrieveTitle
     fd.get do |_response, chunk, uri|
       unless Net::HTTPRedirection === _response
+        throw :done if uri.blank?
+
         if current
           current << chunk
         else

@@ -136,6 +136,12 @@ describe RetrieveTitle do
       expect(RetrieveTitle.crawl("https://cat.com/meow/no-onebox")).to be_blank
     end
+
+    it "doesn't return a title if response is unsuccessful" do
+      stub_request(:get, "https://example.com").to_return(status: 404, body: "")
+
+      expect(RetrieveTitle.crawl("https://example.com")).to eq(nil)
+    end
   end
 
   context 'fetch_title' do