FIX: Support new layout on Amazon product pages (#16091)

Some Amazon product pages use a new HTML structure, so the Onebox engine was previously unable to extract the price and/or description from them. This change should allow these pages to be Oneboxed.
This commit is contained in:
jbrw 2022-03-04 18:31:53 -05:00 committed by GitHub
parent d760fd4074
commit fc30669db2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 10537 additions and 3 deletions

View File

@ -124,7 +124,12 @@ module Onebox
elsif !raw.css("#priceblock_ourprice").inner_text.empty?
raw.css("#priceblock_ourprice").inner_text
else
raw.css(".mediaMatrixListItem.a-active .a-color-price").inner_text
result = raw.css('#corePrice_feature_div .a-price .a-offscreen').inner_text
if result.blank?
result = raw.css(".mediaMatrixListItem.a-active .a-color-price").inner_text
end
result
end
end
@ -215,8 +220,10 @@ module Onebox
summary = raw.at("#productDescription")
description = og.description || summary&.inner_text
description ||= raw.css("meta[name=description]").first&.[]("content")
description = og.description || summary&.inner_text&.strip
if description.blank?
description = raw.css("meta[name=description]").first&.[]("content")
end
result[:description] = CGI.unescapeHTML(Onebox::Helpers.truncate(description, 250)) if description
end

File diff suppressed because one or more lines are too long

View File

@ -215,4 +215,34 @@ describe Onebox::Engine::AmazonOnebox do
end
end
# Checks the onebox HTML rendered from the "amazon-alternate" saved response,
# which stands in for a product page served with the alternate page layout.
context "alternate page layout response from Amazon" do
  let(:link) { "https://www.amazon.com/dp/B07FQ7M16H" }
  let(:html) { described_class.new(link).to_html }

  before do
    # Both the short /dp/ link and the long-form product URL answer with the
    # same fixture (presumably the long form is requested when the engine
    # follows the page's canonical URL -- TODO confirm).
    stubbed_urls = [
      link,
      "https://www.amazon.com/Lnchett-Nibbler-Quality-Attachment-Straight/dp/B07FQ7M16H"
    ]

    stubbed_urls.each do |url|
      stub_request(:get, url)
        .to_return(status: 200, body: onebox_response("amazon-alternate"))
    end
  end

  describe "#to_html" do
    it "includes image" do
      image_url = "https://m.media-amazon.com/images/I/71y4BRqNP7L._AC_SL1500_.jpg"
      expect(html).to include(image_url)
    end

    it "includes description" do
      description_text = "Drill Attachment for Straight Curve and Circle Cutting, Maximum 14 Gauge Steel"
      expect(html).to include(description_text)
    end

    it "includes price" do
      price_text = "$37.99"
      expect(html).to include(price_text)
    end

    it "includes title" do
      title_text = "Quality Nibbler Drill Attachment..."
      expect(html).to include(title_text)
    end
  end
end
end