FIX: Handle sites with more than 1 JSON-LD element (#17095)

A follow-up to #17007
This commit is contained in:
Jarek Radosz 2022-06-15 02:55:55 +02:00 committed by GitHub
parent 4d3c1ceb44
commit f723b4c322
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 21 additions and 20 deletions

View File

@@ -12,32 +12,25 @@ module Onebox
private
# Extracts Movie data from a document's JSON-LD as a hash ({} when nothing matches).
# NOTE(review): this span is a diff hunk whose +/- markers were lost, so the
# pre-commit and post-commit bodies of `extract` are interleaved line by line.
# Going by the commit title, the new version appears to walk every
# `script[type="application/ld+json"]` element, return the hash of the first
# one whose "@type" matches MOVIE_JSON_LD_TYPE, and fall back to {} —
# confirm against the real file before relying on any single line below.
def extract(doc)
extracted_json = extract_json_from(doc)
parsed_json = parse_json(extracted_json)
return {} if Onebox::Helpers::blank?(doc)
doc.css('script[type="application/ld+json"]').each do |element|
parsed_json = parse_json(element.text)
extracted =
case parsed_json["@type"]
when MOVIE_JSON_LD_TYPE
Onebox::Movie.new(parsed_json)
else
{}
return Onebox::Movie.new(parsed_json).to_h
end
end
extracted.to_h
end
# NOTE(review): presumably the helper removed by this commit. It concatenated the
# text of all matching script tags via `doc.search(...).text`. The `{}` just
# before the final `end` looks like the new `extract` fallback return rather
# than part of this helper — TODO confirm.
def extract_json_from(doc)
return {} if Onebox::Helpers::blank?(doc)
json_ld = doc.search('script[type="application/ld+json"]').text
return {} if Onebox::Helpers::blank?(json_ld)
json_ld
{}
end
# Parses a JSON-LD string with JSON[]. On JSON::ParserError the error is
# reported through Discourse.warn_exception and an empty hash is returned.
def parse_json(json)
begin
JSON[json]
rescue JSON::ParserError => e
# NOTE(review): the two calls below look like the removed and the added version
# of a single line (diff markers were lost); only the message text differs.
Discourse.warn_exception(e, message: "Error parsing JSON-LD json: #{json}")
Discourse.warn_exception(e, message: "Error parsing JSON-LD: #{json}")
{}
end
end

View File

@@ -7,27 +7,26 @@ describe Onebox::JsonLd do
invalid_json = "{\"@type\":invalid-json}"
doc = Nokogiri::HTML("<script type=\"application/ld+json\">#{invalid_json}</script>")
Discourse.expects(:warn_exception).with(
instance_of(JSON::ParserError), { message: "Error parsing JSON-LD json: #{invalid_json}" }
instance_of(JSON::ParserError), { message: "Error parsing JSON-LD: #{invalid_json}" }
)
json_ld = described_class.new(doc)
expect(json_ld.data).to eq({})
end
# A document whose only script tag has a non-JSON-LD type must yield {}.
# NOTE(review): the two `it` lines appear to be the old and new wording of the
# same example description (diff markers were lost) — only one exists in the file.
it 'returns an empty hash if there is no json_ld script tag' do
it 'returns an empty hash if there is no JSON-LD script tag' do
doc = Nokogiri::HTML("<script type=\"something else\"></script>")
json_ld = described_class.new(doc)
expect(json_ld.data).to eq({})
end
# An empty application/ld+json script tag must yield {}.
# NOTE(review): the two `it` lines appear to be the old and new wording of the
# same example description (diff markers were lost) — only one exists in the file.
it 'returns an empty hash if there is no json_ld data' do
it 'returns an empty hash if there is no JSON-LD data' do
doc = Nokogiri::HTML("<script type=\"application/ld+json\"></script>")
json_ld = described_class.new(doc)
expect(json_ld.data).to eq({})
end
it 'returns an empty hash if the type of JSONLD data is not Movie' do
it 'returns an empty hash if the type of JSON-LD data is not Movie' do
doc = Nokogiri::HTML("<script type=\"application/ld+json\">{\"@type\":\"Something Else\",\"aggregateRating\":{\"@type\":\"AggregateRating\",\"ratingCount\":806928,\"bestRating\":10,\"worstRating\":1}}</script>")
json_ld = described_class.new(doc)
expect(json_ld.data).to eq({})
@@ -39,6 +38,15 @@ describe Onebox::JsonLd do
expect(json_ld.data).to eq(expected_movie_hash)
end
# Regression example for pages carrying several JSON-LD scripts: a second script
# with "@type" "WebPage" is appended to the body of the 'imdb' onebox response,
# and the Movie hash must still be returned.
it 'does not fail when there is more than one JSON-LD element' do
doc = Nokogiri::HTML(onebox_response('imdb'))
doc.css("body")[0] << "<script type=\"application/ld+json\">{\"@context\":\"http://schema.org\",\"@type\":\"WebPage\",\"url\":\"https:\/\/imdb.com\",\"description\":\"Movies\"}</script>"
# No parse warning may be logged while handling the extra element.
Discourse.expects(:warn_exception).never
json_ld = described_class.new(doc)
expect(json_ld.data).to eq(expected_movie_hash)
end
private
def expected_movie_hash