FIX: Use Nokogiri and Loofah consistently (#12693)

CookedPostProcessor used Loofah to parse the cooked content of a post
and Nokogiri to parse cooked Oneboxes. Even though Loofah is built on
top of Nokogiri, replacing an element from the cooked post (a Loofah
node) with a parsed onebox (a Nokogiri node) produced a strange result
which included XML namespaces. Removing the mix and using Loofah
to parse Oneboxes fixed the problem.
This commit is contained in:
Bianca Nenciu 2021-04-14 18:09:55 +03:00 committed by GitHub
parent 07ca35670a
commit 21d1ee1065
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 14 additions and 6 deletions

View File

@ -112,10 +112,7 @@ module Oneboxer
end
# Parse URLs out of HTML, returning the document when finished.
def self.each_onebox_link(string_or_doc, extra_paths: [])
doc = string_or_doc
doc = Nokogiri::HTML5::fragment(doc) if doc.is_a?(String)
def self.each_onebox_link(doc, extra_paths: [])
onebox_links = doc.css("a.#{ONEBOX_CSS_CLASS}", *extra_paths)
if onebox_links.present?
onebox_links.each do |link|
@ -130,14 +127,14 @@ module Oneboxer
def self.apply(string_or_doc, extra_paths: nil)
doc = string_or_doc
doc = Nokogiri::HTML5::fragment(doc) if doc.is_a?(String)
doc = Loofah.fragment(doc) if doc.is_a?(String)
changed = false
each_onebox_link(doc, extra_paths: extra_paths) do |url, element|
onebox, _ = yield(url, element)
next if onebox.blank?
parsed_onebox = Nokogiri::HTML5::fragment(onebox)
parsed_onebox = Loofah.fragment(onebox)
next if parsed_onebox.children.blank?
changed = true

View File

@ -328,6 +328,17 @@ describe Oneboxer do
<p>After Onebox</p>
HTML
end
# Regression test: a onebox containing an inline SVG is spliced into a
# Loofah-parsed post. The replaced element must serialize as plain HTML,
# i.e. the <svg> markup comes back exactly as supplied by the block.
it 'keeps SVGs valid' do
  raw = "Onebox\n\nhttps://example.com"

  cooked = PrettyText.cook(raw)
  # Pass a Loofah fragment (not a String) so the onebox is inserted into a
  # Loofah document.
  cooked = Oneboxer.apply(Loofah.fragment(cooked)) { '<div><svg><path></path></svg></div>' }

  # Re-parse the serialized result to compare normalized HTML.
  doc = Nokogiri::HTML5::fragment(cooked.to_html)
  expect(doc.to_html).to match_html <<~HTML
    <p>Onebox</p>
    <div><svg><path></path></svg></div>
  HTML
end
end
describe '#force_get_hosts' do