From 21d1ee106526f129d6b369da94814851cbd236c3 Mon Sep 17 00:00:00 2001 From: Bianca Nenciu Date: Wed, 14 Apr 2021 18:09:55 +0300 Subject: [PATCH] FIX: Use Nokogiri and Loofah consistently (#12693) CookedPostProcessor used Loofah to parse the cooked content of a post and Nokogiri to parse cooked Oneboxes. Even though Loofah is built on top of Nokogiri, replacing an element from the cooked post (a Loofah node) with a parsed onebox (a Nokogiri node) produced a strange result which included XML namespaces. Removing the mix and using Loofah to parse Oneboxes fixed the problem. --- lib/oneboxer.rb | 9 +++------ spec/components/oneboxer_spec.rb | 11 +++++++++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/lib/oneboxer.rb b/lib/oneboxer.rb index bdca6e35749..4e0b96ef9aa 100644 --- a/lib/oneboxer.rb +++ b/lib/oneboxer.rb @@ -112,10 +112,7 @@ module Oneboxer end # Parse URLs out of HTML, returning the document when finished. - def self.each_onebox_link(string_or_doc, extra_paths: []) - doc = string_or_doc - doc = Nokogiri::HTML5::fragment(doc) if doc.is_a?(String) - + def self.each_onebox_link(doc, extra_paths: []) onebox_links = doc.css("a.#{ONEBOX_CSS_CLASS}", *extra_paths) if onebox_links.present? onebox_links.each do |link| @@ -130,14 +127,14 @@ module Oneboxer def self.apply(string_or_doc, extra_paths: nil) doc = string_or_doc - doc = Nokogiri::HTML5::fragment(doc) if doc.is_a?(String) + doc = Loofah.fragment(doc) if doc.is_a?(String) changed = false each_onebox_link(doc, extra_paths: extra_paths) do |url, element| onebox, _ = yield(url, element) next if onebox.blank? - parsed_onebox = Nokogiri::HTML5::fragment(onebox) + parsed_onebox = Loofah.fragment(onebox) next if parsed_onebox.children.blank? changed = true diff --git a/spec/components/oneboxer_spec.rb b/spec/components/oneboxer_spec.rb index 1868ec20b6d..b3c27c52e06 100644 --- a/spec/components/oneboxer_spec.rb +++ b/spec/components/oneboxer_spec.rb @@ -328,6 +328,17 @@ describe Oneboxer do

After Onebox

HTML end + + it 'does keeps SVGs valid' do + raw = "Onebox\n\nhttps://example.com" + cooked = PrettyText.cook(raw) + cooked = Oneboxer.apply(Loofah.fragment(cooked)) { '
' } + doc = Nokogiri::HTML5::fragment(cooked.to_html) + expect(doc.to_html).to match_html <<~HTML +

Onebox

+
+ HTML + end end describe '#force_get_hosts' do