some content
+
diff --git a/app/models/topic_embed.rb b/app/models/topic_embed.rb index 39cab1a7983..bb32d3ab4db 100644 --- a/app/models/topic_embed.rb +++ b/app/models/topic_embed.rb @@ -114,17 +114,27 @@ class TopicEmbed < ActiveRecord::Base end def self.find_remote(url) - require "ruby-readability" - url = UrlHelper.normalized_encode(url) - original_uri = URI.parse(url) + URI.parse(url) # ensure url parses, will raise if not fd = FinalDestination.new(url, validate_uri: true, max_redirects: 5, follow_canonical: true) uri = fd.resolve return if uri.blank? + begin + html = uri.read + rescue OpenURI::HTTPError, Net::OpenTimeout + return + end + + parse_html(html, url) + end + + def self.parse_html(html, url) + require "ruby-readability" + opts = { - tags: %w[div p code pre h1 h2 h3 b em i strong a img ul li ol blockquote], + tags: %w[div p code pre h1 h2 h3 b em i strong a img ul li ol blockquote figure figcaption], attributes: %w[href src class], remove_empty_nodes: false, } @@ -139,11 +149,6 @@ class TopicEmbed < ActiveRecord::Base SiteSetting.allowed_embed_classnames if SiteSetting.allowed_embed_classnames.present? response = FetchResponse.new - begin - html = uri.read - rescue OpenURI::HTTPError, Net::OpenTimeout - return - end raw_doc = Nokogiri.HTML5(html) auth_element = @@ -200,7 +205,7 @@ class TopicEmbed < ActiveRecord::Base end end - response.body = doc.to_html + response.body = doc.at("body").children.to_html response end diff --git a/spec/models/topic_embed_spec.rb b/spec/models/topic_embed_spec.rb index 82ed4957e66..ede62d5c7c4 100644 --- a/spec/models/topic_embed_spec.rb +++ b/spec/models/topic_embed_spec.rb @@ -23,6 +23,41 @@ RSpec.describe TopicEmbed do expect(TopicEmbed.count).to eq(0) end + it "Allows figure and figcaption HTML tags" do + html = <<~HTML + +
+some content
+some content
+