From bff36de130f0eac213d86ccc4be856d70ca51a24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Hanol?= Date: Wed, 3 May 2017 18:29:25 +0200 Subject: [PATCH] FIX: HtmlToMarkdown should not convert empty/bad tags --- lib/html_to_markdown.rb | 21 +++++++++++++-------- spec/components/html_to_markdown_spec.rb | 6 ++++++ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/lib/html_to_markdown.rb b/lib/html_to_markdown.rb index 60f88d9095f..6e72d65fd4e 100644 --- a/lib/html_to_markdown.rb +++ b/lib/html_to_markdown.rb @@ -134,20 +134,21 @@ class HtmlToMarkdown end def visit_img(node) - if @opts[:keep_img_tags] - @stack[-1].markdown << node.to_html - else - title = node["alt"].presence || node["title"].presence - @stack[-1].markdown << "![#{title}](#{node["src"]})" + if is_valid_url?(node["src"]) + if @opts[:keep_img_tags] + @stack[-1].markdown << node.to_html + else + title = node["alt"].presence || node["title"].presence + @stack[-1].markdown << "![#{title}](#{node["src"]})" + end end end def visit_a(node) - href = node["href"] - if href.present? && (href.start_with?("http") || href.start_with?("www.")) + if is_valid_url?(node["href"]) @stack[-1].markdown << "[" traverse(node) - @stack[-1].markdown << "](#{href})" + @stack[-1].markdown << "](#{node["href"]})" else traverse(node) end @@ -203,4 +204,8 @@ class HtmlToMarkdown (lines + [""]).join("\n") end + def is_valid_url?(url) + url.present? && (url.start_with?("http") || url.start_with?("www.")) + end + end diff --git a/spec/components/html_to_markdown_spec.rb b/spec/components/html_to_markdown_spec.rb index e5a9c5b8e2c..1e622e93f18 100644 --- a/spec/components/html_to_markdown_spec.rb +++ b/spec/components/html_to_markdown_spec.rb @@ -61,6 +61,12 @@ describe HtmlToMarkdown do expect(HtmlToMarkdown.new(HTML_WITH_IMG, keep_img_tags: true).to_markdown).to eq(HTML_WITH_IMG) end + it "removes empty & invalid " do + expect(html_to_markdown(%Q{})).to eq("") + expect(html_to_markdown(%Q{})).to eq("") + expect(html_to_markdown(%Q{})).to eq("") + end + (1..6).each do |n| it "converts " do expect(html_to_markdown("Header #{n}")).to eq("#" * n + " Header #{n}")