FIX: HtmlToMarkdown should not convert empty/bad <img> tags

This commit is contained in:
Régis Hanol 2017-05-03 18:29:25 +02:00
parent c880af8120
commit bff36de130
2 changed files with 19 additions and 8 deletions

View File

@ -134,20 +134,21 @@ class HtmlToMarkdown
end end
def visit_img(node) def visit_img(node)
if @opts[:keep_img_tags] if is_valid_url?(node["src"])
@stack[-1].markdown << node.to_html if @opts[:keep_img_tags]
else @stack[-1].markdown << node.to_html
title = node["alt"].presence || node["title"].presence else
@stack[-1].markdown << "![#{title}](#{node["src"]})" title = node["alt"].presence || node["title"].presence
@stack[-1].markdown << "![#{title}](#{node["src"]})"
end
end end
end end
def visit_a(node) def visit_a(node)
href = node["href"] if is_valid_url?(node["href"])
if href.present? && (href.start_with?("http") || href.start_with?("www."))
@stack[-1].markdown << "[" @stack[-1].markdown << "["
traverse(node) traverse(node)
@stack[-1].markdown << "](#{href})" @stack[-1].markdown << "](#{node["href"]})"
else else
traverse(node) traverse(node)
end end
@ -203,4 +204,8 @@ class HtmlToMarkdown
(lines + [""]).join("\n") (lines + [""]).join("\n")
end end
# Tells whether +url+ looks like an absolute link worth emitting.
#
# A value passes when it is present (per `present?`, presumably the
# ActiveSupport extension -- confirm) and begins with either "http" or "www.".
# Anything else, including nil, an empty string or a relative path such as
# "foo.bar", yields false.
def is_valid_url?(url)
  return false unless url.present?

  # start_with? accepts several prefixes and matches if any of them applies.
  url.start_with?("http", "www.")
end
end end

View File

@ -61,6 +61,12 @@ describe HtmlToMarkdown do
expect(HtmlToMarkdown.new(HTML_WITH_IMG, keep_img_tags: true).to_markdown).to eq(HTML_WITH_IMG) expect(HtmlToMarkdown.new(HTML_WITH_IMG, keep_img_tags: true).to_markdown).to eq(HTML_WITH_IMG)
end end
# <img> tags with no src, a blank src, or a src that is not an absolute URL
# are all expected to produce empty markdown output.
it "removes empty & invalid <img>" do
  unusable_images = [
    %Q{<img>},
    %Q{<img src="">},
    %Q{<img src="foo.bar">},
  ]

  unusable_images.each do |html|
    expect(html_to_markdown(html)).to eq("")
  end
end
(1..6).each do |n| (1..6).each do |n|
it "converts <h#{n}>" do it "converts <h#{n}>" do
expect(html_to_markdown("<h#{n}>Header #{n}</h#{n}>")).to eq("#" * n + " Header #{n}") expect(html_to_markdown("<h#{n}>Header #{n}</h#{n}>")).to eq("#" * n + " Header #{n}")