FIX: HtmlToMarkdown should not convert empty/bad <a> tags

This commit is contained in:
Régis Hanol 2017-05-03 16:42:37 +02:00
parent 1706036f2b
commit edbf12622b
2 changed files with 14 additions and 3 deletions

View File

@@ -143,9 +143,14 @@ class HtmlToMarkdown
end
# Renders an <a> element as a Markdown link.
#
# Link markup ("[text](href)") is only produced when the href begins with
# "http" or "www.". For a missing, blank or any other href the node is simply
# handed to `traverse`, so no link markup is emitted for it.
#
# @param node the <a> element; must answer node["href"] with the attribute
#   value (or nil when the attribute is absent)
def visit_a(node)
  href = node["href"]

  # No usable target: emit the content only, never a dangling "[text]()".
  return traverse(node) unless href.present? && href.start_with?("http", "www.")

  @stack[-1].markdown << "["
  traverse(node)
  @stack[-1].markdown << "](#{href})"
end
def visit_tt(node)

View File

@@ -31,6 +31,12 @@ describe HtmlToMarkdown do
expect(html_to_markdown(%Q{<a href="https://www.discourse.org">Discourse</a>})).to eq("[Discourse](https://www.discourse.org)")
end
# An <a> with no href, an empty href, or an href such as "foo.bar" must be
# reduced to its text content.
it "removes empty & invalid <a>" do
  [
    %Q{<a>Discourse</a>},
    %Q{<a href="">Discourse</a>},
    %Q{<a href="foo.bar">Discourse</a>},
  ].each do |html|
    expect(html_to_markdown(html)).to eq("Discourse")
  end
end
# Sample <img> markup; `||=` leaves an already-defined value untouched.
HTML_WITH_IMG ||= '<img src="https://www.discourse.org/logo.svg" alt="Discourse Logo">'
it "converts <img>" do