diff --git a/lib/html_to_markdown.rb b/lib/html_to_markdown.rb
index 60f88d9095f..6e72d65fd4e 100644
--- a/lib/html_to_markdown.rb
+++ b/lib/html_to_markdown.rb
@@ -134,20 +134,21 @@ class HtmlToMarkdown
end
def visit_img(node)
- if @opts[:keep_img_tags]
- @stack[-1].markdown << node.to_html
- else
- title = node["alt"].presence || node["title"].presence
- @stack[-1].markdown << "![#{title}](#{node["src"]})"
+ if is_valid_url?(node["src"]) # emit nothing for this image unless its src passes is_valid_url?
+ if @opts[:keep_img_tags] # option set: append the node's own HTML instead of Markdown
+ @stack[-1].markdown << node.to_html
+ else
+ title = node["alt"].presence || node["title"].presence # prefer alt, fall back to title
+ @stack[-1].markdown << "![#{title}](#{node["src"]})"
+ end
end
end
def visit_a(node)
- href = node["href"]
- if href.present? && (href.start_with?("http") || href.start_with?("www."))
+ if is_valid_url?(node["href"])
@stack[-1].markdown << "["
traverse(node)
- @stack[-1].markdown << "](#{href})"
+ @stack[-1].markdown << "](#{node["href"]})"
else
traverse(node)
end
@@ -203,4 +204,8 @@ class HtmlToMarkdown
(lines + [""]).join("\n")
end
+ def is_valid_url?(url)
+ url.present? && (url.start_with?("http") || url.start_with?("www."))
+ end
+
end
diff --git a/spec/components/html_to_markdown_spec.rb b/spec/components/html_to_markdown_spec.rb
index e5a9c5b8e2c..1e622e93f18 100644
--- a/spec/components/html_to_markdown_spec.rb
+++ b/spec/components/html_to_markdown_spec.rb
@@ -61,6 +61,12 @@ describe HtmlToMarkdown do
expect(HtmlToMarkdown.new(HTML_WITH_IMG, keep_img_tags: true).to_markdown).to eq(HTML_WITH_IMG)
end
+ it "removes empty & invalid " do
+ expect(html_to_markdown(%Q{})).to eq("")
+ expect(html_to_markdown(%Q{})).to eq("")
+ expect(html_to_markdown(%Q{})).to eq("")
+ end
+
(1..6).each do |n|
it "converts " do
expect(html_to_markdown("Header #{n}")).to eq("#" * n + " Header #{n}")