mirror of
https://github.com/discourse/discourse.git
synced 2024-11-26 07:43:43 +08:00
Add 'keep_cid_imgs' option to HTML to Markdown converter to improve incoming email parsing
This commit is contained in:
parent
9d7917f79d
commit
768c63c103
|
@ -136,7 +136,7 @@ class HtmlToMarkdown
|
||||||
end
|
end
|
||||||
|
|
||||||
def visit_img(node)
|
def visit_img(node)
|
||||||
if is_valid_url?(node["src"]) && is_visible_img?(node)
|
if is_valid_src?(node["src"]) && is_visible_img?(node)
|
||||||
if @opts[:keep_img_tags]
|
if @opts[:keep_img_tags]
|
||||||
@stack[-1].markdown << node.to_html
|
@stack[-1].markdown << node.to_html
|
||||||
else
|
else
|
||||||
|
@ -147,7 +147,7 @@ class HtmlToMarkdown
|
||||||
end
|
end
|
||||||
|
|
||||||
def visit_a(node)
|
def visit_a(node)
|
||||||
if is_valid_url?(node["href"])
|
if is_valid_href?(node["href"])
|
||||||
@stack[-1].markdown << "["
|
@stack[-1].markdown << "["
|
||||||
traverse(node)
|
traverse(node)
|
||||||
@stack[-1].markdown << "](#{node["href"]})"
|
@stack[-1].markdown << "](#{node["href"]})"
|
||||||
|
@ -206,14 +206,20 @@ class HtmlToMarkdown
|
||||||
(lines + [""]).join("\n")
|
(lines + [""]).join("\n")
|
||||||
end
|
end
|
||||||
|
|
||||||
def is_valid_url?(url)
|
def is_valid_href?(href)
|
||||||
url.present? && (url.start_with?("http") || url.start_with?("www."))
|
href.present? && (href.start_with?("http") || href.start_with?("www."))
|
||||||
|
end
|
||||||
|
|
||||||
|
def is_valid_src?(src)
|
||||||
|
return false if src.blank?
|
||||||
|
return true if @opts[:keep_cid_imgs] && src.start_with?("cid:")
|
||||||
|
src.start_with?("http") || src.start_with?("www.")
|
||||||
end
|
end
|
||||||
|
|
||||||
def is_visible_img?(img)
|
def is_visible_img?(img)
|
||||||
return false if img["width"].present? && img["width"].to_i == 0
|
return false if img["width"].present? && img["width"].to_i == 0
|
||||||
return false if img["height"].present? && img["height"].to_i == 0
|
return false if img["height"].present? && img["height"].to_i == 0
|
||||||
return false if img["style"].present? && img["style"][/(width|height)\s*:\s*0/]
|
return false if img["style"].present? && img["style"][/(width|height)\s*:\s*0/]
|
||||||
true
|
true
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -3,8 +3,8 @@ require 'html_to_markdown'
|
||||||
|
|
||||||
describe HtmlToMarkdown do
|
describe HtmlToMarkdown do
|
||||||
|
|
||||||
def html_to_markdown(html)
|
def html_to_markdown(html, opts={})
|
||||||
HtmlToMarkdown.new(html).to_markdown
|
HtmlToMarkdown.new(html, opts).to_markdown
|
||||||
end
|
end
|
||||||
|
|
||||||
it "remove whitespaces" do
|
it "remove whitespaces" do
|
||||||
|
@ -55,14 +55,15 @@ describe HtmlToMarkdown do
|
||||||
expect(html_to_markdown(%Q{<a href="foo.bar">Discourse</a>})).to eq("Discourse")
|
expect(html_to_markdown(%Q{<a href="foo.bar">Discourse</a>})).to eq("Discourse")
|
||||||
end
|
end
|
||||||
|
|
||||||
HTML_WITH_IMG ||= %Q{<img src="https://www.discourse.org/logo.svg" alt="Discourse Logo">}
|
HTML_WITH_IMG ||= %Q{<img src="https://www.discourse.org/logo.svg" alt="Discourse Logo">}
|
||||||
|
HTML_WITH_CID_IMG ||= %Q{<img src="cid:ii_1525434659ddb4cb" alt="Discourse Logo">}
|
||||||
|
|
||||||
it "converts <img>" do
|
it "converts <img>" do
|
||||||
expect(html_to_markdown(HTML_WITH_IMG)).to eq("![Discourse Logo](https://www.discourse.org/logo.svg)")
|
expect(html_to_markdown(HTML_WITH_IMG)).to eq("![Discourse Logo](https://www.discourse.org/logo.svg)")
|
||||||
end
|
end
|
||||||
|
|
||||||
it "keeps <img> with 'keep_img_tags'" do
|
it "keeps <img> with 'keep_img_tags'" do
|
||||||
expect(HtmlToMarkdown.new(HTML_WITH_IMG, keep_img_tags: true).to_markdown).to eq(HTML_WITH_IMG)
|
expect(html_to_markdown(HTML_WITH_IMG, keep_img_tags: true)).to eq(HTML_WITH_IMG)
|
||||||
end
|
end
|
||||||
|
|
||||||
it "removes empty & invalid <img>" do
|
it "removes empty & invalid <img>" do
|
||||||
|
@ -71,6 +72,11 @@ describe HtmlToMarkdown do
|
||||||
expect(html_to_markdown(%Q{<img src="foo.bar">})).to eq("")
|
expect(html_to_markdown(%Q{<img src="foo.bar">})).to eq("")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "keeps <img> with src='cid:' with 'keep_cid_imgs'" do
|
||||||
|
expect(html_to_markdown(HTML_WITH_CID_IMG, keep_cid_imgs: true)).to eq("![Discourse Logo](cid:ii_1525434659ddb4cb)")
|
||||||
|
expect(html_to_markdown(HTML_WITH_CID_IMG, keep_img_tags: true, keep_cid_imgs: true)).to eq("<img src=\"cid:ii_1525434659ddb4cb\" alt=\"Discourse Logo\">")
|
||||||
|
end
|
||||||
|
|
||||||
it "skips hidden <img>" do
|
it "skips hidden <img>" do
|
||||||
expect(html_to_markdown(%Q{<img src="https://www.discourse.org/logo.svg" width=0>})).to eq("")
|
expect(html_to_markdown(%Q{<img src="https://www.discourse.org/logo.svg" width=0>})).to eq("")
|
||||||
expect(html_to_markdown(%Q{<img src="https://www.discourse.org/logo.svg" height="0">})).to eq("")
|
expect(html_to_markdown(%Q{<img src="https://www.discourse.org/logo.svg" height="0">})).to eq("")
|
||||||
|
|
Loading…
Reference in New Issue
Block a user