# frozen_string_literal: true require "html_to_markdown" RSpec.describe HtmlToMarkdown do def html_to_markdown(html, opts = {}) HtmlToMarkdown.new(html, opts).to_markdown end it "remove whitespaces" do html = <<-HTML
Let me see if it happens by answering your message through Thunderbird.
Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1
HTML markdown = <<~MD Let me see if it happens by answering your message through Thunderbird. Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 MD expect(html_to_markdown(html)).to eq(markdown.strip) html = <<~HTML This post
has lots
of
space
This space was left untouched !HTML markdown = <<~MD This post has lots of space ``` This space was left untouched ! ``` MD expect(html_to_markdown(html)).to eq(markdown.strip) end it "removes tags that aren't allowed" do html = <<~HTML
Hello cruel World!
")).to eq("Hello World!") end it "converts " do expect(html_to_markdown("Strong")).to eq("**Strong**") expect(html_to_markdown("Str*ng")).to eq("__Str*ng__") end it "converts " do expect(html_to_markdown("Bold")).to eq("**Bold**") expect(html_to_markdown("B*ld")).to eq("__B*ld__") html = <<~HTML BeforeBold
if next character is \n" do expect(html_to_markdown("
Before
\nInside
After
" do
expect(html_to_markdown("Code
")).to eq("`Code`")
end
# For every tag in HtmlToMarkdown::ALLOWED, checks how character references
# inside that element's text are rendered when the element is kept as HTML.
#
# The fixtures had lost their entities and closing tags (the bare `"` in the
# double-quote case even terminated the Ruby string early). They are restored
# here from the example name and the labels in each string: "mandatory"
# entities (&lt; &gt; &amp;) are expected to stay encoded, every other
# reference is expected to come out as the literal character.
# NOTE(review): entity spellings on the input side (&quot;, &#39;, &copy;,
# &euro;) are reconstructed — confirm against version control.
describe "when HTML is used within Markdown" do
  HtmlToMarkdown::ALLOWED.each do |tag|
    it "keeps mandatory HTML entities in text of <#{tag}>" do
      # Mandatory entities: must survive unchanged.
      expect(html_to_markdown("<#{tag}>Less than: &lt;</#{tag}>")).to eq(
        "<#{tag}>Less than: &lt;</#{tag}>",
      )
      expect(html_to_markdown("<#{tag}>Greater than: &gt;</#{tag}>")).to eq(
        "<#{tag}>Greater than: &gt;</#{tag}>",
      )
      expect(html_to_markdown("<#{tag}>Ampersand: &amp;</#{tag}>")).to eq(
        "<#{tag}>Ampersand: &amp;</#{tag}>",
      )

      # Optional entities: expected as plain characters in the output.
      expect(html_to_markdown("<#{tag}>Double Quote: &quot;</#{tag}>")).to eq(
        "<#{tag}>Double Quote: \"</#{tag}>",
      )
      expect(html_to_markdown("<#{tag}>Single Quote: &#39;</#{tag}>")).to eq(
        "<#{tag}>Single Quote: '</#{tag}>",
      )
      expect(html_to_markdown("<#{tag}>Copyright Symbol: &copy;</#{tag}>")).to eq(
        "<#{tag}>Copyright Symbol: ©</#{tag}>",
      )
      expect(html_to_markdown("<#{tag}>Euro Symbol: &euro;</#{tag}>")).to eq(
        "<#{tag}>Euro Symbol: €</#{tag}>",
      )
    end
  end
end
# Feeds a sentence to html_to_markdown and expects the identical string back.
# NOTE(review): the description ends in a dangling "supports " and the fixture
# carries no markup, so a tag name and tag pair look to have been stripped
# (presumably <ins>, going by the wording) — confirm against version control.
it "supports " do
expect(html_to_markdown("This is an insertion")).to eq(
"This is an insertion",
)
end
# Expects the input sentence to come back unchanged.
# NOTE(review): description and fixture contain no tag; presumably a <del>
# pair was stripped from both the input and the expected value — confirm.
it "supports " do
expect(html_to_markdown("This is a deletion")).to eq("This is a deletion")
end
# Expects "H2O" to pass through html_to_markdown unchanged.
# NOTE(review): no markup is left in the fixture; the "2" was presumably
# wrapped in a subscript element that should also appear in the expected
# output — confirm against version control.
it "supports " do
expect(html_to_markdown("H2O")).to eq("H2O")
end
# Expects the sentence to be returned exactly as given.
# NOTE(review): the tag under test is missing from the description and the
# strings (presumably a highlight element such as <mark>) — confirm.
it "supports " do
expect(html_to_markdown("This is highlighted!")).to eq(
"This is highlighted!",
)
end
# Expects "Super Script!" to be returned unchanged.
# NOTE(review): tag name and markup appear stripped (presumably a superscript
# element kept as HTML in the output) — confirm against version control.
it "supports " do
expect(html_to_markdown("Super Script!")).to eq("Super Script!")
end
# Four expectations around the text "Small".
# NOTE(review): as written the same input "Small" is expected to produce both
# "Small" and "**Small**", which cannot both hold — the distinguishing markup
# (presumably a small element, alone and nested with bold) has been stripped
# from the fixtures. Restore from version control before relying on this.
it "supports " do
expect(html_to_markdown("Small")).to eq("Small")
expect(html_to_markdown("Small")).to eq(
"Small",
)
expect(html_to_markdown("Small")).to eq(
"**Small**",
)
# Literal "<small>" text expected to come back wrapped in bold markers;
# presumably this was entity-encoded text inside bold markup — TODO confirm.
expect(html_to_markdown("<small>")).to eq(
"**<small>**",
)
end
# Expects "Big" and the literal text "<big>" to each come back unchanged.
# NOTE(review): the element under test is missing from the description; the
# second fixture was presumably entity-encoded text inside that element —
# confirm against version control.
it "supports " do
expect(html_to_markdown("Big")).to eq("Big")
expect(html_to_markdown("<big>")).to eq("<big>")
end
# Expects "CTRL+C" and a lone "<" to each come back unchanged.
# NOTE(review): no element is named or present (presumably a keyboard-input
# element, with the second case holding an encoded "<") — confirm.
it "supports " do
expect(html_to_markdown("CTRL+C")).to eq("CTRL+C")
expect(html_to_markdown("<")).to eq("<")
end
# Expects "CDCK" and the literal text "<abbr>" to each come back unchanged.
# NOTE(review): %Q{} is normally chosen so the literal can contain double
# quotes, which suggests an element with a quoted attribute (presumably
# <abbr title="...">) was stripped from these fixtures — confirm.
it "supports " do
expect(
html_to_markdown(%Q{CDCK}),
).to eq(%Q{CDCK})
expect(
html_to_markdown(
%Q{<abbr>},
),
).to eq(%Q{<abbr>})
end
# Struck-through text is expected to become Markdown strikethrough (~~text~~).
# The fixture had lost its markup: a bare "Strike Through" was being compared
# with "~~Strike Through~~".
# NOTE(review): <s> is reconstructed from the expected output — confirm the
# exact tag against version control.
it "supports <s>" do
  expect(html_to_markdown("<s>Strike Through</s>")).to eq("~~Strike Through~~")
end
# The legacy strike element is expected to become Markdown strikethrough.
# The fixture had lost its markup, which left this example comparing a bare
# "Strike Through" with "~~Strike Through~~".
# NOTE(review): <strike> is reconstructed from the expected output — confirm
# the exact tag against version control.
it "supports <strike>" do
  expect(html_to_markdown("<strike>Strike Through</strike>")).to eq("~~Strike Through~~")
end
# A block quotation is expected to become a "> "-prefixed Markdown quote.
# The fixture had been reduced to the text "Quote" plus a stray line break;
# the element is restored from the expected "> Quote" output.
it "supports <blockquote>" do
  expect(html_to_markdown("<blockquote>Quote</blockquote>")).to eq("> Quote")
end
# Unordered lists are expected to become "- " bullet lines.
# The fixtures had been flattened to dashed text with raw line breaks; the
# <ul>/<li> markup is restored from the expected output.
it "supports <ul>" do
  # Compact markup, no whitespace between elements.
  expect(html_to_markdown("<ul><li>🍏</li><li>🍐</li><li>🍌</li></ul>")).to eq("- 🍏\n- 🍐\n- 🍌")

  # Newlines between elements must not add blank lines to the output.
  expect(html_to_markdown("<ul>\n<li>🍏</li>\n<li>🍐</li>\n<li>🍌</li>\n</ul>")).to eq(
    "- 🍏\n- 🍐\n- 🍌",
  )
end
# Ordered lists are expected to become "1. " items (every item numbered 1).
# The fixture had been flattened to dashed text; the <ol>/<li> markup is
# restored from the numbered expected output.
it "supports <ol>" do
  expect(html_to_markdown("<ol><li>🍆</li><li>🍅</li><li>🍄</li></ol>")).to eq("1. 🍆\n1. 🍅\n1. 🍄")
end
# Expects three emoji items to come out as bullets separated by blank lines.
# NOTE(review): the description spans two lines and names no elements, and the
# input is only emoji on separate lines — presumably block elements inside
# list items were stripped. Restore from version control.
it "supports inside
- " do
expect(html_to_markdown("
🍏
🍐
🍌
")).to eq(
"- 🍏\n\n- 🍐\n\n- 🍌",
)
end
# Expects a two-level list: "Fruits" / "Vegetables" at the top level, with
# their emoji entries indented one level beneath each.
# NOTE(review): the heredoc now holds flat "- " lines rather than markup, so
# the nesting the expectation relies on is not expressed in the input —
# presumably nested list elements were stripped. Confirm.
it "supports inside " do
expect(html_to_markdown(<<-HTML)).to eq(
- Fruits
- 🍏
- 🍐
- 🍌
- Vegetables
- 🍆
- 🍅
- 🍄
HTML
"- Fruits\n - 🍏\n - 🍐\n - 🍌\n- Vegetables\n - 🍆\n - 🍅\n - 🍄",
)
end
# Expects a single item to be rendered as one "- " bullet.
# NOTE(review): "bare - " in the description and the dashed input look like a
# list-item element that was converted to text (presumably an <li> without a
# parent list) — confirm against version control.
it "supports bare - " do
expect(html_to_markdown("
- I'm alone
")).to eq("- I'm alone")
end
# Expects code snippets to be emitted as fenced blocks: a plain fence, a
# "javascript" fence, a fence that preserves leading spaces, and a
# "plaintext" fence.
# NOTE(review): the description is split over two lines and no input carries
# any markup or language hint, so nothing in the fixtures distinguishes the
# plain, "javascript" and "plaintext" cases — presumably preformatted/code
# elements and their class attributes were stripped. Restore before use.
it "supports
" do
expect(html_to_markdown("var foo = 'bar';
")).to eq("```\nvar foo = 'bar';\n```")
expect(html_to_markdown("var foo = 'bar';
")).to eq(
"```\nvar foo = 'bar';\n```",
)
expect(
html_to_markdown(%Q{var foo = 'bar';
}),
).to eq("```javascript\nvar foo = 'bar';\n```")
expect(
html_to_markdown(
" function f() {\n console.log('Hello world!');\n }
",
),
).to eq("```\n function f() {\n console.log('Hello world!');\n }\n```")
# Angle-bracketed e-mail addresses in the input are absent from the expected
# fenced output.
html = <<~HTML
Reported-and-tested-by: A <a@example.com>
Reviewed-by: B <b@example.com>
HTML
md = <<~MD
```plaintext
Reported-and-tested-by: A
Reviewed-by: B
```
MD
expect(html_to_markdown(html)).to eq(md.strip)
end
# Expects a fenced code block whose every line is prefixed with "> ".
# NOTE(review): the input is bare text, so neither the fence nor the quote
# prefix is motivated by the fixture — presumably code markup nested in a
# block quotation was stripped. Confirm against version control.
it "supports inside " do
expect(
html_to_markdown("var foo = 'bar';
"),
).to eq("> ```\n> var foo = 'bar';\n> ```")
end
it "works" do
expect(
html_to_markdown(
"A list item with a blockquote:
This is a blockquote
inside a list item.
",
),
).to eq(
"- A list item with a blockquote:\n\n > This is a **blockquote**\n > inside a list item.",
)
end
# Expects "Hello" and "World" on separate lines for a full HTML document.
# NOTE(review): the input is now the single token "HelloWorld"; the document
# structure that would separate the two words is missing — confirm.
it "supports html document" do
expect(html_to_markdown("HelloWorld")).to eq(
"Hello\nWorld",
)
end
# Expects paragraphs to be separated by one blank line, and whitespace inside
# a paragraph (including "\n") to collapse to single spaces.
# NOTE(review): element name missing from the description and no paragraph
# markup remains in the inputs (presumably <p>) — confirm.
it "handles " do
expect(html_to_markdown("
1st paragraph
2nd paragraph
")).to eq(
"1st paragraph\n\n2nd paragraph",
)
expect(
html_to_markdown(
"1st paragraph
\n 2nd paragraph\n 2nd paragraph
\n3rd paragraph
",
),
).to eq("1st paragraph\n\n2nd paragraph 2nd paragraph\n\n3rd paragraph")
end
# Expects two adjacent blocks to be separated by a single newline.
# NOTE(review): the input is run together as "1st div2nd div" with no markup
# (presumably two <div> elements) — confirm against version control.
it "handles " do
expect(html_to_markdown("1st div2nd div")).to eq("1st div\n2nd div")
end
# Expects only the text "Span" in the output.
# NOTE(review): "swallows" implies a wrapper is dropped, but the input no
# longer has one (presumably <span>) — confirm.
it "swallows " do
expect(html_to_markdown("Span")).to eq("Span")
end
# Expects only the text "Underline" in the output.
# NOTE(review): the wrapper being swallowed is missing from the input
# (presumably <u>) — confirm.
it "swallows " do
expect(html_to_markdown("Underline")).to eq("Underline")
end
# Expects "Centered" with the trailing space removed.
# NOTE(review): the wrapper being swallowed is missing from the input
# (presumably <center>); the trailing space may be residue of it — confirm.
it "swallows " do
expect(html_to_markdown("Centered ")).to eq("Centered")
end
# NOTE(review): this example does not parse as written — the description
# string is followed directly by `)).to eq("")`, so the `do`, the `expect(`
# call and the whole input fixture are missing. Only the expected result (an
# empty string) survives. The lost input was presumably an element whose
# content is discarded entirely; restore from version control.
it "removes ")).to eq("")
end
# NOTE(review): does not parse as written, and is byte-identical to the
# preceding "removes " example — the element name, `do`, `expect(` call and
# input fixture are all missing; only the empty expected string remains.
# Restore from version control.
it "removes ")).to eq("")
end
# Expects three runs of text separated by a single newline, then a blank line.
# NOTE(review): the description is split over two lines with its element
# names missing, and the input is one run-together string — the markup that
# produces the "\n" versus "\n\n" separators has been stripped. Confirm.
it "handles and
within " do
html = "1st paragraph2nd paragraph3rd paragraph
"
expect(html_to_markdown(html)).to eq("1st paragraph\n2nd paragraph\n\n3rd paragraph")
end
# Expects four runs of text: the first three separated by single newlines,
# the last preceded by a blank line.
# NOTE(review): same description as the preceding example, element names
# missing, and the input markup appears stripped — confirm against version
# control what distinguishes the two examples.
it "handles and
within " do
html =
"1st paragraph
2nd paragraph3rd paragraph4th paragraph
"
expect(html_to_markdown(html)).to eq(
"1st paragraph\n2nd paragraph\n3rd paragraph\n\n4th paragraph",
)
end
# Examples where a line break falls before, after, or inside emphasised text.
# The expected outputs close the emphasis markers before the newline and
# reopen them after it.
# NOTE(review): the context description is split over two lines and the
# inputs contain raw newlines but no emphasis markup — presumably a
# line-break element plus bold/italic tags were stripped. Confirm.
context "with an oddly placed
" do
# Bold variants: break before, after, and in the middle of the bold run.
it "handles " do
expect(html_to_markdown("Hello
Bold World")).to eq(
"Hello\n**Bold** World",
)
expect(html_to_markdown("Hello Bold
World")).to eq(
"Hello **Bold**\nWorld",
)
expect(html_to_markdown("Hello Bold
text World")).to eq(
"Hello **Bold**\n**text** World",
)
end
# Italic variants of the same three placements.
it "handles " do
expect(html_to_markdown("Hello
Italic World")).to eq("Hello\n*Italic* World")
expect(html_to_markdown("Hello Italic
World")).to eq("Hello *Italic*\nWorld")
expect(html_to_markdown("Hello Italic
text World")).to eq(
"Hello *Italic*\n*text* World",
)
end
# Nested bold and italic spanning several breaks; expects "__" for the
# outer bold where it wraps "*"-delimited italics.
it "works" do
expect(html_to_markdown("A B C
D E
F G")).to eq(
"A __B *C*__\n__*D* E__\n**F** G",
)
end
end
# Examples for elements with no content: empty or whitespace-only input is
# expected to yield "", and surrounding text is expected to be kept.
# NOTE(review): both examples are now identical and each repeats the same
# "Some text" expectation twice — the empty elements (presumably bold in one
# example and italic in the other) have been stripped. Confirm.
context "with an empty tag" do
it "handles " do
expect(html_to_markdown("")).to eq("")
expect(html_to_markdown(" ")).to eq("")
expect(html_to_markdown("Some text")).to eq("Some text")
expect(html_to_markdown("Some text")).to eq("Some text")
end
it "handles " do
expect(html_to_markdown("")).to eq("")
expect(html_to_markdown(" ")).to eq("")
expect(html_to_markdown("Some text")).to eq("Some text")
expect(html_to_markdown("Some text")).to eq("Some text")
end
end
# Examples where whitespace sits just inside or outside emphasis: the
# expected markers hug the word ("**Bold**", "*italic*") and the sentence
# keeps single spaces around them.
# NOTE(review): the inputs contain no emphasis markup, and several lines are
# exact duplicates — the tags (and presumably non-breaking-space entities)
# that made each case distinct have been stripped. Confirm.
context "with spaces around text" do
# Bold: leading/trailing space inside the element, then within a sentence.
it "handles " do
expect(html_to_markdown(" Bold")).to eq("**Bold**")
expect(html_to_markdown(" Bold")).to eq("**Bold**")
expect(html_to_markdown("Bold ")).to eq("**Bold**")
expect(html_to_markdown("Bold ")).to eq("**Bold**")
expect(html_to_markdown("Some bold text")).to eq("Some **bold** text")
expect(html_to_markdown("Some bold text")).to eq("Some **bold** text")
expect(html_to_markdown("Some bold text")).to eq("Some **bold** text")
expect(html_to_markdown("Some bold text")).to eq("Some **bold** text")
end
# Italic: the same eight placements.
it "handles " do
expect(html_to_markdown(" Italic")).to eq("*Italic*")
expect(html_to_markdown(" Italic")).to eq("*Italic*")
expect(html_to_markdown("Italic ")).to eq("*Italic*")
expect(html_to_markdown("Italic ")).to eq("*Italic*")
expect(html_to_markdown("Some italic text")).to eq("Some *italic* text")
expect(html_to_markdown("Some italic text")).to eq("Some *italic* text")
expect(html_to_markdown("Some italic text")).to eq("Some *italic* text")
expect(html_to_markdown("Some italic text")).to eq("Some *italic* text")
end
end
# Expects a pipe-delimited Markdown table: header row, "| - |" separator,
# then body and footer rows, with italic and bold cells rendered inline.
# A second expectation covers a header-only table.
# NOTE(review): the HTML heredoc has been reduced to one cell text per line,
# so rows, columns and emphasis are no longer expressed in the input —
# presumably table markup was stripped. Confirm against version control.
it "supports " do
html = <<~HTML
This
is
the
headers
I am
the
first
row
And this
is the
2nd
line
This
is
the
footer
HTML
markdown = <<~MD
| This | is | the | *headers* |
| - | - | - | - |
| I am | the | **first** | row |
| And this | is the | 2nd | line |
| This | is | the | footer |
MD
expect(html_to_markdown(html)).to eq(markdown.strip)
expect(html_to_markdown("Hello World
")).to eq(
"| Hello | World |\n| - | - |",
)
end
# Per its description, expects a malformed table to be left as HTML rather
# than converted to a pipe table; "Two" is still expected in bold.
# NOTE(review): neither heredoc contains any HTML now, so the "keeps HTML"
# behaviour is not actually asserted — the table markup appears stripped
# from both input and expectation. Confirm against version control.
it "keeps HTML for badly formatted " do
html = <<~HTML
1
2
3
4
<One>
Two
Three
HTML
markdown = <<~MD
1
2
3
4
<One>
**Two**
Three
MD
expect(html_to_markdown(html)).to eq(markdown.strip)
end
# Per its description, expects a table using colspan to be kept as HTML.
# NOTE(review): input and expectation are now identical plain text with no
# table or colspan markup, so the example no longer exercises what it
# describes — fixture markup appears stripped. Confirm.
it "keeps HTML for with colspan" do
html = <<~HTML
1
2
One / Two
HTML
markdown = <<~MD
1
2
One / Two
MD
expect(html_to_markdown(html)).to eq(markdown.strip)
end
# Per its description, expects a table using rowspan to be kept as HTML.
# NOTE(review): input and expectation are now identical plain text with no
# table or rowspan markup, so the example no longer exercises what it
# describes — fixture markup appears stripped. Confirm.
it "keeps HTML for with rowspan" do
html = <<~HTML
1
2
A
B
C
HTML
markdown = <<~MD
1
2
A
B
C
MD
expect(html_to_markdown(html)).to eq(markdown.strip)
end
end