# frozen_string_literal: true require 'rails_helper' require 'html_to_markdown' describe HtmlToMarkdown do def html_to_markdown(html, opts = {}) HtmlToMarkdown.new(html, opts).to_markdown end it "remove whitespaces" do html = <<-HTML
Hello,

    This is the 1st paragraph.   

        This is another paragraph
HTML expect(html_to_markdown(html)).to eq("Hello,\n\nThis is the 1st paragraph.\n\nThis is another paragraph") html = <<~HTML

Let me see if it happens by answering your message through Thunderbird.

Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1

HTML markdown = <<~MD Let me see if it happens by answering your message through Thunderbird. Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 MD expect(html_to_markdown(html)).to eq(markdown.strip) html = <<~HTML

This post has lots
of space

    This     space    was   left untouched     !
HTML markdown = <<~MD This post has lots of space ``` This space was left untouched ! ``` MD expect(html_to_markdown(html)).to eq(markdown.strip) end it "doesn't error on non-inline elements like (aside, section)" do html = <<~HTML HTML markdown = <<~MD > hello. MD expect(html_to_markdown(html)).to eq(markdown.strip) end it "skips hidden tags" do expect(html_to_markdown(%Q{

Hello World!

})).to eq("Hello World!") end it "converts " do expect(html_to_markdown("Strong")).to eq("**Strong**") expect(html_to_markdown("Str*ng")).to eq("__Str*ng__") end it "converts " do expect(html_to_markdown("Bold")).to eq("**Bold**") expect(html_to_markdown("B*ld")).to eq("__B*ld__") html = <<~HTML Before

Bold

After HTML expect(html_to_markdown(html)).to eq("Before\n\n**Bold**\n\nAfter") end it "converts " do expect(html_to_markdown("Emphasis")).to eq("*Emphasis*") expect(html_to_markdown("Emph*sis")).to eq("_Emph*sis_") end it "converts " do expect(html_to_markdown("Italic")).to eq("*Italic*") expect(html_to_markdown("It*lic")).to eq("_It*lic_") end it "converts " do expect(html_to_markdown(%Q{Discourse})).to eq("[Discourse](https://www.discourse.org)") end it "supports SiteSetting.allowed_href_schemes" do SiteSetting.allowed_href_schemes = "tel|steam" expect(html_to_markdown(%Q{LIMBO})).to eq("[LIMBO](steam://store/48000)") end it "removes empty & invalid " do expect(html_to_markdown(%Q{Discourse})).to eq("Discourse") expect(html_to_markdown(%Q{Discourse})).to eq("Discourse") expect(html_to_markdown(%Q{Discourse})).to eq("Discourse") end HTML_WITH_IMG ||= %Q{Discourse Logo} HTML_WITH_CID_IMG ||= %Q{} it "converts " do expect(html_to_markdown(HTML_WITH_IMG)).to eq("![Discourse Logo](https://www.discourse.org/logo.svg)") end it "keeps with 'keep_img_tags'" do expect(html_to_markdown(HTML_WITH_IMG, keep_img_tags: true)).to eq(HTML_WITH_IMG) end it "removes empty & invalid " do expect(html_to_markdown(%Q{})).to eq("") expect(html_to_markdown(%Q{})).to eq("") expect(html_to_markdown(%Q{})).to eq("") end it "keeps with src='cid:' with 'keep_cid_imgs'" do expect(html_to_markdown(HTML_WITH_CID_IMG, keep_cid_imgs: true)).to eq(HTML_WITH_CID_IMG) end it "skips hidden " do expect(html_to_markdown(%Q{})).to eq("") expect(html_to_markdown(%Q{})).to eq("") end it "supports width/height on " do expect(html_to_markdown(%Q{})).to eq("![](https://www.discourse.org/logo.svg)") expect(html_to_markdown(%Q{})).to eq("![](https://www.discourse.org/logo.svg)") expect(html_to_markdown(%Q{})).to eq("![|200x100](https://www.discourse.org/logo.svg)") end (1..6).each do |n| it "converts " do expect(html_to_markdown("Header #{n}")).to eq("#" * n + " Header #{n}") end end it "converts
" do expect(html_to_markdown("Before
Inside
After")).to eq("Before\nInside\nAfter") end it "skips
inside

if next character is \n" do expect(html_to_markdown("

Before
\nInside
After

")).to eq("Before\nInside\nAfter") end it "converts
" do expect(html_to_markdown("Before
Inside
After")).to eq("Before\n\n---\n\nInside\n\n---\n\nAfter") end it "converts " do expect(html_to_markdown("Teletype")).to eq("`Teletype`") end it "converts " do expect(html_to_markdown("Code")).to eq("`Code`") end it "supports " do expect(html_to_markdown("This is an insertion")).to eq("This is an insertion") end it "supports " do expect(html_to_markdown("This is a deletion")).to eq("This is a deletion") end it "supports " do expect(html_to_markdown("H2O")).to eq("H2O") end it "supports " do expect(html_to_markdown("This is highlighted!")).to eq("This is highlighted!") end it "supports " do expect(html_to_markdown("Super Script!")).to eq("Super Script!") end it "supports " do expect(html_to_markdown("Small")).to eq("Small") end it "supports " do expect(html_to_markdown("CTRL+C")).to eq("CTRL+C") end it "supports " do expect(html_to_markdown(%Q{CDCK})).to eq(%Q{CDCK}) end it "supports " do expect(html_to_markdown("Strike Through")).to eq("~~Strike Through~~") end it "supports " do expect(html_to_markdown("Strike Through")).to eq("~~Strike Through~~") end it "supports
" do expect(html_to_markdown("
Quote
")).to eq("> Quote") end it "supports
    " do expect(html_to_markdown("
    • 🍏
    • 🍐
    • 🍌
    ")).to eq("- 🍏\n- 🍐\n- 🍌") expect(html_to_markdown("
      \n
    • 🍏
    • \n
    • 🍐
    • \n
    • 🍌
    • \n
    ")).to eq("- 🍏\n- 🍐\n- 🍌") end it "supports
      " do expect(html_to_markdown("
      1. 🍆
      2. 🍅
      3. 🍄
      ")).to eq("1. 🍆\n1. 🍅\n1. 🍄") end it "supports

      inside

    1. " do expect(html_to_markdown("
      • 🍏

      • 🍐

      • 🍌

      ")).to eq("- 🍏\n\n- 🍐\n\n- 🍌") end it "supports
        inside
          " do expect(html_to_markdown(<<-HTML
          • Fruits
            • 🍏
            • 🍐
            • 🍌
          • Vegetables
            • 🍆
            • 🍅
            • 🍄
          HTML )).to eq("- Fruits\n - 🍏\n - 🍐\n - 🍌\n- Vegetables\n - 🍆\n - 🍅\n - 🍄") end it "supports bare
        • " do expect(html_to_markdown("
        • I'm alone
        • ")).to eq("- I'm alone") end it "supports
          " do
              expect(html_to_markdown("
          var foo = 'bar';
          ")).to eq("```\nvar foo = 'bar';\n```") expect(html_to_markdown("
          var foo = 'bar';
          ")).to eq("```\nvar foo = 'bar';\n```") expect(html_to_markdown(%Q{
          var foo = 'bar';
          })).to eq("```javascript\nvar foo = 'bar';\n```") expect(html_to_markdown("
              function f() {\n        console.log('Hello world!');\n    }
          ")).to eq("```\n function f() {\n console.log('Hello world!');\n }\n```") end it "supports
           inside 
          " do expect(html_to_markdown("
          var foo = 'bar';
          ")).to eq("> ```\n> var foo = 'bar';\n> ```") end it "works" do expect(html_to_markdown("
          • A list item with a blockquote:

            This is a blockquote
            inside a list item.

          ")).to eq("- A list item with a blockquote:\n\n > This is a **blockquote**\n > inside a list item.") end it "supports html document" do expect(html_to_markdown("Hello
          World
          ")).to eq("Hello\nWorld") end it "handles

          " do expect(html_to_markdown("

          1st paragraph

          2nd paragraph

          ")).to eq("1st paragraph\n\n2nd paragraph") expect(html_to_markdown("

          1st paragraph

          \n

          2nd paragraph\n 2nd paragraph

          \n

          3rd paragraph

          ")).to eq("1st paragraph\n\n2nd paragraph 2nd paragraph\n\n3rd paragraph") end it "handles
          " do expect(html_to_markdown("
          1st div
          2nd div
          ")).to eq("1st div\n2nd div") end it "swallows " do expect(html_to_markdown("Span")).to eq("Span") end it "swallows " do expect(html_to_markdown("Underline")).to eq("Underline") end it "removes ")).to eq("") end it "removes ")).to eq("") end it "handles

          and

          within " do html = "
          1st paragraph
          2nd paragraph

          3rd paragraph

          " expect(html_to_markdown(html)).to eq("1st paragraph\n2nd paragraph\n\n3rd paragraph") end it "handles

          and

          within " do html = "1st paragraph
          2nd paragraph
          3rd paragraph

          4th paragraph

          " expect(html_to_markdown(html)).to eq("1st paragraph\n2nd paragraph\n3rd paragraph\n\n4th paragraph") end context "with an oddly placed
          " do it "handles " do expect(html_to_markdown("Hello
          Bold
          World")).to eq("Hello\n**Bold** World") expect(html_to_markdown("Hello Bold
          World")).to eq("Hello **Bold**\nWorld") expect(html_to_markdown("Hello Bold
          text
          World")).to eq("Hello **Bold**\n**text** World") end it "handles " do expect(html_to_markdown("Hello
          Italic
          World")).to eq("Hello\n*Italic* World") expect(html_to_markdown("Hello Italic
          World")).to eq("Hello *Italic*\nWorld") expect(html_to_markdown("Hello Italic
          text
          World")).to eq("Hello *Italic*\n*text* World") end it "works" do expect(html_to_markdown("
          A B C
          D
          E
          F
          G
          ")).to eq("A __B *C*__\n__*D* E__\n**F** G") end end context "with an empty tag" do it "handles " do expect(html_to_markdown("")).to eq("") expect(html_to_markdown(" ")).to eq("") expect(html_to_markdown("Some text")).to eq("Some text") expect(html_to_markdown("Some text")).to eq("Some text") end it "handles " do expect(html_to_markdown("")).to eq("") expect(html_to_markdown(" ")).to eq("") expect(html_to_markdown("Some text")).to eq("Some text") expect(html_to_markdown("Some text")).to eq("Some text") end end context "with spaces around text" do it "handles " do expect(html_to_markdown(" Bold")).to eq("**Bold**") expect(html_to_markdown(" Bold")).to eq("**Bold**") expect(html_to_markdown("Bold ")).to eq("**Bold**") expect(html_to_markdown("Bold ")).to eq("**Bold**") expect(html_to_markdown("Some bold text")).to eq("Some **bold** text") expect(html_to_markdown("Some bold text")).to eq("Some **bold** text") expect(html_to_markdown("Some bold text")).to eq("Some **bold** text") expect(html_to_markdown("Some bold text")).to eq("Some **bold** text") end it "handles " do expect(html_to_markdown(" Italic")).to eq("*Italic*") expect(html_to_markdown(" Italic")).to eq("*Italic*") expect(html_to_markdown("Italic ")).to eq("*Italic*") expect(html_to_markdown("Italic ")).to eq("*Italic*") expect(html_to_markdown("Some italic text")).to eq("Some *italic* text") expect(html_to_markdown("Some italic text")).to eq("Some *italic* text") expect(html_to_markdown("Some italic text")).to eq("Some *italic* text") expect(html_to_markdown("Some italic text")).to eq("Some *italic* text") end end it "supports " do html = <<~HTML
          This is the headers
          I am the first row
          And this is the 2nd line
          HTML markdown = <<~MD | This | is | the | *headers* | | - | - | - | - | | I am | the | **first** | row | | And this | is the | 2nd | line | MD expect(html_to_markdown(html)).to eq(markdown.strip) expect(html_to_markdown("
          HelloWorld
          ")).to eq("| Hello | World |\n| - | - |") end it "doesn't swallow badly formatted " do html = <<~HTML
          1 2 3 4
          One Two Three
          HTML expect(html_to_markdown(html)).to eq("1 2 3 4 \nOne Two Three") end end