mirror of
https://github.com/discourse/discourse.git
synced 2024-11-26 21:16:18 +08:00
FIX: Extract `div` tags within `span`s
This commit is contained in:
parent
c2829dce22
commit
b57b635d30
|
@ -8,10 +8,22 @@ class HtmlToMarkdown
|
||||||
|
|
||||||
def initialize(html, opts={})
|
def initialize(html, opts={})
|
||||||
@opts = opts || {}
|
@opts = opts || {}
|
||||||
@doc = Nokogiri::HTML(html)
|
@doc = fix_span_elements(Nokogiri::HTML(html))
|
||||||
|
|
||||||
remove_whitespaces!
|
remove_whitespaces!
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
# If a `<div>` is within a `<span>` that's invalid, so let's hoist the `<div>` up
|
||||||
|
def fix_span_elements(node)
|
||||||
|
if node.name == 'span' && node.at('div')
|
||||||
|
node.swap(node.children)
|
||||||
|
end
|
||||||
|
|
||||||
|
node.children.each {|c| fix_span_elements(c)}
|
||||||
|
node
|
||||||
|
end
|
||||||
|
|
||||||
def remove_whitespaces!
|
def remove_whitespaces!
|
||||||
@doc.traverse do |node|
|
@doc.traverse do |node|
|
||||||
if node.is_a? Nokogiri::XML::Text
|
if node.is_a? Nokogiri::XML::Text
|
||||||
|
@ -33,7 +45,7 @@ class HtmlToMarkdown
|
||||||
end
|
end
|
||||||
|
|
||||||
def traverse(node)
|
def traverse(node)
|
||||||
node.children.each { |node| visit(node) }
|
node.children.each { |n| visit(n) }
|
||||||
end
|
end
|
||||||
|
|
||||||
def visit(node)
|
def visit(node)
|
||||||
|
@ -197,6 +209,7 @@ class HtmlToMarkdown
|
||||||
end
|
end
|
||||||
|
|
||||||
def format_block
|
def format_block
|
||||||
|
|
||||||
lines = @stack[-1].markdown.each_line.map do |line|
|
lines = @stack[-1].markdown.each_line.map do |line|
|
||||||
prefix = @stack.map { |b| b.opened ? b.body : b.head }.join
|
prefix = @stack.map { |b| b.opened ? b.body : b.head }.join
|
||||||
@stack.each { |b| b.opened = true }
|
@stack.each { |b| b.opened = true }
|
||||||
|
|
|
@ -219,4 +219,9 @@ describe HtmlToMarkdown do
|
||||||
expect(html_to_markdown("<style>* { margin: 0 }</style>")).to eq("")
|
expect(html_to_markdown("<style>* { margin: 0 }</style>")).to eq("")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "handles divs within spans" do
|
||||||
|
html = "<div>1st paragraph<span><div>2nd paragraph</div></span></div>"
|
||||||
|
expect(html_to_markdown(html)).to eq("1st paragraph\n2nd paragraph")
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue
Block a user