SECURITY: Strip unrendered unicode bidirectional chars in code blocks (#15032)

When rendering the markdown code blocks we replace the
offending characters in the output string with spans highlighting a textual
representation of the character, along with a title attribute with
information about why the character was highlighted.

The list of characters stripped by this fix, which are the bidirectional
characters considered relevant, are:

U+202A
U+202B
U+202C
U+202D
U+202E
U+2066
U+2067
U+2068
U+2069
This commit is contained in:
Martin Brennan 2021-11-22 10:43:03 +10:00 committed by Martin Brennan
parent 626a0e207e
commit 715d4de981
No known key found for this signature in database
GPG Key ID: A08063EEF3EA26A4
4 changed files with 130 additions and 0 deletions

View File

@ -736,6 +736,14 @@ pre {
background: var(--blend-primary-secondary-5);
max-height: 500px;
}
.bidi-warning,
.bidi-warning span {
font-style: normal;
background-color: var(--highlight);
color: var(--danger);
cursor: help;
}
}
.copy-codeblocks {

View File

@ -729,6 +729,7 @@ en:
post:
image_placeholder:
broken: "This image is broken"
hidden_bidi_character: "Bidirectional characters can change the order that text is rendered. This could be used to obscure malicious code."
has_likes:
one: "%{count} Like"
other: "%{count} Likes"

View File

@ -5,6 +5,19 @@ require 'nokogiri'
require 'erb'
module PrettyText
DANGEROUS_BIDI_CHARACTERS = [
"\u202A",
"\u202B",
"\u202C",
"\u202D",
"\u202E",
"\u2066",
"\u2067",
"\u2068",
"\u2069",
].freeze
DANGEROUS_BIDI_REGEXP = Regexp.new(DANGEROUS_BIDI_CHARACTERS.join("|")).freeze
@mutex = Mutex.new
@ctx_init = Mutex.new
@ -277,6 +290,7 @@ module PrettyText
add_nofollow = !options[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content
add_rel_attributes_to_user_content(doc, add_nofollow)
strip_hidden_unicode_bidirectional_characters(doc)
if SiteSetting.enable_mentions
add_mentions(doc, user_id: opts[:user_id])
@ -289,6 +303,24 @@ module PrettyText
loofah_fragment.scrub!(scrubber).to_html
end
def self.strip_hidden_unicode_bidirectional_characters(doc)
return if !DANGEROUS_BIDI_REGEXP.match?(doc.content)
doc.css("code,pre").each do |code_tag|
next if !DANGEROUS_BIDI_REGEXP.match?(code_tag.content)
DANGEROUS_BIDI_CHARACTERS.each do |bidi|
next if !code_tag.content.include?(bidi)
formatted = "<U+#{bidi.ord.to_s(16).upcase}>"
code_tag.inner_html = code_tag.inner_html.gsub(
bidi,
"<span class=\"bidi-warning\" title=\"#{I18n.t("post.hidden_bidi_character")}\">#{formatted}</span>"
)
end
end
end
def self.add_rel_attributes_to_user_content(doc, add_nofollow)
allowlist = []

View File

@ -552,6 +552,95 @@ describe PrettyText do
Discourse.redis.flushdb
end
end
it "strips out unicode bidirectional (bidi) override characters and replaces with a highlighted span" do
code = <<~MD
X
```js
var isAdmin = false;
/* begin admin only */ if (isAdmin) {
console.log("You are an admin.");
/* end admins only */ }
```
MD
cooked = PrettyText.cook(code)
hidden_bidi_title = I18n.t("post.hidden_bidi_character")
html = <<~HTML
<p>X</p>
<pre><code class="lang-auto">var isAdmin = false;
/*<span class="bidi-warning" title="#{hidden_bidi_title}">&lt;U+202E&gt;</span> begin admin only */<span class="bidi-warning" title="#{hidden_bidi_title}">&lt;U+2066&gt;</span> if (isAdmin) <span class="bidi-warning" title="#{hidden_bidi_title}">&lt;U+2069&gt;</span> <span class="bidi-warning" title="#{hidden_bidi_title}">&lt;U+2066&gt;</span> {
console.log("You are an admin.");
/* end admins only <span class="bidi-warning" title="#{hidden_bidi_title}">&lt;U+202E&gt;</span>*/<span class="bidi-warning" title="#{hidden_bidi_title}">&lt;U+2066&gt;</span> }
</code></pre>
HTML
expect(cooked).to eq(html.strip)
end
it "fuzzes all possible dangerous unicode bidirectional (bidi) override characters, making sure they are replaced" do
bad_bidi = [
"\u202A",
"\u202B",
"\u202C",
"\u202D",
"\u202E",
"\u2066",
"\u2067",
"\u2068",
"\u2069",
]
bad_bidi.each do |bidi|
code = <<~MD
```
#{bidi}
```
MD
cooked = PrettyText.cook(code)
formatted_bidi = format("&lt;U+%04X&gt;", bidi.ord)
html = <<~HTML
<pre><code class="lang-auto"><span class="bidi-warning" title="#{I18n.t("post.hidden_bidi_character")}">#{formatted_bidi}</span>
</code></pre>
HTML
expect(cooked).to eq(html.strip)
end
end
it "fuzzes all possible dangerous unicode bidirectional (bidi) override characters in solo code and pre nodes, making sure they are replaced" do
bad_bidi = [
"\u202A",
"\u202B",
"\u202C",
"\u202D",
"\u202E",
"\u2066",
"\u2067",
"\u2068",
"\u2069",
]
bad_bidi.each do |bidi|
code = <<~MD
<code>#{bidi}</code>
MD
cooked = PrettyText.cook(code)
formatted_bidi = format("&lt;U+%04X&gt;", bidi.ord)
html = <<~HTML
<p><code><span class="bidi-warning" title="#{I18n.t("post.hidden_bidi_character")}">#{formatted_bidi}</span></code></p>
HTML
expect(cooked).to eq(html.strip)
end
bad_bidi.each do |bidi|
code = <<~MD
<pre>#{bidi}</pre>
MD
cooked = PrettyText.cook(code)
formatted_bidi = format("&lt;U+%04X&gt;", bidi.ord)
html = <<~HTML
<pre><span class="bidi-warning" title="#{I18n.t("post.hidden_bidi_character")}">#{formatted_bidi}</span></pre>
HTML
expect(cooked).to eq(html.strip)
end
end
end
describe "rel attributes" do