diff --git a/app/assets/stylesheets/common/base/topic-post.scss b/app/assets/stylesheets/common/base/topic-post.scss index efb68f7c946..60e6621e50b 100644 --- a/app/assets/stylesheets/common/base/topic-post.scss +++ b/app/assets/stylesheets/common/base/topic-post.scss @@ -827,6 +827,14 @@ pre { background: var(--blend-primary-secondary-5); max-height: 500px; } + + .bidi-warning, + .bidi-warning span { + font-style: normal; + background-color: var(--highlight); + color: var(--danger); + cursor: help; + } } .copy-codeblocks { diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index 9cd23f9b6d6..cfc7881fdea 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -717,6 +717,7 @@ en: post: image_placeholder: broken: "This image is broken" + hidden_bidi_character: "Bidirectional characters can change the order that text is rendered. This could be used to obscure malicious code." has_likes: one: "%{count} Like" other: "%{count} Likes" diff --git a/lib/pretty_text.rb b/lib/pretty_text.rb index a298ce4f80a..fd0b26e8ba0 100644 --- a/lib/pretty_text.rb +++ b/lib/pretty_text.rb @@ -5,6 +5,19 @@ require 'nokogiri' require 'erb' module PrettyText + DANGEROUS_BIDI_CHARACTERS = [ + "\u202A", + "\u202B", + "\u202C", + "\u202D", + "\u202E", + "\u2066", + "\u2067", + "\u2068", + "\u2069", + ].freeze + DANGEROUS_BIDI_REGEXP = Regexp.new(DANGEROUS_BIDI_CHARACTERS.join("|")).freeze + @mutex = Mutex.new @ctx_init = Mutex.new @@ -278,6 +291,7 @@ module PrettyText add_nofollow = !options[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content add_rel_attributes_to_user_content(doc, add_nofollow) + strip_hidden_unicode_bidirectional_characters(doc) if SiteSetting.enable_mentions add_mentions(doc, user_id: opts[:user_id]) @@ -290,6 +304,24 @@ module PrettyText loofah_fragment.scrub!(scrubber).to_html end + def self.strip_hidden_unicode_bidirectional_characters(doc) + return if !DANGEROUS_BIDI_REGEXP.match?(doc.content) + + doc.css("code,pre").each do |code_tag| + next if !DANGEROUS_BIDI_REGEXP.match?(code_tag.content) + + DANGEROUS_BIDI_CHARACTERS.each do |bidi| + next if !code_tag.content.include?(bidi) + + formatted = "<U+#{bidi.ord.to_s(16).upcase}>" + code_tag.inner_html = code_tag.inner_html.gsub( + bidi, + "<span class=\"bidi-warning\" title=\"#{I18n.t("post.hidden_bidi_character")}\">#{formatted}</span>" + ) + end + end + end + def self.add_rel_attributes_to_user_content(doc, add_nofollow) allowlist = [] diff --git a/spec/components/pretty_text_spec.rb b/spec/components/pretty_text_spec.rb index d4b7267291f..1007fb97b5f 100644 --- a/spec/components/pretty_text_spec.rb +++ b/spec/components/pretty_text_spec.rb @@ -552,6 +552,95 @@ describe PrettyText do Discourse.redis.flushdb end end + + it "strips out unicode bidirectional (bidi) override characters and replaces with a highlighted span" do + code = <<~MD + X + ```js + var isAdmin = false; + /* begin admin only */ if (isAdmin) { + console.log("You are an admin."); + /* end admins only */ } + ``` + MD + cooked = PrettyText.cook(code) + hidden_bidi_title = I18n.t("post.hidden_bidi_character") + + html = <<~HTML + <p>X</p> + <pre><code class="lang-auto">var isAdmin = false; + /*<span class="bidi-warning" title="#{hidden_bidi_title}"><U+202E></span> begin admin only */<span class="bidi-warning" title="#{hidden_bidi_title}"><U+2066></span> if (isAdmin) <span class="bidi-warning" title="#{hidden_bidi_title}"><U+2069></span> <span class="bidi-warning" title="#{hidden_bidi_title}"><U+2066></span> { + console.log("You are an admin."); + /* end admins only <span class="bidi-warning" title="#{hidden_bidi_title}"><U+202E></span>*/<span class="bidi-warning" title="#{hidden_bidi_title}"><U+2066></span> } + </code></pre> + HTML + + expect(cooked).to eq(html.strip) + end + + it "fuzzes all possible dangerous unicode bidirectional (bidi) override characters, making sure they are replaced" do + bad_bidi = [ + "\u202A", + "\u202B", + "\u202C", + "\u202D", + "\u202E", + "\u2066", + "\u2067", + "\u2068", + "\u2069", + ] + bad_bidi.each do |bidi| + code = <<~MD + ``` + #{bidi} + ``` + MD + cooked = PrettyText.cook(code) + formatted_bidi = format("<U+%04X>", bidi.ord) + html = <<~HTML + <pre><code class="lang-auto"><span class="bidi-warning" title="#{I18n.t("post.hidden_bidi_character")}">#{formatted_bidi}</span> + </code></pre> + HTML + expect(cooked).to eq(html.strip) + end + end + + it "fuzzes all possible dangerous unicode bidirectional (bidi) override characters in solo code and pre nodes, making sure they are replaced" do + bad_bidi = [ + "\u202A", + "\u202B", + "\u202C", + "\u202D", + "\u202E", + "\u2066", + "\u2067", + "\u2068", + "\u2069", + ] + bad_bidi.each do |bidi| + code = <<~MD + <code>#{bidi}</code> + MD + cooked = PrettyText.cook(code) + formatted_bidi = format("<U+%04X>", bidi.ord) + html = <<~HTML + <p><code><span class="bidi-warning" title="#{I18n.t("post.hidden_bidi_character")}">#{formatted_bidi}</span></code></p> + HTML + expect(cooked).to eq(html.strip) + end + bad_bidi.each do |bidi| + code = <<~MD + <pre>#{bidi}</pre> + MD + cooked = PrettyText.cook(code) + formatted_bidi = format("<U+%04X>", bidi.ord) + html = <<~HTML + <pre><span class="bidi-warning" title="#{I18n.t("post.hidden_bidi_character")}">#{formatted_bidi}</span></pre> + HTML + expect(cooked).to eq(html.strip) + end + end end describe "rel attributes" do