mirror of
https://github.com/discourse/discourse.git
synced 2024-11-30 19:36:22 +08:00
7328a2bfb0
Censored watched words were not censored inside the title of an inline
oneboxes. Malicious users could exploit this behaviour to insert bad
words. The same issue has been fixed for regular Oneboxes in commit
d184fe59ca
.
187 lines
4.6 KiB
Ruby
187 lines
4.6 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
class WordWatcher
|
|
REPLACEMENT_LETTER ||= CGI.unescape_html("■")
|
|
|
|
def initialize(raw)
|
|
@raw = raw
|
|
end
|
|
|
|
@cache_enabled = true
|
|
|
|
def self.disable_cache
|
|
@cache_enabled = false
|
|
end
|
|
|
|
def self.cache_enabled?
|
|
@cache_enabled
|
|
end
|
|
|
|
def self.words_for_action(action)
|
|
words = WatchedWord
|
|
.where(action: WatchedWord.actions[action.to_sym])
|
|
.limit(WatchedWord::MAX_WORDS_PER_ACTION)
|
|
|
|
if WatchedWord.has_replacement?(action.to_sym)
|
|
words.pluck(:word, :replacement).to_h
|
|
else
|
|
words.pluck(:word)
|
|
end
|
|
end
|
|
|
|
def self.words_for_action_exists?(action)
|
|
WatchedWord.where(action: WatchedWord.actions[action.to_sym]).exists?
|
|
end
|
|
|
|
def self.get_cached_words(action)
|
|
if cache_enabled?
|
|
Discourse.cache.fetch(word_matcher_regexp_key(action), expires_in: 1.day) do
|
|
words_for_action(action).presence
|
|
end
|
|
else
|
|
words_for_action(action).presence
|
|
end
|
|
end
|
|
|
|
# This regexp is run in miniracer, and the client JS app
|
|
# Make sure it is compatible with major browsers when changing
|
|
# hint: non-chrome browsers do not support 'lookbehind'
|
|
def self.word_matcher_regexp(action, raise_errors: false)
|
|
words = get_cached_words(action)
|
|
if words
|
|
if WatchedWord.has_replacement?(action.to_sym)
|
|
words = words.keys
|
|
end
|
|
words = words.map do |w|
|
|
word = word_to_regexp(w)
|
|
word = "(#{word})" if SiteSetting.watched_words_regular_expressions?
|
|
word
|
|
end
|
|
regexp = words.join('|')
|
|
if !SiteSetting.watched_words_regular_expressions?
|
|
regexp = "(#{regexp})"
|
|
regexp = "(?:\\W|^)#{regexp}(?=\\W|$)"
|
|
end
|
|
Regexp.new(regexp, Regexp::IGNORECASE)
|
|
end
|
|
rescue RegexpError
|
|
raise if raise_errors
|
|
nil # Admin will be alerted via admin_dashboard_data.rb
|
|
end
|
|
|
|
def self.word_matcher_regexps(action)
|
|
if words = get_cached_words(action)
|
|
words.map { |w, r| [word_to_regexp(w, whole: true), r] }.to_h
|
|
end
|
|
end
|
|
|
|
def self.word_to_regexp(word, whole: false)
|
|
if SiteSetting.watched_words_regular_expressions?
|
|
# Strip ruby regexp format if present, we're going to make the whole thing
|
|
# case insensitive anyway
|
|
regexp = word.start_with?("(?-mix:") ? word[7..-2] : word
|
|
regexp = "(#{regexp})" if whole
|
|
return regexp
|
|
end
|
|
|
|
regexp = Regexp.escape(word).gsub("\\*", '\S*')
|
|
|
|
if whole && !SiteSetting.watched_words_regular_expressions?
|
|
regexp = "(?:\\W|^)(#{regexp})(?=\\W|$)"
|
|
end
|
|
|
|
regexp
|
|
end
|
|
|
|
def self.word_matcher_regexp_key(action)
|
|
"watched-words-list:#{action}"
|
|
end
|
|
|
|
def self.censor(html)
|
|
regexp = WordWatcher.word_matcher_regexp(:censor)
|
|
return html if regexp.blank?
|
|
|
|
doc = Nokogiri::HTML5::fragment(html)
|
|
doc.traverse do |node|
|
|
node.content = censor_text_with_regexp(node.content, regexp) if node.text?
|
|
end
|
|
doc.to_s
|
|
end
|
|
|
|
def self.censor_text(text)
|
|
regexp = WordWatcher.word_matcher_regexp(:censor)
|
|
return text if regexp.blank?
|
|
|
|
censor_text_with_regexp(text, regexp)
|
|
end
|
|
|
|
def self.clear_cache!
|
|
WatchedWord.actions.each do |a, i|
|
|
Discourse.cache.delete word_matcher_regexp_key(a)
|
|
end
|
|
end
|
|
|
|
def requires_approval?
|
|
word_matches_for_action?(:require_approval)
|
|
end
|
|
|
|
def should_flag?
|
|
word_matches_for_action?(:flag)
|
|
end
|
|
|
|
def should_block?
|
|
word_matches_for_action?(:block, all_matches: true)
|
|
end
|
|
|
|
def should_silence?
|
|
word_matches_for_action?(:silence)
|
|
end
|
|
|
|
def word_matches_for_action?(action, all_matches: false)
|
|
regexp = self.class.word_matcher_regexp(action)
|
|
if regexp
|
|
match = regexp.match(@raw)
|
|
return match if !all_matches || !match
|
|
|
|
if SiteSetting.watched_words_regular_expressions?
|
|
set = Set.new
|
|
@raw.scan(regexp).each do |m|
|
|
if Array === m
|
|
set.add(m.find(&:present?))
|
|
elsif String === m
|
|
set.add(m)
|
|
end
|
|
end
|
|
matches = set.to_a
|
|
else
|
|
matches = @raw.scan(regexp)
|
|
matches.flatten!
|
|
matches.uniq!
|
|
end
|
|
matches.compact!
|
|
matches.sort!
|
|
matches
|
|
else
|
|
false
|
|
end
|
|
end
|
|
|
|
def word_matches?(word)
|
|
Regexp.new(WordWatcher.word_to_regexp(word, whole: true), Regexp::IGNORECASE).match?(@raw)
|
|
end
|
|
|
|
private
|
|
|
|
def self.censor_text_with_regexp(text, regexp)
|
|
text.gsub(regexp) do |match|
|
|
# the regex captures leading whitespaces
|
|
padding = match.size - match.lstrip.size
|
|
if padding > 0
|
|
match[0..padding - 1] + REPLACEMENT_LETTER * (match.size - padding)
|
|
else
|
|
REPLACEMENT_LETTER * match.size
|
|
end
|
|
end
|
|
end
|
|
end
|