discourse/app/services/word_watcher.rb
Bianca Nenciu 5f13ca5e54
FIX: Don't cook user fields to apply watched words (#17590)
The previous method for reused the PrettyText logic which applied the
watched word logic, but had the unwanted effect of cooking the text too.
This meant that regular text values were converted to HTML.

Follow up to commit 5a4c35f627.
2022-07-26 18:15:42 +03:00

200 lines
5.0 KiB
Ruby

# frozen_string_literal: true
class WordWatcher
REPLACEMENT_LETTER ||= CGI.unescape_html("■")
def initialize(raw)
@raw = raw
end
@cache_enabled = true
def self.disable_cache
@cache_enabled = false
end
def self.cache_enabled?
@cache_enabled
end
def self.words_for_action(action)
words = WatchedWord
.where(action: WatchedWord.actions[action.to_sym])
.limit(WatchedWord::MAX_WORDS_PER_ACTION)
.order(:id)
if WatchedWord.has_replacement?(action.to_sym)
words.pluck(:word, :replacement).to_h
else
words.pluck(:word)
end
end
def self.words_for_action_exists?(action)
WatchedWord.where(action: WatchedWord.actions[action.to_sym]).exists?
end
def self.get_cached_words(action)
if cache_enabled?
Discourse.cache.fetch(word_matcher_regexp_key(action), expires_in: 1.day) do
words_for_action(action).presence
end
else
words_for_action(action).presence
end
end
# This regexp is run in miniracer, and the client JS app
# Make sure it is compatible with major browsers when changing
# hint: non-chrome browsers do not support 'lookbehind'
def self.word_matcher_regexp(action, raise_errors: false)
words = get_cached_words(action)
if words
if WatchedWord.has_replacement?(action.to_sym)
words = words.keys
end
words = words.map do |w|
word = word_to_regexp(w)
word = "(#{word})" if SiteSetting.watched_words_regular_expressions?
word
end
regexp = words.join('|')
if !SiteSetting.watched_words_regular_expressions?
regexp = "(#{regexp})"
regexp = "(?:\\W|^)#{regexp}(?=\\W|$)"
end
Regexp.new(regexp, Regexp::IGNORECASE)
end
rescue RegexpError
raise if raise_errors
nil # Admin will be alerted via admin_dashboard_data.rb
end
def self.word_matcher_regexps(action)
if words = get_cached_words(action)
words.map { |w, r| [word_to_regexp(w, whole: true), r] }.to_h
end
end
def self.word_to_regexp(word, whole: false)
if SiteSetting.watched_words_regular_expressions?
# Strip ruby regexp format if present, we're going to make the whole thing
# case insensitive anyway
regexp = word.start_with?("(?-mix:") ? word[7..-2] : word
regexp = "(#{regexp})" if whole
return regexp
end
regexp = Regexp.escape(word).gsub("\\*", '\S*')
if whole && !SiteSetting.watched_words_regular_expressions?
regexp = "(?:\\W|^)(#{regexp})(?=\\W|$)"
end
regexp
end
def self.word_matcher_regexp_key(action)
"watched-words-list:#{action}"
end
def self.censor(html)
regexp = word_matcher_regexp(:censor)
return html if regexp.blank?
doc = Nokogiri::HTML5::fragment(html)
doc.traverse do |node|
node.content = censor_text_with_regexp(node.content, regexp) if node.text?
end
doc.to_s
end
def self.censor_text(text)
regexp = word_matcher_regexp(:censor)
return text if regexp.blank?
censor_text_with_regexp(text, regexp)
end
def self.apply_to_text(text)
if regexp = word_matcher_regexp(:censor)
text = censor_text_with_regexp(text, regexp)
end
%i[replace link]
.flat_map { |type| word_matcher_regexps(type).to_a }
.reduce(text) do |t, (word_regexp, replacement)|
t.gsub(Regexp.new(word_regexp)) { |match| "#{match[0]}#{replacement}" }
end
end
def self.clear_cache!
WatchedWord.actions.each do |a, i|
Discourse.cache.delete word_matcher_regexp_key(a)
end
end
def requires_approval?
word_matches_for_action?(:require_approval)
end
def should_flag?
word_matches_for_action?(:flag)
end
def should_block?
word_matches_for_action?(:block, all_matches: true)
end
def should_silence?
word_matches_for_action?(:silence)
end
def word_matches_for_action?(action, all_matches: false)
regexp = self.class.word_matcher_regexp(action)
if regexp
match = regexp.match(@raw)
return match if !all_matches || !match
if SiteSetting.watched_words_regular_expressions?
set = Set.new
@raw.scan(regexp).each do |m|
if Array === m
set.add(m.find(&:present?))
elsif String === m
set.add(m)
end
end
matches = set.to_a
else
matches = @raw.scan(regexp)
matches.flatten!
matches.uniq!
end
matches.compact!
matches.sort!
matches
else
false
end
end
def word_matches?(word)
Regexp.new(WordWatcher.word_to_regexp(word, whole: true), Regexp::IGNORECASE).match?(@raw)
end
private
def self.censor_text_with_regexp(text, regexp)
text.gsub(regexp) do |match|
# the regex captures leading whitespaces
padding = match.size - match.lstrip.size
if padding > 0
match[0..padding - 1] + REPLACEMENT_LETTER * (match.size - padding)
else
REPLACEMENT_LETTER * match.size
end
end
end
end