mirror of
https://github.com/discourse/discourse.git
synced 2024-12-03 08:48:35 +08:00
5f13ca5e54
The previous method for reused the PrettyText logic which applied the
watched word logic, but had the unwanted effect of cooking the text too.
This meant that regular text values were converted to HTML.
Follow up to commit 5a4c35f627
.
200 lines
5.0 KiB
Ruby
200 lines
5.0 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
class WordWatcher
|
|
REPLACEMENT_LETTER ||= CGI.unescape_html("■")
|
|
|
|
def initialize(raw)
|
|
@raw = raw
|
|
end
|
|
|
|
@cache_enabled = true
|
|
|
|
def self.disable_cache
|
|
@cache_enabled = false
|
|
end
|
|
|
|
def self.cache_enabled?
|
|
@cache_enabled
|
|
end
|
|
|
|
def self.words_for_action(action)
|
|
words = WatchedWord
|
|
.where(action: WatchedWord.actions[action.to_sym])
|
|
.limit(WatchedWord::MAX_WORDS_PER_ACTION)
|
|
.order(:id)
|
|
|
|
if WatchedWord.has_replacement?(action.to_sym)
|
|
words.pluck(:word, :replacement).to_h
|
|
else
|
|
words.pluck(:word)
|
|
end
|
|
end
|
|
|
|
def self.words_for_action_exists?(action)
|
|
WatchedWord.where(action: WatchedWord.actions[action.to_sym]).exists?
|
|
end
|
|
|
|
def self.get_cached_words(action)
|
|
if cache_enabled?
|
|
Discourse.cache.fetch(word_matcher_regexp_key(action), expires_in: 1.day) do
|
|
words_for_action(action).presence
|
|
end
|
|
else
|
|
words_for_action(action).presence
|
|
end
|
|
end
|
|
|
|
# This regexp is run in miniracer, and the client JS app
|
|
# Make sure it is compatible with major browsers when changing
|
|
# hint: non-chrome browsers do not support 'lookbehind'
|
|
def self.word_matcher_regexp(action, raise_errors: false)
|
|
words = get_cached_words(action)
|
|
if words
|
|
if WatchedWord.has_replacement?(action.to_sym)
|
|
words = words.keys
|
|
end
|
|
words = words.map do |w|
|
|
word = word_to_regexp(w)
|
|
word = "(#{word})" if SiteSetting.watched_words_regular_expressions?
|
|
word
|
|
end
|
|
regexp = words.join('|')
|
|
if !SiteSetting.watched_words_regular_expressions?
|
|
regexp = "(#{regexp})"
|
|
regexp = "(?:\\W|^)#{regexp}(?=\\W|$)"
|
|
end
|
|
Regexp.new(regexp, Regexp::IGNORECASE)
|
|
end
|
|
rescue RegexpError
|
|
raise if raise_errors
|
|
nil # Admin will be alerted via admin_dashboard_data.rb
|
|
end
|
|
|
|
def self.word_matcher_regexps(action)
|
|
if words = get_cached_words(action)
|
|
words.map { |w, r| [word_to_regexp(w, whole: true), r] }.to_h
|
|
end
|
|
end
|
|
|
|
def self.word_to_regexp(word, whole: false)
|
|
if SiteSetting.watched_words_regular_expressions?
|
|
# Strip ruby regexp format if present, we're going to make the whole thing
|
|
# case insensitive anyway
|
|
regexp = word.start_with?("(?-mix:") ? word[7..-2] : word
|
|
regexp = "(#{regexp})" if whole
|
|
return regexp
|
|
end
|
|
|
|
regexp = Regexp.escape(word).gsub("\\*", '\S*')
|
|
|
|
if whole && !SiteSetting.watched_words_regular_expressions?
|
|
regexp = "(?:\\W|^)(#{regexp})(?=\\W|$)"
|
|
end
|
|
|
|
regexp
|
|
end
|
|
|
|
def self.word_matcher_regexp_key(action)
|
|
"watched-words-list:#{action}"
|
|
end
|
|
|
|
def self.censor(html)
|
|
regexp = word_matcher_regexp(:censor)
|
|
return html if regexp.blank?
|
|
|
|
doc = Nokogiri::HTML5::fragment(html)
|
|
doc.traverse do |node|
|
|
node.content = censor_text_with_regexp(node.content, regexp) if node.text?
|
|
end
|
|
doc.to_s
|
|
end
|
|
|
|
def self.censor_text(text)
|
|
regexp = word_matcher_regexp(:censor)
|
|
return text if regexp.blank?
|
|
|
|
censor_text_with_regexp(text, regexp)
|
|
end
|
|
|
|
def self.apply_to_text(text)
|
|
if regexp = word_matcher_regexp(:censor)
|
|
text = censor_text_with_regexp(text, regexp)
|
|
end
|
|
|
|
%i[replace link]
|
|
.flat_map { |type| word_matcher_regexps(type).to_a }
|
|
.reduce(text) do |t, (word_regexp, replacement)|
|
|
t.gsub(Regexp.new(word_regexp)) { |match| "#{match[0]}#{replacement}" }
|
|
end
|
|
end
|
|
|
|
def self.clear_cache!
|
|
WatchedWord.actions.each do |a, i|
|
|
Discourse.cache.delete word_matcher_regexp_key(a)
|
|
end
|
|
end
|
|
|
|
def requires_approval?
|
|
word_matches_for_action?(:require_approval)
|
|
end
|
|
|
|
def should_flag?
|
|
word_matches_for_action?(:flag)
|
|
end
|
|
|
|
def should_block?
|
|
word_matches_for_action?(:block, all_matches: true)
|
|
end
|
|
|
|
def should_silence?
|
|
word_matches_for_action?(:silence)
|
|
end
|
|
|
|
def word_matches_for_action?(action, all_matches: false)
|
|
regexp = self.class.word_matcher_regexp(action)
|
|
if regexp
|
|
match = regexp.match(@raw)
|
|
return match if !all_matches || !match
|
|
|
|
if SiteSetting.watched_words_regular_expressions?
|
|
set = Set.new
|
|
@raw.scan(regexp).each do |m|
|
|
if Array === m
|
|
set.add(m.find(&:present?))
|
|
elsif String === m
|
|
set.add(m)
|
|
end
|
|
end
|
|
matches = set.to_a
|
|
else
|
|
matches = @raw.scan(regexp)
|
|
matches.flatten!
|
|
matches.uniq!
|
|
end
|
|
matches.compact!
|
|
matches.sort!
|
|
matches
|
|
else
|
|
false
|
|
end
|
|
end
|
|
|
|
def word_matches?(word)
|
|
Regexp.new(WordWatcher.word_to_regexp(word, whole: true), Regexp::IGNORECASE).match?(@raw)
|
|
end
|
|
|
|
private
|
|
|
|
def self.censor_text_with_regexp(text, regexp)
|
|
text.gsub(regexp) do |match|
|
|
# the regex captures leading whitespaces
|
|
padding = match.size - match.lstrip.size
|
|
if padding > 0
|
|
match[0..padding - 1] + REPLACEMENT_LETTER * (match.size - padding)
|
|
else
|
|
REPLACEMENT_LETTER * match.size
|
|
end
|
|
end
|
|
end
|
|
end
|