mirror of
https://github.com/discourse/discourse.git
synced 2024-11-26 02:13:47 +08:00
FEATURE: Censor Oneboxes (#12902)
Previously, onebox content was not run through the censor regex, so censored words could be sneaked in via a onebox.
This commit is contained in:
parent
58cb120aa2
commit
d184fe59ca
|
@ -1,6 +1,7 @@
|
||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
class WordWatcher
|
class WordWatcher
|
||||||
|
REPLACEMENT_LETTER ||= CGI.unescape_html("■")
|
||||||
|
|
||||||
def initialize(raw)
|
def initialize(raw)
|
||||||
@raw = raw
|
@raw = raw
|
||||||
|
@ -70,6 +71,27 @@ class WordWatcher
|
||||||
"watched-words-list:#{action}"
|
"watched-words-list:#{action}"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Masks censored watched words inside the text nodes of an HTML fragment.
#
# Only nodes for which +text?+ is true are rewritten; every other node is
# left untouched.
#
# @param html [String] HTML fragment to censor
# @return [String] +html+ itself when the censor regexp is blank, otherwise
#   the serialized fragment with every match replaced by REPLACEMENT_LETTER
#   repeated once per masked character
def self.censor(html)
  censor_regexp = WordWatcher.word_matcher_regexp(:censor)
  return html if censor_regexp.blank?

  fragment = Nokogiri::HTML5::fragment(html)

  fragment.traverse do |node|
    next unless node.text?

    node.content = node.content.gsub(censor_regexp) do |hit|
      # The regexp also captures whitespace in front of the word: keep that
      # prefix verbatim and mask only the remainder of the match.
      lead = hit.size - hit.lstrip.size
      hit[0, lead] + REPLACEMENT_LETTER * (hit.size - lead)
    end
  end

  fragment.to_s
end
|
||||||
|
|
||||||
def self.clear_cache!
|
def self.clear_cache!
|
||||||
WatchedWord.actions.each do |a, i|
|
WatchedWord.actions.each do |a, i|
|
||||||
Discourse.cache.delete word_matcher_regexp_key(a)
|
Discourse.cache.delete word_matcher_regexp_key(a)
|
||||||
|
|
|
@ -455,7 +455,10 @@ module Oneboxer
|
||||||
onebox_options[:user_agent] = user_agent_override if user_agent_override
|
onebox_options[:user_agent] = user_agent_override if user_agent_override
|
||||||
|
|
||||||
r = Onebox.preview(uri.to_s, onebox_options)
|
r = Onebox.preview(uri.to_s, onebox_options)
|
||||||
result = { onebox: r.to_s, preview: r&.placeholder_html.to_s }
|
result = {
|
||||||
|
onebox: WordWatcher.censor(r.to_s),
|
||||||
|
preview: WordWatcher.censor(r&.placeholder_html.to_s)
|
||||||
|
}
|
||||||
|
|
||||||
# NOTE: Call r.errors after calling placeholder_html
|
# NOTE: Call r.errors after calling placeholder_html
|
||||||
if r.errors.any?
|
if r.errors.any?
|
||||||
|
|
|
@ -177,6 +177,29 @@ describe Oneboxer do
|
||||||
expect(Oneboxer.external_onebox(url)[:onebox]).to be_present
|
expect(Oneboxer.external_onebox(url)[:onebox]).to be_present
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# A censored watched word present in the remote page's Open Graph metadata
# must not appear in either the onebox HTML or its preview.
it "censors external oneboxes" do
  Fabricate(:watched_word, action: WatchedWord.actions[:censor], word: "bad word")

  url = 'https://example.com/'
  # The stubbed page carries the watched word in its title, description and body.
  stub_request(:any, url).to_return(status: 200, body: <<~HTML, headers: {})
    <html>
    <head>
    <meta property="og:title" content="title with bad word">
    <meta property="og:description" content="description with bad word">
    </head>
    <body>
    <p>content with bad word</p>
    </body>
    </html>
  HTML

  onebox = Oneboxer.external_onebox(url)
  # The text around the watched word survives; the watched word itself does not.
  expect(onebox[:onebox]).to include('title with')
  expect(onebox[:onebox]).not_to include('bad word')
  expect(onebox[:preview]).to include('title with')
  expect(onebox[:preview]).not_to include('bad word')
end
|
||||||
|
|
||||||
it "uses the Onebox custom user agent on specified hosts" do
|
it "uses the Onebox custom user agent on specified hosts" do
|
||||||
SiteSetting.force_custom_user_agent_hosts = "http://codepen.io|https://video.discourse.org/"
|
SiteSetting.force_custom_user_agent_hosts = "http://codepen.io|https://video.discourse.org/"
|
||||||
url = 'https://video.discourse.org/presentation.mp4'
|
url = 'https://video.discourse.org/presentation.mp4'
|
||||||
|
|
Loading…
Reference in New Issue
Block a user