2019-05-03 06:17:27 +08:00
# frozen_string_literal: true
2016-05-19 20:25:08 +08:00
require 'mini_racer'
2013-02-06 03:16:51 +08:00
require 'nokogiri'
2016-06-15 02:31:51 +08:00
require 'erb'
2013-02-06 03:16:51 +08:00
module PrettyText
2021-11-22 08:43:03 +08:00
# Unicode bidirectional control characters. Each entry is exactly one
# character; `strip_hidden_unicode_bidirectional_characters` replaces them with
# a visible marker inside code blocks.
DANGEROUS_BIDI_CHARACTERS = [
  "\u202A", # LEFT-TO-RIGHT EMBEDDING
  "\u202B", # RIGHT-TO-LEFT EMBEDDING
  "\u202C", # POP DIRECTIONAL FORMATTING
  "\u202D", # LEFT-TO-RIGHT OVERRIDE
  "\u202E", # RIGHT-TO-LEFT OVERRIDE
  "\u2066", # LEFT-TO-RIGHT ISOLATE
  "\u2067", # RIGHT-TO-LEFT ISOLATE
  "\u2068", # FIRST STRONG ISOLATE
  "\u2069", # POP DIRECTIONAL ISOLATE
].freeze

# Matches any single character listed in DANGEROUS_BIDI_CHARACTERS.
DANGEROUS_BIDI_REGEXP = Regexp.union(DANGEROUS_BIDI_CHARACTERS).freeze
# FEATURE: Allow hotlinked media to be blocked (#16940)
# This commit introduces a new site setting: `block_hotlinked_media`. When enabled, all attempts to hotlink media (images, videos, and audio) will fail, and be replaced with a linked placeholder. Exceptions to the rule can be added via `block_hotlinked_media_exceptions`.
# `download_remote_image_to_local` can be used alongside this feature. In that case, hotlinked images will be blocked immediately when the post is created, but will then be replaced with the downloaded version a few seconds later.
# This implementation is purely server-side, and does not impact the composer preview.
# Technically, there are two stages to this feature:
# 1. `PrettyText.sanitize_hotlinked_media` is called during `PrettyText.cook`, and whenever new images are introduced by Onebox. It will iterate over all src/srcset attributes in the post HTML and check if they're allowed. If not, the attributes will be removed and replaced with a `data-blocked-hotlinked-src(set)` attribute.
# 2. In the `CookedPostProcessor`, we iterate over all `data-blocked-hotlinked-src(set)` attributes and check whether we have a downloaded version of the media. If yes, we update the src to use the downloaded version. If not, the entire media element is replaced with a placeholder. The placeholder is labelled 'external media', and is a link to the offsite media.
2022-06-07 22:23:04 +08:00
# Attribute names that `sanitize_hotlinked_media` moves a blocked `src` /
# `srcset` value into.
BLOCKED_HOTLINKED_SRC_ATTR = "data-blocked-hotlinked-src"
BLOCKED_HOTLINKED_SRCSET_ATTR = "data-blocked-hotlinked-srcset"

# Held by `protect` around JavaScript evaluation.
@mutex = Mutex.new
# Held by `v8` and `reset_context` while the shared context is created or disposed.
@ctx_init = Mutex.new
2013-02-06 03:16:51 +08:00
2016-06-15 02:31:51 +08:00
# Application root; `ctx_load` prefixes every file it loads with this value.
def self.app_root
  Rails.root
end
2013-07-16 15:48:48 +08:00
2016-06-15 02:31:51 +08:00
# Resolves `filename` to an existing file below `root`.
#
# `root` is concatenated with the candidate as-is, so it must already end with
# a path separator. Candidates are probed in this order: the bare name, then
# the name with `.js.es6`, `.js`, `.js.es6.erb` and `.js.erb` appended.
#
# Returns the first candidate (relative to `root`) that is a regular file, or
# nil when none exists.
def self.find_file(root, filename)
  candidates = ["", ".js.es6", ".js", ".js.es6.erb", ".js.erb"].map { |suffix| "#{filename}#{suffix}" }
  candidates.find { |candidate| File.file?("#{root}#{candidate}") }
end
2015-12-28 14:28:16 +08:00
2016-06-15 02:31:51 +08:00
# Evaluates the JavaScript module `part_name` inside `ctx`.
#
# The module is looked up below `root_path` first (see `find_file`). A file
# found there is rendered through ERB when its name ends in `.erb`, transpiled
# with DiscourseJsProcessor::Transpiler and then evaluated. Otherwise the same
# name is looked up below vendor/assets/javascripts and evaluated untouched.
# Nothing happens when the module exists in neither place.
def self.apply_es6_file(ctx, root_path, part_name)
  filename = find_file(root_path, part_name)

  if filename
    source = File.read("#{root_path}#{filename}")
    source = ERB.new(source).result(binding) if filename.match?(/\.erb$/)

    transpiler = DiscourseJsProcessor::Transpiler.new
    transpiled = transpiler.perform(source, "#{Rails.root}/app/assets/javascripts/", part_name)
    ctx.eval(transpiled)
  else
    # Look for vendored stuff
    vendor_root = "#{Rails.root}/vendor/assets/javascripts/"
    filename = find_file(vendor_root, part_name)
    ctx.eval(File.read("#{vendor_root}#{filename}")) if filename
  end
end
2022-08-30 02:11:59 +08:00
# Applies every entry found (recursively) below app/assets/javascripts/<path>
# to `ctx`, in sorted order, via `apply_es6_file`.
#
# The module name handed to `apply_es6_file` is the entry's path relative to
# app/assets/javascripts/ with a trailing `.js` or `.js.es6` removed. The dot
# before `es6` is matched literally, as in `create_es6_context`.
def self.ctx_load_directory(ctx, path)
  root_path = "#{Rails.root}/app/assets/javascripts/"

  Dir["#{root_path}#{path}/**/*"].sort.each do |file|
    part_name = file.sub(root_path, '').sub(/\.js(\.es6)?$/, '')
    apply_es6_file(ctx, root_path, part_name)
  end
end
# Builds a fresh MiniRacer context with everything needed to cook markdown:
# logger bridges, a `console` shim, the PrettyText helpers, the loader and
# vendored libraries, the pretty-text sources, a handful of Discourse modules
# and finally any plugin-provided markdown assets.
#
# The load order below is significant and is kept exactly as it was.
def self.create_es6_context
  ctx = MiniRacer::Context.new(timeout: 25000, ensure_gc_after_idle: 2000)

  ctx.eval("window = {}; window.devicePixelRatio = 2;") # hack to make code think stuff is retina

  ctx.attach("rails.logger.info", proc { |err| Rails.logger.info(err.to_s) })
  ctx.attach("rails.logger.warn", proc { |err| Rails.logger.warn(err.to_s) })
  ctx.attach("rails.logger.error", proc { |err| Rails.logger.error(err.to_s) })

  # console.* inside the context forwards to the Rails logger attached above
  ctx.eval <<~JS
    console = {
      prefix: "[PrettyText] ",
      log: function(...args){ rails.logger.info(console.prefix + args.join(" ")); },
      warn: function(...args){ rails.logger.warn(console.prefix + args.join(" ")); },
      error: function(...args){ rails.logger.error(console.prefix + args.join(" ")); }
    }
  JS

  ctx.eval("__PRETTY_TEXT = true")

  PrettyText::Helpers.instance_methods.each do |method|
    ctx.attach("__helpers.#{method}", PrettyText::Helpers.method(method))
  end

  root_path = "#{Rails.root}/app/assets/javascripts/"
  ctx_load(ctx, "#{root_path}/mini-loader.js")
  ctx_load(ctx, "#{root_path}/handlebars-shim.js")
  ctx_load(ctx, "#{root_path}/node_modules/xss/dist/xss.js")
  ctx.load("#{Rails.root}/lib/pretty_text/vendor-shims.js")
  ctx_load_directory(ctx, "pretty-text/addon")
  ctx_load_directory(ctx, "pretty-text/engines/discourse-markdown")
  ctx_load(ctx, "#{root_path}/node_modules/markdown-it/dist/markdown-it.js")

  %w[
    discourse-common/addon/lib/get-url
    discourse-common/addon/lib/object
    discourse-common/addon/lib/deprecated
    discourse-common/addon/lib/escape
    discourse-common/addon/utils/watched-words
    discourse/app/lib/to-markdown
    discourse/app/lib/utilities
  ].each { |part_name| apply_es6_file(ctx, root_path, part_name) }

  ctx.load("#{Rails.root}/lib/pretty_text/shims.js")
  ctx.eval("__setUnicode(#{Emoji.unicode_replacements_json})")

  # Plugin assets: only files whose path mentions discourse-markdown
  plugin_assets = []
  DiscoursePluginRegistry.each_globbed_asset do |asset|
    plugin_assets << asset if File.file?(asset) && asset.match?(/discourse-markdown/)
  end

  plugin_assets.uniq.each do |file|
    # everything up to and including ".../assets/javascripts/" is the root
    root_match = file.match(/^.+assets\/javascripts\//)
    next unless root_match

    root = root_match[0]
    apply_es6_file(ctx, root, file.sub(root, '').sub(/\.js(\.es6)?$/, ''))
  end

  DiscoursePluginRegistry.vendored_core_pretty_text.each do |vpt|
    ctx.eval(File.read(vpt))
  end

  DiscoursePluginRegistry.vendored_pretty_text.each do |vpt|
    ctx.eval(File.read(vpt))
  end

  ctx
end
# Returns the shared JavaScript context, creating it on first use.
#
# The fast path returns an existing context without locking. Otherwise the
# context is created while holding @ctx_init, and @ctx is checked again
# inside the lock so that only one context is ever built.
def self.v8
  return @ctx if @ctx

  # ensure we only init one of these
  @ctx_init.synchronize { @ctx ||= create_es6_context }
end
2020-05-28 02:11:52 +08:00
# Calls the JavaScript function `__resetTranslationTree()` in the shared
# context (the context is created first if it does not exist yet).
def self.reset_translations
  v8.eval("__resetTranslationTree()")
end
2014-11-14 14:51:04 +08:00
# Disposes the shared JavaScript context (if any) and forgets it, so the next
# call to `v8` builds a new one. Runs under the same lock as context creation.
def self.reset_context
  @ctx_init.synchronize do
    @ctx&.dispose
    @ctx = nil
  end
end
2022-01-06 15:27:12 +08:00
# Acceptable options:
#
# disable_emojis - Disables the emoji markdown engine.
# features - A hash where the key is the markdown feature name and the value is a boolean to enable/disable the markdown feature.
# The hash is merged into the default features set in pretty-text.js which can be used to add new features or disable existing features.
# features_override - An array of markdown feature names to override the default markdown feature set. Currently used by plugins to customize what features should be enabled
# when rendering markdown.
# markdown_it_rules - An array of markdown rule names which will be applied to the markdown-it engine. Currently used by plugins to customize what markdown-it rules should be
# enabled when rendering markdown.
# topic_id - Topic id for the post being cooked.
# user_id - User id for the post being cooked.
2022-02-23 14:13:46 +08:00
# force_quote_link - Always create the link to the quoted topic for [quote] bbcode. Normally this only happens
# if the topic_id provided is different from the [quote topic:X].
2016-07-07 15:52:56 +08:00
# Cooks `text` with the JavaScript markdown engine and returns the result of
# `__pt.cook`. A nil `text` is cooked as the empty string.
#
# In addition to the options listed above, `sanitize: false` disables the
# JavaScript sanitizer.
def self.markdown(text, opts = {})
  # we use the exact same markdown converter as the client
  # TODO: use the same extensions on both client and server (in particular the template for mentions)
  text = text || ""

  protect do
    context = v8

    custom_emoji = Emoji.custom.map { |e| [e.name, e.url] }.to_h

    # note, any additional options added to __optInput here must
    # also be added to the buildOptions function in pretty-text.js,
    # otherwise they will be discarded
    buffer = +<<~JS
      __optInput = {};
      __optInput.siteSettings = #{SiteSetting.client_settings_json};
      #{"__optInput.disableEmojis = true" if opts[:disable_emojis]}
      __paths = #{paths_json};
      __optInput.getURL = __getURL;
      #{"__optInput.features = #{opts[:features].to_json};" if opts[:features]}
      #{"__optInput.featuresOverride = #{opts[:features_override].to_json};" if opts[:features_override]}
      #{"__optInput.markdownItRules = #{opts[:markdown_it_rules].to_json};" if opts[:markdown_it_rules]}
      __optInput.getCurrentUser = __getCurrentUser;
      __optInput.lookupAvatar = __lookupAvatar;
      __optInput.lookupPrimaryUserGroup = __lookupPrimaryUserGroup;
      __optInput.formatUsername = __formatUsername;
      __optInput.getTopicInfo = __getTopicInfo;
      __optInput.categoryHashtagLookup = __categoryLookup;
      __optInput.customEmoji = #{custom_emoji.to_json};
      __optInput.customEmojiTranslation = #{Plugin::CustomEmoji.translations.to_json};
      __optInput.emojiUnicodeReplacer = __emojiUnicodeReplacer;
      __optInput.lookupUploadUrls = __lookupUploadUrls;
      __optInput.censoredRegexp = #{WordWatcher.serializable_word_matcher_regexp(:censor).to_json };
      __optInput.watchedWordsReplace = #{WordWatcher.word_matcher_regexps(:replace).to_json};
      __optInput.watchedWordsLink = #{WordWatcher.word_matcher_regexps(:link).to_json};
      __optInput.additionalOptions = #{Site.markdown_additional_options.to_json};
    JS

    if opts[:topic_id]
      buffer << "__optInput.topicId = #{opts[:topic_id].to_i};\n"
    end

    # any truthy value enables the flag; emit a JS literal rather than
    # interpolating whatever Ruby object the caller passed
    if opts[:force_quote_link]
      buffer << "__optInput.forceQuoteLink = true;\n"
    end

    if opts[:user_id]
      buffer << "__optInput.userId = #{opts[:user_id].to_i};\n"
    end

    buffer << "__textOptions = __buildOptions(__optInput);\n"
    buffer << "__pt = new __PrettyText(__textOptions);"

    # Be careful disabling sanitization. We allow for custom emails
    if opts[:sanitize] == false
      buffer << '__pt.disableSanitizer();'
    end

    context.eval(buffer)

    DiscourseEvent.trigger(:markdown_context, context)
    context.eval("__pt.cook(#{text.inspect})")
  end
end
2018-05-23 22:47:09 +08:00
# JSON object assigned to `__paths` in the JavaScript context.
#
# Always contains `baseUri` and `CDN`. When S3 uploads are enabled it also
# contains `S3BaseUrl`, plus `S3CDN` if an S3 CDN URL is configured.
def self.paths_json
  paths = {
    baseUri: Discourse.base_path,
    CDN: Rails.configuration.action_controller.asset_host,
  }

  if SiteSetting.Upload.enable_s3_uploads
    s3_cdn_url = SiteSetting.Upload.s3_cdn_url
    paths[:S3CDN] = s3_cdn_url if s3_cdn_url.present?
    paths[:S3BaseUrl] = Discourse.store.absolute_base_url
  end

  paths.to_json
end
2013-02-06 03:16:51 +08:00
# leaving this here, cause it invokes v8, don't want to implement twice
2013-08-14 04:08:29 +08:00
# Returns whatever the JavaScript helper `__utils.avatarImg` produces for the
# given avatar template and size. Both values are passed as Ruby `inspect`
# literals.
def self.avatar_img(avatar_template, size)
  protect do
    v8.eval(<<~JS)
      __paths = #{paths_json};
      __utils.avatarImg({size: #{size.inspect}, avatarTemplate: #{avatar_template.inspect}}, __getURL);
    JS
  end
end
2015-10-15 15:59:29 +08:00
# Runs `title` through the JavaScript `__performEmojiUnescape` helper.
#
# Returns `title` unchanged when emoji are disabled or `title` is nil/false.
def self.unescape_emoji(title)
  return title unless SiteSetting.enable_emoji? && title

  set = SiteSetting.emoji_set.inspect
  custom = Emoji.custom.map { |e| [e.name, e.url] }.to_h.to_json
  # JSON-encode so that quotes or backslashes in the setting cannot break the
  # generated script; a blank setting becomes the empty string
  emoji_cdn_url = (SiteSetting.external_emoji_url.presence || "").to_json

  protect do
    v8.eval(<<~JS)
      __paths = #{paths_json};
      __performEmojiUnescape(#{title.inspect}, {
        getURL: __getURL,
        emojiSet: #{set},
        emojiCDNUrl: #{emoji_cdn_url},
        customEmoji: #{custom},
        enableEmojiShortcuts: #{SiteSetting.enable_emoji_shortcuts},
        inlineEmoji: #{SiteSetting.enable_inline_emoji_translation}
      });
    JS
  end
end
2019-03-21 16:11:33 +08:00
# Runs `title` through the JavaScript `__performEmojiEscape` helper.
#
# Returns nil when `title` is nil/false. Emoji shortcuts are only replaced
# when both `enable_emoji` and `enable_emoji_shortcuts` are set.
def self.escape_emoji(title)
  return unless title

  replace_emoji_shortcuts = SiteSetting.enable_emoji && SiteSetting.enable_emoji_shortcuts

  protect do
    v8.eval(<<~JS)
      __performEmojiEscape(#{title.inspect}, {
        emojiShortcuts: #{replace_emoji_shortcuts},
        inlineEmoji: #{SiteSetting.enable_inline_emoji_translation}
      });
    JS
  end
end
2013-02-06 03:16:51 +08:00
# Cooks raw markdown into post HTML.
#
# Steps: render with `markdown`, parse the result as an HTML5 fragment, add
# rel attributes to links, flag bidirectional control characters in code
# blocks, block hotlinked media (when enabled), turn mentions into links (when
# enabled) and finally strip every <script> element with Loofah.
#
# Neither `text` nor `opts` is mutated; both are duplicated first.
# `opts[:omit_nofollow]` suppresses the nofollow handling, `opts[:user_id]` is
# forwarded to the mention lookup; all options are passed on to `markdown`.
def self.cook(text, opts = {})
  options = opts.dup
  working_text = text.dup

  sanitized = markdown(working_text, options)

  doc = Nokogiri::HTML5.fragment(sanitized)

  add_nofollow = !options[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content
  add_rel_attributes_to_user_content(doc, add_nofollow)
  strip_hidden_unicode_bidirectional_characters(doc)
  sanitize_hotlinked_media(doc)

  add_mentions(doc, user_id: options[:user_id]) if SiteSetting.enable_mentions

  scrubber = Loofah::Scrubber.new do |node|
    node.remove if node.name == 'script'
  end

  Loofah.fragment(doc.to_html).scrub!(scrubber).to_html
end
2021-11-22 08:43:03 +08:00
# Makes bidirectional control characters inside <code>/<pre> visible.
#
# Every character matched by DANGEROUS_BIDI_REGEXP in the inner HTML of a
# code/pre element is replaced by
# `<span class="bidi-warning" title="...">&lt;U+XXXX&gt;</span>`, where XXXX
# is the upper-case hexadecimal code point. `doc` is modified in place;
# documents without any such character are left untouched.
def self.strip_hidden_unicode_bidirectional_characters(doc)
  return if !DANGEROUS_BIDI_REGEXP.match?(doc.content)

  # escaped once up front: the translation ends up inside an HTML attribute
  title = ERB::Util.html_escape(I18n.t("post.hidden_bidi_character"))

  doc.css("code,pre").each do |code_tag|
    next if !DANGEROUS_BIDI_REGEXP.match?(code_tag.content)

    code_tag.inner_html = code_tag.inner_html.gsub(DANGEROUS_BIDI_REGEXP) do |bidi|
      # the angle brackets are entities so the label renders as text
      # instead of being parsed as a tag
      "<span class=\"bidi-warning\" title=\"#{title}\">&lt;U+#{bidi.ord.to_s(16).upcase}&gt;</span>"
    end
  end
end
# FEATURE: Allow hotlinked media to be blocked (#16940)
# This commit introduces a new site setting: `block_hotlinked_media`. When enabled, all attempts to hotlink media (images, videos, and audio) will fail, and be replaced with a linked placeholder. Exceptions to the rule can be added via `block_hotlinked_media_exceptions`.
# `download_remote_image_to_local` can be used alongside this feature. In that case, hotlinked images will be blocked immediately when the post is created, but will then be replaced with the downloaded version a few seconds later.
# This implementation is purely server-side, and does not impact the composer preview.
# Technically, there are two stages to this feature:
# 1. `PrettyText.sanitize_hotlinked_media` is called during `PrettyText.cook`, and whenever new images are introduced by Onebox. It will iterate over all src/srcset attributes in the post HTML and check if they're allowed. If not, the attributes will be removed and replaced with a `data-blocked-hotlinked-src(set)` attribute.
# 2. In the `CookedPostProcessor`, we iterate over all `data-blocked-hotlinked-src(set)` attributes and check whether we have a downloaded version of the media. If yes, we update the src to use the downloaded version. If not, the entire media element is replaced with a placeholder. The placeholder is labelled 'external media', and is a link to the offsite media.
2022-06-07 22:23:04 +08:00
def self . sanitize_hotlinked_media ( doc )
return if ! SiteSetting . block_hotlinked_media
allowed_pattern = allowed_src_pattern
doc . css ( " img[src], source[src], source[srcset], track[src] " ) . each do | el |
if el [ " src " ] && ! el [ " src " ] . match? ( allowed_pattern )
el [ PrettyText :: BLOCKED_HOTLINKED_SRC_ATTR ] = el . delete ( " src " )
end
if el [ " srcset " ]
srcs = el [ " srcset " ] . split ( ',' ) . map { | e | e . split ( ' ' , 2 ) [ 0 ] . presence }
if srcs . any? { | src | ! src . match? ( allowed_pattern ) }
el [ PrettyText :: BLOCKED_HOTLINKED_SRCSET_ATTR ] = el . delete ( " srcset " )
end
end
end
end
2020-09-10 23:59:51 +08:00
# Sets the `rel` attribute on the links in `doc`.
#
# * Links with target="_blank" get rel="noopener".
# * When `add_nofollow` is truthy, links to a foreign domain get
#   rel="noopener nofollow ugc". A link counts as same-domain when it has no
#   host, when its host is the site host or a subdomain of it, or when its
#   host is (a subdomain of) an entry of the pipe-separated
#   `exclude_rel_nofollow_domains` setting.
# * Links whose href cannot be parsed as a URI always get
#   rel="noopener nofollow ugc", regardless of `add_nofollow`.
def self.add_rel_attributes_to_user_content(doc, add_nofollow)
  domains = SiteSetting.exclude_rel_nofollow_domains
  allowlist = domains.present? ? domains.split('|') : []

  site_uri = nil
  doc.css("a").each do |link|
    href = link["href"].to_s
    link["rel"] = "noopener" if link["target"] == "_blank"

    begin
      uri = URI(UrlHelper.encode_component(href))
      site_uri ||= URI(Discourse.base_url)
      host = uri.host

      same_domain = !host.present? ||
        host == site_uri.host ||
        host.ends_with?(".#{site_uri.host}") ||
        allowlist.any? { |domain| host == domain || host.ends_with?(".#{domain}") }

      link["rel"] = "noopener nofollow ugc" if add_nofollow && !same_domain
    rescue URI::Error
      # add a nofollow anyway
      link["rel"] = "noopener nofollow ugc"
    end
  end
end
2017-02-06 21:45:04 +08:00
# A link found by `extract_links`: `url` is the target, `is_quote` tells
# whether it was derived from a quote rather than from an anchor or embed.
DetectedLink = Struct.new(:url, :is_quote)
2014-07-11 12:17:01 +08:00
2013-02-06 03:16:51 +08:00
# Collects the links contained in cooked `html` as DetectedLink objects.
#
# In order: onebox sources (`data-onebox-src`), every remaining <a href> that
# is not a pure fragment link, quoted topics/posts (as "/t/<topic>[/<post>]",
# flagged `is_quote`), and YouTube embeds. Anchors inside quotes, oneboxes and
# elided content, as well as linked onebox images, are dropped before the
# anchors are collected.
def self.extract_links(html)
  links = []
  doc = Nokogiri::HTML5.fragment(html)

  # extract onebox links
  doc.css("aside.onebox[data-onebox-src]").each do |onebox|
    links << DetectedLink.new(onebox["data-onebox-src"], false)
  end

  # remove href inside quotes & oneboxes & elided part
  doc.css("aside.quote a, aside.onebox a, .elided a").remove

  # remove hotlinked images
  doc.css("a.onebox > img").each { |img| img.parent.remove }

  # extract all links
  doc.css("a").each do |a|
    href = a["href"]
    links << DetectedLink.new(href, false) if href.present? && href[0] != "#"
  end

  # extract quotes
  doc.css("aside.quote[data-topic]").each do |aside|
    next unless aside["data-topic"].present?

    url = +"/t/#{aside["data-topic"]}"
    url << "/#{aside["data-post"]}" if aside["data-post"].present?
    links << DetectedLink.new(url, true)
  end

  # extract Youtube links
  doc.css("div[data-youtube-id]").each do |div|
    next unless div["data-youtube-id"].present?

    links << DetectedLink.new("https://www.youtube.com/watch?v=#{div['data-youtube-id']}", false)
  end

  links
end
2013-05-28 07:48:47 +08:00
# Builds an excerpt of at most `max_length` from cooked `html`.
#
# The fragment is first offered to `:reduce_excerpt` listeners, then lightbox
# metadata and audio/video elements are removed, and the remaining HTML is
# handed to ExcerptParser together with `options`.
def self.excerpt(html, max_length, options = {})
  # TODO: properly fix this HACK in ExcerptParser without introducing XSS
  doc = Nokogiri::HTML5.fragment(html)
  DiscourseEvent.trigger(:reduce_excerpt, doc, options)
  strip_image_wrapping(doc)
  strip_oneboxed_media(doc)

  ExcerptParser.get_excerpt(doc.to_html, max_length, options)
end
2013-02-06 03:16:51 +08:00
2013-06-06 03:28:10 +08:00
# Returns `string` with every <a> element replaced by its inner HTML, i.e. the
# link text stays but the link itself is gone. Blank input is returned as-is.
def self.strip_links(string)
  return string if string.blank?

  # If the user is not basic, strip links from their bio
  fragment = Nokogiri::HTML5.fragment(string)
  fragment.css('a').each { |a| a.replace(a.inner_html) }
  fragment.to_html
end
2018-05-10 01:24:44 +08:00
# Prefixes host-less link targets in `doc` with the site's base URL.
#
# Links that already have a host, `mailto` links and hrefs that cannot be
# parsed as a URI are left unchanged. `doc` is modified in place.
def self.make_all_links_absolute(doc)
  site_uri = nil

  doc.css("a").each do |link|
    href = link["href"].to_s

    begin
      uri = URI(href)
      site_uri ||= URI(Discourse.base_url)

      unless uri.host.present? || href.start_with?('mailto')
        link["href"] = "#{site_uri}#{link['href']}"
      end
    rescue URI::Error
      # leave it
    end
  end
end
2016-06-21 23:12:30 +08:00
2014-04-18 00:32:51 +08:00
# Removes every `.meta` element nested inside a `.lightbox-wrapper` from `doc`.
def self.strip_image_wrapping(doc)
  doc.css(".lightbox-wrapper .meta").remove
end
2020-02-07 04:08:13 +08:00
# Removes <audio> elements, then <video> elements and `.video-onebox`
# containers, from `doc`.
def self.strip_oneboxed_media(doc)
  doc.css("audio").remove
  doc.css(".video-onebox,video").remove
end
2018-05-10 01:24:44 +08:00
# Replaces every Vimeo player iframe in `doc` with a paragraph holding a plain
# link to the video.
#
# The link target is the iframe's `data-original-href` when present (after
# `UrlHelper.normalized_encode`); otherwise it is built from the last path
# segment of the iframe `src`.
def self.convert_vimeo_iframes(doc)
  doc.css("iframe[src*='player.vimeo.com']").each do |iframe|
    vimeo_url =
      if iframe["data-original-href"].present?
        UrlHelper.normalized_encode(iframe["data-original-href"])
      else
        vimeo_id = iframe['src'].split('/').last
        "https://vimeo.com/#{vimeo_id}"
      end

    # the URL is placed in both an attribute and text, so escape it before
    # building the markup
    escaped_url = ERB::Util.html_escape(vimeo_url)
    iframe.replace Nokogiri::HTML5.fragment("<p><a href='#{escaped_url}'>#{escaped_url}</a></p>")
  end
end
2022-09-29 07:24:33 +08:00
# Replaces secure uploads in `doc` with the notice produced by
# `secure_uploads_placeholder`.
#
# Pass 1 handles links whose href is a secure upload URL: linked images
# (lightboxes included) and links nested directly inside <video>/<audio>, in
# which case the whole media element is replaced. Links that already are a
# placeholder, and links without image that are not inside a media element,
# are skipped.
#
# Pass 2 handles the remaining <img src> elements, i.e. images on their own
# or inside a onebox.
def self.strip_secure_uploads(doc)
  # images inside a lightbox or other link
  doc.css('a[href]').each do |a|
    next if !Upload.secure_uploads_url?(a['href'])

    non_image_media = %w(video audio).include?(a.parent&.name)
    target = non_image_media ? a.parent : a
    target_html = target.to_s
    next if target_html.include?('stripped-secure-view-media') || target_html.include?('stripped-secure-view-upload')

    img = a.at_css('img[src]')
    next if img.nil? && !non_image_media

    # a lightbox link without an image falls through to the generic branch
    # instead of dereferencing a missing <img>
    if img && a.classes.include?('lightbox')
      # if available, use the first image from the srcset here
      # so we get the optimized image instead of the possibly huge original
      srcset = img['srcset']
      url = srcset ? srcset.split(',').first : img['src']

      a.add_next_sibling secure_uploads_placeholder(doc, url, width: img['width'], height: img['height'])
      a.remove
    else
      width = non_image_media ? nil : a.at_css('img').attr('width')
      height = non_image_media ? nil : a.at_css('img').attr('height')

      target.add_next_sibling secure_uploads_placeholder(doc, a['href'], width: width, height: height)
      target.remove
    end
  end

  # images by themselves or inside a onebox
  doc.css('img[src]').each do |img|
    url =
      if img.parent.classes.include?("aspect-image") && img.attributes["srcset"].present?
        # we are using the first image from the srcset here so we get the
        # optimized image instead of the original, because an optimized
        # image may be used for the onebox thumbnail
        img.attributes["srcset"].value.split(",").first
      else
        img['src']
      end

    width = img['width']
    height = img['height']

    onebox_type = nil
    if img.ancestors.css(".onebox-body").any?
      onebox_type = img.classes.include?("onebox-avatar-inline") ? "avatar-inline" : "thumbnail"
    end

    # we always want this to be tiny and without any special styles
    if img.classes.include?('site-icon')
      onebox_type = nil
      width = 16
      height = 16
    end

    if Upload.secure_uploads_url?(url)
      img.add_next_sibling secure_uploads_placeholder(doc, url, onebox_type: onebox_type, width: width, height: height)
      img.remove
    end
  end
end
2022-09-29 07:24:33 +08:00
# Returns the HTML (a String) of the notice that stands in for a secure upload.
#
# The notice is a <div class="secure-upload-notice"> carrying the original
# `url` in `data-stripped-secure-upload`, optional `data-onebox-type`,
# `data-width` and `data-height` attributes, and a link to `url`.
# `url`, `onebox_type`, `width` and `height` are HTML-escaped because they are
# interpolated into attribute values. `doc` is not used.
def self.secure_uploads_placeholder(doc, url, onebox_type: false, width: nil, height: nil)
  escaped_url = ERB::Util.html_escape(url)
  data_width = width ? "data-width=\"#{ERB::Util.html_escape(width)}\"" : ''
  data_height = height ? "data-height=\"#{ERB::Util.html_escape(height)}\"" : ''
  data_onebox_type = onebox_type ? "data-onebox-type='#{ERB::Util.html_escape(onebox_type)}'" : ''

  <<~HTML
    <div class="secure-upload-notice" data-stripped-secure-upload="#{escaped_url}" #{data_onebox_type} #{data_width} #{data_height}>
      #{I18n.t('emails.secure_uploads_placeholder')} <a class='stripped-secure-view-upload' href="#{escaped_url}">#{I18n.t("emails.view_redacted_media")}</a>.
    </div>
  HTML
end
2016-06-21 23:12:30 +08:00
# Prepares cooked `html` for use in an email and returns the resulting HTML.
#
# The fragment is offered to `:reduce_cooked` listeners, secure uploads are
# replaced with placeholders when `post` reports `with_secure_uploads?`,
# lightbox metadata is removed, Vimeo iframes become plain links and host-less
# links are made absolute.
def self.format_for_email(html, post = nil)
  doc = Nokogiri::HTML5.fragment(html)
  DiscourseEvent.trigger(:reduce_cooked, doc, post)

  strip_secure_uploads(doc) if post&.with_secure_uploads?
  strip_image_wrapping(doc)
  convert_vimeo_iframes(doc)
  make_all_links_absolute(doc)

  doc.to_html
end
2013-05-28 07:48:47 +08:00
protected
2013-02-06 03:16:51 +08:00
2014-02-04 08:12:53 +08:00
# Error carrying a message and a backtrace that are supplied by the caller;
# both are stored verbatim and exposed through plain accessors.
class JavaScriptError < StandardError
  attr_accessor :message
  attr_accessor :backtrace

  def initialize(message, backtrace)
    @backtrace = backtrace
    @message = message
  end
end
# Runs the given block while holding @mutex and returns the block's value.
# The lock is not reentrant, so `protect` must not be nested.
def self.protect
  @mutex.synchronize { yield }
end
2013-08-16 06:12:10 +08:00
# Loads each of `files` into `ctx`, in the given order. Every path is appended
# to `app_root` with `+` before it is handed to `ctx.load`.
def self.ctx_load(ctx, *files)
  files.each { |file| ctx.load(app_root + file) }
end
2018-11-22 14:28:48 +08:00
private
# Mention kinds used by `add_mentions` to pick the link target and CSS class.
# `||=` keeps an already defined constant instead of redefining it.
USER_TYPE ||= 'user'
GROUP_TYPE ||= 'group'
GROUP_MENTIONABLE_TYPE ||= 'group-mentionable'
2018-11-22 14:28:48 +08:00
2018-11-22 16:01:03 +08:00
# Rewrites, in place, every `span.mention` element of `doc` whose name resolves
# to a known user or group, turning it into a link.
#
# doc     - object responding to `css` (NOTE(review): presumably a Nokogiri
#           document or fragment — confirm against callers).
# user_id - forwarded unchanged to lookup_mentions.
#
# Returns the result of `elements.each`.
#
# NOTE(review): the double-quoted literals in this copy of the block appear
# whitespace-padded (selector and href strings) — confirm against the
# canonical file before relying on their exact contents.
def self . add_mentions ( doc , user_id : nil )
2018-11-22 14:28:48 +08:00
elements = doc . css ( " span.mention " )
2018-11-23 08:31:52 +08:00
# Mention name = element text minus its first character
# (presumably the leading '@' — confirm).
names = elements . map { | element | element . text [ 1 .. - 1 ] }
2018-11-22 14:28:48 +08:00
2018-11-22 16:01:03 +08:00
# One lookup for all names; result maps lower-cased name => type.
mentions = lookup_mentions ( names , user_id : user_id )
2018-11-22 14:28:48 +08:00
2020-02-11 01:31:42 +08:00
elements . each do | element |
2018-11-22 14:28:48 +08:00
name = element . text [ 1 .. - 1 ]
# lookup_mentions keys are lower-cased, so normalise before the hash lookup.
name . downcase!
# Assignment in the condition is intentional: names that did not resolve
# (nil) leave the element untouched.
if type = mentions [ name ]
# Turn the <span> into an <a> and re-render its text via format_username.
element . name = 'a'
element . children = PrettyText :: Helpers . format_username (
element . children . text
)
# Every known type gets an href; group types additionally overwrite the
# element's class attribute.
case type
when USER_TYPE
2020-10-09 19:51:24 +08:00
element [ 'href' ] = " #{ Discourse . base_path } /u/ #{ UrlHelper . encode_component ( name ) } "
2020-02-19 04:45:02 +08:00
when GROUP_MENTIONABLE_TYPE
element [ 'class' ] = 'mention-group notify'
2020-10-09 19:51:24 +08:00
element [ 'href' ] = " #{ Discourse . base_path } /groups/ #{ UrlHelper . encode_component ( name ) } "
2018-11-22 14:28:48 +08:00
when GROUP_TYPE
element [ 'class' ] = 'mention-group'
2020-10-09 19:51:24 +08:00
element [ 'href' ] = " #{ Discourse . base_path } /groups/ #{ UrlHelper . encode_component ( name ) } "
2018-11-22 14:28:48 +08:00
end
end
end
end
2018-11-22 16:01:03 +08:00
# Resolves mention names to the kind of entity they refer to.
#
# @param names [Array<String>] mention names in any case; left unmodified
# @param user_id [Integer, nil] id of the user whose alias levels are passed
#   to the query (via Group.alias_levels)
# @return [Hash{String => String}] lower-cased name => USER_TYPE, GROUP_TYPE
#   or GROUP_MENTIONABLE_TYPE; empty when `names` is blank
def self.lookup_mentions(names, user_id: nil)
  return {} if names.blank?

  # Every subquery is restricted to the requested names so the result set is
  # bounded by the number of mentions rather than by the size of the tables.
  sql = <<~SQL
    (
      SELECT
        :user_type AS type,
        username_lower AS name
      FROM users
      WHERE username_lower IN (:names) AND staged = false
    )
    UNION
    (
      SELECT
        :group_type AS type,
        lower(name) AS name
      FROM groups
      WHERE lower(name) IN (:names)
    )
    UNION
    (
      SELECT
        :group_mentionable_type AS type,
        lower(name) AS name
      FROM groups
      WHERE lower(name) IN (:names) AND (#{Group.mentionable_sql_clause(include_public: false)})
    )
    ORDER BY type
  SQL

  user = User.find_by(id: user_id)

  # Build a lower-cased copy instead of downcasing in place: the caller's
  # strings stay untouched and frozen strings are accepted.
  lowered_names = names.map(&:downcase)

  results = DB.query(
    sql,
    names: lowered_names,
    user_type: USER_TYPE,
    group_type: GROUP_TYPE,
    group_mentionable_type: GROUP_MENTIONABLE_TYPE,
    levels: Group.alias_levels(user),
    user_id: user_id
  )

  # Rows arrive ordered by type; when one name has several rows the last one
  # written wins.
  mentions = {}
  results.each { |row| mentions[row.name] = row.type }
  mentions
end
FEATURE: Allow hotlinked media to be blocked (#16940)
This commit introduces a new site setting: `block_hotlinked_media`. When enabled, all attempts to hotlink media (images, videos, and audio) will fail, and be replaced with a linked placeholder. Exceptions to the rule can be added via `block_hotlinked_media_exceptions`.
`download_remote_image_to_local` can be used alongside this feature. In that case, hotlinked images will be blocked immediately when the post is created, but will then be replaced with the downloaded version a few seconds later.
This implementation is purely server-side, and does not impact the composer preview.
Technically, there are two stages to this feature:
1. `PrettyText.sanitize_hotlinked_media` is called during `PrettyText.cook`, and whenever new images are introduced by Onebox. It will iterate over all src/srcset attributes in the post HTML and check if they're allowed. If not, the attributes will be removed and replaced with a `data-blocked-hotlinked-src(set)` attribute
2. In the `CookedPostProcessor`, we iterate over all `data-blocked-hotlinked-src(set)` attributes and check whether we have a downloaded version of the media. If yes, we update the src to use the downloaded version. If not, the entire media element is replaced with a placeholder. The placeholder is labelled 'external media', and is a link to the offsite media.
2022-06-07 22:23:04 +08:00
# Builds the regexp used to decide whether a media source value is allowed:
# it matches, anchored at the start of the string, either `data:` or one of
# the allowed prefixes collected below.
#
# Returns a Regexp.
#
# NOTE(review): the double-quoted and regexp literals in this copy of the
# block appear whitespace-padded — confirm their exact contents against the
# canonical file.
def self . allowed_src_pattern
# Site-owned locations plus the admin-configured exceptions; nil entries are
# dropped by `compact` further down.
allowed_src_prefixes = [
Discourse . base_path ,
Discourse . base_url ,
GlobalSetting . s3_cdn_url ,
GlobalSetting . cdn_url ,
SiteSetting . external_emoji_url . presence ,
* SiteSetting . block_hotlinked_media_exceptions . split ( " | " )
]
# Each prefix is escaped so it is matched literally, never as regexp syntax.
patterns = allowed_src_prefixes . compact . map do | url |
pattern = Regexp . escape ( url )
# If 'https://example.com' is allowed, ensure 'https://example.com.blah.com' is not
# NOTE(review): for an empty-string prefix this leaves just `(?:/|\z)`, which
# matches any value starting with `/` (including `//host/...`) — confirm that
# callers deal with protocol-relative URLs.
pattern += '(?:/|\z)' if ! pattern . ends_with? ( " \ / " )
pattern
end
/ \ A(data:| #{ patterns . join ( " | " ) } ) /
end
2013-02-06 03:16:51 +08:00
end