2019-05-03 06:17:27 +08:00
# frozen_string_literal: true
2018-03-15 22:27:55 +08:00
require 'cgi'
require 'uri'
2017-05-23 04:42:19 +08:00
2016-10-25 07:25:44 +08:00
Dir [ " #{ Rails . root } /lib/onebox/engine/*_onebox.rb " ] . sort . each { | f | require f }
2014-02-26 02:35:08 +08:00
2013-02-06 03:16:51 +08:00
module Oneboxer
2018-11-27 16:00:31 +08:00
# CSS class that marks an anchor as a onebox candidate (see each_onebox_link).
ONEBOX_CSS_CLASS = "onebox"

# File extensions (leading dot included, as File.extname returns them) that
# local_upload_html renders as audio / video players.
# Anchored with \A and \z so the whole extension must match; ^ and $ would
# also accept a value that merely contains a matching line.
AUDIO_REGEX = /\A\.(mp3|og[ga]|opus|wav|m4[abpr]|aac|flac)\z/i
VIDEO_REGEX = /\A\.(mov|mp4|webm|m4v|3gp|ogv|avi|mpeg)\z/i
2018-11-27 16:00:31 +08:00
2013-04-30 10:43:21 +08:00
# keep reloaders happy: the struct is only created the first time this file loads
unless defined? Oneboxer::Result
  # Value returned by Oneboxer.apply: the document that was processed plus a
  # flag telling whether any link in it was replaced.
  Result = Struct.new(:doc, :changed) do
    # Serialises the wrapped document by delegating to it.
    def to_html
      doc.to_html
    end

    # Predicate spelling of the `changed` member.
    alias_method :changed?, :changed
  end
end
2017-06-27 03:38:23 +08:00
# List handed to FinalDestination as `ignore_redirects:` (see external_onebox).
# Built on first use and memoized on the module, so the Discourse.base_url
# captured is the one in effect at that first call.
def self.ignore_redirects
  return @ignore_redirects if @ignore_redirects

  @ignore_redirects = [
    'http://www.dropbox.com',
    'http://store.steampowered.com',
    'http://vimeo.com',
    Discourse.base_url
  ]
end
2017-08-08 17:44:27 +08:00
# List handed to FinalDestination as `force_get_hosts:` (see external_onebox).
# Memoized on first use.
def self.force_get_hosts
  return @force_get_hosts if @force_get_hosts

  @force_get_hosts = [
    'http://us.battle.net',
    'https://news.yahoo.com/'
  ]
end
2019-11-07 17:14:43 +08:00
# Splits the pipe-delimited `force_custom_user_agent_hosts` site setting into an array.
def self.force_custom_user_agent_hosts
  raw_setting = SiteSetting.force_custom_user_agent_hosts
  raw_setting.split('|')
end
2018-10-10 18:39:03 +08:00
# Post types whose posts may be rendered by local_topic_html: regular posts and
# moderator actions. Memoized on first use.
def self.allowed_post_types
  return @allowed_post_types if @allowed_post_types

  types = Post.types
  @allowed_post_types = [types[:regular], types[:moderator_action]]
end
2014-01-28 04:09:09 +08:00
# Returns the :preview entry of onebox_raw's result for +url+.
# When options[:invalidate_oneboxes] is truthy the cached entries for the URL
# are dropped first.
def self.preview(url, options = nil)
  opts = options || {}
  invalidate(url) if opts[:invalidate_oneboxes]

  rendered = onebox_raw(url, opts)
  rendered[:preview]
end
2014-01-28 04:09:09 +08:00
# Returns the :onebox entry of onebox_raw's result for +url+.
# When options[:invalidate_oneboxes] is truthy the cached entries for the URL
# are dropped first.
def self.onebox(url, options = nil)
  opts = options || {}
  invalidate(url) if opts[:invalidate_oneboxes]

  rendered = onebox_raw(url, opts)
  rendered[:onebox]
end
# Reads the cached entry for +url+ and returns its :onebox value, or nil when
# nothing is cached. If reading or indexing the entry raises, the cache entries
# for the URL are invalidated, a warning is logged and "" is returned.
def self.cached_onebox(url)
  cached = Discourse.cache.read(onebox_cache_key(url))
  cached[:onebox] if cached
rescue => e
  invalidate(url)
  Rails.logger.warn("invalid cached onebox for #{url} #{e}")
  ""
end
# Reads the cached entry for +url+ and returns its :preview value, or nil when
# nothing is cached. If reading or indexing the entry raises, the cache entries
# for the URL are invalidated, a warning is logged and "" is returned.
def self.cached_preview(url)
  cached = Discourse.cache.read(onebox_cache_key(url))
  cached[:preview] if cached
rescue => e
  invalidate(url)
  Rails.logger.warn("invalid cached preview for #{url} #{e}")
  ""
end
2014-01-29 02:18:19 +08:00
# Deletes both cache entries kept for +url+: the rendered onebox and the
# "recently failed" marker. Returns whatever the second delete returns.
def self.invalidate(url)
  cache = Discourse.cache
  cache.delete(onebox_cache_key(url))
  cache.delete(onebox_failed_cache_key(url))
end
2014-01-29 02:18:19 +08:00
2013-02-06 03:16:51 +08:00
# Parse URLs out of HTML, returning the document when finished.
#
# A String is parsed into an HTML5 fragment; anything else is used as the
# document directly. Every node matching `a.onebox` (plus any selectors in
# +extra_paths+) that has a non-blank href is yielded as (href, node).
def self.each_onebox_link(string_or_doc, extra_paths: [])
  doc = string_or_doc.is_a?(String) ? Nokogiri::HTML5.fragment(string_or_doc) : string_or_doc

  doc.css("a.#{ONEBOX_CSS_CLASS}", *extra_paths).each do |link|
    href = link['href']
    yield(href, link) if href.present?
  end

  doc
end
2018-04-12 03:33:45 +08:00
# Element names that `apply` treats as block-level: a onebox containing one of
# these at its top level is lifted out of an enclosing <p>.
# `||=` keeps the first definition when the file is loaded again.
HTML5_BLOCK_ELEMENTS ||= %w[
  address article aside blockquote canvas center dd div dl dt
  fieldset figcaption figure footer form
  h1 h2 h3 h4 h5 h6 header hgroup hr
  li main nav noscript ol output p pre section table tfoot ul video
]
2018-11-27 16:00:31 +08:00
# Replaces every onebox candidate link in the document with the HTML supplied
# by the caller's block.
#
# The block receives (url, element); the first element of what it returns is
# taken as the onebox HTML. Blank HTML, or HTML that parses to no nodes, leaves
# the link untouched.
#
# When the link sits inside a <p> and the onebox contains a block-level element
# at its top level, the onebox is moved out of the paragraph: the paragraph is
# replaced, trimmed, or split in two around the onebox depending on whether
# significant content exists before and/or after the link.
#
# Returns a Result wrapping the document and a changed flag.
def self.apply(string_or_doc, extra_paths: nil)
  doc = string_or_doc.is_a?(String) ? Nokogiri::HTML5.fragment(string_or_doc) : string_or_doc
  changed = false

  each_onebox_link(doc, extra_paths: extra_paths) do |url, element|
    onebox, _ = yield(url, element)
    next if onebox.blank?

    parsed_onebox = Nokogiri::HTML5.fragment(onebox)
    next if parsed_onebox.children.blank?

    changed = true
    parent = element.parent

    lift_out_of_paragraph = parent&.node_name&.downcase == "p" &&
      parsed_onebox.children.any? { |child| HTML5_BLOCK_ELEMENTS.include?(child.node_name.downcase) }

    unless lift_out_of_paragraph
      element.replace(parsed_onebox)
      next
    end

    siblings = parent.children
    element_idx = siblings.find_index(element)
    before_idx = first_significant_element_index(siblings, element_idx - 1, -1)
    after_idx = first_significant_element_index(siblings, element_idx + 1, +1)

    content_before = before_idx >= 0
    content_after = after_idx < siblings.size

    if content_before && content_after
      # Split the paragraph: copy it before trimming, keep the head in the
      # original and the tail in the copy, with the onebox between them.
      parent_rest = parent.dup
      parent.children = siblings[0..before_idx]
      parent_rest.children = siblings[after_idx..siblings.size]
      parent.add_next_sibling(parent_rest)
      parent.add_next_sibling(parsed_onebox)
    elsif content_before
      parent.children = siblings[0..before_idx]
      parent.add_next_sibling(parsed_onebox)
    elsif content_after
      parent.children = siblings[after_idx..siblings.size]
      parent.add_previous_sibling(parsed_onebox)
    else
      # The link was the only significant content of the paragraph.
      parent.replace(parsed_onebox)
    end
  end

  Result.new(doc, changed)
end
# Walks +elements+ from +index+ in direction +step+ and returns the index of
# the first node that is neither a <br> nor a whitespace-only text node.
# Returns an out-of-range index (below 0 or >= elements.size) when the walk
# runs off either end.
def self.first_significant_element_index(elements, index, step)
  skippable = lambda do |node|
    case node.node_name.downcase
    when "br" then true
    when "text" then node.to_html.strip.blank?
    else false
    end
  end

  index += step while index >= 0 && index < elements.size && skippable.call(elements[index])
  index
end
2016-12-20 07:31:10 +08:00
# True when the preview flag stored in Redis for +user_id+ is "1".
def self.is_previewing?(user_id)
  flag = Discourse.redis.get(preview_key(user_id))
  flag == "1"
end
# Sets the preview flag for +user_id+ in Redis to "1" with a one-minute expiry.
def self.preview_onebox!(user_id)
  key = preview_key(user_id)
  Discourse.redis.setex(key, 1.minute, "1")
end
# Removes the preview flag for +user_id+ from Redis.
def self.onebox_previewed!(user_id)
  key = preview_key(user_id)
  Discourse.redis.del(key)
end
2017-01-06 10:01:14 +08:00
# Asks Onebox::Matcher for the result of `oneboxed` for +url+, passing the
# allowed iframe origins converted to regexes.
def self.engine(url)
  iframe_regexes = Onebox::Engine.origins_to_regexes(allowed_iframe_origins)
  matcher = Onebox::Matcher.new(url, { allowed_iframe_regexes: iframe_regexes })
  matcher.oneboxed
end
2019-11-28 05:48:29 +08:00
# True when a failure marker is present in the cache for +url+.
def self.recently_failed?(url)
  marker = Discourse.cache.read(onebox_failed_cache_key(url))
  marker.present?
end
# Writes a failure marker (true) for +url+ to the cache, expiring after one hour.
def self.cache_failed!(url)
  key = onebox_failed_cache_key(url)
  Discourse.cache.write(key, true, expires_in: 1.hour)
end
2014-03-18 10:12:58 +08:00
# NOTE: a bare `private` only changes the visibility of instance methods
# defined with plain `def`. The `def self.` singleton methods that follow are
# not affected by it and remain publicly callable on the module.
private
2016-12-20 07:31:10 +08:00
# Redis key under which the preview flag for +user_id+ is stored.
# The key is the bare prefix followed directly by the id, with no padding
# whitespace anywhere in it.
def self.preview_key(user_id)
  "onebox:preview:#{user_id}"
end
2016-10-24 18:46:22 +08:00
# Fresh hash with empty :preview and :onebox strings, used as the
# "nothing to show" result.
def self.blank_onebox
  {
    preview: "",
    onebox: ""
  }
end
2016-10-24 18:46:22 +08:00
# Cache key under which the rendered onebox for +url+ is stored.
# The key is the bare prefix followed directly by the URL, with no padding
# whitespace around either part.
def self.onebox_cache_key(url)
  "onebox__#{url}"
end
2015-08-24 08:43:07 +08:00
2019-11-28 05:48:29 +08:00
# Cache key under which the failure marker for +url+ is stored.
# The key is the bare prefix followed directly by the URL, with no padding
# whitespace around either part.
def self.onebox_failed_cache_key(url)
  "onebox_failed__#{url}"
end
2018-02-14 07:39:44 +08:00
# Produces the { onebox:, preview: } hash for +url+.
#
# The URL is escaped first; a local onebox is tried and, when that yields
# nothing, the external one. Any StandardError raised along the way is logged
# at info level and a blank onebox hash is returned instead.
def self.onebox_raw(url, opts = {})
  url = UrlHelper.escape_uri(url).to_s

  local = local_onebox(url, opts)
  local || external_onebox(url)
rescue => e
  # no point warning here, just cause we have an issue oneboxing a url
  # we can later hunt for failed oneboxes by searching logs if needed
  Rails.logger.info("Failed to onebox #{url} #{e} #{e.backtrace}")

  # return a blank hash, so rest of the code works
  blank_onebox
end
def self . local_onebox ( url , opts = { } )
return unless route = Discourse . route_for ( url )
html =
case route [ :controller ]
when " uploads " then local_upload_html ( url )
when " topics " then local_topic_html ( url , route , opts )
when " users " then local_user_html ( url , route )
2020-11-25 07:53:05 +08:00
when " list " then local_category_html ( url , route )
2018-02-14 07:39:44 +08:00
end
2020-12-04 06:16:01 +08:00
html = html . presence || " <a href=' #{ URI ( url ) . to_s } '> #{ URI ( url ) . to_s } </a> "
2018-02-14 07:39:44 +08:00
{ onebox : html , preview : html }
end
# Returns player markup for an uploaded media file, chosen by the extension of
# the URL's path: a <video> block for VIDEO_REGEX matches, an <audio> element
# for AUDIO_REGEX matches, nil for anything else.
def self.local_upload_html(url)
  extension = File.extname(URI(url).path || "")

  # The URL is placed inside single-quoted attributes and element text, so it
  # is HTML-escaped before interpolation.
  safe_url = CGI.escapeHTML(url.to_s)

  case extension
  when VIDEO_REGEX
    <<~HTML
      <div class="onebox video-onebox">
        <video width="100%" height="100%" controls="">
          <source src='#{safe_url}'>
          <a href='#{safe_url}'>#{safe_url}</a>
        </video>
      </div>
    HTML
  when AUDIO_REGEX
    "<audio controls><source src='#{safe_url}'><a href='#{safe_url}'>#{safe_url}</a></audio>"
  end
end
2018-02-14 07:39:44 +08:00
2020-02-12 18:11:28 +08:00
# Resolves the topic a local URL points at, or returns nil when it must not be
# shown.
#
# When opts[:user_id] identifies a user, that user must be able to see the
# category (opts[:category_id]) and topic (opts[:topic_id]) the onebox is being
# rendered in, if those exist. The linked topic is then looked up from the
# route; private messages are never returned. Unless the linked topic belongs
# to the category found via opts[:category_id], it must also be visible to a
# guardian built without a user.
def self.local_topic(url, route, opts)
  current_category = nil
  current_user = User.find_by(id: opts[:user_id])

  if current_user
    current_category = Category.find_by(id: opts[:category_id])
    return if current_category && !Guardian.new(current_user).can_see_category?(current_category)

    current_topic = Topic.find_by(id: opts[:topic_id])
    return if current_topic && !Guardian.new(current_user).can_see_topic?(current_topic)
  end

  topic = Topic.find_by(id: route[:id] || route[:topic_id])
  return unless topic
  return if topic.private_message?

  same_category = !current_category.blank? && current_category.id == topic.category_id
  return unless same_category || Guardian.new.can_see_topic?(topic)

  topic
end
# Renders the onebox HTML for a local topic or post URL, or nil when the topic
# or post may not be shown.
#
# The post is the one numbered route[:post_number] when that is greater than 1,
# otherwise the first of the topic's ordered posts. Hidden posts and posts whose
# type is not in allowed_post_types are skipped.
#
# A link to a later post made from within the same topic (opts[:topic_id]) is
# rendered as a cooked [quote]; everything else uses the topic onebox template.
def self.local_topic_html(url, route, opts)
  topic = local_topic(url, route, opts)
  return unless topic

  post_number = route[:post_number].to_i
  post =
    if post_number > 1
      topic.posts.where(post_number: post_number).first
    else
      topic.ordered_posts.first
    end

  return if !post || post.hidden || !allowed_post_types.include?(post.post_type)

  quote_within_topic = post_number > 1 && opts[:topic_id] == topic.id

  unless quote_within_topic
    args = {
      topic_id: topic.id,
      post_number: post.post_number,
      avatar: PrettyText.avatar_img(post.user.avatar_template, "tiny"),
      original_url: url,
      title: PrettyText.unescape_emoji(CGI::escapeHTML(topic.title)),
      category_html: CategoryBadge.html_for(topic.category),
      quote: PrettyText.unescape_emoji(post.excerpt(SiteSetting.post_onebox_maxlength)),
    }

    return Mustache.render(template("discourse_topic_onebox"), args)
  end

  excerpt = post.excerpt(SiteSetting.post_onebox_maxlength)
  excerpt.gsub!(/[\r\n]+/, " ")
  excerpt.gsub!("[/quote]", "[quote]") # don't break my quote

  quote = "[quote=\"#{post.user.username}, topic:#{topic.id}, post:#{post.post_number}\"]\n#{excerpt}\n[/quote]"
  PrettyText.cook(quote)
end
2018-02-14 07:39:44 +08:00
# Renders the user onebox for a local profile URL, or nil when no user matches
# route[:username] (compared lower-cased).
# The display name is only included when the `enable_names` site setting is on.
def self.local_user_html(url, route)
  username = route[:username] || ""
  user = User.find_by(username_lower: username.downcase)
  return nil unless user

  name = SiteSetting.enable_names ? user.name : nil

  args = {
    user_id: user.id,
    username: user.username,
    avatar: PrettyText.avatar_img(user.avatar_template, "extra_large"),
    name: name,
    bio: user.user_profile.bio_excerpt(230),
    location: Onebox::Helpers.sanitize(user.user_profile.location),
    joined: I18n.t('joined'),
    created_at: user.created_at.strftime(I18n.t('datetime_formats.formats.date_only')),
    website: user.user_profile.website,
    website_name: UserSerializer.new(user).website_name,
    original_url: url
  }

  Mustache.render(template("discourse_user_onebox"), args)
end
2018-02-14 07:39:44 +08:00
2020-11-25 07:53:05 +08:00
# Renders the category onebox for a local category URL.
#
# Returns nil when the route carries no category path, when no category matches
# it, or when a guardian built without a user cannot see the category.
def self.local_category_html(url, route)
  slug_path = route[:category_slug_path_with_id]
  return unless slug_path

  category = Category.find_by_slug_path_with_id(slug_path)
  # The lookup can come back empty; stop here rather than passing nil along.
  return unless category
  return unless Guardian.new.can_see_category?(category)

  subcategories = category.subcategories

  args = {
    url: category.url,
    name: category.name,
    color: category.color,
    logo_url: category.uploaded_logo&.url,
    description: category.description,
    has_subcategories: subcategories.present?,
    subcategories: subcategories.map { |sc| { name: sc.name, color: sc.color, url: sc.url } }
  }

  Mustache.render(template("discourse_category_onebox"), args)
end
2020-07-27 08:23:54 +08:00
# Splits the pipe-delimited `blocked_onebox_domains` site setting into an array.
def self.blocked_domains
  raw_setting = SiteSetting.blocked_onebox_domains
  raw_setting.split("|")
end
2018-12-19 14:27:07 +08:00
# List handed to FinalDestination as `preserve_fragment_url_hosts:` (see
# external_onebox). Memoized on first use.
def self.preserve_fragment_url_hosts
  return @preserve_fragment_url_hosts if @preserve_fragment_url_hosts

  @preserve_fragment_url_hosts = ['http://github.com']
end
2020-08-28 03:12:13 +08:00
# Origins that oneboxes may embed in iframes.
#
# Starts from the pipe-delimited `allowed_onebox_iframes` setting; a "*" entry
# swaps that list for every origin Onebox::Engine knows. The entries of the
# `allowed_iframes` setting are then appended. Always returns a new array.
def self.allowed_iframe_origins
  onebox_origins = SiteSetting.allowed_onebox_iframes.split("|")
  onebox_origins = Onebox::Engine.all_iframe_origins if onebox_origins.include?("*")

  onebox_origins + SiteSetting.allowed_iframes.split("|")
end
2018-02-14 07:39:44 +08:00
# Builds the { onebox:, preview: } hash for a URL that is not served by this
# site, going through Discourse.cache (entries expire after one day).
#
# Steps inside the cache block:
# 1. Resolve the URL with FinalDestination. When it does not end up
#    :resolved, return a blank onebox whose preview is an error box.
# 2. Return a blank onebox when nothing was resolved or the resolved hostname
#    matches a blocked domain.
# 3. Ask Onebox for a preview and collect its HTML and placeholder HTML.
# 4. When the preview reports errors, surface them: as a standalone error box
#    if there is no preview HTML, otherwise appended to the preview's <aside>.
def self.external_onebox(url)
  Discourse.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do
    fd = FinalDestination.new(url,
                              ignore_redirects: ignore_redirects,
                              ignore_hostnames: blocked_domains,
                              force_get_hosts: force_get_hosts,
                              force_custom_user_agent_hosts: force_custom_user_agent_hosts,
                              preserve_fragment_url_hosts: preserve_fragment_url_hosts)
    uri = fd.resolve

    if fd.status != :resolved
      args = { link: url }
      if fd.status == :invalid_address
        args[:error_message] = I18n.t("errors.onebox.invalid_address", hostname: fd.hostname)
      elsif fd.status_code
        args[:error_message] = I18n.t("errors.onebox.error_response", status_code: fd.status_code)
      end

      error_box = blank_onebox
      error_box[:preview] = preview_error_onebox(args)
      # `return` exits the method from inside the block, so the block never
      # produces a value for this failure.
      return error_box
    end

    # any? with a block stops at the first matching domain and builds no
    # intermediate array. String#match? treats each entry as a regex pattern.
    return blank_onebox if uri.blank? || blocked_domains.any? { |hostname| uri.hostname.match?(hostname) }

    options = {
      max_width: 695,
      sanitize_config: Onebox::DiscourseOneboxSanitizeConfig::Config::DISCOURSE_ONEBOX,
      allowed_iframe_origins: allowed_iframe_origins,
      hostname: GlobalSetting.hostname,
      facebook_app_access_token: SiteSetting.facebook_app_access_token,
    }

    options[:cookie] = fd.cookie if fd.cookie

    r = Onebox.preview(uri.to_s, options)
    result = { onebox: r.to_s, preview: r&.placeholder_html.to_s }

    # NOTE: Call r.errors after calling placeholder_html
    if r.errors.any?
      missing_attributes = r.errors.keys.map(&:to_s).sort.join(I18n.t("word_connector.comma"))
      error_message = I18n.t("errors.onebox.missing_data", missing_attributes: missing_attributes, count: r.errors.keys.size)
      args = r.data.merge(error_message: error_message)

      if result[:preview].blank?
        result[:preview] = preview_error_onebox(args)
      else
        doc = Nokogiri::HTML5::fragment(result[:preview])
        aside = doc.at('aside')

        if aside
          # Add an error message to the preview that was returned
          error_fragment = preview_error_onebox_fragment(args)
          aside.add_child(error_fragment)
          result[:preview] = doc.to_html
        end
      end
    end

    result
  end
end
# Renders the error onebox template (or its fragment variant when
# +is_fragment+ is truthy) with +args+.
#
# +args+ is modified in place: :title defaults to :link when a link is given,
# and :error_message, when present, has its emoji unescaped.
def self.preview_error_onebox(args, is_fragment = false)
  link = args[:link]
  args[:title] ||= link if link

  message = args[:error_message]
  args[:error_message] = PrettyText.unescape_emoji(message) if message

  template_name =
    if is_fragment
      "preview_error_fragment_onebox"
    else
      "preview_error_onebox"
    end

  Mustache.render(template(template_name), args)
end
# Convenience wrapper: renders the fragment variant of the error onebox.
def self.preview_error_onebox_fragment(args)
  render_as_fragment = true
  preview_error_onebox(args, render_as_fragment)
end
2017-12-19 01:31:41 +08:00
2020-11-19 01:55:16 +08:00
# Returns the contents of lib/onebox/templates/<template_name>.mustache under
# Rails.root. Each template is read from disk once and then served from an
# in-memory hash kept on the module.
def self.template(template_name)
  @template_cache ||= {}

  @template_cache.fetch(template_name) do
    full_path = "#{Rails.root}/lib/onebox/templates/#{template_name}.mustache"
    @template_cache[template_name] = File.read(full_path)
  end
end
2014-03-18 10:12:58 +08:00
2013-02-06 03:16:51 +08:00
end