2019-05-03 06:17:27 +08:00
# frozen_string_literal: true
2018-03-15 22:27:55 +08:00
require 'uri'
2017-05-23 04:42:19 +08:00
2016-10-25 07:25:44 +08:00
Dir [ " #{ Rails . root } /lib/onebox/engine/*_onebox.rb " ] . sort . each { | f | require f }
2014-02-26 02:35:08 +08:00
2013-02-06 03:16:51 +08:00
module Oneboxer
2018-11-27 16:00:31 +08:00
# CSS class carried by anchors that should be considered for oneboxing.
ONEBOX_CSS_CLASS = "onebox"

# File extensions (leading dot included, case-insensitive) handled as audio.
# Anchored with \A/\z so the whole extension must match, not just one line of it.
AUDIO_REGEX = /\A\.(mp3|og[ga]|opus|wav|m4[abpr]|aac|flac)\z/i

# File extensions (leading dot included, case-insensitive) handled as video.
VIDEO_REGEX = /\A\.(mov|mp4|webm|m4v|3gp|ogv|avi|mpeg)\z/i
2018-11-27 16:00:31 +08:00
2013-04-30 10:43:21 +08:00
# Guarded so that reloading this file does not redefine the constant.
unless defined?(Oneboxer::Result)
  # Pairs a processed document with a flag telling whether it was modified.
  Result = Struct.new(:doc, :changed) do
    # Predicate-style reader for the `changed` member.
    alias_method :changed?, :changed

    # Serializes the wrapped document by delegating to it.
    def to_html
      doc.to_html
    end
  end
end
2017-06-27 03:38:23 +08:00
# Memoized list of base URLs handed to FinalDestination as `ignore_redirects`.
# The site's own base URL is included.
def self.ignore_redirects
  return @ignore_redirects if @ignore_redirects

  @ignore_redirects = [
    'http://www.dropbox.com',
    'http://store.steampowered.com',
    'http://vimeo.com',
    Discourse.base_url
  ]
end
2017-08-08 17:44:27 +08:00
# Memoized list of hosts handed to FinalDestination as `force_get_hosts`.
def self.force_get_hosts
  return @force_get_hosts if @force_get_hosts

  @force_get_hosts = ['http://us.battle.net']
end
2019-11-07 17:14:43 +08:00
# Splits the pipe-delimited `force_custom_user_agent_hosts` site setting
# into an array of entries.
def self.force_custom_user_agent_hosts
  raw_setting = SiteSetting.force_custom_user_agent_hosts
  raw_setting.split('|')
end
2018-10-10 18:39:03 +08:00
# Memoized list of post types that may be rendered as a local topic onebox:
# regular posts and moderator actions.
def self.allowed_post_types
  return @allowed_post_types if @allowed_post_types

  @allowed_post_types = [
    Post.types[:regular],
    Post.types[:moderator_action]
  ]
end
2017-07-28 09:20:09 +08:00
# Returns the :preview entry of the onebox data for `url`.
#
# options - optional Hash; a truthy :invalidate_oneboxes drops the cached
#           entries for the URL first. The hash is also forwarded to onebox_raw.
def self.preview(url, options = nil)
  opts = options || {}
  invalidate(url) if opts[:invalidate_oneboxes]

  onebox_raw(url, opts)[:preview]
end
2017-07-28 09:20:09 +08:00
# Returns the :onebox entry of the onebox data for `url`.
#
# options - optional Hash; a truthy :invalidate_oneboxes drops the cached
#           entries for the URL first. The hash is also forwarded to onebox_raw.
def self.onebox(url, options = nil)
  opts = options || {}
  invalidate(url) if opts[:invalidate_oneboxes]

  onebox_raw(url, opts)[:onebox]
end
# Reads the cached onebox HTML for `url` without triggering a fetch.
#
# Returns the cached :onebox value, or nil when nothing is cached. If reading
# or indexing the cached entry raises, the entry is invalidated, a warning is
# logged and an empty string is returned.
def self.cached_onebox(url)
  cached = Discourse.cache.read(onebox_cache_key(url))
  cached[:onebox] if cached
rescue => e
  invalidate(url)
  Rails.logger.warn("invalid cached onebox for #{url} #{e}")
  ""
end
# Reads the cached preview HTML for `url` without triggering a fetch.
#
# Returns the cached :preview value, or nil when nothing is cached. If reading
# or indexing the cached entry raises, the entry is invalidated, a warning is
# logged and an empty string is returned.
def self.cached_preview(url)
  cached = Discourse.cache.read(onebox_cache_key(url))
  cached[:preview] if cached
rescue => e
  invalidate(url)
  Rails.logger.warn("invalid cached preview for #{url} #{e}")
  ""
end
2014-01-29 02:18:19 +08:00
# Removes both cache entries kept for `url`: the rendered onebox and the
# "recently failed" marker.
def self.invalidate(url)
  rendered_key = onebox_cache_key(url)
  Discourse.cache.delete(rendered_key)

  failed_key = onebox_failed_cache_key(url)
  Discourse.cache.delete(failed_key)
end
2014-01-29 02:18:19 +08:00
2013-02-06 03:16:51 +08:00
# Parse URLs out of HTML, returning the document when finished.
#
# string_or_doc - HTML String (parsed into a Nokogiri fragment) or an
#                 already-parsed document.
# extra_paths   - additional CSS selectors searched alongside `a.onebox`;
#                 nil is accepted and treated as no extra selectors.
#
# Yields the href and the element for every match whose href is not blank.
def self.each_onebox_link(string_or_doc, extra_paths: [])
  doc = string_or_doc
  doc = Nokogiri::HTML::fragment(doc) if doc.is_a?(String)

  # Iterating an empty result is a no-op, so no emptiness guard is needed.
  doc.css("a.#{ONEBOX_CSS_CLASS}", *extra_paths).each do |link|
    href = link['href']
    yield(href, link) if href.present?
  end

  doc
end
2018-04-12 03:33:45 +08:00
# Tag names treated as block-level when deciding whether a onebox may replace
# the <p> that wraps its link. `||=` keeps reloads from redefining the constant;
# the list is frozen so it cannot be mutated by accident.
HTML5_BLOCK_ELEMENTS ||= %w{address article aside blockquote canvas center dd div dl dt fieldset figcaption figure footer form h1 h2 h3 h4 h5 h6 header hgroup hr li main nav noscript ol output p pre section table tfoot ul video}.freeze
2018-11-27 16:00:31 +08:00
# Replaces onebox links in a document with the HTML produced by the given block.
#
# string_or_doc - HTML String (parsed into a Nokogiri fragment) or an
#                 already-parsed document; a passed-in document is modified in place.
# extra_paths   - forwarded to each_onebox_link.
#
# Yields (url, element) for each onebox link. The first value returned by the
# block is used as replacement HTML; a falsy value, or HTML that parses to no
# nodes, leaves the link untouched.
#
# Returns a Result wrapping the document and whether anything was replaced.
def self.apply(string_or_doc, extra_paths: nil)
  doc = string_or_doc
  doc = Nokogiri::HTML::fragment(doc) if doc.is_a?(String)
  changed = false

  each_onebox_link(doc, extra_paths: extra_paths) do |url, element|
    onebox, _ = yield(url, element)
    next unless onebox

    parsed_onebox = Nokogiri::HTML::fragment(onebox)
    next if parsed_onebox.children.empty?

    # When the link is the only child of a <p> and the onebox starts with a
    # block-level element, replace the whole paragraph instead of nesting
    # block content inside it.
    if element&.parent&.node_name&.downcase == "p" &&
        element.parent.children.count == 1 &&
        HTML5_BLOCK_ELEMENTS.include?(parsed_onebox.children[0].node_name.downcase)
      element = element.parent
    end

    changed = true
    element.swap(parsed_onebox.to_html)
  end

  # strip empty <p> elements
  doc.css("p").each do |p|
    p.remove if p.children.empty? && doc.children.count > 1
  end

  Result.new(doc, changed)
end
2016-12-20 07:31:10 +08:00
# True when the preview flag stored in redis for `user_id` equals "1".
def self.is_previewing?(user_id)
  Discourse.redis.get(preview_key(user_id)) == "1"
end
# Sets the preview flag for `user_id` in redis to "1" with a one-minute expiry.
def self.preview_onebox!(user_id)
  Discourse.redis.setex(preview_key(user_id), 1.minute, "1")
end
# Deletes the preview flag stored in redis for `user_id`.
def self.onebox_previewed!(user_id)
  flag_key = preview_key(user_id)
  Discourse.redis.del(flag_key)
end
2017-01-06 10:01:14 +08:00
# Returns whatever Onebox::Matcher#oneboxed reports for `url`.
def self.engine(url)
  matcher = Onebox::Matcher.new(url)
  matcher.oneboxed
end
2019-11-28 05:48:29 +08:00
# True when a non-blank failure marker is cached for `url`.
def self.recently_failed?(url)
  failure_marker = Discourse.cache.read(onebox_failed_cache_key(url))
  failure_marker.present?
end
# Caches a failure marker for `url` that expires after one hour.
def self.cache_failed!(url)
  failed_key = onebox_failed_cache_key(url)
  Discourse.cache.write(failed_key, true, expires_in: 1.hour)
end
2014-03-18 10:12:58 +08:00
# NOTE(review): a bare `private` only sets the default visibility of instance
# methods defined after it. The methods below are all defined with `def self.`,
# so they remain publicly callable. Hiding them would require
# `private_class_method` — confirm no external callers rely on them first.
private
2018-06-07 13:28:18 +08:00
# Redis key under which the preview flag for `user_id` is stored.
def self.preview_key(user_id)
  "onebox:preview:#{user_id}"
end
2016-12-20 07:31:10 +08:00
2018-06-07 13:28:18 +08:00
# Empty result with the same shape as a real one, so callers can index
# :preview / :onebox without nil checks.
def self.blank_onebox
  { preview: "", onebox: "" }
end
2014-04-10 04:57:45 +08:00
2018-06-07 13:28:18 +08:00
# Cache key under which the rendered onebox for `url` is stored.
def self.onebox_cache_key(url)
  "onebox__#{url}"
end
2015-08-24 08:43:07 +08:00
2019-11-28 05:48:29 +08:00
# Cache key under which the failure marker for `url` is stored.
def self.onebox_failed_cache_key(url)
  "onebox_failed__#{url}"
end
2018-06-07 13:28:18 +08:00
# Produces the { onebox:, preview: } hash for `url`.
#
# The URL is normalized through URI, then a local onebox is attempted and an
# external one is used when the local attempt returns nothing. Any
# StandardError is logged at info level and answered with a blank onebox.
def self.onebox_raw(url, opts = {})
  url = URI(url).to_s
  local_onebox(url, opts) || external_onebox(url)
rescue => e
  # no point warning here, just cause we have an issue oneboxing a url
  # we can later hunt for failed oneboxes by searching logs if needed
  Rails.logger.info("Failed to onebox #{url} #{e} #{e.backtrace}")
  # return a blank hash, so rest of the code works
  blank_onebox
end
2018-02-14 07:39:44 +08:00
2018-06-07 13:28:18 +08:00
# Builds a onebox for a URL that routes to this site.
#
# Returns nil when Discourse.route_for does not recognise the URL. Otherwise
# returns { onebox:, preview: } holding the same HTML for both: the rendering
# for uploads, topics or users when one is produced, else a plain link.
def self.local_onebox(url, opts = {})
  route = Discourse.route_for(url)
  return unless route

  html =
    case route[:controller]
    when "uploads" then local_upload_html(url)
    when "topics" then local_topic_html(url, route, opts)
    when "users" then local_user_html(url, route)
    end

  # The URL ends up inside a single-quoted attribute, so it is HTML-escaped
  # to keep a quote in the URL from terminating the attribute.
  escaped_url = CGI.escapeHTML(url)
  html = html.presence || "<a href='#{escaped_url}'>#{escaped_url}</a>"

  { onebox: html, preview: html }
end
2018-02-14 07:39:44 +08:00
2018-06-07 13:28:18 +08:00
# Builds an inline media player for an uploaded file, chosen by the
# extension of the URL's path.
#
# Returns a <video> onebox for extensions matching VIDEO_REGEX, an <audio>
# element for AUDIO_REGEX, and nil for anything else. The URL is HTML-escaped
# before being placed in attributes and link text.
def self.local_upload_html(url)
  safe_url = CGI.escapeHTML(url)

  case File.extname(URI(url).path || "")
  when VIDEO_REGEX
    <<~HTML
      <div class="onebox video-onebox">
        <video width="100%" height="100%" controls="">
          <source src='#{safe_url}'>
          <a href='#{safe_url}'>#{safe_url}</a>
        </video>
      </div>
    HTML
  when AUDIO_REGEX
    "<audio controls><source src='#{safe_url}'><a href='#{safe_url}'>#{safe_url}</a></audio>"
  end
end
2018-02-14 07:39:44 +08:00
2020-02-12 18:11:28 +08:00
# Resolves the topic referenced by `route` if it may be oneboxed.
#
# opts may carry :user_id, :category_id and :topic_id. When the user is found,
# they must be able to see the given category and topic (each only checked
# when the record exists); otherwise nil is returned.
#
# Private messages are never returned. Unless the topic belongs to the
# category found from opts, it must also be visible to an anonymous Guardian.
#
# Returns the Topic or nil.
def self.local_topic(url, route, opts)
  current_category = nil
  current_user = User.find_by(id: opts[:user_id])

  if current_user
    current_category = Category.find_by(id: opts[:category_id])
    if current_category && !Guardian.new(current_user).can_see_category?(current_category)
      return
    end

    current_topic = Topic.find_by(id: opts[:topic_id])
    if current_topic && !Guardian.new(current_user).can_see_topic?(current_topic)
      return
    end
  end

  topic = Topic.find_by(id: route[:topic_id])
  return if topic.nil? || topic.private_message?

  same_category = current_category.present? && current_category.id == topic.category_id
  return if !same_category && !Guardian.new.can_see_topic?(topic)

  topic
end
# Renders the onebox HTML for a local topic or post URL.
#
# Returns nil when local_topic yields no topic, when the post is missing or
# hidden, or when its type is not in allowed_post_types.
#
# A link to a later post (post_number > 1) of the topic given in
# opts[:topic_id] is rendered as a cooked [quote]; everything else is rendered
# through the discourse_topic_onebox template.
def self.local_topic_html(url, route, opts)
  topic = local_topic(url, route, opts)
  return unless topic

  post_number = route[:post_number].to_i

  post = post_number > 1 ?
    topic.posts.where(post_number: post_number).first :
    topic.ordered_posts.first

  return if !post || post.hidden || !allowed_post_types.include?(post.post_type)

  if post_number > 1 && opts[:topic_id] == topic.id
    # Non-mutating gsub: the string returned by Post#excerpt is left untouched.
    excerpt = post.excerpt(SiteSetting.post_onebox_maxlength)
      .gsub(/[\r\n]+/, " ")
      .gsub("[/quote]", "[quote]") # don't break my quote

    quote = "[quote=\"#{post.user.username}, topic:#{topic.id}, post:#{post.post_number}\"]\n#{excerpt}\n[/quote]"

    PrettyText.cook(quote)
  else
    args = {
      topic_id: topic.id,
      post_number: post.post_number,
      avatar: PrettyText.avatar_img(post.user.avatar_template, "tiny"),
      original_url: url,
      title: PrettyText.unescape_emoji(CGI::escapeHTML(topic.title)),
      category_html: CategoryBadge.html_for(topic.category),
      quote: PrettyText.unescape_emoji(post.excerpt(SiteSetting.post_onebox_maxlength)),
    }

    template = File.read("#{Rails.root}/lib/onebox/templates/discourse_topic_onebox.hbs")
    Mustache.render(template, args)
  end
end
2018-02-14 07:39:44 +08:00
2018-06-07 13:28:18 +08:00
# Renders the user-card onebox for a local profile URL.
#
# The username is taken from route[:username] and looked up case-insensitively
# via username_lower. Returns nil when no such user exists; otherwise the
# rendered discourse_user_onebox template.
def self.local_user_html(url, route)
  username = route[:username] || ""
  user = User.find_by(username_lower: username.downcase)
  return unless user

  # Full name is only exposed when the site enables names.
  name = user.name if SiteSetting.enable_names
  profile = user.user_profile

  args = {
    user_id: user.id,
    username: user.username,
    avatar: PrettyText.avatar_img(user.avatar_template, "extra_large"),
    name: name,
    bio: profile.bio_excerpt(230),
    location: Onebox::Helpers.sanitize(profile.location),
    joined: I18n.t('joined'),
    created_at: user.created_at.strftime(I18n.t('datetime_formats.formats.date_only')),
    website: profile.website,
    website_name: UserSerializer.new(user).website_name,
    original_url: url
  }

  template = File.read("#{Rails.root}/lib/onebox/templates/discourse_user_onebox.hbs")
  Mustache.render(template, args)
end
2018-02-14 07:39:44 +08:00
2018-09-18 02:00:16 +08:00
# Splits the pipe-delimited `onebox_domains_blacklist` site setting into an
# array of entries.
def self.blacklisted_domains
  SiteSetting.onebox_domains_blacklist.split("|")
end
2018-12-19 14:27:07 +08:00
# Memoized list of hosts handed to FinalDestination as
# `preserve_fragment_url_hosts`.
def self.preserve_fragment_url_hosts
  return @preserve_fragment_url_hosts if @preserve_fragment_url_hosts

  @preserve_fragment_url_hosts = ['http://github.com']
end
2018-06-07 13:28:18 +08:00
# Fetches (or reads from cache) the onebox for an external URL.
#
# The URL is resolved through FinalDestination, then rendered with
# Onebox.preview. The result hash { onebox:, preview: } is produced inside
# Discourse.cache.fetch with a one-day expiry.
#
# Returns blank_onebox when the URL does not resolve or its hostname matches a
# blacklisted domain.
def self.external_onebox(url)
  Discourse.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do
    # Read the setting once; it is needed both for resolving and for the check below.
    ignored_hostnames = blacklisted_domains

    fd = FinalDestination.new(url,
                              ignore_redirects: ignore_redirects,
                              ignore_hostnames: ignored_hostnames,
                              force_get_hosts: force_get_hosts,
                              force_custom_user_agent_hosts: force_custom_user_agent_hosts,
                              preserve_fragment_url_hosts: preserve_fragment_url_hosts)
    uri = fd.resolve

    # `return` here leaves the method itself, not just the block, so the blank
    # result is not handed back to cache.fetch.
    # NOTE(review): presumably this keeps blanks out of the cache — confirm.
    if uri.blank? || ignored_hostnames.any? { |hostname| uri.hostname.match?(hostname) }
      return blank_onebox
    end

    options = {
      max_width: 695,
      sanitize_config: Onebox::DiscourseOneboxSanitizeConfig::Config::DISCOURSE_ONEBOX
    }

    options[:cookie] = fd.cookie if fd.cookie

    r = Onebox.preview(uri.to_s, options)

    { onebox: r.to_s, preview: r&.placeholder_html.to_s }
  end
end
2014-03-18 10:12:58 +08:00
2013-02-06 03:16:51 +08:00
end