2019-05-03 06:17:27 +08:00
# frozen_string_literal: true
2018-03-15 22:27:55 +08:00
require 'uri'
2017-05-23 04:42:19 +08:00
2016-10-25 07:25:44 +08:00
Dir [ " #{ Rails . root } /lib/onebox/engine/*_onebox.rb " ] . sort . each { | f | require f }
2014-02-26 02:35:08 +08:00
2013-02-06 03:16:51 +08:00
module Oneboxer
2018-11-27 16:00:31 +08:00
# CSS class carried by anchors that are candidates for oneboxing
# (used to build the selector in each_onebox_link).
ONEBOX_CSS_CLASS = "onebox"

# Matched against File.extname output (leading dot included) to decide
# whether a local upload is rendered as an <audio> or a <video> element.
AUDIO_REGEX = /^\.(mp3|og[ga]|opus|wav|m4[abpr]|aac|flac)$/i
VIDEO_REGEX = /^\.(mov|mp4|webm|m4v|3gp|ogv|avi|mpeg)$/i
2018-11-27 16:00:31 +08:00
2013-04-30 10:43:21 +08:00
# keep reloaders happy
# Return value of Oneboxer.apply: wraps the processed document together with a
# flag telling whether any link was replaced. The `defined?` guard avoids
# redefining the Struct when the file is loaded again.
unless defined?(Oneboxer::Result)
  Result = Struct.new(:doc, :changed) do
    # Serializes the wrapped document by delegating to doc.to_html.
    def to_html
      doc.to_html
    end

    # Returns the stored `changed` flag.
    def changed?
      changed
    end
  end
end
2017-06-27 03:38:23 +08:00
# Base URLs handed to FinalDestination as the `ignore_redirects` option
# (see get_final_destination_options). Memoized on first call, so the value of
# Discourse.base_url is captured once.
def self.ignore_redirects
  @ignore_redirects ||= [
    'http://www.dropbox.com',
    'http://store.steampowered.com',
    'http://vimeo.com',
    'https://www.youtube.com',
    Discourse.base_url
  ]
end
2021-04-01 01:19:34 +08:00
# Builds the list of "https://www.amazon.<tld>" origins, one per known
# Amazon country suffix.
#
# @return [Array<String>]
def self.amazon_domains
  amazon_suffixes = %w[com com.br ca cn fr de in it co.jp com.mx nl pl sa sg es se com.tr ae co.uk]
  amazon_suffixes.map { |suffix| "https://www.amazon.#{suffix}" }
end
2017-08-08 17:44:27 +08:00
# Origins passed to FinalDestination as `force_get_hosts`
# (see get_final_destination_options).
#
# Combines, in order: the `force_get_hosts` site setting (each entry prefixed
# with "https://"), the `cache_onebox_response_body_domains` site setting
# (each entry prefixed with "https://www."), and the Amazon origins.
# Duplicates are removed.
#
# @return [Array<String>]
def self.force_get_hosts
  forced = SiteSetting.force_get_hosts.split('|').map { |domain| "https://#{domain}" }
  cached = SiteSetting.cache_onebox_response_body_domains.split('|').map { |domain| "https://www.#{domain}" }

  (forced + cached + amazon_domains).uniq
end
2019-11-07 17:14:43 +08:00
# Splits the pipe-delimited `force_custom_user_agent_hosts` site setting into
# an array of entries.
def self.force_custom_user_agent_hosts
  raw_setting = SiteSetting.force_custom_user_agent_hosts
  raw_setting.split('|')
end
2018-10-10 18:39:03 +08:00
# Post types that local_topic_html is willing to render (regular posts and
# moderator actions). Looked up once and memoized.
def self.allowed_post_types
  @allowed_post_types ||= %i[regular moderator_action].map { |type_name| Post.types[type_name] }
end
2022-05-24 01:02:02 +08:00
# Registry of extra local-onebox handlers, keyed by controller name.
# Populated through register_local_handler and consulted by local_onebox.
def self.local_handlers
  @local_handlers ||= {}
end
# Registers a block to build the onebox HTML for local URLs routed to
# `controller`. local_onebox invokes it as `handler.call(url, route)` when none
# of the built-in controllers match. A later registration for the same
# controller replaces the earlier one.
def self.register_local_handler(controller, &handler)
  local_handlers[controller] = handler
end
2014-01-28 04:09:09 +08:00
# Returns the :preview HTML for `url`.
#
# `options` may be nil. When options[:invalidate_oneboxes] is truthy the
# cached entries for the URL are dropped before the onebox is built.
def self.preview(url, options = nil)
  opts = options || {}
  invalidate(url) if opts[:invalidate_oneboxes]

  onebox_raw(url, opts)[:preview]
end
2014-01-28 04:09:09 +08:00
# Returns the :onebox HTML for `url`.
#
# `options` may be nil. When options[:invalidate_oneboxes] is truthy the
# cached entries for the URL are dropped before the onebox is built.
def self.onebox(url, options = nil)
  opts = options || {}
  invalidate(url) if opts[:invalidate_oneboxes]

  onebox_raw(url, opts)[:onebox]
end
# Reads the cached :onebox HTML for `url` without building anything.
#
# Returns nil on a cache miss. If reading the entry raises, the cache entries
# for the URL are invalidated, a warning is logged and "" is returned.
def self.cached_onebox(url)
  cached = Discourse.cache.read(onebox_cache_key(url))
  cached[:onebox] if cached
rescue => e
  invalidate(url)
  Rails.logger.warn("invalid cached onebox for #{url} #{e}")
  ""
end
# Reads the cached :preview HTML for `url` without building anything.
#
# Returns nil on a cache miss. If reading the entry raises, the cache entries
# for the URL are invalidated, a warning is logged and "" is returned.
def self.cached_preview(url)
  cached = Discourse.cache.read(onebox_cache_key(url))
  cached[:preview] if cached
rescue => e
  invalidate(url)
  Rails.logger.warn("invalid cached preview for #{url} #{e}")
  ""
end
2014-01-29 02:18:19 +08:00
# Drops both cache entries kept for `url`: the rendered onebox and the
# "recently failed" marker.
def self.invalidate(url)
  cache = Discourse.cache
  cache.delete(onebox_cache_key(url))
  cache.delete(onebox_failed_cache_key(url))
end
2014-01-29 02:18:19 +08:00
2021-04-01 01:19:34 +08:00
# Tells whether the response body fetched for `uri` should be cached.
#
# True only when the `cache_onebox_response_body` site setting is enabled and
# the URI's hostname ends with one of the pipe-delimited entries of
# `cache_onebox_response_body_domains`.
#
# @param uri [String, URI] strings are parsed with URI.parse (which may raise
#   URI::InvalidURIError)
# @return [Boolean] false (rather than raising) when the URI has no hostname
def self.cache_response_body?(uri)
  uri = URI.parse(uri) if uri.is_a?(String)
  return false unless SiteSetting.cache_onebox_response_body?

  hostname = uri.hostname
  return false if hostname.nil?

  SiteSetting.cache_onebox_response_body_domains.split("|").any? { |domain| hostname.end_with?(domain) }
end
# Stores `response` in Redis (outside the site namespace) under the key derived
# from `uri`, with a one-minute expiry.
def self.cache_response_body(uri, response)
  ttl_seconds = 1.minutes.to_i
  Discourse.redis.without_namespace.setex(redis_cached_response_body_key(uri), ttl_seconds, response)
end
# True when Redis currently holds a cached response body for `uri`.
def self.cached_response_body_exists?(uri)
  hits = Discourse.redis.without_namespace.exists(redis_cached_response_body_key(uri)).to_i
  hits > 0
end
# Returns whatever Redis holds under the cached-response-body key for `uri`.
def self.fetch_cached_response_body(uri)
  Discourse.redis.without_namespace.get(redis_cached_response_body_key(uri))
end
# Redis key under which the response body for `uri` is cached.
def self.redis_cached_response_body_key(uri)
  "CACHED_RESPONSE_#{uri}"
end
2013-02-06 03:16:51 +08:00
# Parse URLs out of HTML, returning the document when finished.
#
# Yields `(href, link_node)` for every node matched by "a.<ONEBOX_CSS_CLASS>"
# or by any selector in `extra_paths`, skipping nodes whose href is blank.
#
# @param doc [#css] parsed document or fragment
# @param extra_paths [Array<String>, nil] additional CSS selectors
# @return the same `doc` that was passed in
def self.each_onebox_link(doc, extra_paths: [])
  doc.css("a.#{ONEBOX_CSS_CLASS}", *extra_paths).each do |link|
    href = link['href']
    yield(href, link) if href.present?
  end

  doc
end
2018-04-12 03:33:45 +08:00
# Element names treated as block-level by `apply` when deciding whether a
# onebox can stay inside its surrounding <p>. Assigned with `||=` so loading the
# file again does not redefine the constant; frozen because it is only read.
HTML5_BLOCK_ELEMENTS ||= %w{address article aside blockquote canvas center dd div dl dt fieldset figcaption figure footer form h1 h2 h3 h4 h5 h6 header hgroup hr li main nav noscript ol output p pre section table tfoot ul video}.freeze
2018-11-27 16:00:31 +08:00
# Replaces every onebox link in `string_or_doc` with the HTML produced by the
# caller's block.
#
# The block receives `(url, element)` and returns the onebox HTML (only the
# first element of its return value is used). Links for which the block
# returns a blank value, or HTML that parses to no nodes, are left untouched.
#
# @param string_or_doc [String, Object] HTML string (parsed with
#   Loofah.fragment) or an already parsed document
# @param extra_paths [Array<String>, nil] extra CSS selectors forwarded to
#   each_onebox_link
# @return [Result] the document plus a flag telling whether anything changed
def self.apply(string_or_doc, extra_paths: nil)
  doc = string_or_doc
  doc = Loofah.fragment(doc) if doc.is_a?(String)
  changed = false

  each_onebox_link(doc, extra_paths: extra_paths) do |url, element|
    onebox, _ = yield(url, element)
    next if onebox.blank?

    parsed_onebox = Loofah.fragment(onebox)
    next if parsed_onebox.children.blank?

    changed = true

    parent = element.parent
    if parent&.node_name&.downcase == "p" &&
       parsed_onebox.children.any? { |child| HTML5_BLOCK_ELEMENTS.include?(child.node_name.downcase) }

      # The onebox contains block-level nodes and the link sits inside a <p>:
      # lift the onebox out of the paragraph, keeping whatever significant
      # content precedes and follows the link in paragraphs of its own.
      siblings = parent.children
      element_idx = siblings.find_index(element)
      before_idx = first_significant_element_index(siblings, element_idx - 1, -1)
      after_idx = first_significant_element_index(siblings, element_idx + 1, +1)

      if before_idx < 0 && after_idx >= siblings.size
        # Nothing significant on either side: the onebox replaces the paragraph.
        parent.replace parsed_onebox
      elsif before_idx < 0
        # Only trailing content: keep it in the paragraph, onebox goes first.
        parent.children = siblings[after_idx..siblings.size]
        parent.add_previous_sibling(parsed_onebox)
      elsif after_idx >= siblings.size
        # Only leading content: keep it in the paragraph, onebox goes after.
        parent.children = siblings[0..before_idx]
        parent.add_next_sibling(parsed_onebox)
      else
        # Content on both sides: split the paragraph in two around the onebox.
        parent_rest = parent.dup

        parent.children = siblings[0..before_idx]
        parent_rest.children = siblings[after_idx..siblings.size]

        # Inserted in reverse so the final order is parent, onebox, parent_rest.
        parent.add_next_sibling(parent_rest)
        parent.add_next_sibling(parsed_onebox)
      end
    else
      element.replace parsed_onebox
    end
  end

  Result.new(doc, changed)
end
# Walks `elements` from `index` in direction `step` (+1 or -1), skipping <br>
# nodes and whitespace-only text nodes.
#
# @param elements [#[], #size] nodes responding to node_name and to_html
# @param index [Integer] starting position (may already be out of range)
# @param step [Integer] increment applied after each skipped node
# @return [Integer] index of the first node that is not skipped, or the first
#   out-of-range index reached (negative, or >= elements.size)
def self.first_significant_element_index(elements, index, step)
  while index >= 0 && index < elements.size
    node = elements[index]
    tag = node.node_name.downcase

    # Same whitespace test ActiveSupport's String#blank? performs.
    skippable = tag == "br" || (tag == "text" && node.to_html.strip.match?(/\A[[:space:]]*\z/))
    break unless skippable

    index += step
  end

  index
end
2016-12-20 07:31:10 +08:00
# True while the preview flag set by preview_onebox! is present in Redis for
# `user_id`.
def self.is_previewing?(user_id)
  Discourse.redis.get(preview_key(user_id)) == "1"
end
# Marks `user_id` as currently previewing a onebox by storing "1" under the
# preview key with a one-minute expiry.
def self.preview_onebox!(user_id)
  Discourse.redis.setex(preview_key(user_id), 1.minute, "1")
end
# Clears the preview flag stored for `user_id`.
def self.onebox_previewed!(user_id)
  flag_key = preview_key(user_id)
  Discourse.redis.del(flag_key)
end
2017-01-06 10:01:14 +08:00
# Asks Onebox::Matcher for the result of `oneboxed` on `url`, restricting
# iframes to the origins returned by allowed_iframe_origins.
def self.engine(url)
  iframe_regexes = Onebox::Engine.origins_to_regexes(allowed_iframe_origins)
  matcher = Onebox::Matcher.new(url, { allowed_iframe_regexes: iframe_regexes })
  matcher.oneboxed
end
2019-11-28 05:48:29 +08:00
# True when a failure marker written by cache_failed! is still cached for `url`.
def self.recently_failed?(url)
  marker = Discourse.cache.read(onebox_failed_cache_key(url))
  marker.present?
end
# Records that oneboxing `url` failed; the marker expires after one hour.
def self.cache_failed!(url)
  failure_key = onebox_failed_cache_key(url)
  Discourse.cache.write(failure_key, true, expires_in: 1.hour)
end
2014-03-18 10:12:58 +08:00
private
2016-12-20 07:31:10 +08:00
# Redis key holding the "is previewing" flag for `user_id`.
def self.preview_key(user_id)
  "onebox:preview:#{user_id}"
end
2016-10-24 18:46:22 +08:00
# A fresh result hash with empty :preview and :onebox HTML. A new hash is
# returned on every call, so callers may fill it in (external_onebox does).
def self.blank_onebox
  { preview: "", onebox: "" }
end
2016-10-24 18:46:22 +08:00
# Cache key under which the rendered onebox for `url` is stored.
def self.onebox_cache_key(url)
  "onebox__#{url}"
end
2015-08-24 08:43:07 +08:00
2019-11-28 05:48:29 +08:00
# Cache key under which the "recently failed" marker for `url` is stored.
def self.onebox_failed_cache_key(url)
  "onebox_failed__#{url}"
end
2018-02-14 07:39:44 +08:00
# Builds the `{ onebox:, preview: }` hash for `url`.
#
# The URL is escaped, then tried as a local (same-site) onebox first and as an
# external one otherwise. Any StandardError raised along the way is logged at
# info level and a blank onebox is returned instead.
def self.onebox_raw(url, opts = {})
  url = UrlHelper.escape_uri(url).to_s
  local_onebox(url, opts) || external_onebox(url)
rescue => e
  # no point warning here, just cause we have an issue oneboxing a url
  # we can later hunt for failed oneboxes by searching logs if needed
  Rails.logger.info("Failed to onebox #{url} #{e} #{e.backtrace}")
  # return a blank hash, so rest of the code works
  blank_onebox
end
# Builds a onebox for a URL that routes to this site.
#
# Returns nil when Discourse.route_for does not recognise the URL. Otherwise
# the HTML comes from the built-in renderer for the routed controller, or from
# a handler registered through register_local_handler. When no HTML is
# produced a plain link to the URL is used. The same HTML is returned for both
# :onebox and :preview.
def self.local_onebox(url, opts = {})
  return unless route = Discourse.route_for(url)

  html =
    case route[:controller]
    when "uploads" then local_upload_html(url)
    when "topics" then local_topic_html(url, route, opts)
    when "users" then local_user_html(url, route)
    when "list" then local_category_html(url, route)
    else
      handler = local_handlers[route[:controller]]
      handler.call(url, route) if handler
    end

  if html.blank?
    # The URL originates from user content: escape it before placing it in
    # the single-quoted attribute and in the link text.
    link = CGI.escapeHTML(URI(url).to_s)
    html = "<a href='#{link}'>#{link}</a>"
  end

  { onebox: html, preview: html }
end
# Renders a media player for a local upload URL.
#
# The file extension of the URL path picks the element: VIDEO_REGEX gives a
# <video> wrapper, AUDIO_REGEX an <audio> element, anything else returns nil.
# When the `disable_onebox_media_download_controls` site setting is on, the
# player gets controlslist='nodownload'.
#
# The URL is HTML-escaped before interpolation because it ends up inside
# single-quoted attributes.
def self.local_upload_html(url)
  additional_controls = SiteSetting.disable_onebox_media_download_controls ? "controlslist='nodownload'" : ""
  safe_url = CGI.escapeHTML(url)

  case File.extname(URI(url).path || "")
  when VIDEO_REGEX
    <<~HTML
      <div class="onebox video-onebox">
        <video #{additional_controls} width="100%" height="100%" controls="">
          <source src='#{safe_url}'>
          <a href='#{safe_url}'>#{safe_url}</a>
        </video>
      </div>
    HTML
  when AUDIO_REGEX
    "<audio #{additional_controls} controls><source src='#{safe_url}'><a href='#{safe_url}'>#{safe_url}</a></audio>"
  end
end
2018-02-14 07:39:44 +08:00
2020-02-12 18:11:28 +08:00
# Resolves the topic a local URL points at, applying visibility checks.
#
# When opts[:user_id] identifies a user, that user must be able to see the
# category (opts[:category_id]) and topic (opts[:topic_id]) the onebox is
# being rendered in, whenever those records exist. The linked topic itself is
# rejected if it is missing or a private message. Unless it lives in the same
# category as the one being rendered in, an anonymous Guardian must also be
# able to see it.
#
# @return [Topic, nil]
def self.local_topic(url, route, opts)
  current_category = nil

  if current_user = User.find_by(id: opts[:user_id])
    viewer = Guardian.new(current_user)

    current_category = Category.find_by(id: opts[:category_id])
    return if current_category && !viewer.can_see_category?(current_category)

    current_topic = Topic.find_by(id: opts[:topic_id])
    return if current_topic && !viewer.can_see_topic?(current_topic)
  end

  topic = Topic.find_by(id: route[:id] || route[:topic_id])
  return if topic.nil? || topic.private_message?

  same_category = current_category.present? && current_category.id == topic.category_id
  return unless same_category || Guardian.new.can_see_topic?(topic)

  topic
end
# Renders the onebox HTML for a link to a local topic or post.
#
# Returns nil when the topic is not visible (see local_topic), the post is
# missing or hidden, or its type is not in allowed_post_types. A link to a
# reply within the topic currently being rendered (opts[:topic_id]) becomes a
# cooked [quote]; everything else is rendered with the
# "discourse_topic_onebox" template.
def self.local_topic_html(url, route, opts)
  return unless topic = local_topic(url, route, opts)

  post_number = route[:post_number].to_i

  post =
    if post_number > 1
      topic.posts.where(post_number: post_number).first
    else
      topic.ordered_posts.first
    end

  return if !post || post.hidden || !allowed_post_types.include?(post.post_type)

  if post_number > 1 && opts[:topic_id] == topic.id
    excerpt = post.excerpt(SiteSetting.post_onebox_maxlength)
    # NOTE(review): the replacement literal was whitespace-ambiguous in the
    # reviewed dump; a single space is assumed (newlines collapse to a space).
    excerpt.gsub!(/[\r\n]+/, " ")
    excerpt.gsub!("[/quote]", "[quote]") # don't break my quote
    quote = "[quote=\"#{post.user.username}, topic:#{topic.id}, post:#{post.post_number}\"]\n#{excerpt}\n[/quote]"
    PrettyText.cook(quote)
  else
    args = {
      topic_id: topic.id,
      post_number: post.post_number,
      avatar: PrettyText.avatar_img(post.user.avatar_template_url, "tiny"),
      original_url: url,
      title: PrettyText.unescape_emoji(CGI.escapeHTML(topic.title)),
      category_html: CategoryBadge.html_for(topic.category),
      quote: PrettyText.unescape_emoji(post.excerpt(SiteSetting.post_onebox_maxlength)),
    }

    Mustache.render(template("discourse_topic_onebox"), args)
  end
end
2018-02-14 07:39:44 +08:00
# Renders the "discourse_user_onebox" template for the user named in
# route[:username] (matched case-insensitively via username_lower).
# Returns nil when no such user exists. The display name is only included
# when the `enable_names` site setting is on.
def self.local_user_html(url, route)
  username = route[:username] || ""
  user = User.find_by(username_lower: username.downcase)
  return unless user

  name = user.name if SiteSetting.enable_names

  args = {
    user_id: user.id,
    username: user.username,
    avatar: PrettyText.avatar_img(user.avatar_template, "extra_large"),
    name: name,
    bio: user.user_profile.bio_excerpt(230),
    location: Onebox::Helpers.sanitize(user.user_profile.location),
    joined: I18n.t('joined'),
    created_at: user.created_at.strftime(I18n.t('datetime_formats.formats.date_only')),
    website: user.user_profile.website,
    website_name: UserSerializer.new(user).website_name,
    original_url: url
  }

  Mustache.render(template("discourse_user_onebox"), args)
end
2018-02-14 07:39:44 +08:00
2020-11-25 07:53:05 +08:00
# Renders the "discourse_category_onebox" template for the category named by
# route[:category_slug_path_with_id].
#
# Returns nil when the route carries no category path or when an anonymous
# Guardian cannot see the category.
def self.local_category_html(url, route)
  return unless route[:category_slug_path_with_id]

  category = Category.find_by_slug_path_with_id(route[:category_slug_path_with_id])
  return unless Guardian.new.can_see_category?(category)

  # Read the association once; it feeds both template fields below.
  subcategories = category.subcategories

  args = {
    url: category.url,
    name: category.name,
    color: category.color,
    logo_url: category.uploaded_logo&.url,
    description: category.description,
    has_subcategories: subcategories.present?,
    subcategories: subcategories.map { |sc| { name: sc.name, color: sc.color, url: sc.url } }
  }

  Mustache.render(template("discourse_category_onebox"), args)
end
2018-12-19 14:27:07 +08:00
# Origins passed to FinalDestination as `preserve_fragment_url_hosts`
# (see get_final_destination_options). Memoized.
def self.preserve_fragment_url_hosts
  @preserve_fragment_url_hosts ||= ['http://github.com']
end
2020-08-28 03:12:13 +08:00
# Origins from which oneboxes may embed iframes.
#
# Starts from the pipe-delimited `allowed_onebox_iframes` site setting; a "*"
# entry swaps that list for Onebox::Engine.all_iframe_origins. The entries of
# the `allowed_iframes` site setting are always appended.
#
# @return [Array<String>] a new array on every call
def self.allowed_iframe_origins
  onebox_origins = SiteSetting.allowed_onebox_iframes.split("|")
  onebox_origins = Onebox::Engine.all_iframe_origins if onebox_origins.include?("*")

  # `+` builds a new array, so the engine's origin list is never mutated.
  onebox_origins + SiteSetting.allowed_iframes.split("|")
end
2021-05-14 03:48:35 +08:00
# Builds (and caches for one day) the onebox for a URL on another site.
#
# The URL is resolved with FinalDestination using the first entry of
# `available_strategies`; when resolution fails and strategies remain, the
# method calls itself with the rest. Early `return`s leave the method from
# inside the cache block, so those results are not written by this `fetch`.
#
# @param url [String]
# @param available_strategies [Array<Symbol>, nil] strategies still to try;
#   defaults to Oneboxer.ordered_strategies for the URL's hostname. The array
#   is consumed (shifted) by this method.
# @return [Hash] `{ onebox:, preview: }`
def self.external_onebox(url, available_strategies = nil)
  Discourse.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do
    uri = URI(url)
    available_strategies ||= Oneboxer.ordered_strategies(uri.hostname)
    strategy = available_strategies.shift

    # Stays nil when the setting is off (nil is then passed as max_redirects).
    max_redirects = nil
    max_redirects = 0 if SiteSetting.block_onebox_on_redirect

    fd = FinalDestination.new(
      url,
      get_final_destination_options(url, strategy).merge(
        stop_at_blocked_pages: true,
        max_redirects: max_redirects,
        initial_https_redirect_ignore_limit: SiteSetting.block_onebox_on_redirect
      )
    )
    uri = fd.resolve

    return blank_onebox if fd.status == :blocked_page

    if fd.status != :resolved
      args = { link: url }
      if fd.status == :invalid_address
        args[:error_message] = I18n.t("errors.onebox.invalid_address", hostname: fd.hostname)
      elsif (fd.status_code || uri.nil?) && available_strategies.present?
        # Try a different oneboxing strategy, if we have any options left:
        return external_onebox(url, available_strategies)
      elsif fd.status_code
        args[:error_message] = I18n.t("errors.onebox.error_response", status_code: fd.status_code)
      end

      error_box = blank_onebox
      error_box[:preview] = preview_error_onebox(args)
      return error_box
    end

    return blank_onebox if uri.blank?

    onebox_options = {
      max_width: 695,
      sanitize_config: Onebox::SanitizeConfig::DISCOURSE_ONEBOX,
      allowed_iframe_origins: allowed_iframe_origins,
      hostname: GlobalSetting.hostname,
      facebook_app_access_token: SiteSetting.facebook_app_access_token,
      disable_media_download_controls: SiteSetting.disable_onebox_media_download_controls,
      body_cacher: self,
      content_type: fd.content_type
    }

    onebox_options[:cookie] = fd.cookie if fd.cookie

    if Oneboxer.cache_response_body?(url) && SiteSetting.cache_onebox_user_agent.present?
      onebox_options[:user_agent] = SiteSetting.cache_onebox_user_agent
    end

    r = Onebox.preview(uri.to_s, onebox_options)
    result = {
      onebox: WordWatcher.censor(r.to_s),
      preview: WordWatcher.censor(r&.placeholder_html.to_s)
    }

    # NOTE: Call r.errors after calling placeholder_html
    if r.errors.any?
      # `Onebox.preview` can report any number of missing oEmbed / OpenGraph
      # attributes (title, description, image, ...). They are surfaced as an
      # error message in the preview, except when `image` is the only missing
      # one: the remaining data is then enough for a useful onebox.
      error_keys = r.errors.keys
      skip_if_only_error = [:image]

      unless error_keys.length == 1 && skip_if_only_error.include?(error_keys.first)
        missing_attributes = error_keys.map(&:to_s).sort.join(I18n.t("word_connector.comma"))
        error_message = I18n.t("errors.onebox.missing_data", missing_attributes: missing_attributes, count: error_keys.size)
        args = r.verified_data.merge(error_message: error_message)

        if result[:preview].blank?
          result[:preview] = preview_error_onebox(args)
        else
          doc = Nokogiri::HTML5.fragment(result[:preview])
          aside = doc.at('aside')

          if aside
            # Add an error message to the preview that was returned
            error_fragment = preview_error_onebox_fragment(args)
            aside.add_child(error_fragment)
            result[:preview] = doc.to_html
          end
        end
      end
    end

    Oneboxer.cache_preferred_strategy(uri.hostname, strategy)

    result
  end
end
# Renders the preview-error template with `args`.
#
# Fills `args` in place before rendering: :title defaults to :link when a
# link is given, and emoji in :error_message are unescaped.
#
# @param args [Hash] template variables (mutated)
# @param is_fragment [Boolean] render "preview_error_fragment_onebox"
#   instead of "preview_error_onebox"
# @return [String] rendered HTML
def self.preview_error_onebox(args, is_fragment = false)
  args[:title] ||= args[:link] if args[:link]
  args[:error_message] = PrettyText.unescape_emoji(args[:error_message]) if args[:error_message]

  template_name = is_fragment ? "preview_error_fragment_onebox" : "preview_error_onebox"
  Mustache.render(template(template_name), args)
end
# Fragment flavour of preview_error_onebox: same arguments, rendered with the
# fragment template.
def self.preview_error_onebox_fragment(args)
  render_as_fragment = true
  preview_error_onebox(args, render_as_fragment)
end
2017-12-19 01:31:41 +08:00
2020-11-19 01:55:16 +08:00
# Returns the source of the Mustache template `template_name`, read from
# "<Rails.root>/lib/onebox/templates/<template_name>.mustache".
# The file is read once per name; later calls return the memoized contents.
def self.template(template_name)
  @template_cache ||= {}
  @template_cache[template_name] ||= File.read("#{Rails.root}/lib/onebox/templates/#{template_name}.mustache")
end
2014-03-18 10:12:58 +08:00
2021-05-14 03:48:35 +08:00
# Names of all known strategies, with the one cached as preferred for
# `hostname` moved to the front. A cached name that is not a known strategy is
# ignored.
#
# @return [Array<Symbol>] a new array; callers may consume it with `shift`
def self.ordered_strategies(hostname)
  names = strategies.keys
  preferred = Oneboxer.preferred_strategy(hostname)
  return names unless names.include?(preferred)

  [preferred] + (names - [preferred])
end
# Table of resolution strategies and the FinalDestination overrides each one
# enables (read by get_final_destination_options). A new hash is built on
# every call.
def self.strategies
  {
    default: {}, # don't override anything by default
    force_get_and_ua: {
      force_get_host: true,
      force_custom_user_agent_host: true,
    },
  }
end
# Remembers, for two weeks, that `strategy` worked for `hostname`.
# Nothing is stored for the :default strategy.
def self.cache_preferred_strategy(hostname, strategy)
  unless strategy == :default
    ttl_seconds = 2.weeks.to_i
    Discourse.redis.without_namespace.setex(redis_oneboxer_strategy_key(hostname), ttl_seconds, strategy.to_s)
  end
end
# Forgets the strategy remembered for `hostname`.
def self.clear_preferred_strategy!(hostname)
  Discourse.redis.without_namespace.del(redis_oneboxer_strategy_key(hostname))
end
# Returns the strategy remembered for `hostname` as a Symbol, or nil when
# nothing is stored.
def self.preferred_strategy(hostname)
  stored = Discourse.redis.without_namespace.get(redis_oneboxer_strategy_key(hostname))
  stored&.to_sym
end
# Redis key under which the preferred strategy for `hostname` is stored.
def self.redis_oneboxer_strategy_key(hostname)
  "ONEBOXER_STRATEGY_#{hostname}"
end
2021-07-01 22:39:29 +08:00
# Builds the option hash handed to FinalDestination for `url`.
#
# Starts from the module-wide host lists and a 5 second timeout, then applies
# the overrides of `strategy` (defaulting to the first entry of
# Oneboxer.ordered_strategies for the URL's hostname): a strategy may narrow
# :force_get_hosts and/or :force_custom_user_agent_hosts to the URL's own
# host. A strategy name missing from the table applies no overrides.
# When response-body caching applies to the URL and `cache_onebox_user_agent`
# is set, that value becomes :default_user_agent.
#
# @param url [String]
# @param strategy [Symbol, nil]
# @return [Hash]
def self.get_final_destination_options(url, strategy = nil)
  fd_options = {
    ignore_redirects: ignore_redirects,
    force_get_hosts: force_get_hosts,
    force_custom_user_agent_hosts: force_custom_user_agent_hosts,
    preserve_fragment_url_hosts: preserve_fragment_url_hosts,
    timeout: 5
  }

  uri = URI(url)
  strategy = Oneboxer.ordered_strategies(uri.hostname).shift if strategy.blank?

  # Look the strategy up once.
  overrides = Oneboxer.strategies[strategy] || {}
  fd_options[:force_get_hosts] = ["https://#{uri.hostname}"] if overrides[:force_get_host]
  fd_options[:force_custom_user_agent_hosts] = ["https://#{uri.hostname}"] if overrides[:force_custom_user_agent_host]

  if Oneboxer.cache_response_body?(url) && SiteSetting.cache_onebox_user_agent.present?
    fd_options[:default_user_agent] = SiteSetting.cache_onebox_user_agent
  end

  fd_options
end
2013-02-06 03:16:51 +08:00
end