2019-05-03 06:17:27 +08:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2013-05-31 02:34:44 +08:00
|
|
|
# Cooks a post's raw text and answers questions about the resulting HTML:
# whether it contains oneboxes, how many media elements / attachments / links
# it has, which users or groups it mentions and which hosts it links to.
#
# The cooked document is built lazily by #cooked_stripped and reused by every
# query method.
class PostAnalyzer
  # @param raw [String, nil] the raw (uncooked) post body
  # @param topic_id [Object] forwarded to the cooker as the :topic_id option
  def initialize(raw, topic_id)
    @raw = raw
    @topic_id = topic_id
    # Every URL the Oneboxer yields while cooking (see #cook).
    @onebox_urls = []
    # Flipped to true by #cook once a cached onebox is found for any URL.
    @found_oneboxes = false
  end

  # @return [Boolean] whether a previous #cook call found at least one
  #   cached onebox. Does not trigger cooking by itself.
  def found_oneboxes?
    @found_oneboxes
  end

  # Cooks the post if that has not happened yet, then reports whether any
  # onebox was found.
  #
  # @return [Boolean]
  def has_oneboxes?
    return false unless @raw.present?

    # Called for its side effect only: cooking runs the Oneboxer block in
    # #cook, which is what sets @found_oneboxes.
    cooked_stripped
    found_oneboxes?
  end

  # What we use to cook posts
  #
  # @param raw [String] text to cook
  # @param opts [Hash] cooking options, passed on to the cooker.
  #   :cook_method selects the cooker; :invalidate_oneboxes, when truthy,
  #   invalidates the cached (inline) onebox of every URL before it is read.
  # @return [String] the cooked HTML, or +raw+ untouched for the raw_html
  #   cook method
  def cook(raw, opts = {})
    cook_method = opts[:cook_method]
    return raw if cook_method == Post.cook_methods[:raw_html]

    cooked =
      if cook_method == Post.cook_methods[:email]
        EmailCook.new(raw).cook(opts)
      else
        PrettyText.cook(raw, opts)
      end

    result = Oneboxer.apply(cooked) do |url|
      # Remember every URL handed to us so that link/host counting can
      # include it later.
      @onebox_urls << url

      if opts[:invalidate_oneboxes]
        Oneboxer.invalidate(url)
        InlineOneboxer.invalidate(url)
      end

      onebox = Oneboxer.cached_onebox(url)
      @found_oneboxes = true if onebox.present?
      onebox
    end

    cooked = result.to_html if result.changed?
    cooked
  end

  # How many img, video and audio elements are present in the post.
  # Elements carrying any class listed in Post.allowed_image_classes are
  # not counted.
  #
  # @return [Integer]
  def embedded_media_count
    return 0 unless @raw.present?

    # TODO - do we need to look for tags other than img, video and audio?
    cooked_stripped.css("img", "video", "audio").reject do |element|
      dom_class = element["class"]
      dom_class && (Post.allowed_image_classes & dom_class.split).count > 0
    end.count
  end

  # How many attachments are present in the post: "a.attachment" links whose
  # href starts with the store's absolute base URL, plus (for an internal
  # store) those starting with its relative base URL.
  #
  # @return [Integer]
  def attachment_count
    return 0 unless @raw.present?

    store = Discourse.store
    attachments = cooked_stripped.css("a.attachment[href^=\"#{store.absolute_base_url}\"]")
    attachments += cooked_stripped.css("a.attachment[href^=\"#{store.relative_base_url}\"]") if store.internal?
    attachments.count
  end

  # Normalized, de-duplicated names taken from the ".mention" and
  # ".mention-group" elements of the cooked post.
  #
  # A non-empty result is cached; an empty one is recomputed on each call.
  #
  # @return [Array<String>]
  def raw_mentions
    return [] if @raw.blank?
    return @raw_mentions if @raw_mentions.present?

    mentions = cooked_stripped.css('.mention, .mention-group').map do |element|
      text = element.inner_text
      next unless text

      # Drop the first character (presumably the leading "@") before
      # normalizing.
      User.normalize_username(text[1..-1])
    end

    mentions.compact!
    mentions.uniq!
    @raw_mentions = mentions
  end

  # from rack ... compat with ruby 2.2
  #
  # Parses +uri+ with URI::RFC2396_Parser when that class exists, otherwise
  # with URI itself. The parser is created once and kept on the class.
  #
  # @param uri [String]
  # @return [URI::Generic]
  # @raise [URI::Error] subclass raised by the parser for an invalid URI
  def self.parse_uri_rfc2396(uri)
    @parser ||=
      if defined?(URI::RFC2396_Parser)
        URI::RFC2396_Parser.new
      else
        URI
      end
    @parser.parse(uri)
  end

  # The distinct hosts linked in the post, taken from the regular links and
  # the onebox URLs. Links that fail to parse or have no host are skipped.
  #
  # A non-empty result is cached; an empty one is recomputed on each call.
  #
  # @return [Hash{String => Integer}] each linked host mapped to 1
  def linked_hosts
    # raw_links must be evaluated first: it may trigger cooking, which is
    # what fills @onebox_urls.
    all_links = raw_links + @onebox_urls

    return {} if all_links.blank?
    return @linked_hosts if @linked_hosts.present?

    @linked_hosts = {}

    all_links.each do |link|
      begin
        host = self.class.parse_uri_rfc2396(link).host
        @linked_hosts[host] ||= 1 unless host.nil?
      rescue URI::Error
        # An invalid URI does not count as a host
        next
      end
    end

    @linked_hosts
  end

  # Returns an array of all links in a post excluding mentions
  #
  # A non-empty result is cached; an empty one is recomputed on each call.
  #
  # @return [Array<String>] the href of every remaining "a" element
  #   ("" when the attribute is missing)
  def raw_links
    return [] unless @raw.present?
    return @raw_links if @raw_links.present?

    @raw_links =
      cooked_stripped
        .css("a")
        .reject { |anchor| link_is_a_mention?(anchor) } # Don't include @mentions in the link count
        .map { |anchor| anchor['href'].to_s }
  end

  # How many links are present in the post
  #
  # @return [Integer] regular links plus onebox URLs
  def link_count
    # raw_links is evaluated first so that cooking has filled @onebox_urls.
    raw_links.size + @onebox_urls.size
  end

  # The cooked post parsed as an HTML5 fragment, with these nodes removed:
  # mentions inside <pre>, quote titles, mentions and group mentions inside
  # quotes, oneboxes and elided content. Built once and memoized.
  #
  # @return [Nokogiri::HTML5::DocumentFragment]
  def cooked_stripped
    @cooked_stripped ||= begin
      doc = Nokogiri::HTML5.fragment(cook(@raw, topic_id: @topic_id))
      doc.css("pre .mention, aside.quote > .title, aside.quote .mention, aside.quote .mention-group, .onebox, .elided").remove
      doc
    end
  end

  private

  # Whether the link element +l+ is a mention: its class attribute must
  # contain "mention" AND its href must start with "/u/" or "/users/".
  #
  # The class requirement applies to both URL forms. (Previously, operator
  # precedence let any classed link pointing at "/users/..." count as a
  # mention, which silently dropped it from the link count.)
  #
  # @param l [#[]] element exposing "class" and "href" attributes
  # @return [Boolean]
  def link_is_a_mention?(l)
    return false unless l['class'].to_s.include?('mention')

    l['href'].to_s.start_with?('/u/', '/users/')
  end
end
|