# discourse/app/models/post_analyzer.rb
# 148 lines, 3.5 KiB, Ruby

require_dependency 'oneboxer'
require_dependency 'email_cook'
# Inspects the raw text of a post by cooking it to HTML and then counting or
# extracting images, attachments, mentions, links and linked hosts from the
# resulting document. Onebox URLs encountered while cooking are collected as a
# side effect of #cook.
class PostAnalyzer
  # raw      - the raw (uncooked) post text; may be blank.
  # topic_id - forwarded to the cooker as the :topic_id option.
  def initialize(raw, topic_id)
    @raw = raw
    @topic_id = topic_id
    @onebox_urls = []
  end

  # True when at least one URL has been yielded by Oneboxer during a cook.
  def found_oneboxes?
    @onebox_urls.present?
  end

  # Cooks the post (memoized) so that onebox URLs get collected, then reports
  # whether any were found. A blank post never has oneboxes.
  def has_oneboxes?
    return false unless @raw.present?

    cooked_stripped
    found_oneboxes?
  end

  # What we use to cook posts.
  #
  # raw  - the text to cook.
  # opts - options hash; :cook_method selects the cooker, :invalidate_oneboxes
  #        forces each onebox URL to be invalidated before it is looked up.
  #        The whole hash is also passed through to the selected cooker.
  #
  # Returns the cooked HTML string, or +raw+ untouched for the raw_html cook
  # method. Every URL yielded by Oneboxer is appended to @onebox_urls.
  def cook(raw, opts = {})
    cook_method = opts[:cook_method]
    return raw if cook_method == Post.cook_methods[:raw_html]

    cooked =
      if cook_method == Post.cook_methods[:email]
        EmailCook.new(raw).cook(opts)
      else
        PrettyText.cook(raw, opts)
      end

    result = Oneboxer.apply(cooked) do |url|
      @onebox_urls << url
      Oneboxer.invalidate(url) if opts[:invalidate_oneboxes]
      Oneboxer.cached_onebox(url)
    end

    # Only re-serialize when Oneboxer actually changed the document.
    result.changed? ? result.to_html : cooked
  end

  # How many images are present in the post, ignoring images that carry one of
  # Post.white_listed_image_classes.
  def image_count
    return 0 unless @raw.present?

    cooked_stripped.css("img").reject do |img|
      dom_class = img["class"]
      dom_class && (Post.white_listed_image_classes & dom_class.split).any?
    end.count
  end

  # How many attachments are present in the post. Attachment links are matched
  # by the store's absolute base URL and, for an internal store, also by its
  # relative base URL.
  def attachment_count
    return 0 unless @raw.present?

    attachments = cooked_stripped.css("a.attachment[href^=\"#{Discourse.store.absolute_base_url}\"]")
    attachments += cooked_stripped.css("a.attachment[href^=\"#{Discourse.store.relative_base_url}\"]") if Discourse.store.internal?
    attachments.count
  end

  # Returns the unique, downcased names of all user and group mentions in the
  # post. The first character of each mention's text is dropped (presumably
  # the "@" sigil - confirm against the cooker's mention markup).
  def raw_mentions
    return [] if @raw.blank?
    # Memoize on nil rather than emptiness so a post without mentions is not
    # re-scanned on every call.
    return @raw_mentions if @raw_mentions

    names = cooked_stripped.css('.mention, .mention-group').map do |node|
      text = node.inner_text
      name = text && text[1..-1]
      name && name.downcase
    end

    @raw_mentions = names.compact.uniq
  end

  # from rack ... compat with ruby 2.2
  #
  # Parses +uri+ with the RFC 2396 parser when the running Ruby provides one,
  # falling back to URI itself otherwise. The parser is cached on the class.
  def self.parse_uri_rfc2396(uri)
    @parser ||= defined?(URI::RFC2396_Parser) ? URI::RFC2396_Parser.new : URI
    @parser.parse(uri)
  end

  # Returns a hash whose keys are the hosts linked in the post (plain links
  # plus onebox URLs). Every host maps to 1, no matter how many links point at
  # it. Links that cannot be parsed or that have no host are skipped.
  def linked_hosts
    # raw_links is evaluated first; it triggers the cook that fills
    # @onebox_urls.
    all_links = raw_links + @onebox_urls
    return {} if all_links.blank?
    return @linked_hosts if @linked_hosts.present?

    @linked_hosts = {}
    all_links.each do |url|
      begin
        host = self.class.parse_uri_rfc2396(url).host
        @linked_hosts[host] ||= 1 if host
      rescue URI::InvalidURIError, URI::InvalidComponentError
        # An invalid URI does not count as a host
        next
      end
    end

    @linked_hosts
  end

  # Returns an array of the href of every link in the post, excluding
  # @mentions. A link without an href contributes an empty string.
  def raw_links
    return [] unless @raw.present?
    # Memoize on nil rather than emptiness so a post without links is not
    # re-scanned on every call.
    return @raw_links if @raw_links

    @raw_links = cooked_stripped.css("a")
      .reject { |link| link_is_a_mention?(link) }
      .map { |link| link['href'].to_s }
  end

  # How many links are present in the post
  def link_count
    raw_links.size
  end

  private

  # Cooks @raw once and returns the parsed HTML fragment with content removed
  # that must not be counted: mentions inside <pre>, quote titles, mentions
  # inside quotes, oneboxes and elided sections.
  def cooked_stripped
    @cooked_stripped ||= begin
      doc = Nokogiri::HTML.fragment(cook(@raw, topic_id: @topic_id))
      doc.css("pre .mention, aside.quote > .title, aside.quote .mention, .onebox, .elided").remove
      doc
    end
  end

  # True when +l+ (anything answering ['class'] and ['href']) is a mention
  # link: its class attribute contains "mention" AND its href starts with
  # "/u/" or "/users/".
  #
  # The previous expression `a && b || c` parsed as `(a && b) || c`, so any
  # link with a class and a "/users/" href was treated as a mention even
  # without the mention class.
  def link_is_a_mention?(l)
    # to_s makes a missing or blank class/href simply fail the checks.
    return false unless l['class'].to_s.include?('mention')

    l['href'].to_s.start_with?('/u/', '/users/')
  end
end