2013-02-06 03:16:51 +08:00
|
|
|
require 'uri'
|
|
|
|
require_dependency 'slug'
|
|
|
|
|
|
|
|
# A link found in the cooked HTML of a post.
#
# Two kinds of rows are stored:
# * regular rows (reflection = false) are attached to the post that contains
#   the link (post_id / topic_id) and point at the link target;
# * reflection rows (reflection = true) are attached to the *linked* topic and
#   point back at the linking post through link_topic_id / link_post_id.
class TopicLink < ActiveRecord::Base

  # Maximum length of a link's host; extract_from skips links whose host is longer.
  def self.max_domain_length
    100
  end

  # Maximum stored URL length; extract_from truncates longer URLs to this size.
  def self.max_url_length
    500
  end

  belongs_to :topic
  belongs_to :user
  belongs_to :post
  belongs_to :link_topic, class_name: 'Topic'
  belongs_to :link_post, class_name: 'Post'

  validates_presence_of :url

  # Same limit extract_from truncates to.
  validates_length_of :url, maximum: max_url_length

  validates_uniqueness_of :url, scope: [:topic_id, :post_id]

  has_many :topic_link_clicks, dependent: :destroy

  validate :link_to_self

  after_commit :crawl_link_title

  # Validation: make sure a topic can't link to itself.
  def link_to_self
    errors.add(:base, "can't link to the same topic") if topic_id == link_topic_id
  end

  # Aggregated links for a single topic, ordered by click count (descending).
  #
  # guardian - object responding to secure_category_ids; the ids are handed to
  #            builder.secure_category to restrict the result.
  # topic_id - id of the topic whose links are listed.
  #
  # Returns the executed query's rows as an Array (builder.exec.to_a).
  def self.topic_map(guardian, topic_id)

    # Sam: complicated reports are really hard in AR
    builder = SqlBuilder.new("SELECT ftl.url,
                     COALESCE(ft.title, ftl.title) AS title,
                     ftl.link_topic_id,
                     ftl.reflection,
                     ftl.internal,
                     ftl.domain,
                     MIN(ftl.user_id) AS user_id,
                     SUM(clicks) AS clicks
              FROM topic_links AS ftl
              LEFT JOIN topics AS ft ON ftl.link_topic_id = ft.id
              LEFT JOIN categories AS c ON c.id = ft.category_id
              /*where*/
              GROUP BY ftl.url, ft.title, ftl.title, ftl.link_topic_id, ftl.reflection, ftl.internal, ftl.domain
              ORDER BY clicks DESC")

    builder.where('ftl.topic_id = :topic_id', topic_id: topic_id)
    builder.where('ft.deleted_at IS NULL')
    # External links have no joined topic, hence the COALESCE default.
    builder.where("COALESCE(ft.archetype, 'regular') <> :archetype", archetype: Archetype.private_message)

    builder.secure_category(guardian.secure_category_ids)

    builder.exec.to_a
  end

  # Link details for a set of posts, grouped by post.
  #
  # guardian - object responding to secure_category_ids (see topic_map).
  # topic    - not used by this method; kept so existing callers keep working.
  # posts    - collection of objects responding to id.
  #
  # Returns a Hash of post_id => Array of
  # { url:, clicks:, title:, internal:, reflection: } hashes, or {} when posts
  # is blank.
  def self.counts_for(guardian, topic, posts)
    return {} if posts.blank?

    # Sam: I don't know how to write this cleanly in AR,
    #   in particular the securing logic is tricky and would fallback to SQL anyway
    builder = SqlBuilder.new("SELECT
                      l.post_id,
                      l.url,
                      l.clicks,
                      COALESCE(t.title, l.title) AS title,
                      l.internal,
                      l.reflection,
                      l.domain
              FROM topic_links l
              LEFT JOIN topics t ON t.id = l.link_topic_id
              LEFT JOIN categories AS c ON c.id = t.category_id
              /*where*/
              ORDER BY reflection ASC, clicks DESC")

    builder.where('t.deleted_at IS NULL')
    builder.where("COALESCE(t.archetype, 'regular') <> :archetype", archetype: Archetype.private_message)

    # not certain if pluck is right, cause it may interfere with caching
    builder.where('l.post_id IN (:post_ids)', post_ids: posts.map(&:id))
    builder.secure_category(guardian.secure_category_ids)

    builder.map_exec(OpenStruct).each_with_object({}) do |l, result|
      result[l.post_id] ||= []
      result[l.post_id] << { url: l.url,
                             clicks: l.clicks,
                             title: l.title,
                             internal: l.internal,
                             reflection: l.reflection }
    end
  end

  # Extract any urls in the post body and synchronise this post's TopicLink rows
  # with them: rows for links found are created, rows (regular and reflection)
  # for links that are no longer present are deleted. Everything runs inside a
  # single transaction.
  #
  # post - the post to scan (its cooked HTML is used); nothing happens when it
  #        is not present.
  def self.extract_from(post)
    return unless post.present?

    TopicLink.transaction do
      added_urls = []
      # ids of the reflection rows that are still backed by a link in this post
      reflected_ids = []

      PrettyText
        .extract_links(post.cooked)
        .map { |u| [u, URI.parse(u.url)] rescue nil }
        .reject { |_, uri| uri.nil? }
        .uniq { |_, uri| uri }
        .each do |link, parsed|
        begin
          url = link.url
          internal = false
          topic = nil
          topic_id = nil
          post_number = nil

          if Discourse.store.has_been_uploaded?(url)
            internal = Discourse.store.internal?
          elsif parsed.host == Discourse.current_hostname || !parsed.host
            internal = true

            route = Rails.application.routes.recognize_path(parsed.path)

            # We aren't interested in tracking internal links to users
            next if route[:controller] == 'users'

            post_number = route[:post_number] || 1
            topic = Topic.find_by(id: route[:topic_id])

            if topic
              # Use the record's id rather than the raw route parameter: route
              # parameters are strings, which would never compare equal to the
              # integer post.topic_id in the self-link check below.
              topic_id = topic.id

              # Store the canonical URL
              url = "#{Discourse.base_url_no_prefix}#{topic.relative_url}"
              url << "/#{post_number}" if post_number.to_i > 1
            end
          end

          # Skip linking to ourselves
          next if topic_id == post.topic_id

          reflected_post = nil
          if post_number && topic_id
            reflected_post = Post.find_by(topic_id: topic_id, post_number: post_number.to_i)
          end

          url = url[0...TopicLink.max_url_length]
          next if parsed.host && parsed.host.length > TopicLink.max_domain_length

          added_urls << url
          # A failed validation (e.g. the row already exists) is deliberately
          # ignored: create returns the unsaved record instead of raising.
          TopicLink.create(post_id: post.id,
                           user_id: post.user_id,
                           topic_id: post.topic_id,
                           url: url,
                           domain: parsed.host || Discourse.current_hostname,
                           internal: internal,
                           link_topic_id: topic_id,
                           link_post_id: reflected_post.try(:id),
                           quote: link.is_quote
                          )

          # Create the reflection if we can
          if topic && post.topic && post.topic.archetype != 'private_message' && topic.archetype != 'private_message'
            prefix = Discourse.base_url_no_prefix
            reflected_url = "#{prefix}#{post.topic.relative_url(post.post_number)}"
            reflected_post_id = reflected_post.try(:id)

            # Find-or-create so that we learn the row id even when the
            # reflection already exists from an earlier extraction.
            reflection = TopicLink.find_by(topic_id: topic_id,
                                           post_id: reflected_post_id,
                                           url: reflected_url)

            reflection ||= TopicLink.create(user_id: post.user_id,
                                            topic_id: topic_id,
                                            post_id: reflected_post_id,
                                            url: reflected_url,
                                            domain: Discourse.current_hostname,
                                            reflection: true,
                                            internal: true,
                                            link_topic_id: post.topic_id,
                                            link_post_id: post.id)

            # id is nil when the create above failed validation
            reflected_ids << reflection.id
          end

        rescue URI::InvalidURIError
          # if the URI is invalid, don't store it.
        rescue ActionController::RoutingError
          # If we can't find the route, no big deal
        end
      end

      # Remove links that aren't there anymore
      if added_urls.present?
        TopicLink.delete_all ["(url not in (:urls)) AND (post_id = :post_id AND NOT reflection)", urls: added_urls, post_id: post.id]

        # Reflections are matched by id, not by url: every reflection created
        # for this post carries the same url (the post's own), so a url filter
        # cannot tell a stale reflection from a live one. The empty case gets
        # its own statement because an empty list is bound as NULL and
        # "NOT IN (NULL)" matches no rows.
        reflected_ids.compact!
        if reflected_ids.present?
          TopicLink.delete_all ["(id not in (:reflected_ids)) AND (link_post_id = :post_id AND reflection)", reflected_ids: reflected_ids, post_id: post.id]
        else
          TopicLink.delete_all ["link_post_id = :post_id AND reflection", post_id: post.id]
        end
      else
        TopicLink.delete_all ["(post_id = :post_id AND NOT reflection) OR (link_post_id = :post_id AND reflection)", post_id: post.id]
      end
    end
  end

  # Crawl a link's title after it's saved
  def crawl_link_title
    Jobs.enqueue(:crawl_topic_link, topic_link_id: id)
  end

end
|
2013-05-24 10:48:32 +08:00
|
|
|
|
|
|
|
# == Schema Information
|
|
|
|
#
|
|
|
|
# Table name: topic_links
|
|
|
|
#
|
|
|
|
# id :integer not null, primary key
|
|
|
|
# topic_id :integer not null
|
|
|
|
# post_id :integer
|
|
|
|
# user_id :integer not null
|
|
|
|
# url :string(500) not null
|
|
|
|
# domain :string(100) not null
|
|
|
|
# internal :boolean default(FALSE), not null
|
|
|
|
# link_topic_id :integer
|
2014-08-27 13:19:25 +08:00
|
|
|
# created_at :datetime not null
|
|
|
|
# updated_at :datetime not null
|
2013-05-24 10:48:32 +08:00
|
|
|
# reflection :boolean default(FALSE)
|
|
|
|
# clicks :integer default(0), not null
|
|
|
|
# link_post_id :integer
|
2016-04-01 05:33:25 +08:00
|
|
|
# title :string
|
2014-04-08 23:35:44 +08:00
|
|
|
# crawled_at :datetime
|
2014-07-15 09:29:44 +08:00
|
|
|
# quote :boolean default(FALSE), not null
|
2013-05-24 10:48:32 +08:00
|
|
|
#
|
|
|
|
# Indexes
|
|
|
|
#
|
2015-09-18 08:41:10 +08:00
|
|
|
# index_topic_links_on_link_post_id_and_reflection (link_post_id,reflection)
|
|
|
|
# index_topic_links_on_post_id (post_id)
|
|
|
|
# index_topic_links_on_topic_id (topic_id)
|
|
|
|
# unique_post_links (topic_id,post_id,url) UNIQUE
|
2013-05-24 10:48:32 +08:00
|
|
|
#
|