mirror of
https://github.com/discourse/discourse.git
synced 2024-12-15 08:53:40 +08:00
30990006a9
This reduces chances of errors where consumers of strings mutate inputs and reduces memory usage of the app. Test suite passes now, but there may be some stuff left, so we will run a few sites on a branch prior to merging
324 lines
10 KiB
Ruby
324 lines
10 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require 'uri'
|
|
require_dependency 'slug'
|
|
require_dependency 'discourse'
|
|
|
|
class TopicLink < ActiveRecord::Base
|
|
|
|
def self.max_domain_length
|
|
100
|
|
end
|
|
|
|
def self.max_url_length
|
|
500
|
|
end
|
|
|
|
belongs_to :topic
|
|
belongs_to :user
|
|
belongs_to :post
|
|
belongs_to :link_topic, class_name: 'Topic'
|
|
belongs_to :link_post, class_name: 'Post'
|
|
|
|
validates_presence_of :url
|
|
|
|
validates_length_of :url, maximum: 500
|
|
|
|
validates_uniqueness_of :url, scope: [:topic_id, :post_id]
|
|
|
|
has_many :topic_link_clicks, dependent: :destroy
|
|
|
|
validate :link_to_self
|
|
|
|
after_commit :crawl_link_title
|
|
|
|
# Make sure a topic can't link to itself
|
|
def link_to_self
|
|
errors.add(:base, "can't link to the same topic") if (topic_id == link_topic_id)
|
|
end
|
|
|
|
def self.topic_map(guardian, topic_id)
|
|
|
|
# Sam: complicated reports are really hard in AR
|
|
builder = DB.build(<<~SQL)
|
|
SELECT ftl.url,
|
|
COALESCE(ft.title, ftl.title) AS title,
|
|
ftl.link_topic_id,
|
|
ftl.reflection,
|
|
ftl.internal,
|
|
ftl.domain,
|
|
MIN(ftl.user_id) AS user_id,
|
|
SUM(clicks) AS clicks
|
|
FROM topic_links AS ftl
|
|
LEFT JOIN topics AS ft ON ftl.link_topic_id = ft.id
|
|
LEFT JOIN categories AS c ON c.id = ft.category_id
|
|
/*where*/
|
|
GROUP BY ftl.url, ft.title, ftl.title, ftl.link_topic_id, ftl.reflection, ftl.internal, ftl.domain
|
|
ORDER BY clicks DESC, count(*) DESC
|
|
LIMIT 50
|
|
SQL
|
|
|
|
builder.where('ftl.topic_id = :topic_id', topic_id: topic_id)
|
|
builder.where('ft.deleted_at IS NULL')
|
|
# note that ILIKE means "case insensitive LIKE"
|
|
builder.where("NOT(ftl.url ILIKE '%.png' OR ftl.url ILIKE '%.jpg' OR ftl.url ILIKE '%.gif')")
|
|
builder.where("COALESCE(ft.archetype, 'regular') <> :archetype", archetype: Archetype.private_message)
|
|
builder.where("clicks > 0")
|
|
|
|
builder.secure_category(guardian.secure_category_ids)
|
|
|
|
builder.query
|
|
|
|
end
|
|
|
|
def self.counts_for(guardian, topic, posts)
|
|
return {} if posts.blank?
|
|
|
|
# Sam: this is not tidy in AR and also happens to be a critical path
|
|
# for topic view
|
|
builder = DB.build("SELECT
|
|
l.post_id,
|
|
l.url,
|
|
l.clicks,
|
|
COALESCE(t.title, l.title) AS title,
|
|
l.internal,
|
|
l.reflection,
|
|
l.domain
|
|
FROM topic_links l
|
|
LEFT JOIN topics t ON t.id = l.link_topic_id
|
|
LEFT JOIN categories AS c ON c.id = t.category_id
|
|
/*where*/
|
|
ORDER BY reflection ASC, clicks DESC")
|
|
|
|
builder.where('t.deleted_at IS NULL')
|
|
builder.where("COALESCE(t.archetype, 'regular') <> :archetype", archetype: Archetype.private_message)
|
|
|
|
# not certain if pluck is right, cause it may interfere with caching
|
|
builder.where('l.post_id in (:post_ids)', post_ids: posts.map(&:id))
|
|
builder.secure_category(guardian.secure_category_ids)
|
|
|
|
result = {}
|
|
builder.query.each do |l|
|
|
result[l.post_id] ||= []
|
|
result[l.post_id] << { url: l.url,
|
|
clicks: l.clicks,
|
|
title: l.title,
|
|
internal: l.internal,
|
|
reflection: l.reflection }
|
|
end
|
|
result
|
|
end
|
|
|
|
def self.extract_from(post)
|
|
return if post.blank? || post.whisper?
|
|
|
|
current_urls = []
|
|
reflected_ids = []
|
|
|
|
PrettyText
|
|
.extract_links(post.cooked)
|
|
.map do |u|
|
|
uri = UrlHelper.relaxed_parse(u.url)
|
|
[u, uri]
|
|
end
|
|
.reject { |_, p| p.nil? || "mailto".freeze == p.scheme }
|
|
.uniq { |_, p| p }
|
|
.each do |link, parsed|
|
|
|
|
TopicLink.transaction do
|
|
begin
|
|
url, reflected_id = self.ensure_entry_for(post, link, parsed)
|
|
current_urls << url unless url.nil?
|
|
reflected_ids << reflected_id unless reflected_id.nil?
|
|
rescue URI::Error
|
|
# if the URI is invalid, don't store it.
|
|
rescue ActionController::RoutingError
|
|
# If we can't find the route, no big deal
|
|
end
|
|
end
|
|
end
|
|
|
|
self.cleanup_entries(post, current_urls, reflected_ids)
|
|
|
|
end
|
|
|
|
# Crawl a link's title after it's saved
|
|
def crawl_link_title
|
|
Jobs.enqueue(:crawl_topic_link, topic_link_id: id)
|
|
end
|
|
|
|
def self.duplicate_lookup(topic)
|
|
results = TopicLink
|
|
.includes(:post, :user)
|
|
.joins(:post, :user)
|
|
.where("posts.id IS NOT NULL AND users.id IS NOT NULL")
|
|
.where(topic_id: topic.id, reflection: false)
|
|
.last(200)
|
|
|
|
lookup = {}
|
|
results.each do |tl|
|
|
normalized = tl.url.downcase.sub(/^https?:\/\//, '').sub(/\/$/, '')
|
|
lookup[normalized] = { domain: tl.domain,
|
|
username: tl.user.username_lower,
|
|
posted_at: tl.post.created_at,
|
|
post_number: tl.post.post_number }
|
|
end
|
|
|
|
lookup
|
|
end
|
|
|
|
private
|
|
|
|
def self.ensure_entry_for(post, link, parsed)
|
|
url = link.url
|
|
internal = false
|
|
topic_id = nil
|
|
post_number = nil
|
|
|
|
if upload = Upload.get_from_url(url)
|
|
internal = Discourse.store.internal?
|
|
# Store the same URL that will be used in the cooked version of the post
|
|
url = UrlHelper.cook_url(upload.url)
|
|
elsif route = Discourse.route_for(parsed)
|
|
internal = true
|
|
|
|
# We aren't interested in tracking internal links to users
|
|
return nil if route[:controller] == 'users'
|
|
|
|
topic_id = route[:topic_id].to_i
|
|
post_number = route[:post_number] || 1
|
|
|
|
# Store the canonical URL
|
|
topic = Topic.find_by(id: topic_id)
|
|
topic_id = nil unless topic
|
|
|
|
if topic.present?
|
|
url = +"#{Discourse.base_url_no_prefix}#{topic.relative_url}"
|
|
url << "/#{post_number}" if post_number.to_i > 1
|
|
end
|
|
end
|
|
|
|
# Skip linking to ourselves
|
|
return nil if topic_id == post.topic_id
|
|
|
|
reflected_post = nil
|
|
if post_number && topic_id
|
|
reflected_post = Post.find_by(topic_id: topic_id, post_number: post_number.to_i)
|
|
end
|
|
|
|
url = url[0...TopicLink.max_url_length]
|
|
return nil if parsed && parsed.host && parsed.host.length > TopicLink.max_domain_length
|
|
|
|
unless TopicLink.exists?(topic_id: post.topic_id, post_id: post.id, url: url)
|
|
file_extension = File.extname(parsed.path)[1..10].downcase unless parsed.path.nil? || File.extname(parsed.path).empty?
|
|
begin
|
|
TopicLink.create!(post_id: post.id,
|
|
user_id: post.user_id,
|
|
topic_id: post.topic_id,
|
|
url: url,
|
|
domain: parsed.host || Discourse.current_hostname,
|
|
internal: internal,
|
|
link_topic_id: topic_id,
|
|
link_post_id: reflected_post.try(:id),
|
|
quote: link.is_quote,
|
|
extension: file_extension)
|
|
rescue ActiveRecord::RecordNotUnique
|
|
# it's fine
|
|
end
|
|
end
|
|
|
|
reflected_id = nil
|
|
|
|
# Create the reflection if we can
|
|
if topic_id.present?
|
|
topic = Topic.find_by(id: topic_id)
|
|
|
|
if topic && post.topic && topic.archetype != 'private_message' && post.topic.archetype != 'private_message' && post.topic.visible?
|
|
prefix = Discourse.base_url_no_prefix
|
|
reflected_url = "#{prefix}#{post.topic.relative_url(post.post_number)}"
|
|
tl = TopicLink.find_by(topic_id: topic_id,
|
|
post_id: reflected_post.try(:id),
|
|
url: reflected_url)
|
|
|
|
unless tl
|
|
tl = TopicLink.create(user_id: post.user_id,
|
|
topic_id: topic_id,
|
|
post_id: reflected_post.try(:id),
|
|
url: reflected_url,
|
|
domain: Discourse.current_hostname,
|
|
reflection: true,
|
|
internal: true,
|
|
link_topic_id: post.topic_id,
|
|
link_post_id: post.id)
|
|
|
|
end
|
|
|
|
reflected_id = tl.id if tl.persisted?
|
|
end
|
|
end
|
|
|
|
[url, reflected_id]
|
|
end
|
|
|
|
def self.cleanup_entries(post, current_urls, current_reflected_ids)
|
|
# Remove links that aren't there anymore
|
|
if current_urls.present?
|
|
TopicLink.where(
|
|
"(url not in (:urls)) AND (post_id = :post_id AND NOT reflection)",
|
|
urls: current_urls, post_id: post.id
|
|
).delete_all
|
|
|
|
current_reflected_ids.compact!
|
|
if current_reflected_ids.present?
|
|
TopicLink.where(
|
|
"(id not in (:reflected_ids)) AND (link_post_id = :post_id AND reflection)",
|
|
reflected_ids: current_reflected_ids, post_id: post.id
|
|
).delete_all
|
|
else
|
|
TopicLink
|
|
.where("link_post_id = :post_id AND reflection", post_id: post.id)
|
|
.delete_all
|
|
end
|
|
else
|
|
TopicLink
|
|
.where(
|
|
"(post_id = :post_id AND NOT reflection) OR (link_post_id = :post_id AND reflection)",
|
|
post_id: post.id
|
|
)
|
|
.delete_all
|
|
end
|
|
end
|
|
end
|
|
|
|
# == Schema Information
|
|
#
|
|
# Table name: topic_links
|
|
#
|
|
# id :integer not null, primary key
|
|
# topic_id :integer not null
|
|
# post_id :integer
|
|
# user_id :integer not null
|
|
# url :string(500) not null
|
|
# domain :string(100) not null
|
|
# internal :boolean default(FALSE), not null
|
|
# link_topic_id :integer
|
|
# created_at :datetime not null
|
|
# updated_at :datetime not null
|
|
# reflection :boolean default(FALSE)
|
|
# clicks :integer default(0), not null
|
|
# link_post_id :integer
|
|
# title :string
|
|
# crawled_at :datetime
|
|
# quote :boolean default(FALSE), not null
|
|
# extension :string(10)
|
|
#
|
|
# Indexes
|
|
#
|
|
# index_topic_links_on_extension (extension)
|
|
# index_topic_links_on_link_post_id_and_reflection (link_post_id,reflection)
|
|
# index_topic_links_on_post_id (post_id)
|
|
# index_topic_links_on_topic_id (topic_id)
|
|
# index_topic_links_on_user_id (user_id)
|
|
# unique_post_links (topic_id,post_id,url) UNIQUE
|
|
#
|