mirror of
https://github.com/discourse/discourse.git
synced 2024-12-05 08:30:12 +08:00
38cbca3f67
This did not work sometimes if a topic had the same URL with and without query params because it did not try to select the best matching URL.
133 lines
3.9 KiB
Ruby
133 lines
3.9 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require 'ipaddr'
|
|
require 'url_helper'
|
|
|
|
class TopicLinkClick < ActiveRecord::Base
|
|
belongs_to :topic_link, counter_cache: :clicks
|
|
belongs_to :user
|
|
|
|
validates_presence_of :topic_link_id
|
|
|
|
ALLOWED_REDIRECT_HOSTNAMES = Set.new(%W{www.youtube.com youtu.be})
|
|
include ActiveSupport::Deprecation::DeprecatedConstantAccessor
|
|
deprecate_constant 'WHITELISTED_REDIRECT_HOSTNAMES', 'TopicLinkClick::ALLOWED_REDIRECT_HOSTNAMES'
|
|
|
|
# Create a click from a URL and post_id
|
|
def self.create_from(args = {})
|
|
url = args[:url][0...TopicLink.max_url_length]
|
|
return nil if url.blank?
|
|
|
|
uri = UrlHelper.relaxed_parse(url)
|
|
urls = Set.new
|
|
urls << url
|
|
if url =~ /^http/
|
|
urls << url.sub(/^https/, 'http')
|
|
urls << url.sub(/^http:/, 'https:')
|
|
urls << UrlHelper.schemaless(url)
|
|
end
|
|
urls << UrlHelper.absolute_without_cdn(url)
|
|
urls << uri.path if uri.try(:host) == Discourse.current_hostname
|
|
|
|
query = url.index('?')
|
|
unless query.nil?
|
|
endpos = url.index('#') || url.size
|
|
urls << url[0..query - 1] + url[endpos..-1]
|
|
end
|
|
|
|
# link can have query params, and analytics can add more to the end:
|
|
i = url.length
|
|
while i = url.rindex('&', i - 1)
|
|
urls << url[0...i]
|
|
end
|
|
|
|
# add a cdn link
|
|
if uri
|
|
if Discourse.asset_host.present?
|
|
cdn_uri = begin
|
|
URI.parse(Discourse.asset_host)
|
|
rescue URI::Error
|
|
end
|
|
|
|
if cdn_uri && cdn_uri.hostname == uri.hostname && uri.path.starts_with?(cdn_uri.path)
|
|
is_cdn_link = true
|
|
urls << uri.path[cdn_uri.path.length..-1]
|
|
end
|
|
end
|
|
|
|
if SiteSetting.Upload.s3_cdn_url.present?
|
|
cdn_uri = begin
|
|
URI.parse(SiteSetting.Upload.s3_cdn_url)
|
|
rescue URI::Error
|
|
end
|
|
|
|
if cdn_uri && cdn_uri.hostname == uri.hostname && uri.path.starts_with?(cdn_uri.path)
|
|
is_cdn_link = true
|
|
path = uri.path[cdn_uri.path.length..-1]
|
|
urls << path
|
|
urls << "#{Discourse.store.absolute_base_url}#{path}"
|
|
end
|
|
end
|
|
end
|
|
|
|
# test for all possible URLs
|
|
link = TopicLink.where(url: urls)
|
|
|
|
# Find the forum topic link
|
|
link = link.where(post_id: args[:post_id]) if args[:post_id].present?
|
|
|
|
# If we don't have a post, just find the first occurrence of the link
|
|
link = link.where(topic_id: args[:topic_id]) if args[:topic_id].present?
|
|
|
|
# select the TopicLink associated to first url
|
|
link = link.order("array_position(ARRAY[#{urls.map { |s| "#{ActiveRecord::Base.connection.quote(s)}" }.join(',')}], url::text)").first
|
|
|
|
# If no link is found...
|
|
unless link.present?
|
|
# ... return the url for relative links or when using the same host
|
|
return url if url =~ /^\/[^\/]/ || uri.try(:host) == Discourse.current_hostname
|
|
|
|
# If we have it somewhere else on the site, just allow the redirect.
|
|
# This is likely due to a onebox of another topic.
|
|
link = TopicLink.find_by(url: url)
|
|
return link.url if link.present?
|
|
|
|
return nil unless uri
|
|
|
|
# Only redirect to allowlisted hostnames
|
|
return url if ALLOWED_REDIRECT_HOSTNAMES.include?(uri.hostname) || is_cdn_link
|
|
|
|
return nil
|
|
end
|
|
|
|
return url if args[:user_id] && link.user_id == args[:user_id]
|
|
|
|
# Rate limit the click counts to once in 24 hours
|
|
rate_key = "link-clicks:#{link.id}:#{args[:user_id] || args[:ip]}"
|
|
if Discourse.redis.setnx(rate_key, "1")
|
|
Discourse.redis.expire(rate_key, 1.day.to_i)
|
|
args[:ip] = nil if args[:user_id]
|
|
create!(topic_link_id: link.id, user_id: args[:user_id], ip_address: args[:ip])
|
|
end
|
|
|
|
url
|
|
end
|
|
|
|
end
|
|
|
|
# == Schema Information
|
|
#
|
|
# Table name: topic_link_clicks
|
|
#
|
|
# id :integer not null, primary key
|
|
# topic_link_id :integer not null
|
|
# user_id :integer
|
|
# created_at :datetime not null
|
|
# updated_at :datetime not null
|
|
# ip_address :inet
|
|
#
|
|
# Indexes
|
|
#
|
|
# by_link (topic_link_id)
|
|
#
|