mirror of
https://github.com/discourse/discourse.git
synced 2025-01-08 17:26:18 +08:00
142571bba0
* `rescue nil` is a really bad pattern to use in our code base. We should rescue errors that we expect the code to throw and not rescue everything because we're unsure of what errors the code would throw. This would reduce the amount of pain we face when debugging why something isn't working as expexted. I've been bitten countless of times by errors being swallowed as a result during debugging sessions.
134 lines
3.7 KiB
Ruby
134 lines
3.7 KiB
Ruby
require_dependency 'discourse'
|
|
require 'ipaddr'
|
|
require 'url_helper'
|
|
|
|
class TopicLinkClick < ActiveRecord::Base
|
|
belongs_to :topic_link, counter_cache: :clicks
|
|
belongs_to :user
|
|
|
|
validates_presence_of :topic_link_id
|
|
validates_presence_of :ip_address
|
|
|
|
WHITELISTED_REDIRECT_HOSTNAMES = Set.new(%W{www.youtube.com youtu.be})
|
|
|
|
# Create a click from a URL and post_id
|
|
def self.create_from(args = {})
|
|
url = args[:url][0...TopicLink.max_url_length]
|
|
return nil if url.blank?
|
|
|
|
uri = begin
|
|
URI.parse(url)
|
|
rescue URI::InvalidURIError
|
|
end
|
|
|
|
urls = Set.new
|
|
urls << url
|
|
if url =~ /^http/
|
|
urls << url.sub(/^https/, 'http')
|
|
urls << url.sub(/^http:/, 'https:')
|
|
urls << UrlHelper.schemaless(url)
|
|
end
|
|
urls << UrlHelper.absolute_without_cdn(url)
|
|
urls << uri.path if uri.try(:host) == Discourse.current_hostname
|
|
|
|
query = url.index('?')
|
|
unless query.nil?
|
|
endpos = url.index('#') || url.size
|
|
urls << url[0..query - 1] + url[endpos..-1]
|
|
end
|
|
|
|
# link can have query params, and analytics can add more to the end:
|
|
i = url.length
|
|
while i = url.rindex('&', i - 1)
|
|
urls << url[0...i]
|
|
end
|
|
|
|
# add a cdn link
|
|
if uri
|
|
if Discourse.asset_host.present?
|
|
cdn_uri = begin
|
|
URI.parse(Discourse.asset_host)
|
|
rescue URI::InvalidURIError
|
|
end
|
|
|
|
if cdn_uri && cdn_uri.hostname == uri.hostname && uri.path.starts_with?(cdn_uri.path)
|
|
is_cdn_link = true
|
|
urls << uri.path[cdn_uri.path.length..-1]
|
|
end
|
|
end
|
|
|
|
if SiteSetting.Upload.s3_cdn_url.present?
|
|
cdn_uri = begin
|
|
URI.parse(SiteSetting.Upload.s3_cdn_url)
|
|
rescue URI::InvalidURIError
|
|
end
|
|
|
|
if cdn_uri && cdn_uri.hostname == uri.hostname && uri.path.starts_with?(cdn_uri.path)
|
|
is_cdn_link = true
|
|
path = uri.path[cdn_uri.path.length..-1]
|
|
urls << path
|
|
urls << "#{Discourse.store.absolute_base_url}#{path}"
|
|
end
|
|
end
|
|
end
|
|
|
|
link = TopicLink.select([:id, :user_id])
|
|
|
|
# test for all possible URLs
|
|
link = link.where(Array.new(urls.count, "url = ?").join(" OR "), *urls)
|
|
|
|
# Find the forum topic link
|
|
link = link.where(post_id: args[:post_id]) if args[:post_id].present?
|
|
|
|
# If we don't have a post, just find the first occurance of the link
|
|
link = link.where(topic_id: args[:topic_id]) if args[:topic_id].present?
|
|
link = link.first
|
|
|
|
# If no link is found...
|
|
unless link.present?
|
|
# ... return the url for relative links or when using the same host
|
|
return url if url =~ /^\/[^\/]/ || uri.try(:host) == Discourse.current_hostname
|
|
|
|
# If we have it somewhere else on the site, just allow the redirect.
|
|
# This is likely due to a onebox of another topic.
|
|
link = TopicLink.find_by(url: url)
|
|
return link.url if link.present?
|
|
|
|
return nil unless uri
|
|
|
|
# Only redirect to whitelisted hostnames
|
|
return url if WHITELISTED_REDIRECT_HOSTNAMES.include?(uri.hostname) || is_cdn_link
|
|
|
|
return nil
|
|
end
|
|
|
|
return url if args[:user_id] && link.user_id == args[:user_id]
|
|
|
|
# Rate limit the click counts to once in 24 hours
|
|
rate_key = "link-clicks:#{link.id}:#{args[:user_id] || args[:ip]}"
|
|
if $redis.setnx(rate_key, "1")
|
|
$redis.expire(rate_key, 1.day.to_i)
|
|
create!(topic_link_id: link.id, user_id: args[:user_id], ip_address: args[:ip])
|
|
end
|
|
|
|
url
|
|
end
|
|
|
|
end
|
|
|
|
# == Schema Information
|
|
#
|
|
# Table name: topic_link_clicks
|
|
#
|
|
# id :integer not null, primary key
|
|
# topic_link_id :integer not null
|
|
# user_id :integer
|
|
# created_at :datetime not null
|
|
# updated_at :datetime not null
|
|
# ip_address :inet not null
|
|
#
|
|
# Indexes
|
|
#
|
|
# by_link (topic_link_id)
|
|
#
|