discourse/app/models/topic_embed.rb
Josh Soref 59097b207f
DEV: Correct typos and spelling mistakes (#12812)
Over the years we accrued many spelling mistakes in the code base. 

This PR attempts to fix spelling mistakes and typos in all areas of the code that are extremely safe to change 

- comments
- test descriptions
- other low risk areas
2021-05-21 11:43:47 +10:00

281 lines
8.0 KiB
Ruby

# frozen_string_literal: true
class TopicEmbed < ActiveRecord::Base
include Trashable
belongs_to :topic
belongs_to :post
validates_presence_of :embed_url
validates_uniqueness_of :embed_url
before_validation(on: :create) do
unless (topic_embed = TopicEmbed.with_deleted.where('deleted_at IS NOT NULL AND embed_url = ?', embed_url).first).nil?
topic_embed.destroy!
end
end
class FetchResponse
attr_accessor :title, :body, :author
end
def self.normalize_url(url)
url.downcase.sub(/\/$/, '').sub(/\-+/, '-').strip
end
def self.imported_from_html(url)
"\n<hr>\n<small>#{I18n.t('embed.imported_from', link: "<a href='#{url}'>#{url}</a>")}</small>\n"
end
# Import an article from a source (RSS/Atom/Other)
def self.import(user, url, title, contents)
return unless url =~ /^https?\:\/\//
if SiteSetting.embed_truncate
contents = first_paragraph_from(contents)
end
contents ||= ''
contents = +contents << imported_from_html(url)
url = normalize_url(url)
embed = TopicEmbed.find_by("lower(embed_url) = ?", url)
content_sha1 = Digest::SHA1.hexdigest(contents)
post = nil
# If there is no embed, create a topic, post and the embed.
if embed.blank?
Topic.transaction do
eh = EmbeddableHost.record_for_url(url)
cook_method = if SiteSetting.embed_support_markdown
Post.cook_methods[:regular]
else
Post.cook_methods[:raw_html]
end
create_args = {
title: title,
raw: absolutize_urls(url, contents),
skip_validations: true,
cook_method: cook_method,
category: eh.try(:category_id)
}
if SiteSetting.embed_unlisted?
create_args[:visible] = false
end
creator = PostCreator.new(user, create_args)
post = creator.create
if post.present?
TopicEmbed.create!(topic_id: post.topic_id,
embed_url: url,
content_sha1: content_sha1,
post_id: post.id)
end
end
else
absolutize_urls(url, contents)
post = embed.post
# Update the topic if it changed
if post&.topic
if post.user != user
PostOwnerChanger.new(
post_ids: [post.id],
topic_id: post.topic_id,
new_owner: user,
acting_user: Discourse.system_user
).change_owner!
# make sure the post returned has the right author
post.reload
end
if (content_sha1 != embed.content_sha1) || (title && title != post&.topic&.title)
changes = { raw: absolutize_urls(url, contents) }
changes[:title] = title if title.present?
post.revise(user, changes, skip_validations: true, bypass_rate_limiter: true)
embed.update!(content_sha1: content_sha1)
end
end
end
post
end
def self.find_remote(url)
require 'ruby-readability'
url = UrlHelper.escape_uri(url)
original_uri = URI.parse(url)
fd = FinalDestination.new(
url,
validate_uri: true,
max_redirects: 5
)
url = fd.resolve
return if url.blank?
opts = {
tags: %w[div p code pre h1 h2 h3 b em i strong a img ul li ol blockquote],
attributes: %w[href src class],
remove_empty_nodes: false
}
opts[:whitelist] = SiteSetting.allowed_embed_selectors if SiteSetting.allowed_embed_selectors.present?
opts[:blacklist] = SiteSetting.blocked_embed_selectors if SiteSetting.blocked_embed_selectors.present?
allowed_embed_classnames = SiteSetting.allowed_embed_classnames if SiteSetting.allowed_embed_classnames.present?
response = FetchResponse.new
begin
html = open(url, allow_redirections: :safe).read
rescue OpenURI::HTTPError, Net::OpenTimeout
return
end
raw_doc = Nokogiri::HTML5(html)
auth_element = raw_doc.at('meta[@name="author"]')
if auth_element.present?
response.author = User.where(username_lower: auth_element[:content].strip).first
end
read_doc = Readability::Document.new(html, opts)
title = +(raw_doc.title || '')
title.strip!
if SiteSetting.embed_title_scrubber.present?
title.sub!(Regexp.new(SiteSetting.embed_title_scrubber), '')
title.strip!
end
response.title = title
doc = Nokogiri::HTML5(read_doc.content)
tags = { 'img' => 'src', 'script' => 'src', 'a' => 'href' }
doc.search(tags.keys.join(',')).each do |node|
url_param = tags[node.name]
src = node[url_param]
unless (src.nil? || src.empty?)
begin
uri = URI.parse(UrlHelper.escape_uri(src))
unless uri.host
uri.scheme = original_uri.scheme
uri.host = original_uri.host
node[url_param] = uri.to_s
end
rescue URI::Error, Addressable::URI::InvalidURIError
# If there is a mistyped URL, just do nothing
end
end
# only allow classes in the allowlist
allowed_classes = if allowed_embed_classnames.blank? then [] else allowed_embed_classnames.split(/[ ,]+/i) end
doc.search('[class]:not([class=""])').each do |classnode|
classes = classnode[:class].split(' ').select { |classname| allowed_classes.include?(classname) }
if classes.length === 0
classnode.delete('class')
else
classnode[:class] = classes.join(' ')
end
end
end
response.body = doc.to_html
response
end
def self.import_remote(import_user, url, opts = nil)
opts = opts || {}
response = find_remote(url)
return if response.nil?
response.title = opts[:title] if opts[:title].present?
import_user = response.author if response.author.present?
TopicEmbed.import(import_user, url, response.title, response.body)
end
# Convert any relative URLs to absolute. RSS is annoying for this.
def self.absolutize_urls(url, contents)
url = normalize_url(url)
begin
uri = URI(UrlHelper.escape_uri(url))
rescue URI::Error
return contents
end
prefix = "#{uri.scheme}://#{uri.host}"
prefix += ":#{uri.port}" if uri.port != 80 && uri.port != 443
fragment = Nokogiri::HTML5.fragment("<div>#{contents}</div>")
fragment.css('a').each do |a|
href = a['href']
if href.present? && href.start_with?('/')
a['href'] = "#{prefix}/#{href.sub(/^\/+/, '')}"
end
end
fragment.css('img').each do |a|
src = a['src']
if src.present? && src.start_with?('/')
a['src'] = "#{prefix}/#{src.sub(/^\/+/, '')}"
end
end
fragment.at('div').inner_html
end
def self.topic_id_for_embed(embed_url)
embed_url = normalize_url(embed_url).sub(/^https?\:\/\//, '')
TopicEmbed.where("embed_url ~* ?", "^https?://#{Regexp.escape(embed_url)}$").pluck_first(:topic_id)
end
def self.first_paragraph_from(html)
doc = Nokogiri::HTML5(html)
result = +""
doc.css('p').each do |p|
if p.text.present?
result << p.to_s
return result if result.size >= 100
end
end
return result unless result.blank?
# If there is no first paragraph, return the first div (onebox)
doc.css('div').first.to_s
end
def self.expanded_for(post)
Discourse.cache.fetch("embed-topic:#{post.topic_id}", expires_in: 10.minutes) do
url = TopicEmbed.where(topic_id: post.topic_id).pluck_first(:embed_url)
response = TopicEmbed.find_remote(url)
body = response.body
body << TopicEmbed.imported_from_html(url)
body
end
end
def self.open(uri, **kwargs)
URI.open(uri, **kwargs)
end
end
# == Schema Information
#
# Table name: topic_embeds
#
# id :integer not null, primary key
# topic_id :integer not null
# post_id :integer not null
# embed_url :string(1000) not null
# content_sha1 :string(40)
# created_at :datetime not null
# updated_at :datetime not null
# deleted_at :datetime
# deleted_by_id :integer
#
# Indexes
#
# index_topic_embeds_on_embed_url (embed_url) UNIQUE
#