Merge pull request #3812 from gwwar/emoji-embed

FIX: allow emoji class when crawling embedded content
This commit is contained in:
Robin Ward 2015-11-06 14:52:59 -05:00
commit e2a663bff1
8 changed files with 103 additions and 2 deletions

View File

@ -120,6 +120,7 @@ group :test, :development do
gem 'simplecov', require: false
gem 'timecop'
gem 'rspec-given'
gem 'rspec-html-matchers'
gem 'pry-nav'
gem 'spork-rails'
gem 'byebug', require: ENV['RM_INFO'].nil?

View File

@ -337,6 +337,9 @@ GEM
rspec-given (3.5.4)
given_core (= 3.5.4)
rspec (>= 2.12)
rspec-html-matchers (0.7.0)
nokogiri (~> 1)
rspec (~> 3)
rspec-logsplit (0.1.3)
rspec-mocks (3.2.1)
diff-lcs (>= 1.2.0, < 2.0)
@ -511,6 +514,7 @@ DEPENDENCIES
rmmseg-cpp
rspec (~> 3.2.0)
rspec-given
rspec-html-matchers
rspec-rails
rtlit
ruby-readability

View File

@ -53,6 +53,10 @@
{{embedding-setting field="embed_blacklist_selector"
value=embedding.embed_blacklist_selector
placeholder=".ad-unit, header"}}
{{embedding-setting field="embed_classname_whitelist"
value=embedding.embed_classname_whitelist
placeholder="emoji, classname"}}
</div>
<div class='embedding-secondary'>

View File

@ -9,6 +9,7 @@ class Embedding < OpenStruct
embed_truncate
embed_whitelist_selector
embed_blacklist_selector
embed_classname_whitelist
feed_polling_enabled
feed_polling_url
embed_username_key_from_feed)

View File

@ -69,12 +69,13 @@ class TopicEmbed < ActiveRecord::Base
original_uri = URI.parse(url)
opts = {
tags: %w[div p code pre h1 h2 h3 b em i strong a img ul li ol blockquote],
attributes: %w[href src],
attributes: %w[href src class],
remove_empty_nodes: false
}
opts[:whitelist] = SiteSetting.embed_whitelist_selector if SiteSetting.embed_whitelist_selector.present?
opts[:blacklist] = SiteSetting.embed_blacklist_selector if SiteSetting.embed_blacklist_selector.present?
embed_classname_whitelist = SiteSetting.embed_classname_whitelist if SiteSetting.embed_classname_whitelist.present?
doc = Readability::Document.new(open(url).read, opts)
@ -96,6 +97,16 @@ class TopicEmbed < ActiveRecord::Base
# If there is a mistyped URL, just do nothing
end
end
# only allow classes in the whitelist
allowed_classes = if embed_classname_whitelist.blank? then [] else embed_classname_whitelist.split(/[ ,]+/i) end
doc.search('[class]:not([class=""])').each do |node|
classes = node[:class].split(' ').select{ |classname| allowed_classes.include?(classname) }
if classes.length === 0
node.delete('class')
else
node[:class] = classes.join(' ')
end
end
end
[title, doc.to_html]

View File

@ -812,6 +812,9 @@ embedding:
embed_blacklist_selector:
default: ''
hidden: true
embed_classname_whitelist:
default: 'emoji'
hidden: true
legal:
tos_url:

View File

@ -1,4 +1,5 @@
require 'spec_helper'
require 'stringio'
describe TopicEmbed do
@ -30,7 +31,8 @@ describe TopicEmbed do
expect(post.cooked).to eq(post.raw)
# It converts relative URLs to absolute
expect(post.cooked.start_with?("hello world new post <a href=\"http://eviltrout.com/hello\">hello</a> <img src=\"http://eviltrout.com/images/wat.jpg\">")).to eq(true)
expect(post.cooked).to have_tag('a', with: { href: 'http://eviltrout.com/hello' })
expect(post.cooked).to have_tag('img', with: { src: 'http://eviltrout.com/images/wat.jpg' })
expect(post.topic.has_topic_embed?).to eq(true)
expect(TopicEmbed.where(topic_id: post.topic_id)).to be_present
@ -58,4 +60,78 @@ describe TopicEmbed do
end
describe '.find_remote' do
context 'post with allowed classes "foo" and "emoji"' do
let(:user) { Fabricate(:user) }
let(:url) { 'http://eviltrout.com/123' }
let(:contents) { "my normal size emoji <p class='foo'>Hi</p> <img class='emoji other foo' src='/images/smiley.jpg'>" }
let!(:embeddable_host) { Fabricate(:embeddable_host) }
let!(:file) { StringIO.new }
content = ''
before(:each) do
SiteSetting.stubs(:embed_classname_whitelist).returns 'emoji , foo'
file.stubs(:read).returns contents
TopicEmbed.stubs(:open).returns file
title, content = TopicEmbed.find_remote(url)
end
it 'img node has emoji class' do
expect(content).to have_tag('img', with: { class: 'emoji' })
end
it 'img node has foo class' do
expect(content).to have_tag('img', with: { class: 'foo' })
end
it 'p node has foo class' do
expect(content).to have_tag('p', with: { class: 'foo' })
end
it 'nodes removes classes other than emoji' do
expect(content).to have_tag('img', without: { class: 'other' })
end
end
context 'post with no allowed classes' do
let(:user) { Fabricate(:user) }
let(:url) { 'http://eviltrout.com/123' }
let(:contents) { "my normal size emoji <p class='foo'>Hi</p> <img class='emoji other foo' src='/images/smiley.jpg'>" }
let!(:embeddable_host) { Fabricate(:embeddable_host) }
let!(:file) { StringIO.new }
content = ''
before(:each) do
SiteSetting.stubs(:embed_classname_whitelist).returns ' '
file.stubs(:read).returns contents
TopicEmbed.stubs(:open).returns file
title, content = TopicEmbed.find_remote(url)
end
it 'img node doesn\'t have emoji class' do
expect(content).to have_tag('img', without: { class: 'emoji' })
end
it 'img node doesn\'t have foo class' do
expect(content).to have_tag('img', without: { class: 'foo' })
end
it 'p node doesn\'t foo class' do
expect(content).to have_tag('p', without: { class: 'foo' })
end
it 'img node doesn\'t have other class' do
expect(content).to have_tag('img', without: { class: 'other' })
end
end
end
end

View File

@ -42,6 +42,7 @@ Spork.prefork do
config.fail_fast = ENV['RSPEC_FAIL_FAST'] == "1"
config.include Helpers
config.include MessageBus
config.include RSpecHtmlMatchers
config.mock_framework = :mocha
config.order = 'random'
config.infer_spec_type_from_file_location!