clean up onebox application so it uses a single code path

use fragments for oneboxes
strip parent <p> if <div> is in it
clean some tests
This commit is contained in:
Sam 2013-04-10 17:52:38 +10:00
parent ab9c55689e
commit 33e3ad1603
6 changed files with 90 additions and 29 deletions

View File

@ -243,15 +243,11 @@ class Post < ActiveRecord::Base
# If we have any of the oneboxes in the cache, throw them in right away, don't
# wait for the post processor.
dirty = false
doc = Oneboxer.each_onebox_link(cooked) do |url, elem|
cached = Oneboxer.render_from_cache(url)
if cached.present?
elem.swap(cached)
dirty = true
end
result = Oneboxer.apply(cooked) do |url, elem|
Oneboxer.render_from_cache(url)
end
cooked = doc.to_html if dirty
cooked = result.to_html if result.changed?
cooked
end

View File

@ -10,7 +10,7 @@ class CookedPostProcessor
@dirty = false
@opts = opts
@post = post
@doc = Nokogiri::HTML(post.cooked)
@doc = Nokogiri::HTML::fragment(post.cooked)
@size_cache = {}
end
@ -23,13 +23,10 @@ class CookedPostProcessor
args = {post_id: @post.id}
args[:invalidate_oneboxes] = true if @opts[:invalidate_oneboxes]
Oneboxer.each_onebox_link(@doc) do |url, element|
onebox, preview = Oneboxer.onebox(url, args)
if onebox
element.swap onebox
@dirty = true
end
result = Oneboxer.apply(@doc) do |url, element|
Oneboxer.onebox(url, args)
end
@dirty ||= result.changed?
end
# First let's consider the images
@ -127,6 +124,10 @@ class CookedPostProcessor
@doc.try(:to_html)
end
def doc
@doc
end
def get_size(url)
return nil unless SiteSetting.crawl_images? || url.start_with?(Discourse.base_url)
@size_cache[url] ||= FastImage.size(url)

View File

@ -10,6 +10,16 @@ Dir["#{Rails.root}/lib/oneboxer/*_onebox.rb"].each {|f|
module Oneboxer
extend Oneboxer::Base
Result = Struct.new(:doc, :changed) do
def to_html
doc.to_html
end
def changed?
changed
end
end
Dir["#{Rails.root}/lib/oneboxer/*_onebox.rb"].sort.each do |f|
add_onebox "Oneboxer::#{Pathname.new(f).basename.to_s.gsub(/\.rb$/, '').classify}".constantize
end
@ -66,7 +76,7 @@ module Oneboxer
# Parse URLs out of HTML, returning the document when finished.
def self.each_onebox_link(string_or_doc)
doc = string_or_doc
doc = Nokogiri::HTML(doc) if doc.is_a?(String)
doc = Nokogiri::HTML::fragment(doc) if doc.is_a?(String)
onebox_links = doc.search("a.onebox")
if onebox_links.present?
@ -80,6 +90,32 @@ module Oneboxer
doc
end
def self.apply(string_or_doc)
doc = string_or_doc
doc = Nokogiri::HTML::fragment(doc) if doc.is_a?(String)
changed = false
Oneboxer.each_onebox_link(doc) do |url, element|
onebox, preview = yield(url,element)
if onebox
parsed_onebox = Nokogiri::HTML::fragment(onebox)
next unless parsed_onebox.children.count > 0
# special logic to strip empty p elements
if element.parent &&
element.parent.node_name.downcase == "p" &&
element.parent.children.count == 1 &&
parsed_onebox.children.first.name.downcase == "div"
element = element.parent
end
changed = true
element.swap parsed_onebox.to_html
end
end
Result.new(doc, changed)
end
def self.cache_key_for(url)
"onebox:#{Digest::SHA1.hexdigest(url)}"
end

View File

@ -3,19 +3,19 @@ require 'spec_helper'
require 'cooked_post_processor'
describe CookedPostProcessor do
let :cpp do
def cpp(cooked = nil, options = {})
post = Fabricate.build(:post_with_youtube)
post.cooked = cooked if cooked
post.id = 123
CookedPostProcessor.new(post)
CookedPostProcessor.new(post, options)
end
context 'process_onebox' do
before do
post = Fabricate.build(:post_with_youtube)
post.id = 123
@cpp = CookedPostProcessor.new(post, invalidate_oneboxes: true)
Oneboxer.expects(:onebox).with("http://www.youtube.com/watch?v=9bZkp7q19f0", post_id: 123, invalidate_oneboxes: true).returns('GANGNAM STYLE')
@cpp = cpp(nil, invalidate_oneboxes: true)
Oneboxer.expects(:onebox).with("http://www.youtube.com/watch?v=9bZkp7q19f0", post_id: 123, invalidate_oneboxes: true).returns('<div>GANGNAM STYLE</div>')
@cpp.post_process_oneboxes
end
@ -23,15 +23,13 @@ describe CookedPostProcessor do
@cpp.should be_dirty
end
it 'inserts the onebox' do
@cpp.html.should == <<EXPECTED
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html><body>GANGNAM STYLE</body></html>
EXPECTED
it 'inserts the onebox without wrapping p' do
@cpp.html.should match_html "<div>GANGNAM STYLE</div>"
end
end
context 'process_images' do
it "has no topic image if there isn't one in the post" do

View File

@ -159,7 +159,7 @@ describe Oneboxer do
element.is_a?(Nokogiri::XML::Element).should be_true
url.should == 'http://discourse.org'
end
result.kind_of?(Nokogiri::HTML::Document).should be_true
result.kind_of?(Nokogiri::HTML::DocumentFragment).should be_true
end
it 'yields each url and element when given a doc' do
@ -172,5 +172,35 @@ describe Oneboxer do
end
context "apply_onebox" do
it "is able to nuke wrapping p" do
doc = Oneboxer.apply "<p><a href='http://bla.com' class='onebox'>bla</p>" do |url, element|
"<div>foo</div>" if url == "http://bla.com"
end
doc.changed? == true
doc.to_html.should match_html "<div>foo</div>"
end
it "is able to do nothing if nil is returned" do
orig = "<p><a href='http://bla.com' class='onebox'>bla</p>"
doc = Oneboxer.apply orig do |url, element|
nil
end
doc.changed? == false
doc.to_html.should match_html orig
end
it "does not strip if there is a br in same node" do
doc = Oneboxer.apply "<p><br><a href='http://bla.com' class='onebox'>bla</p>" do |url, element|
"<div>foo</div>" if url == "http://bla.com"
end
doc.changed? == true
doc.to_html.should match_html "<p><br><div>foo</div></p>"
end
end
end

View File

@ -5,7 +5,7 @@ Fabricator(:post) do
end
Fabricator(:post_with_youtube, from: :post) do
cooked '<a href="http://www.youtube.com/watch?v=9bZkp7q19f0" class="onebox" target="_blank">http://www.youtube.com/watch?v=9bZkp7q19f0</a>'
cooked '<p><a href="http://www.youtube.com/watch?v=9bZkp7q19f0" class="onebox" target="_blank">http://www.youtube.com/watch?v=9bZkp7q19f0</a></p>'
end
Fabricator(:old_post, from: :post) do
@ -75,4 +75,4 @@ Fabricator(:private_message_post, from: :post) do
)
end
raw "Ssshh! This is our secret conversation!"
end
end