From 01855b70b4ee6c644da48f18592dc91ac213ee95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9gis=20Hanol?= <regis@hanol.fr>
Date: Wed, 5 Jun 2013 20:53:07 +0200
Subject: [PATCH] FIX: Have onebox ignore internal links

---
 lib/pretty_text.rb                  | 15 ++++++++-------
 spec/components/pretty_text_spec.rb | 11 +++++++++++
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/lib/pretty_text.rb b/lib/pretty_text.rb
index c912f60bc09..f2050a96892 100644
--- a/lib/pretty_text.rb
+++ b/lib/pretty_text.rb
@@ -213,17 +213,18 @@ module PrettyText
   end
 
   def self.extract_links(html)
-    doc = Nokogiri::HTML.fragment(html)
     links = []
-    doc.css("a").each do |l|
-      links << l.attributes["href"].to_s
-    end
-
+    doc = Nokogiri::HTML.fragment(html)
+    # blank the href of every link inside a quote so the pass below skips it
+    doc.css("aside.quote a").each { |l| l["href"] = "" }
+    # collect every remaining non-empty href; to_s guards <a> tags that have no href (l["href"] is nil)
+    doc.css("a").each { |l| links << l["href"] unless l["href"].to_s.empty? }
+    # add one link per quote, built from its data-topic (and data-post, when present)
     doc.css("aside.quote").each do |a|
-      topic_id = a.attributes['data-topic']
+      topic_id = a['data-topic']
 
       url = "/t/topic/#{topic_id}"
-      if post_number = a.attributes['data-post']
+      if post_number = a['data-post']
         url << "/#{post_number}"
       end
 
diff --git a/spec/components/pretty_text_spec.rb b/spec/components/pretty_text_spec.rb
index 16f6d14e85f..8c74454e278 100644
--- a/spec/components/pretty_text_spec.rb
+++ b/spec/components/pretty_text_spec.rb
@@ -162,6 +162,17 @@ test
       PrettyText.extract_links("<aside class=\"quote\" data-topic=\"1234\" data-post=\"4567\">aside</aside>").to_a.should == ["/t/topic/1234/4567"]
     end
 
+    it "should not extract links inside quotes" do # hrefs inside aside.quote are dropped; the quote itself yields its /t/topic/<data-topic> URL
+      PrettyText.extract_links("
+        <a href='http://body_only.com'>http://useless1.com</a>
+        <aside class=\"quote\" data-topic=\"1234\">
+          <a href='http://body_and_quote.com'>http://useless3.com</a>
+          <a href='http://quote_only.com'>http://useless4.com</a>
+        </aside>
+        <a href='http://body_and_quote.com'>http://useless2.com</a>
+        ").to_a.should == ["http://body_only.com", "http://body_and_quote.com", "/t/topic/1234"]
+    end
+
     it "should not preserve tags in code blocks" do
       PrettyText.excerpt("<pre><code class='handlebars'>&lt;h3&gt;Hours&lt;/h3&gt;</code></pre>",100).should == "&lt;h3&gt;Hours&lt;/h3&gt;"
     end