From 91f9844211bb2161f78d912c0349b9cf1e69bb19 Mon Sep 17 00:00:00 2001
From: Jeremy Banks <jeremy@jeremybanks.ca>
Date: Tue, 19 Feb 2013 01:49:58 -0500
Subject: [PATCH] =?UTF-8?q?Include=20/^http=E2=80=A6=20prefix=20in=20onebo?=
 =?UTF-8?q?x=20whitelist=20URLs=20regexes=20to=20prevent=20XSS.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 lib/oneboxer/clikthrough_onebox.rb |   2 +-
 lib/oneboxer/dailymotion_onebox.rb |   2 +-
 lib/oneboxer/dotsub_onebox.rb      |   2 +-
 lib/oneboxer/github_blob_onebox.rb |   2 +-
 lib/oneboxer/kinomap_onebox.rb     |   2 +-
 lib/oneboxer/nfb_onebox.rb         |   2 +-
 lib/oneboxer/viddler_onebox.rb     |   2 +-
 lib/oneboxer/whitelist.rb          | 140 ++++++++++++++---------------
 lib/oneboxer/yfrog_onebox.rb       |   2 +-
 9 files changed, 78 insertions(+), 78 deletions(-)

diff --git a/lib/oneboxer/clikthrough_onebox.rb b/lib/oneboxer/clikthrough_onebox.rb
index 8561c655591..7869ea895e4 100644
--- a/lib/oneboxer/clikthrough_onebox.rb
+++ b/lib/oneboxer/clikthrough_onebox.rb
@@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox'
 module Oneboxer
   class ClikthroughOnebox < OembedOnebox
 
-    matcher /clikthrough\.com\/theater\/video\/\d+$/
+    matcher /\Ahttps?:\/\/(?:www\.)?clikthrough\.com\/theater\/video\/\d+\z/ # \A..\z anchor the whole string; ^..$ only anchor a single line
 
     def oembed_endpoint
       "http://clikthrough.com/services/oembed?url=#{BaseOnebox.uriencode(@url)}"
diff --git a/lib/oneboxer/dailymotion_onebox.rb b/lib/oneboxer/dailymotion_onebox.rb
index 83ae8647ca9..fccabbb02bc 100644
--- a/lib/oneboxer/dailymotion_onebox.rb
+++ b/lib/oneboxer/dailymotion_onebox.rb
@@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox'
 module Oneboxer
   class DailymotionOnebox < OembedOnebox
 
-    matcher /dailymotion\.com\/.+$/
+    matcher /\Ahttps?:\/\/(?:www\.)?dailymotion\.com\/.+\z/ # \A..\z anchor the whole string; ^..$ only anchor a single line
 
     def oembed_endpoint
       "http://www.dailymotion.com/api/oembed/?url=#{BaseOnebox.uriencode(@url)}"
diff --git a/lib/oneboxer/dotsub_onebox.rb b/lib/oneboxer/dotsub_onebox.rb
index 2d524e9372c..96bbabe5b84 100644
--- a/lib/oneboxer/dotsub_onebox.rb
+++ b/lib/oneboxer/dotsub_onebox.rb
@@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox'
 module Oneboxer
   class DotsubOnebox < OembedOnebox
 
-    matcher /dotsub\.com\/.+$/
+    matcher /\Ahttps?:\/\/(?:www\.)?dotsub\.com\/.+\z/ # \A..\z anchor the whole string; ^..$ only anchor a single line
 
     def oembed_endpoint
       "http://dotsub.com/services/oembed?url=#{BaseOnebox.uriencode(@url)}"
diff --git a/lib/oneboxer/github_blob_onebox.rb b/lib/oneboxer/github_blob_onebox.rb
index 59fb282dcaf..859ccf15cf4 100644
--- a/lib/oneboxer/github_blob_onebox.rb
+++ b/lib/oneboxer/github_blob_onebox.rb
@@ -3,7 +3,7 @@ require_dependency 'oneboxer/handlebars_onebox'
 module Oneboxer
   class GithubBlobOnebox < HandlebarsOnebox
 
-    matcher /github\.com\/[^\/]+\/[^\/]+\/blob\/.*/
+    matcher /\Ahttps?:\/\/(?:www\.)?github\.com\/[^\/]+\/[^\/]+\/blob\/.*/ # \A (not ^) so the scheme check applies to the start of the string, not of any line
     favicon 'github.png'
 
     def translate_url  
diff --git a/lib/oneboxer/kinomap_onebox.rb b/lib/oneboxer/kinomap_onebox.rb
index 1e1af58dea2..cb427ee4fe8 100644
--- a/lib/oneboxer/kinomap_onebox.rb
+++ b/lib/oneboxer/kinomap_onebox.rb
@@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox'
 module Oneboxer
   class KinomapOnebox < OembedOnebox
 
-    matcher /kinomap\.com/
+    matcher /\Ahttps?:\/\/(?:www\.)?kinomap\.com(?:[\/?#]|\z)/ # \A anchors the string start; the trailing group stops look-alike hosts such as kinomap.com.evil.example
 
     def oembed_endpoint
       "http://www.kinomap.com/oembed?url=#{BaseOnebox.uriencode(@url)}&format=json"
diff --git a/lib/oneboxer/nfb_onebox.rb b/lib/oneboxer/nfb_onebox.rb
index f8677f0d42f..f749c097fc5 100644
--- a/lib/oneboxer/nfb_onebox.rb
+++ b/lib/oneboxer/nfb_onebox.rb
@@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox'
 module Oneboxer
   class NfbOnebox < OembedOnebox
 
-    matcher /nfb\.ca\/film\/[-\w]+\/?/
+    matcher /\Ahttps?:\/\/(?:www\.)?nfb\.ca\/film\/[-\w]+\/?/ # \A (not ^) so the scheme check applies to the start of the string, not of any line
 
     def oembed_endpoint
       "http://www.nfb.ca/remote/services/oembed/?url=#{BaseOnebox.uriencode(@url)}&format=json"
diff --git a/lib/oneboxer/viddler_onebox.rb b/lib/oneboxer/viddler_onebox.rb
index 987e05cd143..31a150b5ff6 100644
--- a/lib/oneboxer/viddler_onebox.rb
+++ b/lib/oneboxer/viddler_onebox.rb
@@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox'
 module Oneboxer
   class ViddlerOnebox < OembedOnebox
 
-    matcher /viddler\.com\/.+$/
+    matcher /\Ahttps?:\/\/(?:www\.)?viddler\.com\/.+\z/ # \A..\z anchor the whole string; ^..$ only anchor a single line
 
     def oembed_endpoint
       "http://lab.viddler.com/services/oembed/?url=#{BaseOnebox.uriencode(@url)}"
diff --git a/lib/oneboxer/whitelist.rb b/lib/oneboxer/whitelist.rb
index 38f27760622..6c4c95f0ecc 100644
--- a/lib/oneboxer/whitelist.rb
+++ b/lib/oneboxer/whitelist.rb
@@ -2,76 +2,76 @@ module Oneboxer
 
   module Whitelist
     def self.entries
-      [/cnn\.com\/.+/,
-       /washingtonpost\.com\/.+/,
-       /\/\d{4}\/\d{2}\/\d{2}\//,   # wordpress
-       /funnyordie\.com\/.+/,
-       /youtube\.com\/.+/,
-       /youtu\.be\/.+/,
-       /500px\.com\/.+/,
-       /scribd\.com\/.+/,
-       /photobucket\.com\/.+/,
-       /ebay\.(com|ca|co\.uk)\/.+/,
-       /nytimes\.com\/.+/,
-       /tumblr\.com\/.+/,
-       /pinterest\.com\/.+/,
-       /imdb\.com\/.+/,
-       /bbc\.co\.uk\/.+/,
-       /ask\.com\/.+/,
-       /huffingtonpost\.com\/.+/,
-       /aol\.(com|ca)\/.+/,
-       /espn\.go\.com\/.+/,
-       /about\.com\/.+/,
-       /cnet\.com\/.+/,
-       /ehow\.com\/.+/,
-       /dailymail\.co\.uk\/.+/,
-       /indiatimes\.com\/.+/,
-       /answers\.com\/.+/,
-       /instagr\.am\/.+/,
-       /battle\.net\/.+/,
-       /sourceforge\.net\/.+/,
-       /myspace\.com\/.+/,
-       /wikia\.com\/.+/,
-       /etsy\.com\/.+/,
-       /walmart\.com\/.+/,
-       /reference\.com\/.+/,
-       /yelp\.com\/.+/,
-       /foxnews\.com\/.+/,
-       /guardian\.co\.uk\/.+/,
-       /digg\.com\/.+/,
-       /squidoo\.com\/.+/,
-       /wsj\.com\/.+/,
-       /archive\.org\/.+/,
-       /nba\.com\/.+/,
-       /samsung\.com\/.+/,
-       /mashable\.com\/.+/,
-       /forbes\.com\/.+/,
-       /soundcloud\.com\/.+/,
-       /thefreedictionary\.com\/.+/,
-       /groupon\.com\/.+/,
-       /ikea\.com\/.+/,
-       /dell\.com\/.+/,
-       /mlb\.com\/.+/,
-       /bestbuy\.(com|ca)\/.+/,
-       /bloomberg\.com\/.+/,
-       /ign\.com\/.+/,
-       /twitpic\.com\/.+/,
-       /techcrunch\.com\/.+/,
-       /usatoday\.com\/.+/,
-       /go\.com\/.+/,
-       /businessinsider\.com\/.+/,
-       /zillow\.com\/.+/,
-       /tmz\.com\/.+/,
-       /thesun\.co\.uk\/.+/,
-       /thestar\.(com|ca)\/.+/,
-       /theglobeandmail\.com\/.+/,
-       /torontosun\.com\/.+/,
-       /kickstarter\.com\/.+/,
-       /wired\.com\/.+/,
-       /time\.com\/.+/,
-       /npr\.org\/.+/,
-       /cracked\.com\/.+/,
-       /deadline\.com\/.+/
+      [/\Ahttps?:\/\/(?:www\.)?cnn\.com\/.+/, # every entry uses \A (not ^): ^ matches after any newline, so a multi-line string could hide a non-http prefix
+       /\Ahttps?:\/\/(?:www\.)?washingtonpost\.com\/.+/,
+       /\Ahttps?:\/\/[^\/]+\/(?:.*\/)?\d{4}\/\d{2}\/\d{2}\//,   # wordpress: any host, /YYYY/MM/DD/ anywhere in the path
+       /\Ahttps?:\/\/(?:www\.)?funnyordie\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?youtube\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?youtu\.be\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?500px\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?scribd\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?photobucket\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?ebay\.(com|ca|co\.uk)\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?nytimes\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?tumblr\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?pinterest\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?imdb\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?bbc\.co\.uk\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?ask\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?huffingtonpost\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?aol\.(com|ca)\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?espn\.go\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?about\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?cnet\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?ehow\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?dailymail\.co\.uk\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?indiatimes\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?answers\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?instagr\.am\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?battle\.net\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?sourceforge\.net\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?myspace\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?wikia\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?etsy\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?walmart\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?reference\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?yelp\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?foxnews\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?guardian\.co\.uk\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?digg\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?squidoo\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?wsj\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?archive\.org\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?nba\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?samsung\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?mashable\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?forbes\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?soundcloud\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?thefreedictionary\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?groupon\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?ikea\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?dell\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?mlb\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?bestbuy\.(com|ca)\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?bloomberg\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?ign\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?twitpic\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?techcrunch\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?usatoday\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?go\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?businessinsider\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?zillow\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?tmz\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?thesun\.co\.uk\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?thestar\.(com|ca)\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?theglobeandmail\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?torontosun\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?kickstarter\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?wired\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?time\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?npr\.org\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?cracked\.com\/.+/,
+       /\Ahttps?:\/\/(?:www\.)?deadline\.com\/.+/
      ]
     end    
 
diff --git a/lib/oneboxer/yfrog_onebox.rb b/lib/oneboxer/yfrog_onebox.rb
index 6090930d9cc..96c72918cfa 100644
--- a/lib/oneboxer/yfrog_onebox.rb
+++ b/lib/oneboxer/yfrog_onebox.rb
@@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox'
 module Oneboxer
   class YfrogOnebox < OembedOnebox
 
-    matcher /yfrog\.(com|ru|com\.tr|it|fr|co\.il|co\.uk|com\.pl|pl|eu|us)\/[a-zA-Z0-9]+/
+    matcher /\Ahttps?:\/\/(?:www\.)?yfrog\.(com|ru|com\.tr|it|fr|co\.il|co\.uk|com\.pl|pl|eu|us)\/[a-zA-Z0-9]+/ # \A (not ^) so the scheme check applies to the start of the string, not of any line
 
     def oembed_endpoint
       "http://www.yfrog.com/api/oembed/?url=#{BaseOnebox.uriencode(@url)}&format=json"