From 1113b8d7a8761f972a5584ed985c7114d071509c Mon Sep 17 00:00:00 2001
From: Robin Ward <robin.ward@gmail.com>
Date: Fri, 18 Oct 2013 15:20:27 -0400
Subject: [PATCH] FIX: Don't double sanitize values, allow blockquotes with
 leading text

---
 .../javascripts/discourse/dialects/dialect.js | 88 ++++++++++++-------
 .../javascripts/discourse/dialects/html.js    | 28 +++++-
 .../discourse/dialects/newline_dialect.js     |  4 +-
 test/javascripts/components/markdown_test.js  |  9 ++
 4 files changed, 92 insertions(+), 37 deletions(-)

diff --git a/app/assets/javascripts/discourse/dialects/dialect.js b/app/assets/javascripts/discourse/dialects/dialect.js
index a5ea056f906..d39b93f2ddc 100644
--- a/app/assets/javascripts/discourse/dialects/dialect.js
+++ b/app/assets/javascripts/discourse/dialects/dialect.js
@@ -7,7 +7,8 @@
 var parser = window.BetterMarkdown,
     MD = parser.Markdown,
     dialect = MD.dialects.Discourse = MD.subclassDialect( MD.dialects.Gruber ),
-    initialized = false;
+    initialized = false,
+    emitters = [];
 
 /**
   Initialize our dialects for processing.
@@ -21,6 +22,51 @@ function initializeDialects() {
   initialized = true;
 }
 
+/**
+  Process the text nodes in the JsonML tree, calling any emitters that have
+  been added.
+
+  @method processTextNodes
+  @param {Array} node the JsonML tree
+  @param {Object} event the parse node event data
+**/
+function processTextNodes(node, event) {
+  if (node.length < 2) { return; }
+
+  if (node[0] === '__RAW') {
+    return;
+  }
+
+  var skipSanitize = [];
+  for (var j=1; j<node.length; j++) {
+    var textContent = node[j];
+    if (typeof textContent === "string") {
+
+      if (dialect.options.sanitize && !skipSanitize[textContent]) {
+        textContent = Discourse.Markdown.sanitize(textContent);
+      }
+
+      var result = textContent;
+      emitters.forEach(function (e) {
+        result = e(result, event)
+      });
+
+      if (result) {
+        if (result instanceof Array) {
+          result.forEach(function (r) { skipSanitize[r] = true; });
+          node.splice.apply(node, [j, 1].concat(result));
+        } else {
+          node[j] = result;
+        }
+      } else {
+        node[j] = textContent;
+      }
+
+    }
+  }
+
+}
+
 /**
   Parse a JSON ML tree, using registered handlers to adjust it if necessary.
 
@@ -33,8 +79,9 @@ function initializeDialects() {
 function parseTree(tree, path, insideCounts) {
 
   if (tree instanceof Array) {
-    Discourse.Dialect.trigger('parseNode', {node: tree, path: path, dialect: dialect, insideCounts: insideCounts || {}});
-
+    var event = {node: tree, path: path, dialect: dialect, insideCounts: insideCounts || {}};
+    Discourse.Dialect.trigger('parseNode', event);
+    processTextNodes(tree, event);
 
     path = path || [];
     insideCounts = insideCounts || {};
@@ -100,7 +147,9 @@ Discourse.Dialect = {
     if (!initialized) { initializeDialects(); }
     dialect.options = opts;
     var tree = parser.toHTMLTree(text, 'Discourse');
-    return parser.renderJsonML(parseTree(tree));
+        html = parser.renderJsonML(parseTree(tree));
+
+    return html;
   },
 
   /**
@@ -363,36 +412,7 @@ Discourse.Dialect = {
     @param {Function} emitter The function to call with the text. It returns JsonML to modify the tree.
   **/
   postProcessText: function(emitter) {
-    Discourse.Dialect.on("parseNode", function(event) {
-      var node = event.node;
-
-      if (node.length < 2) { return; }
-
-      if (node[0] === '__RAW') {
-        return;
-      }
-
-      for (var j=1; j<node.length; j++) {
-        var textContent = node[j];
-        if (typeof textContent === "string") {
-
-          if (dialect.options.sanitize) {
-            textContent = Discourse.Markdown.sanitize(textContent);
-          }
-
-          var result = emitter(textContent, event);
-          if (result) {
-            if (result instanceof Array) {
-              node.splice.apply(node, [j, 1].concat(result));
-            } else {
-              node[j] = result;
-            }
-          } else {
-            node[j] = textContent;
-          }
-        }
-      }
-    });
+    emitters.push(emitter);
   },
 
   /**
diff --git a/app/assets/javascripts/discourse/dialects/html.js b/app/assets/javascripts/discourse/dialects/html.js
index 5de0a39056f..d0682b5e154 100644
--- a/app/assets/javascripts/discourse/dialects/html.js
+++ b/app/assets/javascripts/discourse/dialects/html.js
@@ -4,14 +4,38 @@
 var blockTags = ['address', 'article', 'aside', 'audio', 'blockquote', 'canvas', 'dd', 'div',
                  'dl', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3',
                  'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'noscript', 'ol', 'output',
-                 'p', 'pre', 'section', 'table', 'tfoot', 'ul', 'video'];
+                 'p', 'pre', 'section', 'table', 'tfoot', 'ul', 'video'],
+
+    splitAtLast = function(tag, block, next, first) {
+      var endTag = "</" + tag + ">",
+          endTagIndex = first ? block.indexOf(endTag) : block.lastIndexOf(endTag);
+
+      if (endTagIndex !== -1) {
+        endTagIndex += endTag.length;
+
+        var leading = block.substr(0, endTagIndex),
+            trailing = block.substr(endTagIndex).replace(/^\s+/, '');
+
+        if (trailing.length) {
+          next.unshift(trailing);
+        }
+
+        return [ leading ];
+      }
+    };
 
 Discourse.Dialect.registerBlock('html', function(block, next) {
 
-  var m = /^<([^>]+)\>/.exec(block);
+  // Fix manual blockquote paragraphing even though it's not strictly correct
+  var split = splitAtLast('blockquote', block, next, true);
+  if (split) { return split; }
+
+  var m = /^<([^>]+)\>/m.exec(block);
   if (m && m[1]) {
     var tag = m[1].split(/\s/);
     if (tag && tag[0] && blockTags.indexOf(tag[0]) !== -1) {
+      split = splitAtLast(tag[0], block, next);
+      if (split) { return split; }
       return [ block.toString() ];
     }
   }
diff --git a/app/assets/javascripts/discourse/dialects/newline_dialect.js b/app/assets/javascripts/discourse/dialects/newline_dialect.js
index 22c90f65ccf..d08a8a30f33 100644
--- a/app/assets/javascripts/discourse/dialects/newline_dialect.js
+++ b/app/assets/javascripts/discourse/dialects/newline_dialect.js
@@ -10,10 +10,11 @@ Discourse.Dialect.postProcessText(function (text, event) {
   if (linebreaks || (insideCounts.pre > 0)) { return; }
 
   if (text === "\n") {
-    // If the tage is just a new line, replace it with a `<br>`
+    // If the tag is just a new line, replace it with a `<br>`
     return [['br']];
   } else {
 
+
     // If the text node contains new lines, perhaps with text between them, insert the
     // `<br>` tags.
     var split = text.split(/\n+/);
@@ -25,6 +26,7 @@ Discourse.Dialect.postProcessText(function (text, event) {
           if (i !== split.length-1) { replacement.push(['br']); }
         }
       }
+
       return replacement;
     }
   }
diff --git a/test/javascripts/components/markdown_test.js b/test/javascripts/components/markdown_test.js
index 4e465dfd999..239748467f2 100644
--- a/test/javascripts/components/markdown_test.js
+++ b/test/javascripts/components/markdown_test.js
@@ -54,6 +54,14 @@ test("Line Breaks", function() {
   cooked("[] first choice\n[] second choice",
          "<p>[] first choice<br/>[] second choice</p>",
          "it handles new lines correctly with [] options");
+
+  cooked("<blockquote>evil</blockquote>\ntrout",
+         "<blockquote>evil</blockquote>\n\n<p>trout</p>",
+         "it doesn't insert <br> after blockquotes");
+
+  cooked("leading<blockquote>evil</blockquote>\ntrout",
+         "leading<blockquote>evil</blockquote>\n\n<p>trout</p>",
+         "it doesn't insert <br> after blockquotes with leading text");
 });
 
 test("Paragraphs for HTML", function() {
@@ -311,6 +319,7 @@ test("sanitize", function() {
          "<p><a href=\"http://disneyland.disney.go.com/\">disney</a> <a href=\"http://reddit.com\">reddit</a></p>",
          "we can embed proper links");
 
+  cooked("<blockquote>a\n</blockquote>\n", "<blockquote>a\n\n<br/>\n\n</blockquote>", "it does not double sanitize");
 });
 
 test("URLs in BBCode tags", function() {