FIX: pre-hoist code blocks & spans

This commit is contained in:
Régis Hanol 2015-03-09 12:32:37 +01:00
parent 5b1ee3fedf
commit 70f00e31df
7 changed files with 120 additions and 31 deletions

View File

@ -10,7 +10,7 @@ var acceptableCodeClasses =
"perl", "php", "profile", "python", "r", "rib", "rsl", "ruby", "rust", "scala", "smalltalk", "sql",
"tex", "text", "vala", "vbscript", "vhdl"];
var textCodeClasses = ["text", "pre"];
var textCodeClasses = ["text", "pre", "plain"];
function flattenBlocks(blocks) {
var result = "";
@ -39,6 +39,17 @@ Discourse.Dialect.replaceBlock({
}
});
// Registers a block rule with the dialect for raw HTML <pre>...</pre> sections.
// The block starts at an opening <pre ...> tag (attributes allowed) and ends at
// the closing </pre>; its contents are flattened and emitted as a <pre> wrapped
// in a <p>.
Discourse.Dialect.replaceBlock({
// group 1 captures the opening tag, group 2 everything that follows it
start: /(<pre[^\>]*\>)([\s\S]*)/igm,
stop: /<\/pre>/igm,
// NOTE(review): presumably hands the emitter the unparsed text of the block -- confirm in replaceBlock
rawContents: true,
// NOTE(review): the key is spelled "Tradtional" here; it must match whatever replaceBlock reads, so do not correct it in isolation
skipIfTradtionalLinebreaks: true,
emitter: function(blockContents) {
// JsonML: <p><pre>flattened contents</pre></p>
return ['p', ['pre', flattenBlocks(blockContents)]];
}
});
// Ensure that content in a code block is fully escaped. This way it's not white listed
// and we can use HTML and Javascript examples.
Discourse.Dialect.on('parseNode', function (event) {
@ -51,7 +62,6 @@ Discourse.Dialect.on('parseNode', function (event) {
if (path && path[path.length-1] && path[path.length-1][0] && path[path.length-1][0] === "pre") {
regexp = / +$/g;
} else {
regexp = /^ +| +$/g;
}
@ -59,17 +69,6 @@ Discourse.Dialect.on('parseNode', function (event) {
}
});
// Registers a block rule with the dialect for raw HTML <pre>...</pre> sections.
// The block starts at an opening <pre ...> tag (attributes allowed) and ends at
// the closing </pre>; its contents are flattened and emitted as a <pre> wrapped
// in a <p>.
Discourse.Dialect.replaceBlock({
// group 1 captures the opening tag, group 2 everything that follows it
start: /(<pre[^\>]*\>)([\s\S]*)/igm,
stop: /<\/pre>/igm,
// NOTE(review): presumably hands the emitter the unparsed text of the block -- confirm in replaceBlock
rawContents: true,
// NOTE(review): the key is spelled "Tradtional" here; it must match whatever replaceBlock reads, so do not correct it in isolation
skipIfTradtionalLinebreaks: true,
emitter: function(blockContents) {
// JsonML: <p><pre>flattened contents</pre></p>
return ['p', ['pre', flattenBlocks(blockContents)]];
}
});
// Whitelist the language classes
var regexpSource = "^lang-(" + acceptableCodeClasses.join('|') + ")$";
Discourse.Markdown.whiteListTag('code', 'class', new RegExp(regexpSource, "i"));

View File

@ -12,7 +12,8 @@ var parser = window.BetterMarkdown,
initialized = false,
emitters = [],
hoisted,
preProcessors = [];
preProcessors = [],
escape = Handlebars.Utils.escapeExpression;
/**
Initialize our dialects for processing.
@ -162,6 +163,69 @@ function hoister(t, target, replacement) {
return t;
}
/**
  Removes one level of code-block indentation from every line of `t`.

  A level is either exactly four leading spaces or one leading tab; lines that
  start with anything else are returned unchanged, and deeper indentation only
  loses its first level.

  @param {String} t the indented text
  @returns {String} the text with one indentation level stripped per line
**/
function outdent(t) {
  // "m" anchors ^ at every line start, "g" processes all lines
  var oneIndentLevel = /^([ ]{4}|\t)/gm;
  var outdented = t.replace(oneIndentLevel, "");
  return outdented;
}
// Placeholders standing in for the escape sequences "\\" and "\`".
// Each one is a single private-use code point, so neither placeholder is a
// prefix of the other and neither can be completed by ordinary text that
// happens to follow it (e.g. the "1" in "\\1").
var ESCAPED_BACKSLASH_PLACEHOLDER = "\uE800",
    ESCAPED_BACKTICK_PLACEHOLDER = "\uE801";

/**
  Replaces every backslash-escaped backslash ("\\") and backslash-escaped
  backtick ("\`") in `t` with a one-character placeholder, so that later
  backtick-based matching does not see escaped backticks.

  Escaped backslashes are replaced first so that in "\\`" the backtick is
  correctly left unescaped.

  @param {String} t the raw text
  @returns {String} the text with both escape sequences replaced by placeholders
**/
function hideBackslashEscapedCharacters(t) {
  return t.replace(/\\\\/g, ESCAPED_BACKSLASH_PLACEHOLDER)
          .replace(/\\`/g, ESCAPED_BACKTICK_PLACEHOLDER);
}

/**
  Inverse of `hideBackslashEscapedCharacters`: turns the placeholders back into
  the original "\`" and "\\" sequences.

  @param {String} t text containing placeholders
  @returns {String} the text with the escape sequences restored
**/
function showBackslashEscapedCharacters(t) {
  return t.replace(/\uE801/g, "\\`")
          .replace(/\uE800/g, "\\\\");
}
/**
  Pulls the contents of every code block and code span out of `text` before
  the markdown parser runs.

  Each piece of code is replaced in the text by the md5 hash of its contents,
  and the HTML-escaped contents are stored in the module-level `hoisted` map
  under that hash. The surrounding markers (<pre> tags, indentation, fences,
  backticks) are kept so the parser still sees a code block or span.

  Handled, in order: <pre>...</pre> blocks, indented markdown code blocks,
  fenced (```) code blocks, and code spans delimited by `` or `.

  @param {String} text the raw text
  @returns {String} the text with code contents replaced by their hashes
**/
function hoistCodeBlocksAndSpans(text) {
  // hide "\\" and "\`" behind placeholders so escaped backticks are not
  // mistaken for code span delimiters below
  text = hideBackslashEscapedCharacters(text);

  // <pre>...</pre> code blocks (only at the start of the text or after a blank line)
  text = text.replace(/(^\n*|\n\n)<pre>([\s\S]*?)<\/pre>/ig, function(_, before, content) {
    var hash = md5(content);
    hoisted[hash] = escape(showBackslashEscapedCharacters(content.trim()));
    return before + "<pre>" + hash + "</pre>";
  });

  // markdown code blocks (runs of lines indented by 4 spaces or a tab)
  text = text.replace(/(^\n*|\n\n)((?:(?:[ ]{4}|\t).*\n*)+)/g, function(match, before, content, index) {
    // make sure we aren't in a list: indented text right after a list item
    // belongs to that item, so it is left untouched
    var previousLine = text.slice(0, index).trim().match(/.*$/);
    if (previousLine && previousLine[0].length) {
      previousLine = previousLine[0].trim();
      if (/^(?:\*|\+|-|\d+\.)\s+/.test(previousLine)) {
        return match;
      }
    }
    // we can safely hoist the code block
    var hash = md5(content);
    // only remove trailing whitespace; leading indentation is handled by outdent
    content = content.replace(/\s+$/, "");
    hoisted[hash] = escape(outdent(showBackslashEscapedCharacters(content)));
    // re-emit the hash with a full four-space indent so that it is still an
    // indented code block (anything less no longer matches the indent required above)
    return before + "    " + hash + "\n";
  });

  // fenced code blocks (AKA GitHub code blocks)
  text = text.replace(/(^\n*|\n\n)```([a-z0-9\-]*)\n([\s\S]*?)\n```/g, function(_, before, language, content) {
    var hash = md5(content);
    hoisted[hash] = escape(showBackslashEscapedCharacters(content.trim()));
    return before + "```" + language + "\n" + hash + "\n```";
  });

  // code spans (double & single `); the double delimiter is processed first
  ["``", "`"].forEach(function(delimiter) {
    var regexp = new RegExp("(^|[^`])" + delimiter + "([^`\\n]+?)" + delimiter + "([^`]|$)", "g");
    text = text.replace(regexp, function(_, before, content, after) {
      var hash = md5(content);
      hoisted[hash] = escape(showBackslashEscapedCharacters(content.trim()));
      return before + delimiter + hash + delimiter + after;
    });
  });

  // restore the escape sequences that were hidden outside of code
  return showBackslashEscapedCharacters(text);
}
/**
An object used for rendering our dialects.
@ -183,14 +247,19 @@ Discourse.Dialect = {
cook: function(text, opts) {
if (!initialized) { initializeDialects(); }
dialect.options = opts;
// Helps us hoist out HTML
hoisted = {};
// pre-hoist all code-blocks/spans
text = hoistCodeBlocksAndSpans(text);
// pre-processors
preProcessors.forEach(function(p) {
text = p(text, hoister);
});
dialect.options = opts;
var tree = parser.toHTMLTree(text, 'Discourse'),
result = parser.renderJsonML(parseTree(tree));
@ -203,12 +272,11 @@ Discourse.Dialect = {
// If we hoisted out anything, put it back
var keys = Object.keys(hoisted);
if (keys.length) {
keys.forEach(function(k) {
result = result.replace(new RegExp(k,"g"), hoisted[k]);
keys.forEach(function(key) {
result = result.replace(new RegExp(key, "g"), hoisted[key]);
});
}
hoisted = {};
return result.trim();
},

View File

@ -216,15 +216,33 @@ test("Mentions", function() {
"<p><a href=\"https://twitter.com/codinghorror\">@codinghorror</a></p>",
"it doesn't do link mentions within links");
cooked("Hello @EvilTrout", "<p>Hello <span class=\"mention\">@EvilTrout</span></p>", "adds a mention class");
cooked("robin@email.host", "<p>robin@email.host</p>", "won't add mention class to an email address");
cooked("hanzo55@yahoo.com", "<p>hanzo55@yahoo.com</p>", "won't be affected by email addresses that have a number before the @ symbol");
cooked("@EvilTrout yo", "<p><span class=\"mention\">@EvilTrout</span> yo</p>", "it handles mentions at the beginning of a string");
cooked("yo\n@EvilTrout", "<p>yo<br/><span class=\"mention\">@EvilTrout</span></p>", "it handles mentions at the beginning of a new line");
cooked("Hello @EvilTrout",
"<p>Hello <span class=\"mention\">@EvilTrout</span></p>",
"adds a mention class");
cooked("robin@email.host",
"<p>robin@email.host</p>",
"won't add mention class to an email address");
cooked("hanzo55@yahoo.com",
"<p>hanzo55@yahoo.com</p>",
"won't be affected by email addresses that have a number before the @ symbol");
cooked("@EvilTrout yo",
"<p><span class=\"mention\">@EvilTrout</span> yo</p>",
"it handles mentions at the beginning of a string");
cooked("yo\n@EvilTrout",
"<p>yo<br/><span class=\"mention\">@EvilTrout</span></p>",
"it handles mentions at the beginning of a new line");
cooked("`evil` @EvilTrout `trout`",
"<p><code>evil</code> <span class=\"mention\">@EvilTrout</span> <code>trout</code></p>",
"deals correctly with multiple <code> blocks");
cooked("```\na @test\n```", "<p><pre><code class=\"lang-auto\">a @test</code></pre></p>", "should not do mentions within a code block.");
cooked("```\na @test\n```",
"<p><pre><code class=\"lang-auto\">a @test</code></pre></p>",
"should not do mentions within a code block.");
cooked("> foo bar baz @eviltrout",
"<blockquote><p>foo bar baz <span class=\"mention\">@eviltrout</span></p></blockquote>",
@ -357,7 +375,9 @@ test("Code Blocks", function() {
"<p><pre><code class=\"lang-ruby\">&lt;header&gt;hello&lt;/header&gt;</code></pre></p>",
"it escapes code in the code block");
cooked("```text\ntext\n```", "<p><pre><code class=\"lang-nohighlight\">text</code></pre></p>", "handles text by adding nohighlight");
cooked("```text\ntext\n```",
"<p><pre><code class=\"lang-nohighlight\">text</code></pre></p>",
"handles text by adding nohighlight");
cooked("```ruby\n# cool\n```",
"<p><pre><code class=\"lang-ruby\"># cool</code></pre></p>",
@ -403,7 +423,9 @@ test("Code Blocks", function() {
"<pre><code>[quote]test[/quote]</code></pre>",
"it does not parse other block types in markdown code blocks");
cooked("## a\nb\n```\nc\n```", "<h2>a</h2>\n\n<p><pre><code class=\"lang-auto\">c</code></pre></p>", "it handles headings with code blocks after them.");
cooked("## a\nb\n```\nc\n```",
"<h2>a</h2>\n\n<p><pre><code class=\"lang-auto\">c</code></pre></p>",
"it handles headings with code blocks after them.");
});
test("sanitize", function() {

View File

@ -73,7 +73,7 @@ Nor should these, which occur in code spans:
Backslash: `\\`
Backtick: `` \` ``
Backtick: `\``
Asterisk: `\*`
@ -113,7 +113,7 @@ other Markdown constructs:
\`backticks\`
This is a code span with a literal backslash-backtick sequence: `` \` ``
This is a code span with a literal backslash-backtick sequence: `\``
This is a tag with unescaped backticks <span attr='`ticks`'>bar</span>.

View File

@ -2,4 +2,4 @@
Fix for backticks within HTML tag: <span attr='`ticks`'>like this</span>
Here's how you put `` `backticks` `` in a code span.
Here's how you put `` `backticks` `` in a code span.

View File

@ -86,7 +86,7 @@ Multiple paragraphs:
1. Item 1, graf one.
Item 2. graf two. The quick brown fox jumped over the lazy dog's
Item 1. graf two. The quick brown fox jumped over the lazy dog's
back.
2. Item 2.

View File

@ -91,7 +91,7 @@
<ol>
<li><p>Item 1, graf one.</p>
<p>Item 2. graf two. The quick brown fox jumped over the lazy dog's
<p>Item 1. graf two. The quick brown fox jumped over the lazy dog's
back.</p></li>
<li><p>Item 2.</p></li>
<li><p>Item 3.</p></li>