diff --git a/app/assets/javascripts/discourse/components/bbcode.js b/app/assets/javascripts/discourse/components/bbcode.js deleted file mode 100644 index 27c8372fa42..00000000000 --- a/app/assets/javascripts/discourse/components/bbcode.js +++ /dev/null @@ -1,297 +0,0 @@ -/*global HANDLEBARS_TEMPLATES:true md5:true*/ - -/** - Support for BBCode rendering - - @class BBCode - @namespace Discourse - @module Discourse -**/ -Discourse.BBCode = { - - QUOTE_REGEXP: /\[quote=([^\]]*)\]((?:[\s\S](?!\[quote=[^\]]*\]))*?)\[\/quote\]/im, - IMG_REGEXP: /\[img\]([\s\S]*?)\[\/img\]/i, - URL_REGEXP: /\[url\]([\s\S]*?)\[\/url\]/i, - URL_WITH_TITLE_REGEXP: /\[url=(.+?)\]([\s\S]*?)\[\/url\]/i, - - // Define our replacers - replacers: { - base: { - withoutArgs: { - "ol": function(_, content) { return "
    " + content + "
"; }, - "li": function(_, content) { return "
  • " + content + "
  • "; }, - "ul": function(_, content) { return ""; }, - "code": function(_, content) { return "
    " + content + "
    "; }, - "url": function(_, url) { return "" + url + ""; }, - "email": function(_, address) { return "" + address + ""; }, - "img": function(_, src) { return ""; } - }, - withArgs: { - "url": function(_, href, title) { return "" + title + ""; }, - "email": function(_, address, title) { return "" + title + ""; }, - "color": function(_, color, content) { - if (!/^(\#[0-9a-fA-F]{3}([0-9a-fA-F]{3})?)|(aqua|black|blue|fuchsia|gray|green|lime|maroon|navy|olive|purple|red|silver|teal|white|yellow)$/.test(color)) { - return content; - } - return "" + content + ""; - } - } - }, - - // For HTML emails - email: { - withoutArgs: { - "b": function(_, content) { return "" + content + ""; }, - "i": function(_, content) { return "" + content + ""; }, - "u": function(_, content) { return "" + content + ""; }, - "s": function(_, content) { return "" + content + ""; }, - "spoiler": function(_, content) { return "" + content + ""; } - }, - withArgs: { - "size": function(_, size, content) { return "" + content + ""; } - } - }, - - // For sane environments that support CSS - "default": { - withoutArgs: { - "b": function(_, content) { return "" + content + ""; }, - "i": function(_, content) { return "" + content + ""; }, - "u": function(_, content) { return "" + content + ""; }, - "s": function(_, content) { return "" + content + ""; }, - "spoiler": function(_, content) { return "" + content + ""; - } - }, - withArgs: { - "size": function(_, size, content) { return "" + content + ""; } - } - } - }, - - /** - Apply a particular set of replacers - - @method apply - @param {String} text The text we want to format - @param {String} environment The environment in which this - **/ - apply: function(text, environment) { - var replacer = Discourse.BBCode.parsedReplacers()[environment]; - // apply all available replacers - replacer.forEach(function(r) { - text = text.replace(r.regexp, r.fn); - }); - return text; - }, - - /** - Lazy parse replacers - - @property parsedReplacers - **/ - 
parsedReplacers: function() { - if (this.parsed) return this.parsed; - - var result = {}; - - _.each(Discourse.BBCode.replacers, function(rules, name) { - - var parsed = result[name] = []; - - _.each(_.extend(Discourse.BBCode.replacers.base.withoutArgs, rules.withoutArgs), function(val, tag) { - parsed.push({ regexp: new RegExp("\\[" + tag + "\\]([\\s\\S]*?)\\[\\/" + tag + "\\]", "igm"), fn: val }); - }); - - _.each(_.extend(Discourse.BBCode.replacers.base.withArgs, rules.withArgs), function(val, tag) { - parsed.push({ regexp: new RegExp("\\[" + tag + "=?(.+?)\\]([\\s\\S]*?)\\[\\/" + tag + "\\]", "igm"), fn: val }); - }); - - }); - - this.parsed = result; - return this.parsed; - }, - - /** - Build the BBCode quote around the selected text - - @method buildQuoteBBCode - @param {Discourse.Post} post The post we are quoting - @param {String} contents The text selected - **/ - buildQuoteBBCode: function(post, contents) { - var contents_hashed, result, sansQuotes, stripped, stripped_hashed, tmp; - if (!contents) contents = ""; - - sansQuotes = contents.replace(this.QUOTE_REGEXP, '').trim(); - if (sansQuotes.length === 0) return ""; - - result = "[quote=\"" + (post.get('username')) + ", post:" + (post.get('post_number')) + ", topic:" + (post.get('topic_id')); - - /* Strip the HTML from cooked */ - tmp = document.createElement('div'); - tmp.innerHTML = post.get('cooked'); - stripped = tmp.textContent || tmp.innerText; - - /* - Let's remove any non alphanumeric characters as a kind of hash. Yes it's - not accurate but it should work almost every time we need it to. It would be unlikely - that the user would quote another post that matches in exactly this way. 
- */ - stripped_hashed = stripped.replace(/[^a-zA-Z0-9]/g, ''); - contents_hashed = contents.replace(/[^a-zA-Z0-9]/g, ''); - - /* If the quote is the full message, attribute it as such */ - if (stripped_hashed === contents_hashed) result += ", full:true"; - result += "\"]\n" + sansQuotes + "\n[/quote]\n\n"; - - return result; - }, - - /** - We want to remove urls in BBCode tags from a string before applying markdown - to prevent them from being modified by markdown. - This will return an object that contains: - - a new version of the text with the urls replaced with unique ids - - a `template()` function for reapplying them later. - - @method extractUrls - @param {String} text The text inside which we want to replace urls - @returns {Object} object containing the new string and template function - **/ - extractUrls: function(text) { - var result = { text: "" + text, replacements: [] }; - var replacements = []; - var matches, key; - - _.each([Discourse.BBCode.IMG_REGEXP, Discourse.BBCode.URL_REGEXP, Discourse.BBCode.URL_WITH_TITLE_REGEXP], function(r) { - while (matches = r.exec(result.text)) { - key = md5(matches[0]); - replacements.push({ key: key, value: matches[0] }); - result.text = result.text.replace(matches[0], key); - } - }); - - result.template = function(input) { - _.each(replacements, function(r) { - input = input.replace(r.key, r.value); - }); - return input; - }; - - return (result); - }, - - - /** - We want to remove quotes from a string before applying markdown to avoid - weird stuff with newlines and such. This will return an object that - contains a new version of the text with the quotes replaced with - unique ids and `template()` function for reapplying them later. 
- - @method extractQuotes - @param {String} text The text inside which we want to replace quotes - @returns {Object} object containing the new string and template function - **/ - extractQuotes: function(text) { - var result = { text: "" + text, replacements: [] }; - var replacements = []; - var matches, key; - - while (matches = Discourse.BBCode.QUOTE_REGEXP.exec(result.text)) { - key = md5(matches[0]); - replacements.push({ - key: key, - value: matches[0], - content: matches[2].trim() - }); - result.text = result.text.replace(matches[0], key + "\n"); - } - - result.template = function(input) { - _.each(replacements,function(r) { - var val = r.value.trim(); - val = val.replace(r.content, r.content.replace(/\n/g, '
    ')); - input = input.replace(r.key, val); - }); - return input; - }; - - return (result); - }, - - /** - Replace quotes with appropriate markup - - @method formatQuote - @param {String} text The text inside which we want to replace quotes - @param {Object} opts Rendering options - **/ - formatQuote: function(text, opts) { - var args, matches, params, paramsSplit, paramsString, templateName, username; - - var splitter = function(p,i) { - if (i > 0) { - var assignment = p.split(':'); - if (assignment[0] && assignment[1]) { - return params.push({ - key: assignment[0], - value: assignment[1].trim() - }); - } - } - }; - - while (matches = this.QUOTE_REGEXP.exec(text)) { - paramsString = matches[1].replace(/\"/g, ''); - paramsSplit = paramsString.split(/\, */); - params = []; - _.each(paramsSplit, splitter); - username = paramsSplit[0]; - - // remove leading
    s - var content = matches[2].trim(); - - var avatarImg; - if (opts.lookupAvatarByPostNumber) { - // client-side, we can retrieve the avatar from the post - var postNumber = parseInt(_.find(params, { 'key' : 'post' }).value, 10); - avatarImg = opts.lookupAvatarByPostNumber(postNumber); - } else if (opts.lookupAvatar) { - // server-side, we need to lookup the avatar from the username - avatarImg = opts.lookupAvatar(username); - } - - // Arguments for formatting - args = { - username: I18n.t('user.said', {username: username}), - params: params, - quote: content, - avatarImg: avatarImg - }; - - // Name of the template - templateName = 'quote'; - if (opts && opts.environment) templateName = "quote_" + opts.environment; - // Apply the template - text = text.replace(matches[0], "

    " + HANDLEBARS_TEMPLATES[templateName](args) + "

    "); - } - return text; - }, - - /** - Format a text string using BBCode - - @method format - @param {String} text The text we want to format - @param {Object} opts Rendering options - **/ - format: function(text, opts) { - var environment = opts && opts.environment ? opts.environment : 'default'; - // Apply replacers for basic tags - text = Discourse.BBCode.apply(text, environment); - // Format - text = Discourse.BBCode.formatQuote(text, opts); - return text; - } -}; diff --git a/app/assets/javascripts/discourse/components/markdown.js b/app/assets/javascripts/discourse/components/markdown.js index 33528c4259e..57fb2e0dd43 100644 --- a/app/assets/javascripts/discourse/components/markdown.js +++ b/app/assets/javascripts/discourse/components/markdown.js @@ -1,4 +1,4 @@ -/*global Markdown:true */ +/*global Markdown:true BetterMarkdown:true */ /** Contains methods to help us with markdown formatting. @@ -94,116 +94,30 @@ Discourse.Markdown = { markdownConverter: function(opts) { if (!opts) opts = {}; - var converter = new Markdown.Converter(); - var mentionLookup = opts.mentionLookup || Discourse.Mention.lookupCache; + return { + makeHtml: function(text) { - var quoteTemplate = null, urlsTemplate = null; - - // Before cooking callbacks - converter.hooks.chain("preConversion", function(text) { - // If a user puts text right up against a quote, make sure the spacing is equivalnt to a new line - return text.replace(/\[\/quote\]/, "[/quote]\n"); - }); - - converter.hooks.chain("preConversion", function(text) { - Discourse.Markdown.textResult = null; - Discourse.Markdown.trigger('beforeCook', { detail: text, opts: opts }); - return Discourse.Markdown.textResult || text; - }); - - // Extract quotes so their contents are not passed through markdown. 
- converter.hooks.chain("preConversion", function(text) { - var extracted = Discourse.BBCode.extractQuotes(text); - quoteTemplate = extracted.template; - return extracted.text; - }); - - // Extract urls in BBCode tags so they are not passed through markdown. - converter.hooks.chain("preConversion", function(text) { - var extracted = Discourse.BBCode.extractUrls(text); - urlsTemplate = extracted.template; - return extracted.text; - }); - - // Support autolinking of www.something.com - converter.hooks.chain("preConversion", function(text) { - return text.replace(/(^|[\s\n])(www\.[a-z\.\-\_\(\)\/\?\=\%0-9]+)/gim, function(full, _, rest) { - return " " + rest + ""; - }); - }); - - // newline prediction in trivial cases - var linebreaks = opts.traditional_markdown_linebreaks || Discourse.SiteSettings.traditional_markdown_linebreaks; - if (!linebreaks) { - converter.hooks.chain("preConversion", function(text) { - return text.replace(/(^[\w<][^\n]*\n+)/gim, function(t) { - if (t.match(/\n{2}/gim)) return t; - return t.replace("\n", " \n"); - }); - }); - } - - // github style fenced code - converter.hooks.chain("preConversion", function(text) { - return text.replace(/^`{3}(?:(.*$)\n)?([\s\S]*?)^`{3}/gm, function(wholeMatch, m1, m2) { - var escaped = Handlebars.Utils.escapeExpression(m2); - return "

    " + escaped + "
    "; - }); - }); - - converter.hooks.chain("postConversion", function(text) { - if (!text) return ""; - - // don't do @username mentions inside
     or  blocks
    -      text = text.replace(/<(pre|code)>([\s\S](?!<(pre|code)>))*?@([\s\S](?!<(pre|code)>))*?<\/(pre|code)>/gi, function(m) {
    -        return m.replace(/@/g, '@');
    -      });
    -
    -      // add @username mentions, if valid; must be bounded on left and right by non-word characters
    -      text = text.replace(/(\W)(@[A-Za-z0-9][A-Za-z0-9_]{2,14})(?=\W)/g, function(x, pre, name) {
    -        if (mentionLookup(name.substr(1))) {
    -          return pre + "" + name + "";
    -        } else {
    -          return pre + "" + name + "";
    +        // Linebreaks
    +        var linebreaks = opts.traditional_markdown_linebreaks || Discourse.SiteSettings.traditional_markdown_linebreaks;
    +        if (!linebreaks) {
    +          text = text.replace(/(^[\w<][^\n]*\n+)/gim, function(t) {
    +            if (t.match(/\n{2}/gim)) return t;
    +            return t.replace("\n", "  \n");
    +          });
             }
    -      });
     
    -      // a primitive attempt at oneboxing, this regex gives me much eye sores
    -      text = text.replace(/(
  • )?((

    |
    )[\s\n\r]*)(]*)>([^<]+<\/a>[\s\n\r]*(?=<\/p>|
    ))/gi, function() { - // We don't onebox items in a list - if (arguments[1]) return arguments[0]; - var url = arguments[5]; - var onebox; + text = Discourse.Dialect.cook(text, opts); - if (Discourse && Discourse.Onebox) { - onebox = Discourse.Onebox.lookupCache(url); + if (!text) return ""; + + if (opts.sanitize) { + if (!window.sanitizeHtml) return ""; + text = window.sanitizeHtml(text); } - if (onebox && onebox.trim().length > 0) { - return arguments[2] + onebox; - } else { - return arguments[2] + arguments[4] + " class=\"onebox\" target=\"_blank\">" + arguments[6]; - } - }); - return(text); - }); - - converter.hooks.chain("postConversion", function(text) { - // reapply quotes - if (quoteTemplate) { text = quoteTemplate(text); } - // reapply urls - if (urlsTemplate) { text = urlsTemplate(text); } - // format with BBCode - return Discourse.BBCode.format(text, opts); - }); - - if (opts.sanitize) { - converter.hooks.chain("postConversion", function(text) { - if (!window.sanitizeHtml) return ""; - return window.sanitizeHtml(text); - }); - } - return converter; + return text; + } + }; } }; diff --git a/app/assets/javascripts/discourse/components/quote.js b/app/assets/javascripts/discourse/components/quote.js new file mode 100644 index 00000000000..496a2dc99f0 --- /dev/null +++ b/app/assets/javascripts/discourse/components/quote.js @@ -0,0 +1,48 @@ +/** + Build the BBCode for a Quote + + @class BBCode + @namespace Discourse + @module Discourse +**/ +Discourse.Quote = { + + REGEXP: /\[quote=([^\]]*)\]((?:[\s\S](?!\[quote=[^\]]*\]))*?)\[\/quote\]/im, + + /** + Build the BBCode quote around the selected text + + @method buildQuote + @param {Discourse.Post} post The post we are quoting + @param {String} contents The text selected + **/ + build: function(post, contents) { + var contents_hashed, result, sansQuotes, stripped, stripped_hashed, tmp; + if (!contents) contents = ""; + + sansQuotes = contents.replace(this.REGEXP, '').trim(); + if (sansQuotes.length === 0) 
return ""; + + result = "[quote=\"" + post.get('username') + ", post:" + post.get('post_number') + ", topic:" + post.get('topic_id'); + + /* Strip the HTML from cooked */ + tmp = document.createElement('div'); + tmp.innerHTML = post.get('cooked'); + stripped = tmp.textContent || tmp.innerText; + + /* + Let's remove any non alphanumeric characters as a kind of hash. Yes it's + not accurate but it should work almost every time we need it to. It would be unlikely + that the user would quote another post that matches in exactly this way. + */ + stripped_hashed = stripped.replace(/[^a-zA-Z0-9]/g, ''); + contents_hashed = contents.replace(/[^a-zA-Z0-9]/g, ''); + + /* If the quote is the full message, attribute it as such */ + if (stripped_hashed === contents_hashed) result += ", full:true"; + result += "\"]\n" + sansQuotes + "\n[/quote]\n\n"; + + return result; + } + +}; diff --git a/app/assets/javascripts/discourse/controllers/quote_button_controller.js b/app/assets/javascripts/discourse/controllers/quote_button_controller.js index a4b08f73174..e0713bb8945 100644 --- a/app/assets/javascripts/discourse/controllers/quote_button_controller.js +++ b/app/assets/javascripts/discourse/controllers/quote_button_controller.js @@ -117,7 +117,7 @@ Discourse.QuoteButtonController = Discourse.Controller.extend({ } var buffer = this.get('buffer'); - var quotedText = Discourse.BBCode.buildQuoteBBCode(post, buffer); + var quotedText = Discourse.Quote.build(post, buffer); if (composerController.get('content.replyDirty')) { composerController.appendText(quotedText); } else { diff --git a/app/assets/javascripts/discourse/controllers/topic_controller.js b/app/assets/javascripts/discourse/controllers/topic_controller.js index 4db234f377e..6dc6c23c60b 100644 --- a/app/assets/javascripts/discourse/controllers/topic_controller.js +++ b/app/assets/javascripts/discourse/controllers/topic_controller.js @@ -323,7 +323,7 @@ Discourse.TopicController = 
Discourse.ObjectController.extend(Discourse.Selected replyToPost: function(post) { var composerController = this.get('controllers.composer'); var quoteController = this.get('controllers.quoteButton'); - var quotedText = Discourse.BBCode.buildQuoteBBCode(quoteController.get('post'), quoteController.get('buffer')); + var quotedText = Discourse.Quote.build(quoteController.get('post'), quoteController.get('buffer')); var topic = post ? post.get('topic') : this.get('model'); diff --git a/app/assets/javascripts/discourse/dialects/autolink_dialect.js b/app/assets/javascripts/discourse/dialects/autolink_dialect.js new file mode 100644 index 00000000000..897ca40744f --- /dev/null +++ b/app/assets/javascripts/discourse/dialects/autolink_dialect.js @@ -0,0 +1,63 @@ +/** + This addition handles auto linking of text. When included, it will parse out links and create + a hrefs for them. + + @event register + @namespace Discourse.Dialect +**/ +Discourse.Dialect.on("register", function(event) { + + var dialect = event.dialect, + MD = event.MD; + + /** + Parses out links from HTML. + + @method autoLink + @param {Markdown.Block} block the block to examine + @param {Array} next the next blocks in the sequence + @return {Array} the JsonML containing the markup or undefined if nothing changed. 
+ @namespace Discourse.Dialect + **/ + dialect.block['autolink'] = function autoLink(block, next) { + var pattern = /(^|\s)((?:https?:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.])(?:[^\s()<>]+|\([^\s()<>]+\))+(?:\([^\s()<>]+\)|[^`!()\[\]{};:'".,<>?«»“”‘’\s]))/gm, + result, + remaining = block, + m; + + var pushIt = function(p) { result.push(p) }; + + while (m = pattern.exec(remaining)) { + result = result || ['p']; + + var url = m[2], + urlIndex = remaining.indexOf(url), + before = remaining.slice(0, urlIndex); + + if (before.match(/\[\d+\]/)) { return; } + + pattern.lastIndex = 0; + remaining = remaining.slice(urlIndex + url.length); + + if (before) { + this.processInline(before).forEach(pushIt); + } + + var displayUrl = url; + if (url.match(/^www/)) { url = "http://" + url; } + result.push(['a', {href: url}, displayUrl]); + + if (remaining && remaining.match(/\n/)) { + next.unshift(MD.mk_block(remaining)); + } + } + + if (result) { + if (remaining.length) { + this.processInline(remaining).forEach(pushIt); + } + return [result]; + } + }; + +}); \ No newline at end of file diff --git a/app/assets/javascripts/discourse/dialects/bbcode_dialect.js b/app/assets/javascripts/discourse/dialects/bbcode_dialect.js new file mode 100644 index 00000000000..1c4c38ea6b1 --- /dev/null +++ b/app/assets/javascripts/discourse/dialects/bbcode_dialect.js @@ -0,0 +1,171 @@ +/** + Regsiter all functionality for supporting BBCode in Discourse. 
+ + @event register + @namespace Discourse.Dialect +**/ +Discourse.Dialect.on("register", function(event) { + + var dialect = event.dialect, + MD = event.MD; + + var createBBCode = function(tag, builder, hasArgs) { + return function(text, orig_match) { + var bbcodePattern = new RegExp("\\[" + tag + "=?([^\\[\\]]+)?\\]([\\s\\S]*?)\\[\\/" + tag + "\\]", "igm"); + var m = bbcodePattern.exec(text); + if (m && m[0]) { + return [m[0].length, builder(m, this)]; + } + }; + }; + + var bbcodes = {'b': ['span', {'class': 'bbcode-b'}], + 'i': ['span', {'class': 'bbcode-i'}], + 'u': ['span', {'class': 'bbcode-u'}], + 's': ['span', {'class': 'bbcode-s'}], + 'spoiler': ['span', {'class': 'spoiler'}], + 'li': ['li'], + 'ul': ['ul'], + 'ol': ['ol']}; + + Object.keys(bbcodes).forEach(function(tag) { + var element = bbcodes[tag]; + dialect.inline["[" + tag + "]"] = createBBCode(tag, function(m, self) { + return element.concat(self.processInline(m[2])); + }); + }); + + dialect.inline["[img]"] = createBBCode('img', function(m) { + return ['img', {href: m[2]}]; + }); + + dialect.inline["[email]"] = createBBCode('email', function(m) { + return ['a', {href: "mailto:" + m[2], 'data-bbcode': true}, m[2]]; + }); + + dialect.inline["[url]"] = createBBCode('url', function(m) { + return ['a', {href: m[2], 'data-bbcode': true}, m[2]]; + }); + + dialect.inline["[url="] = createBBCode('url', function(m, self) { + return ['a', {href: m[1], 'data-bbcode': true}].concat(self.processInline(m[2])); + }); + + dialect.inline["[email="] = createBBCode('email', function(m, self) { + return ['a', {href: "mailto:" + m[1], 'data-bbcode': true}].concat(self.processInline(m[2])); + }); + + dialect.inline["[size="] = createBBCode('size', function(m, self) { + return ['span', {'class': "bbcode-size-" + m[1]}].concat(self.processInline(m[2])); + }); + + dialect.inline["[color="] = function(text, orig_match) { + var bbcodePattern = new RegExp("\\[color=?([^\\[\\]]+)?\\]([\\s\\S]*?)\\[\\/color\\]", "igm"), + m = 
bbcodePattern.exec(text); + + if (m && m[0]) { + if (!/^(\#[0-9a-fA-F]{3}([0-9a-fA-F]{3})?)|(aqua|black|blue|fuchsia|gray|green|lime|maroon|navy|olive|purple|red|silver|teal|white|yellow)$/.test(m[1])) { + return [m[0].length].concat(this.processInline(m[2])); + } + return [m[0].length, ['span', {style: "color: " + m[1]}].concat(this.processInline(m[2]))]; + } + }; + + /** + Support BBCode [code] blocks + + @method bbcodeCode + @param {Markdown.Block} block the block to examine + @param {Array} next the next blocks in the sequence + @return {Array} the JsonML containing the markup or undefined if nothing changed. + @namespace Discourse.Dialect + **/ + dialect.inline["[code]"] = function bbcodeCode(text, orig_match) { + var bbcodePattern = new RegExp("\\[code\\]([\\s\\S]*?)\\[\\/code\\]", "igm"), + m = bbcodePattern.exec(text); + + if (m) { + var contents = m[1].trim().split("\n"); + + var html = ['pre', "\n"]; + contents.forEach(function (n) { + html.push(n.trim()); + html.push(["br"]); + html.push("\n"); + }); + + return [m[0].length, html]; + } + }; + + /** + Support BBCode [quote] blocks + + @method bbcodeQuote + @param {Markdown.Block} block the block to examine + @param {Array} next the next blocks in the sequence + @return {Array} the JsonML containing the markup or undefined if nothing changed. 
+ @namespace Discourse.Dialect + **/ + dialect.inline["[quote="] = function bbcodeQuote(text, orig_match) { + var bbcodePattern = new RegExp("\\[quote=?([^\\[\\]]+)?\\]([\\s\\S]*?)\\[\\/quote\\]", "igm"), + m = bbcodePattern.exec(text); + + if (!m) { return; } + var paramsString = m[1].replace(/\"/g, ''), + params = {'class': 'quote'}, + paramsSplit = paramsString.split(/\, */), + username = paramsSplit[0], + opts = dialect.options; + + paramsSplit.forEach(function(p,i) { + if (i > 0) { + var assignment = p.split(':'); + if (assignment[0] && assignment[1]) { + params['data-' + assignment[0]] = assignment[1].trim(); + } + } + }); + + var avatarImg; + if (opts.lookupAvatarByPostNumber) { + // client-side, we can retrieve the avatar from the post + var postNumber = parseInt(params['data-post'], 10); + avatarImg = opts.lookupAvatarByPostNumber(postNumber); + } else if (opts.lookupAvatar) { + // server-side, we need to lookup the avatar from the username + avatarImg = opts.lookupAvatar(username); + } + + var quote = ['aside', params, + ['div', {'class': 'title'}, + ['div', {'class': 'quote-controls'}], + avatarImg ? avatarImg + "\n" : "", + I18n.t('user.said',{username: username}) + ], + ['blockquote'].concat(this.processInline(m[2])) + ]; + + return [m[0].length, quote]; + }; + +}); + + +Discourse.Dialect.on("parseNode", function(event) { + + var node = event.node, + path = event.path; + + // Make sure any quotes are followed by a
    . The formatting looks weird otherwise. + if (node[0] === 'aside' && node[1] && node[1]['class'] === 'quote') { + var parent = path[path.length - 1], + location = parent.indexOf(node)+1, + trailing = parent.slice(location); + + if (trailing.length) { + parent.splice(location, 0, ['br']); + } + } + +}); diff --git a/app/assets/javascripts/discourse/dialects/dialect.js b/app/assets/javascripts/discourse/dialects/dialect.js new file mode 100644 index 00000000000..3fe570c2d25 --- /dev/null +++ b/app/assets/javascripts/discourse/dialects/dialect.js @@ -0,0 +1,110 @@ +/** + + Discourse uses the Markdown.js as its main parser. `Discourse.Dialect` is the framework + for extending it with additional formatting. + + To extend the dialect, you can register a handler, and you will receive an `event` object + with a handle to the markdown `Dialect` from Markdown.js that we are defining. Here's + a sample dialect that replaces all occurances of "evil trout" with a link that says + "EVIL TROUT IS AWESOME": + + ```javascript + + Discourse.Dialect.on("register", function(event) { + var dialect = event.dialect; + + // To see how this works, review one of our samples or the Markdown.js code: + dialect.inline["evil trout"] = function(text) { + return ["evil trout".length, ['a', {href: "http://eviltrout.com"}, "EVIL TROUT IS AWESOME"] ]; + }; + + }); + ``` + + You can also manipulate the JsonML tree that is produced by the parser before it converted to HTML. + This is useful if the markup you want needs a certain structure of HTML elements. Rather than + writing regular expressions to match HTML, consider parsing the tree instead! We use this for + making sure a onebox is on one line, as an example. + + This example changes the content of any `` tags. + + The `event.path` attribute contains the current path to the node. 
+ + ```javascript + Discourse.Dialect.on("parseNode", function(event) { + var node = event.node, + path = event.path; + + if (node[0] === 'code') { + node[node.length-1] = "EVIL TROUT HACKED YOUR CODE"; + } + }); + ``` + +**/ +var parser = window.BetterMarkdown, + MD = parser.Markdown, + + // Our dialect + dialect = MD.dialects.Discourse = MD.subclassDialect( MD.dialects.Gruber ), + + initialized = false, + + /** + Initialize our dialects for processing. + + @method initializeDialects + **/ + initializeDialects = function() { + Discourse.Dialect.trigger('register', {dialect: dialect, MD: MD}); + MD.buildBlockOrder(dialect.block); + MD.buildInlinePatterns(dialect.inline); + initialized = true; + }, + + /** + Parse a JSON ML tree, using registered handlers to adjust it if necessary. + + @method parseTree + @param {Array} tree the JsonML tree to parse + @param {Array} path the path of ancestors to the current node in the tree. Can be used for matching. + @returns {Array} the parsed tree + **/ + parseTree = function parseTree(tree, path) { + if (tree instanceof Array) { + Discourse.Dialect.trigger('parseNode', {node: tree, path: path}); + + path = path || []; + path.push(tree); + tree.slice(1).forEach(function (n) { + parseTree(n, path); + }); + path.pop(); + } + return tree; + }; + +/** + An object used for rendering our dialects. + + @class Dialect + @namespace Discourse + @module Discourse +**/ +Discourse.Dialect = { + + /** + Cook text using the dialects. 
+ + @method cook + @param {String} text the raw text to cook + @returns {String} the cooked text + **/ + cook: function(text, opts) { + if (!initialized) { initializeDialects(); } + dialect.options = opts; + return parser.renderJsonML(parseTree(parser.toHTMLTree(text, 'Discourse'))); + } +}; + +RSVP.EventTarget.mixin(Discourse.Dialect); diff --git a/app/assets/javascripts/discourse/dialects/github_code_dialect.js b/app/assets/javascripts/discourse/dialects/github_code_dialect.js new file mode 100644 index 00000000000..425194dfb1a --- /dev/null +++ b/app/assets/javascripts/discourse/dialects/github_code_dialect.js @@ -0,0 +1,87 @@ +/** + Support for github style code blocks, here you begin with three backticks and supply a language, + The language is made into a class on the resulting `` element. + + @event register + @namespace Discourse.Dialect +**/ +Discourse.Dialect.on("register", function(event) { + var dialect = event.dialect, + MD = event.MD; + + /** + Support for github style code blocks + + @method githubCode + @param {Markdown.Block} block the block to examine + @param {Array} next the next blocks in the sequence + @return {Array} the JsonML containing the markup or undefined if nothing changed. + @namespace Discourse.Dialect + **/ + dialect.block['github_code'] = function githubCode(block, next) { + var m = /^`{3}([^\n]+)?\n?([\s\S]*)?/gm.exec(block); + + if (m) { + var startPos = block.indexOf(m[0]), + leading, + codeContents = [], + result = [], + lineNumber = block.lineNumber; + + if (startPos > 0) { + leading = block.slice(0, startPos); + lineNumber += (leading.split("\n").length - 1); + + var para = ['p']; + this.processInline(leading).forEach(function (l) { + para.push(l); + }); + + result.push(para); + } + + if (m[2]) { next.unshift(MD.mk_block(m[2], null, lineNumber + 1)); } + + while (next.length > 0) { + var b = next.shift(), + n = b.match(/([^`]*)```([^`]*)/m), + diff = ((typeof b.lineNumber === "undefined") ? 
lineNumber : b.lineNumber) - lineNumber; + + lineNumber = b.lineNumber; + for (var i=1; i 0) { + var prev = siblings[idx-1]; + if (prev[0] !== 'br') { return false; } + } + + if (idx < siblings.length) { + var next = siblings[idx+1]; + if (next && (!((next[0] === 'br') || (typeof next === 'string' && next.trim() === "")))) { return false; } + } + + return true; +}; + +/** + We only onebox stuff that is on its own line. This navigates the JsonML tree and + correctly inserts the oneboxes. + + @event parseNode + @namespace Discourse.Dialect +**/ +Discourse.Dialect.on("parseNode", function(event) { + var node = event.node, + path = event.path; + + // We only care about links + if (node[0] !== 'a') { return; } + + var parent = path[path.length - 1]; + + // We don't onebox bbcode + if (node[1]['data-bbcode']) { + delete node[1]['data-bbcode']; + return; + } + + // Don't onebox links within a list + for (var i=0; i - // and tags get encoded. - // - - // This will only happen if makeHtml on the same converter instance is called from a plugin hook. - // Don't do that. - if (g_urls) - throw new Error("Recursive call to converter.makeHtml"); - - // Create the private state objects. - g_urls = new SaveHash(); - g_titles = new SaveHash(); - g_html_blocks = []; - g_list_level = 0; - - text = pluginHooks.preConversion(text); - - // attacklab: Replace ~ with ~T - // This lets us use tilde as an escape char to avoid md5 hashes - // The choice of character is arbitray; anything that isn't - // magic in Markdown will work. - text = text.replace(/~/g, "~T"); - - // attacklab: Replace $ with ~D - // RegExp interprets $ as a special character - // when it's in a replacement string - text = text.replace(/\$/g, "~D"); - - // Standardize line endings - text = text.replace(/\r\n/g, "\n"); // DOS to Unix - text = text.replace(/\r/g, "\n"); // Mac to Unix - - // Make sure text begins and ends with a couple of newlines: - text = "\n\n" + text + "\n\n"; - - // Convert all tabs to spaces. 
- text = _Detab(text); - - // Strip any lines consisting only of spaces and tabs. - // This makes subsequent regexen easier to write, because we can - // match consecutive blank lines with /\n+/ instead of something - // contorted like /[ \t]*\n+/ . - text = text.replace(/^[ \t]+$/mg, ""); - - text = pluginHooks.postNormalization(text); - - // Turn block-level HTML blocks into hash entries - text = _HashHTMLBlocks(text); - - // Strip link definitions, store in hashes. - text = _StripLinkDefinitions(text); - - text = _RunBlockGamut(text); - - text = _UnescapeSpecialChars(text); - - // attacklab: Restore dollar signs - text = text.replace(/~D/g, "$$"); - - // attacklab: Restore tildes - text = text.replace(/~T/g, "~"); - - text = pluginHooks.postConversion(text); - - g_html_blocks = g_titles = g_urls = null; - - return text; - }; - - function _StripLinkDefinitions(text) { - // - // Strips link definitions from text, stores the URLs and titles in - // hash references. - // - - // Link defs are in the form: ^[id]: url "optional title" - - /* - text = text.replace(/ - ^[ ]{0,3}\[(.+)\]: // id = $1 attacklab: g_tab_width - 1 - [ \t]* - \n? // maybe *one* newline - [ \t]* - ? // url = $2 - (?=\s|$) // lookahead for whitespace instead of the lookbehind removed below - [ \t]* - \n? // maybe one newline - [ \t]* - ( // (potential) title = $3 - (\n*) // any lines skipped = $4 attacklab: lookbehind removed - [ \t]+ - ["(] - (.+?) // title = $5 - [")] - [ \t]* - )? // title is optional - (?:\n+|$) - /gm, function(){...}); - */ - - text = text.replace(/^[ ]{0,3}\[(.+)\]:[ \t]*\n?[ \t]*?(?=\s|$)[ \t]*\n?[ \t]*((\n*)["(](.+?)[")][ \t]*)?(?:\n+)/gm, - function (wholeMatch, m1, m2, m3, m4, m5) { - m1 = m1.toLowerCase(); - g_urls.set(m1, _EncodeAmpsAndAngles(m2)); // Link IDs are case-insensitive - if (m4) { - // Oops, found blank lines, so it's not a title. - // Put back the parenthetical statement we stole. 
- return m3; - } else if (m5) { - g_titles.set(m1, m5.replace(/"/g, """)); - } - - // Completely remove the definition from the text - return ""; - } - ); - - return text; - } - - function _HashHTMLBlocks(text) { - - // Hashify HTML blocks: - // We only want to do this for block-level HTML tags, such as headers, - // lists, and tables. That's because we still want to wrap

    s around - // "paragraphs" that are wrapped in non-block-level tags, such as anchors, - // phrase emphasis, and spans. The list of tags we're looking for is - // hard-coded: - var block_tags_a = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del" - var block_tags_b = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math" - - // First, look for nested blocks, e.g.: - //

    - //
    - // tags for inner block must be indented. - //
    - //
    - // - // The outermost tags must start at the left margin for this to match, and - // the inner nested divs must be indented. - // We need to do this before the next, more liberal match, because the next - // match will start at the first `
    ` and stop at the first `
    `. - - // attacklab: This regex can be expensive when it fails. - - /* - text = text.replace(/ - ( // save in $1 - ^ // start of line (with /m) - <($block_tags_a) // start tag = $2 - \b // word break - // attacklab: hack around khtml/pcre bug... - [^\r]*?\n // any number of lines, minimally matching - // the matching end tag - [ \t]* // trailing spaces/tabs - (?=\n+) // followed by a newline - ) // attacklab: there are sentinel newlines at end of document - /gm,function(){...}}; - */ - text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del)\b[^\r]*?\n<\/\2>[ \t]*(?=\n+))/gm, hashElement); - - // - // Now match more liberally, simply from `\n` to `\n` - // - - /* - text = text.replace(/ - ( // save in $1 - ^ // start of line (with /m) - <($block_tags_b) // start tag = $2 - \b // word break - // attacklab: hack around khtml/pcre bug... - [^\r]*? // any number of lines, minimally matching - .* // the matching end tag - [ \t]* // trailing spaces/tabs - (?=\n+) // followed by a newline - ) // attacklab: there are sentinel newlines at end of document - /gm,function(){...}}; - */ - text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math)\b[^\r]*?.*<\/\2>[ \t]*(?=\n+)\n)/gm, hashElement); - - // Special case just for
    . It was easier to make a special case than - // to make the other regex more complicated. - - /* - text = text.replace(/ - \n // Starting after a blank line - [ ]{0,3} - ( // save in $1 - (<(hr) // start tag = $2 - \b // word break - ([^<>])*? - \/?>) // the matching end tag - [ \t]* - (?=\n{2,}) // followed by a blank line - ) - /g,hashElement); - */ - text = text.replace(/\n[ ]{0,3}((<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g, hashElement); - - // Special case for standalone HTML comments: - - /* - text = text.replace(/ - \n\n // Starting after a blank line - [ ]{0,3} // attacklab: g_tab_width - 1 - ( // save in $1 - -]|-[^>])(?:[^-]|-[^-])*)--) // see http://www.w3.org/TR/html-markup/syntax.html#comments and http://meta.stackoverflow.com/q/95256 - > - [ \t]* - (?=\n{2,}) // followed by a blank line - ) - /g,hashElement); - */ - text = text.replace(/\n\n[ ]{0,3}(-]|-[^>])(?:[^-]|-[^-])*)--)>[ \t]*(?=\n{2,}))/g, hashElement); - - // PHP and ASP-style processor instructions ( and <%...%>) - - /* - text = text.replace(/ - (?: - \n\n // Starting after a blank line - ) - ( // save in $1 - [ ]{0,3} // attacklab: g_tab_width - 1 - (?: - <([?%]) // $2 - [^\r]*? 
- \2> - ) - [ \t]* - (?=\n{2,}) // followed by a blank line - ) - /g,hashElement); - */ - text = text.replace(/(?:\n\n)([ ]{0,3}(?:<([?%])[^\r]*?\2>)[ \t]*(?=\n{2,}))/g, hashElement); - - return text; - } - - function hashElement(wholeMatch, m1) { - var blockText = m1; - - // Undo double lines - blockText = blockText.replace(/^\n+/, ""); - - // strip trailing blank lines - blockText = blockText.replace(/\n+$/g, ""); - - // Replace the element text with a marker ("~KxK" where x is its key) - blockText = "\n\n~K" + (g_html_blocks.push(blockText) - 1) + "K\n\n"; - - return blockText; - } - - var blockGamutHookCallback = function (t) { return _RunBlockGamut(t); } - - function _RunBlockGamut(text, doNotUnhash) { - // - // These are all the transformations that form block-level - // tags like paragraphs, headers, and list items. - // - - text = pluginHooks.preBlockGamut(text, blockGamutHookCallback); - - text = _DoHeaders(text); - - // Do Horizontal Rules: - var replacement = "
    \n"; - text = text.replace(/^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$/gm, replacement); - text = text.replace(/^[ ]{0,2}([ ]?-[ ]?){3,}[ \t]*$/gm, replacement); - text = text.replace(/^[ ]{0,2}([ ]?_[ ]?){3,}[ \t]*$/gm, replacement); - - text = _DoLists(text); - text = _DoCodeBlocks(text); - text = _DoBlockQuotes(text); - - text = pluginHooks.postBlockGamut(text, blockGamutHookCallback); - - // We already ran _HashHTMLBlocks() before, in Markdown(), but that - // was to escape raw HTML in the original Markdown source. This time, - // we're escaping the markup we've just created, so that we don't wrap - //

    tags around block-level tags. - text = _HashHTMLBlocks(text); - text = _FormParagraphs(text, doNotUnhash); - - return text; - } - - function _RunSpanGamut(text) { - // - // These are all the transformations that occur *within* block-level - // tags like paragraphs, headers, and list items. - // - - text = pluginHooks.preSpanGamut(text); - - text = _DoCodeSpans(text); - text = _EscapeSpecialCharsWithinTagAttributes(text); - text = _EncodeBackslashEscapes(text); - - // Process anchor and image tags. Images must come first, - // because ![foo][f] looks like an anchor. - text = _DoImages(text); - text = _DoAnchors(text); - - // Make links out of things like `` - // Must come after _DoAnchors(), because you can use < and > - // delimiters in inline links like [this](). - text = _DoAutoLinks(text); - - text = text.replace(/~P/g, "://"); // put in place to prevent autolinking; reset now - - text = _EncodeAmpsAndAngles(text); - text = _DoItalicsAndBold(text); - - // Do hard breaks: - text = text.replace(/ +\n/g, "
    \n"); - - text = pluginHooks.postSpanGamut(text); - - return text; - } - - function _EscapeSpecialCharsWithinTagAttributes(text) { - // - // Within tags -- meaning between < and > -- encode [\ ` * _] so they - // don't conflict with their use in Markdown for code, italics and strong. - // - - // Build a regex to find HTML tags and comments. See Friedl's - // "Mastering Regular Expressions", 2nd Ed., pp. 200-201. - - // SE: changed the comment part of the regex - - var regex = /(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|-]|-[^>])(?:[^-]|-[^-])*)--)>)/gi; - - text = text.replace(regex, function (wholeMatch) { - var tag = wholeMatch.replace(/(.)<\/?code>(?=.)/g, "$1`"); - tag = escapeCharacters(tag, wholeMatch.charAt(1) == "!" ? "\\`*_/" : "\\`*_"); // also escape slashes in comments to prevent autolinking there -- http://meta.stackoverflow.com/questions/95987 - return tag; - }); - - return text; - } - - function _DoAnchors(text) { - // - // Turn Markdown link shortcuts into XHTML
    tags. - // - // - // First, handle reference-style links: [link text] [id] - // - - /* - text = text.replace(/ - ( // wrap whole match in $1 - \[ - ( - (?: - \[[^\]]*\] // allow brackets nested one level - | - [^\[] // or anything else - )* - ) - \] - - [ ]? // one optional space - (?:\n[ ]*)? // one optional newline followed by spaces - - \[ - (.*?) // id = $3 - \] - ) - ()()()() // pad remaining backreferences - /g, writeAnchorTag); - */ - text = text.replace(/(\[((?:\[[^\]]*\]|[^\[\]])*)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g, writeAnchorTag); - - // - // Next, inline-style links: [link text](url "optional title") - // - - /* - text = text.replace(/ - ( // wrap whole match in $1 - \[ - ( - (?: - \[[^\]]*\] // allow brackets nested one level - | - [^\[\]] // or anything else - )* - ) - \] - \( // literal paren - [ \t]* - () // no id, so leave $3 empty - ? - [ \t]* - ( // $5 - (['"]) // quote char = $6 - (.*?) // Title = $7 - \6 // matching quote - [ \t]* // ignore any spaces/tabs between closing quote and ) - )? // title is optional - \) - ) - /g, writeAnchorTag); - */ - - text = text.replace(/(\[((?:\[[^\]]*\]|[^\[\]])*)\]\([ \t]*()?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g, writeAnchorTag); - - // - // Last, handle reference-style shortcuts: [link text] - // These must come last in case you've also got [link test][1] - // or [link test](/foo) - // - - /* - text = text.replace(/ - ( // wrap whole match in $1 - \[ - ([^\[\]]+) // link text = $2; can't contain '[' or ']' - \] - ) - ()()()()() // pad rest of backreferences - /g, writeAnchorTag); - */ - text = text.replace(/(\[([^\[\]]+)\])()()()()()/g, writeAnchorTag); - - return text; - } - - function writeAnchorTag(wholeMatch, m1, m2, m3, m4, m5, m6, m7) { - if (m7 == undefined) m7 = ""; - var whole_match = m1; - var link_text = m2.replace(/:\/\//g, "~P"); // to prevent auto-linking withing the link. 
will be converted back after the auto-linker runs - var link_id = m3.toLowerCase(); - var url = m4; - var title = m7; - - if (url == "") { - if (link_id == "") { - // lower-case and turn embedded newlines into spaces - link_id = link_text.toLowerCase().replace(/ ?\n/g, " "); - } - url = "#" + link_id; - - if (g_urls.get(link_id) != undefined) { - url = g_urls.get(link_id); - if (g_titles.get(link_id) != undefined) { - title = g_titles.get(link_id); - } - } - else { - if (whole_match.search(/\(\s*\)$/m) > -1) { - // Special case for explicit empty url - url = ""; - } else { - return whole_match; - } - } - } - url = encodeProblemUrlChars(url); - url = escapeCharacters(url, "*_"); - var result = ""; - - return result; - } - - function _DoImages(text) { - // - // Turn Markdown image shortcuts into tags. - // - - // - // First, handle reference-style labeled images: ![alt text][id] - // - - /* - text = text.replace(/ - ( // wrap whole match in $1 - !\[ - (.*?) // alt text = $2 - \] - - [ ]? // one optional space - (?:\n[ ]*)? // one optional newline followed by spaces - - \[ - (.*?) // id = $3 - \] - ) - ()()()() // pad rest of backreferences - /g, writeImageTag); - */ - text = text.replace(/(!\[(.*?)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g, writeImageTag); - - // - // Next, handle inline images: ![alt text](url "optional title") - // Don't forget: encode * and _ - - /* - text = text.replace(/ - ( // wrap whole match in $1 - !\[ - (.*?) // alt text = $2 - \] - \s? // One optional whitespace character - \( // literal paren - [ \t]* - () // no id, so leave $3 empty - ? // src url = $4 - [ \t]* - ( // $5 - (['"]) // quote char = $6 - (.*?) // title = $7 - \6 // matching quote - [ \t]* - )? 
// title is optional - \) - ) - /g, writeImageTag); - */ - text = text.replace(/(!\[(.*?)\]\s?\([ \t]*()?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g, writeImageTag); - - return text; - } - - function attributeEncode(text) { - // unconditionally replace angle brackets here -- what ends up in an attribute (e.g. alt or title) - // never makes sense to have verbatim HTML in it (and the sanitizer would totally break it) - return text.replace(/>/g, ">").replace(/" + _RunSpanGamut(m1) + "\n\n"; } - ); - - text = text.replace(/^(.+)[ \t]*\n-+[ \t]*\n+/gm, - function (matchFound, m1) { return "

    " + _RunSpanGamut(m1) + "

    \n\n"; } - ); - - // atx-style headers: - // # Header 1 - // ## Header 2 - // ## Header 2 with closing hashes ## - // ... - // ###### Header 6 - // - - /* - text = text.replace(/ - ^(\#{1,6}) // $1 = string of #'s - [ \t]* - (.+?) // $2 = Header text - [ \t]* - \#* // optional closing #'s (not counted) - \n+ - /gm, function() {...}); - */ - - text = text.replace(/^(\#{1,6})[ \t]*(.+?)[ \t]*\#*\n+/gm, - function (wholeMatch, m1, m2) { - var h_level = m1.length; - return "" + _RunSpanGamut(m2) + "\n\n"; - } - ); - - return text; - } - - function _DoLists(text) { - // - // Form HTML ordered (numbered) and unordered (bulleted) lists. - // - - // attacklab: add sentinel to hack around khtml/safari bug: - // http://bugs.webkit.org/show_bug.cgi?id=11231 - text += "~0"; - - // Re-usable pattern to match any entirel ul or ol list: - - /* - var whole_list = / - ( // $1 = whole list - ( // $2 - [ ]{0,3} // attacklab: g_tab_width - 1 - ([*+-]|\d+[.]) // $3 = first list item marker - [ \t]+ - ) - [^\r]+? - ( // $4 - ~0 // sentinel for workaround; should be $ - | - \n{2,} - (?=\S) - (?! // Negative lookahead for another list item marker - [ \t]* - (?:[*+-]|\d+[.])[ \t]+ - ) - ) - ) - /g - */ - var whole_list = /^(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/gm; - - if (g_list_level) { - text = text.replace(whole_list, function (wholeMatch, m1, m2) { - var list = m1; - var list_type = (m2.search(/[*+-]/g) > -1) ? "ul" : "ol"; - - var result = _ProcessListItems(list, list_type); - - // Trim any trailing whitespace, to put the closing `` - // up on the preceding line, to get it past the current stupid - // HTML block parser. This is a hack to work around the terrible - // hack that is the HTML block parser. 
- result = result.replace(/\s+$/, ""); - result = "<" + list_type + ">" + result + "\n"; - return result; - }); - } else { - whole_list = /(\n\n|^\n?)(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/g; - text = text.replace(whole_list, function (wholeMatch, m1, m2, m3) { - var runup = m1; - var list = m2; - - var list_type = (m3.search(/[*+-]/g) > -1) ? "ul" : "ol"; - var result = _ProcessListItems(list, list_type); - result = runup + "<" + list_type + ">\n" + result + "\n"; - return result; - }); - } - - // attacklab: strip sentinel - text = text.replace(/~0/, ""); - - return text; - } - - var _listItemMarkers = { ol: "\\d+[.]", ul: "[*+-]" }; - - function _ProcessListItems(list_str, list_type) { - // - // Process the contents of a single ordered or unordered list, splitting it - // into individual list items. - // - // list_type is either "ul" or "ol". - - // The $g_list_level global keeps track of when we're inside a list. - // Each time we enter a list, we increment it; when we leave a list, - // we decrement. If it's zero, we're not in a list anymore. - // - // We do this because when we're not inside a list, we want to treat - // something like this: - // - // I recommend upgrading to version - // 8. Oops, now this line is treated - // as a sub-list. - // - // As a single paragraph, despite the fact that the second line starts - // with a digit-period-space sequence. - // - // Whereas when we're inside a list (or sub-list), that line will be - // treated as the start of a sub-list. What a kludge, huh? This is - // an aspect of Markdown's syntax that's hard to parse perfectly - // without resorting to mind-reading. Perhaps the solution is to - // change the syntax rules such that sub-lists must start with a - // starting cardinal number; e.g. "1." or "a.". 
- - g_list_level++; - - // trim trailing blank lines: - list_str = list_str.replace(/\n{2,}$/, "\n"); - - // attacklab: add sentinel to emulate \z - list_str += "~0"; - - // In the original attacklab showdown, list_type was not given to this function, and anything - // that matched /[*+-]|\d+[.]/ would just create the next
  • , causing this mismatch: - // - // Markdown rendered by WMD rendered by MarkdownSharp - // ------------------------------------------------------------------ - // 1. first 1. first 1. first - // 2. second 2. second 2. second - // - third 3. third * third - // - // We changed this to behave identical to MarkdownSharp. This is the constructed RegEx, - // with {MARKER} being one of \d+[.] or [*+-], depending on list_type: - - /* - list_str = list_str.replace(/ - (^[ \t]*) // leading whitespace = $1 - ({MARKER}) [ \t]+ // list marker = $2 - ([^\r]+? // list item text = $3 - (\n+) - ) - (?= - (~0 | \2 ({MARKER}) [ \t]+) - ) - /gm, function(){...}); - */ - - var marker = _listItemMarkers[list_type]; - var re = new RegExp("(^[ \\t]*)(" + marker + ")[ \\t]+([^\\r]+?(\\n+))(?=(~0|\\1(" + marker + ")[ \\t]+))", "gm"); - var last_item_had_a_double_newline = false; - list_str = list_str.replace(re, - function (wholeMatch, m1, m2, m3) { - var item = m3; - var leading_space = m1; - var ends_with_double_newline = /\n\n$/.test(item); - var contains_double_newline = ends_with_double_newline || item.search(/\n{2,}/) > -1; - - if (contains_double_newline || last_item_had_a_double_newline) { - item = _RunBlockGamut(_Outdent(item), /* doNotUnhash = */true); - } - else { - // Recursion for sub-lists: - item = _DoLists(_Outdent(item)); - item = item.replace(/\n$/, ""); // chomp(item) - item = _RunSpanGamut(item); - } - last_item_had_a_double_newline = ends_with_double_newline; - return "
  • " + item + "
  • \n"; - } - ); - - // attacklab: strip sentinel - list_str = list_str.replace(/~0/g, ""); - - g_list_level--; - return list_str; - } - - function _DoCodeBlocks(text) { - // - // Process Markdown `
    ` blocks.
    -            //
    -
    -            /*
    -            text = text.replace(/
    -                (?:\n\n|^)
    -                (                               // $1 = the code block -- one or more lines, starting with a space/tab
    -                    (?:
    -                        (?:[ ]{4}|\t)           // Lines must start with a tab or a tab-width of spaces - attacklab: g_tab_width
    -                        .*\n+
    -                    )+
    -                )
    -                (\n*[ ]{0,3}[^ \t\n]|(?=~0))    // attacklab: g_tab_width
    -            /g ,function(){...});
    -            */
    -
    -            // attacklab: sentinel workarounds for lack of \A and \Z, safari\khtml bug
    -            text += "~0";
    -
    -            text = text.replace(/(?:\n\n|^\n?)((?:(?:[ ]{4}|\t).*\n+)+)(\n*[ ]{0,3}[^ \t\n]|(?=~0))/g,
    -                function (wholeMatch, m1, m2) {
    -                    var codeblock = m1;
    -                    var nextChar = m2;
    -
    -                    codeblock = _EncodeCode(_Outdent(codeblock));
    -                    codeblock = _Detab(codeblock);
    -                    codeblock = codeblock.replace(/^\n+/g, ""); // trim leading newlines
    -                    codeblock = codeblock.replace(/\n+$/g, ""); // trim trailing whitespace
    -
    -                    codeblock = "
    " + codeblock + "\n
    "; - - return "\n\n" + codeblock + "\n\n" + nextChar; - } - ); - - // attacklab: strip sentinel - text = text.replace(/~0/, ""); - - return text; - } - - function hashBlock(text) { - text = text.replace(/(^\n+|\n+$)/g, ""); - return "\n\n~K" + (g_html_blocks.push(text) - 1) + "K\n\n"; - } - - function _DoCodeSpans(text) { - // - // * Backtick quotes are used for spans. - // - // * You can use multiple backticks as the delimiters if you want to - // include literal backticks in the code span. So, this input: - // - // Just type ``foo `bar` baz`` at the prompt. - // - // Will translate to: - // - //

    Just type foo `bar` baz at the prompt.

    - // - // There's no arbitrary limit to the number of backticks you - // can use as delimters. If you need three consecutive backticks - // in your code, use four for delimiters, etc. - // - // * You can use spaces to get literal backticks at the edges: - // - // ... type `` `bar` `` ... - // - // Turns to: - // - // ... type `bar` ... - // - - /* - text = text.replace(/ - (^|[^\\]) // Character before opening ` can't be a backslash - (`+) // $2 = Opening run of ` - ( // $3 = The code block - [^\r]*? - [^`] // attacklab: work around lack of lookbehind - ) - \2 // Matching closer - (?!`) - /gm, function(){...}); - */ - - text = text.replace(/(^|[^\\])(`+)([^\r]*?[^`])\2(?!`)/gm, - function (wholeMatch, m1, m2, m3, m4) { - var c = m3; - c = c.replace(/^([ \t]*)/g, ""); // leading whitespace - c = c.replace(/[ \t]*$/g, ""); // trailing whitespace - c = _EncodeCode(c); - c = c.replace(/:\/\//g, "~P"); // to prevent auto-linking. Not necessary in code *blocks*, but in code spans. Will be converted back after the auto-linker runs. - return m1 + "" + c + ""; - } - ); - - return text; - } - - function _EncodeCode(text) { - // - // Encode/escape certain characters inside Markdown code runs. - // The point is that in code, these characters are literals, - // and lose their special Markdown meanings. - // - // Encode all ampersands; HTML entities are not - // entities within a Markdown code span. - text = text.replace(/&/g, "&"); - - // Do the angle bracket song and dance: - text = text.replace(//g, ">"); - - // Now, escape characters that are magic in Markdown: - text = escapeCharacters(text, "\*_{}[]\\", false); - - // jj the line above breaks this: - //--- - - //* Item - - // 1. 
Subitem - - // special char: * - //--- - - return text; - } - - function _DoItalicsAndBold(text) { - - // must go first: - text = text.replace(/([\W_]|^)(\*\*|__)(?=\S)([^\r]*?\S[\*_]*)\2([\W_]|$)/g, - "$1$3$4"); - - text = text.replace(/([\W_]|^)(\*|_)(?=\S)([^\r\*_]*?\S)\2([\W_]|$)/g, - "$1$3$4"); - - return text; - } - - function _DoBlockQuotes(text) { - - /* - text = text.replace(/ - ( // Wrap whole match in $1 - ( - ^[ \t]*>[ \t]? // '>' at the start of a line - .+\n // rest of the first line - (.+\n)* // subsequent consecutive lines - \n* // blanks - )+ - ) - /gm, function(){...}); - */ - - text = text.replace(/((^[ \t]*>[ \t]?.+\n(.+\n)*\n*)+)/gm, - function (wholeMatch, m1) { - var bq = m1; - - // attacklab: hack around Konqueror 3.5.4 bug: - // "----------bug".replace(/^-/g,"") == "bug" - - bq = bq.replace(/^[ \t]*>[ \t]?/gm, "~0"); // trim one level of quoting - - // attacklab: clean up hack - bq = bq.replace(/~0/g, ""); - - bq = bq.replace(/^[ \t]+$/gm, ""); // trim whitespace-only lines - bq = _RunBlockGamut(bq); // recurse - - bq = bq.replace(/(^|\n)/g, "$1 "); - // These leading spaces screw with
     content, so we need to fix that:
    -                    bq = bq.replace(
    -                            /(\s*
    [^\r]+?<\/pre>)/gm,
    -                        function (wholeMatch, m1) {
    -                            var pre = m1;
    -                            // attacklab: hack around Konqueror 3.5.4 bug:
    -                            pre = pre.replace(/^  /mg, "~0");
    -                            pre = pre.replace(/~0/g, "");
    -                            return pre;
    -                        });
    -
    -                    return hashBlock("
    \n" + bq + "\n
    "); - } - ); - return text; - } - - function _FormParagraphs(text, doNotUnhash) { - // - // Params: - // $text - string to process with html

    tags - // - - // Strip leading and trailing lines: - text = text.replace(/^\n+/g, ""); - text = text.replace(/\n+$/g, ""); - - var grafs = text.split(/\n{2,}/g); - var grafsOut = []; - - var markerRe = /~K(\d+)K/; - - // - // Wrap

    tags. - // - var end = grafs.length; - for (var i = 0; i < end; i++) { - var str = grafs[i]; - - // if this is an HTML marker, copy it - if (markerRe.test(str)) { - grafsOut.push(str); - } - else if (/\S/.test(str)) { - str = _RunSpanGamut(str); - str = str.replace(/^([ \t]*)/g, "

    "); - str += "

    " - grafsOut.push(str); - } - - } - // - // Unhashify HTML blocks - // - if (!doNotUnhash) { - end = grafsOut.length; - for (var i = 0; i < end; i++) { - var foundAny = true; - while (foundAny) { // we may need several runs, since the data may be nested - foundAny = false; - grafsOut[i] = grafsOut[i].replace(/~K(\d+)K/g, function (wholeMatch, id) { - foundAny = true; - return g_html_blocks[id]; - }); - } - } - } - return grafsOut.join("\n\n"); - } - - function _EncodeAmpsAndAngles(text) { - // Smart processing for ampersands and angle brackets that need to be encoded. - - // Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: - // http://bumppo.net/projects/amputator/ - text = text.replace(/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/g, "&"); - - // Encode naked <'s - text = text.replace(/<(?![a-z\/?\$!])/gi, "<"); - - return text; - } - - function _EncodeBackslashEscapes(text) { - // - // Parameter: String. - // Returns: The string, with after processing the following backslash - // escape sequences. - // - - // attacklab: The polite way to do this is with the new - // escapeCharacters() function: - // - // text = escapeCharacters(text,"\\",true); - // text = escapeCharacters(text,"`*_{}[]()>#+-.!",true); - // - // ...but we're sidestepping its use of the (slow) RegExp constructor - // as an optimization for Firefox. This function gets called a LOT. 
- - text = text.replace(/\\(\\)/g, escapeCharacters_callback); - text = text.replace(/\\([`*_{}\[\]()>#+-.!])/g, escapeCharacters_callback); - return text; - } - - function handleTrailingParens(wholeMatch, lookbehind, protocol, link) { - if (lookbehind) - return wholeMatch; - if (link.charAt(link.length - 1) !== ")") - return "<" + protocol + link + ">"; - var parens = link.match(/[()]/g); - var level = 0; - for (var i = 0; i < parens.length; i++) { - if (parens[i] === "(") { - if (level <= 0) - level = 1; - else - level++; - } - else { - level--; - } - } - var tail = ""; - if (level < 0) { - var re = new RegExp("\\){1," + (-level) + "}$"); - link = link.replace(re, function (trailingParens) { - tail = trailingParens; - return ""; - }); - } - - return "<" + protocol + link + ">" + tail; - } - - function _DoAutoLinks(text) { - - // note that at this point, all other URL in the text are already hyperlinked as
    - // *except* for the case - - // automatically add < and > around unadorned raw hyperlinks - // must be preceded by a non-word character (and not by =" or <) and followed by non-word/EOF character - // simulating the lookbehind in a consuming way is okay here, since a URL can neither and with a " nor - // with a <, so there is no risk of overlapping matches. - text = text.replace(/(="|='|<)?\b(https?|ftp)(:\/\/[-A-Z0-9+&@#\/%?=~_|\[\]\(\)!:,\.;]*[-A-Z0-9+&@#\/%=~_|\[\])])(?=$|\W)/gi, handleTrailingParens); - - // autolink anything like - - var replacer = function (wholematch, m1) { - var m1encoded = m1.replace(/\_\_/, '%5F%5F'); - return "" + pluginHooks.plainLinkText(m1) + ""; - } - text = text.replace(/<((https?|ftp):[^'">\s]+)>/gi, replacer); - - return text; - } - - function _UnescapeSpecialChars(text) { - // - // Swap back in all the special characters we've hidden. - // - text = text.replace(/~E(\d+)E/g, - function (wholeMatch, m1) { - var charCodeToReplace = parseInt(m1); - return String.fromCharCode(charCodeToReplace); - } - ); - return text; - } - - function _Outdent(text) { - // - // Remove one level of line-leading tabs or spaces - // - - // attacklab: hack around Konqueror 3.5.4 bug: - // "----------bug".replace(/^-/g,"") == "bug" - - text = text.replace(/^(\t|[ ]{1,4})/gm, "~0"); // attacklab: g_tab_width - - // attacklab: clean up hack - text = text.replace(/~0/g, "") - - return text; - } - - function _Detab(text) { - if (!/\t/.test(text)) - return text; - - var spaces = [" ", " ", " ", " "], - skew = 0, - v; - - return text.replace(/[\n\t]/g, function (match, offset) { - if (match === "\n") { - skew = offset + 1; - return match; - } - v = (offset - skew) % 4; - skew = offset + 1; - return spaces[v]; - }); - } - - // - // attacklab: Utility functions - // - - var _problemUrlChars = /(?:["'*()[\]:]|~D)/g; - - // hex-encodes some unusual "problem" chars in URLs to avoid URL detection problems - function encodeProblemUrlChars(url) { - if (!url) - 
return ""; - - var len = url.length; - - return url.replace(_problemUrlChars, function (match, offset) { - if (match == "~D") // escape for dollar - return "%24"; - if (match == ":") { - if (offset == len - 1 || /[0-9\/]/.test(url.charAt(offset + 1))) - return ":" - } - return "%" + match.charCodeAt(0).toString(16); - }); - } - - - function escapeCharacters(text, charsToEscape, afterBackslash) { - // First we have to escape the escape characters so that - // we can build a character class out of them - var regexString = "([" + charsToEscape.replace(/([\[\]\\])/g, "\\$1") + "])"; - - if (afterBackslash) { - regexString = "\\\\" + regexString; - } - - var regex = new RegExp(regexString, "g"); - text = text.replace(regex, escapeCharacters_callback); - - return text; - } - - - function escapeCharacters_callback(wholeMatch, m1) { - var charCodeToEscape = m1.charCodeAt(0); - return "~E" + charCodeToEscape + "E"; - } - - }; // end of the Markdown.Converter constructor - })(); diff --git a/app/assets/javascripts/external/markdown.js b/app/assets/javascripts/external/markdown.js new file mode 100644 index 00000000000..e11d71e6c27 --- /dev/null +++ b/app/assets/javascripts/external/markdown.js @@ -0,0 +1,1436 @@ +// Released under MIT license +// Copyright (c) 2009-2010 Dominic Baggott +// Copyright (c) 2009-2010 Ash Berlin +// Copyright (c) 2011 Christoph Dorn (http://www.christophdorn.com) + +/*jshint browser:true, devel:true */ + +(function( expose ) { + +/** + * class Markdown + * + * Markdown processing in Javascript done right. We have very particular views + * on what constitutes 'right' which include: + * + * - produces well-formed HTML (this means that em and strong nesting is + * important) + * + * - has an intermediate representation to allow processing of parsed data (We + * in fact have two, both as [JsonML]: a markdown tree and an HTML tree). 
+ * + * - is easily extensible to add new dialects without having to rewrite the + * entire parsing mechanics + * + * - has a good test suite + * + * This implementation fulfills all of these (except that the test suite could + * do with expanding to automatically run all the fixtures from other Markdown + * implementations.) + * + * ##### Intermediate Representation + * + * *TODO* Talk about this :) Its JsonML, but document the node names we use. + * + * [JsonML]: http://jsonml.org/ "JSON Markup Language" + **/ + +var Markdown = expose.Markdown = function(dialect) { + switch (typeof dialect) { + case "undefined": + this.dialect = Markdown.dialects.Gruber; + break; + case "object": + this.dialect = dialect; + break; + default: + if ( dialect in Markdown.dialects ) { + this.dialect = Markdown.dialects[dialect]; + } + else { + throw new Error("Unknown Markdown dialect '" + String(dialect) + "'"); + } + break; + } + this.em_state = []; + this.strong_state = []; + this.debug_indent = ""; +}; + +/** + * parse( markdown, [dialect] ) -> JsonML + * - markdown (String): markdown string to parse + * - dialect (String | Dialect): the dialect to use, defaults to gruber + * + * Parse `markdown` and return a markdown document as a Markdown.JsonML tree. + **/ +expose.parse = function( source, dialect ) { + // dialect will default if undefined + var md = new Markdown( dialect ); + return md.toTree( source ); +}; + +/** + * toHTML( markdown, [dialect] ) -> String + * toHTML( md_tree ) -> String + * - markdown (String): markdown string to parse + * - md_tree (Markdown.JsonML): parsed markdown tree + * + * Take markdown (either as a string or as a JsonML tree) and run it through + * [[toHTMLTree]] then turn it into a well-formated HTML fragment. 
+ **/ +expose.toHTML = function toHTML( source , dialect , options ) { + var input = expose.toHTMLTree( source , dialect , options ); + + return expose.renderJsonML( input ); +}; + +/** + * toHTMLTree( markdown, [dialect] ) -> JsonML + * toHTMLTree( md_tree ) -> JsonML + * - markdown (String): markdown string to parse + * - dialect (String | Dialect): the dialect to use, defaults to gruber + * - md_tree (Markdown.JsonML): parsed markdown tree + * + * Turn markdown into HTML, represented as a JsonML tree. If a string is given + * to this function, it is first parsed into a markdown tree by calling + * [[parse]]. + **/ +expose.toHTMLTree = function toHTMLTree( input, dialect , options ) { + // convert string input to an MD tree + if ( typeof input ==="string" ) input = this.parse( input, dialect ); + + // Now convert the MD tree to an HTML tree + + // remove references from the tree + var attrs = extract_attr( input ), + refs = {}; + + if ( attrs && attrs.references ) { + refs = attrs.references; + } + + var html = convert_tree_to_html( input, refs , options ); + merge_text_nodes( html ); + return html; +}; + +// For Spidermonkey based engines +function mk_block_toSource() { + return "Markdown.mk_block( " + + uneval(this.toString()) + + ", " + + uneval(this.trailing) + + ", " + + uneval(this.lineNumber) + + " )"; +} + +// node +function mk_block_inspect() { + var util = require("util"); + return "Markdown.mk_block( " + + util.inspect(this.toString()) + + ", " + + util.inspect(this.trailing) + + ", " + + util.inspect(this.lineNumber) + + " )"; + +} + +var mk_block = Markdown.mk_block = function(block, trail, line) { + // Be helpful for default case in tests. 
+ if ( arguments.length == 1 ) trail = "\n\n"; + + var s = new String(block); + s.trailing = trail; + // To make it clear its not just a string + s.inspect = mk_block_inspect; + s.toSource = mk_block_toSource; + + if ( line != undefined ) + s.lineNumber = line; + + return s; +}; + +function count_lines( str ) { + var n = 0, i = -1; + while ( ( i = str.indexOf("\n", i + 1) ) !== -1 ) n++; + return n; +} + +// Internal - split source into rough blocks +Markdown.prototype.split_blocks = function splitBlocks( input, startLine ) { + input = input.replace(/(\r\n|\n|\r)/g, "\n"); + // [\s\S] matches _anything_ (newline or space) + // [^] is equivalent but doesn't work in IEs. + var re = /([\s\S]+?)($|\n#|\n(?:\s*\n|$)+)/g, + blocks = [], + m; + + var line_no = 1; + + if ( ( m = /^(\s*\n)/.exec(input) ) != null ) { + // skip (but count) leading blank lines + line_no += count_lines( m[0] ); + re.lastIndex = m[0].length; + } + + while ( ( m = re.exec(input) ) !== null ) { + if (m[2] == "\n#") { + m[2] = "\n"; + re.lastIndex--; + } + blocks.push( mk_block( m[1], m[2], line_no ) ); + line_no += count_lines( m[0] ); + } + + return blocks; +}; + +/** + * Markdown#processBlock( block, next ) -> undefined | [ JsonML, ... ] + * - block (String): the block to process + * - next (Array): the following blocks + * + * Process `block` and return an array of JsonML nodes representing `block`. + * + * It does this by asking each block level function in the dialect to process + * the block until one can. Succesful handling is indicated by returning an + * array (with zero or more JsonML nodes), failure by a false value. + * + * Blocks handlers are responsible for calling [[Markdown#processInline]] + * themselves as appropriate. + * + * If the blocks were split incorrectly or adjacent blocks need collapsing you + * can adjust `next` in place using shift/splice etc. 
+ * + * If any of this default behaviour is not right for the dialect, you can + * define a `__call__` method on the dialect that will get invoked to handle + * the block processing. + */ +Markdown.prototype.processBlock = function processBlock( block, next ) { + var cbs = this.dialect.block, + ord = cbs.__order__; + + if ( "__call__" in cbs ) { + return cbs.__call__.call(this, block, next); + } + + for ( var i = 0; i < ord.length; i++ ) { + //D:this.debug( "Testing", ord[i] ); + var res = cbs[ ord[i] ].call( this, block, next ); + if ( res ) { + //D:this.debug(" matched"); + if ( !isArray(res) || ( res.length > 0 && !( isArray(res[0]) ) ) ) + this.debug(ord[i], "didn't return a proper array"); + //D:this.debug( "" ); + return res; + } + } + + // Uhoh! no match! Should we throw an error? + return []; +}; + +Markdown.prototype.processInline = function processInline( block ) { + return this.dialect.inline.__call__.call( this, String( block ) ); +}; + +/** + * Markdown#toTree( source ) -> JsonML + * - source (String): markdown source to parse + * + * Parse `source` into a JsonML tree representing the markdown document. + **/ +// custom_tree means set this.tree to `custom_tree` and restore old value on return +Markdown.prototype.toTree = function toTree( source, custom_root ) { + var blocks = source instanceof Array ? 
source : this.split_blocks( source ); + + // Make tree a member variable so its easier to mess with in extensions + var old_tree = this.tree; + try { + this.tree = custom_root || this.tree || [ "markdown" ]; + + blocks: + while ( blocks.length ) { + var b = this.processBlock( blocks.shift(), blocks ); + + // Reference blocks and the like won't return any content + if ( !b.length ) continue blocks; + + this.tree.push.apply( this.tree, b ); + } + return this.tree; + } + finally { + if ( custom_root ) { + this.tree = old_tree; + } + } +}; + +// Noop by default +Markdown.prototype.debug = function () { + var args = Array.prototype.slice.call( arguments); + args.unshift(this.debug_indent); + if ( typeof print !== "undefined" ) + print.apply( print, args ); + if ( typeof console !== "undefined" && typeof console.log !== "undefined" ) + console.log.apply( null, args ); +} + +Markdown.prototype.loop_re_over_block = function( re, block, cb ) { + // Dont use /g regexps with this + var m, + b = block.valueOf(); + + while ( b.length && (m = re.exec(b) ) != null ) { + b = b.substr( m[0].length ); + cb.call(this, m); + } + return b; +}; + +/** + * Markdown.dialects + * + * Namespace of built-in dialects. + **/ +Markdown.dialects = {}; + +/** + * Markdown.dialects.Gruber + * + * The default dialect that follows the rules set out by John Gruber's + * markdown.pl as closely as possible. Well actually we follow the behaviour of + * that script which in some places is not exactly what the syntax web page + * says. 
+ **/ +Markdown.dialects.Gruber = { + block: { + atxHeader: function atxHeader( block, next ) { + var m = block.match( /^(#{1,6})\s*(.*?)\s*#*\s*(?:\n|$)/ ); + + if ( !m ) return undefined; + + var header = [ "header", { level: m[ 1 ].length } ]; + Array.prototype.push.apply(header, this.processInline(m[ 2 ])); + + if ( m[0].length < block.length ) + next.unshift( mk_block( block.substr( m[0].length ), block.trailing, block.lineNumber + 2 ) ); + + return [ header ]; + }, + + setextHeader: function setextHeader( block, next ) { + var m = block.match( /^(.*)\n([-=])\2\2+(?:\n|$)/ ); + + if ( !m ) return undefined; + + var level = ( m[ 2 ] === "=" ) ? 1 : 2; + var header = [ "header", { level : level }, m[ 1 ] ]; + + if ( m[0].length < block.length ) + next.unshift( mk_block( block.substr( m[0].length ), block.trailing, block.lineNumber + 2 ) ); + + return [ header ]; + }, + + code: function code( block, next ) { + // | Foo + // |bar + // should be a code block followed by a paragraph. Fun + // + // There might also be adjacent code block to merge. + + var ret = [], + re = /^(?: {0,3}\t| {4})(.*)\n?/, + lines; + + // 4 spaces + content + if ( !block.match( re ) ) return undefined; + + block_search: + do { + // Now pull out the rest of the lines + var b = this.loop_re_over_block( + re, block.valueOf(), function( m ) { ret.push( m[1] ); } ); + + if ( b.length ) { + // Case alluded to in first comment. 
push it back on as a new block + next.unshift( mk_block(b, block.trailing) ); + break block_search; + } + else if ( next.length ) { + // Check the next block - it might be code too + if ( !next[0].match( re ) ) break block_search; + + // Pull how how many blanks lines follow - minus two to account for .join + ret.push ( block.trailing.replace(/[^\n]/g, "").substring(2) ); + + block = next.shift(); + } + else { + break block_search; + } + } while ( true ); + + return [ [ "code_block", ret.join("\n") ] ]; + }, + + horizRule: function horizRule( block, next ) { + // this needs to find any hr in the block to handle abutting blocks + var m = block.match( /^(?:([\s\S]*?)\n)?[ \t]*([-_*])(?:[ \t]*\2){2,}[ \t]*(?:\n([\s\S]*))?$/ ); + + if ( !m ) { + return undefined; + } + + var jsonml = [ [ "hr" ] ]; + + // if there's a leading abutting block, process it + if ( m[ 1 ] ) { + jsonml.unshift.apply( jsonml, this.processBlock( m[ 1 ], [] ) ); + } + + // if there's a trailing abutting block, stick it into next + if ( m[ 3 ] ) { + next.unshift( mk_block( m[ 3 ] ) ); + } + + return jsonml; + }, + + // There are two types of lists. Tight and loose. Tight lists have no whitespace + // between the items (and result in text just in the
  • ) and loose lists, + // which have an empty line between list items, resulting in (one or more) + // paragraphs inside the
  • . + // + // There are all sorts weird edge cases about the original markdown.pl's + // handling of lists: + // + // * Nested lists are supposed to be indented by four chars per level. But + // if they aren't, you can get a nested list by indenting by less than + // four so long as the indent doesn't match an indent of an existing list + // item in the 'nest stack'. + // + // * The type of the list (bullet or number) is controlled just by the + // first item at the indent. Subsequent changes are ignored unless they + // are for nested lists + // + lists: (function( ) { + // Use a closure to hide a few variables. + var any_list = "[*+-]|\\d+\\.", + bullet_list = /[*+-]/, + number_list = /\d+\./, + // Capture leading indent as it matters for determining nested lists. + is_list_re = new RegExp( "^( {0,3})(" + any_list + ")[ \t]+" ), + indent_re = "(?: {0,3}\\t| {4})"; + + // TODO: Cache this regexp for certain depths. + // Create a regexp suitable for matching an li for a given stack depth + function regex_for_depth( depth ) { + + return new RegExp( + // m[1] = indent, m[2] = list_type + "(?:^(" + indent_re + "{0," + depth + "} {0,3})(" + any_list + ")\\s+)|" + + // m[3] = cont + "(^" + indent_re + "{0," + (depth-1) + "}[ ]{0,4})" + ); + } + function expand_tab( input ) { + return input.replace( / {0,3}\t/g, " " ); + } + + // Add inline content `inline` to `li`. inline comes from processInline + // so is an array of content + function add(li, loose, inline, nl) { + if ( loose ) { + li.push( [ "para" ].concat(inline) ); + return; + } + // Hmmm, should this be any block level element or just paras? + var add_to = li[li.length -1] instanceof Array && li[li.length - 1][0] == "para" + ? 
li[li.length -1] + : li; + + // If there is already some content in this list, add the new line in + if ( nl && li.length > 1 ) inline.unshift(nl); + + for ( var i = 0; i < inline.length; i++ ) { + var what = inline[i], + is_str = typeof what == "string"; + if ( is_str && add_to.length > 1 && typeof add_to[add_to.length-1] == "string" ) { + add_to[ add_to.length-1 ] += what; + } + else { + add_to.push( what ); + } + } + } + + // contained means have an indent greater than the current one. On + // *every* line in the block + function get_contained_blocks( depth, blocks ) { + + var re = new RegExp( "^(" + indent_re + "{" + depth + "}.*?\\n?)*$" ), + replace = new RegExp("^" + indent_re + "{" + depth + "}", "gm"), + ret = []; + + while ( blocks.length > 0 ) { + if ( re.exec( blocks[0] ) ) { + var b = blocks.shift(), + // Now remove that indent + x = b.replace( replace, ""); + + ret.push( mk_block( x, b.trailing, b.lineNumber ) ); + } + else { + break; + } + } + return ret; + } + + // passed to stack.forEach to turn list items up the stack into paras + function paragraphify(s, i, stack) { + var list = s.list; + var last_li = list[list.length-1]; + + if ( last_li[1] instanceof Array && last_li[1][0] == "para" ) { + return; + } + if ( i + 1 == stack.length ) { + // Last stack frame + // Keep the same array, but replace the contents + last_li.push( ["para"].concat( last_li.splice(1, last_li.length - 1) ) ); + } + else { + var sublist = last_li.pop(); + last_li.push( ["para"].concat( last_li.splice(1, last_li.length - 1) ), sublist ); + } + } + + // The matcher function + return function( block, next ) { + var m = block.match( is_list_re ); + if ( !m ) return undefined; + + function make_list( m ) { + var list = bullet_list.exec( m[2] ) + ? ["bulletlist"] + : ["numberlist"]; + + stack.push( { list: list, indent: m[1] } ); + return list; + } + + + var stack = [], // Stack of lists for nesting. 
+ list = make_list( m ), + last_li, + loose = false, + ret = [ stack[0].list ], + i; + + // Loop to search over block looking for inner block elements and loose lists + loose_search: + while ( true ) { + // Split into lines preserving new lines at end of line + var lines = block.split( /(?=\n)/ ); + + // We have to grab all lines for a li and call processInline on them + // once as there are some inline things that can span lines. + var li_accumulate = ""; + + // Loop over the lines in this block looking for tight lists. + tight_search: + for ( var line_no = 0; line_no < lines.length; line_no++ ) { + var nl = "", + l = lines[line_no].replace(/^\n/, function(n) { nl = n; return ""; }); + + // TODO: really should cache this + var line_re = regex_for_depth( stack.length ); + + m = l.match( line_re ); + //print( "line:", uneval(l), "\nline match:", uneval(m) ); + + // We have a list item + if ( m[1] !== undefined ) { + // Process the previous list item, if any + if ( li_accumulate.length ) { + add( last_li, loose, this.processInline( li_accumulate ), nl ); + // Loose mode will have been dealt with. Reset it + loose = false; + li_accumulate = ""; + } + + m[1] = expand_tab( m[1] ); + var wanted_depth = Math.floor(m[1].length/4)+1; + //print( "want:", wanted_depth, "stack:", stack.length); + if ( wanted_depth > stack.length ) { + // Deep enough for a nested list outright + //print ( "new nested list" ); + list = make_list( m ); + last_li.push( list ); + last_li = list[1] = [ "listitem" ]; + } + else { + // We aren't deep enough to be strictly a new level. This is + // where Md.pl goes nuts. If the indent matches a level in the + // stack, put it there, else put it one deeper then the + // wanted_depth deserves. + var found = false; + for ( i = 0; i < stack.length; i++ ) { + if ( stack[ i ].indent != m[1] ) continue; + list = stack[ i ].list; + stack.splice( i+1, stack.length - (i+1) ); + found = true; + break; + } + + if (!found) { + //print("not found. 
l:", uneval(l)); + wanted_depth++; + if ( wanted_depth <= stack.length ) { + stack.splice(wanted_depth, stack.length - wanted_depth); + //print("Desired depth now", wanted_depth, "stack:", stack.length); + list = stack[wanted_depth-1].list; + //print("list:", uneval(list) ); + } + else { + //print ("made new stack for messy indent"); + list = make_list(m); + last_li.push(list); + } + } + + //print( uneval(list), "last", list === stack[stack.length-1].list ); + last_li = [ "listitem" ]; + list.push(last_li); + } // end depth of shenegains + nl = ""; + } + + // Add content + if ( l.length > m[0].length ) { + li_accumulate += nl + l.substr( m[0].length ); + } + } // tight_search + + if ( li_accumulate.length ) { + add( last_li, loose, this.processInline( li_accumulate ), nl ); + // Loose mode will have been dealt with. Reset it + loose = false; + li_accumulate = ""; + } + + // Look at the next block - we might have a loose list. Or an extra + // paragraph for the current li + var contained = get_contained_blocks( stack.length, next ); + + // Deal with code blocks or properly nested lists + if ( contained.length > 0 ) { + // Make sure all listitems up the stack are paragraphs + forEach( stack, paragraphify, this); + + last_li.push.apply( last_li, this.toTree( contained, [] ) ); + } + + var next_block = next[0] && next[0].valueOf() || ""; + + if ( next_block.match(is_list_re) || next_block.match( /^ / ) ) { + block = next.shift(); + + // Check for an HR following a list: features/lists/hr_abutting + var hr = this.dialect.block.horizRule( block, next ); + + if ( hr ) { + ret.push.apply(ret, hr); + break; + } + + // Make sure all listitems up the stack are paragraphs + forEach( stack, paragraphify, this); + + loose = true; + continue loose_search; + } + break; + } // loose_search + + return ret; + }; + })(), + + blockquote: function blockquote( block, next ) { + if ( !block.match( /^>/m ) ) + return undefined; + + var jsonml = []; + + // separate out the leading abutting 
block, if any. I.e. in this case: + // + // a + // > b + // + if ( block[ 0 ] != ">" ) { + var lines = block.split( /\n/ ), + prev = [], + line_no = block.lineNumber; + + // keep shifting lines until you find a crotchet + while ( lines.length && lines[ 0 ][ 0 ] != ">" ) { + prev.push( lines.shift() ); + line_no++; + } + + var abutting = mk_block( prev.join( "\n" ), "\n", block.lineNumber ); + jsonml.push.apply( jsonml, this.processBlock( abutting, [] ) ); + // reassemble new block of just block quotes! + block = mk_block( lines.join( "\n" ), block.trailing, line_no ); + } + + + // if the next block is also a blockquote merge it in + while ( next.length && next[ 0 ][ 0 ] == ">" ) { + var b = next.shift(); + block = mk_block( block + block.trailing + b, b.trailing, block.lineNumber ); + } + + // Strip off the leading "> " and re-process as a block. + var input = block.replace( /^> ?/gm, "" ), + old_tree = this.tree, + processedBlock = this.toTree( input, [ "blockquote" ] ), + attr = extract_attr( processedBlock ); + + // If any link references were found get rid of them + if ( attr && attr.references ) { + delete attr.references; + // And then remove the attribute object if it's empty + if ( isEmpty( attr ) ) { + processedBlock.splice( 1, 1 ); + } + } + + jsonml.push( processedBlock ); + return jsonml; + }, + + referenceDefn: function referenceDefn( block, next) { + var re = /^\s*\[(.*?)\]:\s*(\S+)(?:\s+(?:(['"])(.*?)\3|\((.*?)\)))?\n?/; + // interesting matches are [ , ref_id, url, , title, title ] + + if ( !block.match(re) ) + return undefined; + + // make an attribute node if it doesn't exist + if ( !extract_attr( this.tree ) ) { + this.tree.splice( 1, 0, {} ); + } + + var attrs = extract_attr( this.tree ); + + // make a references hash if it doesn't exist + if ( attrs.references === undefined ) { + attrs.references = {}; + } + + var b = this.loop_re_over_block(re, block, function( m ) { + + if ( m[2] && m[2][0] == "<" && m[2][m[2].length-1] == ">" ) + m[2] = 
m[2].substring( 1, m[2].length - 1 ); + + var ref = attrs.references[ m[1].toLowerCase() ] = { + href: m[2] + }; + + if ( m[4] !== undefined ) + ref.title = m[4]; + else if ( m[5] !== undefined ) + ref.title = m[5]; + + } ); + + if ( b.length ) + next.unshift( mk_block( b, block.trailing ) ); + + return []; + }, + + para: function para( block, next ) { + // everything's a para! + return [ ["para"].concat( this.processInline( block ) ) ]; + } + } +}; + +Markdown.dialects.Gruber.inline = { + + __oneElement__: function oneElement( text, patterns_or_re, previous_nodes ) { + var m, + res, + lastIndex = 0; + + patterns_or_re = patterns_or_re || this.dialect.inline.__patterns__; + var re = new RegExp( "([\\s\\S]*?)(" + (patterns_or_re.source || patterns_or_re) + ")" ); + + m = re.exec( text ); + if (!m) { + // Just boring text + return [ text.length, text ]; + } + else if ( m[1] ) { + // Some un-interesting text matched. Return that first + return [ m[1].length, m[1] ]; + } + + var res; + if ( m[2] in this.dialect.inline ) { + res = this.dialect.inline[ m[2] ].call( + this, + text.substr( m.index ), m, previous_nodes || [] ); + } + // Default for now to make dev easier. just slurp special and output it. 
+ res = res || [ m[2].length, m[2] ]; + return res; + }, + + __call__: function inline( text, patterns ) { + + var out = [], + res; + + function add(x) { + //D:self.debug(" adding output", uneval(x)); + if ( typeof x == "string" && typeof out[out.length-1] == "string" ) + out[ out.length-1 ] += x; + else + out.push(x); + } + + while ( text.length > 0 ) { + res = this.dialect.inline.__oneElement__.call(this, text, patterns, out ); + text = text.substr( res.shift() ); + forEach(res, add ) + } + + return out; + }, + + // These characters are intersting elsewhere, so have rules for them so that + // chunks of plain text blocks don't include them + "]": function () {}, + "}": function () {}, + + __escape__ : /^\\[\\`\*_{}\[\]()#\+.!\-]/, + + "\\": function escaped( text ) { + // [ length of input processed, node/children to add... ] + // Only esacape: \ ` * _ { } [ ] ( ) # * + - . ! + if ( this.dialect.inline.__escape__.exec( text ) ) + return [ 2, text.charAt( 1 ) ]; + else + // Not an esacpe + return [ 1, "\\" ]; + }, + + "![": function image( text ) { + + // Unlike images, alt text is plain text only. no other elements are + // allowed in there + + // ![Alt text](/path/to/img.jpg "Optional title") + // 1 2 3 4 <--- captures + var m = text.match( /^!\[(.*?)\][ \t]*\([ \t]*([^")]*?)(?:[ \t]+(["'])(.*?)\3)?[ \t]*\)/ ); + + if ( m ) { + if ( m[2] && m[2][0] == "<" && m[2][m[2].length-1] == ">" ) + m[2] = m[2].substring( 1, m[2].length - 1 ); + + m[2] = this.dialect.inline.__call__.call( this, m[2], /\\/ )[0]; + + var attrs = { alt: m[1], href: m[2] || "" }; + if ( m[4] !== undefined) + attrs.title = m[4]; + + return [ m[0].length, [ "img", attrs ] ]; + } + + // ![Alt text][id] + m = text.match( /^!\[(.*?)\][ \t]*\[(.*?)\]/ ); + + if ( m ) { + // We can't check if the reference is known here as it likely wont be + // found till after. 
Check it in md tree->hmtl tree conversion + return [ m[0].length, [ "img_ref", { alt: m[1], ref: m[2].toLowerCase(), original: m[0] } ] ]; + } + + // Just consume the '![' + return [ 2, "![" ]; + }, + + "[": function link( text ) { + + var orig = String(text); + // Inline content is possible inside `link text` + var res = Markdown.DialectHelpers.inline_until_char.call( this, text.substr(1), "]" ); + + // No closing ']' found. Just consume the [ + if ( !res ) return [ 1, "[" ]; + + var consumed = 1 + res[ 0 ], + children = res[ 1 ], + link, + attrs; + + // At this point the first [...] has been parsed. See what follows to find + // out which kind of link we are (reference or direct url) + text = text.substr( consumed ); + + // [link text](/path/to/img.jpg "Optional title") + // 1 2 3 <--- captures + // This will capture up to the last paren in the block. We then pull + // back based on if there a matching ones in the url + // ([here](/url/(test)) + // The parens have to be balanced + var m = text.match( /^\s*\([ \t]*([^"']*)(?:[ \t]+(["'])(.*?)\2)?[ \t]*\)/ ); + if ( m ) { + var url = m[1]; + consumed += m[0].length; + + if ( url && url[0] == "<" && url[url.length-1] == ">" ) + url = url.substring( 1, url.length - 1 ); + + // If there is a title we don't have to worry about parens in the url + if ( !m[3] ) { + var open_parens = 1; // One open that isn't in the capture + for ( var len = 0; len < url.length; len++ ) { + switch ( url[len] ) { + case "(": + open_parens++; + break; + case ")": + if ( --open_parens == 0) { + consumed -= url.length - len; + url = url.substring(0, len); + } + break; + } + } + } + + // Process escapes only + url = this.dialect.inline.__call__.call( this, url, /\\/ )[0]; + + attrs = { href: url || "" }; + if ( m[3] !== undefined) + attrs.title = m[3]; + + link = [ "link", attrs ].concat( children ); + return [ consumed, link ]; + } + + // [Alt text][id] + // [Alt text] [id] + m = text.match( /^\s*\[(.*?)\]/ ); + + if ( m ) { + + consumed += 
m[ 0 ].length; + + // [links][] uses links as its reference + attrs = { ref: ( m[ 1 ] || String(children) ).toLowerCase(), original: orig.substr( 0, consumed ) }; + + link = [ "link_ref", attrs ].concat( children ); + + // We can't check if the reference is known here as it likely wont be + // found till after. Check it in md tree->hmtl tree conversion. + // Store the original so that conversion can revert if the ref isn't found. + return [ consumed, link ]; + } + + // [id] + // Only if id is plain (no formatting.) + if ( children.length == 1 && typeof children[0] == "string" ) { + + attrs = { ref: children[0].toLowerCase(), original: orig.substr( 0, consumed ) }; + link = [ "link_ref", attrs, children[0] ]; + return [ consumed, link ]; + } + + // Just consume the "[" + return [ 1, "[" ]; + }, + + + "<": function autoLink( text ) { + var m; + + if ( ( m = text.match( /^<(?:((https?|ftp|mailto):[^>]+)|(.*?@.*?\.[a-zA-Z]+))>/ ) ) != null ) { + if ( m[3] ) { + return [ m[0].length, [ "link", { href: "mailto:" + m[3] }, m[3] ] ]; + + } + else if ( m[2] == "mailto" ) { + return [ m[0].length, [ "link", { href: m[1] }, m[1].substr("mailto:".length ) ] ]; + } + else + return [ m[0].length, [ "link", { href: m[1] }, m[1] ] ]; + } + + return [ 1, "<" ]; + }, + + "`": function inlineCode( text ) { + // Inline code block. as many backticks as you like to start it + // Always skip over the opening ticks. + var m = text.match( /(`+)(([\s\S]*?)\1)/ ); + + if ( m && m[2] ) + return [ m[1].length + m[2].length, [ "inlinecode", m[3] ] ]; + else { + // TODO: No matching end code found - warn! + return [ 1, "`" ]; + } + }, + + " \n": function lineBreak( text ) { + return [ 3, [ "linebreak" ] ]; + } + +}; + +// Meta Helper/generator method for em and strong handling +function strong_em( tag, md ) { + + var state_slot = tag + "_state", + other_slot = tag == "strong" ? 
"em_state" : "strong_state"; + + function CloseTag(len) { + this.len_after = len; + this.name = "close_" + md; + } + + return function ( text, orig_match ) { + + if ( this[state_slot][0] == md ) { + // Most recent em is of this type + //D:this.debug("closing", md); + this[state_slot].shift(); + + // "Consume" everything to go back to the recrusion in the else-block below + return[ text.length, new CloseTag(text.length-md.length) ]; + } + else { + // Store a clone of the em/strong states + var other = this[other_slot].slice(), + state = this[state_slot].slice(); + + this[state_slot].unshift(md); + + //D:this.debug_indent += " "; + + // Recurse + var res = this.processInline( text.substr( md.length ) ); + //D:this.debug_indent = this.debug_indent.substr(2); + + var last = res[res.length - 1]; + + //D:this.debug("processInline from", tag + ": ", uneval( res ) ); + + var check = this[state_slot].shift(); + if ( last instanceof CloseTag ) { + res.pop(); + // We matched! Huzzah. + var consumed = text.length - last.len_after; + return [ consumed, [ tag ].concat(res) ]; + } + else { + // Restore the state of the other kind. We might have mistakenly closed it. + this[other_slot] = other; + this[state_slot] = state; + + // We can't reuse the processed result as it could have wrong parsing contexts in it. + return [ md.length, md ]; + } + } + }; // End returned function +} + +Markdown.dialects.Gruber.inline["**"] = strong_em("strong", "**"); +Markdown.dialects.Gruber.inline["__"] = strong_em("strong", "__"); +Markdown.dialects.Gruber.inline["*"] = strong_em("em", "*"); +Markdown.dialects.Gruber.inline["_"] = strong_em("em", "_"); + + +// Build default order from insertion order. 
+Markdown.buildBlockOrder = function(d) { + var ord = []; + for ( var i in d ) { + if ( i == "__order__" || i == "__call__" ) continue; + ord.push( i ); + } + d.__order__ = ord; +}; + +// Build patterns for inline matcher +Markdown.buildInlinePatterns = function(d) { + var patterns = []; + + for ( var i in d ) { + // __foo__ is reserved and not a pattern + if ( i.match( /^__.*__$/) ) continue; + var l = i.replace( /([\\.*+?|()\[\]{}])/g, "\\$1" ) + .replace( /\n/, "\\n" ); + patterns.push( i.length == 1 ? l : "(?:" + l + ")" ); + } + + patterns = patterns.join("|"); + d.__patterns__ = patterns; + //print("patterns:", uneval( patterns ) ); + + var fn = d.__call__; + d.__call__ = function(text, pattern) { + if ( pattern != undefined ) { + return fn.call(this, text, pattern); + } + else + { + return fn.call(this, text, patterns); + } + }; +}; + +Markdown.DialectHelpers = {}; +Markdown.DialectHelpers.inline_until_char = function( text, want ) { + var consumed = 0, + nodes = []; + + while ( true ) { + if ( text.charAt( consumed ) == want ) { + // Found the character we were looking for + consumed++; + return [ consumed, nodes ]; + } + + if ( consumed >= text.length ) { + // No closing char found. Abort. + return null; + } + + var res = this.dialect.inline.__oneElement__.call(this, text.substr( consumed ) ); + consumed += res[ 0 ]; + // Add any returned nodes. 
+ nodes.push.apply( nodes, res.slice( 1 ) ); + } +} + +// Helper function to make sub-classing a dialect easier +Markdown.subclassDialect = function( d ) { + function Block() {} + Block.prototype = d.block; + function Inline() {} + Inline.prototype = d.inline; + + return { block: new Block(), inline: new Inline() }; +}; + +Markdown.buildBlockOrder ( Markdown.dialects.Gruber.block ); +Markdown.buildInlinePatterns( Markdown.dialects.Gruber.inline ); + +var isArray = Array.isArray || function(obj) { + return Object.prototype.toString.call(obj) == "[object Array]"; +}; + +var forEach; +// Don't mess with Array.prototype. Its not friendly +if ( Array.prototype.forEach ) { + forEach = function( arr, cb, thisp ) { + return arr.forEach( cb, thisp ); + }; +} +else { + forEach = function(arr, cb, thisp) { + for (var i = 0; i < arr.length; i++) { + cb.call(thisp || arr, arr[i], i, arr); + } + } +} + +var isEmpty = function( obj ) { + for ( var key in obj ) { + if ( hasOwnProperty.call( obj, key ) ) { + return false; + } + } + + return true; +} + +function extract_attr( jsonml ) { + return isArray(jsonml) + && jsonml.length > 1 + && typeof jsonml[ 1 ] === "object" + && !( isArray(jsonml[ 1 ]) ) + ? jsonml[ 1 ] + : undefined; +} + + + +/** + * renderJsonML( jsonml[, options] ) -> String + * - jsonml (Array): JsonML array to render to XML + * - options (Object): options + * + * Converts the given JsonML into well-formed XML. + * + * The options currently understood are: + * + * - root (Boolean): wether or not the root node should be included in the + * output, or just its children. The default `false` is to not include the + * root itself. + */ +expose.renderJsonML = function( jsonml, options ) { + options = options || {}; + // include the root element in the rendered output? 
+ options.root = options.root || false; + + var content = []; + + if ( options.root ) { + content.push( render_tree( jsonml ) ); + } + else { + jsonml.shift(); // get rid of the tag + if ( jsonml.length && typeof jsonml[ 0 ] === "object" && !( jsonml[ 0 ] instanceof Array ) ) { + jsonml.shift(); // get rid of the attributes + } + + while ( jsonml.length ) { + content.push( render_tree( jsonml.shift() ) ); + } + } + + return content.join( "\n\n" ); +}; + +function escapeHTML( text ) { + return text.replace( /&/g, "&" ) + .replace( //g, ">" ) + .replace( /"/g, """ ) + .replace( /'/g, "'" ); +} + +function render_tree( jsonml ) { + // basic case + if ( typeof jsonml === "string" ) { + return jsonml; + } + + var tag = jsonml.shift(), + attributes = {}, + content = []; + + if ( jsonml.length && typeof jsonml[ 0 ] === "object" && !( jsonml[ 0 ] instanceof Array ) ) { + attributes = jsonml.shift(); + } + + while ( jsonml.length ) { + content.push( render_tree( jsonml.shift() ) ); + } + + var tag_attrs = ""; + for ( var a in attributes ) { + tag_attrs += " " + a + '="' + escapeHTML( attributes[ a ] ) + '"'; + } + + // be careful about adding whitespace here for inline elements + if ( tag == "img" || tag == "br" || tag == "hr" ) { + return "<"+ tag + tag_attrs + "/>"; + } + else { + return "<"+ tag + tag_attrs + ">" + content.join( "" ) + ""; + } +} + +function convert_tree_to_html( tree, references, options ) { + var i; + options = options || {}; + + // shallow clone + var jsonml = tree.slice( 0 ); + + if ( typeof options.preprocessTreeNode === "function" ) { + jsonml = options.preprocessTreeNode(jsonml, references); + } + + // Clone attributes if they exist + var attrs = extract_attr( jsonml ); + if ( attrs ) { + jsonml[ 1 ] = {}; + for ( i in attrs ) { + jsonml[ 1 ][ i ] = attrs[ i ]; + } + attrs = jsonml[ 1 ]; + } + + // basic case + if ( typeof jsonml === "string" ) { + return jsonml; + } + + // convert this node + switch ( jsonml[ 0 ] ) { + case "header": + jsonml[ 0 
] = "h" + jsonml[ 1 ].level; + delete jsonml[ 1 ].level; + break; + case "bulletlist": + jsonml[ 0 ] = "ul"; + break; + case "numberlist": + jsonml[ 0 ] = "ol"; + break; + case "listitem": + jsonml[ 0 ] = "li"; + break; + case "para": + jsonml[ 0 ] = "p"; + break; + case "markdown": + jsonml[ 0 ] = "html"; + if ( attrs ) delete attrs.references; + break; + case "code_block": + jsonml[ 0 ] = "pre"; + i = attrs ? 2 : 1; + var code = [ "code" ]; + code.push.apply( code, jsonml.splice( i, jsonml.length - i ) ); + jsonml[ i ] = code; + break; + case "inlinecode": + jsonml[ 0 ] = "code"; + break; + case "img": + jsonml[ 1 ].src = jsonml[ 1 ].href; + delete jsonml[ 1 ].href; + break; + case "linebreak": + jsonml[ 0 ] = "br"; + break; + case "link": + jsonml[ 0 ] = "a"; + break; + case "link_ref": + jsonml[ 0 ] = "a"; + + // grab this ref and clean up the attribute node + var ref = references[ attrs.ref ]; + + // if the reference exists, make the link + if ( ref ) { + delete attrs.ref; + + // add in the href and title, if present + attrs.href = ref.href; + if ( ref.title ) { + attrs.title = ref.title; + } + + // get rid of the unneeded original text + delete attrs.original; + } + // the reference doesn't exist, so revert to plain text + else { + return attrs.original; + } + break; + case "img_ref": + jsonml[ 0 ] = "img"; + + // grab this ref and clean up the attribute node + var ref = references[ attrs.ref ]; + + // if the reference exists, make the link + if ( ref ) { + delete attrs.ref; + + // add in the href and title, if present + attrs.src = ref.href; + if ( ref.title ) { + attrs.title = ref.title; + } + + // get rid of the unneeded original text + delete attrs.original; + } + // the reference doesn't exist, so revert to plain text + else { + return attrs.original; + } + break; + } + + // convert all the children + i = 1; + + // deal with the attribute node, if it exists + if ( attrs ) { + // if there are keys, skip over it + for ( var key in jsonml[ 1 ] ) { + i = 2; 
+ break; + } + // if there aren't, remove it + if ( i === 1 ) { + jsonml.splice( i, 1 ); + } + } + + for ( ; i < jsonml.length; ++i ) { + jsonml[ i ] = convert_tree_to_html( jsonml[ i ], references, options ); + } + + return jsonml; +} + + +// merges adjacent text nodes into a single node +function merge_text_nodes( jsonml ) { + // skip the tag name and attribute hash + var i = extract_attr( jsonml ) ? 2 : 1; + + while ( i < jsonml.length ) { + // if it's a string check the next item too + if ( typeof jsonml[ i ] === "string" ) { + if ( i + 1 < jsonml.length && typeof jsonml[ i + 1 ] === "string" ) { + // merge the second string into the first and remove it + jsonml[ i ] += jsonml.splice( i + 1, 1 )[ 0 ]; + } + else { + ++i; + } + } + // if it's not a string recurse + else { + merge_text_nodes( jsonml[ i ] ); + ++i; + } + } +} + +} )( (function() { + if ( typeof exports === "undefined" ) { + window.BetterMarkdown = {}; + return window.BetterMarkdown; + } + else { + return exports; + } +} )() ); diff --git a/app/assets/javascripts/external/twitter-text-1.5.0.js b/app/assets/javascripts/external/twitter-text-1.5.0.js deleted file mode 100644 index 4822276ed3b..00000000000 --- a/app/assets/javascripts/external/twitter-text-1.5.0.js +++ /dev/null @@ -1,1294 +0,0 @@ -/*! - * twitter-text-js 1.5.0 - * - * Copyright 2011 Twitter, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this work except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - */ -if (typeof window === "undefined" || window === null) { - window = { twttr: {} }; -} -if (window.twttr == null) { - window.twttr = {}; -} -if (typeof twttr === "undefined" || twttr === null) { - twttr = {}; -} - -(function() { - twttr.txt = {}; - twttr.txt.regexen = {}; - - var HTML_ENTITIES = { - '&': '&', - '>': '>', - '<': '<', - '"': '"', - "'": ''' - }; - - // HTML escaping - twttr.txt.htmlEscape = function(text) { - return text && text.replace(/[&"'><]/g, function(character) { - return HTML_ENTITIES[character]; - }); - }; - - // Builds a RegExp - function regexSupplant(regex, flags) { - flags = flags || ""; - if (typeof regex !== "string") { - if (regex.global && flags.indexOf("g") < 0) { - flags += "g"; - } - if (regex.ignoreCase && flags.indexOf("i") < 0) { - flags += "i"; - } - if (regex.multiline && flags.indexOf("m") < 0) { - flags += "m"; - } - - regex = regex.source; - } - - return new RegExp(regex.replace(/#\{(\w+)\}/g, function(match, name) { - var newRegex = twttr.txt.regexen[name] || ""; - if (typeof newRegex !== "string") { - newRegex = newRegex.source; - } - return newRegex; - }), flags); - } - - twttr.txt.regexSupplant = regexSupplant; - - // simple string interpolation - function stringSupplant(str, values) { - return str.replace(/#\{(\w+)\}/g, function(match, name) { - return values[name] || ""; - }); - } - - twttr.txt.stringSupplant = stringSupplant; - - function addCharsToCharClass(charClass, start, end) { - var s = String.fromCharCode(start); - if (end !== start) { - s += "-" + String.fromCharCode(end); - } - charClass.push(s); - return charClass; - } - - twttr.txt.addCharsToCharClass = addCharsToCharClass; - - // Space is more than %20, U+3000 for example is the full-width space used with Kanji. 
// Provide a short-hand to access both the list of characters and a pattern
// suitable for use with String#split.
// Taken from: ActiveSupport::Multibyte::Handlers::UTF8Handler::UNICODE_WHITESPACE
var fromCode = String.fromCharCode;

// Turns a list of character codes into a list of one-character strings.
function charsFromCodes(codes) {
  var chars = [];
  for (var idx = 0; idx < codes.length; idx++) {
    chars.push(fromCode(codes[idx]));
  }
  return chars;
}

// Passes every [first, last] pair of `ranges` to addCharsToCharClass, in order,
// and returns `charClass`.
function addRangesToCharClass(charClass, ranges) {
  for (var idx = 0; idx < ranges.length; idx++) {
    addCharsToCharClass(charClass, ranges[idx][0], ranges[idx][1]);
  }
  return charClass;
}

var UNICODE_SPACES = charsFromCodes([
  0x0020, // White_Space # Zs SPACE
  0x0085, // White_Space # Cc (control character U+0085)
  0x00A0, // White_Space # Zs NO-BREAK SPACE
  0x1680, // White_Space # Zs OGHAM SPACE MARK
  0x180E, // White_Space # Zs MONGOLIAN VOWEL SEPARATOR
  0x2028, // White_Space # Zl LINE SEPARATOR
  0x2029, // White_Space # Zp PARAGRAPH SEPARATOR
  0x202F, // White_Space # Zs NARROW NO-BREAK SPACE
  0x205F, // White_Space # Zs MEDIUM MATHEMATICAL SPACE
  0x3000  // White_Space # Zs IDEOGRAPHIC SPACE
]);
addRangesToCharClass(UNICODE_SPACES, [
  [0x009, 0x00D],   // White_Space # Cc [5] control characters U+0009..U+000D
  [0x2000, 0x200A]  // White_Space # Zs [11] EN QUAD..HAIR SPACE
]);

var INVALID_CHARS = charsFromCodes([
  0xFFFE,
  0xFEFF, // BOM
  0xFFFF  // Special
]);
addRangesToCharClass(INVALID_CHARS, [
  [0x202A, 0x202E] // Directional change
]);

twttr.txt.regexen.spaces_group = regexSupplant(UNICODE_SPACES.join(""));
twttr.txt.regexen.spaces = regexSupplant("[" + UNICODE_SPACES.join("") + "]");
twttr.txt.regexen.invalid_chars_group = regexSupplant(INVALID_CHARS.join(""));
twttr.txt.regexen.punct = /\!'#%&'\(\)*\+,\\\-\.\/:;<=>\?@\[\]\^_{|}~\$/;

// Character-class fragments for the non-Latin scripts accepted in hashtags.
var nonLatinHashtagChars = addRangesToCharClass([], [
  // Cyrillic
  [0x0400, 0x04ff], // Cyrillic
  [0x0500, 0x0527], // Cyrillic Supplement
  [0x2de0, 0x2dff], // Cyrillic Extended A
  [0xa640, 0xa69f], // Cyrillic Extended B
  // Hebrew
  [0x0591, 0x05bf], // Hebrew
  [0x05c1, 0x05c2],
  [0x05c4, 0x05c5],
  [0x05c7, 0x05c7],
  [0x05d0, 0x05ea],
  [0x05f0, 0x05f4],
  [0xfb12, 0xfb28], // Hebrew Presentation Forms
  [0xfb2a, 0xfb36],
  [0xfb38, 0xfb3c],
  [0xfb3e, 0xfb3e],
  [0xfb40, 0xfb41],
  [0xfb43, 0xfb44],
  [0xfb46, 0xfb4f],
  // Arabic
  [0x0610, 0x061a], // Arabic
  [0x0620, 0x065f],
  [0x066e, 0x06d3],
  [0x06d5, 0x06dc],
  [0x06de, 0x06e8],
  [0x06ea, 0x06ef],
  [0x06fa, 0x06fc],
  [0x06ff, 0x06ff],
  [0x0750, 0x077f], // Arabic Supplement
  [0x08a0, 0x08a0], // Arabic Extended A
  [0x08a2, 0x08ac],
  [0x08e4, 0x08fe],
  [0xfb50, 0xfbb1], // Arabic Pres. Forms A
  [0xfbd3, 0xfd3d],
  [0xfd50, 0xfd8f],
  [0xfd92, 0xfdc7],
  [0xfdf0, 0xfdfb],
  [0xfe70, 0xfe74], // Arabic Pres. Forms B
  [0xfe76, 0xfefc],
  [0x200c, 0x200c], // Zero-Width Non-Joiner
  // Thai
  [0x0e01, 0x0e3a],
  [0x0e40, 0x0e4e],
  // Hangul (Korean)
  [0x1100, 0x11ff], // Hangul Jamo
  [0x3130, 0x3185], // Hangul Compatibility Jamo
  [0xA960, 0xA97F], // Hangul Jamo Extended-A
  [0xAC00, 0xD7AF], // Hangul Syllables
  [0xD7B0, 0xD7FF], // Hangul Jamo Extended-B
  [0xFFA1, 0xFFDC], // half-width Hangul
  // Japanese and Chinese
  [0x30A1, 0x30FA], // Katakana (full-width)
  [0x30FC, 0x30FE], // Katakana Chouon and iteration marks (full-width)
  [0xFF66, 0xFF9F], // Katakana (half-width)
  [0xFF70, 0xFF70], // Katakana Chouon (half-width)
  [0xFF10, 0xFF19], // \
  [0xFF21, 0xFF3A], // - Latin (full-width)
  [0xFF41, 0xFF5A], // /
  [0x3041, 0x3096], // Hiragana
  [0x3099, 0x309E], // Hiragana voicing and iteration mark
  [0x3400, 0x4DBF], // Kanji (CJK Extension A)
  [0x4E00, 0x9FFF], // Kanji (Unified)
  // -- Disabled as it breaks the Regex.
  // [0x20000, 0x2A6DF], // Kanji (CJK Extension B)
  // NOTE(review): the next three ranges are also above 0xFFFF. addCharsToCharClass
  // feeds both bounds to String.fromCharCode, which keeps only the low 16 bits, so
  // what is actually added is U+A700-U+B73F, U+B740-U+B81F and U+F800-U+FA1F rather
  // than the CJK extensions named in the comments. Kept as-is to preserve matching
  // behaviour - confirm whether that is intended.
  [0x2A700, 0x2B73F], // Kanji (CJK Extension C)
  [0x2B740, 0x2B81F], // Kanji (CJK Extension D)
  [0x2F800, 0x2FA1F], // Kanji (CJK supplement)
  [0x3003, 0x3003], // Kanji iteration mark
  [0x3005, 0x3005], // Kanji iteration mark
  [0x303B, 0x303B]  // Han iteration mark
]);

twttr.txt.regexen.nonLatinHashtagChars = regexSupplant(nonLatinHashtagChars.join(""));

// Character-class fragments for accented Latin letters.
var latinAccentChars = addRangesToCharClass([], [
  // Latin accented characters (subtracted 0xD7 from the range, it's a confusable
  // multiplication sign. Looks like "x")
  [0x00c0, 0x00d6],
  [0x00d8, 0x00f6],
  [0x00f8, 0x00ff],
  // Latin Extended A and B
  [0x0100, 0x024f],
  // assorted IPA Extensions
  [0x0253, 0x0254],
  [0x0256, 0x0257],
  [0x0259, 0x0259],
  [0x025b, 0x025b],
  [0x0263, 0x0263],
  [0x0268, 0x0268],
  [0x026f, 0x026f],
  [0x0272, 0x0272],
  [0x0289, 0x0289],
  [0x028b, 0x028b],
  // Okina for Hawaiian (it *is* a letter character)
  [0x02bb, 0x02bb],
  // Combining diacritics
  [0x0300, 0x036f],
  // Latin Extended Additional
  [0x1e00, 0x1eff]
]);
twttr.txt.regexen.latinAccentChars = regexSupplant(latinAccentChars.join(""));

// A hashtag must contain characters, numbers and underscores, but not all numbers.
//
// The sign classes below list the ASCII sign together with its fullwidth form
// (U+FF03 FULLWIDTH NUMBER SIGN, U+FF20 FULLWIDTH COMMERCIAL AT). The fullwidth
// forms are written as \u escapes so that width normalization or re-encoding of
// this file cannot fold them into a second copy of the ASCII sign, which would
// silently stop fullwidth signs from being recognised.
twttr.txt.regexen.hashSigns = /[#\uFF03]/;
twttr.txt.regexen.hashtagAlpha = regexSupplant(/[a-z_#{latinAccentChars}#{nonLatinHashtagChars}]/i);
twttr.txt.regexen.hashtagAlphaNumeric = regexSupplant(/[a-z0-9_#{latinAccentChars}#{nonLatinHashtagChars}]/i);
twttr.txt.regexen.endHashtagMatch = regexSupplant(/^(?:#{hashSigns}|:\/\/)/);
twttr.txt.regexen.hashtagBoundary = regexSupplant(/(?:^|$|[^&a-z0-9_#{latinAccentChars}#{nonLatinHashtagChars}])/);
// $1: boundary, $2: hash sign, $3: hashtag text (at least one non-digit character)
twttr.txt.regexen.validHashtag = regexSupplant(/(#{hashtagBoundary})(#{hashSigns})(#{hashtagAlphaNumeric}*#{hashtagAlpha}#{hashtagAlphaNumeric}*)/gi);

// Mention related regex collection
twttr.txt.regexen.validMentionPrecedingChars = /(?:^|[^a-zA-Z0-9_!#$%&*@\uFF20]|RT:?)/;
twttr.txt.regexen.atSigns = /[@\uFF20]/;
twttr.txt.regexen.validMentionOrList = regexSupplant(
  '(#{validMentionPrecedingChars})' +  // $1: Preceding character
  '(#{atSigns})' +                     // $2: At mark
  '([a-zA-Z0-9_]{1,20})' +             // $3: Screen name
  '(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?'  // $4: List (optional)
, 'g');
twttr.txt.regexen.validReply = regexSupplant(/^(?:#{spaces})*#{atSigns}([a-zA-Z0-9_]{1,20})/);
twttr.txt.regexen.endMentionMatch = regexSupplant(/^(?:#{atSigns}|[#{latinAccentChars}]|:\/\/)/);

// URL related regex collection
twttr.txt.regexen.validUrlPrecedingChars = regexSupplant(/(?:[^A-Za-z0-9@\uFF20$#\uFF03#{invalid_chars_group}]|^)/);
twttr.txt.regexen.invalidUrlWithoutProtocolPrecedingChars = /[-_.\/]$/;
// A plain string (not a RegExp): it is spliced into the negated class of validDomainChars.
twttr.txt.regexen.invalidDomainChars = stringSupplant("#{punct}#{spaces_group}#{invalid_chars_group}", twttr.txt.regexen);
twttr.txt.regexen.validDomainChars = regexSupplant(/[^#{invalidDomainChars}]/);
twttr.txt.regexen.validSubdomain = regexSupplant(/(?:(?:#{validDomainChars}(?:[_-]|#{validDomainChars})*)?#{validDomainChars}\.)/);
twttr.txt.regexen.validDomainName = regexSupplant(/(?:(?:#{validDomainChars}(?:-|#{validDomainChars})*)?#{validDomainChars}\.)/);
// The TLD alternations end with a lookahead so that a TLD is only accepted when
// it is not followed by another letter or digit.
twttr.txt.regexen.validGTLD = regexSupplant(/(?:(?:aero|asia|biz|cat|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel|xxx)(?=[^0-9a-zA-Z]|$))/);
twttr.txt.regexen.validCCTLD = regexSupplant(/(?:(?:ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw)(?=[^0-9a-zA-Z]|$))/);
twttr.txt.regexen.validPunycode = regexSupplant(/(?:xn--[0-9a-z]+)/);
twttr.txt.regexen.validDomain = regexSupplant(/(?:#{validSubdomain}*#{validDomainName}(?:#{validGTLD}|#{validCCTLD}|#{validPunycode}))/); - twttr.txt.regexen.validAsciiDomain = regexSupplant(/(?:(?:[a-z0-9#{latinAccentChars}]+)\.)+(?:#{validGTLD}|#{validCCTLD}|#{validPunycode})/gi); - twttr.txt.regexen.invalidShortDomain = regexSupplant(/^#{validDomainName}#{validCCTLD}$/); - - twttr.txt.regexen.validPortNumber = regexSupplant(/[0-9]+/); - - twttr.txt.regexen.validGeneralUrlPathChars = regexSupplant(/[a-z0-9!\*';:=\+,\.\$\/%#\[\]\-_~|&#{latinAccentChars}]/i); - // Allow URL paths to contain balanced parens - // 1. Used in Wikipedia URLs like /Primer_(film) - // 2. Used in IIS sessions like /S(dfd346)/ - twttr.txt.regexen.validUrlBalancedParens = regexSupplant(/\(#{validGeneralUrlPathChars}+\)/i); - // Valid end-of-path chracters (so /foo. does not gobble the period). - // 1. Allow =&# for empty URL parameters and other URL-join artifacts - twttr.txt.regexen.validUrlPathEndingChars = regexSupplant(/[\+\-a-z0-9=_#\/#{latinAccentChars}]|(?:#{validUrlBalancedParens})/i); - // Allow @ in a url, but only in the middle. Catch things like http://example.com/@user/ - twttr.txt.regexen.validUrlPath = regexSupplant('(?:' + - '(?:' + - '#{validGeneralUrlPathChars}*' + - '(?:#{validUrlBalancedParens}#{validGeneralUrlPathChars}*)*' + - '#{validUrlPathEndingChars}'+ - ')|(?:@#{validGeneralUrlPathChars}+\/)'+ - ')', 'i'); - - twttr.txt.regexen.validUrlQueryChars = /[a-z0-9!?\*'\(\);:&=\+\$\/%#\[\]\-_\.,~|]/i; - twttr.txt.regexen.validUrlQueryEndingChars = /[a-z0-9_&=#\/]/i; - twttr.txt.regexen.extractUrl = regexSupplant( - '(' + // $1 total match - '(#{validUrlPrecedingChars})' + // $2 Preceeding chracter - '(' + // $3 URL - '(https?:\\/\\/)?' + // $4 Protocol (optional) - '(#{validDomain})' + // $5 Domain(s) - '(?::(#{validPortNumber}))?' + // $6 Port number (optional) - '(\\/#{validUrlPath}*)?' + // $7 URL Path - '(\\?#{validUrlQueryChars}*#{validUrlQueryEndingChars})?' 
+ // $8 Query String - ')' + - ')' - , 'gi'); - - twttr.txt.regexen.validTcoUrl = /^https?:\/\/t\.co\/[a-z0-9]+/i; - - // cashtag related regex - twttr.txt.regexen.cashtag = /[a-z]{1,6}(?:[._][a-z]{1,2})?/i; - twttr.txt.regexen.validCashtag = regexSupplant('(?:^|#{spaces})\\$(#{cashtag})(?=$|\\s|[#{punct}])', 'gi'); - - // These URL validation pattern strings are based on the ABNF from RFC 3986 - twttr.txt.regexen.validateUrlUnreserved = /[a-z0-9\-._~]/i; - twttr.txt.regexen.validateUrlPctEncoded = /(?:%[0-9a-f]{2})/i; - twttr.txt.regexen.validateUrlSubDelims = /[!$&'()*+,;=]/i; - twttr.txt.regexen.validateUrlPchar = regexSupplant('(?:' + - '#{validateUrlUnreserved}|' + - '#{validateUrlPctEncoded}|' + - '#{validateUrlSubDelims}|' + - '[:|@]' + - ')', 'i'); - - twttr.txt.regexen.validateUrlScheme = /(?:[a-z][a-z0-9+\-.]*)/i; - twttr.txt.regexen.validateUrlUserinfo = regexSupplant('(?:' + - '#{validateUrlUnreserved}|' + - '#{validateUrlPctEncoded}|' + - '#{validateUrlSubDelims}|' + - ':' + - ')*', 'i'); - - twttr.txt.regexen.validateUrlDecOctet = /(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))/i; - twttr.txt.regexen.validateUrlIpv4 = regexSupplant(/(?:#{validateUrlDecOctet}(?:\.#{validateUrlDecOctet}){3})/i); - - // Punting on real IPv6 validation for now - twttr.txt.regexen.validateUrlIpv6 = /(?:\[[a-f0-9:\.]+\])/i; - - // Also punting on IPvFuture for now - twttr.txt.regexen.validateUrlIp = regexSupplant('(?:' + - '#{validateUrlIpv4}|' + - '#{validateUrlIpv6}' + - ')', 'i'); - - // This is more strict than the rfc specifies - twttr.txt.regexen.validateUrlSubDomainSegment = /(?:[a-z0-9](?:[a-z0-9_\-]*[a-z0-9])?)/i; - twttr.txt.regexen.validateUrlDomainSegment = /(?:[a-z0-9](?:[a-z0-9\-]*[a-z0-9])?)/i; - twttr.txt.regexen.validateUrlDomainTld = /(?:[a-z](?:[a-z0-9\-]*[a-z0-9])?)/i; - twttr.txt.regexen.validateUrlDomain = regexSupplant(/(?:(?:#{validateUrlSubDomainSegment]}\.)*(?:#{validateUrlDomainSegment]}\.)#{validateUrlDomainTld})/i); - - 
twttr.txt.regexen.validateUrlHost = regexSupplant('(?:' + - '#{validateUrlIp}|' + - '#{validateUrlDomain}' + - ')', 'i'); - - // Unencoded internationalized domains - this doesn't check for invalid UTF-8 sequences - twttr.txt.regexen.validateUrlUnicodeSubDomainSegment = /(?:(?:[a-z0-9]|[^\u0000-\u007f])(?:(?:[a-z0-9_\-]|[^\u0000-\u007f])*(?:[a-z0-9]|[^\u0000-\u007f]))?)/i; - twttr.txt.regexen.validateUrlUnicodeDomainSegment = /(?:(?:[a-z0-9]|[^\u0000-\u007f])(?:(?:[a-z0-9\-]|[^\u0000-\u007f])*(?:[a-z0-9]|[^\u0000-\u007f]))?)/i; - twttr.txt.regexen.validateUrlUnicodeDomainTld = /(?:(?:[a-z]|[^\u0000-\u007f])(?:(?:[a-z0-9\-]|[^\u0000-\u007f])*(?:[a-z0-9]|[^\u0000-\u007f]))?)/i; - twttr.txt.regexen.validateUrlUnicodeDomain = regexSupplant(/(?:(?:#{validateUrlUnicodeSubDomainSegment}\.)*(?:#{validateUrlUnicodeDomainSegment}\.)#{validateUrlUnicodeDomainTld})/i); - - twttr.txt.regexen.validateUrlUnicodeHost = regexSupplant('(?:' + - '#{validateUrlIp}|' + - '#{validateUrlUnicodeDomain}' + - ')', 'i'); - - twttr.txt.regexen.validateUrlPort = /[0-9]{1,5}/; - - twttr.txt.regexen.validateUrlUnicodeAuthority = regexSupplant( - '(?:(#{validateUrlUserinfo})@)?' + // $1 userinfo - '(#{validateUrlUnicodeHost})' + // $2 host - '(?::(#{validateUrlPort}))?' //$3 port - , "i"); - - twttr.txt.regexen.validateUrlAuthority = regexSupplant( - '(?:(#{validateUrlUserinfo})@)?' + // $1 userinfo - '(#{validateUrlHost})' + // $2 host - '(?::(#{validateUrlPort}))?' // $3 port - , "i"); - - twttr.txt.regexen.validateUrlPath = regexSupplant(/(\/#{validateUrlPchar}*)*/i); - twttr.txt.regexen.validateUrlQuery = regexSupplant(/(#{validateUrlPchar}|\/|\?)*/i); - twttr.txt.regexen.validateUrlFragment = regexSupplant(/(#{validateUrlPchar}|\/|\?)*/i); - - // Modified version of RFC 3986 Appendix B - twttr.txt.regexen.validateUrlUnencoded = regexSupplant( - '^' + // Full URL - '(?:' + - '([^:/?#]+):\\/\\/' + // $1 Scheme - ')?' 
+ - '([^/?#]*)' + // $2 Authority - '([^?#]*)' + // $3 Path - '(?:' + - '\\?([^#]*)' + // $4 Query - ')?' + - '(?:' + - '#(.*)' + // $5 Fragment - ')?$' - , "i"); - - - // Default CSS class for auto-linked lists (along with the url class) - var DEFAULT_LIST_CLASS = "tweet-url list-slug"; - // Default CSS class for auto-linked usernames (along with the url class) - var DEFAULT_USERNAME_CLASS = "tweet-url username"; - // Default CSS class for auto-linked hashtags (along with the url class) - var DEFAULT_HASHTAG_CLASS = "tweet-url hashtag"; - // Default CSS class for auto-linked cashtags (along with the url class) - var DEFAULT_CASHTAG_CLASS = "tweet-url cashtag"; - // Options which should not be passed as HTML attributes - var OPTIONS_NOT_ATTRIBUTES = {'urlClass':true, 'listClass':true, 'usernameClass':true, 'hashtagClass':true, 'cashtagClass':true, - 'usernameUrlBase':true, 'listUrlBase':true, 'hashtagUrlBase':true, 'cashtagUrlBase':true, - 'usernameUrlBlock':true, 'listUrlBlock':true, 'hashtagUrlBlock':true, 'linkUrlBlock':true, - 'usernameIncludeSymbol':true, 'suppressLists':true, 'suppressNoFollow':true, - 'suppressDataScreenName':true, 'urlEntities':true, 'symbolTag':true, 'textWithSymbolTag':true, 'urlTarget':true, - 'invisibleTagAttrs':true, 'linkAttributeBlock':true, 'linkTextBlock': true - }; - var BOOLEAN_ATTRIBUTES = {'disabled':true, 'readonly':true, 'multiple':true, 'checked':true}; - - // Simple object cloning function for simple objects - function clone(o) { - var r = {}; - for (var k in o) { - if (o.hasOwnProperty(k)) { - r[k] = o[k]; - } - } - - return r; - } - - twttr.txt.tagAttrs = function(attributes) { - var htmlAttrs = ""; - for (var k in attributes) { - var v = attributes[k]; - if (BOOLEAN_ATTRIBUTES[k]) { - v = v ? 
k : null; - } - if (v == null) continue; - htmlAttrs += " " + twttr.txt.htmlEscape(k) + "=\"" + twttr.txt.htmlEscape(v.toString()) + "\""; - } - return htmlAttrs; - }; - - twttr.txt.linkToText = function(entity, text, attributes, options) { - if (!options.suppressNoFollow) { - attributes.rel = "nofollow"; - } - // if linkAttributeBlock is specified, call it to modify the attributes - if (options.linkAttributeBlock) { - options.linkAttributeBlock(entity, attributes); - } - // if linkTextBlock is specified, call it to get a new/modified link text - if (options.linkTextBlock) { - text = options.linkTextBlock(entity, text); - } - var d = { - text: text, - attr: twttr.txt.tagAttrs(attributes) - }; - return stringSupplant("#{text}", d); - }; - - twttr.txt.linkToTextWithSymbol = function(entity, symbol, text, attributes, options) { - var taggedSymbol = options.symbolTag ? "<" + options.symbolTag + ">" + symbol + "" : symbol; - text = twttr.txt.htmlEscape(text); - var taggedText = options.textWithSymbolTag ? 
"<" + options.textWithSymbolTag + ">" + text + "" : text; - - if (options.usernameIncludeSymbol || !symbol.match(twttr.txt.regexen.atSigns)) { - return twttr.txt.linkToText(entity, taggedSymbol + taggedText, attributes, options); - } else { - return taggedSymbol + twttr.txt.linkToText(entity, taggedText, attributes, options); - } - }; - - twttr.txt.linkToHashtag = function(entity, text, options) { - var hash = text.substring(entity.indices[0], entity.indices[0] + 1); - var hashtag = twttr.txt.htmlEscape(entity.hashtag); - var attrs = clone(options.htmlAttrs || {}); - attrs.href = options.hashtagUrlBase + hashtag; - attrs.title = "#" + hashtag; - attrs["class"] = options.hashtagClass; - - return twttr.txt.linkToTextWithSymbol(entity, hash, hashtag, attrs, options); - }; - - twttr.txt.linkToCashtag = function(entity, text, options) { - var cashtag = twttr.txt.htmlEscape(entity.cashtag); - var attrs = clone(options.htmlAttrs || {}); - attrs.href = options.cashtagUrlBase + cashtag; - attrs.title = "$" + cashtag; - attrs["class"] = options.cashtagClass; - - return twttr.txt.linkToTextWithSymbol(entity, "$", cashtag, attrs, options); - }; - - twttr.txt.linkToMentionAndList = function(entity, text, options) { - var at = text.substring(entity.indices[0], entity.indices[0] + 1); - var user = twttr.txt.htmlEscape(entity.screenName); - var slashListname = twttr.txt.htmlEscape(entity.listSlug); - var isList = entity.listSlug && !options.suppressLists; - var attrs = clone(options.htmlAttrs || {}); - attrs["class"] = (isList ? options.listClass : options.usernameClass); - attrs.href = isList ? options.listUrlBase + user + slashListname : options.usernameUrlBase + user; - if (!isList && !options.suppressDataScreenName) { - attrs['data-screen-name'] = user; - } - - return twttr.txt.linkToTextWithSymbol(entity, at, isList ? 
user + slashListname : user, attrs, options); - }; - - twttr.txt.linkToUrl = function(entity, text, options) { - var url = entity.url; - var displayUrl = url; - var linkText = twttr.txt.htmlEscape(displayUrl); - - // If the caller passed a urlEntities object (provided by a Twitter API - // response with include_entities=true), we use that to render the display_url - // for each URL instead of it's underlying t.co URL. - var urlEntity = (options.urlEntities && options.urlEntities[url]) || entity; - if (urlEntity.display_url) { - linkText = twttr.txt.linkTextWithEntity(urlEntity, options); - } - - var attrs = clone(options.htmlAttrs || {}); - attrs.href = url; - - // set class only if urlClass is specified. - if (options.urlClass) { - attrs["class"] = options.urlClass; - } - - // set target only if urlTarget is specified. - if (options.urlTarget) { - attrs.target = options.urlTarget; - } - - if (!options.title && urlEntity.display_url) { - attrs.title = urlEntity.expanded_url; - } - - return twttr.txt.linkToText(entity, linkText, attrs, options); - }; - - twttr.txt.linkTextWithEntity = function (entity, options) { - var displayUrl = entity.display_url; - var expandedUrl = entity.expanded_url; - - // Goal: If a user copies and pastes a tweet containing t.co'ed link, the resulting paste - // should contain the full original URL (expanded_url), not the display URL. - // - // Method: Whenever possible, we actually emit HTML that contains expanded_url, and use - // font-size:0 to hide those parts that should not be displayed (because they are not part of display_url). - // Elements with font-size:0 get copied even though they are not visible. - // Note that display:none doesn't work here. Elements with display:none don't get copied. - // - // Additionally, we want to *display* ellipses, but we don't want them copied. 
To make this happen we - // wrap the ellipses in a tco-ellipsis class and provide an onCopy handler that sets display:none on - // everything with the tco-ellipsis class. - // - // Exception: pic.twitter.com images, for which expandedUrl = "https://twitter.com/#!/username/status/1234/photo/1 - // For those URLs, display_url is not a substring of expanded_url, so we don't do anything special to render the elided parts. - // For a pic.twitter.com URL, the only elided part will be the "https://", so this is fine. - - var displayUrlSansEllipses = displayUrl.replace(/…/g, ""); // We have to disregard ellipses for matching - // Note: we currently only support eliding parts of the URL at the beginning or the end. - // Eventually we may want to elide parts of the URL in the *middle*. If so, this code will - // become more complicated. We will probably want to create a regexp out of display URL, - // replacing every ellipsis with a ".*". - if (expandedUrl.indexOf(displayUrlSansEllipses) != -1) { - var displayUrlIndex = expandedUrl.indexOf(displayUrlSansEllipses); - var v = { - displayUrlSansEllipses: displayUrlSansEllipses, - // Portion of expandedUrl that precedes the displayUrl substring - beforeDisplayUrl: expandedUrl.substr(0, displayUrlIndex), - // Portion of expandedUrl that comes after displayUrl - afterDisplayUrl: expandedUrl.substr(displayUrlIndex + displayUrlSansEllipses.length), - precedingEllipsis: displayUrl.match(/^…/) ? "…" : "", - followingEllipsis: displayUrl.match(/…$/) ? 
"…" : "" - }; - for (var k in v) { - if (v.hasOwnProperty(k)) { - v[k] = twttr.txt.htmlEscape(v[k]); - } - } - // As an example: The user tweets "hi http://longdomainname.com/foo" - // This gets shortened to "hi http://t.co/xyzabc", with display_url = "…nname.com/foo" - // This will get rendered as: - // - // … - // - // http://longdomai - // - // - // nname.com/foo - // - // - //   - // … - // - v['invisible'] = options.invisibleTagAttrs; - return stringSupplant("#{precedingEllipsis} #{beforeDisplayUrl}#{displayUrlSansEllipses}#{afterDisplayUrl} #{followingEllipsis}", v); - } - return displayUrl; - }; - - twttr.txt.autoLinkEntities = function(text, entities, options) { - options = clone(options || {}); - - options.hashtagClass = options.hashtagClass || DEFAULT_HASHTAG_CLASS; - options.hashtagUrlBase = options.hashtagUrlBase || "https://twitter.com/#!/search?q=%23"; - options.cashtagClass = options.cashtagClass || DEFAULT_CASHTAG_CLASS; - options.cashtagUrlBase = options.cashtagUrlBase || "https://twitter.com/#!/search?q=%24"; - options.listClass = options.listClass || DEFAULT_LIST_CLASS; - options.usernameClass = options.usernameClass || DEFAULT_USERNAME_CLASS; - options.usernameUrlBase = options.usernameUrlBase || "https://twitter.com/"; - options.listUrlBase = options.listUrlBase || "https://twitter.com/"; - options.htmlAttrs = twttr.txt.extractHtmlAttrsFromOptions(options); - options.invisibleTagAttrs = options.invisibleTagAttrs || "style='position:absolute;left:-9999px;'"; - - // remap url entities to hash - var urlEntities, i, len; - if(options.urlEntities) { - urlEntities = {}; - for(i = 0, len = options.urlEntities.length; i < len; i++) { - urlEntities[options.urlEntities[i].url] = options.urlEntities[i]; - } - options.urlEntities = urlEntities; - } - - var result = ""; - var beginIndex = 0; - - // sort entities by start index - entities.sort(function(a,b){ return a.indices[0] - b.indices[0]; }); - - for (var i = 0; i < entities.length; i++) { - var entity 
= entities[i]; - result += text.substring(beginIndex, entity.indices[0]); - - if (entity.url) { - result += twttr.txt.linkToUrl(entity, text, options); - } else if (entity.hashtag) { - result += twttr.txt.linkToHashtag(entity, text, options); - } else if (entity.screenName) { - result += twttr.txt.linkToMentionAndList(entity, text, options); - } else if (entity.cashtag) { - result += twttr.txt.linkToCashtag(entity, text, options); - } - beginIndex = entity.indices[1]; - } - result += text.substring(beginIndex, text.length); - return result; - }; - - twttr.txt.autoLinkWithJSON = function(text, json, options) { - // concatenate all entities - var entities = []; - for (var key in json) { - entities = entities.concat(json[key]); - } - // map JSON entity to twitter-text entity - for (var i = 0; i < entities.length; i++) { - entity = entities[i]; - if (entity.screen_name) { - // this is @mention - entity.screenName = entity.screen_name; - } else if (entity.text) { - // this is #hashtag - entity.hashtag = entity.text; - } - } - // modify indices to UTF-16 - twttr.txt.modifyIndicesFromUnicodeToUTF16(text, entities); - - return twttr.txt.autoLinkEntities(text, entities, options); - }; - - twttr.txt.extractHtmlAttrsFromOptions = function(options) { - var htmlAttrs = {}; - for (var k in options) { - var v = options[k]; - if (OPTIONS_NOT_ATTRIBUTES[k]) continue; - if (BOOLEAN_ATTRIBUTES[k]) { - v = v ? 
k : null; - } - if (v == null) continue; - htmlAttrs[k] = v; - } - return htmlAttrs; - }; - - twttr.txt.autoLink = function(text, options) { - var entities = twttr.txt.extractEntitiesWithIndices(text, {extractUrlWithoutProtocol: false}); - return twttr.txt.autoLinkEntities(text, entities, options); - }; - - twttr.txt.autoLinkUsernamesOrLists = function(text, options) { - var entities = twttr.txt.extractMentionsOrListsWithIndices(text); - return twttr.txt.autoLinkEntities(text, entities, options); - }; - - twttr.txt.autoLinkHashtags = function(text, options) { - var entities = twttr.txt.extractHashtagsWithIndices(text); - return twttr.txt.autoLinkEntities(text, entities, options); - }; - - twttr.txt.autoLinkCashtags = function(text, options) { - var entities = twttr.txt.extractCashtagsWithIndices(text); - return twttr.txt.autoLinkEntities(text, entities, options); - }; - - twttr.txt.autoLinkUrlsCustom = function(text, options) { - var entities = twttr.txt.extractUrlsWithIndices(text, {extractUrlWithoutProtocol: false}); - return twttr.txt.autoLinkEntities(text, entities, options); - }; - - twttr.txt.removeOverlappingEntities = function(entities) { - entities.sort(function(a,b){ return a.indices[0] - b.indices[0]; }); - - var prev = entities[0]; - for (var i = 1; i < entities.length; i++) { - if (prev.indices[1] > entities[i].indices[0]) { - entities.splice(i, 1); - i--; - } else { - prev = entities[i]; - } - } - }; - - twttr.txt.extractEntitiesWithIndices = function(text, options) { - var entities = twttr.txt.extractUrlsWithIndices(text, options) - .concat(twttr.txt.extractMentionsOrListsWithIndices(text)) - .concat(twttr.txt.extractHashtagsWithIndices(text, {checkUrlOverlap: false})) - .concat(twttr.txt.extractCashtagsWithIndices(text)); - - if (entities.length == 0) { - return []; - } - - twttr.txt.removeOverlappingEntities(entities); - return entities; - }; - - twttr.txt.extractMentions = function(text) { - var screenNamesOnly = [], - screenNamesWithIndices = 
twttr.txt.extractMentionsWithIndices(text); - - for (var i = 0; i < screenNamesWithIndices.length; i++) { - var screenName = screenNamesWithIndices[i].screenName; - screenNamesOnly.push(screenName); - } - - return screenNamesOnly; - }; - - twttr.txt.extractMentionsWithIndices = function(text) { - var mentions = []; - var mentionsOrLists = twttr.txt.extractMentionsOrListsWithIndices(text); - - for (var i = 0 ; i < mentionsOrLists.length; i++) { - mentionOrList = mentionsOrLists[i]; - if (mentionOrList.listSlug == '') { - mentions.push({ - screenName: mentionOrList.screenName, - indices: mentionOrList.indices - }); - } - } - - return mentions; - }; - - /** - * Extract list or user mentions. - * (Presence of listSlug indicates a list) - */ - twttr.txt.extractMentionsOrListsWithIndices = function(text) { - if (!text || !text.match(twttr.txt.regexen.atSigns)) { - return []; - } - - var possibleNames = [], - position = 0; - - text.replace(twttr.txt.regexen.validMentionOrList, function(match, before, atSign, screenName, slashListname, offset, chunk) { - var after = chunk.slice(offset + match.length); - if (!after.match(twttr.txt.regexen.endMentionMatch)) { - slashListname = slashListname || ''; - var startPosition = text.indexOf(atSign + screenName + slashListname, position); - position = startPosition + screenName.length + slashListname.length + 1; - possibleNames.push({ - screenName: screenName, - listSlug: slashListname, - indices: [startPosition, position] - }); - } - }); - - return possibleNames; - }; - - - twttr.txt.extractReplies = function(text) { - if (!text) { - return null; - } - - var possibleScreenName = text.match(twttr.txt.regexen.validReply); - if (!possibleScreenName || - RegExp.rightContext.match(twttr.txt.regexen.endMentionMatch)) { - return null; - } - - return possibleScreenName[1]; - }; - - twttr.txt.extractUrls = function(text, options) { - var urlsOnly = [], - urlsWithIndices = twttr.txt.extractUrlsWithIndices(text, options); - - for (var i = 0; i 
< urlsWithIndices.length; i++) { - urlsOnly.push(urlsWithIndices[i].url); - } - - return urlsOnly; - }; - - twttr.txt.extractUrlsWithIndices = function(text, options) { - if (!options) { - options = {extractUrlsWithoutProtocol: true}; - } - - if (!text || (options.extractUrlsWithoutProtocol ? !text.match(/\./) : !text.match(/:/))) { - return []; - } - - var urls = []; - - while (twttr.txt.regexen.extractUrl.exec(text)) { - var before = RegExp.$2, url = RegExp.$3, protocol = RegExp.$4, domain = RegExp.$5, path = RegExp.$7; - var endPosition = twttr.txt.regexen.extractUrl.lastIndex, - startPosition = endPosition - url.length; - - // if protocol is missing and domain contains non-ASCII characters, - // extract ASCII-only domains. - if (!protocol) { - if (!options.extractUrlsWithoutProtocol - || before.match(twttr.txt.regexen.invalidUrlWithoutProtocolPrecedingChars)) { - continue; - } - var lastUrl = null, - lastUrlInvalidMatch = false, - asciiEndPosition = 0; - domain.replace(twttr.txt.regexen.validAsciiDomain, function(asciiDomain) { - var asciiStartPosition = domain.indexOf(asciiDomain, asciiEndPosition); - asciiEndPosition = asciiStartPosition + asciiDomain.length; - lastUrl = { - url: asciiDomain, - indices: [startPosition + asciiStartPosition, startPosition + asciiEndPosition] - }; - lastUrlInvalidMatch = asciiDomain.match(twttr.txt.regexen.invalidShortDomain); - if (!lastUrlInvalidMatch) { - urls.push(lastUrl); - } - }); - - // no ASCII-only domain found. Skip the entire URL. - if (lastUrl == null) { - continue; - } - - // lastUrl only contains domain. Need to add path and query if they exist. - if (path) { - if (lastUrlInvalidMatch) { - urls.push(lastUrl); - } - lastUrl.url = url.replace(domain, lastUrl.url); - lastUrl.indices[1] = endPosition; - } - } else { - // In the case of t.co URLs, don't allow additional path characters. 
- if (url.match(twttr.txt.regexen.validTcoUrl)) { - url = RegExp.lastMatch; - endPosition = startPosition + url.length; - } - urls.push({ - url: url, - indices: [startPosition, endPosition] - }); - } - } - - return urls; - }; - - twttr.txt.extractHashtags = function(text) { - var hashtagsOnly = [], - hashtagsWithIndices = twttr.txt.extractHashtagsWithIndices(text); - - for (var i = 0; i < hashtagsWithIndices.length; i++) { - hashtagsOnly.push(hashtagsWithIndices[i].hashtag); - } - - return hashtagsOnly; - }; - - twttr.txt.extractHashtagsWithIndices = function(text, options) { - if (!options) { - options = {checkUrlOverlap: true}; - } - - if (!text || !text.match(twttr.txt.regexen.hashSigns)) { - return []; - } - - var tags = [], - position = 0; - - text.replace(twttr.txt.regexen.validHashtag, function(match, before, hash, hashText, offset, chunk) { - var after = chunk.slice(offset + match.length); - if (after.match(twttr.txt.regexen.endHashtagMatch)) - return; - var startPosition = text.indexOf(hash + hashText, position); - position = startPosition + hashText.length + 1; - tags.push({ - hashtag: hashText, - indices: [startPosition, position] - }); - }); - - if (options.checkUrlOverlap) { - // also extract URL entities - var urls = twttr.txt.extractUrlsWithIndices(text); - if (urls.length > 0) { - var entities = tags.concat(urls); - // remove overlap - twttr.txt.removeOverlappingEntities(entities); - // only push back hashtags - tags = []; - for (var i = 0; i < entities.length; i++) { - if (entities[i].hashtag) { - tags.push(entities[i]); - } - } - } - } - - return tags; - }; - - twttr.txt.extractCashtags = function(text) { - var cashtagsOnly = [], - cashtagsWithIndices = twttr.txt.extractCashtagsWithIndices(text); - - for (var i = 0; i < cashtagsWithIndices.length; i++) { - cashtagsOnly.push(cashtagsWithIndices[i].cashtag); - } - - return cashtagsOnly; - }; - - twttr.txt.extractCashtagsWithIndices = function(text) { - if (!text || text.indexOf("$") == -1) { - 
return []; - } - - var tags = [], - position = 0; - - text.replace(twttr.txt.regexen.validCashtag, function(match, cashtag, offset, chunk) { - // cashtag doesn't contain $ sign, so need to decrement index by 1. - var startPosition = text.indexOf(cashtag, position) - 1; - position = startPosition + cashtag.length + 1; - tags.push({ - cashtag: cashtag, - indices: [startPosition, position] - }); - }); - - return tags; - }; - - twttr.txt.modifyIndicesFromUnicodeToUTF16 = function(text, entities) { - twttr.txt.convertUnicodeIndices(text, entities, false); - }; - - twttr.txt.modifyIndicesFromUTF16ToUnicode = function(text, entities) { - twttr.txt.convertUnicodeIndices(text, entities, true); - }; - - twttr.txt.convertUnicodeIndices = function(text, entities, indicesInUTF16) { - if (entities.length == 0) { - return; - } - - var charIndex = 0; - var codePointIndex = 0; - - // sort entities by start index - entities.sort(function(a,b){ return a.indices[0] - b.indices[0]; }); - var entityIndex = 0; - var entity = entities[0]; - - while (charIndex < text.length) { - if (entity.indices[0] == (indicesInUTF16 ? charIndex : codePointIndex)) { - var len = entity.indices[1] - entity.indices[0]; - entity.indices[0] = indicesInUTF16 ? 
codePointIndex : charIndex; - entity.indices[1] = entity.indices[0] + len; - - entityIndex++; - if (entityIndex == entities.length) { - // no more entity - break; - } - entity = entities[entityIndex]; - } - - var c = text.charCodeAt(charIndex); - if (0xD800 <= c && c <= 0xDBFF && charIndex < text.length - 1) { - // Found high surrogate char - c = text.charCodeAt(charIndex + 1); - if (0xDC00 <= c && c <= 0xDFFF) { - // Found surrogate pair - charIndex++; - } - } - codePointIndex++; - charIndex++; - } - }; - - // this essentially does text.split(/<|>/) - // except that won't work in IE, where empty strings are ommitted - // so "<>".split(/<|>/) => [] in IE, but is ["", "", ""] in all others - // but "<<".split("<") => ["", "", ""] - twttr.txt.splitTags = function(text) { - var firstSplits = text.split("<"), - secondSplits, - allSplits = [], - split; - - for (var i = 0; i < firstSplits.length; i += 1) { - split = firstSplits[i]; - if (!split) { - allSplits.push(""); - } else { - secondSplits = split.split(">"); - for (var j = 0; j < secondSplits.length; j += 1) { - allSplits.push(secondSplits[j]); - } - } - } - - return allSplits; - }; - - twttr.txt.hitHighlight = function(text, hits, options) { - var defaultHighlightTag = "em"; - - hits = hits || []; - options = options || {}; - - if (hits.length === 0) { - return text; - } - - var tagName = options.tag || defaultHighlightTag, - tags = ["<" + tagName + ">", ""], - chunks = twttr.txt.splitTags(text), - i, - j, - result = "", - chunkIndex = 0, - chunk = chunks[0], - prevChunksLen = 0, - chunkCursor = 0, - startInChunk = false, - chunkChars = chunk, - flatHits = [], - index, - hit, - tag, - placed, - hitSpot; - - for (i = 0; i < hits.length; i += 1) { - for (j = 0; j < hits[i].length; j += 1) { - flatHits.push(hits[i][j]); - } - } - - for (index = 0; index < flatHits.length; index += 1) { - hit = flatHits[index]; - tag = tags[index % 2]; - placed = false; - - while (chunk != null && hit >= prevChunksLen + chunk.length) 
{ - result += chunkChars.slice(chunkCursor); - if (startInChunk && hit === prevChunksLen + chunkChars.length) { - result += tag; - placed = true; - } - - if (chunks[chunkIndex + 1]) { - result += "<" + chunks[chunkIndex + 1] + ">"; - } - - prevChunksLen += chunkChars.length; - chunkCursor = 0; - chunkIndex += 2; - chunk = chunks[chunkIndex]; - chunkChars = chunk; - startInChunk = false; - } - - if (!placed && chunk != null) { - hitSpot = hit - prevChunksLen; - result += chunkChars.slice(chunkCursor, hitSpot) + tag; - chunkCursor = hitSpot; - if (index % 2 === 0) { - startInChunk = true; - } else { - startInChunk = false; - } - } else if(!placed) { - placed = true; - result += tag; - } - } - - if (chunk != null) { - if (chunkCursor < chunkChars.length) { - result += chunkChars.slice(chunkCursor); - } - for (index = chunkIndex + 1; index < chunks.length; index += 1) { - result += (index % 2 === 0 ? chunks[index] : "<" + chunks[index] + ">"); - } - } - - return result; - }; - - var MAX_LENGTH = 140; - - // Characters not allowed in Tweets - var INVALID_CHARACTERS = [ - // BOM - fromCode(0xFFFE), - fromCode(0xFEFF), - - // Special - fromCode(0xFFFF), - - // Directional Change - fromCode(0x202A), - fromCode(0x202B), - fromCode(0x202C), - fromCode(0x202D), - fromCode(0x202E) - ]; - - // Returns the length of Tweet text with consideration to t.co URL replacement - twttr.txt.getTweetLength = function(text, options) { - if (!options) { - options = { - short_url_length: 20, - short_url_length_https: 21 - }; - } - var textLength = text.length; - var urlsWithIndices = twttr.txt.extractUrlsWithIndices(text); - - for (var i = 0; i < urlsWithIndices.length; i++) { - // Subtract the length of the original URL - textLength += urlsWithIndices[i].indices[0] - urlsWithIndices[i].indices[1]; - - // Add 21 characters for URL starting with https:// - // Otherwise add 20 characters - if (urlsWithIndices[i].url.toLowerCase().match(/^https:\/\//)) { - textLength += 
options.short_url_length_https; - } else { - textLength += options.short_url_length; - } - } - - return textLength; - }; - - // Check the text for any reason that it may not be valid as a Tweet. This is meant as a pre-validation - // before posting to api.twitter.com. There are several server-side reasons for Tweets to fail but this pre-validation - // will allow quicker feedback. - // - // Returns false if this text is valid. Otherwise one of the following strings will be returned: - // - // "too_long": if the text is too long - // "empty": if the text is nil or empty - // "invalid_characters": if the text contains non-Unicode or any of the disallowed Unicode characters - twttr.txt.isInvalidTweet = function(text) { - if (!text) { - return "empty"; - } - - // Determine max length independent of URL length - if (twttr.txt.getTweetLength(text) > MAX_LENGTH) { - return "too_long"; - } - - for (var i = 0; i < INVALID_CHARACTERS.length; i++) { - if (text.indexOf(INVALID_CHARACTERS[i]) >= 0) { - return "invalid_characters"; - } - } - - return false; - }; - - twttr.txt.isValidTweetText = function(text) { - return !twttr.txt.isInvalidTweet(text); - }; - - twttr.txt.isValidUsername = function(username) { - if (!username) { - return false; - } - - var extracted = twttr.txt.extractMentions(username); - - // Should extract the username minus the @ sign, hence the .slice(1) - return extracted.length === 1 && extracted[0] === username.slice(1); - }; - - var VALID_LIST_RE = regexSupplant(/^#{validMentionOrList}$/); - - twttr.txt.isValidList = function(usernameList) { - var match = usernameList.match(VALID_LIST_RE); - - // Must have matched and had nothing before or after - return !!(match && match[1] == "" && match[4]); - }; - - twttr.txt.isValidHashtag = function(hashtag) { - if (!hashtag) { - return false; - } - - var extracted = twttr.txt.extractHashtags(hashtag); - - // Should extract the hashtag minus the # sign, hence the .slice(1) - return extracted.length === 1 && 
extracted[0] === hashtag.slice(1); - }; - - twttr.txt.isValidUrl = function(url, unicodeDomains, requireProtocol) { - if (unicodeDomains == null) { - unicodeDomains = true; - } - - if (requireProtocol == null) { - requireProtocol = true; - } - - if (!url) { - return false; - } - - var urlParts = url.match(twttr.txt.regexen.validateUrlUnencoded); - - if (!urlParts || urlParts[0] !== url) { - return false; - } - - var scheme = urlParts[1], - authority = urlParts[2], - path = urlParts[3], - query = urlParts[4], - fragment = urlParts[5]; - - if (!( - (!requireProtocol || (isValidMatch(scheme, twttr.txt.regexen.validateUrlScheme) && scheme.match(/^https?$/i))) && - isValidMatch(path, twttr.txt.regexen.validateUrlPath) && - isValidMatch(query, twttr.txt.regexen.validateUrlQuery, true) && - isValidMatch(fragment, twttr.txt.regexen.validateUrlFragment, true) - )) { - return false; - } - - return (unicodeDomains && isValidMatch(authority, twttr.txt.regexen.validateUrlUnicodeAuthority)) || - (!unicodeDomains && isValidMatch(authority, twttr.txt.regexen.validateUrlAuthority)); - }; - - function isValidMatch(string, regex, optional) { - if (!optional) { - // RegExp["$&"] is the text of the last match - // blank strings are ok, but are falsy, so we check stringiness instead of truthiness - return ((typeof string === "string") && string.match(regex) && RegExp["$&"] === string); - } - - // RegExp["$&"] is the text of the last match - return (!string || (string.match(regex) && RegExp["$&"] === string)); - } - - if (typeof module != 'undefined' && module.exports) { - module.exports = twttr.txt; - } - -}()); diff --git a/app/assets/javascripts/main_include.js b/app/assets/javascripts/main_include.js index 06fa8462178..0fb2fde9845 100644 --- a/app/assets/javascripts/main_include.js +++ b/app/assets/javascripts/main_include.js @@ -28,6 +28,8 @@ //= require ./discourse/routes/discourse_route //= require ./discourse/routes/discourse_restricted_user_route +//= require 
./discourse/dialects/dialect +//= require_tree ./discourse/dialects //= require_tree ./discourse/controllers //= require_tree ./discourse/components //= require_tree ./discourse/models diff --git a/app/models/post_analyzer.rb b/app/models/post_analyzer.rb index 0ad69f34af4..ac1cf6fc54c 100644 --- a/app/models/post_analyzer.rb +++ b/app/models/post_analyzer.rb @@ -88,6 +88,7 @@ class PostAnalyzer # Returns an array of all links in a post excluding mentions def raw_links + return [] unless @raw.present? return @raw_links if @raw_links.present? diff --git a/lib/pretty_text.rb b/lib/pretty_text.rb index 2d9607739a6..48fe7fdf23c 100644 --- a/lib/pretty_text.rb +++ b/lib/pretty_text.rb @@ -96,7 +96,6 @@ module PrettyText "app/assets/javascripts/external/md5.js", "app/assets/javascripts/external/lodash.js", "app/assets/javascripts/external/Markdown.Converter.js", - "app/assets/javascripts/external/twitter-text-1.5.0.js", "lib/headless-ember.js", "app/assets/javascripts/external/rsvp.js", Rails.configuration.ember.handlebars_location) @@ -106,10 +105,17 @@ module PrettyText ctx.eval("var I18n = {}; I18n.t = function(a,b){ return helpers.t(a,b); }"); ctx_load(ctx, - "app/assets/javascripts/discourse/components/bbcode.js", + "app/assets/javascripts/external/markdown.js", + "app/assets/javascripts/discourse/dialects/dialect.js", "app/assets/javascripts/discourse/components/utilities.js", "app/assets/javascripts/discourse/components/markdown.js") + Dir["#{Rails.root}/app/assets/javascripts/discourse/dialects/**.js"].each do |dialect| + unless dialect =~ /\/dialect\.js$/ + ctx.load(dialect) + end + end + # Load server side javascripts if DiscoursePluginRegistry.server_side_javascripts.present? 
DiscoursePluginRegistry.server_side_javascripts.each do |ssjs| diff --git a/spec/components/pretty_text_spec.rb b/spec/components/pretty_text_spec.rb index 6fe8c978e9c..b41ba19bc77 100644 --- a/spec/components/pretty_text_spec.rb +++ b/spec/components/pretty_text_spec.rb @@ -4,15 +4,6 @@ require 'pretty_text' describe PrettyText do describe "Cooking" do - it "should support github style code blocks" do - PrettyText.cook("``` -test -```").should match_html "
    test  \n
    " - end - - it "should support quoting [] " do - PrettyText.cook("[quote=\"EvilTrout, post:123, topic:456, full:true\"][sam][/quote]").should =~ /\[sam\]/ - end describe "with avatar" do @@ -23,15 +14,15 @@ test end it "produces a quote even with new lines in it" do - PrettyText.cook("[quote=\"EvilTrout, post:123, topic:456, full:true\"]ddd\n[/quote]").should match_html "

    " + PrettyText.cook("[quote=\"EvilTrout, post:123, topic:456, full:true\"]ddd\n[/quote]").should match_html "

    " end it "should produce a quote" do - PrettyText.cook("[quote=\"EvilTrout, post:123, topic:456, full:true\"]ddd[/quote]").should match_html "

    " + PrettyText.cook("[quote=\"EvilTrout, post:123, topic:456, full:true\"]ddd[/quote]").should match_html "

    " end it "trims spaces on quote params" do - PrettyText.cook("[quote=\"EvilTrout, post:555, topic: 666\"]ddd[/quote]").should match_html "

    " + PrettyText.cook("[quote=\"EvilTrout, post:555, topic: 666\"]ddd[/quote]").should match_html "

    " end end @@ -40,36 +31,10 @@ test PrettyText.cook('@hello @hello @hello').should match_html "

    @hello @hello @hello

    " end - it "should not do weird @ mention stuff inside a pre block" do - - PrettyText.cook("``` -a @test -```").should match_html "
    a @test  \n
    " - - end - it "should sanitize the html" do PrettyText.cook("").should match_html "alert(42)" end - it "should escape html within the code block" do - - PrettyText.cook("```text -
    hello
    -```").should match_html "
    <header>hello</header>  \n
    " - end - - it "should support language choices" do - - PrettyText.cook("```ruby -test -```").should match_html "
    test  \n
    " - end - - it 'should decorate @mentions' do - PrettyText.cook("Hello @eviltrout").should match_html "

    Hello @eviltrout

    " - end - it 'should allow for @mentions to have punctuation' do PrettyText.cook("hello @bob's @bob,@bob; @bob\"").should match_html "

    hello @bob's @bob,@bob; @bob\"

    " @@ -78,11 +43,6 @@ test it 'should add spoiler tags' do PrettyText.cook("[spoiler]hello[/spoiler]").should match_html "

    hello

    " end - - it "should only detect ``` at the begining of lines" do - PrettyText.cook(" ```\n hello\n ```") - .should match_html "
    ```\nhello\n```\n
    " - end end describe "rel nofollow" do diff --git a/spec/models/topic_link_spec.rb b/spec/models/topic_link_spec.rb index 0318089a154..83f92270750 100644 --- a/spec/models/topic_link_spec.rb +++ b/spec/models/topic_link_spec.rb @@ -57,6 +57,7 @@ describe TopicLink do @topic.posts.create(user: @user, raw: 'initial post') @post = @topic.posts.create(user: @user, raw: "Link to another topic:\n\n#{@url}\n\n") @post.reload + TopicLink.extract_from(@post) @link = @topic.topic_links.first diff --git a/test/javascripts/components/bbcode_test.js b/test/javascripts/components/bbcode_test.js index a80a2bfec20..dc04b8e56d2 100644 --- a/test/javascripts/components/bbcode_test.js +++ b/test/javascripts/components/bbcode_test.js @@ -7,13 +7,14 @@ var format = function(input, expected, text) { }; test('basic bbcode', function() { - format("[b]strong[/b]", "strong", "bolds text"); - format("[i]emphasis[/i]", "emphasis", "italics text"); - format("[u]underlined[/u]", "underlined", "underlines text"); - format("[s]strikethrough[/s]", "strikethrough", "strikes-through text"); - format("[code]\nx++\n[/code]", "
    \nx++ 
    \n
    ", "makes code into pre"); + format("[b]strong[/b]", "strong", "bolds text"); + format("[i]emphasis[/i]", "emphasis", "italics text"); + format("[u]underlined[/u]", "underlined", "underlines text"); + format("[s]strikethrough[/s]", "strikethrough", "strikes-through text"); + format("[code]\nx++\n[/code]", "
    \nx++
    \n
    ", "makes code into pre"); + format("[code]\nx++\ny++\nz++\n[/code]", "
    \nx++
    \ny++
    \nz++
    \n
    ", "makes code into pre"); format("[spoiler]it's a sled[/spoiler]", "it's a sled", "supports spoiler tags"); - format("[img]http://eviltrout.com/eviltrout.png[/img]", "", "links images"); + format("[img]http://eviltrout.com/eviltrout.png[/img]", "", "links images"); format("[url]http://bettercallsaul.com[/url]", "http://bettercallsaul.com", "supports [url] without a title"); format("[email]eviltrout@mailinator.com[/email]", "eviltrout@mailinator.com", "supports [email] without a title"); }); @@ -31,11 +32,11 @@ test('color', function() { }); test('tags with arguments', function() { - format("[size=35]BIG[/size]", "BIG", "supports [size=]"); + format("[size=35]BIG [b]whoop[/b][/size]", "BIG whoop", "supports [size=]"); format("[url=http://bettercallsaul.com]better call![/url]", "better call!", "supports [url] with a title"); format("[email=eviltrout@mailinator.com]evil trout[/email]", "evil trout", "supports [email] with a title"); - format("[u][i]abc[/i][/u]", "abc", "can nest tags"); - format("[b]first[/b] [b]second[/b]", "first second", "can bold two things on the same line"); + format("[u][i]abc[/i][/u]", "abc", "can nest tags"); + format("[b]first[/b] [b]second[/b]", "first second", "can bold two things on the same line"); }); @@ -49,7 +50,7 @@ test("quotes", function() { }); var formatQuote = function(val, expected, text) { - equal(Discourse.BBCode.buildQuoteBBCode(post, val), expected, text); + equal(Discourse.Quote.build(post, val), expected, text); }; formatQuote(undefined, "", "empty string for undefined content"); @@ -58,6 +59,7 @@ test("quotes", function() { formatQuote("lorem", "[quote=\"eviltrout, post:1, topic:2\"]\nlorem\n[/quote]\n\n", "correctly formats quotes"); + formatQuote(" lorem \t ", "[quote=\"eviltrout, post:1, topic:2\"]\nlorem\n[/quote]\n\n", "trims white spaces before & after the quoted contents"); @@ -74,34 +76,27 @@ test("quotes", function() { test("quote formatting", function() { - // TODO: This HTML matching is quite ugly. 
+ format("[quote=\"EvilTrout, post:123, topic:456, full:true\"][sam][/quote]", + "", + "it allows quotes with [] inside"); + format("[quote=\"eviltrout, post:1, topic:1\"]abc[/quote]", - "

    \n

    ", + "

    ", "renders quotes properly"); - format("[quote=\"eviltrout, post:1, topic:1\"]abc[quote=\"eviltrout, post:2, topic:2\"]nested[/quote][/quote]", - "

    \n

    ", - "can nest quotes"); + format("[quote=\"eviltrout, post:1, topic:1\"]abc[/quote]\nhello", + "


    \nhello", + "handles new lines properly"); format("before[quote=\"eviltrout, post:1, topic:1\"]first[/quote]middle[quote=\"eviltrout, post:2, topic:2\"]second[/quote]after", - "before

    \n

    \n\n

    middle

    \n


    \nafter", + "before


    middle
    after", "can handle more than one quote"); }); -test("extract quotes", function() { - - var q = "[quote=\"eviltrout, post:1, topic:2\"]hello[/quote]"; - var result = Discourse.BBCode.extractQuotes(q + " world"); - - equal(result.text, md5(q) + "\n world"); - present(result.template); - -}); - diff --git a/test/javascripts/components/markdown_test.js b/test/javascripts/components/markdown_test.js index 8346f1a81c4..ff3683ea2f0 100644 --- a/test/javascripts/components/markdown_test.js +++ b/test/javascripts/components/markdown_test.js @@ -7,7 +7,8 @@ module("Discourse.Markdown", { }); var cooked = function(input, expected, text) { - equal(Discourse.Markdown.cook(input, {mentionLookup: false }), expected, text); + var result = Discourse.Markdown.cook(input, {mentionLookup: false, sanitize: true}); + equal(result, expected, text); }; var cookedOptions = function(input, opts, expected, text) { @@ -21,7 +22,7 @@ test("basic cooking", function() { test("Line Breaks", function() { var input = "1\n2\n3"; - cooked(input, "

    1
    \n2
    \n3

    ", "automatically handles trivial newlines"); + cooked(input, "

    1
    2
    3

    ", "automatically handles trivial newlines"); var traditionalOutput = "

    1\n2\n3

    "; @@ -36,13 +37,18 @@ test("Line Breaks", function() { }); test("Links", function() { + cooked("Youtube: http://www.youtube.com/watch?v=1MrpeBRkM5A", '

    Youtube: http://www.youtube.com/watch?v=1MrpeBRkM5A

    ', "allows links to contain query params"); cooked("Derpy: http://derp.com?__test=1", - '

    Derpy: http://derp.com?__test=1

    ', - "escapes double underscores in URLs"); + '

    Derpy: http://derp.com?__test=1

    ', + "works with double underscores in urls"); + + cooked("Derpy: http://derp.com?_test_=1", + '

    Derpy: http://derp.com?_test_=1

    ', + "works with underscores in urls"); cooked("Atwood: www.codinghorror.com", '

    Atwood: www.codinghorror.com

    ', @@ -63,34 +69,48 @@ test("Links", function() { cooked("Batman: http://en.wikipedia.org/wiki/The_Dark_Knight_(film)", '

    Batman: http://en.wikipedia.org/wiki/The_Dark_Knight_(film)

    ', "autolinks a URL with parentheses (like Wikipedia)"); + + cooked("Here's a tweet:\nhttps://twitter.com/evil_trout/status/345954894420787200", + "

    Here's a tweet:
    https://twitter.com/evil_trout/status/345954894420787200

    ", + "It doesn't strip the new line."); + + cooked("[3]: http://eviltrout.com", "", "It doesn't autolink markdown link references"); + + cooked("http://discourse.org and http://discourse.org/another_url and http://www.imdb.com/name/nm2225369", + "

    http://discourse.org and " + + "http://discourse.org/another_url and " + + "http://www.imdb.com/name/nm2225369

    ", + 'allows multiple links on one line'); + }); test("Quotes", function() { cookedOptions("1[quote=\"bob, post:1\"]my quote[/quote]2", { topicId: 2, lookupAvatar: function(name) { return "" + name; } }, - "

    1

    \n

    \n\n

    2

    ", + "

    1


    2

    ", "handles quotes properly"); cookedOptions("1[quote=\"bob, post:1\"]my quote[/quote]2", { topicId: 2, lookupAvatar: function(name) { } }, - "

    1

    \n

    \n\n

    2

    ", + "

    1


    2

    ", "includes no avatar if none is found"); }); test("Mentions", function() { cookedOptions("Hello @sam", { mentionLookup: (function() { return true; }) }, - "

    Hello @sam

    ", + "

    Hello @sam

    ", "translates mentions to links"); - cooked("Hello @EvilTrout", "

    Hello @EvilTrout

    ", "adds a mention class"); + cooked("Hello @EvilTrout", "

    Hello @EvilTrout

    ", "adds a mention class"); cooked("robin@email.host", "

    robin@email.host

    ", "won't add mention class to an email address"); cooked("hanzo55@yahoo.com", "

    hanzo55@yahoo.com

    ", "won't be affected by email addresses that have a number before the @ symbol"); - cooked("@EvilTrout yo", "

    @EvilTrout yo

    ", "doesn't do @username mentions inside
     or  blocks");
    +  cooked("@EvilTrout yo", "

    @EvilTrout yo

    ", "it handles mentions at the beginning of a string"); + cooked("yo\n@EvilTrout", "

    yo
    @EvilTrout

    ", "it handles mentions at the beginning of a new line"); cooked("`evil` @EvilTrout `trout`", - "

    evil @EvilTrout trout

    ", + "

    evil @EvilTrout trout

    ", "deals correctly with multiple blocks"); + cooked("```\na @test\n```", "

    a @test

    ", "should not do mentions within a code block."); }); @@ -101,22 +121,59 @@ test("Oneboxing", function() { }; ok(!matches("- http://www.textfiles.com/bbs/MINDVOX/FORUMS/ethics\n\n- http://drupal.org", /onebox/), - "doesn't onebox a link within a list"); + "doesn't onebox a link within a list"); + ok(matches("http://test.com", /onebox/), "adds a onebox class to a link on its own line"); ok(matches("http://test.com\nhttp://test2.com", /onebox[\s\S]+onebox/m), "supports multiple links"); ok(!matches("http://test.com bob", /onebox/), "doesn't onebox links that have trailing text"); cooked("http://en.wikipedia.org/wiki/Homicide:_Life_on_the_Street", - "

    http://en.wikipedia.org/wiki/Homicide:_Life_on_the_Street

    ", "works with links that have underscores in them"); }); +test("Code Blocks", function() { + + cooked("```\ntest\n```", + "

    test

    ", + "it supports basic code blocks"); + + cooked("```json\n{hello: 'world'}\n```\ntrailing", + "

    {hello: 'world'}

    \n\n

    \ntrailing

    ", + "It does not truncate text after a code block."); + + cooked("```json\nline 1\n\nline 2\n\n\nline3\n```", + "

    line 1\n\nline 2\n\n\nline3

    ", + "it maintains new lines inside a code block."); + + cooked("hello\nworld\n```json\nline 1\n\nline 2\n\n\nline3\n```", + "

    hello
    world

    \n\n

    line 1\n\nline 2\n\n\nline3

    ", + "it maintains new lines inside a code block with leading content."); + + cooked("```text\n
    hello
    \n```", + "

    <header>hello</header>

    ", + "it escapes code in the code block"); + + cooked("```ruby\n# cool\n```", + "

    # cool

    ", + "it supports changing the language"); + + cooked(" ```\n hello\n ```", + "
    ```\nhello\n```
    ", + "only detect ``` at the begining of lines"); +}); + test("SanitizeHTML", function() { equal(sanitizeHtml("
    "), "
    "); equal(sanitizeHtml("

    hello

    "), "

    hello

    "); + cooked("hello", "

    hello

    ", "it sanitizes while cooking"); + + cooked("disney reddit", + "

    disney reddit

    ", + "we can embed proper links"); }); diff --git a/vendor/gems/discourse_emoji/vendor/assets/javascripts/discourse_emoji.js b/vendor/gems/discourse_emoji/vendor/assets/javascripts/discourse_emoji.js index 0924cb15d0e..9f972d323fb 100644 --- a/vendor/gems/discourse_emoji/vendor/assets/javascripts/discourse_emoji.js +++ b/vendor/gems/discourse_emoji/vendor/assets/javascripts/discourse_emoji.js @@ -2,20 +2,19 @@ var emoji = ["+1", "-1", "100", "1234", "8ball", "a", "ab", "abc", "abcd", "accept", "aerial_tramway", "airplane", "alarm_clock", "alien", "ambulance", "anchor", "angel", "anger", "angry", "anguished", "ant", "apple", "aquarius", "aries", "arrow_backward", "arrow_double_down", "arrow_double_up", "arrow_down", "arrow_down_small", "arrow_forward", "arrow_heading_down", "arrow_heading_up", "arrow_left", "arrow_lower_left", "arrow_lower_right", "arrow_right", "arrow_right_hook", "arrow_up", "arrow_up_down", "arrow_up_small", "arrow_upper_left", "arrow_upper_right", "arrows_clockwise", "arrows_counterclockwise", "art", "articulated_lorry", "astonished", "athletic_shoe", "atm", "b", "baby", "baby_bottle", "baby_chick", "baby_symbol", "back", "baggage_claim", "balloon", "ballot_box_with_check", "bamboo", "banana", "bangbang", "bank", "bar_chart", "barber", "baseball", "basketball", "bath", "bathtub", "battery", "bear", "bee", "beer", "beers", "beetle", "beginner", "bell", "bento", "bicyclist", "bike", "bikini", "bird", "birthday", "black_circle", "black_joker", "black_large_square", "black_medium_small_square", "black_medium_square", "black_nib", "black_small_square", "black_square_button", "blossom", "blowfish", "blue_book", "blue_car", "blue_heart", "blush", "boar", "boat", "bomb", "book", "bookmark", "bookmark_tabs", "books", "boom", "boot", "bouquet", "bow", "bowling", "bowtie", "boy", "bread", "bride_with_veil", "bridge_at_night", "briefcase", "broken_heart", "bug", "bulb", "bullettrain_front", "bullettrain_side", "bus", "busstop", "bust_in_silhouette", 
"busts_in_silhouette", "cactus", "cake", "calendar", "calling", "camel", "camera", "cancer", "candy", "capital_abcd", "capricorn", "car", "card_index", "carousel_horse", "cat", "cat2", "cd", "chart", "chart_with_downwards_trend", "chart_with_upwards_trend", "checkered_flag", "cherries", "cherry_blossom", "chestnut", "chicken", "children_crossing", "chocolate_bar", "christmas_tree", "church", "cinema", "circus_tent", "city_sunrise", "city_sunset", "cl", "clap", "clapper", "clipboard", "clock1", "clock10", "clock1030", "clock11", "clock1130", "clock12", "clock1230", "clock130", "clock2", "clock230", "clock3", "clock330", "clock4", "clock430", "clock5", "clock530", "clock6", "clock630", "clock7", "clock730", "clock8", "clock830", "clock9", "clock930", "closed_book", "closed_lock_with_key", "closed_umbrella", "cloud", "clubs", "cn", "cocktail", "coffee", "cold_sweat", "collision", "computer", "confetti_ball", "confounded", "confused", "congratulations", "construction", "construction_worker", "convenience_store", "cookie", "cool", "cop", "copyright", "corn", "couple", "couple_with_heart", "couplekiss", "cow", "cow2", "credit_card", "crescent_moon", "crocodile", "crossed_flags", "crown", "cry", "crying_cat_face", "crystal_ball", "cupid", "curly_loop", "currency_exchange", "curry", "custard", "customs", "cyclone", "dancer", "dancers", "dango", "dart", "dash", "date", "de", "deciduous_tree", "department_store", "diamond_shape_with_a_dot_inside", "diamonds", "disappointed", "disappointed_relieved", "dizzy", "dizzy_face", "do_not_litter", "dog", "dog2", "dollar", "dolls", "dolphin", "door", "doughnut", "dragon", "dragon_face", "dress", "dromedary_camel", "droplet", "dvd", "e-mail", "ear", "ear_of_rice", "earth_africa", "earth_americas", "earth_asia", "egg", "eggplant", "eight", "eight_pointed_black_star", "eight_spoked_asterisk", "electric_plug", "elephant", "email", "end", "envelope", "envelope_with_arrow", "es", "euro", "european_castle", "european_post_office", 
"evergreen_tree", "exclamation", "expressionless", "eyeglasses", "eyes", "facepunch", "factory", "fallen_leaf", "family", "fast_forward", "fax", "fearful", "feelsgood", "feet", "ferris_wheel", "file_folder", "finnadie", "fire", "fire_engine", "fireworks", "first_quarter_moon", "first_quarter_moon_with_face", "fish", "fish_cake", "fishing_pole_and_fish", "fist", "five", "flags", "flashlight", "flipper", "floppy_disk", "flower_playing_cards", "flushed", "foggy", "football", "footprints", "fork_and_knife", "fountain", "four", "four_leaf_clover", "fr", "free", "fried_shrimp", "fries", "frog", "frowning", "fu", "fuelpump", "full_moon", "full_moon_with_face", "game_die", "gb", "gem", "gemini", "ghost", "gift", "gift_heart", "girl", "globe_with_meridians", "goat", "goberserk", "godmode", "golf", "grapes", "green_apple", "green_book", "green_heart", "grey_exclamation", "grey_question", "grimacing", "grin", "grinning", "guardsman", "guitar", "gun", "haircut", "hamburger", "hammer", "hamster", "hand", "handbag", "hankey", "hash", "hatched_chick", "hatching_chick", "headphones", "hear_no_evil", "heart", "heart_decoration", "heart_eyes", "heart_eyes_cat", "heartbeat", "heartpulse", "hearts", "heavy_check_mark", "heavy_division_sign", "heavy_dollar_sign", "heavy_exclamation_mark", "heavy_minus_sign", "heavy_multiplication_x", "heavy_plus_sign", "helicopter", "herb", "hibiscus", "high_brightness", "high_heel", "hocho", "honey_pot", "honeybee", "horse", "horse_racing", "hospital", "hotel", "hotsprings", "hourglass", "hourglass_flowing_sand", "house", "house_with_garden", "hurtrealbad", "hushed", "ice_cream", "icecream", "id", "ideograph_advantage", "imp", "inbox_tray", "incoming_envelope", "information_desk_person", "information_source", "innocent", "interrobang", "iphone", "it", "izakaya_lantern", "jack_o_lantern", "japan", "japanese_castle", "japanese_goblin", "japanese_ogre", "jeans", "joy", "joy_cat", "jp", "key", "keycap_ten", "kimono", "kiss", "kissing", "kissing_cat", 
"kissing_closed_eyes", "kissing_heart", "kissing_smiling_eyes", "koala", "koko", "kr", "lantern", "large_blue_circle", "large_blue_diamond", "large_orange_diamond", "last_quarter_moon", "last_quarter_moon_with_face", "laughing", "leaves", "ledger", "left_luggage", "left_right_arrow", "leftwards_arrow_with_hook", "lemon", "leo", "leopard", "libra", "light_rail", "link", "lips", "lipstick", "lock", "lock_with_ink_pen", "lollipop", "loop", "loudspeaker", "love_hotel", "love_letter", "low_brightness", "m", "mag", "mag_right", "mahjong", "mailbox", "mailbox_closed", "mailbox_with_mail", "mailbox_with_no_mail", "man", "man_with_gua_pi_mao", "man_with_turban", "mans_shoe", "maple_leaf", "mask", "massage", "meat_on_bone", "mega", "melon", "memo", "mens", "metal", "metro", "microphone", "microscope", "milky_way", "minibus", "minidisc", "mobile_phone_off", "money_with_wings", "moneybag", "monkey", "monkey_face", "monorail", "moon", "mortar_board", "mount_fuji", "mountain_bicyclist", "mountain_cableway", "mountain_railway", "mouse", "mouse2", "movie_camera", "moyai", "muscle", "mushroom", "musical_keyboard", "musical_note", "musical_score", "mute", "nail_care", "name_badge", "neckbeard", "necktie", "negative_squared_cross_mark", "neutral_face", "new", "new_moon", "new_moon_with_face", "newspaper", "ng", "nine", "no_bell", "no_bicycles", "no_entry", "no_entry_sign", "no_good", "no_mobile_phones", "no_mouth", "no_pedestrians", "no_smoking", "non-potable_water", "nose", "notebook", "notebook_with_decorative_cover", "notes", "nut_and_bolt", "o", "o2", "ocean", "octocat", "octopus", "oden", "office", "ok", "ok_hand", "ok_woman", "older_man", "older_woman", "on", "oncoming_automobile", "oncoming_bus", "oncoming_police_car", "oncoming_taxi", "one", "open_book", "open_file_folder", "open_hands", "open_mouth", "ophiuchus", "orange_book", "outbox_tray", "ox", "package", "page_facing_up", "page_with_curl", "pager", "palm_tree", "panda_face", "paperclip", "parking", 
"part_alternation_mark", "partly_sunny", "passport_control", "paw_prints", "peach", "pear", "pencil", "pencil2", "penguin", "pensive", "performing_arts", "persevere", "person_frowning", "person_with_blond_hair", "person_with_pouting_face", "phone", "pig", "pig2", "pig_nose", "pill", "pineapple", "pisces", "pizza", "point_down", "point_left", "point_right", "point_up", "point_up_2", "police_car", "poodle", "poop", "post_office", "postal_horn", "postbox", "potable_water", "pouch", "poultry_leg", "pound", "pouting_cat", "pray", "princess", "punch", "purple_heart", "purse", "pushpin", "put_litter_in_its_place", "question", "rabbit", "rabbit2", "racehorse", "radio", "radio_button", "rage", "rage1", "rage2", "rage3", "rage4", "railway_car", "rainbow", "raised_hand", "raised_hands", "raising_hand", "ram", "ramen", "rat", "recycle", "red_car", "red_circle", "registered", "relaxed", "relieved", "repeat", "repeat_one", "restroom", "revolving_hearts", "rewind", "ribbon", "rice", "rice_ball", "rice_cracker", "rice_scene", "ring", "rocket", "roller_coaster", "rooster", "rose", "rotating_light", "round_pushpin", "rowboat", "ru", "rugby_football", "runner", "running", "running_shirt_with_sash", "sa", "sagittarius", "sailboat", "sake", "sandal", "santa", "satellite", "satisfied", "saxophone", "school", "school_satchel", "scissors", "scorpius", "scream", "scream_cat", "scroll", "seat", "secret", "see_no_evil", "seedling", "seven", "shaved_ice", "sheep", "shell", "ship", "shipit", "shirt", "shit", "shoe", "shower", "signal_strength", "six", "six_pointed_star", "ski", "skull", "sleeping", "sleepy", "slot_machine", "small_blue_diamond", "small_orange_diamond", "small_red_triangle", "small_red_triangle_down", "smile", "smile_cat", "smiley", "smiley_cat", "smiling_imp", "smirk", "smirk_cat", "smoking", "snail", "snake", "snowboarder", "snowflake", "snowman", "sob", "soccer", "soon", "sos", "sound", "space_invader", "spades", "spaghetti", "sparkle", "sparkler", "sparkles", 
"sparkling_heart", "speak_no_evil", "speaker", "speech_balloon", "speedboat", "squirrel", "star", "star2", "stars", "station", "statue_of_liberty", "steam_locomotive", "stew", "straight_ruler", "strawberry", "stuck_out_tongue", "stuck_out_tongue_closed_eyes", "stuck_out_tongue_winking_eye", "sun_with_face", "sunflower", "sunglasses", "sunny", "sunrise", "sunrise_over_mountains", "surfer", "sushi", "suspect", "suspension_railway", "sweat", "sweat_drops", "sweat_smile", "sweet_potato", "swimmer", "symbols", "syringe", "tada", "tanabata_tree", "tangerine", "taurus", "taxi", "tea", "telephone", "telephone_receiver", "telescope", "tennis", "tent", "thought_balloon", "three", "thumbsdown", "thumbsup", "ticket", "tiger", "tiger2", "tired_face", "tm", "toilet", "tokyo_tower", "tomato", "tongue", "top", "tophat", "tractor", "traffic_light", "train", "train2", "tram", "triangular_flag_on_post", "triangular_ruler", "trident", "triumph", "trolleybus", "trollface", "trophy", "tropical_drink", "tropical_fish", "truck", "trumpet", "tshirt", "tulip", "turtle", "tv", "twisted_rightwards_arrows", "two", "two_hearts", "two_men_holding_hands", "two_women_holding_hands", "u5272", "u5408", "u55b6", "u6307", "u6708", "u6709", "u6e80", "u7121", "u7533", "u7981", "u7a7a", "uk", "umbrella", "unamused", "underage", "unlock", "up", "us", "v", "vertical_traffic_light", "vhs", "vibration_mode", "video_camera", "video_game", "violin", "virgo", "volcano", "vs", "walking", "waning_crescent_moon", "waning_gibbous_moon", "warning", "watch", "water_buffalo", "watermelon", "wave", "wavy_dash", "waxing_crescent_moon", "waxing_gibbous_moon", "wc", "weary", "wedding", "whale", "whale2", "wheelchair", "white_check_mark", "white_circle", "white_flower", "white_large_square", "white_medium_small_square", "white_medium_square", "white_small_square", "white_square_button", "wind_chime", "wine_glass", "wink", "wolf", "woman", "womans_clothes", "womans_hat", "womens", "worried", "wrench", "x", "yellow_heart", 
"yen", "yum", "zap", "zero", "zzz"] - // Regiest a before cook event - Discourse.Markdown.on("beforeCook", function(event) { - var text = this.textResult || event.detail; - var opts = event.opts; + Discourse.Dialect.on("register", function(event) { + var dialect = event.dialect, + MD = event.MD; - this.textResult = text.replace(/\:([a-z\_\+\-0-9]+)\:/g, function (m1, m2) { - var url = Discourse.getURL('/assets/emoji/' + m2 + '.png'); - return (emoji.indexOf(m2) !== -1) ? - '' + m2 + '' : - m1; - }); + dialect.inline[":"] = function(text, orig_match) { + var m = /\:([a-z\_\+\-0-9]+)\:/.exec(text); + if (m && (emoji.indexOf(m[1]) !== -1)) { + var url = Discourse.getURL('/assets/emoji/' + m[1] + '.png'); + return [m[0].length, ['img', {href: url, title: ':' + m[1] + ':', 'class': 'emoji', alt: m[1]}] ]; + } + }; }); - if (Discourse && Discourse.ComposerView) { Discourse.ComposerView.on("initWmdEditor", function(event){