From fd4aea7bc549f977da2974b96597d77a9b14c965 Mon Sep 17 00:00:00 2001 From: Sam Date: Tue, 25 Apr 2023 11:28:32 +1000 Subject: [PATCH] FIX: bbcode URLs not handling paths correctly (#21215) Due to the order we were parsing markdown, bbcode [url] elements were not handled properly. `[url]https://example.com/path[/url]` was not currectly parsing cause linkify was detecting the url as: `https://example.com/path[/url]` which is legit. To resolve this I swapped url to use a replace rule, and instead re-parsed the internal payload and injected the tokens in. This fix is complex cause we support stuff like `[url][b]test.com[/b][/url]` So we need to parse the content inside url `[b]test.com[/b]` --- .../tests/unit/lib/pretty-text-test.js | 20 +++- .../discourse-markdown/bbcode-inline.js | 106 ++++++++++++------ 2 files changed, 88 insertions(+), 38 deletions(-) diff --git a/app/assets/javascripts/discourse/tests/unit/lib/pretty-text-test.js b/app/assets/javascripts/discourse/tests/unit/lib/pretty-text-test.js index 5886c320874..7f3cd646666 100644 --- a/app/assets/javascripts/discourse/tests/unit/lib/pretty-text-test.js +++ b/app/assets/javascripts/discourse/tests/unit/lib/pretty-text-test.js @@ -989,6 +989,24 @@ eviltrout

'

discourse

', "named links are properly parsed" ); + + assert.cooked( + "[url]https://discourse.org/path[/url]", + '

https://discourse.org/path

', + "paths are correctly handled" + ); + + assert.cooked( + "[url]discourse.org/path[/url]", + '

discourse.org/path

', + "paths are correctly handled" + ); + + assert.cooked( + "[url][b]discourse.org/path[/b][/url]", + '

discourse.org/path

', + "paths are correctly handled" + ); }); test("images", function (assert) { @@ -1216,7 +1234,7 @@ eviltrout

); assert.cookedPara( "[url]abc.com[/url]", - 'abc.com', + 'abc.com', "it magically links using linkify" ); assert.cookedPara( diff --git a/app/assets/javascripts/pretty-text/engines/discourse-markdown/bbcode-inline.js b/app/assets/javascripts/pretty-text/engines/discourse-markdown/bbcode-inline.js index 27a1a1d4284..681dadf943a 100644 --- a/app/assets/javascripts/pretty-text/engines/discourse-markdown/bbcode-inline.js +++ b/app/assets/javascripts/pretty-text/engines/discourse-markdown/bbcode-inline.js @@ -182,56 +182,88 @@ export function setup(helper) { const simpleUrlRegex = /^https?:\/\//; ruler.push("url", { tag: "url", - wrap(startToken, endToken, tagInfo, content, state) { - const url = (tagInfo.attrs["_default"] || content).trim(); - let linkifyFound = false; - if (state.md.options.linkify) { - const tokens = state.tokens; - const startIndex = tokens.indexOf(startToken); - const endIndex = tokens.indexOf(endToken); + replace(state, tagInfo, content) { + let token; - // reuse existing tokens from linkify if they exist - for (let index = startIndex + 1; index < endIndex; index++) { - const token = tokens[index]; + // we need to tokenize the content and reinsert tokens in the stream + // this is because we need to support nested bbcode + let tokens = []; + md.inline.parse(content, state.md, state.env, tokens); - if ( - token.markup === "linkify" && - token.info === "auto" && - token.type === "link_open" - ) { - linkifyFound = true; - token.attrs.push(["data-bbcode", "true"]); + let url = tagInfo.attrs["_default"]; + + if (!url) { + // try to find the actual url in the tokens + for (let i = 0; i < tokens.length; i++) { + token = tokens[i]; + // nested linkify or link, just pick it + if (token.type === "link_open") { + for (let j = 0; j < token.attrs.length; j++) { + if (token.attrs[j][0] === "href") { + url = token.attrs[j][1]; + break; + } + } + if (url) { + break; + } + } + if (token.type === "text") { + url = token.content; break; } } } - if (!linkifyFound && simpleUrlRegex.test(url)) { - startToken.type = "link_open"; - startToken.tag = "a"; - startToken.attrs = [ + if (md.linkify) { + let match = null; + + // linkify has trouble with strings containing spaces, so just ban + // them outright + if (url && !url.includes(" ")) { + match = md.linkify.matchAtStart(url); + if (!match) { + match = md.linkify.matchAtStart("https://" + url); + } + } + + if (match) { + url = match.url; + } else { + url = null; + } + } else if (!url.match(simpleUrlRegex)) { + url = "https://" + url; + } + + if (url) { + token = state.push("link_open", "a", 0); + token.attrs = ["href", url]; + token.attrs = [ ["href", url], ["data-bbcode", "true"], ]; - startToken.content = ""; - startToken.nesting = 1; - - endToken.type = "link_close"; - endToken.tag = "a"; - endToken.content = ""; - endToken.nesting = -1; - } else { - // just strip the bbcode tag - endToken.content = ""; - startToken.content = ""; - - // edge case, we don't want this detected as a onebox if auto linked - // this ensures it is not stripped - startToken.type = "html_inline"; + token.content = ""; + token.nesting = 1; } - return false; + for (let i = 0; i < tokens.length; i++) { + token = tokens[i]; + if (token.type === "link_open" || token.type === "link_close") { + // linkify nested tokens, do nothing + } else { + state.tokens.push(token); + } + } + + if (url) { + token = state.push("link_close", "a", 0); + token.nesting = -1; + token.content = ""; + } + + return true; }, });