FIX: better iframe sanitization

This improves the way we sanitize `iframe` elements and correctly strips them from the "raw" before cooking it.

Otherwise, we would show an empty iframe box.

Internal ref - t/131430
This commit is contained in:
Régis Hanol 2024-06-28 14:21:31 +02:00
parent 307b207e91
commit 8dc358e4d5
2 changed files with 25 additions and 2 deletions

View File

@ -77,6 +77,24 @@ module("Unit | Utility | sanitizer", function (hooks) {
"it allows iframe to OpenStreetMap"
);
cooked(
`BEFORE\n\n<iframe src=http://example.com>\n\nINSIDE\n\n</iframe>\n\nAFTER`,
`<p>BEFORE</p>\n\n<p>AFTER</p>`,
"it strips unauthorized iframes - unallowed src"
);
cooked(
`BEFORE\n\n<iframe src=''>\n\nINSIDE\n\n</iframe>\n\nAFTER`,
`<p>BEFORE</p>\n\n<p>AFTER</p>`,
"it strips unauthorized iframes - empty src"
);
cooked(
`BEFORE\n\n<iframe src='http://example.com'>\n\nAFTER`,
`<p>BEFORE</p>`,
"it strips unauthorized partial iframes"
);
assert.strictEqual(engine.sanitize("<textarea>hullo</textarea>"), "hullo");
assert.strictEqual(
engine.sanitize("<button>press me!</button>"),

View File

@ -1,6 +1,10 @@
import xss from "xss";
import escape from "discourse-common/lib/escape";
// Matches an entire <iframe> element whose opening tag has no `src` attribute:
//   - `<iframe(?![^>]*\s+src\s*=)` : an opening tag in which no whitespace-preceded
//     `src=` appears before the closing `>` (negative lookahead).
//   - `[^>]*>`                     : the remainder of the opening tag.
//   - `[\s\S]*?`                   : the element's content, lazily, newlines included.
//   - `(<\/iframe\s*>|$)`          : up to the first closing tag, or, when the iframe
//     is never closed, up to the end of the input (no `m` flag, so `$` only
//     matches at the very end of the string).
// Flags: `g` replaces every occurrence, `i` makes the tag/attribute names
// case-insensitive.
// NOTE(review): there is no word boundary after `<iframe`, so a tag whose name
// merely starts with "iframe" would match as well — confirm this cannot reach here.
// NOTE(review): presumably relies on the sanitizer having already dropped the
// `src` of disallowed iframes so that they end up src-less — verify against `sanitize`.
const IFRAME_REGEXP =
/<iframe(?![^>]*\s+src\s*=)[^>]*>[\s\S]*?(<\/iframe\s*>|$)/gi;
function attr(name, value) {
if (value) {
return `${name}="${xss.escapeAttrValue(value)}"`;
@ -146,7 +150,8 @@ export function sanitize(text, allowLister) {
}
if (tag === "iframe" && name === "src") {
return "-STRIP-";
// This iframe is not allowed
return "";
}
if (tag === "video" && name === "autoplay") {
@ -176,7 +181,7 @@ export function sanitize(text, allowLister) {
return result
.replace(/\[removed\]/g, "")
.replace(/\<iframe[^>]+\-STRIP\-[^>]*>[^<]*<\/iframe>/g, "")
.replace(IFRAME_REGEXP, "")
.replace(/&(?![#\w]+;)/g, "&amp;")
.replace(/&#39;/g, "'")
.replace(/ \/>/g, ">");