mirror of
https://github.com/discourse/discourse.git
synced 2024-11-23 02:19:27 +08:00
PERF: improve findAllMatches
speed (#22083)
When we introduced Unicode support in the regular expressions used in watched words (9a27803), we didn't realize how costly adding the `u` flag would be.
Turns out, it's pretty bad when you have lots of regular expressions to test. A customer had slightly less than 200 watched words, and it would freeze the browser for about 2s on the first check of those regular expressions (roughly 10ms per regular expression).
This commit introduces a new field (`word`) to the serialized watched words, which is then converted to a very fast and cheap regular expression on the client side. We use that regexp to quickly check whether a matcher is even worth trying, so that we don't incur the cost of compiling the expensive Unicode regexp.
This commit also busts the `WordWatcher` cache since we added a new field to be serialized.
One nice side effect of using `matchAll` instead of a `while / exec` loop is that the likelihood of a bad regexp matching infinitely is vastly reduced 🙌
This commit is contained in:
parent
367b3be035
commit
4cb3412a56
|
@ -1683,6 +1683,7 @@ var bar = 'bar';
|
|||
const opts = {
|
||||
watchedWordsReplace: {
|
||||
"(?:\\W|^)(fun)(?=\\W|$)": {
|
||||
word: "fun",
|
||||
replacement: "times",
|
||||
case_sensitive: false,
|
||||
},
|
||||
|
@ -1697,6 +1698,7 @@ var bar = 'bar';
|
|||
const opts = {
|
||||
watchedWordsLink: {
|
||||
"(?:\\W|^)(fun)(?=\\W|$)": {
|
||||
word: "fun",
|
||||
replacement: "https://discourse.org",
|
||||
case_sensitive: false,
|
||||
},
|
||||
|
@ -1711,18 +1713,21 @@ var bar = 'bar';
|
|||
});
|
||||
|
||||
test("watched words replace with bad regex", function (assert) {
|
||||
const maxMatches = 100; // same limit as MD watched-words-replace plugin
|
||||
const opts = {
|
||||
siteSettings: { watched_words_regular_expressions: true },
|
||||
watchedWordsReplace: {
|
||||
"(\\bu?\\b)": { replacement: "you", case_sensitive: false },
|
||||
"(\\bu?\\b)": {
|
||||
word: "(\\bu?\\b)",
|
||||
replacement: "you",
|
||||
case_sensitive: false,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
assert.cookedOptions(
|
||||
"one",
|
||||
opts,
|
||||
`<p>${"you".repeat(maxMatches)}one</p>`,
|
||||
`<p>youoneyou</p>`,
|
||||
"does not loop infinitely"
|
||||
);
|
||||
});
|
||||
|
|
|
@ -16,22 +16,26 @@ function isLinkClose(str) {
|
|||
function findAllMatches(text, matchers) {
|
||||
const matches = [];
|
||||
|
||||
let count = 0;
|
||||
|
||||
matchers.forEach((matcher) => {
|
||||
let match;
|
||||
while (
|
||||
(match = matcher.pattern.exec(text)) !== null &&
|
||||
count++ < MAX_MATCHES
|
||||
) {
|
||||
matches.push({
|
||||
index: match.index + match[0].indexOf(match[1]),
|
||||
text: match[1],
|
||||
replacement: matcher.replacement,
|
||||
link: matcher.link,
|
||||
});
|
||||
for (const { word, pattern, replacement, link } of matchers) {
|
||||
if (matches.length >= MAX_MATCHES) {
|
||||
break;
|
||||
}
|
||||
});
|
||||
|
||||
if (word.test(text)) {
|
||||
for (const match of text.matchAll(pattern)) {
|
||||
matches.push({
|
||||
index: match.index + match[0].indexOf(match[1]),
|
||||
text: match[1],
|
||||
replacement,
|
||||
link,
|
||||
});
|
||||
|
||||
if (matches.length >= MAX_MATCHES) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return matches.sort((a, b) => a.index - b.index);
|
||||
}
|
||||
|
@ -52,11 +56,12 @@ export function setup(helper) {
|
|||
const matchers = [];
|
||||
|
||||
if (md.options.discourse.watchedWordsReplace) {
|
||||
Object.entries(md.options.discourse.watchedWordsReplace).map(
|
||||
Object.entries(md.options.discourse.watchedWordsReplace).forEach(
|
||||
([regexpString, options]) => {
|
||||
const word = toWatchedWord({ [regexpString]: options });
|
||||
|
||||
matchers.push({
|
||||
word: new RegExp(options.word, options.case_sensitive ? "" : "i"),
|
||||
pattern: createWatchedWordRegExp(word),
|
||||
replacement: options.replacement,
|
||||
link: false,
|
||||
|
@ -66,11 +71,12 @@ export function setup(helper) {
|
|||
}
|
||||
|
||||
if (md.options.discourse.watchedWordsLink) {
|
||||
Object.entries(md.options.discourse.watchedWordsLink).map(
|
||||
Object.entries(md.options.discourse.watchedWordsLink).forEach(
|
||||
([regexpString, options]) => {
|
||||
const word = toWatchedWord({ [regexpString]: options });
|
||||
|
||||
matchers.push({
|
||||
word: new RegExp(options.word, options.case_sensitive ? "" : "i"),
|
||||
pattern: createWatchedWordRegExp(word),
|
||||
replacement: options.replacement,
|
||||
link: true,
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
class WordWatcher
|
||||
REPLACEMENT_LETTER ||= CGI.unescape_html("■")
|
||||
CACHE_VERSION = 2
|
||||
CACHE_VERSION ||= 3
|
||||
|
||||
def initialize(raw)
|
||||
@raw = raw
|
||||
|
@ -24,8 +24,9 @@ class WordWatcher
|
|||
.limit(WatchedWord::MAX_WORDS_PER_ACTION)
|
||||
.order(:id)
|
||||
.pluck(:word, :replacement, :case_sensitive)
|
||||
.map { |w, r, c| [w, { replacement: r, case_sensitive: c }.compact] }
|
||||
.to_h
|
||||
.to_h do |w, r, c|
|
||||
[w, { word: word_to_regexp(w, whole: false), replacement: r, case_sensitive: c }.compact]
|
||||
end
|
||||
end
|
||||
|
||||
def self.words_for_action_exists?(action)
|
||||
|
@ -78,9 +79,7 @@ class WordWatcher
|
|||
end
|
||||
|
||||
def self.word_matcher_regexps(action, engine: :ruby)
|
||||
if words = get_cached_words(action)
|
||||
words.map { |word, attrs| [word_to_regexp(word, engine: engine), attrs] }.to_h
|
||||
end
|
||||
get_cached_words(action)&.to_h { |word, attrs| [word_to_regexp(word, engine: engine), attrs] }
|
||||
end
|
||||
|
||||
def self.word_to_regexp(word, engine: :ruby, whole: true)
|
||||
|
|
|
@ -21,9 +21,11 @@ RSpec.describe WordWatcher do
|
|||
expect(described_class.words_for_action(:block)).to include(
|
||||
word1 => {
|
||||
case_sensitive: false,
|
||||
word: word1,
|
||||
},
|
||||
word2 => {
|
||||
case_sensitive: true,
|
||||
word: word2,
|
||||
},
|
||||
)
|
||||
end
|
||||
|
@ -40,6 +42,7 @@ RSpec.describe WordWatcher do
|
|||
word => {
|
||||
case_sensitive: false,
|
||||
replacement: "http://test.localhost/",
|
||||
word: word,
|
||||
},
|
||||
)
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue
Block a user