mirror of
https://github.com/discourse/discourse.git
synced 2025-01-22 19:03:01 +08:00
862007fb18
* FEATURE: Add case-sensitivity flag to watched_words Currently, all watched words are matched case-insensitively. This flag allows a watched word to be flagged for case-sensitive matching. To allow for backwards compatibility, the flag is set to false by default. * FEATURE: Support case-sensitive creation of Watched Words via API Extend admin creation and upload of Watched Words to support the case-sensitive flag. This lays the groundwork for supporting case-insensitive matching of Watched Words. Support for an extra column has also been introduced for the Watched Words upload CSV file. The new column structure is as follows: word,replacement,case_sensitive * FEATURE: Enable case-sensitive matching of Watched Words WordWatcher's word_matcher_regexp now returns a list of regular expressions instead of one case-insensitive regular expression. With the ability to flag a Watched Word as case-sensitive, an action can have words of both sensitivities. This makes the use of the global Regexp::IGNORECASE flag added to all words problematic. To get around platform limitations around the use of subexpression level switches/flags, a list of regular expressions is returned instead, one for each case sensitivity. Word matching has also been updated to use this list of regular expressions instead of one. * FEATURE: Use case-sensitive regular expressions for Watched Words Update Watched Words regular expressions matching and processing to handle the extra metadata which comes along with the introduction of case-sensitive Watched Words. This allows case-sensitive Watched Words to be matched as such. * DEV: Simplify type casting of case-sensitive flag from uploads Use builtin semantics instead of a custom method for converting string case flags in uploaded Watched Words to boolean. * UX: Add case-sensitivity details to Admin Watched Words UI Update Watched Word form to include a toggle for case-sensitivity.
This also adds support for case-sensitive testing and matching of Watched Words in the admin UI. * DEV: Code improvements from review feedback - Extract watched word regex creation out to a utility function - Make JS array presence check more explicit and readable * DEV: Extract Watched Word regex creation to utility function Clean-up work from review feedback. Reduce code duplication. * DEV: Rename word_matcher_regexp to word_matcher_regexp_list Since a list is returned now instead of a single regular expression, change `word_matcher_regexp` to `word_matcher_regexp_list` to better communicate this change. * DEV: Incorporate WordWatcher updates from upstream Resolve conflicts and ensure apply_to_text does not remove non-word characters in matches that aren't at the beginning of the line.
245 lines
6.9 KiB
JavaScript
245 lines
6.9 KiB
JavaScript
import {
|
|
createWatchedWordRegExp,
|
|
toWatchedWord,
|
|
} from "discourse-common/utils/watched-words";
|
|
|
|
// Upper bound on how many matches findAllMatches collects for a single text.
const MAX_MATCHES = 100;
|
|
|
|
/**
 * Tells whether a raw HTML fragment starts with an opening anchor tag.
 *
 * The match is case-insensitive and requires `<a` to be followed by `>` or
 * whitespace, so tags that merely start with "a" (e.g. `<abbr>`) are rejected.
 *
 * @param {string} str - raw HTML fragment to inspect
 * @returns {boolean} true when `str` begins with `<a>` or `<a ...`
 */
function isLinkOpen(str) {
  return /^<a[>\s]/i.test(str);
}
|
|
|
|
/**
 * Tells whether a raw HTML fragment starts with a closing anchor tag.
 *
 * The match is case-insensitive and tolerates whitespace before the `>`.
 *
 * @param {string} str - raw HTML fragment to inspect
 * @returns {boolean} true when `str` begins with `</a>`
 */
function isLinkClose(str) {
  return /^<\/a\s*>/i.test(str);
}
|
|
|
|
/**
 * Collects the watched-word matches found in `text`, across all matchers.
 *
 * At most MAX_MATCHES matches are collected in total. Each result describes
 * the captured word (first capture group, or the whole match when the pattern
 * has no participating capture group), where it starts in `text`, and the
 * replacement/link settings of the matcher that produced it.
 *
 * @param {string} text - text to scan
 * @param {Array<{pattern: RegExp, replacement: string, link: boolean}>} matchers
 * @returns {Array<{index: number, text: string, replacement: string, link: boolean}>}
 *   matches sorted by ascending start index
 */
function findAllMatches(text, matchers) {
  const matches = [];

  for (const { pattern, replacement, link } of matchers) {
    if (matches.length >= MAX_MATCHES) {
      break;
    }

    // Patterns are reused between calls and global regexps keep their scan
    // position in `lastIndex`; always start scanning from the beginning.
    pattern.lastIndex = 0;

    let match;
    // Check the budget *before* calling exec so that hitting the limit never
    // advances a pattern whose match would be thrown away.
    while (
      matches.length < MAX_MATCHES &&
      (match = pattern.exec(text)) !== null
    ) {
      const matchedText = match[1] === undefined ? match[0] : match[1];

      matches.push({
        index: match.index + match[0].indexOf(matchedText),
        text: matchedText,
        replacement,
        link,
      });

      // Without the global/sticky flag exec() never advances and would keep
      // returning the same match.
      if (!pattern.global && !pattern.sticky) {
        break;
      }

      // A zero-length match does not move lastIndex; step over it manually.
      if (match[0] === "") {
        pattern.lastIndex++;
      }
    }

    // Leave the shared pattern in a clean state for the next caller.
    pattern.lastIndex = 0;
  }

  return matches.sort((a, b) => a.index - b.index);
}
|
|
|
|
// Load order of this plugin: it has to load after the mentions and hashtags
// plugins, which are priority 0.
export const priority = 1;
|
|
|
|
// Kinds of "do not touch" region tracked while scanning inline tokens:
// NONE means we are outside any such region, the others record which opening
// token started the region so the matching closing token can end it.
const NONE = 0;
const MENTION = 1;
const HASHTAG_LINK = 2;
const HASHTAG_SPAN = 3;
|
|
|
|
/**
 * Registers the "watched-words" markdown-it core rule.
 *
 * The rule rewrites the text tokens of every inline token: words configured in
 * `md.options.discourse.watchedWordsReplace` are swapped for their replacement
 * text, and words configured in `md.options.discourse.watchedWordsLink` are
 * wrapped in a link pointing at their replacement URL. Text inside mentions,
 * hashtags and markdown links is left alone; inside raw HTML `<a>` tags no
 * link is generated. Nothing is registered when no words are configured.
 *
 * @param {Object} helper - plugin helper exposing `getOptions()` and
 *   `registerPlugin(callback)`
 */
export function setup(helper) {
  const opts = helper.getOptions();

  helper.registerPlugin((md) => {
    const matchers = [];

    // Turns one `{ regexpString: options }` dictionary into matchers.
    const addMatchers = (words, link) => {
      if (!words) {
        return;
      }

      for (const [regexpString, options] of Object.entries(words)) {
        const word = toWatchedWord({ [regexpString]: options });

        matchers.push({
          pattern: createWatchedWordRegExp(word),
          replacement: options.replacement,
          link,
        });
      }
    };

    addMatchers(md.options.discourse.watchedWordsReplace, false);
    addMatchers(md.options.discourse.watchedWordsLink, true);

    if (matchers.length === 0) {
      return;
    }

    // Matches only depend on the text (the matchers never change), so
    // identical text tokens can share one lookup.
    const cache = new Map();

    md.core.ruler.push("watched-words", (state) => {
      for (let j = 0, l = state.tokens.length; j < l; j++) {
        if (state.tokens[j].type !== "inline") {
          continue;
        }

        let tokens = state.tokens[j].children;

        let htmlLinkLevel = 0;

        // We scan once to mark tokens that must be skipped because they are
        // mentions or hashtags
        let lastType = NONE;
        for (let i = 0; i < tokens.length; ++i) {
          const currentToken = tokens[i];

          if (currentToken.type === "mention_open") {
            lastType = MENTION;
          } else if (
            (currentToken.type === "link_open" ||
              currentToken.type === "span_open") &&
            currentToken.attrs &&
            currentToken.attrs.some(
              (attr) => attr[0] === "class" && attr[1] === "hashtag"
            )
          ) {
            lastType =
              currentToken.type === "link_open" ? HASHTAG_LINK : HASHTAG_SPAN;
          }

          // Opening token, content and closing token are all marked.
          if (lastType !== NONE) {
            currentToken.skipReplace = true;
          }

          if (
            (lastType === MENTION && currentToken.type === "mention_close") ||
            (lastType === HASHTAG_LINK && currentToken.type === "link_close") ||
            (lastType === HASHTAG_SPAN && currentToken.type === "span_close")
          ) {
            lastType = NONE;
          }
        }

        // We scan from the end, to keep position when new tags added.
        // Use reversed logic in links start/end match
        for (let i = tokens.length - 1; i >= 0; i--) {
          const currentToken = tokens[i];

          // Skip content of markdown links
          if (currentToken.type === "link_close") {
            i--;
            while (
              tokens[i].level !== currentToken.level &&
              tokens[i].type !== "link_open"
            ) {
              i--;
            }
            continue;
          }

          // Track nesting of html tag links; scanning backwards means the
          // closing tag is seen first.
          if (currentToken.type === "html_inline") {
            if (isLinkOpen(currentToken.content) && htmlLinkLevel > 0) {
              htmlLinkLevel--;
            }

            if (isLinkClose(currentToken.content)) {
              htmlLinkLevel++;
            }
          }

          // Skip content of mentions or hashtags
          if (currentToken.skipReplace) {
            continue;
          }

          if (currentToken.type !== "text") {
            continue;
          }

          const text = currentToken.content;

          let matches;
          if (cache.has(text)) {
            matches = cache.get(text);
          } else {
            matches = findAllMatches(text, matchers);
            cache.set(text, matches);
          }

          // Nothing to rewrite: keep the original token untouched.
          if (matches.length === 0) {
            continue;
          }

          // Now split string to nodes
          const nodes = [];
          let level = currentToken.level;
          let lastPos = 0;

          let token;
          for (const match of matches) {
            // Overlaps an earlier match that was already handled.
            if (match.index < lastPos) {
              continue;
            }

            if (match.index > lastPos) {
              token = new state.Token("text", "", 0);
              token.content = text.slice(lastPos, match.index);
              token.level = level;
              nodes.push(token);
            }

            // A link is only generated outside of html links and when the
            // target URL passes validation.
            let linkUrl = null;
            if (match.link) {
              const url = state.md.normalizeLink(match.replacement);
              if (htmlLinkLevel === 0 && state.md.validateLink(url)) {
                linkUrl = url;
              }
            }

            if (linkUrl !== null) {
              token = new state.Token("link_open", "a", 1);
              token.attrs = [["href", linkUrl]];
              if (opts.discourse.previewing) {
                token.attrs.push(["data-word", ""]);
              }
              token.level = level++;
              token.markup = "linkify";
              token.info = "auto";
              nodes.push(token);

              token = new state.Token("text", "", 0);
              token.content = match.text;
              token.level = level;
              nodes.push(token);

              token = new state.Token("link_close", "a", -1);
              token.level = --level;
              token.markup = "linkify";
              token.info = "auto";
              nodes.push(token);
            } else {
              token = new state.Token("text", "", 0);
              // When a link cannot be generated the matched word must be kept
              // as plain text, otherwise it would vanish from the output.
              token.content = match.link ? match.text : match.replacement;
              token.level = level;
              nodes.push(token);
            }

            lastPos = match.index + match.text.length;
          }

          if (lastPos < text.length) {
            token = new state.Token("text", "", 0);
            token.content = text.slice(lastPos);
            token.level = level;
            nodes.push(token);
          }

          // replace current node
          state.tokens[j].children = tokens = md.utils.arrayReplaceAt(
            tokens,
            i,
            nodes
          );
        }
      }
    });
  });
}
|