mirror of
https://github.com/HabitRPG/habitica.git
synced 2025-12-16 06:07:21 +01:00
* fix(banned words): fix partial matching of words containing diacritics against banned words list (#12309) * lint: remove whitespace to fix error * test: add test to prevent partial matching of words containing diacritics against banned words list (#12309) * doc: add link to Unicode table of diacritical marks (#12309)
29 lines
1.2 KiB
JavaScript
29 lines
1.2 KiB
JavaScript
/**
 * Replaces each punctuation character in a string with a single space.
 *
 * The characters replaced are exactly: . , / # ! @ $ % ^ & ; : { } = - _ ` ~ ( )
 * Every occurrence becomes its own space (runs are not collapsed), and any
 * character outside that set (e.g. quotes, `*`, `?`) is left untouched.
 *
 * @param {string} str - The text to strip punctuation from.
 * @returns {string} A copy of `str` with the listed characters turned into spaces.
 */
export function removePunctuationFromString (str) {
  const punctuation = /[.,/#!@$%^&;:{}=\-_`~()]/g;
  const spaced = str.replace(punctuation, ' ');
  return spaced;
}
|
|
|
|
// NOTE: the wordsToMatch aren't escaped in order to support regular expressions,
|
|
// so this method should not be used if wordsToMatch contains unsanitized user input
|
|
|
|
/**
 * Finds which of the given words occur in a string as whole words.
 *
 * Each entry of `wordsToMatch` is interpolated, unescaped, into a
 * case-insensitive regular expression, so entries may themselves be regex
 * fragments. For the same reason they must never come from unsanitized
 * user input.
 *
 * @param {string} str - The text to search.
 * @param {string[]} wordsToMatch - Words (or regex fragments) to look for.
 * @returns {string[]} One entry per word that matched, in `wordsToMatch`
 *   order: the first matched text (taken from the accent-stripped string),
 *   with punctuation replaced by spaces and then trimmed.
 */
export function getMatchesByWordArray (str, wordsToMatch) {
  // Strip accents before matching: decompose to NFD so each accented letter
  // becomes base letter + combining mark, then drop the combining marks
  // (U+0300–U+036F). Otherwise `\b` sees the accented character as a word
  // boundary and a listed word can match part of a longer, accented word.
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/normalize
  // https://www.unicode.org/reports/tr15/#Canon_Compat_Equivalence
  // https://unicode-table.com/en/#combining-diacritical-marks
  const plainText = str.normalize('NFD').replace(/[\u0300-\u036f]/g, '');

  // Compile every pattern up front. The optional `[^a-z]+` groups let
  // non-letter characters adjacent to the word be part of the match.
  const patterns = wordsToMatch.map(word => new RegExp(`\\b([^a-z]+)?${word}([^a-z]+)?\\b`, 'i'));

  const found = [];
  for (const pattern of patterns) {
    const hit = plainText.match(pattern);
    if (hit === null || hit[0] === null) continue;

    found.push(removePunctuationFromString(hit[0]).trim());
  }
  return found;
}
|