Files
habitica/website/server/libs/stringUtils.js
Carlton McFarlane a53355872b Add checks for profanity to profile updates (#12445)
* fix(profile): detect attempt to use banned words as display name. refactor profanity detection method.

* fix(profile): detect attempt to use banned words in blurb. further refactor profanity detection. inform the user their chat privileges have been revoked.

* refactor: add function to normalize Unicode strings and remove diacritics

* fix: improve regEx to prevent false partial matches e.g. 'hello' being recognised as banned words. porting fix from #12309

* fix(profile): refactor of profanity detection for #12445

* fix(profile): add test for swear words in new profile. fix existing tests

* fix(profile): show different error message for attempted slur use in username by new users.

* fix(profile): remove incorrect slur test

* fix(profile): fix slurs not caught at the start or end of strings connected by punctuation

* tests(profile): fix tests for profanity checking

* remove exclusive test

* 11865 - update text for slur warnings

* 11865 - remove unused string from locale files

* 11865 - improve naming of banned word usage locale string

* 11865 - improve logic so that differentiated warnings are shown depending on whether a slur or other profanity has been used in a display name

* 11865 - construct slur regexes outside the validation function in which they are used

* 11865 - fix tests
2021-04-30 15:47:39 -05:00

34 lines
1.4 KiB
JavaScript

/**
 * Strips combining diacritical marks from a string.
 *
 * The string is first decomposed to Unicode NFD form so that accented
 * characters become a base character followed by combining marks; every
 * code unit in the U+0300-U+036F range is then removed.
 *
 * @param {string} str - the text to normalize
 * @returns {string} the decomposed text without combining diacritical marks
 */
export function normalizeUnicodeString (str) {
  const combiningMarks = /[\u0300-\u036f]/g;
  const decomposed = str.normalize('NFD');
  return decomposed.replace(combiningMarks, '');
}
/**
 * Replaces each punctuation character listed in the pattern below with a
 * single space. Characters are substituted one-for-one, so the result has
 * the same length as the input. Characters not in the list (for example
 * `?`, `*`, `+`, `'`) are left untouched.
 *
 * @param {string} str - the text to process
 * @returns {string} the text with the listed punctuation turned into spaces
 */
export function removePunctuationFromString (str) {
  const punctuation = /[.,/#!@$%^&;:{}=\-_`~()]/g;
  const replacement = ' ';
  return str.replace(punctuation, replacement);
}
/**
 * Finds which of the given words occur in a string as whole words.
 *
 * Both the string and every word are passed through normalizeUnicodeString
 * and removePunctuationFromString before matching, and matching is
 * case-insensitive. For each word only the first occurrence is reported.
 *
 * NOTE: the wordsToMatch aren't escaped in order to support regular expressions,
 * so this method should not be used if wordsToMatch contains unsanitized user input
 *
 * @param {string} str - the text to search
 * @param {string[]} wordsToMatch - words (or regular-expression fragments) to look for
 * @returns {string[]} the trimmed matched text for each word that was found,
 *   in the same order as wordsToMatch
 */
export function getMatchesByWordArray (str, wordsToMatch) {
  // Nothing to look for: return early without touching str.
  if (wordsToMatch.length === 0) return [];

  // remove accented characters from the string, which would trip up the regEx
  // later on, by using the built-in Unicode normalisation methods
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/normalize
  // https://www.unicode.org/reports/tr15/#Canon_Compat_Equivalence
  // https://unicode-table.com/en/#combining-diacritical-marks
  // The searched text is the same for every word, so it is cleaned up once here
  // rather than once per word.
  const normalizedStr = removePunctuationFromString(normalizeUnicodeString(str));

  return wordsToMatch
    .map(word => {
      const normalizedWord = removePunctuationFromString(normalizeUnicodeString(word));
      // The word must sit between word boundaries; any run of non-letter
      // characters directly before or after it is absorbed into the match so
      // that a longer word merely containing it is not reported.
      const regEx = new RegExp(`\\b([^a-z]+)?${normalizedWord}([^a-z]+)?\\b`, 'i');
      return normalizedStr.match(regEx);
    })
    .filter(match => match !== null)
    // match[0] may carry the spaces left behind by stripped punctuation
    .map(match => match[0].trim());
}