Revert "fix(banned words): fix partial matching of words containing diacritic… (#12444)"

This reverts commit f2bcdd21de.
This commit is contained in:
Sabe Jones
2020-11-05 15:44:59 -06:00
parent 9d6fb2ca26
commit 5362058f35
2 changed files with 1 addition and 15 deletions

View File

@@ -8,10 +8,5 @@ describe('stringUtils', () => {
const matches = getMatchesByWordArray(message, bannedWords); const matches = getMatchesByWordArray(message, bannedWords);
expect(matches.length).to.equal(bannedWords.length); expect(matches.length).to.equal(bannedWords.length);
}); });
// A name ending in an accented letter is expected to yield zero matches.
// NOTE(review): presumably bannedWords contains the unaccented prefix of this
// name — confirm against the fixture defined earlier in the spec file.
it('doesn\'t flag names with accented characters', () => {
  const accentedName = 'TESTPLACEHOLDERSWEARWORDHEREé';
  expect(getMatchesByWordArray(accentedName, bannedWords).length).to.equal(0);
});
}); });
}); });

View File

@@ -4,21 +4,12 @@ export function removePunctuationFromString (str) {
// NOTE: the wordsToMatch aren't escaped in order to support regular expressions, // NOTE: the wordsToMatch aren't escaped in order to support regular expressions,
// so this method should not be used if wordsToMatch contains unsanitized user input // so this method should not be used if wordsToMatch contains unsanitized user input
export function getMatchesByWordArray (str, wordsToMatch) { export function getMatchesByWordArray (str, wordsToMatch) {
// remove accented characters from the string, which would trip up the regEx
// later on, by using the built-in Unicode normalisation methods
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/normalize
// https://www.unicode.org/reports/tr15/#Canon_Compat_Equivalence
// https://unicode-table.com/en/#combining-diacritical-marks
const normalizedStr = str.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
const matchedWords = []; const matchedWords = [];
const wordRegexs = wordsToMatch.map(word => new RegExp(`\\b([^a-z]+)?${word}([^a-z]+)?\\b`, 'i')); const wordRegexs = wordsToMatch.map(word => new RegExp(`\\b([^a-z]+)?${word}([^a-z]+)?\\b`, 'i'));
for (let i = 0; i < wordRegexs.length; i += 1) { for (let i = 0; i < wordRegexs.length; i += 1) {
const regEx = wordRegexs[i]; const regEx = wordRegexs[i];
const match = normalizedStr.match(regEx); const match = str.match(regEx);
if (match !== null && match[0] !== null) { if (match !== null && match[0] !== null) {
const trimmedMatch = removePunctuationFromString(match[0]).trim(); const trimmedMatch = removePunctuationFromString(match[0]).trim();
matchedWords.push(trimmedMatch); matchedWords.push(trimmedMatch);