diff --git a/test/api/unit/libs/stringUtils.test.js b/test/api/unit/libs/stringUtils.test.js index 0596cd16b9..947274df53 100644 --- a/test/api/unit/libs/stringUtils.test.js +++ b/test/api/unit/libs/stringUtils.test.js @@ -8,5 +8,10 @@ describe('stringUtils', () => { const matches = getMatchesByWordArray(message, bannedWords); expect(matches.length).to.equal(bannedWords.length); }); + it('doesn\'t flag names with accented characters', () => { + const name = 'TESTPLACEHOLDERSWEARWORDHEREé'; + const matches = getMatchesByWordArray(name, bannedWords); + expect(matches.length).to.equal(0); + }); }); }); diff --git a/website/server/libs/stringUtils.js b/website/server/libs/stringUtils.js index a7e4b08aa8..39f75356c5 100644 --- a/website/server/libs/stringUtils.js +++ b/website/server/libs/stringUtils.js @@ -4,12 +4,21 @@ export function removePunctuationFromString (str) { // NOTE: the wordsToMatch aren't escaped in order to support regular expressions, // so this method should not be used if wordsToMatch contains unsanitized user input + export function getMatchesByWordArray (str, wordsToMatch) { + // remove accented characters from the string, which would trip up the regEx + // later on, by using the built-in Unicode normalisation methods + // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/normalize + // https://www.unicode.org/reports/tr15/#Canon_Compat_Equivalence + // https://unicode-table.com/en/#combining-diacritical-marks + + const normalizedStr = str.normalize('NFD').replace(/[\u0300-\u036f]/g, ''); + const matchedWords = []; const wordRegexs = wordsToMatch.map(word => new RegExp(`\\b([^a-z]+)?${word}([^a-z]+)?\\b`, 'i')); for (let i = 0; i < wordRegexs.length; i += 1) { const regEx = wordRegexs[i]; - const match = str.match(regEx); + const match = normalizedStr.match(regEx); if (match !== null && match[0] !== null) { const trimmedMatch = removePunctuationFromString(match[0]).trim(); matchedWords.push(trimmedMatch);