Revert "Revert "fix(banned words): fix partial matching of words containing diacritic… (#12444)""

This reverts commit 5362058f35.
2025-12-17 22:57:21 +01:00 · 2020-11-09 11:34:28 +01:00
parent 2e59260149
commit 174ac6d7e3
2 changed files with 15 additions and 1 deletions
--- a/test/api/unit/libs/stringUtils.test.js
+++ b/test/api/unit/libs/stringUtils.test.js
@@ -8,5 +8,10 @@ describe('stringUtils', () => {
      const matches = getMatchesByWordArray(message, bannedWords);
      expect(matches.length).to.equal(bannedWords.length);
    });
    it('doesn\'t flag names with accented characters', () => {
      const name = 'TESTPLACEHOLDERSWEARWORDHEREé';
      const matches = getMatchesByWordArray(name, bannedWords);
      expect(matches.length).to.equal(0);
    });
  });
 });
--- a/website/server/libs/stringUtils.js
+++ b/website/server/libs/stringUtils.js
@@ -4,12 +4,21 @@ export function removePunctuationFromString (str) {
 // NOTE: the wordsToMatch aren't escaped in order to support regular expressions,
 // so this method should not be used if wordsToMatch contains unsanitized user input
 export function getMatchesByWordArray (str, wordsToMatch) {
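  // strip diacritical marks from the string (e.g. 'é' becomes 'e'), because accented
  // letters would otherwise trip up the regEx later on; the string is first decomposed
  // with the built-in Unicode normalisation (NFD) and the combining marks are then removed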
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/normalize
  // https://www.unicode.org/reports/tr15/#Canon_Compat_Equivalence
  // https://unicode-table.com/en/#combining-diacritical-marks
  const normalizedStr = str.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
  const matchedWords = [];
  const wordRegexs = wordsToMatch.map(word => new RegExp(`\\b([^a-z]+)?${word}([^a-z]+)?\\b`, 'i'));
  for (let i = 0; i < wordRegexs.length; i += 1) {
    const regEx = wordRegexs[i];
-    const match = str.match(regEx);
+    const match = normalizedStr.match(regEx);
    if (match !== null && match[0] !== null) {
      const trimmedMatch = removePunctuationFromString(match[0]).trim();
      matchedWords.push(trimmedMatch);