Don't break up links with user profile link injections (#12100)

* Issue 10924 - Don't break up links with mention links * Issue 10924 - Recognise links between brackets. * Issue 10924 - Rely on markdown parser for link determination * Issue 10924 - Only increment index once in findTextBlocks
2025-12-16 14:17:22 +01:00 · 2020-05-09 19:37:08 +02:00
parent 29f6bf7dc6
commit ef99943646
2 changed files with 100 additions and 38 deletions
--- a/test/api/unit/libs/highlightMentions.test.js
+++ b/test/api/unit/libs/highlightMentions.test.js
@@ -64,6 +64,44 @@ describe('highlightMentions', () => {
    expect(result[0]).to.equal(text);
  });

+  describe('link interactions', async () => {
+    it('doesn\'t highlight users in link', async () => {
+      const text = 'http://www.medium.com/@user/blog';
+      const result = await highlightMentions(text);
+      expect(result[0]).to.equal(text);
+    });
+
+    it('doesn\'t highlight user in link between brackets', async () => {
+      const text = '(http://www.medium.com/@user/blog)';
+      const result = await highlightMentions(text);
+      expect(result[0]).to.equal(text);
+    });
+
+    it('doesn\'t highlight user in an autolink', async () => {
+      const text = '<http://www.medium.com/@user/blog>';
+      const result = await highlightMentions(text);
+      expect(result[0]).to.equal(text);
+    });
+
+    it('doesn\'t highlight users in link text', async () => {
+      const text = '[Check awesome blog written by @user](http://www.medium.com/@user/blog)';
+      const result = await highlightMentions(text);
+      expect(result[0]).to.equal(text);
+    });
+
+    it('doesn\'t highlight users in link with newlines and markup', async () => {
+      const text = '[Check `awesome` \nblog **written** by @user](http://www.medium.com/@user/blog)';
+      const result = await highlightMentions(text);
+      expect(result[0]).to.equal(text);
+    });
+
+    it('doesn\'t highlight users in link when followed by same @user mention', async () => {
+      const text = 'http://www.medium.com/@user/blog @user';
+      const result = await highlightMentions(text);
+      expect(result[0]).to.equal('http://www.medium.com/@user/blog [@user](/profile/111)');
+    });
+  });
+
  describe('exceptions in code blocks', () => {
    it('doesn\'t highlight user in inline code block', async () => {
      const text = '`@user`';
--- a/website/server/libs/highlightMentions.js
+++ b/website/server/libs/highlightMentions.js
@@ -4,21 +4,21 @@ import habiticaMarkdown from 'habitica-markdown';
 import { model as User } from '../models/user';

 const mentionRegex = /\B@[-\w]+/g;
-const codeTokenTypes = ['code_block', 'code_inline', 'fence'];
+const ignoreTokenTypes = ['code_block', 'code_inline', 'fence', 'link_open'];

 /**
- * Container class for text blocks and code blocks combined
- * Blocks have the properties `text` and `isCodeBlock`
+ * Container class for valid text blocks and text blocks that should be ignored.
+ * Blocks have the properties `text` and `ignore`
 */
-class TextWithCodeBlocks {
+class TextBlocks {
  constructor (blocks) {
    this.blocks = blocks;
-    this.textBlocks = blocks.filter(block => !block.isCodeBlock);
-    this.allText = this.textBlocks.map(block => block.text).join('\n');
+    this.validBlocks = blocks.filter(block => !block.ignore);
+    this.allValidText = this.validBlocks.map(block => block.text).join('\n');
  }

-  transformTextBlocks (transform) {
-    this.textBlocks.forEach(block => {
+  transformValidBlocks (transform) {
+    this.validBlocks.forEach(block => {
      block.text = transform(block.text);
    });
  }
@@ -32,18 +32,31 @@ class TextWithCodeBlocks {
 * Since tokens have both order and can be nested until infinite depth,
 * use a branching recursive algorithm to maintain order and check all tokens.
 */
-function findCodeBlocks (tokens, aggregator) {
-  const result = aggregator || [];
-  const [head, ...tail] = tokens;
-  if (!head) {
-    return result;
+function findIgnoreBlocks (tokens) {
+  // Links span multiple tokens, so keep local state of whether we're in a link
+  let inLink = false;
+
+  function recursor (ts, result) {
+    const [head, ...tail] = ts;
+    if (!head) {
+      return result;
+    }
+
+    if (!inLink && ignoreTokenTypes.includes(head.type)) {
+      result.push(head);
+    }
+
+    if (head.type.includes('link')) {
+      inLink = !inLink;
+    } else if (inLink && head.type === 'text') {
+      const linkBlock = result[result.length - 1];
+      linkBlock.textContents = (linkBlock.textContents || []).concat(head.content);
+    }
+
+    return recursor(tail, head.children ? recursor(head.children, result) : result);
  }

-  if (codeTokenTypes.includes(head.type)) {
-    result.push(head);
-  }
-
-  return findCodeBlocks(tail, head.children ? findCodeBlocks(head.children, result) : result);
+  return recursor(tokens, []);
 }

 /**
@@ -57,7 +70,10 @@ function withOptionalIndentation (content) {
  return content.split('\n').map(line => `\\s*${line}`).join('\n');
 }

-function createCodeBlockRegex ({ content, type, markup }) {
+// This is essentially a workaround around the fact that markdown-it doesn't
+// provide sourcemap functionality and is the most brittle part of this code.
+function toSourceMapRegex (token) {
+  const { type, content, markup } = token;
  const contentRegex = escapeRegExp(content);
  let regexStr = '';

@@ -65,42 +81,49 @@ function createCodeBlockRegex ({ content, type, markup }) {
    regexStr = withOptionalIndentation(contentRegex);
  } else if (type === 'fence') {
    regexStr = `\\s*${markup}.*\n${withOptionalIndentation(contentRegex)}\\s*${markup}`;
-  } else { // type === code_inline
+  } else if (type === 'code_inline') {
    regexStr = `${markup} ?${contentRegex} ?${markup}`;
+  } else if (type === 'link_open') {
+    const texts = token.textContents.map(escapeRegExp);
+    regexStr = markup === 'linkify' || markup === 'autolink' ? texts[0]
+      : `\\[.*${texts.join('.*')}.*\\]\\(${escapeRegExp(token.attrs[0][1])}\\)`;
+  } else {
+    throw new Error(`No source mapping regex defined for ignore blocks of type ${type}`);
  }

-  return new RegExp(regexStr);
+  return new RegExp(regexStr, 's');
 }

 /**
- * Uses habiticaMarkdown to determine what part of the text are code blocks
+ * Uses habiticaMarkdown to determine which text blocks should be ignored (links and code blocks)
 * according to the specification here: https://spec.commonmark.org/0.29/
 */
-function findTextAndCodeBlocks (text) {
+function findTextBlocks (text) {
  // For token description see https://markdown-it.github.io/markdown-it/#Token
  // The second parameter is mandatory even if not used, see
  // https://markdown-it.github.io/markdown-it/#MarkdownIt.parse
  const tokens = habiticaMarkdown.parse(text, {});
-  const codeBlocks = findCodeBlocks(tokens);
+  const ignoreBlockRegexes = findIgnoreBlocks(tokens).map(toSourceMapRegex);

  const blocks = [];
-  let remainingText = text;
-  codeBlocks.forEach(codeBlock => {
-    const codeBlockRegex = createCodeBlockRegex(codeBlock);
-    const match = remainingText.match(codeBlockRegex);
+  let index = 0;
+  ignoreBlockRegexes.forEach(regex => {
+    const targetText = text.substr(index);
+    const match = targetText.match(regex);

    if (match.index) {
-      blocks.push({ text: remainingText.substr(0, match.index), isCodeBlock: false });
+      blocks.push({ text: targetText.substr(0, match.index), ignore: false });
    }
-    blocks.push({ text: match[0], isCodeBlock: true });

-    remainingText = remainingText.substr(match.index + match[0].length);
+    blocks.push({ text: match[0], ignore: true });
+    index += match.index + match[0].length;
  });

-  if (remainingText) {
-    blocks.push({ text: remainingText, isCodeBlock: false });
+  if (index < text.length) {
+    blocks.push({ text: text.substr(index), ignore: false });
  }
-  return new TextWithCodeBlocks(blocks);
+
+  return new TextBlocks(blocks);
 }

 /**
@@ -108,11 +131,12 @@ function findTextAndCodeBlocks (text) {
 * a link towards the user's profile page.
 * - Only works if there are no more that 5 user mentions
 * - Skips mentions in code blocks as defined by https://spec.commonmark.org/0.29/
+ * - Skips mentions in links
 */
 export default async function highlightMentions (text) {
-  const textAndCodeBlocks = findTextAndCodeBlocks(text);
+  const textBlocks = findTextBlocks(text);

-  const mentions = textAndCodeBlocks.allText.match(mentionRegex);
+  const mentions = textBlocks.allValidText.match(mentionRegex);
  let members = [];

  if (mentions && mentions.length <= 5) {
@@ -127,9 +151,9 @@ export default async function highlightMentions (text) {
      const regex = new RegExp(`@${username}(?![\\-\\w])`, 'g');
      const replacement = `[@${username}](/profile/${member._id})`;

-      textAndCodeBlocks.transformTextBlocks(blockText => blockText.replace(regex, replacement));
+      textBlocks.transformValidBlocks(blockText => blockText.replace(regex, replacement));
    });
  }

-  return [textAndCodeBlocks.rebuild(), mentions, members];
+  return [textBlocks.rebuild(), mentions, members];
 }