Don't break up links with user profile link injections (#12100)

* Issue 10924 - Don't break up links with mention links

* Issue 10924 - Recognise links between brackets.

* Issue 10924 - Rely on markdown parser for link determination

* Issue 10924 - Only increment index once in findTextBlocks
This commit is contained in:
Bart Enkelaar
2020-05-09 19:37:08 +02:00
committed by GitHub
parent 29f6bf7dc6
commit ef99943646
2 changed files with 100 additions and 38 deletions

View File

@@ -64,6 +64,44 @@ describe('highlightMentions', () => {
expect(result[0]).to.equal(text); expect(result[0]).to.equal(text);
}); });
describe('link interactions', async () => {
it('doesn\'t highlight users in link', async () => {
const text = 'http://www.medium.com/@user/blog';
const result = await highlightMentions(text);
expect(result[0]).to.equal(text);
});
it('doesn\'t highlight user in link between brackets', async () => {
const text = '(http://www.medium.com/@user/blog)';
const result = await highlightMentions(text);
expect(result[0]).to.equal(text);
});
it('doesn\'t highlight user in an autolink', async () => {
const text = '<http://www.medium.com/@user/blog>';
const result = await highlightMentions(text);
expect(result[0]).to.equal(text);
});
it('doesn\'t highlight users in link text', async () => {
const text = '[Check awesome blog written by @user](http://www.medium.com/@user/blog)';
const result = await highlightMentions(text);
expect(result[0]).to.equal(text);
});
it('doesn\'t highlight users in link with newlines and markup', async () => {
const text = '[Check `awesome` \nblog **written** by @user](http://www.medium.com/@user/blog)';
const result = await highlightMentions(text);
expect(result[0]).to.equal(text);
});
it('doesn\'t highlight users in link when followed by same @user mention', async () => {
const text = 'http://www.medium.com/@user/blog @user';
const result = await highlightMentions(text);
expect(result[0]).to.equal('http://www.medium.com/@user/blog [@user](/profile/111)');
});
});
describe('exceptions in code blocks', () => { describe('exceptions in code blocks', () => {
it('doesn\'t highlight user in inline code block', async () => { it('doesn\'t highlight user in inline code block', async () => {
const text = '`@user`'; const text = '`@user`';

View File

@@ -4,21 +4,21 @@ import habiticaMarkdown from 'habitica-markdown';
import { model as User } from '../models/user'; import { model as User } from '../models/user';
const mentionRegex = /\B@[-\w]+/g; const mentionRegex = /\B@[-\w]+/g;
const codeTokenTypes = ['code_block', 'code_inline', 'fence']; const ignoreTokenTypes = ['code_block', 'code_inline', 'fence', 'link_open'];
/** /**
* Container class for text blocks and code blocks combined * Container class for valid text blocks and text blocks that should be ignored.
* Blocks have the properties `text` and `isCodeBlock` * Blocks have the properties `text` and `ignore`
*/ */
class TextWithCodeBlocks { class TextBlocks {
constructor (blocks) { constructor (blocks) {
this.blocks = blocks; this.blocks = blocks;
this.textBlocks = blocks.filter(block => !block.isCodeBlock); this.validBlocks = blocks.filter(block => !block.ignore);
this.allText = this.textBlocks.map(block => block.text).join('\n'); this.allValidText = this.validBlocks.map(block => block.text).join('\n');
} }
transformTextBlocks (transform) { transformValidBlocks (transform) {
this.textBlocks.forEach(block => { this.validBlocks.forEach(block => {
block.text = transform(block.text); block.text = transform(block.text);
}); });
} }
@@ -32,18 +32,31 @@ class TextWithCodeBlocks {
* Since tokens have both order and can be nested until infinite depth, * Since tokens have both order and can be nested until infinite depth,
* use a branching recursive algorithm to maintain order and check all tokens. * use a branching recursive algorithm to maintain order and check all tokens.
*/ */
function findCodeBlocks (tokens, aggregator) { function findIgnoreBlocks (tokens) {
const result = aggregator || []; // Links span multiple tokens, so keep local state of whether we're in a link
const [head, ...tail] = tokens; let inLink = false;
function recursor (ts, result) {
const [head, ...tail] = ts;
if (!head) { if (!head) {
return result; return result;
} }
if (codeTokenTypes.includes(head.type)) { if (!inLink && ignoreTokenTypes.includes(head.type)) {
result.push(head); result.push(head);
} }
return findCodeBlocks(tail, head.children ? findCodeBlocks(head.children, result) : result); if (head.type.includes('link')) {
inLink = !inLink;
} else if (inLink && head.type === 'text') {
const linkBlock = result[result.length - 1];
linkBlock.textContents = (linkBlock.textContents || []).concat(head.content);
}
return recursor(tail, head.children ? recursor(head.children, result) : result);
}
return recursor(tokens, []);
} }
/** /**
@@ -57,7 +70,10 @@ function withOptionalIndentation (content) {
return content.split('\n').map(line => `\\s*${line}`).join('\n'); return content.split('\n').map(line => `\\s*${line}`).join('\n');
} }
function createCodeBlockRegex ({ content, type, markup }) { // This is essentially a workaround around the fact that markdown-it doesn't
// provide sourcemap functionality and is the most brittle part of this code.
function toSourceMapRegex (token) {
const { type, content, markup } = token;
const contentRegex = escapeRegExp(content); const contentRegex = escapeRegExp(content);
let regexStr = ''; let regexStr = '';
@@ -65,42 +81,49 @@ function createCodeBlockRegex ({ content, type, markup }) {
regexStr = withOptionalIndentation(contentRegex); regexStr = withOptionalIndentation(contentRegex);
} else if (type === 'fence') { } else if (type === 'fence') {
regexStr = `\\s*${markup}.*\n${withOptionalIndentation(contentRegex)}\\s*${markup}`; regexStr = `\\s*${markup}.*\n${withOptionalIndentation(contentRegex)}\\s*${markup}`;
} else { // type === code_inline } else if (type === 'code_inline') {
regexStr = `${markup} ?${contentRegex} ?${markup}`; regexStr = `${markup} ?${contentRegex} ?${markup}`;
} else if (type === 'link_open') {
const texts = token.textContents.map(escapeRegExp);
regexStr = markup === 'linkify' || markup === 'autolink' ? texts[0]
: `\\[.*${texts.join('.*')}.*\\]\\(${escapeRegExp(token.attrs[0][1])}\\)`;
} else {
throw new Error(`No source mapping regex defined for ignore blocks of type ${type}`);
} }
return new RegExp(regexStr); return new RegExp(regexStr, 's');
} }
/** /**
* Uses habiticaMarkdown to determine what part of the text are code blocks * Uses habiticaMarkdown to determine which text blocks should be ignored (links and code blocks)
* according to the specification here: https://spec.commonmark.org/0.29/ * according to the specification here: https://spec.commonmark.org/0.29/
*/ */
function findTextAndCodeBlocks (text) { function findTextBlocks (text) {
// For token description see https://markdown-it.github.io/markdown-it/#Token // For token description see https://markdown-it.github.io/markdown-it/#Token
// The second parameter is mandatory even if not used, see // The second parameter is mandatory even if not used, see
// https://markdown-it.github.io/markdown-it/#MarkdownIt.parse // https://markdown-it.github.io/markdown-it/#MarkdownIt.parse
const tokens = habiticaMarkdown.parse(text, {}); const tokens = habiticaMarkdown.parse(text, {});
const codeBlocks = findCodeBlocks(tokens); const ignoreBlockRegexes = findIgnoreBlocks(tokens).map(toSourceMapRegex);
const blocks = []; const blocks = [];
let remainingText = text; let index = 0;
codeBlocks.forEach(codeBlock => { ignoreBlockRegexes.forEach(regex => {
const codeBlockRegex = createCodeBlockRegex(codeBlock); const targetText = text.substr(index);
const match = remainingText.match(codeBlockRegex); const match = targetText.match(regex);
if (match.index) { if (match.index) {
blocks.push({ text: remainingText.substr(0, match.index), isCodeBlock: false }); blocks.push({ text: targetText.substr(0, match.index), ignore: false });
} }
blocks.push({ text: match[0], isCodeBlock: true });
remainingText = remainingText.substr(match.index + match[0].length); blocks.push({ text: match[0], ignore: true });
index += match.index + match[0].length;
}); });
if (remainingText) { if (index < text.length) {
blocks.push({ text: remainingText, isCodeBlock: false }); blocks.push({ text: text.substr(index), ignore: false });
} }
return new TextWithCodeBlocks(blocks);
return new TextBlocks(blocks);
} }
/** /**
@@ -108,11 +131,12 @@ function findTextAndCodeBlocks (text) {
* a link towards the user's profile page. * a link towards the user's profile page.
* - Only works if there are no more that 5 user mentions * - Only works if there are no more that 5 user mentions
* - Skips mentions in code blocks as defined by https://spec.commonmark.org/0.29/ * - Skips mentions in code blocks as defined by https://spec.commonmark.org/0.29/
* - Skips mentions in links
*/ */
export default async function highlightMentions (text) { export default async function highlightMentions (text) {
const textAndCodeBlocks = findTextAndCodeBlocks(text); const textBlocks = findTextBlocks(text);
const mentions = textAndCodeBlocks.allText.match(mentionRegex); const mentions = textBlocks.allValidText.match(mentionRegex);
let members = []; let members = [];
if (mentions && mentions.length <= 5) { if (mentions && mentions.length <= 5) {
@@ -127,9 +151,9 @@ export default async function highlightMentions (text) {
const regex = new RegExp(`@${username}(?![\\-\\w])`, 'g'); const regex = new RegExp(`@${username}(?![\\-\\w])`, 'g');
const replacement = `[@${username}](/profile/${member._id})`; const replacement = `[@${username}](/profile/${member._id})`;
textAndCodeBlocks.transformTextBlocks(blockText => blockText.replace(regex, replacement)); textBlocks.transformValidBlocks(blockText => blockText.replace(regex, replacement));
}); });
} }
return [textAndCodeBlocks.rebuild(), mentions, members]; return [textBlocks.rebuild(), mentions, members];
} }