Skip to content

Commit

Permalink
Fix ExpensiMark: prevent html tags in alt attribute
Browse files Browse the repository at this point in the history
Related to: #658 (comment)

Content intended for the alt attribute in images is being incorrectly parsed from Markdown to HTML if it contains MD special characters
  • Loading branch information
kidroca committed Mar 19, 2024
1 parent f843a1f commit 8c0b7a1
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 12 deletions.
15 changes: 6 additions & 9 deletions __tests__/ExpensiMark-HTML-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -1779,14 +1779,14 @@ describe('when should keep raw input flag is enabled', () => {
});
});
});

test('Test code fence within inline code', () => {
let testString = 'Hello world `(```test```)` Hello world';
expect(parser.replace(testString)).toBe('Hello world &#x60;(<pre>test</pre>)&#x60; Hello world');

testString = 'Hello world `(```test\ntest```)` Hello world';
expect(parser.replace(testString)).toBe('Hello world &#x60;(<pre>test<br />test</pre>)&#x60; Hello world');

testString = 'Hello world ```(`test`)``` Hello world';
expect(parser.replace(testString)).toBe('Hello world <pre>(&#x60;test&#x60;)</pre> Hello world');

Expand Down Expand Up @@ -1893,12 +1893,9 @@ describe('Image markdown conversion to html tag', () => {
expect(parser.replace(testString)).toBe(resultString);
});

// Currently any markdown used inside the square brackets is converted to html string in the alt attribute
// The attributes should only contain plain text, but it doesn't seem possible to convert markdown to plain text
// or let the parser know not to convert markdown to html for html attributes
xtest('Image with alt text containing markdown', () => {
const testString = '![*bold* _italic_ ~strike~](https://example.com/image.png)';
const resultString = '<img src="https://example.com/image.png" alt="*bold* _italic_ ~strike~" />';
test('Image with alt text containing markdown', () => {
const testString = '![# fake-heading *bold* _italic_ ~strike~ [:-)]](https://example.com/image.png)';
const resultString = '<img src="https://example.com/image.png" alt="# fake-heading &ast;bold&ast; &lowbar;italic&lowbar; &#126;strike&#126; &lbrack;:-)&rbrack;" />';
expect(parser.replace(testString)).toBe(resultString);
});

Expand Down
6 changes: 6 additions & 0 deletions __tests__/ExpensiMark-Markdown-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -769,4 +769,10 @@ describe('Image tag conversion to markdown', () => {
const resultString = '![https://example.com/image.png](https://example.com/image.png)';
expect(parser.htmlToMarkdown(testString)).toBe(resultString);
});

test('Image with alt text containing escaped markdown', () => {
const testString = '<img src="https://example.com/image.png" alt="&ast;bold&ast; &lowbar;italic&lowbar; &#126;strike&#126;" />';
const resultString = '![*bold* _italic_ ~strike~](https://example.com/image.png)';
expect(parser.htmlToMarkdown(testString)).toBe(resultString);
});
});
29 changes: 26 additions & 3 deletions lib/ExpensiMark.js
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,13 @@ export default class ExpensiMark {
* Converts markdown style images to img tags e.g. ![Expensify](https://www.expensify.com/attachment.png)
* We need to convert before linking rules since they will not try to create a link from an existing img
* tag.
* Additional sanitization is done to the alt attribute to prevent parsing it further to html by later rules.
*/
{
name: 'image',
regex: MARKDOWN_IMAGE_REGEX,

replacement: (match, g1, g2) => `<img src="${Str.sanitizeURL(g2)}" alt="${g1}" />`,
rawInputReplacement: (match, g1, g2) => `<img src="${Str.sanitizeURL(g2)}" alt="${g1}" data-raw-href="${g2}" data-link-variant="labeled" />`
replacement: (match, g1, g2) => `<img src="${Str.sanitizeURL(g2)}" alt="${this.escapeMarkdownEntities(g1)}" />`,
rawInputReplacement: (match, g1, g2) => `<img src="${Str.sanitizeURL(g2)}" alt="${this.escapeMarkdownEntities(g1)}" data-raw-href="${g2}" data-link-variant="labeled" />`
},

/**
Expand Down Expand Up @@ -946,4 +946,27 @@ export default class ExpensiMark {
const linksInNew = this.extractLinksInMarkdownComment(newComment);
return linksInOld === undefined || linksInNew === undefined ? [] : _.difference(linksInOld, linksInNew);
}

/**
* Replace MD characters with their HTML entity equivalent
* @param {String} text
* @return {String}
*/
escapeMarkdownEntities(text) {
// A regex pattern matching special MD characters we'd like to escape
const pattern = /([*_{}[\]#~])/g;

// A map of MD characters to their HTML entity equivalent
const entities = {
'*': '&ast;',
_: '&lowbar;',
'{': '&lbrace;',
'}': '&rbrace;',
'[': '&lbrack;',
']': '&rbrack;',
'~': '&#126;',
};

return text.replace(pattern, char => entities[char] || char);
}
}

0 comments on commit 8c0b7a1

Please sign in to comment.