Skip to content

Commit

Permalink
fix(markdown-serializer): Override Turndown escaping behaviour with custom rules
Browse files Browse the repository at this point in the history

BREAKING CHANGE: With this change, we are making sure that the Markdown
serializer escaping behaviour is tailored to our needs in every place
that it's used. Thus, we no longer need the `options` parameter to
disable the escaping behaviour.
  • Loading branch information
rfgamaral committed Jan 16, 2023
1 parent 3599b44 commit 1e4a581
Show file tree
Hide file tree
Showing 3 changed files with 199 additions and 120 deletions.
9 changes: 8 additions & 1 deletion src/constants/regular-expressions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,11 @@
*/
const REGEX_LINE_BREAKS = /(?:\r\n|\r|\n)/g

export { REGEX_LINE_BREAKS }
/**
 * A regex that matches a single punctuation character: any of the 32 US-ASCII punctuation
 * characters, or any character in the Unicode ranges U+2000–U+206F (General Punctuation) and
 * U+2E00–U+2E7F (Supplemental Punctuation).
 *
 * The pattern is declared without flags, so it is not global and matches at most one character
 * per execution.
 *
 * @see https://stackoverflow.com/a/25575009
 */
const REGEX_PUNCTUATION = /[\u2000-\u206f\u2e00-\u2e7f'!"#$%&()*+,\-./:;<=>?@\\[\]^_`{|}~]/

export { REGEX_LINE_BREAKS, REGEX_PUNCTUATION }
257 changes: 160 additions & 97 deletions src/serializers/markdown/markdown.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@ import { createMarkdownSerializer } from './markdown'

import type { MarkdownSerializerReturnType } from './markdown'

/**
 * HTML fixture made of `<br>`-separated lines that contain entity-encoded special HTML
 * characters: ampersand (`&amp;`), angle brackets (`&lt;`/`&gt;`), double quotes (`&quot;`)
 * and single quotes (`&#39;`).
 */
const HTML_INPUT_SPECIAL_HTML_CHARS = `Ambition &amp; Balance<br>
&lt;doist&gt;<br>
&lt;/doist&gt;<br>
&lt;doist&gt;&lt;/doist&gt;<br>
&quot;Doist&quot;<br>
&#39;Doist&#39;`

const HTML_INPUT_HEADINGS = `<h1>Heading level 1</h1>
<h2>Heading level 2</h2>
<h3>Heading level 3</h3>
Expand Down Expand Up @@ -94,11 +101,6 @@ const HTML_INPUT_UNORDERED_LISTS = `<ul>
</ul>
<hr>
<ul>
<li>1968. A great year!</li>
<li>I think 1969 was second best.</li>
</ul>
<hr>
<ul>
<li>This is the first list item.</li>
<li>Here&#39;s the second list item.<br> I need to add another paragraph below the second list item.</li>
<li>And here&#39;s the third list item.</li>
Expand Down Expand Up @@ -137,11 +139,6 @@ const HTML_INPUT_TASK_LISTS = `<ul data-type="taskList">
</ul>
<hr>
<ul data-type="taskList">
<li data-type="taskItem" data-checked="false">1968. A great year!</li>
<li data-type="taskItem" data-checked="true">I think 1969 was second best.</li>
</ul>
<hr>
<ul data-type="taskList">
<li data-type="taskItem" data-checked="false">This is the first list item.</li>
<li data-type="taskItem" data-checked="false">Here&#39;s the second list item.<br> I need to add another paragraph below the second list item.</li>
<li data-type="taskItem" data-checked="false">And here&#39;s the third list item.</li>
Expand Down Expand Up @@ -183,29 +180,40 @@ const HTML_INPUT_LINKS = `<p>My favorite search engine is <a href="https://duckd

const HTML_INPUT_STYLED_LINKS = `<p>I love supporting the <strong><a href="https://eff.org">EFF</a></strong>.<br>This is the <em><a href="https://www.markdownguide.org">Markdown Guide</a></em>.<br>See the section on <a href="#code"><code>code</code></a>.</p>`

describe('Markdown Serializer', () => {
const HTML_INPUT_SPECIAL_HTML_CHARS = `Ambition &amp; Balance<br>
&lt;doist&gt;<br>
&lt;/doist&gt;<br>
&lt;doist&gt;&lt;/doist&gt;<br>
&quot;Doist&quot;<br>
&#39;Doist&#39;`

const HTML_INPUT_SPECIAL_MARKDOWN_CHARS = `before \\ after<br>
before * after<br>
- after<br>
+ after<br>
= after<br>
=== after<br>
\` after <br>
~~~ after<br>
before [ after<br>
before ] after<br>
> after<br>
before _ after<br>
1. after<br>
99. after<br>`
/**
 * HTML fixture with one paragraph per US-ASCII punctuation character, each preceded by a single
 * backslash. The characters `&`, `<` and `>` are written as HTML entities.
 *
 * Because this is a template literal, every `\\` in the source is ONE backslash in the resulting
 * string (so `\\\\` is two backslashes), and `\`` is a literal backtick.
 *
 * NOTE(review): "PONCTUATION" looks like a typo for "PUNCTUATION"; renaming it requires updating
 * every usage of this constant in the test file at the same time.
 */
const HTML_INPUT_PONCTUATION_CHARACTERS = `<p>\\'</p>
<p>\\!</p>
<p>\\"</p>
<p>\\#</p>
<p>\\$</p>
<p>\\%</p>
<p>\\&amp;</p>
<p>\\(</p>
<p>\\)</p>
<p>\\*</p>
<p>\\+</p>
<p>\\,</p>
<p>\\\\</p>
<p>\\-</p>
<p>\\.</p>
<p>\\/</p>
<p>\\:</p>
<p>\\;</p>
<p>\\&lt;</p>
<p>\\=</p>
<p>\\&gt;</p>
<p>\\?</p>
<p>\\@</p>
<p>\\[</p>
<p>\\]</p>
<p>\\^</p>
<p>\\_</p>
<p>\\\`</p>
<p>\\{</p>
<p>\\|</p>
<p>\\}</p>
<p>\\~</p>`

describe('Markdown Serializer', () => {
describe('Plain-text Document', () => {
describe('with default extensions', () => {
let markdownSerializer: MarkdownSerializerReturnType
Expand All @@ -227,24 +235,6 @@ before _ after<br>
'Doist'`)
})

test('special Markdown characters are NOT escaped', () => {
expect(markdownSerializer.serialize(HTML_INPUT_SPECIAL_MARKDOWN_CHARS))
.toBe(`before \\ after
before * after
- after
+ after
= after
=== after
\` after
~~~ after
before [ after
before ] after
> after
before _ after
1. after
99. after`)
})

test('paragraphs Markdown output is correct', () => {
expect(markdownSerializer.serialize(HTML_INPUT_PARAGRAPHS)).toBe(
`I really like using Markdown.
Expand All @@ -260,6 +250,49 @@ I think I'll use it to format all of my documents from now on.`,
replacement: expect.any(Function),
})
})

describe('with overridden `escape` function', () => {
test('return the input as-is (escaping behaviour disabled)', () => {
expect(
markdownSerializer.serialize(`<p>- 1968. A great year!</p>
<p>- I think 1969 was second best.</p>`),
).toBe(`- 1968. A great year!
- I think 1969 was second best.`)
expect(markdownSerializer.serialize(HTML_INPUT_PONCTUATION_CHARACTERS))
.toBe(`\\'
\\!
\\"
\\#
\\$
\\%
\\&
\\(
\\)
\\*
\\+
\\,
\\\\
\\-
\\.
\\/
\\:
\\;
\\<
\\=
\\>
\\?
\\@
\\[
\\]
\\^
\\_
\\\`
\\{
\\|
\\}
\\~`)
})
})
})

describe('with custom `*Suggestion` extensions', () => {
Expand Down Expand Up @@ -292,7 +325,7 @@ Answer: [Doist Frontend](channel://190200)`)
})

describe('Rich-text Document', () => {
describe('without default extensions', () => {
describe('with default extensions', () => {
let markdownSerializer: MarkdownSerializerReturnType

beforeEach(() => {
Expand All @@ -309,24 +342,6 @@ Answer: [Doist Frontend](channel://190200)`)
'Doist'`)
})

test('special Markdown characters are escaped', () => {
expect(markdownSerializer.serialize(HTML_INPUT_SPECIAL_MARKDOWN_CHARS))
.toBe(`before \\\\ after
before \\* after
\\- after
\\+ after
\\= after
\\=== after
\\\` after
\\~~~ after
before \\[ after
before \\] after
\\> after
before \\_ after
1\\. after
99\\. after`)
})

test('headings Markdown output is correct', () => {
expect(markdownSerializer.serialize(HTML_INPUT_HEADINGS)).toBe(
'# Heading level 1\n\n## Heading level 2\n\n### Heading level 3\n\n#### Heading level 4\n\n##### Heading level 5\n\n###### Heading level 6',
Expand Down Expand Up @@ -427,11 +442,6 @@ Strikethrough uses two tildes: ~~scratch this~~`,
---
- 1968\\. A great year!
- I think 1969 was second best.
---
- This is the first list item.
- Here's the second list item.
I need to add another paragraph below the second list item.
Expand Down Expand Up @@ -471,11 +481,6 @@ Strikethrough uses two tildes: ~~scratch this~~`,
---
- 1968\\. A great year!
- I think 1969 was second best.
---
- This is the first list item.
- Here's the second list item.
I need to add another paragraph below the second list item.
Expand Down Expand Up @@ -554,15 +559,83 @@ See the section on [\`code\`](#code).`,
)
})

test('special Markdown characters are NOT escaped if `escape` is disabled', () => {
const customSerializer = createMarkdownSerializer(getSchema([RichTextKit]), {
escape: false,
describe('with overridden `escape` function', () => {
test('backslash characters preceding punctuation characters are escaped correctly', () => {
expect(markdownSerializer.serialize(HTML_INPUT_PONCTUATION_CHARACTERS))
.toBe(`\\\\'
\\\\!
\\\\"
\\\\#
\\\\$
\\\\%
\\\\&
\\\\(
\\\\)
\\\\*
\\\\+
\\\\,
\\\\\\
\\\\-
\\\\.
\\\\/
\\\\:
\\\\;
\\\\<
\\\\=
\\\\>
\\\\?
\\\\@
\\\\[
\\\\]
\\\\^
\\\\_
\\\\\`
\\\\{
\\\\|
\\\\}
\\\\~`)
})

test('text content that matches the ordered list syntax is escaped correctly', () => {
expect(
markdownSerializer.serialize(`<ul>
<li>1968. A great year!</li>
<li>I think 1969 was second best.</li>
</ul>`),
).toBe(`- 1968\\. A great year!
- I think 1969 was second best.`)
})
expect(
customSerializer.serialize(
`<p><strong>Wrapped markdown</strong> **still markdown**</p>`,
),
).toBe(`**Wrapped markdown** **still markdown**`)
})
})

Expand Down Expand Up @@ -620,11 +693,6 @@ See the section on [\`code\`](#code).`,
---
- 1968\\. A great year!
- I think 1969 was second best.
---
- This is the first list item.
- Here's the second list item.
I need to add another paragraph below the second list item.
Expand Down Expand Up @@ -686,11 +754,6 @@ See the section on [\`code\`](#code).`,
---
- [ ] 1968\\. A great year!
- [x] I think 1969 was second best.
---
- [ ] This is the first list item.
- [ ] Here's the second list item.
I need to add another paragraph below the second list item.
Expand Down
Loading

0 comments on commit 1e4a581

Please sign in to comment.