From 2f8593f27d6dde45d6a92a1c14f659fc0199b75e Mon Sep 17 00:00:00 2001 From: taku0 Date: Thu, 9 Feb 2023 23:15:30 +0900 Subject: [PATCH] Fix list tightness. According to the specification, blank lines in a block quote don't separate list items: https://spec.commonmark.org/0.30/#example-320 Therefore, the following example should be tight: - > - a > - b The specification also says that link reference definitions can be children of list items when checking list tightness: https://spec.commonmark.org/0.30/#example-317 Therefore, the following example should be loose: - [aaa]: / [bbb]: / - b This commit fixes those problems with the following strategy: - Using source end position and start position of adjoining elements to check tightness. This requires adjusting source end position of some block types to exclude trailing blank lines. - Delaying removal of link reference definitions until the entire document is parsed. --- lib/blocks.js | 145 +++++++++++++++++++----------------- lib/node.js | 2 - test/regression.txt | 174 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 251 insertions(+), 70 deletions(-) diff --git a/lib/blocks.js b/lib/blocks.js index 6548a186..77d20787 100644 --- a/lib/blocks.js +++ b/lib/blocks.js @@ -74,23 +74,10 @@ var peek = function(ln, pos) { // These are methods of a Parser object, defined below. -// Returns true if block ends with a blank line, descending if needed -// into lists and sublists. +// Returns true if block ends with a blank line. var endsWithBlankLine = function(block) { - while (block) { - if (block._lastLineBlank) { - return true; - } - var t = block.type; - if (!block._lastLineChecked && (t === "list" || t === "item")) { - block._lastLineChecked = true; - block = block._lastChild; - } else { - block._lastLineChecked = true; - break; - } - } - return false; + return block.next && + block.sourcepos[1][0] !== block.next.sourcepos[0][0] - 1; }; // Add a line to the block at the tip. 
We assume the tip @@ -221,6 +208,60 @@ var closeUnmatchedBlocks = function() { } }; +// Remove link reference definitions from given tree. +var removeLinkReferenceDefinitions = function(parser, tree) { + var event, node; + var walker = tree.walker(); + var emptyNodes = []; + + while ((event = walker.next())) { + node = event.node; + if (event.entering && node.type === "paragraph") { + var pos; + var hasReferenceDefs = false; + + // Try parsing the beginning as link reference definitions; + // Note that link reference definitions must be the beginning of a + // paragraph node since link reference definitions cannot interrupt + // paragraphs. + while ( + peek(node._string_content, 0) === C_OPEN_BRACKET && + (pos = parser.inlineParser.parseReference( + node._string_content, + parser.refmap + )) + ) { + node._string_content = node._string_content.slice(pos); + hasReferenceDefs = true; + } + if (hasReferenceDefs && isBlank(node._string_content)) { + emptyNodes.push(node); + } + } + } + + for (node of emptyNodes) { + node.unlink(); + } +}; + +var trimTrailingSpaces = function(block, includeFinalNewline) { + var lines = block._string_content.split("\n"); + // Note that indented code block nor HTML block cannot be empty, + // so lines.length cannot be zero. + while (/^[ \t]*$/.test(lines[lines.length - 1])) { + lines.pop(); + } + block._literal = lines.join("\n"); + if (includeFinalNewline) { + block._literal = block._literal + "\n"; + } + block.sourcepos[1][0] = + block.sourcepos[0][0] + lines.length - 1; + block.sourcepos[1][1] = + block.sourcepos[0][1] + lines[lines.length - 1].length - 1; +} + // 'finalize' is run when the block is closed. // 'continue' is run to check whether the block is continuing // at a certain line and offset (e.g. 
whether a block quote @@ -231,7 +272,8 @@ var blocks = { continue: function() { return 0; }, - finalize: function() { + finalize: function(parser, block) { + removeLinkReferenceDefinitions(parser, block); return; }, canContain: function(t) { @@ -247,7 +289,7 @@ var blocks = { var item = block._firstChild; while (item) { // check for non-final list item ending with blank line: - if (endsWithBlankLine(item) && item._next) { + if (item._next && endsWithBlankLine(item)) { block._listData.tight = false; break; } @@ -256,8 +298,8 @@ var blocks = { var subitem = item._firstChild; while (subitem) { if ( - endsWithBlankLine(subitem) && - (item._next || subitem._next) + subitem._next && + endsWithBlankLine(subitem) ) { block._listData.tight = false; break; @@ -266,6 +308,7 @@ var blocks = { } item = item._next; } + block.sourcepos[1] = block._lastChild.sourcepos[1]; }, canContain: function(t) { return t === "item"; @@ -320,7 +363,16 @@ var blocks = { } return 0; }, - finalize: function() { + finalize: function(parser, block) { + if (block._lastChild) { + block.sourcepos[1] = block._lastChild.sourcepos[1]; + } else { + // Empty list item + block.sourcepos[1][0] = block.sourcepos[0][0]; + block.sourcepos[1][1] = + block._listData.markerOffset + block._listData.padding; + } + return; }, canContain: function(t) { @@ -402,10 +454,7 @@ var blocks = { block._literal = rest; } else { // indented - block._literal = block._string_content.replace( - /(\n *)+$/, - "\n" - ); + trimTrailingSpaces(block, true); } block._string_content = null; // allow GC }, @@ -423,7 +472,7 @@ var blocks = { : 0; }, finalize: function(parser, block) { - block._literal = block._string_content.replace(/(\n *)+$/, ""); + trimTrailingSpaces(block, false); block._string_content = null; // allow GC }, canContain: function() { @@ -435,24 +484,8 @@ var blocks = { continue: function(parser) { return parser.blank ? 
1 : 0; }, - finalize: function(parser, block) { - var pos; - var hasReferenceDefs = false; - - // try parsing the beginning as link reference definitions: - while ( - peek(block._string_content, 0) === C_OPEN_BRACKET && - (pos = parser.inlineParser.parseReference( - block._string_content, - parser.refmap - )) - ) { - block._string_content = block._string_content.slice(pos); - hasReferenceDefs = true; - } - if (hasReferenceDefs && isBlank(block._string_content)) { - block.unlink(); - } + finalize: function() { + return; }, canContain: function() { return false; @@ -835,33 +868,9 @@ var incorporateLine = function(ln) { // finalize any blocks not matched this.closeUnmatchedBlocks(); - if (this.blank && container.lastChild) { - container.lastChild._lastLineBlank = true; - } t = container.type; - // Block quote lines are never blank as they start with > - // and we don't count blanks in fenced code for purposes of tight/loose - // lists or breaking out of lists. We also don't set _lastLineBlank - // on an empty list item, or if we just closed a fenced block. 
- var lastLineBlank = - this.blank && - !( - t === "block_quote" || - (t === "code_block" && container._isFenced) || - (t === "item" && - !container._firstChild && - container.sourcepos[0][0] === this.lineNumber) - ); - - // propagate lastLineBlank up through parents: - var cont = container; - while (cont) { - cont._lastLineBlank = lastLineBlank; - cont = cont._parent; - } - if (this.blocks[t].acceptsLines) { this.addLine(); // if HtmlBlock, check for end condition diff --git a/lib/node.js b/lib/node.js index 0e9c4b6f..12a17e03 100644 --- a/lib/node.js +++ b/lib/node.js @@ -74,8 +74,6 @@ var Node = function(nodeType, sourcepos) { this._prev = null; this._next = null; this._sourcepos = sourcepos; - this._lastLineBlank = false; - this._lastLineChecked = false; this._open = true; this._string_content = null; this._literal = null; diff --git a/test/regression.txt b/test/regression.txt index 91513f04..64debb82 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -253,3 +253,177 @@ x

x

```````````````````````````````` +Block-quoted blank line shouldn't make parent list loose. +```````````````````````````````` example +## Case 1 + +- > a + > +- b + + +## Case 2 + +- > - a + > +- b + + +## Case 3 + +- > > a + > +- b + + +## Case 4 + +- > # a + > +- b + + +## Case 5 + +- ``` + The following line is part of code block. + +- b + +## Case 6 + +- The following line is **not** part of code block. + +- b + +## Case 7 + +-
The following line is **not** part of HTML block.
+
+- 
+- b +. +

Case 1

+ +

Case 2

+ +

Case 3

+ +

Case 4

+ +

Case 5

+ +

Case 6

+ +

Case 7

+ +```````````````````````````````` + +Link reference definitions are blocks when checking list tightness. +```````````````````````````````` example +## Case 1 + +- [aaa]: / + + [aaa]: / +- b + + +## Case 2 + +- a + + [aaa]: / +- b + + +## Case 3 + +- [aaa]: / + + a +- b +. +

Case 1

+ +

Case 2

+ +

Case 3

+ +````````````````````````````````