From 30f6828290f9bc84c5d9b7d04a1abb1285b6de25 Mon Sep 17 00:00:00 2001 From: digitalMoksha Date: Mon, 2 Sep 2024 12:09:34 -0500 Subject: [PATCH 1/3] Add support for multiple description definitions --- script/cibuild | 2 + src/html.rs | 2 +- src/parser/mod.rs | 21 ++ src/tests/description_lists.rs | 4 +- src/tests/fixtures/description_lists.md | 308 ++++++++++++++++++++++++ 5 files changed, 334 insertions(+), 3 deletions(-) create mode 100644 src/tests/fixtures/description_lists.md diff --git a/script/cibuild b/script/cibuild index 794441cc..64016bb7 100755 --- a/script/cibuild +++ b/script/cibuild @@ -48,6 +48,8 @@ python3 spec_tests.py --no-normalize --spec ../../../src/tests/fixtures/wikilink || failed=1 python3 spec_tests.py --no-normalize --spec ../../../src/tests/fixtures/wikilinks_title_before_pipe.md "$PROGRAM_ARG -e wikilinks-title-before-pipe" \ || failed=1 +python3 spec_tests.py --no-normalize --spec ../../../src/tests/fixtures/description_lists.md "$PROGRAM_ARG -e description-lists" \ + || failed=1 python3 spec_tests.py --no-normalize --spec regression.txt "$PROGRAM_ARG" \ || failed=1 diff --git a/src/html.rs b/src/html.rs index e8ac2303..35fdebe6 100644 --- a/src/html.rs +++ b/src/html.rs @@ -512,7 +512,7 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { self.cr()?; self.output.write_all(b"")?; + self.output.write_all(b">\n")?; } else { self.output.write_all(b"\n")?; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0b8f3171..3450d48c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1815,6 +1815,27 @@ impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> { *container = details; + true + } else if node_matches!(last_child, NodeValue::DescriptionItem(..)) { + let parent = last_child.parent().unwrap(); + reopen_ast_nodes(parent); + + let metadata = NodeDescriptionItem { + marker_offset: self.indent, + padding: 2, + }; + + let item = self.add_child( + parent, + NodeValue::DescriptionItem(metadata), + self.first_nonspace + 1, + ); + + let details = + self.add_child(item, NodeValue::DescriptionDetails, self.first_nonspace + 1); + + *container = details; + true } else { false diff --git a/src/tests/description_lists.rs b/src/tests/description_lists.rs index 06970fc0..13f11e9f 100644 --- a/src/tests/description_lists.rs +++ b/src/tests/description_lists.rs @@ -14,7 +14,7 @@ fn description_lists() { ": Definition 2\n" ), concat!( - "
", + "
\n", "
Term 1
\n", "
\n", "

Definition 1

\n", @@ -41,7 +41,7 @@ fn description_lists() { "
    \n", "
  • \n", "

    Nested

    \n", - "
    ", + "
    \n", "
    Term 1
    \n", "
    \n", "

    Definition 1

    \n", diff --git a/src/tests/fixtures/description_lists.md b/src/tests/fixtures/description_lists.md new file mode 100644 index 00000000..72d4cace --- /dev/null +++ b/src/tests/fixtures/description_lists.md @@ -0,0 +1,308 @@ +--- +title: Description / defintition lists +based_on: https://github.com/jgm/commonmark-hs/blob/master/commonmark-extensions/test/definition_lists.md +--- + +## Definition lists + +The term is given on a line by itself, followed by +one or more definitions. Each definition must begin +with `:` (after 0-2 spaces); subsequent lines must +be indented unless they are lazy paragraph +continuations. + +The list is tight if there is no blank line between +the term and the first definition, otherwise loose. + +```````````````````````````````` example +apple +: red fruit + +orange +: orange fruit +. +
    +
    apple
    +
    red fruit +
    +
    orange
    +
    orange fruit +
    +
    +```````````````````````````````` + +Loose: + +```````````````````````````````` example +apple + +: red fruit + +orange + +: orange fruit +. +
    +
    apple
    +
    +

    red fruit

    +
    +
    orange
    +
    +

    orange fruit

    +
    +
    +```````````````````````````````` + +Indented marker: + +```````````````````````````````` example +apple + : red fruit + +orange + : orange fruit +. +
    +
    apple
    +
    red fruit +
    +
    orange
    +
    orange fruit +
    +
    +```````````````````````````````` + +```````````````````````````````` example +apple + + : red fruit + +orange + + : orange fruit +. +
    +
    apple
    +
    +

    red fruit

    +
    +
    orange
    +
    +

    orange fruit

    +
    +
    +```````````````````````````````` + +Multiple blocks in a definition: + +```````````````````````````````` example +*apple* + +: red fruit + + contains seeds, + crisp, pleasant to taste + +*orange* + +: orange fruit + + { orange code block } + + > orange block quote +. +
    +
    apple
    +
    +

    red fruit

    +

    contains seeds, +crisp, pleasant to taste

    +
    +
    orange
    +
    +

    orange fruit

    +
    { orange code block }
    +
    +
    +

    orange block quote

    +
    +
    +
    +```````````````````````````````` + +Nested lists: + +```````````````````````````````` example +term + +: 1. Para one + + Para two +. +
    +
    term
    +
    +
      +
    1. Para one

      +

      Para two

    2. +
    +
    +
    +```````````````````````````````` + +Multiple definitions, tight: + +```````````````````````````````` example +apple +: red fruit +: computer company + +orange +: orange fruit +: telecom company +. +
    +
    apple
    +
    red fruit +
    +
    computer company +
    +
    orange
    +
    orange fruit +
    +
    telecom company +
    +
    +```````````````````````````````` + +Multiple definitions, loose: + +```````````````````````````````` example +apple + +: red fruit + +: computer company + +orange + +: orange fruit +: telecom company +. +
    +
    apple
    +
    +

    red fruit

    +
    +
    +

    computer company

    +
    +
    orange
    +
    +

    orange fruit

    +
    +
    +

    telecom company

    +
    +
    +```````````````````````````````` + +Lazy line continuations: + +```````````````````````````````` example +apple + +: red fruit + +: computer +company + +orange + +: orange +fruit +: telecom company +. +
    +
    apple
    +
    +

    red fruit

    +
    +
    +

    computer +company

    +
    +
    orange
    +
    +

    orange +fruit

    +
    +
    +

    telecom company

    +
    +
    +```````````````````````````````` + + + +`~` may be used as a marker instead of `:`: + +```````````````````````````````` example +apple + ~ red fruit + +orange + ~ orange fruit +. +
    +
    apple
    +
    red fruit +
    +
    orange
    +
    orange fruit +
    +
    +```````````````````````````````` + +Definition terms may span multiple lines: + +```````````````````````````````` example +a +b\ +c + +: foo +. +
    +
    a +b
    +c
    +
    +

    foo

    +
    +
    +```````````````````````````````` + +Definition list with preceding paragraph +(): + +```````````````````````````````` example +Foo + +bar +: baz + +bim +: bor +. +

    Foo

    +
    +
    bar
    +
    baz +
    +
    bim
    +
    bor +
    +
    +```````````````````````````````` From 2db55104fef2e45569d277d8ac893380ca628fd7 Mon Sep 17 00:00:00 2001 From: digitalMoksha Date: Fri, 27 Sep 2024 21:06:24 -0500 Subject: [PATCH 2/3] Allow defintions to follow a term without a blank line --- src/parser/mod.rs | 20 +++-- src/scanners.re | 10 +++ src/scanners.rs | 100 +++++++++++++++++++++++- src/tests/fixtures/description_lists.md | 50 ++++++++---- 4 files changed, 158 insertions(+), 22 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3450d48c..6dfdf431 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -288,7 +288,7 @@ pub struct ExtensionOptions { /// let mut options = Options::default(); /// options.extension.description_lists = true; /// assert_eq!(markdown_to_html("Term\n\n: Definition", &options), - /// "
    Term
    \n
    \n

    Definition

    \n
    \n
    \n"); + /// "
    \n
    Term
    \n
    \n

    Definition

    \n
    \n
    \n"); /// ``` pub description_lists: bool, @@ -1503,10 +1503,13 @@ impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> { container.data.borrow_mut().internal_offset = matched; } else if !indented && self.options.extension.description_lists - && line[self.first_nonspace] == b':' + && unwrap_into( + scanners::description_item_start(&line[self.first_nonspace..]), + &mut matched, + ) && self.parse_desc_list_details(container) { - let offset = self.first_nonspace + 1 - self.offset; + let offset = self.first_nonspace + matched - self.offset; self.advance_offset(line, offset, false); if strings::is_space_or_tab(line[self.offset]) { self.advance_offset(line, 1, true); @@ -1748,10 +1751,17 @@ impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> { } } - fn parse_desc_list_details(&mut self, container: &mut &'a AstNode<'a>) -> bool { + fn parse_desc_list_details(&mut self, node: &mut &'a AstNode<'a>) -> bool { + let container = node; + let last_child = match container.last_child() { Some(lc) => lc, - None => return false, + None => { + // Happens when the detail line is directly after the term, + // without a blank line between. + *container = container.parent().unwrap(); + container.last_child().unwrap() + } }; if node_matches!(last_child, NodeValue::Paragraph) { diff --git a/src/scanners.re b/src/scanners.re index 35bbc4f6..ecdcaa8c 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -431,4 +431,14 @@ pub fn tasklist(s: &[u8]) -> Option<(usize, u8)> { */ } +pub fn description_item_start(s: &[u8]) -> Option { + let mut cursor = 0; + let _marker = 0; + let len = s.len(); +/*!re2c + [:~] ([ \t]+|[\r\n]) { return Some(cursor); } + * { return None; } +*/ +} + // vim: set ft=rust: diff --git a/src/scanners.rs b/src/scanners.rs index 52b2a747..43e72ca6 100644 --- a/src/scanners.rs +++ b/src/scanners.rs @@ -1,4 +1,4 @@ -/* Generated by re2c 3.1 */ +/* Generated by re2c 3.0 */ pub fn atx_heading_start(s: &[u8]) -> Option { let mut cursor = 0; @@ -23848,4 +23848,102 @@ pub fn tasklist(s: &[u8]) -> Option<(usize, u8)> { } } +pub fn description_item_start(s: &[u8]) -> Option { + let mut cursor = 0; + let _marker = 0; + let len = s.len(); + + { + #[allow(unused_assignments)] + let mut yych: u8 = 0; + let mut yystate: usize = 0; + 'yyl: loop { + match yystate { + 0 => { + yych = unsafe { + if cursor < len { + *s.get_unchecked(cursor) + } else { + 0 + } + }; + cursor += 1; + match yych { + 0x3A | 0x7E => { + yystate = 3; + continue 'yyl; + } + _ => { + yystate = 1; + continue 'yyl; + } + } + } + 1 => { + yystate = 2; + continue 'yyl; + } + 2 => { + return None; + } + 3 => { + yych = unsafe { + if cursor < len { + *s.get_unchecked(cursor) + } else { + 0 + } + }; + match yych { + 0x09 | 0x20 => { + cursor += 1; + yystate = 4; + continue 'yyl; + } + 0x0A | 0x0D => { + cursor += 1; + yystate = 6; + continue 'yyl; + } + _ => { + yystate = 2; + continue 'yyl; + } + } + } + 4 => { + yych = unsafe { + if cursor < len { + *s.get_unchecked(cursor) + } else { + 0 + } + }; + match yych { + 0x09 | 0x20 => { + cursor += 1; + yystate = 4; + continue 'yyl; + } + _ => { + yystate = 5; + continue 'yyl; + } + } + } + 5 => { + return Some(cursor); + } + 6 => { + yystate = 5; + continue 'yyl; + } + _ => { + panic!("internal lexer error") + } + } + } + } +} + // vim: set ft=rust: diff --git a/src/tests/fixtures/description_lists.md b/src/tests/fixtures/description_lists.md index 72d4cace..a9ca0fc4 100644 --- a/src/tests/fixtures/description_lists.md +++ b/src/tests/fixtures/description_lists.md @@ -11,8 +11,9 @@ with `:` (after 0-2 spaces); subsequent lines must be indented unless they are lazy paragraph continuations. -The list is tight if there is no blank line between -the term and the first definition, otherwise loose. +There is no distinction between a "tight" list or a +"loose" list. Definitions are always wrapped in `

    ` +tags. ```````````````````````````````` example apple @@ -23,10 +24,12 @@ orange .

    apple
    -
    red fruit +
    +

    red fruit

    orange
    -
    orange fruit +
    +

    orange fruit

    ```````````````````````````````` @@ -65,10 +68,12 @@ orange .
    apple
    -
    red fruit +
    +

    red fruit

    orange
    -
    orange fruit +
    +

    orange fruit

    ```````````````````````````````` @@ -96,6 +101,8 @@ orange Multiple blocks in a definition: +Note that the column + ```````````````````````````````` example *apple* @@ -144,14 +151,17 @@ term
    term
      -
    1. Para one

      -

      Para two

    2. +
    3. +

      Para one

      +

      Para two

      +
    ```````````````````````````````` Multiple definitions, tight: +(always rendered as loose) ```````````````````````````````` example apple @@ -164,14 +174,18 @@ orange .
    apple
    -
    red fruit +
    +

    red fruit

    -
    computer company +
    +

    computer company

    orange
    -
    orange fruit +
    +

    orange fruit

    -
    telecom company +
    +

    telecom company

    ```````````````````````````````` @@ -257,10 +271,12 @@ orange .
    apple
    -
    red fruit +
    +

    red fruit

    orange
    -
    orange fruit +
    +

    orange fruit

    ```````````````````````````````` @@ -299,10 +315,12 @@ bim

    Foo

    bar
    -
    baz +
    +

    baz

    bim
    -
    bor +
    +

    bor

    ```````````````````````````````` From 3694ebe2e951e2459489f2f26d010f6e51dff9cd Mon Sep 17 00:00:00 2001 From: digitalMoksha Date: Sat, 28 Sep 2024 12:58:46 -0500 Subject: [PATCH 3/3] Try and get tight / loose working --- src/html.rs | 11 ++--- src/nodes.rs | 4 ++ src/parser/mod.rs | 28 ++++++++++++- src/tests/fixtures/description_lists.md | 56 +++++++------------------ 4 files changed, 51 insertions(+), 48 deletions(-) diff --git a/src/html.rs b/src/html.rs index 35fdebe6..b4142fd3 100644 --- a/src/html.rs +++ b/src/html.rs @@ -696,14 +696,15 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { .map(|n| n.data.borrow().value.clone()) { Some(NodeValue::List(nl)) => nl.tight, + Some(NodeValue::DescriptionItem(nd)) => nd.tight, _ => false, }; - let tight = tight - || matches!( - node.parent().map(|n| n.data.borrow().value.clone()), - Some(NodeValue::DescriptionTerm) - ); + // let tight = tight + // || matches!( + // node.parent().map(|n| n.data.borrow().value.clone()), + // Some(NodeValue::DescriptionTerm) + // ); if !tight { if entering { diff --git a/src/nodes.rs b/src/nodes.rs index 95b58a59..d2964866 100644 --- a/src/nodes.rs +++ b/src/nodes.rs @@ -307,6 +307,10 @@ pub struct NodeDescriptionItem { /// Number of characters between the start of the list marker and the item text (including the list marker(s)). pub padding: usize, + + /// Whether the list is [tight](https://github.github.com/gfm/#tight), i.e. whether the + /// paragraphs are wrapped in `

    ` tags when formatted as HTML. + pub tight: bool, } /// The type of list. diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6dfdf431..6993a145 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1753,12 +1753,14 @@ impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> { fn parse_desc_list_details(&mut self, node: &mut &'a AstNode<'a>) -> bool { let container = node; - + let mut tight = false; + let last_child = match container.last_child() { Some(lc) => lc, None => { // Happens when the detail line is directly after the term, // without a blank line between. + tight = true; *container = container.parent().unwrap(); container.last_child().unwrap() } @@ -1809,6 +1811,7 @@ impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> { let metadata = NodeDescriptionItem { marker_offset: self.indent, padding: 2, + tight, }; let item = self.add_child( @@ -1827,12 +1830,14 @@ impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> { true } else if node_matches!(last_child, NodeValue::DescriptionItem(..)) { + // ORIGINAL CODE let parent = last_child.parent().unwrap(); reopen_ast_nodes(parent); let metadata = NodeDescriptionItem { marker_offset: self.indent, padding: 2, + tight, }; let item = self.add_child( @@ -1847,6 +1852,27 @@ impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> { *container = details; true + + // ATTEMPT 1 + // reopen_ast_nodes(last_child); + // + // let details = + // self.add_child(last_child, NodeValue::DescriptionDetails, self.first_nonspace + 1); + // *container = details; + // + // true + + // ATTEMPT 2 + // let parent = last_child.parent().unwrap(); + // let item = parent.last_child().unwrap(); + // + // reopen_ast_nodes(item); + // + // let details = + // self.add_child(item, NodeValue::DescriptionDetails, self.first_nonspace + 1); + // *container = details; + // + // true } else { false } diff --git a/src/tests/fixtures/description_lists.md b/src/tests/fixtures/description_lists.md index a9ca0fc4..5f689014 100644 --- a/src/tests/fixtures/description_lists.md +++ b/src/tests/fixtures/description_lists.md @@ -11,9 +11,8 @@ with `:` (after 0-2 spaces); subsequent lines must be indented unless they are lazy paragraph continuations. -There is no distinction between a "tight" list or a -"loose" list. Definitions are always wrapped in `

    ` -tags. +The list is tight if there is no blank line between +the term and the first definition, otherwise loose. ```````````````````````````````` example apple @@ -24,13 +23,9 @@ orange .

    apple
    -
    -

    red fruit

    -
    +
    red fruit
    orange
    -
    -

    orange fruit

    -
    +
    orange fruit
    ```````````````````````````````` @@ -68,13 +63,9 @@ orange .
    apple
    -
    -

    red fruit

    -
    +
    red fruit
    orange
    -
    -

    orange fruit

    -
    +
    orange fruit
    ```````````````````````````````` @@ -101,8 +92,6 @@ orange Multiple blocks in a definition: -Note that the column - ```````````````````````````````` example *apple* @@ -161,7 +150,6 @@ term ```````````````````````````````` Multiple definitions, tight: -(always rendered as loose) ```````````````````````````````` example apple @@ -174,19 +162,11 @@ orange .
    apple
    -
    -

    red fruit

    -
    -
    -

    computer company

    -
    +
    red fruit
    +
    computer company
    orange
    -
    -

    orange fruit

    -
    -
    -

    telecom company

    -
    +
    orange fruit
    +
    telecom company
    ```````````````````````````````` @@ -271,13 +251,9 @@ orange .
    apple
    -
    -

    red fruit

    -
    +
    red fruit
    orange
    -
    -

    orange fruit

    -
    +
    orange fruit
    ```````````````````````````````` @@ -315,12 +291,8 @@ bim

    Foo

    bar
    -
    -

    baz

    -
    +
    baz
    bim
    -
    -

    bor

    -
    +
    bor
    ````````````````````````````````