diff --git a/src/nodes.rs b/src/nodes.rs index c04f244c..ddd68e05 100644 --- a/src/nodes.rs +++ b/src/nodes.rs @@ -534,6 +534,7 @@ pub struct Ast { pub(crate) open: bool, pub(crate) last_line_blank: bool, pub(crate) table_visited: bool, + pub(crate) line_offsets: Vec<usize>, } /// Represents the position in the source Markdown this node was rendered from. @@ -609,6 +610,7 @@ impl Ast { open: true, last_line_blank: false, table_visited: false, + line_offsets: Vec::new(), } } } diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index 33407e4e..bd54bb5a 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -31,8 +31,8 @@ pub struct Subject<'a: 'd, 'r, 'o, 'c, 'd, 'i> { pub input: &'i [u8], line: usize, pub pos: usize, - block_offset: usize, column_offset: isize, + line_offset: usize, flags: Flags, pub refmap: &'r mut RefMap, delimiter_arena: &'d Arena<Delimiter<'a, 'd>>, @@ -116,7 +116,6 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { options: &'o Options<'c>, input: &'i [u8], line: usize, - block_offset: usize, refmap: &'r mut RefMap, delimiter_arena: &'d Arena<Delimiter<'a, 'd>>, ) -> Self { @@ -126,8 +125,8 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { input, line, pos: 0, - block_offset, column_offset: 0, + line_offset: 0, flags: Flags::default(), refmap, delimiter_arena, @@ -182,6 +181,11 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { None => return false, Some(ch) => *ch as char, }; + + let node_ast = node.data.borrow(); + let adjusted_line = self.line - node_ast.sourcepos.start.line; + self.line_offset = *node_ast.line_offsets.get(adjusted_line).unwrap_or(&0); + let new_inl: Option<&'a AstNode<'a>> = match c { '\0' => return false, '\r' | '\n' => Some(self.handle_newline()), @@ -1604,7 +1608,7 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { inl.data.borrow_mut().sourcepos.start.column = bracket_inl_text.data.borrow().sourcepos.start.column; inl.data.borrow_mut().sourcepos.end.column = usize::try_from( - 
self.pos as isize + self.column_offset + self.block_offset as isize, + self.pos as isize + self.column_offset + self.line_offset as isize, ) .unwrap(); bracket_inl_text.insert_before(inl); @@ -1655,7 +1659,7 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { .sourcepos .start; inl.data.borrow_mut().sourcepos.end.column = - usize::try_from(self.pos as isize + self.column_offset + self.block_offset as isize) + usize::try_from(self.pos as isize + self.column_offset + self.line_offset as isize) .unwrap(); self.brackets[brackets_len - 1].inl_text.insert_before(inl); @@ -1847,8 +1851,8 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { end_column: usize, ) -> &'a AstNode<'a> { let start_column = - start_column as isize + 1 + self.column_offset + self.block_offset as isize; - let end_column = end_column as isize + 1 + self.column_offset + self.block_offset as isize; + start_column as isize + 1 + self.column_offset + self.line_offset as isize; + let end_column = end_column as isize + 1 + self.column_offset + self.line_offset as isize; let ast = Ast { value, @@ -1864,6 +1868,7 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { open: false, last_line_blank: false, table_visited: false, + line_offsets: Vec::new(), }; self.arena.alloc(Node::new(RefCell::new(ast))) } @@ -1972,6 +1977,7 @@ pub fn make_inline<'a>( open: false, last_line_blank: false, table_visited: false, + line_offsets: Vec::new(), }; arena.alloc(Node::new(RefCell::new(ast))) } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index fed397bc..0db0ef56 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -67,6 +67,7 @@ pub fn parse_document<'a>( open: true, last_line_blank: false, table_visited: false, + line_offsets: Vec::new(), }))); let mut parser = Parser::new(arena, root, options); let mut linebuf = Vec::with_capacity(buffer.len()); @@ -1998,6 +1999,11 @@ impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> { } } if self.offset < line.len() { + // since whitespace is 
stripped off the beginning of lines, we need to keep + // track of how much was stripped off. This allows us to properly calculate + // inline sourcepos during inline processing. + ast.line_offsets.push(self.offset); + ast.content .push_str(str::from_utf8(&line[self.offset..]).unwrap()); } @@ -2185,7 +2191,6 @@ impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> { self.options, content, node_data.sourcepos.start.line, - node_data.sourcepos.start.column - 1 + node_data.internal_offset, &mut self.refmap, &delimiter_arena, ); @@ -2439,7 +2444,6 @@ impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> { self.options, content, 0, // XXX -1 in upstream; never used? - 0, &mut self.refmap, &delimiter_arena, ); diff --git a/src/parser/table.rs b/src/parser/table.rs index 651810bd..0fda7250 100644 --- a/src/parser/table.rs +++ b/src/parser/table.rs @@ -116,6 +116,10 @@ fn try_opening_header<'a>( start.column_add((cell.end_offset - header_row.paragraph_offset) as isize); ast.internal_offset = cell.internal_offset; ast.content.clone_from(&cell.content); + ast.line_offsets.push( + start.column + cell.start_offset - 1 + cell.internal_offset + - header_row.paragraph_offset, + ); i += 1; } @@ -172,6 +176,9 @@ fn try_opening_row<'a>( cell_ast.internal_offset = cell.internal_offset; cell_ast.sourcepos.end.column = sourcepos.start.column + cell.end_offset; cell_ast.content.clone_from(&cell.content); + cell_ast + .line_offsets + .push(sourcepos.start.column + cell.start_offset - 1 + cell.internal_offset); last_column = cell_ast.sourcepos.end.column; @@ -295,16 +302,15 @@ fn try_inserting_table_header_paragraph<'a>( let mut paragraph = Ast::new(NodeValue::Paragraph, start); paragraph.sourcepos.end.line = start.line + newlines - 1; - // XXX We don't have the last_line_length to go on by this point, - // so we have no idea what the end column should be. - // We can't track it in row() like we do paragraph_offset, because - // we've already discarded the leading whitespace for that line. 
- // This is hard to avoid with this backtracking approach to - // creating the pre-table paragraph — we're doing the work of - // finalize() here, but without the parser state at that time. - // Approximate by just counting the line length as it is and adding - // to the start column. - paragraph.sourcepos.end.column = start.column - 1 + // copy over the line offsets related to the paragraph + for n in 0..newlines { + paragraph + .line_offsets + .push(*container_ast.line_offsets.get(n).unwrap_or(&0)); + } + + let last_line_offset = *paragraph.line_offsets.last().unwrap_or(&0); + paragraph.sourcepos.end.column = last_line_offset + preface .iter() .rev() diff --git a/src/tests/core.rs b/src/tests/core.rs index 0ed9419b..f6a3eb51 100644 --- a/src/tests/core.rs +++ b/src/tests/core.rs @@ -529,8 +529,6 @@ fn link_sourcepos_newline() { ); } -// Ignored per https://github.com/kivikakk/comrak/pull/439#issuecomment-2225129960. -#[ignore] #[test] fn link_sourcepos_truffle() { assert_ast_match!( @@ -577,8 +575,6 @@ fn link_sourcepos_truffle_twist() { ); } -// Ignored per https://github.com/kivikakk/comrak/pull/439#issuecomment-2225129960. 
-#[ignore] #[test] fn link_sourcepos_truffle_bergamot() { assert_ast_match!( @@ -601,3 +597,103 @@ fn link_sourcepos_truffle_bergamot() { ]) ); } + +#[test] +fn link_sourcepos_inline_paragraph_multiline() { + assert_ast_match!( + [], + " A\n" + " B\n", + (document (1:1-2:4) [ + (paragraph (1:3-2:4) [ + (text (1:3-1:3) "A") + (softbreak (1:4-1:4)) + (text (2:4-2:4) "B") + ]) + ]) + ); +} + +#[test] +fn link_sourcepos_inline_listitem_multiline() { + assert_ast_match!( + [], + "- A\n" + "B\n", + (document (1:1-2:1) [ + (list (1:1-2:1) [ + (item (1:1-2:1) [ + (paragraph (1:3-2:1) [ + (text (1:3-1:3) "A") + (softbreak (1:4-1:4)) + (text (2:1-2:1) "B") + ]) + ]) + ]) + ]) + ); +} + +#[test] +fn link_sourcepos_inline_listitem_multiline_2() { + assert_ast_match!( + [], + "- A\n" + " B\n" + "- C\n" + " D", + (document (1:1-4:2) [ + (list (1:1-4:2) [ + (item (1:1-2:4) [ + (paragraph (1:3-2:4) [ + (text (1:3-1:3) "A") + (softbreak (1:4-1:4)) + (text (2:4-2:4) "B") + ]) + ]) + (item (3:1-4:2) [ + (paragraph (3:4-4:2) [ + (text (3:4-3:4) "C") + (softbreak (3:5-3:5)) + (text (4:2-4:2) "D") + ]) + ]) + ]) + ]) + ); +} + +#[test] +fn link_sourcepos_inline_double_emphasis_1() { + assert_ast_match!( + [], + "_**this**_\n", + (document (1:1-1:10) [ + (paragraph (1:1-1:10) [ + (emph (1:1-1:10) [ + (strong (1:2-1:9) [ + (text (1:4-1:7) "this") + ]) + ]) + ]) + ]) + ); +} + +#[ignore] +#[test] +fn link_sourcepos_inline_double_emphasis_2() { + assert_ast_match!( + [], + "___this___\n", + (document (1:1-1:10) [ + (paragraph (1:1-1:10) [ + (emph (1:1-1:10) [ + (strong (1:2-1:9) [ + (text (1:4-1:7) "this") + ]) + ]) + ]) + ]) + ); +} diff --git a/src/tests/table.rs b/src/tests/table.rs index 3f95cdd2..32ec8ce8 100644 --- a/src/tests/table.rs +++ b/src/tests/table.rs @@ -192,14 +192,10 @@ fn sourcepos_with_preceding_para_offset() { " | c | d |\n" , (document (1:1-5:10) [ - - // XXX This should be 1:2-2:5; see - // crate::parser::table::try_inserting_table_header_paragraph. 
- (paragraph (1:2-2:4) [ - + (paragraph (1:2-2:5) [ (text (1:2-1:4) "123") (softbreak (1:5-1:5)) - (text (2:2-2:4) "456") + (text (2:3-2:5) "456") ]) (table (3:2-5:10) [ (table_row (3:2-3:10) [