From a2ca1d2f1ee9001d47a852995cff401ebce42f56 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Thu, 21 Sep 2023 18:02:22 +0200 Subject: [PATCH 01/43] feat: create SymbolIterator --- commons/src/scanner/symbol/iterator.rs | 196 ++++++++++++++++++ .../src/scanner/{symbol.rs => symbol/mod.rs} | 2 + 2 files changed, 198 insertions(+) create mode 100644 commons/src/scanner/symbol/iterator.rs rename commons/src/scanner/{symbol.rs => symbol/mod.rs} (99%) diff --git a/commons/src/scanner/symbol/iterator.rs b/commons/src/scanner/symbol/iterator.rs new file mode 100644 index 00000000..94f3f33d --- /dev/null +++ b/commons/src/scanner/symbol/iterator.rs @@ -0,0 +1,196 @@ +use std::sync::Arc; + +use super::{Symbol, SymbolKind}; + +#[derive(Default, Clone)] +pub struct SymbolIterator<'input, 'end_fn> { + symbols: &'input [Symbol<'input>], + curr_index: usize, + start_index: usize, + line_prefixes: Vec>, + end: Vec + 'end_fn>>, +} + +pub trait IteratorEndFn<'input>: Fn(&'input [Symbol<'input>]) -> bool + Send + Sync {} + +impl<'input, 'end_fn> SymbolIterator<'input, 'end_fn> { + pub fn new(symbols: &'input [Symbol<'input>], start_index: usize) -> Self { + SymbolIterator { + symbols, + curr_index: start_index, + start_index, + line_prefixes: vec![], + end: vec![], + } + } + + pub fn with( + symbols: &'input [Symbol<'input>], + start_index: usize, + line_prefix: impl Into>>, + end: impl IteratorEndFn<'input> + 'end_fn, + ) -> Self { + SymbolIterator { + symbols, + curr_index: start_index, + start_index, + line_prefixes: line_prefix.into(), + end: vec![Arc::new(end)], + } + } + + pub fn start_index(&self) -> usize { + self.start_index + } + + pub fn curr_index(&self) -> usize { + self.curr_index + } + + pub fn set_curr_index(&mut self, index: usize) { + if index >= self.start_index { + self.curr_index = index; + } + } + + pub fn eoi(&self) -> bool { + self.curr_index == self.symbols.len() + } + + pub fn nest<'inner_end>( + &self, + line_prefix: &[SymbolKind], + end: Option + 'inner_end>, + ) -> SymbolIterator<'input, 'inner_end> + where + 'end_fn: 'inner_end, + { + let mut nested_prefixes = self.line_prefixes.clone(); + if nested_prefixes.is_empty() { + nested_prefixes.push(vec![]); + } + + if !line_prefix.contains(&SymbolKind::Blankline) { + nested_prefixes + .iter_mut() + .for_each(|p| p.extend_from_slice(line_prefix)); + } + + let mut outer_end = self.end.clone(); + let merged_end = match end { + Some(inner_end) => { + outer_end.push(Arc::new(inner_end)); + outer_end + } + None => outer_end, + }; + + SymbolIterator { + symbols: self.symbols, + curr_index: self.curr_index, + start_index: self.curr_index, + line_prefixes: nested_prefixes, + end: merged_end, + } + } + + pub fn next(&mut self) -> Result<&Symbol<'input>, SymbolIteratorError> { + if self.eoi() { + return Err(SymbolIteratorError::Eoi); + } + + let mut curr_symbolkind = match self.symbols.get(self.curr_index) { + Some(curr_symbol) => curr_symbol.kind, + None => return Err(SymbolIteratorError::Eoi), + }; + + if curr_symbolkind == SymbolKind::Newline && !self.line_prefixes.is_empty() { + let curr_prefix_symbolkinds: Vec<_> = self.symbols[self.curr_index + 1..] + .iter() + .map(|s| s.kind) + .collect(); + + let mut prefix_matched = false; + + for prefix in &self.line_prefixes { + if prefix == &curr_prefix_symbolkinds { + prefix_matched = true; + self.curr_index += prefix.len(); + curr_symbolkind = match self.symbols.get(self.curr_index) { + Some(curr_symbol) => curr_symbol.kind, + None => return Err(SymbolIteratorError::Eoi), + }; + break; + } + } + + if !prefix_matched { + return Err(SymbolIteratorError::PrefixMismatch); + } + } else if curr_symbolkind == SymbolKind::Blankline + && contains_only_non_whitespace_sequences(&self.line_prefixes) + { + return Err(SymbolIteratorError::PrefixMismatch); + } + + for f in &self.end { + if f(&self.symbols[self.curr_index..]) { + return Err(SymbolIteratorError::EndReached); + } + } + + let symbol_opt = self.symbols.get(self.curr_index); + self.curr_index += 1; + + symbol_opt.ok_or(SymbolIteratorError::Eoi) + } + + pub fn skip_to_end(mut self) -> Self { + let mut end_reached = false; + + while !end_reached || !self.eoi() { + for f in &self.end { + if f(&self.symbols[self.curr_index..]) { + end_reached = true; + } + } + + if !end_reached { + self.curr_index += 1; + } + } + + self + } +} + +pub enum SymbolIteratorError { + /// At least one end-function returned `true`. + EndReached, + /// A new line did not start with the expected prefix. + PrefixMismatch, + /// Reached end of input. + Eoi, +} + +fn contains_only_non_whitespace_sequences(sequences: &[Vec]) -> bool { + let mut whitespace_sequence_found = false; + + for sequence in sequences { + whitespace_sequence_found = whitespace_sequence_found || !contains_non_whitespace(sequence); + } + whitespace_sequence_found +} + +fn contains_non_whitespace(sequence: &[SymbolKind]) -> bool { + for kind in sequence { + if !matches!( + kind, + SymbolKind::Whitespace | SymbolKind::Newline | SymbolKind::Blankline + ) { + return true; + } + } + + false +} diff --git a/commons/src/scanner/symbol.rs b/commons/src/scanner/symbol/mod.rs similarity index 99% rename from commons/src/scanner/symbol.rs rename to commons/src/scanner/symbol/mod.rs index fdfe6d87..87265dff 100644 --- a/commons/src/scanner/symbol.rs +++ b/commons/src/scanner/symbol/mod.rs @@ -4,6 +4,8 @@ use core::fmt; use super::position::{Offset, Position}; +pub mod iterator; + /// Possible kinds of Symbol found in Unimarkup document. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub enum SymbolKind { From 998d29144d9411ae2ce93c13da146631489b333c Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Thu, 21 Sep 2023 23:51:12 +0200 Subject: [PATCH 02/43] feat: switch block parser to SymbolIterator --- commons/src/scanner/mod.rs | 2 +- commons/src/scanner/symbol/iterator.rs | 239 +++++++++++++++++++---- core/tests/test_runner/mod.rs | 2 +- parser/src/elements/atomic/heading.rs | 71 ++++--- parser/src/elements/atomic/paragraph.rs | 64 ++---- parser/src/elements/enclosed/verbatim.rs | 93 +++++---- parser/src/parser.rs | 30 +-- 7 files changed, 335 insertions(+), 166 deletions(-) diff --git a/commons/src/scanner/mod.rs b/commons/src/scanner/mod.rs index 72372135..9808c555 100644 --- a/commons/src/scanner/mod.rs +++ b/commons/src/scanner/mod.rs @@ -7,7 +7,7 @@ mod symbol; use icu::segmenter::{GraphemeClusterSegmenter, SegmenterError}; use icu_provider_adapters::fallback::LocaleFallbackProvider; use position::{Offset, Position}; -pub use symbol::{Symbol, SymbolKind}; +pub use symbol::{iterator::*, Symbol, SymbolKind}; #[derive(Debug, Clone)] struct IcuDataProvider; diff --git a/commons/src/scanner/symbol/iterator.rs b/commons/src/scanner/symbol/iterator.rs index 94f3f33d..2db3bdaf 100644 --- a/commons/src/scanner/symbol/iterator.rs +++ b/commons/src/scanner/symbol/iterator.rs @@ -8,10 +8,35 @@ pub struct SymbolIterator<'input, 'end_fn> { curr_index: usize, start_index: usize, line_prefixes: Vec>, - end: Vec + 'end_fn>>, + end: Vec>>, } -pub trait IteratorEndFn<'input>: Fn(&'input [Symbol<'input>]) -> bool + Send + Sync {} +pub type IteratorEndFn<'input, 'end_fn> = + Box]) -> bool + Send + Sync + 'end_fn>; + +impl<'input, 'end_fn> From<&'input [Symbol<'input>]> for SymbolIterator<'input, 'end_fn> { + fn from(value: &'input [Symbol<'input>]) -> Self { + SymbolIterator { + symbols: value, + curr_index: 0, + start_index: 0, + line_prefixes: vec![], + end: vec![], + } + } +} + +impl<'input, 'end_fn> From<&'input Vec>> for SymbolIterator<'input, 'end_fn> { + fn from(value: &'input Vec>) -> Self { + SymbolIterator { + symbols: value, + curr_index: 0, + start_index: 0, + line_prefixes: vec![], + end: vec![], + } + } +} impl<'input, 'end_fn> SymbolIterator<'input, 'end_fn> { pub fn new(symbols: &'input [Symbol<'input>], start_index: usize) -> Self { @@ -28,7 +53,7 @@ impl<'input, 'end_fn> SymbolIterator<'input, 'end_fn> { symbols: &'input [Symbol<'input>], start_index: usize, line_prefix: impl Into>>, - end: impl IteratorEndFn<'input> + 'end_fn, + end: IteratorEndFn<'input, 'end_fn>, ) -> Self { SymbolIterator { symbols, @@ -39,6 +64,14 @@ impl<'input, 'end_fn> SymbolIterator<'input, 'end_fn> { } } + pub fn len(&self) -> usize { + self.symbols.len() + } + + pub fn is_empty(&self) -> bool { + self.symbols.is_empty() + } + pub fn start_index(&self) -> usize { self.start_index } @@ -57,10 +90,22 @@ impl<'input, 'end_fn> SymbolIterator<'input, 'end_fn> { self.curr_index == self.symbols.len() } + pub fn remaining_symbols(&self) -> &'input [Symbol<'input>] { + &self.symbols[self.curr_index..] + } + + pub fn peek(&self) -> Option<&'input Symbol<'input>> { + self.symbols.get(self.curr_index + 1) + } + + pub fn peek_kind(&self) -> Option { + self.symbols.get(self.curr_index + 1).map(|s| s.kind) + } + pub fn nest<'inner_end>( &self, line_prefix: &[SymbolKind], - end: Option + 'inner_end>, + end: Option>, ) -> SymbolIterator<'input, 'inner_end> where 'end_fn: 'inner_end, @@ -94,14 +139,158 @@ impl<'input, 'end_fn> SymbolIterator<'input, 'end_fn> { } } - pub fn next(&mut self) -> Result<&Symbol<'input>, SymbolIteratorError> { + pub fn nest_prefixes<'inner_end>( + &self, + line_prefixes: &[Vec], + end: Option>, + ) -> SymbolIterator<'input, 'inner_end> + where + 'end_fn: 'inner_end, + { + let prefixes = if self.line_prefixes.is_empty() { + let mut nested_prefixes = self.line_prefixes.clone(); + nested_prefixes.extend_from_slice(line_prefixes); + nested_prefixes + } else { + // create cartesian prefix + self.line_prefixes + .iter() + .flat_map(|outer_prefixes| { + line_prefixes.iter().map(|inner_prefixes| { + let mut prefix = outer_prefixes.clone(); + + if !inner_prefixes.contains(&SymbolKind::Blankline) { + prefix.extend(inner_prefixes); + } + + prefix + }) + }) + .collect() + }; + + let mut outer_end = self.end.clone(); + let merged_end = match end { + Some(inner_end) => { + outer_end.push(Arc::new(inner_end)); + outer_end + } + None => outer_end, + }; + + SymbolIterator { + symbols: self.symbols, + curr_index: self.curr_index, + start_index: self.curr_index, + line_prefixes: prefixes, + end: merged_end, + } + } + + // #[allow(clippy::should_implement_trait)] + // pub fn next(&mut self) -> Result<&Symbol<'input>, SymbolIteratorError> { + // if self.eoi() { + // return Err(SymbolIteratorError::Eoi); + // } + + // let mut curr_symbolkind = match self.symbols.get(self.curr_index) { + // Some(curr_symbol) => curr_symbol.kind, + // None => return Err(SymbolIteratorError::Eoi), + // }; + + // if curr_symbolkind == SymbolKind::Newline && !self.line_prefixes.is_empty() { + // let curr_prefix_symbolkinds: Vec<_> = self.symbols[self.curr_index + 1..] + // .iter() + // .map(|s| s.kind) + // .collect(); + + // let mut prefix_matched = false; + + // for prefix in &self.line_prefixes { + // if prefix == &curr_prefix_symbolkinds { + // prefix_matched = true; + // self.curr_index += prefix.len(); + // curr_symbolkind = match self.symbols.get(self.curr_index) { + // Some(curr_symbol) => curr_symbol.kind, + // None => return Err(SymbolIteratorError::Eoi), + // }; + // break; + // } + // } + + // if !prefix_matched { + // return Err(SymbolIteratorError::PrefixMismatch); + // } + // } else if curr_symbolkind == SymbolKind::Blankline + // && contains_only_non_whitespace_sequences(&self.line_prefixes) + // { + // return Err(SymbolIteratorError::PrefixMismatch); + // } + + // for f in &self.end { + // if f(&self.symbols[self.curr_index..]) { + // return Err(SymbolIteratorError::EndReached); + // } + // } + + // let symbol_opt = self.symbols.get(self.curr_index); + // self.curr_index += 1; + + // symbol_opt.ok_or(SymbolIteratorError::Eoi) + // } + + pub fn skip_to_end(mut self) -> Self { + let mut end_reached = false; + + while !end_reached || !self.eoi() { + for f in &self.end { + if f(&self.symbols[self.curr_index..]) { + end_reached = true; + } + } + + if !end_reached { + self.curr_index += 1; + } + } + + self + } + + /// Collects and returns all symbols until one of the end functions signals the end, + /// or until no line prefix is matched after a new line. + pub fn take_to_end(&mut self) -> Vec<&'input Symbol<'input>> { + let mut symbols = Vec::new(); + + for symbol in self.by_ref() { + symbols.push(symbol); + } + + symbols + } + + pub fn end_reached(&self) -> bool { + for f in &self.end { + if f(&self.symbols[self.curr_index..]) { + return true; + } + } + + false + } +} + +impl<'input, 'end_fn> Iterator for SymbolIterator<'input, 'end_fn> { + type Item = &'input Symbol<'input>; + + fn next(&mut self) -> Option { if self.eoi() { - return Err(SymbolIteratorError::Eoi); + return None; } - let mut curr_symbolkind = match self.symbols.get(self.curr_index) { + let curr_symbolkind = match self.symbols.get(self.curr_index) { Some(curr_symbol) => curr_symbol.kind, - None => return Err(SymbolIteratorError::Eoi), + None => return None, }; if curr_symbolkind == SymbolKind::Newline && !self.line_prefixes.is_empty() { @@ -116,51 +305,27 @@ impl<'input, 'end_fn> SymbolIterator<'input, 'end_fn> { if prefix == &curr_prefix_symbolkinds { prefix_matched = true; self.curr_index += prefix.len(); - curr_symbolkind = match self.symbols.get(self.curr_index) { - Some(curr_symbol) => curr_symbol.kind, - None => return Err(SymbolIteratorError::Eoi), - }; break; } } if !prefix_matched { - return Err(SymbolIteratorError::PrefixMismatch); + return None; } } else if curr_symbolkind == SymbolKind::Blankline && contains_only_non_whitespace_sequences(&self.line_prefixes) { - return Err(SymbolIteratorError::PrefixMismatch); + return None; } - for f in &self.end { - if f(&self.symbols[self.curr_index..]) { - return Err(SymbolIteratorError::EndReached); - } + if self.end_reached() { + return None; } let symbol_opt = self.symbols.get(self.curr_index); self.curr_index += 1; - symbol_opt.ok_or(SymbolIteratorError::Eoi) - } - - pub fn skip_to_end(mut self) -> Self { - let mut end_reached = false; - - while !end_reached || !self.eoi() { - for f in &self.end { - if f(&self.symbols[self.curr_index..]) { - end_reached = true; - } - } - - if !end_reached { - self.curr_index += 1; - } - } - - self + symbol_opt } } diff --git a/core/tests/test_runner/mod.rs b/core/tests/test_runner/mod.rs index 71a7d36e..873da642 100644 --- a/core/tests/test_runner/mod.rs +++ b/core/tests/test_runner/mod.rs @@ -44,7 +44,7 @@ macro_rules! snapshot_parser { |input| { let parse = <$ty>::generate_parser(); - parse(input) + parse(input.into()) .map(|(block, rest)| (Snapshot(block).as_snapshot(), rest)) .expect("Could not parse content!") } diff --git a/parser/src/elements/atomic/heading.rs b/parser/src/elements/atomic/heading.rs index ce44887a..0b0e083e 100644 --- a/parser/src/elements/atomic/heading.rs +++ b/parser/src/elements/atomic/heading.rs @@ -4,7 +4,7 @@ use unimarkup_inline::{Inline, ParseInlines}; use crate::elements::blocks::Block; use crate::elements::Blocks; use crate::parser::{ElementParser, TokenizeOutput}; -use unimarkup_commons::scanner::{Symbol, SymbolKind}; +use unimarkup_commons::scanner::{Symbol, SymbolIterator, SymbolKind}; use super::log_id::AtomicError; @@ -112,7 +112,7 @@ pub enum HeadingToken<'a> { Level(HeadingLevel), /// Content of the heading - Content(&'a [Symbol<'a>]), + Content(Vec<&'a Symbol<'a>>), /// Marks the end of the heading End, @@ -121,37 +121,55 @@ pub enum HeadingToken<'a> { impl ElementParser for Heading { type Token<'a> = self::HeadingToken<'a>; - fn tokenize<'i>(input: &'i [Symbol<'i>]) -> Option>> { - let mut level_depth = input - .iter() + fn tokenize<'i>( + mut input: SymbolIterator<'i, '_>, + ) -> Option>> { + let mut heading_start: Vec = input + .by_ref() .take_while(|symbol| matches!(symbol.kind, SymbolKind::Hash)) - .count(); + .map(|s| s.kind) + .collect(); + + let level_depth = heading_start.len(); let level: HeadingLevel = HeadingLevel::try_from(level_depth).ok()?; - if input.get(level_depth)?.kind != SymbolKind::Whitespace { + if input.next()?.kind != SymbolKind::Whitespace { return None; } - level_depth += 1; // +1 space offset - - let content_symbols = input - .iter() - .skip(level_depth) - .take_while(|symbol| !matches!(symbol.kind, SymbolKind::Blankline | SymbolKind::EOI)) - .count(); - - let content_start = level_depth; - let content_end = content_start + content_symbols; - let content = &input[content_start..content_end]; - let rest = &input[content_end..]; + heading_start.push(SymbolKind::Whitespace); + let whitespace_indents = std::iter::repeat(SymbolKind::Whitespace) + .take(heading_start.len()) + .collect(); + + let mut sub_heading_start: Vec = std::iter::repeat(SymbolKind::Hash) + .take(heading_start.len()) + .collect(); + sub_heading_start.push(SymbolKind::Whitespace); + + let heading_end = |sequence: &[Symbol<'_>]| match sequence.first() { + Some(symbol) => matches!(symbol.kind, SymbolKind::Blankline | SymbolKind::EOI), + None => false, + } || sequence[..sub_heading_start.len()].iter().map(|s| s.kind).collect::>().starts_with(&sub_heading_start); + + let mut content_iter = input.nest_prefixes( + &[heading_start, whitespace_indents], + Some(Box::new(heading_end)), + ); + let content_symbols = content_iter.take_to_end(); + + // Line prefixes violated => invalid heading syntax + if !content_iter.end_reached() { + return None; + } let output = TokenizeOutput { tokens: vec![ HeadingToken::Level(level), - HeadingToken::Content(content), + HeadingToken::Content(content_symbols), HeadingToken::End, ], - rest_of_input: rest, + rest_of_input: content_iter.remaining_symbols(), }; Some(output) @@ -159,10 +177,17 @@ impl ElementParser for Heading { fn parse(input: Vec>) -> Option { let HeadingToken::Level(level) = input[0] else {return None}; - let HeadingToken::Content(symbols) = input[1] else {return None}; + let HeadingToken::Content(ref symbols) = input[1] else {return None}; let inline_start = symbols.get(0)?.start; - let content = symbols.parse_inlines().collect(); + // TODO: Adapt inline lexer to also work with Vec<&'input Symbol> + let content = symbols + .iter() + .map(|&s| *s) + .collect::>>() + .parse_inlines() + .collect(); + let line_nr = inline_start.line; let block = Self { id: String::default(), diff --git a/parser/src/elements/atomic/paragraph.rs b/parser/src/elements/atomic/paragraph.rs index 32983742..d754b427 100644 --- a/parser/src/elements/atomic/paragraph.rs +++ b/parser/src/elements/atomic/paragraph.rs @@ -7,7 +7,7 @@ use crate::{ elements::{blocks::Block, types}, parser::TokenizeOutput, }; -use unimarkup_commons::scanner::{Symbol, SymbolKind}; +use unimarkup_commons::scanner::{Symbol, SymbolIterator, SymbolKind}; /// Structure of a Unimarkup paragraph element. #[derive(Debug, Default, Clone, PartialEq, Eq)] @@ -28,9 +28,14 @@ pub struct Paragraph { impl Paragraph {} -impl From<&[Symbol<'_>]> for Paragraph { - fn from(value: &[Symbol<'_>]) -> Self { - let content = value.parse_inlines().collect(); +impl From<&Vec<&'_ Symbol<'_>>> for Paragraph { + fn from(value: &Vec<&'_ Symbol<'_>>) -> Self { + let content = value + .iter() + .map(|&s| *s) + .collect::>>() + .parse_inlines() + .collect(); let line_nr = value.get(0).map(|symbol| symbol.start.line).unwrap_or(0); let id = crate::generate_id::generate_id(&format!( @@ -55,57 +60,24 @@ fn not_closing_symbol(symbol: &&Symbol) -> bool { .all(|closing| *closing != symbol.kind) } -enum TokenKind<'a> { - Start, - End, - Text(&'a [Symbol<'a>]), -} - -pub(crate) struct ParagraphToken<'a> { - kind: TokenKind<'a>, -} - impl ElementParser for Paragraph { - type Token<'a> = self::ParagraphToken<'a>; - - fn tokenize<'input>( - input: &'input [Symbol<'input>], - ) -> Option>> { - let iter = input.iter(); - - let taken = iter.take_while(not_closing_symbol).count(); - let end_of_input = taken.min(input.len()); - - let tokens = vec![ - ParagraphToken { - kind: TokenKind::Start, - }, - ParagraphToken { - kind: TokenKind::Text(&input[..end_of_input]), - }, - ParagraphToken { - kind: TokenKind::End, - }, - ]; - - let input = &input[end_of_input..]; + type Token<'a> = &'a Symbol<'a>; + + fn tokenize<'i>( + mut input: SymbolIterator<'i, '_>, + ) -> Option>> { + let content = input.by_ref().take_while(not_closing_symbol).collect(); let output = TokenizeOutput { - tokens, - rest_of_input: input, + tokens: content, + rest_of_input: input.remaining_symbols(), }; Some(output) } fn parse(input: Vec>) -> Option { - let content = match input[1].kind { - TokenKind::Start => &[], - TokenKind::End => &[], - TokenKind::Text(symbols) => symbols, - }; - - let block = Block::Paragraph(Paragraph::from(content)); + let block = Block::Paragraph(Paragraph::from(&input)); Some(vec![block]) } diff --git a/parser/src/elements/enclosed/verbatim.rs b/parser/src/elements/enclosed/verbatim.rs index 837ac397..3bc88ebe 100644 --- a/parser/src/elements/enclosed/verbatim.rs +++ b/parser/src/elements/enclosed/verbatim.rs @@ -3,7 +3,7 @@ use serde::{Deserialize, Serialize}; use crate::elements::blocks::Block; use crate::elements::Blocks; use crate::parser::{ElementParser, TokenizeOutput}; -use unimarkup_commons::scanner::{Symbol, SymbolKind}; +use unimarkup_commons::scanner::{Symbol, SymbolIterator, SymbolKind}; /// Structure of a Unimarkup verbatim block element. #[derive(Debug, PartialEq, Eq, Clone)] @@ -24,76 +24,83 @@ pub struct Verbatim { } pub(crate) enum Token<'a> { - Delimiter { line: usize }, - Content(&'a [Symbol<'a>]), + StartDelim(Vec<&'a Symbol<'a>>), + Content(Vec<&'a Symbol<'a>>), } impl ElementParser for Verbatim { type Token<'a> = self::Token<'a>; - fn tokenize<'i>(input: &'i [Symbol<'i>]) -> Option>> { - let start_delim = input - .iter() + fn tokenize<'i>( + mut input: SymbolIterator<'i, '_>, + ) -> Option>> { + let start_delim: Vec<_> = input + .by_ref() .take_while(|symbol| matches!(symbol.kind, SymbolKind::Tick)) - .count(); + .collect(); + let start_delim_len = start_delim.len(); - if start_delim < 3 { + if start_delim_len < 3 { return None; }; - // we know there are at least 3 - let first_delim = input[0]; - - // TODO: handle language attribute - let content_count = input - .iter() - .skip(start_delim) - .take_while(|symbol| !matches!(symbol.kind, SymbolKind::Tick)) - .count(); - - let end_delim = input - .iter() - .skip(start_delim + content_count) - .take_while(|sym| matches!(sym.kind, SymbolKind::Tick)) - .count(); - - if end_delim != start_delim { + let end_sequence = std::iter::repeat(SymbolKind::Tick) + .take(start_delim_len) + .collect::>(); + let mut content_iter = input.nest( + &[], + Some(Box::new(|sequence| { + sequence[..start_delim_len] + .iter() + .map(|s| s.kind) + .collect::>() + .starts_with(&end_sequence) + })), + ); + + let content = content_iter.take_to_end(); + if !content_iter.end_reached() { return None; } - let start_content = start_delim; - let end_content = start_content + content_count; - let content = &input[start_content..end_content]; - let rest = &input[end_content + end_delim..]; + input.set_curr_index(content_iter.curr_index()); + + match input + .by_ref() + .take(start_delim_len) + .map(|s| s.kind) + .collect::>() + { + end if end == end_sequence => { + if input.peek_kind() == Some(SymbolKind::Tick) { + return None; + } + } + _ => return None, + } - let last_delim = input[end_content]; + // TODO: handle language attribute let output = TokenizeOutput { - tokens: vec![ - Token::Delimiter { - line: first_delim.start.line, - }, - Token::Content(content), - Token::Delimiter { - line: last_delim.start.line, - }, - ], - rest_of_input: rest, + tokens: vec![Token::StartDelim(start_delim), Token::Content(content)], + rest_of_input: input.remaining_symbols(), }; Some(output) } fn parse(input: Vec>) -> Option { - let Token::Delimiter { line } = input.get(0)? else {return None}; + let Token::StartDelim(start) = input.get(0)? else { return None }; + let line_nr = start.get(0)?.start.line; + let Token::Content(symbols) = input.get(1)? else { return None }; - let content = Symbol::flatten(symbols)?; + let content = Symbol::flatten_iter(symbols.iter().copied())?; let block = Self { id: String::default(), content: String::from(content), attributes: None, - line_nr: *line, + line_nr, }; Some(vec![Block::Verbatim(block)]) diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 71272ebe..d04669ad 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -1,7 +1,7 @@ //! Module for parsing of Unimarkup elements. use logid::log; -use unimarkup_commons::scanner::{Scanner, Symbol, SymbolKind}; +use unimarkup_commons::scanner::{Scanner, Symbol, SymbolIterator, SymbolKind}; use crate::{ document::Document, @@ -17,15 +17,15 @@ use crate::{ use unimarkup_commons::config::Config; /// Parser as function that can parse Unimarkup content -pub type ParserFn = for<'i> fn(&'i [Symbol<'i>]) -> Option<(Blocks, &'i [Symbol<'i>])>; +pub type ParserFn = for<'i, 'f> fn(SymbolIterator<'i, 'f>) -> Option<(Blocks, &'i [Symbol<'i>])>; /// Output of symbol tokenization by a parser of a block. -pub(crate) struct TokenizeOutput<'a, T> +pub(crate) struct TokenizeOutput<'i, T> where - T: 'a, + T: 'i, { pub(crate) tokens: Vec, - pub(crate) rest_of_input: &'a [Symbol<'a>], + pub(crate) rest_of_input: &'i [Symbol<'i>], } /// Trait implemented by a parser for each Unimarkup element. @@ -34,7 +34,7 @@ pub(crate) trait ElementParser { type Token<'a>; /// Function that converts input symbols into tokens specific for the given element. - fn tokenize<'i>(input: &'i [Symbol<'i>]) -> Option>>; + fn tokenize<'i>(input: SymbolIterator<'i, '_>) -> Option>>; /// Function that parses tokenization output and produces one or more Unimarkup elements. fn parse(input: Vec>) -> Option; @@ -103,17 +103,17 @@ impl MainParser { } /// Parses Unimarkup content and produces Unimarkup blocks. - pub fn parse<'s>(&self, input: impl AsRef<[Symbol<'s>]>) -> Blocks { - let mut input = input.as_ref(); + pub fn parse<'i, 'f>(&self, input: impl Into>) -> Blocks { + let mut input = input.into(); let mut blocks = Vec::default(); #[cfg(debug_assertions)] let mut input_len = input.len(); - 'outer: while let Some(sym) = input.first() { + 'outer: while let Some(sym) = input.next() { match sym.kind { // skip blanklines - SymbolKind::Blankline => input = &input[1..], + SymbolKind::Blankline => {} // stop parsing when end of input is reached SymbolKind::EOI => break, @@ -124,15 +124,15 @@ impl MainParser { .expect("Default parser could not parse content!"); blocks.append(&mut res_blocks); - input = rest_of_input; + input = SymbolIterator::from(rest_of_input); } // symbol is start of a block, some parser should match _ => { for parser_fn in &self.parsers { - if let Some((mut res_blocks, rest_of_input)) = parser_fn(input) { + if let Some((mut res_blocks, rest_of_input)) = parser_fn(input.clone()) { blocks.append(&mut res_blocks); - input = rest_of_input; + input = SymbolIterator::from(rest_of_input); continue 'outer; // start from first parser on next input } } @@ -142,7 +142,7 @@ impl MainParser { .expect("Default parser could not parse content!"); blocks.append(&mut res_blocks); - input = rest_of_input; + input = SymbolIterator::from(rest_of_input); } } @@ -165,7 +165,7 @@ pub fn parse_unimarkup(um_content: &str, config: &mut Config) -> Document { .expect("Must be valid provider.") .scan_str(um_content); - let blocks = parser.parse(symbols); + let blocks = parser.parse(&symbols); let mut unimarkup = Document { config: config.clone(), From f1dc3733346d33b24f2e7c4cb35e3eaa8d43311d Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Fri, 22 Sep 2023 13:16:06 +0200 Subject: [PATCH 03/43] feat: add itertools for SymbolIterator --- commons/Cargo.toml | 1 + commons/src/scanner/symbol/iterator.rs | 185 +++++++++++++++++++++---- commons/src/scanner/symbol/mod.rs | 22 +-- parser/src/elements/atomic/heading.rs | 10 +- parser/src/parser.rs | 10 +- 5 files changed, 185 insertions(+), 43 deletions(-) diff --git a/commons/Cargo.toml b/commons/Cargo.toml index e8c17433..25c98e1a 100644 --- a/commons/Cargo.toml +++ b/commons/Cargo.toml @@ -25,6 +25,7 @@ icu_provider = "=1.2.0" icu_provider_adapters = "=1.2.0" regex = { version = "1.8.1", optional = true } insta = {version = "1.29.0", features = ["serde"], optional = true} +itertools = "0.11.0" [features] test_runner = ["dep:regex", "dep:once_cell", "dep:insta"] diff --git a/commons/src/scanner/symbol/iterator.rs b/commons/src/scanner/symbol/iterator.rs index 2db3bdaf..b28af11e 100644 --- a/commons/src/scanner/symbol/iterator.rs +++ b/commons/src/scanner/symbol/iterator.rs @@ -1,12 +1,17 @@ use std::sync::Arc; +use itertools::PeekingNext; + use super::{Symbol, SymbolKind}; +pub use itertools::*; + #[derive(Default, Clone)] pub struct SymbolIterator<'input, 'end_fn> { symbols: &'input [Symbol<'input>], curr_index: usize, start_index: usize, + peek_index: usize, line_prefixes: Vec>, end: Vec>>, } @@ -20,6 +25,7 @@ impl<'input, 'end_fn> From<&'input [Symbol<'input>]> for SymbolIterator<'input, symbols: value, curr_index: 0, start_index: 0, + peek_index: 0, line_prefixes: vec![], end: vec![], } @@ -32,6 +38,7 @@ impl<'input, 'end_fn> From<&'input Vec>> for SymbolIterator<'inpu symbols: value, curr_index: 0, start_index: 0, + peek_index: 0, line_prefixes: vec![], end: vec![], } @@ -44,6 +51,7 @@ impl<'input, 'end_fn> SymbolIterator<'input, 'end_fn> { symbols, curr_index: start_index, start_index, + peek_index: start_index, line_prefixes: vec![], end: vec![], } @@ -59,17 +67,18 @@ impl<'input, 'end_fn> SymbolIterator<'input, 'end_fn> { symbols, curr_index: start_index, start_index, + peek_index: start_index, line_prefixes: line_prefix.into(), end: vec![Arc::new(end)], } } pub fn len(&self) -> usize { - self.symbols.len() + self.symbols[self.start_index..].len() } pub fn is_empty(&self) -> bool { - self.symbols.is_empty() + self.symbols[self.start_index..].is_empty() } pub fn start_index(&self) -> usize { @@ -83,6 +92,7 @@ impl<'input, 'end_fn> SymbolIterator<'input, 'end_fn> { pub fn set_curr_index(&mut self, index: usize) { if index >= self.start_index { self.curr_index = index; + self.peek_index = self.curr_index; } } @@ -95,11 +105,11 @@ impl<'input, 'end_fn> SymbolIterator<'input, 'end_fn> { } pub fn peek(&self) -> Option<&'input Symbol<'input>> { - self.symbols.get(self.curr_index + 1) + self.symbols.get(self.curr_index) } pub fn peek_kind(&self) -> Option { - self.symbols.get(self.curr_index + 1).map(|s| s.kind) + self.symbols.get(self.curr_index).map(|s| s.kind) } pub fn nest<'inner_end>( @@ -134,6 +144,7 @@ impl<'input, 'end_fn> SymbolIterator<'input, 'end_fn> { symbols: self.symbols, curr_index: self.curr_index, start_index: self.curr_index, + peek_index: self.curr_index, line_prefixes: nested_prefixes, end: merged_end, } @@ -182,6 +193,7 @@ impl<'input, 'end_fn> SymbolIterator<'input, 'end_fn> { symbols: self.symbols, curr_index: self.curr_index, start_index: self.curr_index, + peek_index: self.curr_index, line_prefixes: prefixes, end: merged_end, } @@ -240,19 +252,7 @@ impl<'input, 'end_fn> SymbolIterator<'input, 'end_fn> { // } pub fn skip_to_end(mut self) -> Self { - let mut end_reached = false; - - while !end_reached || !self.eoi() { - for f in &self.end { - if f(&self.symbols[self.curr_index..]) { - end_reached = true; - } - } - - if !end_reached { - self.curr_index += 1; - } - } + while self.next().is_some() {} self } @@ -288,23 +288,26 @@ impl<'input, 'end_fn> Iterator for SymbolIterator<'input, 'end_fn> { return None; } - let curr_symbolkind = match self.symbols.get(self.curr_index) { + let curr_symbol_opt = self.symbols.get(self.curr_index); + let curr_symbolkind = match curr_symbol_opt { Some(curr_symbol) => curr_symbol.kind, None => return None, }; if curr_symbolkind == SymbolKind::Newline && !self.line_prefixes.is_empty() { - let curr_prefix_symbolkinds: Vec<_> = self.symbols[self.curr_index + 1..] - .iter() - .map(|s| s.kind) - .collect(); - let mut prefix_matched = false; for prefix in &self.line_prefixes { + let curr_prefix_symbolkinds: Vec<_> = self.symbols + [self.curr_index + 1..self.curr_index + prefix.len()] + .iter() + .map(|s| s.kind) + .collect(); + if prefix == &curr_prefix_symbolkinds { prefix_matched = true; self.curr_index += prefix.len(); + self.peek_index = self.curr_index; break; } } @@ -322,10 +325,35 @@ impl<'input, 'end_fn> Iterator for SymbolIterator<'input, 'end_fn> { return None; } - let symbol_opt = self.symbols.get(self.curr_index); self.curr_index += 1; + self.peek_index = self.curr_index; + curr_symbol_opt + } +} - symbol_opt +impl<'input, 'end_fn> PeekingNext for SymbolIterator<'input, 'end_fn> { + fn peeking_next(&mut self, accept: F) -> Option + where + Self: Sized, + F: FnOnce(&Self::Item) -> bool, + { + let curr_index = self.curr_index; + self.curr_index = self.peek_index; // Note: peek_index increases until `next()` is called directly + let next_item = self.next(); + + // revert index to simulate lookahead + self.curr_index = curr_index; + + match next_item { + Some(symbol) => { + if (accept)(&symbol) { + next_item + } else { + None + } + } + None => None, + } } } @@ -359,3 +387,110 @@ fn contains_non_whitespace(sequence: &[SymbolKind]) -> bool { false } + +#[cfg(test)] +mod test { + use itertools::{Itertools, PeekingNext}; + + use crate::scanner::{Scanner, SymbolKind}; + + use super::SymbolIterator; + + #[test] + fn peek_while_index() { + let symbols = Scanner::try_new() + .expect("Must be valid provider.") + .scan_str("## "); + + let mut iterator = SymbolIterator::from(&symbols); + let hash_cnt = iterator + .peeking_take_while(|symbol| symbol.kind == SymbolKind::Hash) + .count(); + + let next_symbol = iterator.nth(hash_cnt); + let curr_index = iterator.curr_index(); + + assert_eq!(hash_cnt, 2, "Hash symbols in input not correctly detected."); + assert_eq!(curr_index, 3, "Current index was not updated correctly."); + assert_eq!( + next_symbol.map(|s| s.kind), + Some(SymbolKind::Whitespace), + "Whitespace after hash symbols was not detected." + ); + assert!( + iterator.next().is_none(), + "Input end reached, but new symbol was returned." + ); + } + + #[test] + fn peek_next() { + let symbols = Scanner::try_new() + .expect("Must be valid provider.") + .scan_str("#*"); + + let mut iterator = SymbolIterator::from(&symbols); + + let peeked_symbol = iterator.peeking_next(|_| true); + let next_symbol = iterator.next(); + let next_peeked_symbol = iterator.peeking_next(|_| true); + let curr_index = iterator.curr_index(); + + assert_eq!(curr_index, 1, "Current index was not updated correctly."); + assert_eq!( + peeked_symbol.map(|s| s.kind), + Some(SymbolKind::Hash), + "peek_next() did not return hash symbol." + ); + assert_eq!( + next_symbol.map(|s| s.kind), + Some(SymbolKind::Hash), + "next() did not return hash symbol." + ); + assert_eq!( + next_peeked_symbol.map(|s| s.kind), + Some(SymbolKind::Star), + "Star symbol not peeked next." + ); + assert_eq!( + iterator.next().map(|s| s.kind), + Some(SymbolKind::Star), + "Star symbol not returned." + ); + } + + #[test] + fn reach_end() { + let symbols = Scanner::try_new() + .expect("Must be valid provider.") + .scan_str("text*"); + + let mut iterator = SymbolIterator::from(&symbols).nest( + &[], + Some(Box::new(|sequence| { + sequence + .get(0) + .map(|s| s.kind == SymbolKind::Star) + .unwrap_or(false) + })), + ); + + let taken_symkinds = iterator + .take_to_end() + .iter() + .map(|s| s.kind) + .collect::>(); + + assert!(iterator.end_reached(), "Iterator end was not reached."); + assert_eq!( + taken_symkinds, + vec![ + SymbolKind::Plain, + SymbolKind::Plain, + SymbolKind::Plain, + SymbolKind::Plain + ], + "Symbols till end was reached are incorrect." + ); + } +} diff --git a/commons/src/scanner/symbol/mod.rs b/commons/src/scanner/symbol/mod.rs index 87265dff..72ce8f30 100644 --- a/commons/src/scanner/symbol/mod.rs +++ b/commons/src/scanner/symbol/mod.rs @@ -64,6 +64,19 @@ impl Default for SymbolKind { } } +impl SymbolKind { + pub fn is_not_keyword(&self) -> bool { + matches!( + self, + SymbolKind::Newline + | SymbolKind::Whitespace + | SymbolKind::Plain + | SymbolKind::Blankline + | SymbolKind::EOI + ) + } +} + /// Symbol representation of literals found in Unimarkup document. #[derive(Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct Symbol<'a> { @@ -106,14 +119,7 @@ impl fmt::Debug for Symbol<'_> { impl Symbol<'_> { // TODO: extension trait in core? pub fn is_not_keyword(&self) -> bool { - matches!( - self.kind, - SymbolKind::Newline - | SymbolKind::Whitespace - | SymbolKind::Plain - | SymbolKind::Blankline - | SymbolKind::EOI - ) + self.kind.is_not_keyword() } /// Returns the original string representation of the symbol. diff --git a/parser/src/elements/atomic/heading.rs b/parser/src/elements/atomic/heading.rs index 0b0e083e..66f707e8 100644 --- a/parser/src/elements/atomic/heading.rs +++ b/parser/src/elements/atomic/heading.rs @@ -4,7 +4,7 @@ use unimarkup_inline::{Inline, ParseInlines}; use crate::elements::blocks::Block; use crate::elements::Blocks; use crate::parser::{ElementParser, TokenizeOutput}; -use unimarkup_commons::scanner::{Symbol, SymbolIterator, SymbolKind}; +use unimarkup_commons::scanner::{Itertools, Symbol, SymbolIterator, SymbolKind}; use super::log_id::AtomicError; @@ -125,15 +125,14 @@ impl ElementParser for Heading { mut input: SymbolIterator<'i, '_>, ) -> Option>> { let mut heading_start: Vec = input - .by_ref() - .take_while(|symbol| matches!(symbol.kind, SymbolKind::Hash)) + .peeking_take_while(|symbol| matches!(symbol.kind, SymbolKind::Hash)) .map(|s| s.kind) .collect(); let level_depth = heading_start.len(); let level: HeadingLevel = HeadingLevel::try_from(level_depth).ok()?; - if input.next()?.kind != SymbolKind::Whitespace { + if input.by_ref().nth(level_depth)?.kind != SymbolKind::Whitespace { return None; } @@ -150,7 +149,7 @@ impl ElementParser for Heading { let heading_end = |sequence: &[Symbol<'_>]| match sequence.first() { Some(symbol) => matches!(symbol.kind, SymbolKind::Blankline | SymbolKind::EOI), None => false, - } || sequence[..sub_heading_start.len()].iter().map(|s| s.kind).collect::>().starts_with(&sub_heading_start); + } || (level != HeadingLevel::Level6 && sequence[..sub_heading_start.len()].iter().map(|s| s.kind).collect::>().starts_with(&sub_heading_start)); let mut content_iter = input.nest_prefixes( &[heading_start, whitespace_indents], @@ -160,6 +159,7 @@ impl ElementParser for Heading { // Line prefixes violated => invalid heading syntax if !content_iter.end_reached() { + println!("heading end not reached. {:?}", &content_symbols); return None; } diff --git a/parser/src/parser.rs b/parser/src/parser.rs index d04669ad..c733f2ca 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -104,14 +104,14 @@ impl MainParser { /// Parses Unimarkup content and produces Unimarkup blocks. pub fn parse<'i, 'f>(&self, input: impl Into>) -> Blocks { - let mut input = input.into(); + let mut input: SymbolIterator<'i, 'f> = input.into(); let mut blocks = Vec::default(); #[cfg(debug_assertions)] let mut input_len = input.len(); - 'outer: while let Some(sym) = input.next() { - match sym.kind { + 'outer: while let Some(kind) = input.peek_kind() { + match kind { // skip blanklines SymbolKind::Blankline => {} @@ -119,7 +119,7 @@ impl MainParser { SymbolKind::EOI => break, // no parser will match, parse with default parser - _ if sym.is_not_keyword() => { + _ if kind.is_not_keyword() => { let (mut res_blocks, rest_of_input) = (self.default_parser)(input) .expect("Default parser could not parse content!"); @@ -164,7 +164,7 @@ pub fn parse_unimarkup(um_content: &str, config: &mut Config) -> Document { let symbols = Scanner::try_new() .expect("Must be valid provider.") .scan_str(um_content); - + println!("{:?}", &symbols.iter().map(|s| s.kind).collect::>()); let blocks = parser.parse(&symbols); let mut unimarkup = Document { From de52811bcf2c2c19e651913fda8f1b7935610ae9 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Fri, 22 Sep 2023 17:01:53 +0200 Subject: [PATCH 04/43] feat: switch to nesting symbol iterators --- commons/src/scanner/symbol/iterator.rs | 412 ++++++++++++----------- parser/src/elements/atomic/heading.rs | 4 +- parser/src/elements/atomic/paragraph.rs | 2 +- parser/src/elements/enclosed/verbatim.rs | 38 +-- parser/src/parser.rs | 11 +- 5 files changed, 241 insertions(+), 226 deletions(-) diff --git a/commons/src/scanner/symbol/iterator.rs b/commons/src/scanner/symbol/iterator.rs index b28af11e..c0810cd8 100644 --- a/commons/src/scanner/symbol/iterator.rs +++ b/commons/src/scanner/symbol/iterator.rs @@ -1,4 +1,4 @@ -use std::sync::Arc; +use std::{borrow::BorrowMut, rc::Rc}; use itertools::PeekingNext; @@ -6,56 +6,93 @@ use super::{Symbol, SymbolKind}; pub use itertools::*; -#[derive(Default, Clone)] -pub struct SymbolIterator<'input, 'end_fn> { - symbols: &'input [Symbol<'input>], - curr_index: usize, +#[derive(Clone)] +pub struct SymbolIterator<'input, 'parent, 'end_fn> { + kind: SymbolIteratorKind<'input, 'parent>, start_index: usize, - peek_index: usize, line_prefixes: Vec>, - end: Vec>>, + end: Option>>, + iter_end: bool, } -pub type IteratorEndFn<'input, 'end_fn> = - Box]) -> bool + Send + Sync + 'end_fn>; +#[derive(Clone)] +pub struct SymbolIteratorRoot<'input> { + symbols: &'input [Symbol<'input>], + curr_index: usize, + peek_index: usize, + new_line: bool, +} -impl<'input, 'end_fn> From<&'input [Symbol<'input>]> for SymbolIterator<'input, 'end_fn> { +impl<'input> From<&'input [Symbol<'input>]> for SymbolIteratorRoot<'input> { fn from(value: &'input [Symbol<'input>]) -> Self { - SymbolIterator { + SymbolIteratorRoot { symbols: value, curr_index: 0, - start_index: 0, peek_index: 0, - line_prefixes: vec![], - end: vec![], + new_line: false, } } } -impl<'input, 'end_fn> From<&'input Vec>> for SymbolIterator<'input, 'end_fn> { +impl<'input> From<&'input Vec>> for SymbolIteratorRoot<'input> { fn from(value: &'input Vec>) -> Self { - SymbolIterator { + SymbolIteratorRoot { symbols: value, curr_index: 0, - start_index: 0, peek_index: 0, + new_line: false, + } + } +} + +impl<'input> SymbolIteratorRoot<'input> { + fn remaining_symbols(&self) -> &'input [Symbol<'input>] { + &self.symbols[self.curr_index..] + } +} + +#[derive(Clone)] +pub enum SymbolIteratorKind<'input, 'parent> { + Nested(Rc>), + Root(SymbolIteratorRoot<'input>), +} + +pub type IteratorEndFn<'input, 'end_fn> = Box]) -> bool + 'end_fn>; + +impl<'input, 'parent, 'end_fn> From<&'input [Symbol<'input>]> + for SymbolIterator<'input, 'parent, 'end_fn> +{ + fn from(value: &'input [Symbol<'input>]) -> Self { + SymbolIterator { + kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(value)), + start_index: 0, line_prefixes: vec![], - end: vec![], + end: None, + iter_end: false, } } } -impl<'input, 'end_fn> SymbolIterator<'input, 'end_fn> { - pub fn new(symbols: &'input [Symbol<'input>], start_index: usize) -> Self { +impl<'input, 'parent, 'end_fn> From<&'input Vec>> + for SymbolIterator<'input, 'parent, 'end_fn> +{ + fn from(value: &'input Vec>) -> Self { SymbolIterator { - symbols, - curr_index: start_index, - start_index, - peek_index: start_index, + kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(value)), + start_index: 0, line_prefixes: vec![], - end: vec![], + end: None, + iter_end: false, } } +} + +impl<'input, 'parent, 'end_fn> SymbolIterator<'input, 'parent, 'end_fn> { + pub fn new(symbols: &'input [Symbol<'input>], start_index: usize) -> Self { + let mut iter = SymbolIterator::from(symbols); + iter.start_index = start_index; + iter + } pub fn with( symbols: &'input [Symbol<'input>], @@ -64,21 +101,26 @@ impl<'input, 'end_fn> SymbolIterator<'input, 'end_fn> { end: IteratorEndFn<'input, 'end_fn>, ) -> Self { SymbolIterator { - symbols, - curr_index: start_index, + kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(symbols)), start_index, - peek_index: start_index, line_prefixes: line_prefix.into(), - end: vec![Arc::new(end)], + end: Some(Rc::new(end)), + iter_end: false, } } pub fn len(&self) -> usize { - self.symbols[self.start_index..].len() + match &self.kind { + SymbolIteratorKind::Nested(parent) => parent.len(), + SymbolIteratorKind::Root(root) => root.symbols[self.start_index..].len(), + } } pub fn is_empty(&self) -> bool { - self.symbols[self.start_index..].is_empty() + match &self.kind { + SymbolIteratorKind::Nested(parent) => parent.is_empty(), + SymbolIteratorKind::Root(root) => root.symbols[self.start_index..].is_empty(), + } } pub fn start_index(&self) -> usize { @@ -86,171 +128,99 @@ impl<'input, 'end_fn> SymbolIterator<'input, 'end_fn> { } pub fn curr_index(&self) -> usize { - self.curr_index + match &self.kind { + SymbolIteratorKind::Nested(parent) => parent.curr_index(), + SymbolIteratorKind::Root(root) => root.curr_index, + } } pub fn set_curr_index(&mut self, index: usize) { if index >= self.start_index { - self.curr_index = index; - self.peek_index = self.curr_index; + match self.kind.borrow_mut() { + SymbolIteratorKind::Nested(parent) => { + if let Some(p) = Rc::get_mut(parent) { + p.set_curr_index(index) + } + } + SymbolIteratorKind::Root(root) => { + root.curr_index = index; + root.peek_index = index; + } + } } } pub fn eoi(&self) -> bool { - self.curr_index == self.symbols.len() + self.curr_index() == self.len() } pub fn remaining_symbols(&self) -> &'input [Symbol<'input>] { - &self.symbols[self.curr_index..] + match &self.kind { + SymbolIteratorKind::Nested(parent) => parent.remaining_symbols(), + SymbolIteratorKind::Root(root) => root.remaining_symbols(), + } + } + + pub fn new_line(&self) -> bool { + match &self.kind { + SymbolIteratorKind::Nested(parent) => parent.new_line(), + SymbolIteratorKind::Root(root) => root.new_line, + } } pub fn peek(&self) -> Option<&'input Symbol<'input>> { - self.symbols.get(self.curr_index) + match &self.kind { + SymbolIteratorKind::Nested(parent) => parent.peek(), + SymbolIteratorKind::Root(root) => root.symbols.get(root.curr_index), + } } pub fn peek_kind(&self) -> Option { - self.symbols.get(self.curr_index).map(|s| s.kind) + self.peek().map(|s| s.kind) } pub fn nest<'inner_end>( - &self, + self, line_prefix: &[SymbolKind], end: Option>, - ) -> SymbolIterator<'input, 'inner_end> + ) -> SymbolIterator<'input, 'parent, 'inner_end> where 'end_fn: 'inner_end, + 'end_fn: 'parent, { - let mut nested_prefixes = self.line_prefixes.clone(); - if nested_prefixes.is_empty() { - nested_prefixes.push(vec![]); - } - - if !line_prefix.contains(&SymbolKind::Blankline) { - nested_prefixes - .iter_mut() - .for_each(|p| p.extend_from_slice(line_prefix)); - } - - let mut outer_end = self.end.clone(); - let merged_end = match end { - Some(inner_end) => { - outer_end.push(Arc::new(inner_end)); - outer_end - } - None => outer_end, - }; + let curr_index = self.curr_index(); + let iter_end = self.iter_end; SymbolIterator { - symbols: self.symbols, - curr_index: self.curr_index, - start_index: self.curr_index, - peek_index: self.curr_index, - line_prefixes: nested_prefixes, - end: merged_end, + kind: SymbolIteratorKind::Nested(Rc::new(self)), + start_index: curr_index, + line_prefixes: vec![line_prefix.to_vec()], + end: end.map(Rc::new), + iter_end, } } pub fn nest_prefixes<'inner_end>( - &self, - line_prefixes: &[Vec], + self, + line_prefixes: impl Into>>, end: Option>, - ) -> SymbolIterator<'input, 'inner_end> + ) -> SymbolIterator<'input, 'parent, 'inner_end> where 'end_fn: 'inner_end, + 'end_fn: 'parent, { - let prefixes = if self.line_prefixes.is_empty() { - let mut nested_prefixes = self.line_prefixes.clone(); - nested_prefixes.extend_from_slice(line_prefixes); - nested_prefixes - } else { - // create cartesian prefix - self.line_prefixes - .iter() - .flat_map(|outer_prefixes| { - line_prefixes.iter().map(|inner_prefixes| { - let mut prefix = outer_prefixes.clone(); - - if !inner_prefixes.contains(&SymbolKind::Blankline) { - prefix.extend(inner_prefixes); - } - - prefix - }) - }) - .collect() - }; - - let mut outer_end = self.end.clone(); - let merged_end = match end { - Some(inner_end) => { - outer_end.push(Arc::new(inner_end)); - outer_end - } - None => outer_end, - }; + let curr_index = self.curr_index(); + let iter_end = self.iter_end; SymbolIterator { - symbols: self.symbols, - curr_index: self.curr_index, - start_index: self.curr_index, - peek_index: self.curr_index, - line_prefixes: prefixes, - end: merged_end, + kind: SymbolIteratorKind::Nested(Rc::new(self)), + start_index: curr_index, + line_prefixes: line_prefixes.into(), + end: end.map(Rc::new), + iter_end, } } - // #[allow(clippy::should_implement_trait)] - // pub fn next(&mut self) -> Result<&Symbol<'input>, SymbolIteratorError> { - // if self.eoi() { - // return Err(SymbolIteratorError::Eoi); - // } - - // let mut curr_symbolkind = match self.symbols.get(self.curr_index) { - // Some(curr_symbol) => curr_symbol.kind, - // None => return Err(SymbolIteratorError::Eoi), - // }; - - // if curr_symbolkind == SymbolKind::Newline && !self.line_prefixes.is_empty() { - // let curr_prefix_symbolkinds: Vec<_> = self.symbols[self.curr_index + 1..] - // .iter() - // .map(|s| s.kind) - // .collect(); - - // let mut prefix_matched = false; - - // for prefix in &self.line_prefixes { - // if prefix == &curr_prefix_symbolkinds { - // prefix_matched = true; - // self.curr_index += prefix.len(); - // curr_symbolkind = match self.symbols.get(self.curr_index) { - // Some(curr_symbol) => curr_symbol.kind, - // None => return Err(SymbolIteratorError::Eoi), - // }; - // break; - // } - // } - - // if !prefix_matched { - // return Err(SymbolIteratorError::PrefixMismatch); - // } - // } else if curr_symbolkind == SymbolKind::Blankline - // && contains_only_non_whitespace_sequences(&self.line_prefixes) - // { - // return Err(SymbolIteratorError::PrefixMismatch); - // } - - // for f in &self.end { - // if f(&self.symbols[self.curr_index..]) { - // return Err(SymbolIteratorError::EndReached); - // } - // } - - // let symbol_opt = self.symbols.get(self.curr_index); - // self.curr_index += 1; - - // symbol_opt.ok_or(SymbolIteratorError::Eoi) - // } - pub fn skip_to_end(mut self) -> Self { while self.next().is_some() {} @@ -270,89 +240,133 @@ impl<'input, 'end_fn> SymbolIterator<'input, 'end_fn> { } pub fn end_reached(&self) -> bool { - for f in &self.end { - if f(&self.symbols[self.curr_index..]) { - return true; - } + self.iter_end + } + + pub fn parent(self) -> Option> { + match self.kind { + SymbolIteratorKind::Nested(parent) => Rc::into_inner(parent), + SymbolIteratorKind::Root(_) => None, } + } +} + +impl<'input> Iterator for SymbolIteratorRoot<'input> { + type Item = &'input Symbol<'input>; - false + fn next(&mut self) -> Option { + let sym = self.symbols.get(self.curr_index); + + if let Some(symbol) = sym { + self.curr_index += 1; + self.peek_index = self.curr_index; + self.new_line = symbol.kind == SymbolKind::Newline; + } + + sym } } -impl<'input, 'end_fn> Iterator for SymbolIterator<'input, 'end_fn> { +impl<'input> PeekingNext for SymbolIteratorRoot<'input> { + fn peeking_next(&mut self, accept: F) -> Option + where + Self: Sized, + F: FnOnce(&Self::Item) -> bool, + { + let curr_index = self.curr_index; + self.curr_index = self.peek_index; // Note: peek_index increases until `next()` is called directly + let next_item = self.next(); + + // revert index to simulate lookahead + self.curr_index = curr_index; + + match next_item { + Some(symbol) => { + if (accept)(&symbol) { + next_item + } else { + None + } + } + None => None, + } + } +} + +impl<'input, 'parent, 'end_fn> Iterator for SymbolIterator<'input, 'parent, 'end_fn> { type Item = &'input Symbol<'input>; fn next(&mut self) -> Option { - if self.eoi() { + if self.eoi() || self.end_reached() { return None; } - let curr_symbol_opt = self.symbols.get(self.curr_index); - let curr_symbolkind = match curr_symbol_opt { - Some(curr_symbol) => curr_symbol.kind, - None => return None, + if self.peek_kind()? == SymbolKind::Blankline + && contains_only_non_whitespace_sequences(&self.line_prefixes) + { + return None; + } + + let symbols = match &self.kind { + SymbolIteratorKind::Nested(parent) => { + if parent.end_reached() { + self.iter_end = true; + return None; + } else { + parent.remaining_symbols() + } + } + SymbolIteratorKind::Root(root) => root.remaining_symbols(), + }; + + if let Some(end_fn) = &self.end { + if (end_fn)(symbols) { + self.iter_end = true; + return None; + } + } + + let curr_symbol_opt = match self.kind.borrow_mut() { + SymbolIteratorKind::Nested(parent) => Rc::get_mut(parent)?.next(), + SymbolIteratorKind::Root(root) => root.next(), }; - if curr_symbolkind == SymbolKind::Newline && !self.line_prefixes.is_empty() { + if self.new_line() && !self.line_prefixes.is_empty() { let mut prefix_matched = false; for prefix in &self.line_prefixes { - let curr_prefix_symbolkinds: Vec<_> = self.symbols - [self.curr_index + 1..self.curr_index + prefix.len()] + let curr_prefix_symbolkinds: Vec<_> = self.remaining_symbols()[..prefix.len()] .iter() .map(|s| s.kind) .collect(); if prefix == &curr_prefix_symbolkinds { prefix_matched = true; - self.curr_index += prefix.len(); - self.peek_index = self.curr_index; + // Note: Only update index. Prevents `new_line()` from being changed by possible parent + self.set_curr_index(self.curr_index() + prefix.len()); break; } } + // Note: This mostly indicates a syntax violation, so skipped symbol is ok. if !prefix_matched { return None; } - } else if curr_symbolkind == SymbolKind::Blankline - && contains_only_non_whitespace_sequences(&self.line_prefixes) - { - return None; - } - - if self.end_reached() { - return None; } - self.curr_index += 1; - self.peek_index = self.curr_index; curr_symbol_opt } } -impl<'input, 'end_fn> PeekingNext for SymbolIterator<'input, 'end_fn> { +impl<'input, 'parent, 'end_fn> PeekingNext for SymbolIterator<'input, 'parent, 'end_fn> { fn peeking_next(&mut self, accept: F) -> Option where Self: Sized, F: FnOnce(&Self::Item) -> bool, { - let curr_index = self.curr_index; - self.curr_index = self.peek_index; // Note: peek_index increases until `next()` is called directly - let next_item = self.next(); - - // revert index to simulate lookahead - self.curr_index = curr_index; - - match next_item { - Some(symbol) => { - if (accept)(&symbol) { - next_item - } else { - None - } - } - None => None, + match self.kind.borrow_mut() { + SymbolIteratorKind::Nested(parent) => Rc::get_mut(parent)?.peeking_next(accept), + SymbolIteratorKind::Root(root) => root.peeking_next(accept), } } } diff --git a/parser/src/elements/atomic/heading.rs b/parser/src/elements/atomic/heading.rs index 66f707e8..79058154 100644 --- a/parser/src/elements/atomic/heading.rs +++ b/parser/src/elements/atomic/heading.rs @@ -122,7 +122,7 @@ impl ElementParser for Heading { type Token<'a> = self::HeadingToken<'a>; fn tokenize<'i>( - mut input: SymbolIterator<'i, '_>, + mut input: SymbolIterator<'i, '_, '_>, ) -> Option>> { let mut heading_start: Vec = input .peeking_take_while(|symbol| matches!(symbol.kind, SymbolKind::Hash)) @@ -152,7 +152,7 @@ impl ElementParser for Heading { } || (level != HeadingLevel::Level6 && sequence[..sub_heading_start.len()].iter().map(|s| s.kind).collect::>().starts_with(&sub_heading_start)); let mut content_iter = input.nest_prefixes( - &[heading_start, whitespace_indents], + [heading_start, whitespace_indents], Some(Box::new(heading_end)), ); let content_symbols = content_iter.take_to_end(); diff --git a/parser/src/elements/atomic/paragraph.rs b/parser/src/elements/atomic/paragraph.rs index d754b427..697367be 100644 --- a/parser/src/elements/atomic/paragraph.rs +++ b/parser/src/elements/atomic/paragraph.rs @@ -64,7 +64,7 @@ impl ElementParser for Paragraph { type Token<'a> = &'a Symbol<'a>; fn tokenize<'i>( - mut input: SymbolIterator<'i, '_>, + mut input: SymbolIterator<'i, '_, '_>, ) -> Option>> { let content = input.by_ref().take_while(not_closing_symbol).collect(); diff --git a/parser/src/elements/enclosed/verbatim.rs b/parser/src/elements/enclosed/verbatim.rs index 3bc88ebe..574e5958 100644 --- a/parser/src/elements/enclosed/verbatim.rs +++ b/parser/src/elements/enclosed/verbatim.rs @@ -32,7 +32,7 @@ impl ElementParser for Verbatim { type Token<'a> = self::Token<'a>; fn tokenize<'i>( - mut input: SymbolIterator<'i, '_>, + mut input: SymbolIterator<'i, '_, '_>, ) -> Option>> { let start_delim: Vec<_> = input .by_ref() @@ -47,24 +47,22 @@ impl ElementParser for Verbatim { let end_sequence = std::iter::repeat(SymbolKind::Tick) .take(start_delim_len) .collect::>(); - let mut content_iter = input.nest( - &[], - Some(Box::new(|sequence| { - sequence[..start_delim_len] - .iter() - .map(|s| s.kind) - .collect::>() - .starts_with(&end_sequence) - })), - ); - - let content = content_iter.take_to_end(); - if !content_iter.end_reached() { - return None; - } - - input.set_curr_index(content_iter.curr_index()); - + let end_fn = Box::new(|sequence: &[Symbol<'i>]| { + sequence[..start_delim_len] + .iter() + .map(|s| s.kind) + .collect::>() + .starts_with(&end_sequence) + }); + + // let mut content_iter = input.nest(&[], Some(end_fn)); + + // let content = content_iter.take_to_end(); + // if !content_iter.end_reached() { + // return None; + // } + + // input = content_iter.parent()?; match input .by_ref() .take(start_delim_len) @@ -82,7 +80,7 @@ impl ElementParser for Verbatim { // TODO: handle language attribute let output = TokenizeOutput { - tokens: vec![Token::StartDelim(start_delim), Token::Content(content)], + tokens: vec![Token::StartDelim(start_delim), Token::Content(vec![])], //content)], rest_of_input: input.remaining_symbols(), }; diff --git a/parser/src/parser.rs b/parser/src/parser.rs index c733f2ca..81897b2f 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -17,7 +17,8 @@ use crate::{ use unimarkup_commons::config::Config; /// Parser as function that can parse Unimarkup content -pub type ParserFn = for<'i, 'f> fn(SymbolIterator<'i, 'f>) -> Option<(Blocks, &'i [Symbol<'i>])>; +pub type ParserFn = + for<'i, 'p, 'f> fn(SymbolIterator<'i, 'p, 'f>) -> Option<(Blocks, &'i [Symbol<'i>])>; /// Output of symbol tokenization by a parser of a block. pub(crate) struct TokenizeOutput<'i, T> @@ -34,7 +35,9 @@ pub(crate) trait ElementParser { type Token<'a>; /// Function that converts input symbols into tokens specific for the given element. - fn tokenize<'i>(input: SymbolIterator<'i, '_>) -> Option>>; + fn tokenize<'i>( + input: SymbolIterator<'i, '_, '_>, + ) -> Option>>; /// Function that parses tokenization output and produces one or more Unimarkup elements. fn parse(input: Vec>) -> Option; @@ -103,8 +106,8 @@ impl MainParser { } /// Parses Unimarkup content and produces Unimarkup blocks. - pub fn parse<'i, 'f>(&self, input: impl Into>) -> Blocks { - let mut input: SymbolIterator<'i, 'f> = input.into(); + pub fn parse<'i, 'p, 'f>(&self, input: impl Into>) -> Blocks { + let mut input: SymbolIterator<'i, 'p, 'f> = input.into(); let mut blocks = Vec::default(); #[cfg(debug_assertions)] From 5398be03b2f86d28619dd8afa010cafd6ae500a6 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Fri, 22 Sep 2023 20:16:57 +0200 Subject: [PATCH 05/43] fix: add prefix line test for symbol iterator --- commons/src/scanner/symbol/iterator.rs | 36 ++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/commons/src/scanner/symbol/iterator.rs b/commons/src/scanner/symbol/iterator.rs index c0810cd8..e2fadcaa 100644 --- a/commons/src/scanner/symbol/iterator.rs +++ b/commons/src/scanner/symbol/iterator.rs @@ -507,4 +507,40 @@ mod test { "Symbols till end was reached are incorrect." ); } + + #[test] + fn with_nested_and_parent_prefix() { + let symbols = Scanner::try_new() + .expect("Must be valid provider.") + .scan_str("a\n* *b"); + + let iterator = SymbolIterator::with( + &symbols, + 0, + vec![vec![SymbolKind::Star, SymbolKind::Whitespace]], + Box::new(|_| false), + ); + + let mut inner = iterator.nest( + &[SymbolKind::Star], + Some(Box::new(|sequence| { + sequence + .get(0) + .map(|s| s.kind == SymbolKind::Star) + .unwrap_or(false) + })), + ); + + let sym_kinds = inner + .take_to_end() + .iter() + .map(|s| s.kind) + .collect::>(); + + assert_eq!( + sym_kinds, + vec![SymbolKind::Plain, SymbolKind::Newline, SymbolKind::Plain], + "Prefix symbols not correctly skipped" + ); + } } From aba822442ac5886ca0392e652faf26585d7025b8 Mon Sep 17 00:00:00 2001 From: Nadir Fejzic Date: Fri, 22 Sep 2023 20:41:48 +0200 Subject: [PATCH 06/43] feat: simplify iterator nesting parsers --- commons/src/scanner/mod.rs | 8 +- commons/src/scanner/symbol/iterator.rs | 97 ++++++++++-------------- core/tests/test_runner/mod.rs | 2 +- parser/src/elements/atomic/heading.rs | 41 ++++++---- parser/src/elements/atomic/paragraph.rs | 4 +- parser/src/elements/enclosed/verbatim.rs | 14 ++-- parser/src/parser.rs | 36 ++++----- 7 files changed, 101 insertions(+), 101 deletions(-) diff --git a/commons/src/scanner/mod.rs b/commons/src/scanner/mod.rs index 9808c555..604f0461 100644 --- a/commons/src/scanner/mod.rs +++ b/commons/src/scanner/mod.rs @@ -6,7 +6,7 @@ mod symbol; use icu::segmenter::{GraphemeClusterSegmenter, SegmenterError}; use icu_provider_adapters::fallback::LocaleFallbackProvider; -use position::{Offset, Position}; +use position::{Offset, Position as SymPos}; pub use symbol::{iterator::*, Symbol, SymbolKind}; #[derive(Debug, Clone)] @@ -48,7 +48,7 @@ impl Scanner { pub fn scan_str<'s>(&self, input: &'s str) -> Vec> { let mut symbols: Vec = Vec::new(); - let mut curr_pos: Position = Position::default(); + let mut curr_pos: SymPos = SymPos::default(); let mut prev_offset = 0; // skip(1) to ignore break at start of input @@ -57,12 +57,12 @@ impl Scanner { let mut kind = SymbolKind::from(grapheme); let end_pos = if kind == SymbolKind::Newline { - Position { + SymPos { line: (curr_pos.line + 1), ..Default::default() } } else { - Position { + SymPos { line: curr_pos.line, col_utf8: (curr_pos.col_utf8 + grapheme.len()), col_utf16: (curr_pos.col_utf16 + grapheme.encode_utf16().count()), diff --git a/commons/src/scanner/symbol/iterator.rs b/commons/src/scanner/symbol/iterator.rs index c0810cd8..ea337f53 100644 --- a/commons/src/scanner/symbol/iterator.rs +++ b/commons/src/scanner/symbol/iterator.rs @@ -1,17 +1,15 @@ use std::{borrow::BorrowMut, rc::Rc}; -use itertools::PeekingNext; - use super::{Symbol, SymbolKind}; pub use itertools::*; #[derive(Clone)] -pub struct SymbolIterator<'input, 'parent, 'end_fn> { - kind: SymbolIteratorKind<'input, 'parent>, +pub struct SymbolIterator<'input> { + kind: SymbolIteratorKind<'input>, start_index: usize, line_prefixes: Vec>, - end: Option>>, + end: Option>, iter_end: bool, } @@ -52,16 +50,14 @@ impl<'input> SymbolIteratorRoot<'input> { } #[derive(Clone)] -pub enum SymbolIteratorKind<'input, 'parent> { - Nested(Rc>), +pub enum SymbolIteratorKind<'input> { + Nested(Box>), Root(SymbolIteratorRoot<'input>), } -pub type IteratorEndFn<'input, 'end_fn> = Box]) -> bool + 'end_fn>; +pub type IteratorEndFn<'input> = Rc]) -> bool)>; -impl<'input, 'parent, 'end_fn> From<&'input [Symbol<'input>]> - for SymbolIterator<'input, 'parent, 'end_fn> -{ +impl<'input> From<&'input [Symbol<'input>]> for SymbolIterator<'input> { fn from(value: &'input [Symbol<'input>]) -> Self { SymbolIterator { kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(value)), @@ -73,9 +69,7 @@ impl<'input, 'parent, 'end_fn> From<&'input [Symbol<'input>]> } } -impl<'input, 'parent, 'end_fn> From<&'input Vec>> - for SymbolIterator<'input, 'parent, 'end_fn> -{ +impl<'input> From<&'input Vec>> for SymbolIterator<'input> { fn from(value: &'input Vec>) -> Self { SymbolIterator { kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(value)), @@ -87,7 +81,7 @@ impl<'input, 'parent, 'end_fn> From<&'input Vec>> } } -impl<'input, 'parent, 'end_fn> SymbolIterator<'input, 'parent, 'end_fn> { +impl<'input> SymbolIterator<'input> { pub fn new(symbols: &'input [Symbol<'input>], start_index: usize) -> Self { let mut iter = SymbolIterator::from(symbols); iter.start_index = start_index; @@ -98,13 +92,13 @@ impl<'input, 'parent, 'end_fn> SymbolIterator<'input, 'parent, 'end_fn> { symbols: &'input [Symbol<'input>], start_index: usize, line_prefix: impl Into>>, - end: IteratorEndFn<'input, 'end_fn>, + end: IteratorEndFn<'input>, ) -> Self { SymbolIterator { kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(symbols)), start_index, line_prefixes: line_prefix.into(), - end: Some(Rc::new(end)), + end: Some(end), iter_end: false, } } @@ -137,11 +131,7 @@ impl<'input, 'parent, 'end_fn> SymbolIterator<'input, 'parent, 'end_fn> { pub fn set_curr_index(&mut self, index: usize) { if index >= self.start_index { match self.kind.borrow_mut() { - SymbolIteratorKind::Nested(parent) => { - if let Some(p) = Rc::get_mut(parent) { - p.set_curr_index(index) - } - } + SymbolIteratorKind::Nested(parent) => parent.set_curr_index(index), SymbolIteratorKind::Root(root) => { root.curr_index = index; root.peek_index = index; @@ -179,44 +169,36 @@ impl<'input, 'parent, 'end_fn> SymbolIterator<'input, 'parent, 'end_fn> { self.peek().map(|s| s.kind) } - pub fn nest<'inner_end>( + pub fn nest( self, line_prefix: &[SymbolKind], - end: Option>, - ) -> SymbolIterator<'input, 'parent, 'inner_end> - where - 'end_fn: 'inner_end, - 'end_fn: 'parent, - { + end: Option>, + ) -> SymbolIterator<'input> { let curr_index = self.curr_index(); let iter_end = self.iter_end; SymbolIterator { - kind: SymbolIteratorKind::Nested(Rc::new(self)), + kind: SymbolIteratorKind::Nested(Box::new(self)), start_index: curr_index, line_prefixes: vec![line_prefix.to_vec()], - end: end.map(Rc::new), + end, iter_end, } } - pub fn nest_prefixes<'inner_end>( - self, + pub fn nest_prefixes( + &self, line_prefixes: impl Into>>, - end: Option>, - ) -> SymbolIterator<'input, 'parent, 'inner_end> - where - 'end_fn: 'inner_end, - 'end_fn: 'parent, - { + end: Option>, + ) -> SymbolIterator<'input> { let curr_index = self.curr_index(); let iter_end = self.iter_end; SymbolIterator { - kind: SymbolIteratorKind::Nested(Rc::new(self)), + kind: SymbolIteratorKind::Nested(Box::new(self.clone())), start_index: curr_index, line_prefixes: line_prefixes.into(), - end: end.map(Rc::new), + end, iter_end, } } @@ -243,9 +225,9 @@ impl<'input, 'parent, 'end_fn> SymbolIterator<'input, 'parent, 'end_fn> { self.iter_end } - pub fn parent(self) -> Option> { + pub fn parent(self) -> Option> { match self.kind { - SymbolIteratorKind::Nested(parent) => Rc::into_inner(parent), + SymbolIteratorKind::Nested(parent) => Some(*parent), SymbolIteratorKind::Root(_) => None, } } @@ -255,15 +237,16 @@ impl<'input> Iterator for SymbolIteratorRoot<'input> { type Item = &'input Symbol<'input>; fn next(&mut self) -> Option { - let sym = self.symbols.get(self.curr_index); + match self.symbols.get(self.curr_index) { + Some(symbol) => { + self.curr_index += 1; + self.peek_index = self.curr_index; + self.new_line = symbol.kind == SymbolKind::Newline; - if let Some(symbol) = sym { - self.curr_index += 1; - self.peek_index = self.curr_index; - self.new_line = symbol.kind == SymbolKind::Newline; + Some(symbol) + } + None => None, } - - sym } } @@ -293,7 +276,7 @@ impl<'input> PeekingNext for SymbolIteratorRoot<'input> { } } -impl<'input, 'parent, 'end_fn> Iterator for SymbolIterator<'input, 'parent, 'end_fn> { +impl<'input> Iterator for SymbolIterator<'input> { type Item = &'input Symbol<'input>; fn next(&mut self) -> Option { @@ -326,8 +309,8 @@ impl<'input, 'parent, 'end_fn> Iterator for SymbolIterator<'input, 'parent, 'end } } - let curr_symbol_opt = match self.kind.borrow_mut() { - SymbolIteratorKind::Nested(parent) => Rc::get_mut(parent)?.next(), + let curr_symbol_opt = match &mut self.kind { + SymbolIteratorKind::Nested(parent) => parent.next(), SymbolIteratorKind::Root(root) => root.next(), }; @@ -358,14 +341,14 @@ impl<'input, 'parent, 'end_fn> Iterator for SymbolIterator<'input, 'parent, 'end } } -impl<'input, 'parent, 'end_fn> PeekingNext for SymbolIterator<'input, 'parent, 'end_fn> { +impl<'input> PeekingNext for SymbolIterator<'input> { fn peeking_next(&mut self, accept: F) -> Option where Self: Sized, F: FnOnce(&Self::Item) -> bool, { match self.kind.borrow_mut() { - SymbolIteratorKind::Nested(parent) => Rc::get_mut(parent)?.peeking_next(accept), + SymbolIteratorKind::Nested(parent) => parent.peeking_next(accept), SymbolIteratorKind::Root(root) => root.peeking_next(accept), } } @@ -404,6 +387,8 @@ fn contains_non_whitespace(sequence: &[SymbolKind]) -> bool { #[cfg(test)] mod test { + use std::rc::Rc; + use itertools::{Itertools, PeekingNext}; use crate::scanner::{Scanner, SymbolKind}; @@ -481,7 +466,7 @@ mod test { let mut iterator = SymbolIterator::from(&symbols).nest( &[], - Some(Box::new(|sequence| { + Some(Rc::new(|sequence| { sequence .get(0) .map(|s| s.kind == SymbolKind::Star) diff --git a/core/tests/test_runner/mod.rs b/core/tests/test_runner/mod.rs index 873da642..f923e72d 100644 --- a/core/tests/test_runner/mod.rs +++ b/core/tests/test_runner/mod.rs @@ -44,7 +44,7 @@ macro_rules! snapshot_parser { |input| { let parse = <$ty>::generate_parser(); - parse(input.into()) + parse(&mut input.into()) .map(|(block, rest)| (Snapshot(block).as_snapshot(), rest)) .expect("Could not parse content!") } diff --git a/parser/src/elements/atomic/heading.rs b/parser/src/elements/atomic/heading.rs index 79058154..84491c53 100644 --- a/parser/src/elements/atomic/heading.rs +++ b/parser/src/elements/atomic/heading.rs @@ -1,3 +1,5 @@ +use std::rc::Rc; + use strum_macros::*; use unimarkup_inline::{Inline, ParseInlines}; @@ -121,9 +123,7 @@ pub enum HeadingToken<'a> { impl ElementParser for Heading { type Token<'a> = self::HeadingToken<'a>; - fn tokenize<'i>( - mut input: SymbolIterator<'i, '_, '_>, - ) -> Option>> { + fn tokenize<'i>(input: &mut SymbolIterator<'i>) -> Option>> { let mut heading_start: Vec = input .peeking_take_while(|symbol| matches!(symbol.kind, SymbolKind::Hash)) .map(|s| s.kind) @@ -141,25 +141,36 @@ impl ElementParser for Heading { .take(heading_start.len()) .collect(); - let mut sub_heading_start: Vec = std::iter::repeat(SymbolKind::Hash) + let sub_heading_start: Vec = std::iter::repeat(SymbolKind::Hash) .take(heading_start.len()) + .chain([SymbolKind::Whitespace]) .collect(); - sub_heading_start.push(SymbolKind::Whitespace); - let heading_end = |sequence: &[Symbol<'_>]| match sequence.first() { - Some(symbol) => matches!(symbol.kind, SymbolKind::Blankline | SymbolKind::EOI), - None => false, - } || (level != HeadingLevel::Level6 && sequence[..sub_heading_start.len()].iter().map(|s| s.kind).collect::>().starts_with(&sub_heading_start)); + let heading_end = move |sequence: &[Symbol<'_>]| { + let is_eoi = match sequence.first() { + Some(symbol) => matches!(symbol.kind, SymbolKind::Blankline | SymbolKind::EOI), + None => false, + }; + + let sequence_matched = level != HeadingLevel::Level6 + && sequence[..sub_heading_start.len()] + .iter() + .map(|s| s.kind) + .zip(&sub_heading_start) + .all(|(seq, sub)| seq == *sub); + + is_eoi || sequence_matched + }; let mut content_iter = input.nest_prefixes( [heading_start, whitespace_indents], - Some(Box::new(heading_end)), + Some(Rc::new(heading_end)), ); let content_symbols = content_iter.take_to_end(); // Line prefixes violated => invalid heading syntax if !content_iter.end_reached() { - println!("heading end not reached. {:?}", &content_symbols); + // println!("heading end not reached. {:?}", &content_symbols); return None; } @@ -176,8 +187,12 @@ impl ElementParser for Heading { } fn parse(input: Vec>) -> Option { - let HeadingToken::Level(level) = input[0] else {return None}; - let HeadingToken::Content(ref symbols) = input[1] else {return None}; + let HeadingToken::Level(level) = input[0] else { + return None; + }; + let HeadingToken::Content(ref symbols) = input[1] else { + return None; + }; let inline_start = symbols.get(0)?.start; // TODO: Adapt inline lexer to also work with Vec<&'input Symbol> diff --git a/parser/src/elements/atomic/paragraph.rs b/parser/src/elements/atomic/paragraph.rs index 697367be..8ac699fa 100644 --- a/parser/src/elements/atomic/paragraph.rs +++ b/parser/src/elements/atomic/paragraph.rs @@ -63,9 +63,7 @@ fn not_closing_symbol(symbol: &&Symbol) -> bool { impl ElementParser for Paragraph { type Token<'a> = &'a Symbol<'a>; - fn tokenize<'i>( - mut input: SymbolIterator<'i, '_, '_>, - ) -> Option>> { + fn tokenize<'i>(input: &mut SymbolIterator<'i>) -> Option>> { let content = input.by_ref().take_while(not_closing_symbol).collect(); let output = TokenizeOutput { diff --git a/parser/src/elements/enclosed/verbatim.rs b/parser/src/elements/enclosed/verbatim.rs index 574e5958..b7887b33 100644 --- a/parser/src/elements/enclosed/verbatim.rs +++ b/parser/src/elements/enclosed/verbatim.rs @@ -31,9 +31,7 @@ pub(crate) enum Token<'a> { impl ElementParser for Verbatim { type Token<'a> = self::Token<'a>; - fn tokenize<'i>( - mut input: SymbolIterator<'i, '_, '_>, - ) -> Option>> { + fn tokenize<'i>(input: &mut SymbolIterator<'i>) -> Option>> { let start_delim: Vec<_> = input .by_ref() .take_while(|symbol| matches!(symbol.kind, SymbolKind::Tick)) @@ -47,7 +45,7 @@ impl ElementParser for Verbatim { let end_sequence = std::iter::repeat(SymbolKind::Tick) .take(start_delim_len) .collect::>(); - let end_fn = Box::new(|sequence: &[Symbol<'i>]| { + let _end_fn = Box::new(|sequence: &[Symbol<'i>]| { sequence[..start_delim_len] .iter() .map(|s| s.kind) @@ -88,10 +86,14 @@ impl ElementParser for Verbatim { } fn parse(input: Vec>) -> Option { - let Token::StartDelim(start) = input.get(0)? else { return None }; + let Token::StartDelim(start) = input.get(0)? else { + return None; + }; let line_nr = start.get(0)?.start.line; - let Token::Content(symbols) = input.get(1)? else { return None }; + let Token::Content(symbols) = input.get(1)? else { + return None; + }; let content = Symbol::flatten_iter(symbols.iter().copied())?; let block = Self { diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 81897b2f..16a87df8 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -17,8 +17,7 @@ use crate::{ use unimarkup_commons::config::Config; /// Parser as function that can parse Unimarkup content -pub type ParserFn = - for<'i, 'p, 'f> fn(SymbolIterator<'i, 'p, 'f>) -> Option<(Blocks, &'i [Symbol<'i>])>; +pub type ParserFn = for<'i> fn(&mut SymbolIterator<'i>) -> Option<(Blocks, &'i [Symbol<'i>])>; /// Output of symbol tokenization by a parser of a block. pub(crate) struct TokenizeOutput<'i, T> @@ -35,9 +34,7 @@ pub(crate) trait ElementParser { type Token<'a>; /// Function that converts input symbols into tokens specific for the given element. - fn tokenize<'i>( - input: SymbolIterator<'i, '_, '_>, - ) -> Option>>; + fn tokenize<'i>(input: &mut SymbolIterator<'i>) -> Option>>; /// Function that parses tokenization output and produces one or more Unimarkup elements. fn parse(input: Vec>) -> Option; @@ -106,12 +103,11 @@ impl MainParser { } /// Parses Unimarkup content and produces Unimarkup blocks. - pub fn parse<'i, 'p, 'f>(&self, input: impl Into>) -> Blocks { - let mut input: SymbolIterator<'i, 'p, 'f> = input.into(); + pub fn parse(&self, input: &mut SymbolIterator) -> Blocks { let mut blocks = Vec::default(); #[cfg(debug_assertions)] - let mut input_len = input.len(); + let mut curr_idx = input.len(); 'outer: while let Some(kind) = input.peek_kind() { match kind { @@ -123,36 +119,39 @@ impl MainParser { // no parser will match, parse with default parser _ if kind.is_not_keyword() => { - let (mut res_blocks, rest_of_input) = (self.default_parser)(input) + let (mut res_blocks, _) = (self.default_parser)(input) .expect("Default parser could not parse content!"); blocks.append(&mut res_blocks); - input = SymbolIterator::from(rest_of_input); } // symbol is start of a block, some parser should match _ => { for parser_fn in &self.parsers { - if let Some((mut res_blocks, rest_of_input)) = parser_fn(input.clone()) { + let mut iter = input.clone(); + if let Some((mut res_blocks, _)) = parser_fn(&mut iter) { blocks.append(&mut res_blocks); - input = SymbolIterator::from(rest_of_input); + // TODO: clarify if this is ok? Wouldn't we lose sequences this way? + // input = SymbolIterator::from(rest_of_input); + + // Maybe this is better? Continue where parser left of + *input = iter; continue 'outer; // start from first parser on next input } } // no registered parser matched -> use default parser - let (mut res_blocks, rest_of_input) = (self.default_parser)(input) + let (mut res_blocks, _) = (self.default_parser)(input) .expect("Default parser could not parse content!"); blocks.append(&mut res_blocks); - input = SymbolIterator::from(rest_of_input); } } #[cfg(debug_assertions)] { - assert_ne!(input.len(), input_len); - input_len = input.len(); + assert_ne!(input.curr_index(), curr_idx); + curr_idx = input.curr_index(); } } @@ -167,8 +166,9 @@ pub fn parse_unimarkup(um_content: &str, config: &mut Config) -> Document { let symbols = Scanner::try_new() .expect("Must be valid provider.") .scan_str(um_content); - println!("{:?}", &symbols.iter().map(|s| s.kind).collect::>()); - let blocks = parser.parse(&symbols); + let mut symbols_iter = SymbolIterator::from(&symbols); + // println!("{:?}", &symbols.iter().map(|s| s.kind).collect::>()); + let blocks = parser.parse(&mut symbols_iter); let mut unimarkup = Document { config: config.clone(), From fbefb50db6fb250f747c601700dd6a411b579217 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Fri, 22 Sep 2023 22:31:19 +0200 Subject: [PATCH 07/43] fix: correct heading end closure to detect heading --- commons/src/scanner/symbol/iterator.rs | 25 ++++++++----------------- parser/src/elements/atomic/heading.rs | 25 ++++++++++++++++++++----- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/commons/src/scanner/symbol/iterator.rs b/commons/src/scanner/symbol/iterator.rs index d9aad5ed..063d178d 100644 --- a/commons/src/scanner/symbol/iterator.rs +++ b/commons/src/scanner/symbol/iterator.rs @@ -203,6 +203,12 @@ impl<'input> SymbolIterator<'input> { } } + pub fn update(self, parent: &mut Self) { + if let SymbolIteratorKind::Nested(self_parent) = self.kind { + *parent = *self_parent; + } + } + pub fn skip_to_end(mut self) -> Self { while self.next().is_some() {} @@ -224,13 +230,6 @@ impl<'input> SymbolIterator<'input> { pub fn end_reached(&self) -> bool { self.iter_end } - - pub fn parent(self) -> Option> { - match self.kind { - SymbolIteratorKind::Nested(parent) => Some(*parent), - SymbolIteratorKind::Root(_) => None, - } - } } impl<'input> Iterator for SymbolIteratorRoot<'input> { @@ -503,18 +502,10 @@ mod test { &symbols, 0, vec![vec![SymbolKind::Star, SymbolKind::Whitespace]], - Box::new(|_| false), + Rc::new(|_| false), ); - let mut inner = iterator.nest( - &[SymbolKind::Star], - Some(Box::new(|sequence| { - sequence - .get(0) - .map(|s| s.kind == SymbolKind::Star) - .unwrap_or(false) - })), - ); + let mut inner = iterator.nest(&[SymbolKind::Star], None); let sym_kinds = inner .take_to_end() diff --git a/parser/src/elements/atomic/heading.rs b/parser/src/elements/atomic/heading.rs index 84491c53..79bdb3ab 100644 --- a/parser/src/elements/atomic/heading.rs +++ b/parser/src/elements/atomic/heading.rs @@ -147,10 +147,22 @@ impl ElementParser for Heading { .collect(); let heading_end = move |sequence: &[Symbol<'_>]| { - let is_eoi = match sequence.first() { - Some(symbol) => matches!(symbol.kind, SymbolKind::Blankline | SymbolKind::EOI), + if match sequence.get(..2) { + Some(slice) => matches!( + [slice[0].kind, slice[1].kind], + [SymbolKind::Newline, SymbolKind::Blankline] + ), None => false, - }; + } { + return true; + } + + if match sequence.first() { + Some(symbol) => matches!(symbol.kind, SymbolKind::EOI), + None => false, + } { + return true; + } let sequence_matched = level != HeadingLevel::Level6 && sequence[..sub_heading_start.len()] @@ -159,7 +171,7 @@ impl ElementParser for Heading { .zip(&sub_heading_start) .all(|(seq, sub)| seq == *sub); - is_eoi || sequence_matched + sequence_matched }; let mut content_iter = input.nest_prefixes( @@ -174,13 +186,16 @@ impl ElementParser for Heading { return None; } + let rest_of_input = content_iter.remaining_symbols(); + content_iter.update(input); + let output = TokenizeOutput { tokens: vec![ HeadingToken::Level(level), HeadingToken::Content(content_symbols), HeadingToken::End, ], - rest_of_input: content_iter.remaining_symbols(), + rest_of_input, }; Some(output) From cd608b3a446c1339634f35643d8989c69f54eba4 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Fri, 22 Sep 2023 22:39:21 +0200 Subject: [PATCH 08/43] fix: ignore newlines between elements --- parser/src/parser.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 16a87df8..edeb1d4c 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -111,8 +111,10 @@ impl MainParser { 'outer: while let Some(kind) = input.peek_kind() { match kind { - // skip blanklines - SymbolKind::Blankline => {} + // skip newlines between elements + SymbolKind::Blankline | SymbolKind::Newline => { + input.next(); + } // stop parsing when end of input is reached SymbolKind::EOI => break, From 32778c98f2cbec592761355c16480729fb887139 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Fri, 22 Sep 2023 22:49:20 +0200 Subject: [PATCH 09/43] feat: make end-fn optional for new symbol iterator --- commons/src/scanner/symbol/iterator.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/commons/src/scanner/symbol/iterator.rs b/commons/src/scanner/symbol/iterator.rs index 063d178d..c0ae04fa 100644 --- a/commons/src/scanner/symbol/iterator.rs +++ b/commons/src/scanner/symbol/iterator.rs @@ -92,13 +92,13 @@ impl<'input> SymbolIterator<'input> { symbols: &'input [Symbol<'input>], start_index: usize, line_prefix: impl Into>>, - end: IteratorEndFn<'input>, + end: Option>, ) -> Self { SymbolIterator { kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(symbols)), start_index, line_prefixes: line_prefix.into(), - end: Some(end), + end, iter_end: false, } } @@ -170,7 +170,7 @@ impl<'input> SymbolIterator<'input> { } pub fn nest( - self, + &self, line_prefix: &[SymbolKind], end: Option>, ) -> SymbolIterator<'input> { @@ -178,7 +178,7 @@ impl<'input> SymbolIterator<'input> { let iter_end = self.iter_end; SymbolIterator { - kind: SymbolIteratorKind::Nested(Box::new(self)), + kind: SymbolIteratorKind::Nested(Box::new(self.clone())), start_index: curr_index, line_prefixes: vec![line_prefix.to_vec()], end, @@ -502,7 +502,7 @@ mod test { &symbols, 0, vec![vec![SymbolKind::Star, SymbolKind::Whitespace]], - Rc::new(|_| false), + None, ); let mut inner = iterator.nest(&[SymbolKind::Star], None); From 1a5c5b07f283273264190cb88644f5c343c5475d Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Sat, 23 Sep 2023 01:53:37 +0200 Subject: [PATCH 10/43] fix: change end fns to get SymboliterMatcher --- commons/src/scanner/symbol/iterator.rs | 127 ++++++++++++++++++++----- parser/src/elements/atomic/heading.rs | 36 ++----- 2 files changed, 109 insertions(+), 54 deletions(-) diff --git a/commons/src/scanner/symbol/iterator.rs b/commons/src/scanner/symbol/iterator.rs index c0ae04fa..c1e68eb4 100644 --- a/commons/src/scanner/symbol/iterator.rs +++ b/commons/src/scanner/symbol/iterator.rs @@ -9,7 +9,7 @@ pub struct SymbolIterator<'input> { kind: SymbolIteratorKind<'input>, start_index: usize, line_prefixes: Vec>, - end: Option>, + end: Option, iter_end: bool, } @@ -55,7 +55,82 @@ pub enum SymbolIteratorKind<'input> { Root(SymbolIteratorRoot<'input>), } -pub type IteratorEndFn<'input> = Rc]) -> bool)>; +pub type IteratorEndFn = Rc bool)>; + +pub trait SymbolIterMatcher { + fn is_empty_line(&mut self) -> bool; + fn consumed_is_empty_line(&mut self) -> bool; + fn matches(&mut self, sequence: &[SymbolKind]) -> bool; + fn consumed_matches(&mut self, sequence: &[SymbolKind]) -> bool; +} + +impl<'input> SymbolIterMatcher for SymbolIterator<'input> { + fn is_empty_line(&mut self) -> bool { + let next = self + .peeking_next(|s| matches!(s.kind, SymbolKind::Blankline | SymbolKind::Newline)) + .map(|s| s.kind); + + if Some(SymbolKind::Newline) == next { + let _ = self.peeking_take_while(|s| s.kind == SymbolKind::Whitespace); + self.set_peek_index(self.peek_index().saturating_sub(1)); // Note: To compensate last "peeking_next()" in "peeking_take_whil()" + + return self + .peeking_next(|s| matches!(s.kind, SymbolKind::Blankline | SymbolKind::Newline)) + .is_some(); + } + + Some(SymbolKind::Blankline) == next + } + + fn consumed_is_empty_line(&mut self) -> bool { + let next = self + .peeking_next(|s| matches!(s.kind, SymbolKind::Blankline | SymbolKind::Newline)) + .map(|s| s.kind); + + if Some(SymbolKind::Newline) == next { + let whitespaces = self + .peeking_take_while(|s| s.kind == SymbolKind::Whitespace) + .count(); + self.set_peek_index(self.peek_index().saturating_sub(1)); // Note: To compensate last "peeking_next()" in "peeking_take_whil()" + + let end = self + .peeking_next(|s| matches!(s.kind, SymbolKind::Blankline | SymbolKind::Newline)) + .map(|s| s.kind); + + if end.is_some() { + let _ = self.skip(whitespaces + 2); // +2 for starting newline + (blankline | newline) + dbg!("consume"); + return true; + } + } else if Some(SymbolKind::Blankline) == next { + let _ = self.skip(1); + } + + Some(SymbolKind::Blankline) == next + } + + fn matches(&mut self, sequence: &[SymbolKind]) -> bool { + for kind in sequence { + if self.peeking_next(|s| s.kind == *kind).is_none() { + return false; + } + } + + true + } + + fn consumed_matches(&mut self, sequence: &[SymbolKind]) -> bool { + for kind in sequence { + if self.peeking_next(|s| s.kind == *kind).is_none() { + return false; + } + } + + let _ = self.skip(sequence.len()); + + true + } +} impl<'input> From<&'input [Symbol<'input>]> for SymbolIterator<'input> { fn from(value: &'input [Symbol<'input>]) -> Self { @@ -92,7 +167,7 @@ impl<'input> SymbolIterator<'input> { symbols: &'input [Symbol<'input>], start_index: usize, line_prefix: impl Into>>, - end: Option>, + end: Option, ) -> Self { SymbolIterator { kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(symbols)), @@ -140,6 +215,24 @@ impl<'input> SymbolIterator<'input> { } } + fn peek_index(&self) -> usize { + match &self.kind { + SymbolIteratorKind::Nested(parent) => parent.peek_index(), + SymbolIteratorKind::Root(root) => root.peek_index, + } + } + + pub fn set_peek_index(&mut self, index: usize) { + if index >= self.curr_index() { + match self.kind.borrow_mut() { + SymbolIteratorKind::Nested(parent) => parent.set_peek_index(index), + SymbolIteratorKind::Root(root) => { + root.peek_index = index; + } + } + } + } + pub fn eoi(&self) -> bool { self.curr_index() == self.len() } @@ -172,7 +265,7 @@ impl<'input> SymbolIterator<'input> { pub fn nest( &self, line_prefix: &[SymbolKind], - end: Option>, + end: Option, ) -> SymbolIterator<'input> { let curr_index = self.curr_index(); let iter_end = self.iter_end; @@ -189,7 +282,7 @@ impl<'input> SymbolIterator<'input> { pub fn nest_prefixes( &self, line_prefixes: impl Into>>, - end: Option>, + end: Option, ) -> SymbolIterator<'input> { let curr_index = self.curr_index(); let iter_end = self.iter_end; @@ -289,20 +382,8 @@ impl<'input> Iterator for SymbolIterator<'input> { return None; } - let symbols = match &self.kind { - SymbolIteratorKind::Nested(parent) => { - if parent.end_reached() { - self.iter_end = true; - return None; - } else { - parent.remaining_symbols() - } - } - SymbolIteratorKind::Root(root) => root.remaining_symbols(), - }; - - if let Some(end_fn) = &self.end { - if (end_fn)(symbols) { + if let Some(end_fn) = self.end.clone() { + if (end_fn)(self) { self.iter_end = true; return None; } @@ -332,6 +413,7 @@ impl<'input> Iterator for SymbolIterator<'input> { // Note: This mostly indicates a syntax violation, so skipped symbol is ok. if !prefix_matched { + dbg!("bad prefix"); return None; } } @@ -465,12 +547,7 @@ mod test { let mut iterator = SymbolIterator::from(&symbols).nest( &[], - Some(Rc::new(|sequence| { - sequence - .get(0) - .map(|s| s.kind == SymbolKind::Star) - .unwrap_or(false) - })), + Some(Rc::new(|matcher| matcher.matches(&[SymbolKind::Star]))), ); let taken_symkinds = iterator diff --git a/parser/src/elements/atomic/heading.rs b/parser/src/elements/atomic/heading.rs index 79bdb3ab..6a4d1e4d 100644 --- a/parser/src/elements/atomic/heading.rs +++ b/parser/src/elements/atomic/heading.rs @@ -6,7 +6,9 @@ use unimarkup_inline::{Inline, ParseInlines}; use crate::elements::blocks::Block; use crate::elements::Blocks; use crate::parser::{ElementParser, TokenizeOutput}; -use unimarkup_commons::scanner::{Itertools, Symbol, SymbolIterator, SymbolKind}; +use unimarkup_commons::scanner::{ + Itertools, Symbol, SymbolIterMatcher, SymbolIterator, SymbolKind, +}; use super::log_id::AtomicError; @@ -130,7 +132,6 @@ impl ElementParser for Heading { .collect(); let level_depth = heading_start.len(); - let level: HeadingLevel = HeadingLevel::try_from(level_depth).ok()?; if input.by_ref().nth(level_depth)?.kind != SymbolKind::Whitespace { return None; @@ -146,32 +147,10 @@ impl ElementParser for Heading { .chain([SymbolKind::Whitespace]) .collect(); - let heading_end = move |sequence: &[Symbol<'_>]| { - if match sequence.get(..2) { - Some(slice) => matches!( - [slice[0].kind, slice[1].kind], - [SymbolKind::Newline, SymbolKind::Blankline] - ), - None => false, - } { - return true; - } - - if match sequence.first() { - Some(symbol) => matches!(symbol.kind, SymbolKind::EOI), - None => false, - } { - return true; - } - - let sequence_matched = level != HeadingLevel::Level6 - && sequence[..sub_heading_start.len()] - .iter() - .map(|s| s.kind) - .zip(&sub_heading_start) - .all(|(seq, sub)| seq == *sub); - - sequence_matched + let heading_end = move |matcher: &mut dyn SymbolIterMatcher| { + matcher.consumed_is_empty_line() + || matcher.matches(&[SymbolKind::EOI]) + || level != HeadingLevel::Level6 && matcher.matches(&sub_heading_start) }; let mut content_iter = input.nest_prefixes( @@ -182,7 +161,6 @@ impl ElementParser for Heading { // Line prefixes violated => invalid heading syntax if !content_iter.end_reached() { - // println!("heading end not reached. {:?}", &content_symbols); return None; } From b8d430b771854dee16ab1a8e4ea3a639b3ead139 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Sat, 23 Sep 2023 01:56:19 +0200 Subject: [PATCH 11/43] fix: remove new_line from SymbolIterRoot --- commons/src/scanner/symbol/iterator.rs | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/commons/src/scanner/symbol/iterator.rs b/commons/src/scanner/symbol/iterator.rs index c1e68eb4..014b803b 100644 --- a/commons/src/scanner/symbol/iterator.rs +++ b/commons/src/scanner/symbol/iterator.rs @@ -18,7 +18,6 @@ pub struct SymbolIteratorRoot<'input> { symbols: &'input [Symbol<'input>], curr_index: usize, peek_index: usize, - new_line: bool, } impl<'input> From<&'input [Symbol<'input>]> for SymbolIteratorRoot<'input> { @@ -27,7 +26,6 @@ impl<'input> From<&'input [Symbol<'input>]> for SymbolIteratorRoot<'input> { symbols: value, curr_index: 0, peek_index: 0, - new_line: false, } } } @@ -38,7 +36,6 @@ impl<'input> From<&'input Vec>> for SymbolIteratorRoot<'input> { symbols: value, curr_index: 0, peek_index: 0, - new_line: false, } } } @@ -99,7 +96,6 @@ impl<'input> SymbolIterMatcher for SymbolIterator<'input> { if end.is_some() { let _ = self.skip(whitespaces + 2); // +2 for starting newline + (blankline | newline) - dbg!("consume"); return true; } } else if Some(SymbolKind::Blankline) == next { @@ -244,13 +240,6 @@ impl<'input> SymbolIterator<'input> { } } - pub fn new_line(&self) -> bool { - match &self.kind { - SymbolIteratorKind::Nested(parent) => parent.new_line(), - SymbolIteratorKind::Root(root) => root.new_line, - } - } - pub fn peek(&self) -> Option<&'input Symbol<'input>> { match &self.kind { SymbolIteratorKind::Nested(parent) => parent.peek(), @@ -333,7 +322,6 @@ impl<'input> Iterator for SymbolIteratorRoot<'input> { Some(symbol) => { self.curr_index += 1; self.peek_index = self.curr_index; - self.new_line = symbol.kind == SymbolKind::Newline; Some(symbol) } @@ -394,7 +382,7 @@ impl<'input> Iterator for SymbolIterator<'input> { SymbolIteratorKind::Root(root) => root.next(), }; - if self.new_line() && !self.line_prefixes.is_empty() { + if curr_symbol_opt?.kind == SymbolKind::Newline && !self.line_prefixes.is_empty() { let mut prefix_matched = false; for prefix in &self.line_prefixes { @@ -413,7 +401,6 @@ impl<'input> Iterator for SymbolIterator<'input> { // Note: This mostly indicates a syntax violation, so skipped symbol is ok. if !prefix_matched { - dbg!("bad prefix"); return None; } } From 6ad4a8b7d688423f587c7e6ce1eb8512ad151055 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Sat, 23 Sep 2023 02:35:51 +0200 Subject: [PATCH 12/43] fix: remove remaining symbols from tokenize output --- commons/src/scanner/symbol/iterator.rs | 12 +++++++----- commons/src/test_runner/snap_test_runner.rs | 6 ++---- core/tests/test_runner/mod.rs | 2 +- inline/tests/lexer/mod.rs | 4 +--- inline/tests/parser/mod.rs | 4 +--- parser/src/elements/atomic/heading.rs | 4 +--- parser/src/elements/atomic/paragraph.rs | 7 ++----- parser/src/elements/enclosed/verbatim.rs | 3 +-- parser/src/parser.rs | 20 ++++++++------------ 9 files changed, 24 insertions(+), 38 deletions(-) diff --git a/commons/src/scanner/symbol/iterator.rs b/commons/src/scanner/symbol/iterator.rs index 014b803b..20eba32e 100644 --- a/commons/src/scanner/symbol/iterator.rs +++ b/commons/src/scanner/symbol/iterator.rs @@ -41,8 +41,8 @@ impl<'input> From<&'input Vec>> for SymbolIteratorRoot<'input> { } impl<'input> SymbolIteratorRoot<'input> { - fn remaining_symbols(&self) -> &'input [Symbol<'input>] { - &self.symbols[self.curr_index..] + fn remaining_symbols(&self) -> Option<&'input [Symbol<'input>]> { + self.symbols.get(self.curr_index..) } } @@ -233,7 +233,7 @@ impl<'input> SymbolIterator<'input> { self.curr_index() == self.len() } - pub fn remaining_symbols(&self) -> &'input [Symbol<'input>] { + pub fn remaining_symbols(&self) -> Option<&'input [Symbol<'input>]> { match &self.kind { SymbolIteratorKind::Nested(parent) => parent.remaining_symbols(), SymbolIteratorKind::Root(root) => root.remaining_symbols(), @@ -386,14 +386,16 @@ impl<'input> Iterator for SymbolIterator<'input> { let mut prefix_matched = false; for prefix in &self.line_prefixes { - let curr_prefix_symbolkinds: Vec<_> = self.remaining_symbols()[..prefix.len()] + let curr_prefix_symbolkinds: Vec<_> = self + .remaining_symbols()? + .get(..prefix.len())? .iter() .map(|s| s.kind) .collect(); if prefix == &curr_prefix_symbolkinds { prefix_matched = true; - // Note: Only update index. Prevents `new_line()` from being changed by possible parent + // Note: Only update index to pass `SymbolKind::Newline` to all nested iterators self.set_curr_index(self.curr_index() + prefix.len()); break; } diff --git a/commons/src/test_runner/snap_test_runner.rs b/commons/src/test_runner/snap_test_runner.rs index fd79137d..6ddf585b 100644 --- a/commons/src/test_runner/snap_test_runner.rs +++ b/commons/src/test_runner/snap_test_runner.rs @@ -15,11 +15,9 @@ impl<'a> SnapTestRunner<'a> { pub fn with_fn(name: &str, input: &'a S, mut parser: PF) -> SnapTestRunner<'a, ()> where S: AsRef<[Symbol<'a>]>, - PF: for<'s> FnMut(&'s [Symbol<'s>]) -> (String, &'s [Symbol<'s>]), + PF: for<'s> FnMut(&'s [Symbol<'s>]) -> String, { - let (snapshot, rest) = parser(input.as_ref()); - - assert_eq!(rest.len(), 0, "Whole input should be parsed"); + let snapshot = parser(input.as_ref()); SnapTestRunner { info: None, diff --git a/core/tests/test_runner/mod.rs b/core/tests/test_runner/mod.rs index f923e72d..fa9dd275 100644 --- a/core/tests/test_runner/mod.rs +++ b/core/tests/test_runner/mod.rs @@ -45,7 +45,7 @@ macro_rules! snapshot_parser { let parse = <$ty>::generate_parser(); parse(&mut input.into()) - .map(|(block, rest)| (Snapshot(block).as_snapshot(), rest)) + .map(|block| Snapshot(block).as_snapshot()) .expect("Could not parse content!") } }; diff --git a/inline/tests/lexer/mod.rs b/inline/tests/lexer/mod.rs index de4a3ca3..c95cba30 100644 --- a/inline/tests/lexer/mod.rs +++ b/inline/tests/lexer/mod.rs @@ -33,9 +33,7 @@ pub fn test_lexer_snapshots() -> Vec { fn run_test_case(case: crate::TestCase) { let symbols = test_runner::scan_str(&case.input); let runner = SnapTestRunner::with_fn(&case.name, &symbols, |symbols| { - let rest = &[]; - let snapshot = Snapshot::snap((case.input.as_ref(), symbols.tokens())); - (snapshot, rest) + Snapshot::snap((case.input.as_ref(), symbols.tokens())) }) .with_info(format!("Test '{}' from '{}'", case.name, case.file_name)); diff --git a/inline/tests/parser/mod.rs b/inline/tests/parser/mod.rs index 3c35a1a5..a9c01f46 100644 --- a/inline/tests/parser/mod.rs +++ b/inline/tests/parser/mod.rs @@ -34,10 +34,8 @@ fn run_test_case(case: crate::TestCase) { let symbols = test_runner::scan_str(&case.input); let runner = SnapTestRunner::with_fn(&case.name, &symbols, |symbols| { - let rest: &[_] = &[]; let inlines: Vec<_> = symbols.parse_inlines().collect(); - let snapshot = Snapshot::snap(&inlines[..]); - (snapshot, rest) + Snapshot::snap(&inlines[..]) }) .with_info(format!("Test '{}' from '{}'", case.name, case.file_name)); diff --git a/parser/src/elements/atomic/heading.rs b/parser/src/elements/atomic/heading.rs index 6a4d1e4d..46b0581c 100644 --- a/parser/src/elements/atomic/heading.rs +++ b/parser/src/elements/atomic/heading.rs @@ -125,7 +125,7 @@ pub enum HeadingToken<'a> { impl ElementParser for Heading { type Token<'a> = self::HeadingToken<'a>; - fn tokenize<'i>(input: &mut SymbolIterator<'i>) -> Option>> { + fn tokenize<'i>(input: &mut SymbolIterator<'i>) -> Option>> { let mut heading_start: Vec = input .peeking_take_while(|symbol| matches!(symbol.kind, SymbolKind::Hash)) .map(|s| s.kind) @@ -164,7 +164,6 @@ impl ElementParser for Heading { return None; } - let rest_of_input = content_iter.remaining_symbols(); content_iter.update(input); let output = TokenizeOutput { @@ -173,7 +172,6 @@ impl ElementParser for Heading { HeadingToken::Content(content_symbols), HeadingToken::End, ], - rest_of_input, }; Some(output) diff --git a/parser/src/elements/atomic/paragraph.rs b/parser/src/elements/atomic/paragraph.rs index 8ac699fa..171e3f55 100644 --- a/parser/src/elements/atomic/paragraph.rs +++ b/parser/src/elements/atomic/paragraph.rs @@ -63,13 +63,10 @@ fn not_closing_symbol(symbol: &&Symbol) -> bool { impl ElementParser for Paragraph { type Token<'a> = &'a Symbol<'a>; - fn tokenize<'i>(input: &mut SymbolIterator<'i>) -> Option>> { + fn tokenize<'i>(input: &mut SymbolIterator<'i>) -> Option>> { let content = input.by_ref().take_while(not_closing_symbol).collect(); - let output = TokenizeOutput { - tokens: content, - rest_of_input: input.remaining_symbols(), - }; + let output = TokenizeOutput { tokens: content }; Some(output) } diff --git a/parser/src/elements/enclosed/verbatim.rs b/parser/src/elements/enclosed/verbatim.rs index b7887b33..7a227b0a 100644 --- a/parser/src/elements/enclosed/verbatim.rs +++ b/parser/src/elements/enclosed/verbatim.rs @@ -31,7 +31,7 @@ pub(crate) enum Token<'a> { impl ElementParser for Verbatim { type Token<'a> = self::Token<'a>; - fn tokenize<'i>(input: &mut SymbolIterator<'i>) -> Option>> { + fn tokenize<'i>(input: &mut SymbolIterator<'i>) -> Option>> { let start_delim: Vec<_> = input .by_ref() .take_while(|symbol| matches!(symbol.kind, SymbolKind::Tick)) @@ -79,7 +79,6 @@ impl ElementParser for Verbatim { let output = TokenizeOutput { tokens: vec![Token::StartDelim(start_delim), Token::Content(vec![])], //content)], - rest_of_input: input.remaining_symbols(), }; Some(output) diff --git a/parser/src/parser.rs b/parser/src/parser.rs index edeb1d4c..3b970103 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -1,7 +1,7 @@ //! Module for parsing of Unimarkup elements. use logid::log; -use unimarkup_commons::scanner::{Scanner, Symbol, SymbolIterator, SymbolKind}; +use unimarkup_commons::scanner::{Scanner, SymbolIterator, SymbolKind}; use crate::{ document::Document, @@ -17,15 +17,11 @@ use crate::{ use unimarkup_commons::config::Config; /// Parser as function that can parse Unimarkup content -pub type ParserFn = for<'i> fn(&mut SymbolIterator<'i>) -> Option<(Blocks, &'i [Symbol<'i>])>; +pub type ParserFn = for<'i> fn(&mut SymbolIterator<'i>) -> Option; /// Output of symbol tokenization by a parser of a block. -pub(crate) struct TokenizeOutput<'i, T> -where - T: 'i, -{ +pub(crate) struct TokenizeOutput { pub(crate) tokens: Vec, - pub(crate) rest_of_input: &'i [Symbol<'i>], } /// Trait implemented by a parser for each Unimarkup element. @@ -34,7 +30,7 @@ pub(crate) trait ElementParser { type Token<'a>; /// Function that converts input symbols into tokens specific for the given element. - fn tokenize<'i>(input: &mut SymbolIterator<'i>) -> Option>>; + fn tokenize<'i>(input: &mut SymbolIterator<'i>) -> Option>>; /// Function that parses tokenization output and produces one or more Unimarkup elements. fn parse(input: Vec>) -> Option; @@ -65,7 +61,7 @@ where let tokenize_output = T::tokenize(input)?; let blocks = T::parse(tokenize_output.tokens)?; - Some((blocks, tokenize_output.rest_of_input)) + Some(blocks) } } } @@ -121,7 +117,7 @@ impl MainParser { // no parser will match, parse with default parser _ if kind.is_not_keyword() => { - let (mut res_blocks, _) = (self.default_parser)(input) + let mut res_blocks = (self.default_parser)(input) .expect("Default parser could not parse content!"); blocks.append(&mut res_blocks); @@ -131,7 +127,7 @@ impl MainParser { _ => { for parser_fn in &self.parsers { let mut iter = input.clone(); - if let Some((mut res_blocks, _)) = parser_fn(&mut iter) { + if let Some(mut res_blocks) = parser_fn(&mut iter) { blocks.append(&mut res_blocks); // TODO: clarify if this is ok? Wouldn't we lose sequences this way? // input = SymbolIterator::from(rest_of_input); @@ -143,7 +139,7 @@ impl MainParser { } // no registered parser matched -> use default parser - let (mut res_blocks, _) = (self.default_parser)(input) + let mut res_blocks = (self.default_parser)(input) .expect("Default parser could not parse content!"); blocks.append(&mut res_blocks); From c73286f4258eebd2c496b566607ef36684a08ada Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Sat, 23 Sep 2023 17:54:16 +0200 Subject: [PATCH 13/43] fix: correct prefix consumption for symbol iterator --- commons/src/scanner/symbol/iterator.rs | 310 +++++++++++-------------- parser/src/elements/atomic/heading.rs | 23 +- 2 files changed, 143 insertions(+), 190 deletions(-) diff --git a/commons/src/scanner/symbol/iterator.rs b/commons/src/scanner/symbol/iterator.rs index 20eba32e..81f15b01 100644 --- a/commons/src/scanner/symbol/iterator.rs +++ b/commons/src/scanner/symbol/iterator.rs @@ -8,8 +8,8 @@ pub use itertools::*; pub struct SymbolIterator<'input> { kind: SymbolIteratorKind<'input>, start_index: usize, - line_prefixes: Vec>, - end: Option, + prefix_match: Option, + end_match: Option, iter_end: bool, } @@ -52,60 +52,56 @@ pub enum SymbolIteratorKind<'input> { Root(SymbolIteratorRoot<'input>), } -pub type IteratorEndFn = Rc bool)>; +pub type IteratorEndFn = Rc bool)>; +pub type IteratorPrefixFn = Rc bool)>; -pub trait SymbolIterMatcher { +pub trait EndMatcher { fn is_empty_line(&mut self) -> bool; fn consumed_is_empty_line(&mut self) -> bool; fn matches(&mut self, sequence: &[SymbolKind]) -> bool; fn consumed_matches(&mut self, sequence: &[SymbolKind]) -> bool; } -impl<'input> SymbolIterMatcher for SymbolIterator<'input> { +pub trait PrefixMatcher { + fn consumed_prefix(&mut self, sequence: &[SymbolKind]) -> bool; +} + +impl<'input> EndMatcher for SymbolIterator<'input> { fn is_empty_line(&mut self) -> bool { + self.reset_peek(); + let next = self .peeking_next(|s| matches!(s.kind, SymbolKind::Blankline | SymbolKind::Newline)) .map(|s| s.kind); - if Some(SymbolKind::Newline) == next { - let _ = self.peeking_take_while(|s| s.kind == SymbolKind::Whitespace); + let is_empty_line = if Some(SymbolKind::Newline) == next { + let _whitespaces = self + .peeking_take_while(|s| s.kind == SymbolKind::Whitespace) + .count(); self.set_peek_index(self.peek_index().saturating_sub(1)); // Note: To compensate last "peeking_next()" in "peeking_take_whil()" - return self - .peeking_next(|s| matches!(s.kind, SymbolKind::Blankline | SymbolKind::Newline)) - .is_some(); - } + self.peeking_next(|s| matches!(s.kind, SymbolKind::Blankline | SymbolKind::Newline)) + .is_some() + } else { + Some(SymbolKind::Blankline) == next + }; - Some(SymbolKind::Blankline) == next + is_empty_line } fn consumed_is_empty_line(&mut self) -> bool { - let next = self - .peeking_next(|s| matches!(s.kind, SymbolKind::Blankline | SymbolKind::Newline)) - .map(|s| s.kind); - - if Some(SymbolKind::Newline) == next { - let whitespaces = self - .peeking_take_while(|s| s.kind == SymbolKind::Whitespace) - .count(); - self.set_peek_index(self.peek_index().saturating_sub(1)); // Note: To compensate last "peeking_next()" in "peeking_take_whil()" + let is_empty_line = self.is_empty_line(); - let end = self - .peeking_next(|s| matches!(s.kind, SymbolKind::Blankline | SymbolKind::Newline)) - .map(|s| s.kind); - - if end.is_some() { - let _ = self.skip(whitespaces + 2); // +2 for starting newline + (blankline | newline) - return true; - } - } else if Some(SymbolKind::Blankline) == next { - let _ = self.skip(1); + if is_empty_line { + self.set_curr_index(self.peek_index()); // To consume peeked symbols } - Some(SymbolKind::Blankline) == next + is_empty_line } fn matches(&mut self, sequence: &[SymbolKind]) -> bool { + self.reset_peek(); + for kind in sequence { if self.peeking_next(|s| s.kind == *kind).is_none() { return false; @@ -116,15 +112,25 @@ impl<'input> SymbolIterMatcher for SymbolIterator<'input> { } fn consumed_matches(&mut self, sequence: &[SymbolKind]) -> bool { - for kind in sequence { - if self.peeking_next(|s| s.kind == *kind).is_none() { - return false; - } + let matched = self.matches(sequence); + + if matched { + self.set_curr_index(self.peek_index()); // To consume peeked symbols } - let _ = self.skip(sequence.len()); + matched + } +} - true +impl<'input> PrefixMatcher for SymbolIterator<'input> { + fn consumed_prefix(&mut self, sequence: &[SymbolKind]) -> bool { + #[cfg(debug_assertions)] + assert!( + !sequence.contains(&SymbolKind::Newline), + "Newline symbol in prefix match is not allowed." + ); + + self.consumed_matches(sequence) } } @@ -133,8 +139,8 @@ impl<'input> From<&'input [Symbol<'input>]> for SymbolIterator<'input> { SymbolIterator { kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(value)), start_index: 0, - line_prefixes: vec![], - end: None, + prefix_match: None, + end_match: None, iter_end: false, } } @@ -145,8 +151,8 @@ impl<'input> From<&'input Vec>> for SymbolIterator<'input> { SymbolIterator { kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(value)), start_index: 0, - line_prefixes: vec![], - end: None, + prefix_match: None, + end_match: None, iter_end: false, } } @@ -162,14 +168,14 @@ impl<'input> SymbolIterator<'input> { pub fn with( symbols: &'input [Symbol<'input>], start_index: usize, - line_prefix: impl Into>>, + prefix_match: Option, end: Option, ) -> Self { SymbolIterator { kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(symbols)), start_index, - line_prefixes: line_prefix.into(), - end, + prefix_match, + end_match: end, iter_end: false, } } @@ -229,8 +235,8 @@ impl<'input> SymbolIterator<'input> { } } - pub fn eoi(&self) -> bool { - self.curr_index() == self.len() + pub fn reset_peek(&mut self) { + self.set_peek_index(self.curr_index()); } pub fn remaining_symbols(&self) -> Option<&'input [Symbol<'input>]> { @@ -240,48 +246,27 @@ impl<'input> SymbolIterator<'input> { } } - pub fn peek(&self) -> Option<&'input Symbol<'input>> { - match &self.kind { - SymbolIteratorKind::Nested(parent) => parent.peek(), - SymbolIteratorKind::Root(root) => root.symbols.get(root.curr_index), - } + pub fn peek(&mut self) -> Option<&'input Symbol<'input>> { + let symbol = self.peeking_next(|_| true); + self.reset_peek(); // Note: Resetting index, because peek() must be idempotent + symbol } - pub fn peek_kind(&self) -> Option { + pub fn peek_kind(&mut self) -> Option { self.peek().map(|s| s.kind) } pub fn nest( &self, - line_prefix: &[SymbolKind], - end: Option, - ) -> SymbolIterator<'input> { - let curr_index = self.curr_index(); - let iter_end = self.iter_end; - - SymbolIterator { - kind: SymbolIteratorKind::Nested(Box::new(self.clone())), - start_index: curr_index, - line_prefixes: vec![line_prefix.to_vec()], - end, - iter_end, - } - } - - pub fn nest_prefixes( - &self, - line_prefixes: impl Into>>, + prefix_match: Option, end: Option, ) -> SymbolIterator<'input> { - let curr_index = self.curr_index(); - let iter_end = self.iter_end; - SymbolIterator { kind: SymbolIteratorKind::Nested(Box::new(self.clone())), - start_index: curr_index, - line_prefixes: line_prefixes.into(), - end, - iter_end, + start_index: self.curr_index(), + prefix_match, + end_match: end, + iter_end: self.iter_end, } } @@ -291,8 +276,13 @@ impl<'input> SymbolIterator<'input> { } } + /// Tries to skip symbols until one of the end functions signals the end. + /// + /// **Note:** This function might not reach the iterator end. + /// If no symbols are left, or no given line prefix is matched, the iterator may stop before an end is reached. + /// Use `end_reached()` to check if the end was actually reached. pub fn skip_to_end(mut self) -> Self { - while self.next().is_some() {} + let _last_symbol = self.by_ref().last(); self } @@ -318,15 +308,12 @@ impl<'input> Iterator for SymbolIteratorRoot<'input> { type Item = &'input Symbol<'input>; fn next(&mut self) -> Option { - match self.symbols.get(self.curr_index) { - Some(symbol) => { - self.curr_index += 1; - self.peek_index = self.curr_index; + let symbol = self.symbols.get(self.curr_index)?; - Some(symbol) - } - None => None, - } + self.curr_index += 1; + self.peek_index = self.curr_index; + + Some(symbol) } } @@ -336,23 +323,15 @@ impl<'input> PeekingNext for SymbolIteratorRoot<'input> { Self: Sized, F: FnOnce(&Self::Item) -> bool, { - let curr_index = self.curr_index; - self.curr_index = self.peek_index; // Note: peek_index increases until `next()` is called directly - let next_item = self.next(); - - // revert index to simulate lookahead - self.curr_index = curr_index; - - match next_item { - Some(symbol) => { - if (accept)(&symbol) { - next_item - } else { - None - } - } - None => None, + let symbol = self.symbols.get(self.peek_index)?; + + if !(accept)(&symbol) { + return None; } + + self.peek_index += 1; + + Some(symbol) } } @@ -360,55 +339,48 @@ impl<'input> Iterator for SymbolIterator<'input> { type Item = &'input Symbol<'input>; fn next(&mut self) -> Option { - if self.eoi() || self.end_reached() { - return None; - } - - if self.peek_kind()? == SymbolKind::Blankline - && contains_only_non_whitespace_sequences(&self.line_prefixes) - { - return None; - } - - if let Some(end_fn) = self.end.clone() { - if (end_fn)(self) { - self.iter_end = true; - return None; - } - } - - let curr_symbol_opt = match &mut self.kind { + let next_fn = |kind: &mut SymbolIteratorKind<'input>| match kind { SymbolIteratorKind::Nested(parent) => parent.next(), SymbolIteratorKind::Root(root) => root.next(), }; - if curr_symbol_opt?.kind == SymbolKind::Newline && !self.line_prefixes.is_empty() { - let mut prefix_matched = false; - - for prefix in &self.line_prefixes { - let curr_prefix_symbolkinds: Vec<_> = self - .remaining_symbols()? - .get(..prefix.len())? - .iter() - .map(|s| s.kind) - .collect(); - - if prefix == &curr_prefix_symbolkinds { - prefix_matched = true; - // Note: Only update index to pass `SymbolKind::Newline` to all nested iterators - self.set_curr_index(self.curr_index() + prefix.len()); - break; - } - } + next_symbol(self, next_fn) + } +} - // Note: This mostly indicates a syntax violation, so skipped symbol is ok. - if !prefix_matched { - return None; - } +fn next_symbol<'input, F>( + iter: &mut SymbolIterator<'input>, + next_fn: F, +) -> Option<&'input Symbol<'input>> +where + F: FnOnce(&mut SymbolIteratorKind<'input>) -> Option<&'input Symbol<'input>>, +{ + if iter.end_reached() { + return None; + } + + if let Some(end_fn) = iter.end_match.clone() { + if (end_fn)(iter) { + iter.iter_end = true; + return None; } + } + + let curr_symbol_opt = next_fn(&mut iter.kind); + + if curr_symbol_opt?.kind == SymbolKind::Newline && iter.prefix_match.is_some() { + let prefix_match = iter + .prefix_match + .clone() + .expect("Prefix match checked above to be some."); - curr_symbol_opt + // Note: This mostly indicates a syntax violation, so skipped symbol is ok. + if !prefix_match(iter) { + return None; + } } + + curr_symbol_opt } impl<'input> PeekingNext for SymbolIterator<'input> { @@ -417,42 +389,13 @@ impl<'input> PeekingNext for SymbolIterator<'input> { Self: Sized, F: FnOnce(&Self::Item) -> bool, { - match self.kind.borrow_mut() { + let next_fn = |kind: &mut SymbolIteratorKind<'input>| match kind { SymbolIteratorKind::Nested(parent) => parent.peeking_next(accept), SymbolIteratorKind::Root(root) => root.peeking_next(accept), - } - } -} - -pub enum SymbolIteratorError { - /// At least one end-function returned `true`. - EndReached, - /// A new line did not start with the expected prefix. - PrefixMismatch, - /// Reached end of input. - Eoi, -} - -fn contains_only_non_whitespace_sequences(sequences: &[Vec]) -> bool { - let mut whitespace_sequence_found = false; - - for sequence in sequences { - whitespace_sequence_found = whitespace_sequence_found || !contains_non_whitespace(sequence); - } - whitespace_sequence_found -} + }; -fn contains_non_whitespace(sequence: &[SymbolKind]) -> bool { - for kind in sequence { - if !matches!( - kind, - SymbolKind::Whitespace | SymbolKind::Newline | SymbolKind::Blankline - ) { - return true; - } + next_symbol(self, next_fn) } - - false } #[cfg(test)] @@ -461,7 +404,7 @@ mod test { use itertools::{Itertools, PeekingNext}; - use crate::scanner::{Scanner, SymbolKind}; + use crate::scanner::{PrefixMatcher, Scanner, SymbolKind}; use super::SymbolIterator; @@ -535,7 +478,7 @@ mod test { .scan_str("text*"); let mut iterator = SymbolIterator::from(&symbols).nest( - &[], + None, Some(Rc::new(|matcher| matcher.matches(&[SymbolKind::Star]))), ); @@ -567,11 +510,18 @@ mod test { let iterator = SymbolIterator::with( &symbols, 0, - vec![vec![SymbolKind::Star, SymbolKind::Whitespace]], + Some(Rc::new(|matcher: &mut dyn PrefixMatcher| { + matcher.consumed_prefix(&[SymbolKind::Star, SymbolKind::Whitespace]) + })), None, ); - let mut inner = iterator.nest(&[SymbolKind::Star], None); + let mut inner = iterator.nest( + Some(Rc::new(|matcher: &mut dyn PrefixMatcher| { + matcher.consumed_prefix(&[SymbolKind::Star]) + })), + None, + ); let sym_kinds = inner .take_to_end() diff --git a/parser/src/elements/atomic/heading.rs b/parser/src/elements/atomic/heading.rs index 46b0581c..5a0c8f23 100644 --- a/parser/src/elements/atomic/heading.rs +++ b/parser/src/elements/atomic/heading.rs @@ -7,7 +7,7 @@ use crate::elements::blocks::Block; use crate::elements::Blocks; use crate::parser::{ElementParser, TokenizeOutput}; use unimarkup_commons::scanner::{ - Itertools, Symbol, SymbolIterMatcher, SymbolIterator, SymbolKind, + EndMatcher, Itertools, PrefixMatcher, Symbol, SymbolIterator, SymbolKind, }; use super::log_id::AtomicError; @@ -138,27 +138,30 @@ impl ElementParser for Heading { } heading_start.push(SymbolKind::Whitespace); - let whitespace_indents = std::iter::repeat(SymbolKind::Whitespace) - .take(heading_start.len()) - .collect(); let sub_heading_start: Vec = std::iter::repeat(SymbolKind::Hash) .take(heading_start.len()) .chain([SymbolKind::Whitespace]) .collect(); - - let heading_end = move |matcher: &mut dyn SymbolIterMatcher| { + let heading_end = move |matcher: &mut dyn EndMatcher| { matcher.consumed_is_empty_line() || matcher.matches(&[SymbolKind::EOI]) || level != HeadingLevel::Level6 && matcher.matches(&sub_heading_start) }; - let mut content_iter = input.nest_prefixes( - [heading_start, whitespace_indents], - Some(Rc::new(heading_end)), - ); + let whitespace_indents: Vec = std::iter::repeat(SymbolKind::Whitespace) + .take(heading_start.len()) + .collect(); + let heading_prefix = move |matcher: &mut dyn PrefixMatcher| { + matcher.consumed_prefix(&heading_start) || matcher.consumed_prefix(&whitespace_indents) + }; + + let mut content_iter = + input.nest(Some(Rc::new(heading_prefix)), Some(Rc::new(heading_end))); let content_symbols = content_iter.take_to_end(); + dbg!(&content_symbols.iter().map(|s| s.kind).collect::>()); + // Line prefixes violated => invalid heading syntax if !content_iter.end_reached() { return None; From 27d8d70082f243a363937187ca320f8821530373 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Sat, 23 Sep 2023 18:51:55 +0200 Subject: [PATCH 14/43] fix: fix endless loop in peeking_next() --- commons/src/scanner/symbol/iterator.rs | 66 +++++++++++--------------- 1 file changed, 28 insertions(+), 38 deletions(-) diff --git a/commons/src/scanner/symbol/iterator.rs b/commons/src/scanner/symbol/iterator.rs index 81f15b01..b531da57 100644 --- a/commons/src/scanner/symbol/iterator.rs +++ b/commons/src/scanner/symbol/iterator.rs @@ -339,48 +339,36 @@ impl<'input> Iterator for SymbolIterator<'input> { type Item = &'input Symbol<'input>; fn next(&mut self) -> Option { - let next_fn = |kind: &mut SymbolIteratorKind<'input>| match kind { + if self.end_reached() { + return None; + } + + if let Some(end_fn) = self.end_match.clone() { + if (end_fn)(self) { + self.iter_end = true; + return None; + } + } + + let curr_symbol_opt = match &mut self.kind { SymbolIteratorKind::Nested(parent) => parent.next(), SymbolIteratorKind::Root(root) => root.next(), }; - next_symbol(self, next_fn) - } -} + if curr_symbol_opt?.kind == SymbolKind::Newline && self.prefix_match.is_some() { + let prefix_match = self + .prefix_match + .clone() + .expect("Prefix match checked above to be some."); -fn next_symbol<'input, F>( - iter: &mut SymbolIterator<'input>, - next_fn: F, -) -> Option<&'input Symbol<'input>> -where - F: FnOnce(&mut SymbolIteratorKind<'input>) -> Option<&'input Symbol<'input>>, -{ - if iter.end_reached() { - return None; - } - - if let Some(end_fn) = iter.end_match.clone() { - if (end_fn)(iter) { - iter.iter_end = true; - return None; + // Note: This mostly indicates a syntax violation, so skipped symbol is ok. + if !prefix_match(self) { + return None; + } } - } - - let curr_symbol_opt = next_fn(&mut iter.kind); - if curr_symbol_opt?.kind == SymbolKind::Newline && iter.prefix_match.is_some() { - let prefix_match = iter - .prefix_match - .clone() - .expect("Prefix match checked above to be some."); - - // Note: This mostly indicates a syntax violation, so skipped symbol is ok. - if !prefix_match(iter) { - return None; - } + curr_symbol_opt } - - curr_symbol_opt } impl<'input> PeekingNext for SymbolIterator<'input> { @@ -389,12 +377,14 @@ impl<'input> PeekingNext for SymbolIterator<'input> { Self: Sized, F: FnOnce(&Self::Item) -> bool, { - let next_fn = |kind: &mut SymbolIteratorKind<'input>| match kind { + // Note: Not possible to restrict peek to return only symbols `next()` would return, + // because `peeking_next()` is needed in End- and PrefixMatcher. + // Using the same logic as in `next()` would result in endless loop inside `peeking_next()` => StackOverflow + + match &mut self.kind { SymbolIteratorKind::Nested(parent) => parent.peeking_next(accept), SymbolIteratorKind::Root(root) => root.peeking_next(accept), - }; - - next_symbol(self, next_fn) + } } } From 71171f35af2c4cfd30313ce1a8c8c1ba63815710 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Sat, 23 Sep 2023 20:10:43 +0200 Subject: [PATCH 15/43] fix: correct iterator length calculation --- commons/src/scanner/symbol/iterator.rs | 17 ++++++----------- parser/src/elements/atomic/heading.rs | 2 -- parser/src/parser.rs | 13 ++++++------- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git a/commons/src/scanner/symbol/iterator.rs b/commons/src/scanner/symbol/iterator.rs index b531da57..de98bc96 100644 --- a/commons/src/scanner/symbol/iterator.rs +++ b/commons/src/scanner/symbol/iterator.rs @@ -78,10 +78,11 @@ impl<'input> EndMatcher for SymbolIterator<'input> { let _whitespaces = self .peeking_take_while(|s| s.kind == SymbolKind::Whitespace) .count(); - self.set_peek_index(self.peek_index().saturating_sub(1)); // Note: To compensate last "peeking_next()" in "peeking_take_whil()" + // self.set_peek_index(self.peek_index().saturating_sub(1)); // Note: To compensate last "peeking_next()" in "peeking_take_while()" - self.peeking_next(|s| matches!(s.kind, SymbolKind::Blankline | SymbolKind::Newline)) - .is_some() + let new_line = self + .peeking_next(|s| matches!(s.kind, SymbolKind::Blankline | SymbolKind::Newline)); + new_line.is_some() } else { Some(SymbolKind::Blankline) == next }; @@ -181,17 +182,11 @@ impl<'input> SymbolIterator<'input> { } pub fn len(&self) -> usize { - match &self.kind { - SymbolIteratorKind::Nested(parent) => parent.len(), - SymbolIteratorKind::Root(root) => root.symbols[self.start_index..].len(), - } + self.remaining_symbols().unwrap_or(&[]).len() } pub fn is_empty(&self) -> bool { - match &self.kind { - SymbolIteratorKind::Nested(parent) => parent.is_empty(), - SymbolIteratorKind::Root(root) => root.symbols[self.start_index..].is_empty(), - } + self.len() == 0 } pub fn start_index(&self) -> usize { diff --git a/parser/src/elements/atomic/heading.rs b/parser/src/elements/atomic/heading.rs index 5a0c8f23..2bbb209f 100644 --- a/parser/src/elements/atomic/heading.rs +++ b/parser/src/elements/atomic/heading.rs @@ -160,8 +160,6 @@ impl ElementParser for Heading { input.nest(Some(Rc::new(heading_prefix)), Some(Rc::new(heading_end))); let content_symbols = content_iter.take_to_end(); - dbg!(&content_symbols.iter().map(|s| s.kind).collect::>()); - // Line prefixes violated => invalid heading syntax if !content_iter.end_reached() { return None; diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 3b970103..1aa4b05b 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -103,7 +103,7 @@ impl MainParser { let mut blocks = Vec::default(); #[cfg(debug_assertions)] - let mut curr_idx = input.len(); + let mut curr_len = input.len(); 'outer: while let Some(kind) = input.peek_kind() { match kind { @@ -129,10 +129,6 @@ impl MainParser { let mut iter = input.clone(); if let Some(mut res_blocks) = parser_fn(&mut iter) { blocks.append(&mut res_blocks); - // TODO: clarify if this is ok? Wouldn't we lose sequences this way? - // input = SymbolIterator::from(rest_of_input); - - // Maybe this is better? Continue where parser left of *input = iter; continue 'outer; // start from first parser on next input } @@ -148,8 +144,11 @@ impl MainParser { #[cfg(debug_assertions)] { - assert_ne!(input.curr_index(), curr_idx); - curr_idx = input.curr_index(); + assert!( + input.len() < curr_len, + "Parser consumed no symbol in iteration." + ); + curr_len = input.len(); } } From 57f5f72c1718eb9595d779ad917a7f1cd2ee00fc Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Sat, 23 Sep 2023 22:34:50 +0200 Subject: [PATCH 16/43] fix: prevent plain from merging with newline token --- inline/src/parser/mod.rs | 4 +++- parser/src/parser.rs | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/inline/src/parser/mod.rs b/inline/src/parser/mod.rs index d1b2fc11..aeb4b7be 100644 --- a/inline/src/parser/mod.rs +++ b/inline/src/parser/mod.rs @@ -127,7 +127,9 @@ impl<'input> Parser<'input> { span.end = next_token.span.end; break; } else if not_enclosed_and_interrupted { - if next_token.consumable_by_plain() { + if !matches!(kind, TokenKind::Newline | TokenKind::EscapedNewline) + && next_token.consumable_by_plain() + { // consume the token let (next_content, next_span) = next_token.parts(); diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 1aa4b05b..b6868d33 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -164,7 +164,6 @@ pub fn parse_unimarkup(um_content: &str, config: &mut Config) -> Document { .expect("Must be valid provider.") .scan_str(um_content); let mut symbols_iter = SymbolIterator::from(&symbols); - // println!("{:?}", &symbols.iter().map(|s| s.kind).collect::>()); let blocks = parser.parse(&mut symbols_iter); let mut unimarkup = Document { From 16c2a6070c65588f2fdf249dfd8ef08585071847 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Sat, 23 Sep 2023 23:31:07 +0200 Subject: [PATCH 17/43] fix: implement rendering for whitespace inlines --- render/src/html/render.rs | 45 +++++++++++++++++++++++++++++++++++++++ render/src/html/tag.rs | 2 ++ render/src/render.rs | 30 ++++++++++++++++++++++++++ 3 files changed, 77 insertions(+) diff --git a/render/src/html/render.rs b/render/src/html/render.rs index 370b5e32..5d6ce7e3 100644 --- a/render/src/html/render.rs +++ b/render/src/html/render.rs @@ -211,4 +211,49 @@ impl Renderer for HtmlRenderer { Ok(html) } + + fn render_newline( + &mut self, + _newline: &Newline, + _context: &Context, + ) -> Result { + let html = Html::with_body(HtmlBody::from(HtmlElement { + tag: HtmlTag::PlainContent, + attributes: HtmlAttributes::default(), + content: Some(unimarkup_inline::TokenKind::Whitespace.as_str().to_string()), + })); + + Ok(html) + } + + fn render_escaped_newline( + &mut self, + _escaped_newline: &EscapedNewline, + _context: &Context, + ) -> Result { + let html = Html::with_body(HtmlBody::from(HtmlElement { + tag: HtmlTag::Br, + attributes: HtmlAttributes::default(), + content: None, + })); + + Ok(html) + } + + fn render_escaped_whitespace( + &mut self, + escaped_whitespace: &EscapedWhitespace, + _context: &Context, + ) -> Result { + let html = Html::with_body(HtmlBody::from(HtmlElement { + tag: HtmlTag::Span, + attributes: HtmlAttributes(vec![HtmlAttribute { + name: "style".to_string(), + value: Some("white-space: pre-wrap;".to_string()), + }]), + content: Some(escaped_whitespace.inner().to_string()), + })); + + Ok(html) + } } diff --git a/render/src/html/tag.rs b/render/src/html/tag.rs index e8c02a32..1bcf029d 100644 --- a/render/src/html/tag.rs +++ b/render/src/html/tag.rs @@ -26,6 +26,7 @@ pub enum HtmlTag { Sup, Mark, Q, + Br, } impl HtmlTag { @@ -51,6 +52,7 @@ impl HtmlTag { HtmlTag::Sup => "sup", HtmlTag::Mark => "mark", HtmlTag::Q => "q", + HtmlTag::Br => "br", } } } diff --git a/render/src/render.rs b/render/src/render.rs index 20660859..b84a5cc7 100644 --- a/render/src/render.rs +++ b/render/src/render.rs @@ -158,6 +158,29 @@ pub trait Renderer { Err(RenderError::Unimplemented) } + /// Render [`Newline` content](unimarkup_inline::inlines::Inline) to the output format `T`. + fn render_newline(&mut self, _newline: &Newline, _context: &Context) -> Result { + Err(RenderError::Unimplemented) + } + + /// Render [`EscapedNewline` content](unimarkup_inline::inlines::Inline) to the output format `T`. + fn render_escaped_newline( + &mut self, + _escaped_newline: &EscapedNewline, + _context: &Context, + ) -> Result { + Err(RenderError::Unimplemented) + } + + /// Render [`EscapedWhitespace` content](unimarkup_inline::inlines::Inline) to the output format `T`. + fn render_escaped_whitespace( + &mut self, + _escaped_whitespace: &EscapedWhitespace, + _context: &Context, + ) -> Result { + Err(RenderError::Unimplemented) + } + //----------------------------- GENERIC ELEMENTS ----------------------------- /// Render Unimarkup [`Block`s](Block) to the output format `T`. @@ -236,6 +259,13 @@ pub trait Renderer { Inline::Quote(quote) => self.render_quote(quote, context), Inline::Verbatim(verbatim) => self.render_inline_verbatim(verbatim, context), Inline::Plain(plain) => self.render_plain(plain, context), + Inline::Newline(newline) => self.render_newline(newline, context), + Inline::EscapedNewline(escaped_newline) => { + self.render_escaped_newline(escaped_newline, context) + } + Inline::EscapedWhitespace(escaped_whitespace) => { + self.render_escaped_whitespace(escaped_whitespace, context) + } _ => Err(RenderError::Unimplemented), } } From 1df4d76f33eccb11c377881d399fdcd6a20f79bc Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Sat, 23 Sep 2023 23:39:02 +0200 Subject: [PATCH 18/43] fix: add comment why reset_peek() is needed --- commons/src/scanner/symbol/iterator.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/commons/src/scanner/symbol/iterator.rs b/commons/src/scanner/symbol/iterator.rs index de98bc96..7e576105 100644 --- a/commons/src/scanner/symbol/iterator.rs +++ b/commons/src/scanner/symbol/iterator.rs @@ -68,6 +68,7 @@ pub trait PrefixMatcher { impl<'input> EndMatcher for SymbolIterator<'input> { fn is_empty_line(&mut self) -> bool { + // Note: Multiple matches may be set in the match closure, so we need to ensure that all start at the same index self.reset_peek(); let next = self @@ -78,7 +79,6 @@ impl<'input> EndMatcher for SymbolIterator<'input> { let _whitespaces = self .peeking_take_while(|s| s.kind == SymbolKind::Whitespace) .count(); - // self.set_peek_index(self.peek_index().saturating_sub(1)); // Note: To compensate last "peeking_next()" in "peeking_take_while()" let new_line = self .peeking_next(|s| matches!(s.kind, SymbolKind::Blankline | SymbolKind::Newline)); @@ -101,6 +101,7 @@ impl<'input> EndMatcher for SymbolIterator<'input> { } fn matches(&mut self, sequence: &[SymbolKind]) -> bool { + // Note: Multiple matches may be set in the match closure, so we need to ensure that all start at the same index self.reset_peek(); for kind in sequence { From f7cbbf8f09e9a264abb46b0e62693b015b8990fb Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Sun, 24 Sep 2023 00:48:43 +0200 Subject: [PATCH 19/43] fix: update verbatim to work with symbol iterator --- parser/src/elements/enclosed/verbatim.rs | 84 +++++++++++++----------- render/src/html/render.rs | 22 +++---- 2 files changed, 55 insertions(+), 51 deletions(-) diff --git a/parser/src/elements/enclosed/verbatim.rs b/parser/src/elements/enclosed/verbatim.rs index 7a227b0a..0f290570 100644 --- a/parser/src/elements/enclosed/verbatim.rs +++ b/parser/src/elements/enclosed/verbatim.rs @@ -1,9 +1,11 @@ +use std::rc::Rc; + use serde::{Deserialize, Serialize}; use crate::elements::blocks::Block; use crate::elements::Blocks; use crate::parser::{ElementParser, TokenizeOutput}; -use unimarkup_commons::scanner::{Symbol, SymbolIterator, SymbolKind}; +use unimarkup_commons::scanner::{EndMatcher, Itertools, Symbol, SymbolIterator, SymbolKind}; /// Structure of a Unimarkup verbatim block element. #[derive(Debug, PartialEq, Eq, Clone)] @@ -14,6 +16,9 @@ pub struct Verbatim { /// The content of the verbatim block. pub content: String, + /// The language used to highlight the content. + pub data_lang: Option, + /// Attributes of the verbatim block. // TODO: make attributes data structure pub attributes: Option, @@ -25,6 +30,7 @@ pub struct Verbatim { pub(crate) enum Token<'a> { StartDelim(Vec<&'a Symbol<'a>>), + DataLang(Vec<&'a Symbol<'a>>), Content(Vec<&'a Symbol<'a>>), } @@ -32,53 +38,47 @@ impl ElementParser for Verbatim { type Token<'a> = self::Token<'a>; fn tokenize<'i>(input: &mut SymbolIterator<'i>) -> Option>> { - let start_delim: Vec<_> = input + let start_delim_len = input .by_ref() - .take_while(|symbol| matches!(symbol.kind, SymbolKind::Tick)) - .collect(); - let start_delim_len = start_delim.len(); + .peeking_take_while(|symbol| matches!(symbol.kind, SymbolKind::Tick)) + .count(); if start_delim_len < 3 { return None; }; - let end_sequence = std::iter::repeat(SymbolKind::Tick) - .take(start_delim_len) + let start_delim = input.by_ref().take(start_delim_len).collect(); + // Note: Consuming `Newline` is intended, because it is not part of the content, but also not of data-lang + let data_lang = input + .take_while(|s| s.kind != SymbolKind::Newline) .collect::>(); - let _end_fn = Box::new(|sequence: &[Symbol<'i>]| { - sequence[..start_delim_len] - .iter() - .map(|s| s.kind) - .collect::>() - .starts_with(&end_sequence) - }); - - // let mut content_iter = input.nest(&[], Some(end_fn)); - - // let content = content_iter.take_to_end(); - // if !content_iter.end_reached() { - // return None; - // } - - // input = content_iter.parent()?; - match input - .by_ref() - .take(start_delim_len) - .map(|s| s.kind) - .collect::>() - { - end if end == end_sequence => { - if input.peek_kind() == Some(SymbolKind::Tick) { - return None; - } - } - _ => return None, + + let end_sequence = [SymbolKind::Newline] + .into_iter() + .chain(std::iter::repeat(SymbolKind::Tick).take(start_delim_len)) + .collect::>(); + let end_sequence_len = end_sequence.len(); + let end_fn = Rc::new(move |matcher: &mut dyn EndMatcher| matcher.matches(&end_sequence)); + + let mut content_iter = input.nest(None, Some(end_fn)); + let content = content_iter.take_to_end(); + + if !content_iter.end_reached() { + return None; } + content_iter.update(input); + + input.dropping(end_sequence_len); + // TODO: handle language attribute let output = TokenizeOutput { - tokens: vec![Token::StartDelim(start_delim), Token::Content(vec![])], //content)], + tokens: vec![ + Token::StartDelim(start_delim), + Token::DataLang(data_lang), + Token::Content(content), + ], }; Some(output) @@ -90,7 +90,16 @@ impl ElementParser for Verbatim { }; let line_nr = start.get(0)?.start.line; - let Token::Content(symbols) = input.get(1)? else { + let Token::DataLang(lang_symbols) = input.get(1)? else { + return None; + }; + let data_lang = if lang_symbols.is_empty() { + None + } else { + Some(Symbol::flatten_iter(lang_symbols.iter().copied())?.to_string()) + }; + + let Token::Content(symbols) = input.get(2)? else { return None; }; let content = Symbol::flatten_iter(symbols.iter().copied())?; @@ -98,6 +107,7 @@ impl ElementParser for Verbatim { let block = Self { id: String::default(), content: String::from(content), + data_lang, attributes: None, line_nr, }; diff --git a/render/src/html/render.rs b/render/src/html/render.rs index 5d6ce7e3..291b0731 100644 --- a/render/src/html/render.rs +++ b/render/src/html/render.rs @@ -41,18 +41,6 @@ impl Renderer for HtmlRenderer { verbatim: &unimarkup_parser::elements::enclosed::Verbatim, _context: &Context, ) -> Result { - // TODO: improve handling of attributes - // let attributes = serde_json::from_str::( - // &verbatim.attributes.as_ref().cloned().unwrap_or_default(), - // ) - // .ok(); - - // let language = match attributes.as_ref() { - // Some(attrs) => attrs.language.clone().unwrap_or(PLAIN_SYNTAX.to_string()), - // None => PLAIN_SYNTAX.to_string(), - // }; - let language = "rust"; - let inner = Html::with( HtmlHead { syntax_highlighting_used: true, @@ -62,8 +50,14 @@ impl Renderer for HtmlRenderer { tag: HtmlTag::Code, attributes: HtmlAttributes::default(), content: Some( - highlight::highlight_content(&verbatim.content, language) - .unwrap_or(verbatim.content.clone()), + highlight::highlight_content( + &verbatim.content, + verbatim + .data_lang + .as_ref() + .unwrap_or(&highlight::PLAIN_SYNTAX.to_string()), + ) + .unwrap_or(verbatim.content.clone()), ), }), ); From 6c3c28e33bff13890ee993b2d0a0988a2a0256c6 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Sun, 24 Sep 2023 01:29:35 +0200 Subject: [PATCH 20/43] arch: split iterator into multiple files --- .../src/scanner/symbol/iterator/matcher.rs | 91 ++++++++ .../symbol/{iterator.rs => iterator/mod.rs} | 199 +++--------------- commons/src/scanner/symbol/iterator/root.rs | 67 ++++++ 3 files changed, 184 insertions(+), 173 deletions(-) create mode 100644 commons/src/scanner/symbol/iterator/matcher.rs rename commons/src/scanner/symbol/{iterator.rs => iterator/mod.rs} (71%) create mode 100644 commons/src/scanner/symbol/iterator/root.rs diff --git a/commons/src/scanner/symbol/iterator/matcher.rs b/commons/src/scanner/symbol/iterator/matcher.rs new file mode 100644 index 00000000..19ab0212 --- /dev/null +++ b/commons/src/scanner/symbol/iterator/matcher.rs @@ -0,0 +1,91 @@ +use std::rc::Rc; + +use itertools::{Itertools, PeekingNext}; + +use crate::scanner::SymbolKind; + +use super::SymbolIterator; + +pub type IteratorEndFn = Rc bool)>; +pub type IteratorPrefixFn = Rc bool)>; + +pub trait EndMatcher { + fn is_empty_line(&mut self) -> bool; + fn consumed_is_empty_line(&mut self) -> bool; + fn matches(&mut self, sequence: &[SymbolKind]) -> bool; + fn consumed_matches(&mut self, sequence: &[SymbolKind]) -> bool; +} + +pub trait PrefixMatcher { + fn consumed_prefix(&mut self, sequence: &[SymbolKind]) -> bool; +} + +impl<'input> EndMatcher for SymbolIterator<'input> { + fn is_empty_line(&mut self) -> bool { + // Note: Multiple matches may be set in the match closure, so we need to ensure that all start at the same index + self.reset_peek(); + + let next = self + .peeking_next(|s| matches!(s.kind, SymbolKind::Blankline | SymbolKind::Newline)) + .map(|s| s.kind); + + let is_empty_line = if Some(SymbolKind::Newline) == next { + let _whitespaces = self + .peeking_take_while(|s| s.kind == SymbolKind::Whitespace) + .count(); + + let new_line = self + .peeking_next(|s| matches!(s.kind, SymbolKind::Blankline | SymbolKind::Newline)); + new_line.is_some() + } else { + Some(SymbolKind::Blankline) == next + }; + + is_empty_line + } + + fn consumed_is_empty_line(&mut self) -> bool { + let is_empty_line = self.is_empty_line(); + + if is_empty_line { + self.set_curr_index(self.peek_index()); // To consume peeked symbols + } + + is_empty_line + } + + fn matches(&mut self, sequence: &[SymbolKind]) -> bool { + // Note: Multiple matches may be set in the match closure, so we need to ensure that all start at the same index + self.reset_peek(); + + for kind in sequence { + if self.peeking_next(|s| s.kind == *kind).is_none() { + return false; + } + } + + true + } + + fn consumed_matches(&mut self, sequence: &[SymbolKind]) -> bool { + let matched = self.matches(sequence); + + if matched { + self.set_curr_index(self.peek_index()); // To consume peeked symbols + } + + matched + } +} + +impl<'input> PrefixMatcher for SymbolIterator<'input> { + fn consumed_prefix(&mut self, sequence: &[SymbolKind]) -> bool { + #[cfg(debug_assertions)] + assert!( + !sequence.contains(&SymbolKind::Newline), + "Newline symbol in prefix match is not allowed." + ); + + self.consumed_matches(sequence) + } +} diff --git a/commons/src/scanner/symbol/iterator.rs b/commons/src/scanner/symbol/iterator/mod.rs similarity index 71% rename from commons/src/scanner/symbol/iterator.rs rename to commons/src/scanner/symbol/iterator/mod.rs index 7e576105..07230c87 100644 --- a/commons/src/scanner/symbol/iterator.rs +++ b/commons/src/scanner/symbol/iterator/mod.rs @@ -1,8 +1,13 @@ -use std::{borrow::BorrowMut, rc::Rc}; +use std::borrow::BorrowMut; use super::{Symbol, SymbolKind}; +mod matcher; +mod root; + pub use itertools::*; +pub use matcher::*; +pub use root::*; #[derive(Clone)] pub struct SymbolIterator<'input> { @@ -13,169 +18,25 @@ pub struct SymbolIterator<'input> { iter_end: bool, } -#[derive(Clone)] -pub struct SymbolIteratorRoot<'input> { - symbols: &'input [Symbol<'input>], - curr_index: usize, - peek_index: usize, -} - -impl<'input> From<&'input [Symbol<'input>]> for SymbolIteratorRoot<'input> { - fn from(value: &'input [Symbol<'input>]) -> Self { - SymbolIteratorRoot { - symbols: value, - curr_index: 0, - peek_index: 0, - } - } -} - -impl<'input> From<&'input Vec>> for SymbolIteratorRoot<'input> { - fn from(value: &'input Vec>) -> Self { - SymbolIteratorRoot { - symbols: value, - curr_index: 0, - peek_index: 0, - } - } -} - -impl<'input> SymbolIteratorRoot<'input> { - fn remaining_symbols(&self) -> Option<&'input [Symbol<'input>]> { - self.symbols.get(self.curr_index..) - } -} - #[derive(Clone)] pub enum SymbolIteratorKind<'input> { Nested(Box>), Root(SymbolIteratorRoot<'input>), } -pub type IteratorEndFn = Rc bool)>; -pub type IteratorPrefixFn = Rc bool)>; - -pub trait EndMatcher { - fn is_empty_line(&mut self) -> bool; - fn consumed_is_empty_line(&mut self) -> bool; - fn matches(&mut self, sequence: &[SymbolKind]) -> bool; - fn consumed_matches(&mut self, sequence: &[SymbolKind]) -> bool; -} - -pub trait PrefixMatcher { - fn consumed_prefix(&mut self, sequence: &[SymbolKind]) -> bool; -} - -impl<'input> EndMatcher for SymbolIterator<'input> { - fn is_empty_line(&mut self) -> bool { - // Note: Multiple matches may be set in the match closure, so we need to ensure that all start at the same index - self.reset_peek(); - - let next = self - .peeking_next(|s| matches!(s.kind, SymbolKind::Blankline | SymbolKind::Newline)) - .map(|s| s.kind); - - let is_empty_line = if Some(SymbolKind::Newline) == next { - let _whitespaces = self - .peeking_take_while(|s| s.kind == SymbolKind::Whitespace) - .count(); - - let new_line = self - .peeking_next(|s| matches!(s.kind, SymbolKind::Blankline | SymbolKind::Newline)); - new_line.is_some() - } else { - Some(SymbolKind::Blankline) == next - }; - - is_empty_line - } - - fn consumed_is_empty_line(&mut self) -> bool { - let is_empty_line = self.is_empty_line(); - - if is_empty_line { - self.set_curr_index(self.peek_index()); // To consume peeked symbols - } - - is_empty_line - } - - fn matches(&mut self, sequence: &[SymbolKind]) -> bool { - // Note: Multiple matches may be set in the match closure, so we need to ensure that all start at the same index - self.reset_peek(); - - for kind in sequence { - if self.peeking_next(|s| s.kind == *kind).is_none() { - return false; - } - } - - true - } - - fn consumed_matches(&mut self, sequence: &[SymbolKind]) -> bool { - let matched = self.matches(sequence); - - if matched { - self.set_curr_index(self.peek_index()); // To consume peeked symbols - } - - matched - } -} - -impl<'input> PrefixMatcher for SymbolIterator<'input> { - fn consumed_prefix(&mut self, sequence: &[SymbolKind]) -> bool { - #[cfg(debug_assertions)] - assert!( - !sequence.contains(&SymbolKind::Newline), - "Newline symbol in prefix match is not allowed." - ); - - self.consumed_matches(sequence) - } -} - -impl<'input> From<&'input [Symbol<'input>]> for SymbolIterator<'input> { - fn from(value: &'input [Symbol<'input>]) -> Self { - SymbolIterator { - kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(value)), - start_index: 0, - prefix_match: None, - end_match: None, - iter_end: false, - } - } -} - -impl<'input> From<&'input Vec>> for SymbolIterator<'input> { - fn from(value: &'input Vec>) -> Self { - SymbolIterator { - kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(value)), - start_index: 0, - prefix_match: None, - end_match: None, - iter_end: false, - } - } -} - impl<'input> SymbolIterator<'input> { - pub fn new(symbols: &'input [Symbol<'input>], start_index: usize) -> Self { - let mut iter = SymbolIterator::from(symbols); - iter.start_index = start_index; - iter + pub fn new(symbols: &'input [Symbol<'input>]) -> Self { + SymbolIterator::from(symbols) } pub fn with( symbols: &'input [Symbol<'input>], - start_index: usize, prefix_match: Option, end: Option, ) -> Self { SymbolIterator { kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(symbols)), - start_index, + start_index: 0, prefix_match, end_match: end, iter_end: false, @@ -300,34 +161,27 @@ impl<'input> SymbolIterator<'input> { } } -impl<'input> Iterator for SymbolIteratorRoot<'input> { - type Item = &'input Symbol<'input>; - - fn next(&mut self) -> Option { - let symbol = self.symbols.get(self.curr_index)?; - - self.curr_index += 1; - self.peek_index = self.curr_index; - - Some(symbol) +impl<'input> From<&'input [Symbol<'input>]> for SymbolIterator<'input> { + fn from(value: &'input [Symbol<'input>]) -> Self { + SymbolIterator { + kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(value)), + start_index: 0, + prefix_match: None, + end_match: None, + iter_end: false, + } } } -impl<'input> PeekingNext for SymbolIteratorRoot<'input> { - fn peeking_next(&mut self, accept: F) -> Option - where - Self: Sized, - F: FnOnce(&Self::Item) -> bool, - { - let symbol = self.symbols.get(self.peek_index)?; - - if !(accept)(&symbol) { - return None; +impl<'input> From<&'input Vec>> for SymbolIterator<'input> { + fn from(value: &'input Vec>) -> Self { + SymbolIterator { + kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(value)), + start_index: 0, + prefix_match: None, + end_match: None, + iter_end: false, } - - self.peek_index += 1; - - Some(symbol) } } @@ -495,7 +349,6 @@ mod test { let iterator = SymbolIterator::with( &symbols, - 0, Some(Rc::new(|matcher: &mut dyn PrefixMatcher| { matcher.consumed_prefix(&[SymbolKind::Star, SymbolKind::Whitespace]) })), diff --git a/commons/src/scanner/symbol/iterator/root.rs b/commons/src/scanner/symbol/iterator/root.rs new file mode 100644 index 00000000..49dd8ce2 --- /dev/null +++ b/commons/src/scanner/symbol/iterator/root.rs @@ -0,0 +1,67 @@ +use itertools::PeekingNext; + +use crate::scanner::Symbol; + +#[derive(Clone)] +pub struct SymbolIteratorRoot<'input> { + symbols: &'input [Symbol<'input>], + pub(super) curr_index: usize, + pub(super) peek_index: usize, +} + +impl<'input> SymbolIteratorRoot<'input> { + pub(super) fn remaining_symbols(&self) -> Option<&'input [Symbol<'input>]> { + self.symbols.get(self.curr_index..) + } +} + +impl<'input> From<&'input [Symbol<'input>]> for SymbolIteratorRoot<'input> { + fn from(value: &'input [Symbol<'input>]) -> Self { + SymbolIteratorRoot { + symbols: value, + curr_index: 0, + peek_index: 0, + } + } +} + +impl<'input> From<&'input Vec>> for SymbolIteratorRoot<'input> { + fn from(value: &'input Vec>) -> Self { + SymbolIteratorRoot { + symbols: value, + curr_index: 0, + peek_index: 0, + } + } +} + +impl<'input> Iterator for SymbolIteratorRoot<'input> { + type Item = &'input Symbol<'input>; + + fn next(&mut self) -> Option { + let symbol = self.symbols.get(self.curr_index)?; + + self.curr_index += 1; + self.peek_index = self.curr_index; + + Some(symbol) + } +} + +impl<'input> PeekingNext for SymbolIteratorRoot<'input> { + fn peeking_next(&mut self, accept: F) -> Option + where + Self: Sized, + F: FnOnce(&Self::Item) -> bool, + { + let symbol = self.symbols.get(self.peek_index)?; + + if !(accept)(&symbol) { + return None; + } + + self.peek_index += 1; + + Some(symbol) + } +} From ee317d2102aae100467e2f2bacf55a8fbf798c52 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Sun, 24 Sep 2023 03:32:27 +0200 Subject: [PATCH 21/43] fix: add documentation for the symbol iterator --- .../src/scanner/symbol/iterator/matcher.rs | 35 ++++++++++ commons/src/scanner/symbol/iterator/mod.rs | 70 +++++++++++++++++-- commons/src/scanner/symbol/iterator/root.rs | 8 +++ 3 files changed, 108 insertions(+), 5 deletions(-) diff --git a/commons/src/scanner/symbol/iterator/matcher.rs b/commons/src/scanner/symbol/iterator/matcher.rs index 19ab0212..56eefab2 100644 --- a/commons/src/scanner/symbol/iterator/matcher.rs +++ b/commons/src/scanner/symbol/iterator/matcher.rs @@ -1,3 +1,6 @@ +//! Contains matcher traits and types used to detect iterator end and strip prefixes. +//! The available matcher traits are implemented for [`SymbolIterator`]. + use std::rc::Rc; use itertools::{Itertools, PeekingNext}; @@ -6,17 +9,49 @@ use crate::scanner::SymbolKind; use super::SymbolIterator; +/// Function type to notify an iterator if an end was reached. pub type IteratorEndFn = Rc bool)>; +/// Function type to consume prefix sequences of a new line. pub type IteratorPrefixFn = Rc bool)>; +/// Trait containing functions that are available inside the end matcher function. pub trait EndMatcher { + /// Returns `true` if the upcoming [`Symbol`] sequence is an empty line. + /// Meaning that a line contains no [`Symbol`] or only [`SymbolKind::Whitespace`]. + /// + /// **Note:** This is also `true` if a parent iterator stripped non-whitespace symbols, and the nested iterator only has whitespace symbols. + /// + /// [`Symbol`]: super::Symbol fn is_empty_line(&mut self) -> bool; + + /// Wrapper around [`Self::is_empty_line()`] that additionally consumes the matched empty line. + /// Consuming means the related iterator advances over the matched empty line. + /// + /// **Note:** The iterator is only advanced if an empty line is matched. fn consumed_is_empty_line(&mut self) -> bool; + + /// Returns `true` if the given [`Symbol`] sequence matches the upcoming one. + /// + /// [`Symbol`]: super::Symbol fn matches(&mut self, sequence: &[SymbolKind]) -> bool; + + /// Wrapper around [`Self::matches()`] that additionally consumes the matched sequence. + /// Consuming means the related iterator advances over the matched sequence. + /// + /// **Note:** The iterator is only advanced if the sequence is matched. fn consumed_matches(&mut self, sequence: &[SymbolKind]) -> bool; } +/// Trait containing functions that are available inside the prefix matcher function. pub trait PrefixMatcher { + /// Consumes and returns `true` if the given [`Symbol`] sequence matches the upcoming one. + /// Consuming means the related iterator advances over the matched sequence. + /// + /// **Note:** The iterator is only advanced if the sequence is matched. + /// + /// **Note:** The given sequence must **not** include any [`SymbolKind::Newline`], because matches are only considered per line. + /// + /// [`Symbol`]: super::Symbol fn consumed_prefix(&mut self, sequence: &[SymbolKind]) -> bool; } diff --git a/commons/src/scanner/symbol/iterator/mod.rs b/commons/src/scanner/symbol/iterator/mod.rs index 07230c87..5c9012c1 100644 --- a/commons/src/scanner/symbol/iterator/mod.rs +++ b/commons/src/scanner/symbol/iterator/mod.rs @@ -1,3 +1,6 @@ +//! Contains the [`SymbolIterator`], and all related functionality +//! that is used to step through the [`Symbol`]s retrieved from the [`Scanner`](crate::scanner::Scanner). + use std::borrow::BorrowMut; use super::{Symbol, SymbolKind}; @@ -9,52 +12,84 @@ pub use itertools::*; pub use matcher::*; pub use root::*; +/// The [`SymbolIterator`] provides an iterator over [`Symbol`]s. +/// It allows to add matcher functions to notify the iterator, +/// when an end of an element is reached, or what prefixes to strip on a new line. +/// Additionaly, the iterator may be nested to enable transparent iterating for nested elements. #[derive(Clone)] pub struct SymbolIterator<'input> { + /// The [`SymbolIteratorKind`] of this iterator. kind: SymbolIteratorKind<'input>, + /// The index inside the [`Symbol`]s of the root iterator. start_index: usize, + /// Optional matching function that is used to automatically skip matched prefixes after a new line. prefix_match: Option, + /// Optional matching function that is used to indicate the end of this iterator. end_match: Option, + /// Flag set to `true` if this iterator reached its end. iter_end: bool, } +/// The [`SymbolIteratorKind`] defines the kind of a [`SymbolIterator`]. +/// +/// **Note:** This enables iterator nesting. #[derive(Clone)] pub enum SymbolIteratorKind<'input> { + /// Defines an iterator as being nested. + /// The contained iterator is the parent iterator. Nested(Box>), + /// Defines an iterator as being root. Root(SymbolIteratorRoot<'input>), } impl<'input> SymbolIterator<'input> { + /// Creates a new [`SymbolIterator`] from the given [`Symbol`] slice. + /// This iterator is created without matching functions. pub fn new(symbols: &'input [Symbol<'input>]) -> Self { SymbolIterator::from(symbols) } + /// Creates a new [`SymbolIterator`] from the given [`Symbol`] slice, + /// and the given matching functions. + /// + /// # Arguments + /// + /// * `symbols` ... [`Symbol`] slice to iterate over + /// * `prefix_match` ... Optional matching function used to strip prefix on new lines + /// * `end_match` ... Optional matching function used to indicate the end of the created iterator pub fn with( symbols: &'input [Symbol<'input>], prefix_match: Option, - end: Option, + end_match: Option, ) -> Self { SymbolIterator { kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(symbols)), start_index: 0, prefix_match, - end_match: end, + end_match, iter_end: false, } } + /// Returns the length of the remaining [`Symbol`]s this iterator might return. + /// + /// **Note:** This length does not consider parent iterators, or matching functions. + /// Therefore, the returned number of [`Symbol`]s might differ, but cannot be larger than this length. pub fn len(&self) -> usize { self.remaining_symbols().unwrap_or(&[]).len() } + /// Returns `true` if no more [`Symbol`]s are available. pub fn is_empty(&self) -> bool { self.len() == 0 } + /// Returns the index this iterator was started from the [`Symbol`] slice of the root iterator. pub fn start_index(&self) -> usize { self.start_index } + /// Returns the current index this iterator is in the [`Symbol`] slice of the root iterator. pub fn curr_index(&self) -> usize { match &self.kind { SymbolIteratorKind::Nested(parent) => parent.curr_index(), @@ -62,6 +97,7 @@ impl<'input> SymbolIterator<'input> { } } + /// Sets the current index of this iterator to the given index. pub fn set_curr_index(&mut self, index: usize) { if index >= self.start_index { match self.kind.borrow_mut() { @@ -74,6 +110,7 @@ impl<'input> SymbolIterator<'input> { } } + /// Returns the index used to peek. fn peek_index(&self) -> usize { match &self.kind { SymbolIteratorKind::Nested(parent) => parent.peek_index(), @@ -81,6 +118,7 @@ impl<'input> SymbolIterator<'input> { } } + /// Sets the peek index of this iterator to the given index. pub fn set_peek_index(&mut self, index: usize) { if index >= self.curr_index() { match self.kind.borrow_mut() { @@ -92,10 +130,16 @@ impl<'input> SymbolIterator<'input> { } } + /// Resets peek to get `peek() == next()`. + /// + /// **Note:** Needed to reset peek index after using `peeking_next()`. pub fn reset_peek(&mut self) { self.set_peek_index(self.curr_index()); } + /// Returns the maximal remaining symbols in this iterator. + /// + /// **Note:** Similar to `len()`, this does not consider parent iterators and matching functions. pub fn remaining_symbols(&self) -> Option<&'input [Symbol<'input>]> { match &self.kind { SymbolIteratorKind::Nested(parent) => parent.remaining_symbols(), @@ -103,30 +147,44 @@ impl<'input> SymbolIterator<'input> { } } + /// Returns the next [`Symbol`] without changing the current index. pub fn peek(&mut self) -> Option<&'input Symbol<'input>> { let symbol = self.peeking_next(|_| true); self.reset_peek(); // Note: Resetting index, because peek() must be idempotent symbol } + /// Returns the [`SymbolKind`] of the peeked [`Symbol`]. pub fn peek_kind(&mut self) -> Option { self.peek().map(|s| s.kind) } + /// Nests this iterator, by creating a new iterator that has this iterator set as parent. + /// + /// **Note:** Any change in this iterator is **not** propagated to the nested iterator. + /// See [`Self::update()`] on how to synchronize this iterator with the nested one. + /// + /// # Arguments + /// + /// * `prefix_match` ... Optional matching function used to strip prefix on new lines + /// * `end_match` ... Optional matching function used to indicate the end of the created iterator pub fn nest( &self, prefix_match: Option, - end: Option, + end_match: Option, ) -> SymbolIterator<'input> { SymbolIterator { kind: SymbolIteratorKind::Nested(Box::new(self.clone())), start_index: self.curr_index(), prefix_match, - end_match: end, + end_match, iter_end: self.iter_end, } } + /// Updates the given parent iterator to take the progress of the nested iterator. + /// + /// **Note:** Only updates the parent if `self` is nested. pub fn update(self, parent: &mut Self) { if let SymbolIteratorKind::Nested(self_parent) = self.kind { *parent = *self_parent; @@ -136,8 +194,9 @@ impl<'input> SymbolIterator<'input> { /// Tries to skip symbols until one of the end functions signals the end. /// /// **Note:** This function might not reach the iterator end. + /// /// If no symbols are left, or no given line prefix is matched, the iterator may stop before an end is reached. - /// Use `end_reached()` to check if the end was actually reached. + /// Use [`Self::end_reached()`] to check if the end was actually reached. pub fn skip_to_end(mut self) -> Self { let _last_symbol = self.by_ref().last(); @@ -156,6 +215,7 @@ impl<'input> SymbolIterator<'input> { symbols } + /// Returns `true` if this iterator has reached its end. pub fn end_reached(&self) -> bool { self.iter_end } diff --git a/commons/src/scanner/symbol/iterator/root.rs b/commons/src/scanner/symbol/iterator/root.rs index 49dd8ce2..3c8ee6aa 100644 --- a/commons/src/scanner/symbol/iterator/root.rs +++ b/commons/src/scanner/symbol/iterator/root.rs @@ -1,15 +1,23 @@ +//! Contains the [`SymbolIteratorRoot`] that is the root iterator in any [`SymbolIterator`](super::SymbolIterator). + use itertools::PeekingNext; use crate::scanner::Symbol; +/// The [`SymbolIteratorRoot`] is the root iterator in any [`SymbolIterator`](super::SymbolIterator). +/// It holds the actual [`Symbol`] slice. #[derive(Clone)] pub struct SymbolIteratorRoot<'input> { + /// The [`Symbol`] slice the iterator was created for. symbols: &'input [Symbol<'input>], + /// The current index of the iterator inside the [`Symbol`] slice. pub(super) curr_index: usize, + /// The peek index of the iterator inside the [`Symbol`] slice. pub(super) peek_index: usize, } impl<'input> SymbolIteratorRoot<'input> { + /// Returns the remaining symbols in this iterator, or `None` if there are no symbols left.. pub(super) fn remaining_symbols(&self) -> Option<&'input [Symbol<'input>]> { self.symbols.get(self.curr_index..) } From 0d2c225c80ac7bc6b1034109c11a017919a9fba1 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Sun, 24 Sep 2023 14:45:17 +0200 Subject: [PATCH 22/43] feat: add nesting depth to symbol iterator --- .../src/scanner/symbol/iterator/matcher.rs | 15 ++++ commons/src/scanner/symbol/iterator/mod.rs | 86 +++++++++++++++++-- commons/src/scanner/symbol/iterator/root.rs | 5 ++ parser/src/parser.rs | 6 +- 4 files changed, 102 insertions(+), 10 deletions(-) diff --git a/commons/src/scanner/symbol/iterator/matcher.rs b/commons/src/scanner/symbol/iterator/matcher.rs index 56eefab2..12fedf52 100644 --- a/commons/src/scanner/symbol/iterator/matcher.rs +++ b/commons/src/scanner/symbol/iterator/matcher.rs @@ -28,6 +28,8 @@ pub trait EndMatcher { /// Consuming means the related iterator advances over the matched empty line. /// /// **Note:** The iterator is only advanced if an empty line is matched. + /// + /// **Note:** The empty line is **not** included in the symbols returned by [`SymbolIterator::take_to_end()`]. fn consumed_is_empty_line(&mut self) -> bool; /// Returns `true` if the given [`Symbol`] sequence matches the upcoming one. @@ -39,7 +41,14 @@ pub trait EndMatcher { /// Consuming means the related iterator advances over the matched sequence. /// /// **Note:** The iterator is only advanced if the sequence is matched. + /// + /// **Note:** The matched sequence is **not** included in the symbols returned by [`SymbolIterator::take_to_end()`]. fn consumed_matches(&mut self, sequence: &[SymbolKind]) -> bool; + + /// Returns `true` if the iterator is at the given nesting depth. + /// + /// **Note** Use [`SymbolIterator::curr_depth()`] to get the current depth of an iterator. + fn at_depth(&self, depth: usize) -> bool; } /// Trait containing functions that are available inside the prefix matcher function. @@ -51,6 +60,8 @@ pub trait PrefixMatcher { /// /// **Note:** The given sequence must **not** include any [`SymbolKind::Newline`], because matches are only considered per line. /// + /// **Note:** The matched sequence is **not** included in the symbols returned by [`SymbolIterator::take_to_end()`]. + /// /// [`Symbol`]: super::Symbol fn consumed_prefix(&mut self, sequence: &[SymbolKind]) -> bool; } @@ -111,6 +122,10 @@ impl<'input> EndMatcher for SymbolIterator<'input> { matched } + + fn at_depth(&self, depth: usize) -> bool { + self.curr_depth() == depth + } } impl<'input> PrefixMatcher for SymbolIterator<'input> { diff --git a/commons/src/scanner/symbol/iterator/mod.rs b/commons/src/scanner/symbol/iterator/mod.rs index 5c9012c1..661eb736 100644 --- a/commons/src/scanner/symbol/iterator/mod.rs +++ b/commons/src/scanner/symbol/iterator/mod.rs @@ -22,6 +22,8 @@ pub struct SymbolIterator<'input> { kind: SymbolIteratorKind<'input>, /// The index inside the [`Symbol`]s of the root iterator. start_index: usize, + /// The nesting depth of this iterator, starting at 0 for the root iterator. + depth: usize, /// Optional matching function that is used to automatically skip matched prefixes after a new line. prefix_match: Option, /// Optional matching function that is used to indicate the end of this iterator. @@ -64,6 +66,7 @@ impl<'input> SymbolIterator<'input> { ) -> Self { SymbolIterator { kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(symbols)), + depth: 0, start_index: 0, prefix_match, end_match, @@ -71,17 +74,17 @@ impl<'input> SymbolIterator<'input> { } } - /// Returns the length of the remaining [`Symbol`]s this iterator might return. + /// Returns the maximum length of the remaining [`Symbol`]s this iterator might return. /// /// **Note:** This length does not consider parent iterators, or matching functions. /// Therefore, the returned number of [`Symbol`]s might differ, but cannot be larger than this length. - pub fn len(&self) -> usize { - self.remaining_symbols().unwrap_or(&[]).len() + pub fn max_len(&self) -> usize { + self.max_remaining_symbols().unwrap_or(&[]).len() } /// Returns `true` if no more [`Symbol`]s are available. pub fn is_empty(&self) -> bool { - self.len() == 0 + self.max_remaining_symbols().unwrap_or(&[]).is_empty() } /// Returns the index this iterator was started from the [`Symbol`] slice of the root iterator. @@ -89,6 +92,12 @@ impl<'input> SymbolIterator<'input> { self.start_index } + /// The current nested depth this iterator is at. + /// The root iterator starts at 0, and every iterator created using [`Self::nest()`] is one depth higher than its parent. + pub fn curr_depth(&self) -> usize { + self.depth + } + /// Returns the current index this iterator is in the [`Symbol`] slice of the root iterator. pub fn curr_index(&self) -> usize { match &self.kind { @@ -139,10 +148,12 @@ impl<'input> SymbolIterator<'input> { /// Returns the maximal remaining symbols in this iterator. /// - /// **Note:** Similar to `len()`, this does not consider parent iterators and matching functions. - pub fn remaining_symbols(&self) -> Option<&'input [Symbol<'input>]> { + /// **Note:** This slice does not consider parent iterators, or matching functions. + /// Therefore, the returned [`Symbol`] slice might differ from the symbols returned by calling [`Self::next()`], + /// but [`Self::next()`] cannot return more symbols than those inside the returned slice. + pub fn max_remaining_symbols(&self) -> Option<&'input [Symbol<'input>]> { match &self.kind { - SymbolIteratorKind::Nested(parent) => parent.remaining_symbols(), + SymbolIteratorKind::Nested(parent) => parent.max_remaining_symbols(), SymbolIteratorKind::Root(root) => root.remaining_symbols(), } } @@ -176,6 +187,7 @@ impl<'input> SymbolIterator<'input> { SymbolIterator { kind: SymbolIteratorKind::Nested(Box::new(self.clone())), start_index: self.curr_index(), + depth: self.depth + 1, prefix_match, end_match, iter_end: self.iter_end, @@ -226,6 +238,7 @@ impl<'input> From<&'input [Symbol<'input>]> for SymbolIterator<'input> { SymbolIterator { kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(value)), start_index: 0, + depth: 0, prefix_match: None, end_match: None, iter_end: false, @@ -238,6 +251,7 @@ impl<'input> From<&'input Vec>> for SymbolIterator<'input> { SymbolIterator { kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(value)), start_index: 0, + depth: 0, prefix_match: None, end_match: None, iter_end: false, @@ -279,6 +293,10 @@ impl<'input> Iterator for SymbolIterator<'input> { curr_symbol_opt } + + fn size_hint(&self) -> (usize, Option) { + (0, Some(self.max_len())) + } } impl<'input> PeekingNext for SymbolIterator<'input> { @@ -434,4 +452,58 @@ mod test { "Prefix symbols not correctly skipped" ); } + + #[test] + fn depth_matcher() { + let symbols = Scanner::try_new() + .expect("Must be valid provider.") + .scan_str("[o [i]]"); + + let mut iterator = SymbolIterator::with( + &symbols, + None, + Some(Rc::new(|matcher| { + if matcher.at_depth(0) { + matcher.consumed_matches(&[SymbolKind::CloseBracket]) + } else { + false + } + })), + ); + + iterator = iterator.dropping(1); // To skip first open bracket + let mut taken_outer = iterator + .by_ref() + // Note: This will skip the open bracket for both iterators, but this is ok for this test + .take_while(|s| s.kind != SymbolKind::OpenBracket) + .collect::>(); + + let mut inner_iter = iterator.nest( + None, + Some(Rc::new(|matcher| { + if matcher.at_depth(1) { + matcher.consumed_matches(&[SymbolKind::CloseBracket]) + } else { + false + } + })), + ); + + let taken_inner = inner_iter.take_to_end(); + inner_iter.update(&mut iterator); + + taken_outer.extend(iterator.take_to_end().iter()); + + assert!(iterator.end_reached(), "Iterator end was not reached."); + assert_eq!( + taken_inner.iter().map(|s| s.as_str()).collect::>(), + vec!["i"], + "Inner symbols are incorrect." + ); + assert_eq!( + taken_outer.iter().map(|s| s.as_str()).collect::>(), + vec!["o", " ",], + "Outer symbols are incorrect." + ); + } } diff --git a/commons/src/scanner/symbol/iterator/root.rs b/commons/src/scanner/symbol/iterator/root.rs index 3c8ee6aa..f55d8efa 100644 --- a/commons/src/scanner/symbol/iterator/root.rs +++ b/commons/src/scanner/symbol/iterator/root.rs @@ -54,6 +54,11 @@ impl<'input> Iterator for SymbolIteratorRoot<'input> { Some(symbol) } + + fn size_hint(&self) -> (usize, Option) { + let len = self.remaining_symbols().unwrap_or(&[]).len(); + (len, Some(len)) + } } impl<'input> PeekingNext for SymbolIteratorRoot<'input> { diff --git a/parser/src/parser.rs b/parser/src/parser.rs index b6868d33..c558dc01 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -103,7 +103,7 @@ impl MainParser { let mut blocks = Vec::default(); #[cfg(debug_assertions)] - let mut curr_len = input.len(); + let mut curr_len = input.max_len(); 'outer: while let Some(kind) = input.peek_kind() { match kind { @@ -145,10 +145,10 @@ impl MainParser { #[cfg(debug_assertions)] { assert!( - input.len() < curr_len, + input.max_len() < curr_len, "Parser consumed no symbol in iteration." ); - curr_len = input.len(); + curr_len = input.max_len(); } } From dd903f51c0cd03761dfb4860c4e76ce379ad007e Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Sun, 24 Sep 2023 15:43:27 +0200 Subject: [PATCH 23/43] fix: add EOI symbol to match end as empty line --- commons/src/scanner/mod.rs | 11 +++++++++++ .../src/scanner/symbol/iterator/matcher.rs | 17 +++++++++++++---- commons/src/scanner/symbol/iterator/mod.rs | 9 +++++++-- parser/src/elements/enclosed/verbatim.rs | 19 +++++++++++++++---- 4 files changed, 46 insertions(+), 10 deletions(-) diff --git a/commons/src/scanner/mod.rs b/commons/src/scanner/mod.rs index 604f0461..43b8a352 100644 --- a/commons/src/scanner/mod.rs +++ b/commons/src/scanner/mod.rs @@ -91,6 +91,17 @@ impl Scanner { prev_offset = offset; } + symbols.push(Symbol { + input, + kind: SymbolKind::EOI, + offset: Offset { + start: prev_offset, + end: prev_offset, + }, + start: curr_pos, + end: curr_pos, + }); + // last offset not needed, because break at EOI is always available symbols } diff --git a/commons/src/scanner/symbol/iterator/matcher.rs b/commons/src/scanner/symbol/iterator/matcher.rs index 12fedf52..52e75926 100644 --- a/commons/src/scanner/symbol/iterator/matcher.rs +++ b/commons/src/scanner/symbol/iterator/matcher.rs @@ -72,7 +72,12 @@ impl<'input> EndMatcher for SymbolIterator<'input> { self.reset_peek(); let next = self - .peeking_next(|s| matches!(s.kind, SymbolKind::Blankline | SymbolKind::Newline)) + .peeking_next(|s| { + matches!( + s.kind, + SymbolKind::Newline | SymbolKind::Blankline | SymbolKind::EOI + ) + }) .map(|s| s.kind); let is_empty_line = if Some(SymbolKind::Newline) == next { @@ -80,11 +85,15 @@ impl<'input> EndMatcher for SymbolIterator<'input> { .peeking_take_while(|s| s.kind == SymbolKind::Whitespace) .count(); - let new_line = self - .peeking_next(|s| matches!(s.kind, SymbolKind::Blankline | SymbolKind::Newline)); + let new_line = self.peeking_next(|s| { + matches!( + s.kind, + SymbolKind::Newline | SymbolKind::Blankline | SymbolKind::EOI + ) + }); new_line.is_some() } else { - Some(SymbolKind::Blankline) == next + next.is_some() }; is_empty_line diff --git a/commons/src/scanner/symbol/iterator/mod.rs b/commons/src/scanner/symbol/iterator/mod.rs index 661eb736..70ff5463 100644 --- a/commons/src/scanner/symbol/iterator/mod.rs +++ b/commons/src/scanner/symbol/iterator/mod.rs @@ -348,7 +348,7 @@ mod test { "Whitespace after hash symbols was not detected." ); assert!( - iterator.next().is_none(), + iterator.next().unwrap().kind == SymbolKind::EOI, "Input end reached, but new symbol was returned." ); } @@ -448,7 +448,12 @@ mod test { assert_eq!( sym_kinds, - vec![SymbolKind::Plain, SymbolKind::Newline, SymbolKind::Plain], + vec![ + SymbolKind::Plain, + SymbolKind::Newline, + SymbolKind::Plain, + SymbolKind::EOI + ], "Prefix symbols not correctly skipped" ); } diff --git a/parser/src/elements/enclosed/verbatim.rs b/parser/src/elements/enclosed/verbatim.rs index 0f290570..44976f36 100644 --- a/parser/src/elements/enclosed/verbatim.rs +++ b/parser/src/elements/enclosed/verbatim.rs @@ -57,8 +57,16 @@ impl ElementParser for Verbatim { .into_iter() .chain(std::iter::repeat(SymbolKind::Tick).take(start_delim_len)) .collect::>(); - let end_sequence_len = end_sequence.len(); - let end_fn = Rc::new(move |matcher: &mut dyn EndMatcher| matcher.matches(&end_sequence)); + let mut longer_delim_sequence = end_sequence.clone(); + longer_delim_sequence.push(SymbolKind::Tick); + + let end_fn = Rc::new(move |matcher: &mut dyn EndMatcher| { + if !matcher.matches(&longer_delim_sequence) { + matcher.consumed_matches(&end_sequence) + } else { + false + } + }); let mut content_iter = input.nest(None, Some(end_fn)); let content = content_iter.take_to_end(); @@ -69,10 +77,13 @@ impl ElementParser for Verbatim { content_iter.update(input); - input.dropping(end_sequence_len); - // TODO: handle language attribute + // ensures empty line after block + if !input.consumed_is_empty_line() { + return None; + } + let output = TokenizeOutput { tokens: vec![ Token::StartDelim(start_delim), From b74c08928080b1c4149b9a66122a6625a04f7b1c Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Sun, 24 Sep 2023 15:46:41 +0200 Subject: [PATCH 24/43] fix: remove EOI symbol for lexer tests --- inline/tests/lexer/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inline/tests/lexer/mod.rs b/inline/tests/lexer/mod.rs index c95cba30..59861578 100644 --- a/inline/tests/lexer/mod.rs +++ b/inline/tests/lexer/mod.rs @@ -31,7 +31,8 @@ pub fn test_lexer_snapshots() -> Vec { } fn run_test_case(case: crate::TestCase) { - let symbols = test_runner::scan_str(&case.input); + let mut symbols = test_runner::scan_str(&case.input); + symbols.pop(); // Remove EOI symbol. TODO: handle EOI in lexer let runner = SnapTestRunner::with_fn(&case.name, &symbols, |symbols| { Snapshot::snap((case.input.as_ref(), symbols.tokens())) }) From 45f4a1f162fbf8a3dd3d189041528f2b63e0463d Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Sun, 24 Sep 2023 16:57:04 +0200 Subject: [PATCH 25/43] fix: pin zerovec crate to specific version --- commons/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commons/Cargo.toml b/commons/Cargo.toml index 25c98e1a..06f72631 100644 --- a/commons/Cargo.toml +++ b/commons/Cargo.toml @@ -20,7 +20,7 @@ serde_json.workspace = true serde_yaml.workspace = true once_cell = { workspace = true, optional = true } icu = "=1.2.0" -zerovec = "0.9.4" +zerovec = "=0.9.4" icu_provider = "=1.2.0" icu_provider_adapters = "=1.2.0" regex = { version = "1.8.1", optional = true } From 84875387b507935442732e7ed9bc5d34ea526180 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Sun, 24 Sep 2023 18:10:04 +0200 Subject: [PATCH 26/43] fix: resolve icu dependency problems check in lock file to prevent this in the future --- .gitignore | 4 - Cargo.lock | 1839 ++++++++++++++++++++++++++++++++++++++++++++ commons/Cargo.toml | 5 + 3 files changed, 1844 insertions(+), 4 deletions(-) create mode 100644 Cargo.lock diff --git a/.gitignore b/.gitignore index 4a3a6617..26c6c44f 100644 --- a/.gitignore +++ b/.gitignore @@ -2,10 +2,6 @@ # will have compiled files and executables /target/ -# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries -# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html -Cargo.lock - # These are backup files generated by rustfmt **/*.rs.bk diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 00000000..3c55c42a --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,1839 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "aho-corasick" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea5d730647d4fadd988536d06fecce94b7b4f2a7efdae548f1cf4b63205518ab" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f58811cfac344940f1a400b6e6231ce35171f614f26439e80f8c1465c5cc0c" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b84bf0a05bbb2a83e5eb6fa36bb6e87baa08193c35ff52bbf6b38d8af2890e46" + +[[package]] +name = "anstyle-parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +dependencies = [ + "windows-sys 0.48.0", +] + +[[package]] +name = "anstyle-wincon" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58f54d10c6dfa51283a066ceab3ec1ab78d13fae00aa49243a45e4571fb79dfd" +dependencies = [ + "anstyle", + "windows-sys 0.48.0", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "base64" +version = "0.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "4.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1d7b8d5ec32af0fadc644bf1fd509a688c2103b185644bb1e29d164e0703136" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5179bb514e4d7c2051749d8fcefa2ed6d06a9f4e6d69faf3805f5d80b8cf8d56" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0862016ff20d69b84ef8247369fabf5c008a7417002411897d40ee1f4532b873" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "syn 2.0.37", +] + +[[package]] +name = "clap_lex" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] +name = "colored" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2674ec482fbc38012cf31e6c42ba0177b431a0cb6f15fe40efa5aab1bda516f6" +dependencies = [ + "is-terminal", + "lazy_static", + "windows-sys 0.48.0", +] + +[[package]] +name = "console" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "windows-sys 0.45.0", +] + +[[package]] +name = "cpufeatures" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "deranged" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946" + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "displaydoc" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.37", +] + +[[package]] +name = "either" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + +[[package]] +name = "errno" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "136526188508e25c6fef639d7927dfb3e0e3084488bf202267829cf7fc23dbdd" +dependencies = [ + "errno-dragonfly", + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "evident" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3d4868bb797867d4017c78b2ff622d69925c8c79298fc80064e8f9b7af4414c" +dependencies = [ + "once_cell", + "uuid", +] + +[[package]] +name = "fixed_decimal" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5287d527037d0f35c8801880361eb38bb9bce194805350052c2a79538388faeb" +dependencies = [ + "displaydoc", + "smallvec", + "writeable", +] + +[[package]] +name = "flate2" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "form_urlencoded" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "heck" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "hermit-abi" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" + +[[package]] +name = "icu" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c200640729e9f7cf6a7a1140dbf797121b70e8f2d1f347090aff8394f7e8f863" +dependencies = [ + "icu_calendar", + "icu_collator", + "icu_collections", + "icu_datetime", + "icu_decimal", + "icu_list", + "icu_locid", + "icu_locid_transform", + "icu_normalizer", + "icu_plurals", + "icu_properties", + "icu_provider", + "icu_segmenter", + "icu_timezone", +] + +[[package]] +name = "icu_calendar" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dee1e8c25ed44743d03e2d58ca1c0226786dc1aac1f9cb27485e2da2de5e0918" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_collator" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "088882827079b243dc01883f92290dd3952b656faddc7a2972e6d3ab47e1fc7a" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid", + "icu_normalizer", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "zerovec", +] + +[[package]] +name = "icu_collections" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef8302d8dfd6044d3ddb3f807a5ef3d7bbca9a574959c6d6e4dc39aa7012d0d5" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_datetime" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72d61014bb8604505baa84ed522aa039951bd81177828d165e80ea8a0543c8a7" +dependencies = [ + "displaydoc", + "either", + "fixed_decimal", + "icu_calendar", + "icu_decimal", + "icu_locid", + "icu_plurals", + "icu_provider", + "icu_timezone", + "smallvec", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_decimal" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "839d40602460578482205f1def416a6442cf29a24dc366aa8cf8d9f95a53c9d2" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_locid", + "icu_provider", + "writeable", +] + +[[package]] +name = "icu_list" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd7ba7442d9235b689d4fdce17c452ea229934980fd81ba50cc28275752c9f90" +dependencies = [ + "displaydoc", + "icu_provider", + "regex-automata 0.2.0", + "writeable", +] + +[[package]] +name = "icu_locid" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3003f85dccfc0e238ff567693248c59153a46f4e6125ba4020b973cef4d1d335" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd89f392982141a878a9321c9298cce46d14e1c17efc5f428dbfd96b443e57d0" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "652869735c9fb9f5a64ba180ee16f2c848390469c116deef517ecc53f4343598" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_plurals" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a18fbe19656b3cbae9a40a27b0303f06b2b51165e3b06d596dfdff8f06bfce9a" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_locid", + "icu_provider", + "zerovec", +] + +[[package]] +name = "icu_properties" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce0e1aa26851f16c9e04412a5911c86b7f8768dac8f8d4c5f1c568a7e5d7a434" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_provider" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8dc312a7b6148f7dfe098047ae2494d12d4034f48ade58d4f353000db376e305" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_adapters" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4ae1e2bd0c41728b77e7c46e9afdec5e2127d1eedacc684724667d50c126bd3" +dependencies = [ + "icu_locid", + "icu_provider", + "tinystr", + "yoke", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b728b9421e93eff1d9f8681101b78fa745e0748c95c655c83f337044a7e10" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "icu_segmenter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3300a7b6bf187be98a57264ad094f11f2e062c2e8263132af010ff522ee5495" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid", + "icu_provider", + "num-traits", + "utf8_iter", + "zerovec", +] + +[[package]] +name = "icu_timezone" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e22da75a450de2d54161838efa9e1a1f5baa7bc1fffdb015f260e0992b01977" +dependencies = [ + "displaydoc", + "icu_calendar", + "icu_locid", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "idna" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "insta" +version = "1.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e02c584f4595792d09509a94cdb92a3cef7592b1eb2d9877ee6f527062d0ea" +dependencies = [ + "console", + "lazy_static", + "linked-hash-map", + "serde", + "similar", + "yaml-rust", +] + +[[package]] +name = "is-terminal" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" +dependencies = [ + "hermit-abi", + "rustix", + "windows-sys 0.48.0", +] + +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + +[[package]] +name = "keccak" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f6d5ed8676d904364de097082f4e7d240b571b67989ced0240f08b7f966f940" +dependencies = [ + "cpufeatures", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.148" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" + +[[package]] +name = "libm" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" + +[[package]] +name = "libtest-mimic" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d8de370f98a6cb8a4606618e53e802f93b094ddec0f96988eaec2c27e6e9ce7" +dependencies = [ + "clap", + "termcolor", + "threadpool", +] + +[[package]] +name = "line-wrap" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f30344350a2a51da54c1d53be93fade8a237e545dbcc4bdbe635413f2117cab9" +dependencies = [ + "safemem", +] + +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + +[[package]] +name = "linux-raw-sys" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a9bad9f94746442c783ca431b22403b519cd7fbeed0533fdd6328b2f2212128" + +[[package]] +name = "litemap" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a1a2647d5b7134127971a6de0d533c49de2159167e7f259c427195f87168a1" + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + +[[package]] +name = "logid" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c7913b788929e0ffaef3c1e27e6342749323c126d9a4aeba490d361310f1b1e" +dependencies = [ + "colored", + "logid-core", + "logid-derive", +] + +[[package]] +name = "logid-core" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "989118724a635a3c6986f7ebee65a6a3e055a47b6f7b01313a61f88cd038369e" +dependencies = [ + "evident", + "lsp-types", +] + +[[package]] +name = "logid-derive" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bd85b3166760ef3bfa2348c3427b77f9360d25b2be57127ea3e03a83165d000" +dependencies = [ + "logid-core", + "proc-macro2", + "quote", + "syn 2.0.37", +] + +[[package]] +name = "lsp-types" +version = "0.94.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c66bfd44a06ae10647fe3f8214762e9369fd4248df1350924b4ef9e770a85ea1" +dependencies = [ + "bitflags 1.3.2", + "serde", + "serde_json", + "serde_repr", + "url", +] + +[[package]] +name = "memchr" +version = "2.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" + +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + +[[package]] +name = "num-traits" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "onig" +version = "6.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f" +dependencies = [ + "bitflags 1.3.2", + "libc", + "once_cell", + "onig_sys", +] + +[[package]] +name = "onig_sys" +version = "69.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c35b1f086e7" +dependencies = [ + "cc", + "pkg-config", +] + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "percent-encoding" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" + +[[package]] +name = "pin-project-lite" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" + +[[package]] +name = "pkg-config" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" + +[[package]] +name = "plist" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdc0001cfea3db57a2e24bc0d818e9e20e554b5f97fabb9bc231dc240269ae06" +dependencies = [ + "base64", + "indexmap", + "line-wrap", + "quick-xml", + "serde", + "time", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "proc-macro2" +version = "1.0.67" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quick-xml" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81b9228215d82c7b61490fec1de287136b5de6f5700f6e58ea9ad61a7964ca51" +dependencies = [ + "memchr", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "regex" +version = "1.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata 0.3.8", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9368763f5a9b804326f3af749e16f9abf378d227bcdee7634b13d8f17793782" +dependencies = [ + "memchr", +] + +[[package]] +name = "regex-automata" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" + +[[package]] +name = "rustix" +version = "0.38.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "747c788e9ce8e92b12cd485c49ddf90723550b654b32508f979b71a7b1ecda4f" +dependencies = [ + "bitflags 2.4.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.48.0", +] + +[[package]] +name = "ryu" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" + +[[package]] +name = "safemem" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "serde" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.37", +] + +[[package]] +name = "serde_json" +version = "1.0.107" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_repr" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8725e1dfadb3a50f7e5ce0b1a540466f6ed3fe7a0fca2ac2b8b831d31316bd00" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.37", +] + +[[package]] +name = "serde_yaml" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578a7433b776b56a35785ed5ce9a7e777ac0598aac5a6dd1b4b18a307c7fc71b" +dependencies = [ + "indexmap", + "ryu", + "serde", + "yaml-rust", +] + +[[package]] +name = "sha3" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75872d278a8f37ef87fa0ddbda7802605cb18344497949862c0d4dcb291eba60" +dependencies = [ + "digest", + "keccak", +] + +[[package]] +name = "sharded-slab" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "similar" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "420acb44afdae038210c99e69aae24109f32f15500aa708e81d46c9f29d55fcf" + +[[package]] +name = "smallvec" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "strum" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7ac893c7d471c8a21f31cfe213ec4f6d9afeed25537c772e08ef3f005f8729e" + +[[package]] +name = "strum_macros" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339f799d8b549e3744c7ac7feb216383e4005d94bdb22561b3ab8f3b808ae9fb" +dependencies = [ + "heck 0.3.3", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7303ef2c05cd654186cb250d29049a24840ca25d2747c25c0381c8d9e2f582e8" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", + "unicode-xid", +] + +[[package]] +name = "synstructure" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "285ba80e733fac80aa4270fbcdf83772a79b80aa35c97075320abfee4a915b06" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.37", + "unicode-xid", +] + +[[package]] +name = "syntect" +version = "5.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e02b4b303bf8d08bfeb0445cba5068a3d306b6baece1d5582171a9bf49188f91" +dependencies = [ + "bincode", + "bitflags 1.3.2", + "flate2", + "fnv", + "once_cell", + "onig", + "plist", + "regex-syntax", + "serde", + "serde_json", + "thiserror", + "walkdir", + "yaml-rust", +] + +[[package]] +name = "termcolor" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6093bad37da69aab9d123a8091e4be0aa4a03e4d601ec641c327398315f62b64" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "thiserror" +version = "1.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d6d7a740b8a666a7e828dd00da9c0dc290dff53154ea77ac109281de90589b7" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.37", +] + +[[package]] +name = "thread_local" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "threadpool" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" +dependencies = [ + "num_cpus", +] + +[[package]] +name = "time" +version = "0.3.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "426f806f4089c493dcac0d24c29c01e2c38baf8e30f1b716ee37e83d200b18fe" +dependencies = [ + "deranged", + "itoa", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20" +dependencies = [ + "time-core", +] + +[[package]] +name = "tinystr" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ac3f5b6856e931e15e07b478e98c8045239829a65f9156d4fa7e7788197a5ef" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tracing" +version = "0.1.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +dependencies = [ + "cfg-if", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.37", +] + +[[package]] +name = "tracing-core" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ddad33d2d10b1ed7eb9d1f518a5674713876e97e5bb9b7345a7984fbb4f922" +dependencies = [ + "lazy_static", + "log", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77" +dependencies = [ + "nu-ansi-term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "unicode-bidi" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-segmentation" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" + +[[package]] +name = "unicode-xid" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" + +[[package]] +name = "unimarkup" +version = "0.4.0" +dependencies = [ + "clap", + "logid", + "thiserror", + "tracing", + "tracing-subscriber", + "unimarkup-commons", + "unimarkup-core", +] + +[[package]] +name = "unimarkup-commons" +version = "0.4.0" +dependencies = [ + "clap", + "icu", + "icu_collections", + "icu_locid", + "icu_provider", + "icu_provider_adapters", + "icu_provider_macros", + "insta", + "itertools", + "logid", + "once_cell", + "regex", + "serde", + "serde_json", + "serde_yaml", + "thiserror", + "tinystr", + "zerovec", + "zerovec-derive", +] + +[[package]] +name = "unimarkup-core" +version = "0.4.0" +dependencies = [ + "logid", + "thiserror", + "unimarkup-commons", + "unimarkup-parser", + "unimarkup-render", +] + +[[package]] +name = "unimarkup-inline" +version = "0.4.0" +dependencies = [ + "libtest-mimic", + "logid", + "serde", + "serde_yaml", + "unicode-segmentation", + "unimarkup-commons", +] + +[[package]] +name = "unimarkup-parser" +version = "0.4.0" +dependencies = [ + "clap", + "logid", + "regex", + "serde", + "serde_json", + "serde_yaml", + "sha3", + "strum", + "strum_macros", + "thiserror", + "unimarkup-commons", + "unimarkup-inline", +] + +[[package]] +name = "unimarkup-render" +version = "0.4.0" +dependencies = [ + "logid", + "once_cell", + "syntect", + "thiserror", + "unimarkup-commons", + "unimarkup-inline", + "unimarkup-parser", +] + +[[package]] +name = "url" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf16_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52df8b7fb78e7910d776fccf2e42ceaf3604d55e8e7eb2dbd183cb1441d8a692" + +[[package]] +name = "utf8_iter" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64a8922555b9500e3d865caed19330172cd67cbf82203f1a3311d8c305cc9f33" + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "uuid" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d" +dependencies = [ + "getrandom", + "rand", +] + +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "walkdir" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0af0c3d13faebf8dda0b5256fa7096a2d5ccb662f7b9f54a40fe201077ab1c2" + +[[package]] +name = "yaml-rust" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +dependencies = [ + "linked-hash-map", +] + +[[package]] +name = "yoke" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61e38c508604d6bbbd292dadb3c02559aa7fff6b654a078a36217cad871636e4" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5e19fb6ed40002bab5403ffa37e53e0e56f914a4450c8765f533018db1db35f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.37", + "synstructure 0.13.0", +] + +[[package]] +name = "zerofrom" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "655b0814c5c0b19ade497851070c640773304939a6c0fd5f5fb43da0696d05b7" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6a647510471d372f2e6c2e6b7219e44d8c574d24fdc11c610a61455782f18c3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.37", + "synstructure 0.13.0", +] + +[[package]] +name = "zerovec" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "198f54134cd865f437820aa3b43d0ad518af4e68ee161b444cdd15d8e567c8ea" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "486558732d5dde10d0f8cb2936507c1bb21bc539d924c949baf5f36a58e51bac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", + "synstructure 0.12.6", +] diff --git a/commons/Cargo.toml b/commons/Cargo.toml index 06f72631..44f677a2 100644 --- a/commons/Cargo.toml +++ b/commons/Cargo.toml @@ -20,9 +20,14 @@ serde_json.workspace = true serde_yaml.workspace = true once_cell = { workspace = true, optional = true } icu = "=1.2.0" +icu_locid = "=1.2.0" zerovec = "=0.9.4" +zerovec-derive = "=0.9.4" +tinystr = "=0.7.1" icu_provider = "=1.2.0" icu_provider_adapters = "=1.2.0" +icu_provider_macros = "=1.2.0" +icu_collections = "=1.2.0" regex = { version = "1.8.1", optional = true } insta = {version = "1.29.0", features = ["serde"], optional = true} itertools = "0.11.0" From e1751f5c1f81d69243387156960bbb5aac78fa13 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Mon, 25 Sep 2023 19:50:49 +0200 Subject: [PATCH 27/43] feat: update icu to not need any generated data --- Cargo.lock | 304 +++--------------- commons/Cargo.toml | 11 +- commons/src/config/locale.rs | 4 +- commons/src/config/mod.rs | 2 +- commons/src/config/preamble.rs | 2 +- commons/src/scanner/icu_data/any.rs | 2 - .../icu_data/fallback/likelysubtags_v1/mod.rs | 2 - .../fallback/likelysubtags_v1/und.rs.data | 1 - commons/src/scanner/icu_data/fallback/mod.rs | 2 - .../icu_data/fallback/parents_v1/mod.rs | 2 - .../icu_data/fallback/parents_v1/und.rs.data | 1 - .../icu_data/fallback/supplement/co_v1/mod.rs | 2 - .../fallback/supplement/co_v1/und.rs.data | 1 - .../icu_data/fallback/supplement/mod.rs | 2 - commons/src/scanner/icu_data/mod.rs | 2 - .../icu_data/segmenter/grapheme_v1/mod.rs | 2 - .../segmenter/grapheme_v1/und.rs.data | 1 - commons/src/scanner/icu_data/segmenter/mod.rs | 2 - commons/src/scanner/mod.rs | 37 +-- commons/src/scanner/symbol/iterator/mod.rs | 20 +- commons/src/test_runner/mod.rs | 3 +- commons/src/test_runner/snap_test_runner.rs | 2 +- parser/src/parser.rs | 4 +- 23 files changed, 63 insertions(+), 348 deletions(-) delete mode 100644 commons/src/scanner/icu_data/any.rs delete mode 100644 commons/src/scanner/icu_data/fallback/likelysubtags_v1/mod.rs delete mode 100644 commons/src/scanner/icu_data/fallback/likelysubtags_v1/und.rs.data delete mode 100644 commons/src/scanner/icu_data/fallback/mod.rs delete mode 100644 commons/src/scanner/icu_data/fallback/parents_v1/mod.rs delete mode 100644 commons/src/scanner/icu_data/fallback/parents_v1/und.rs.data delete mode 100644 commons/src/scanner/icu_data/fallback/supplement/co_v1/mod.rs delete mode 100644 commons/src/scanner/icu_data/fallback/supplement/co_v1/und.rs.data delete mode 100644 commons/src/scanner/icu_data/fallback/supplement/mod.rs delete mode 100644 commons/src/scanner/icu_data/mod.rs delete mode 100644 commons/src/scanner/icu_data/segmenter/grapheme_v1/mod.rs delete mode 100644 commons/src/scanner/icu_data/segmenter/grapheme_v1/und.rs.data delete mode 100644 commons/src/scanner/icu_data/segmenter/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 3c55c42a..9a88b825 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -191,6 +191,15 @@ dependencies = [ "windows-sys 0.45.0", ] +[[package]] +name = "core_maths" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3b02505ccb8c50b0aa21ace0fc08c3e53adebd4e58caa18a36152803c7709a3" +dependencies = [ + "libm", +] + [[package]] name = "cpufeatures" version = "0.2.9" @@ -289,17 +298,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "fixed_decimal" -version = "0.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5287d527037d0f35c8801880361eb38bb9bce194805350052c2a79538388faeb" -dependencies = [ - "displaydoc", - "smallvec", - "writeable", -] - [[package]] name = "flate2" version = "1.0.27" @@ -373,65 +371,11 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" -[[package]] -name = "icu" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c200640729e9f7cf6a7a1140dbf797121b70e8f2d1f347090aff8394f7e8f863" -dependencies = [ - "icu_calendar", - "icu_collator", - "icu_collections", - "icu_datetime", - "icu_decimal", - "icu_list", - "icu_locid", - "icu_locid_transform", - "icu_normalizer", - "icu_plurals", - "icu_properties", - "icu_provider", - "icu_segmenter", - "icu_timezone", -] - -[[package]] -name = "icu_calendar" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee1e8c25ed44743d03e2d58ca1c0226786dc1aac1f9cb27485e2da2de5e0918" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_provider", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_collator" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "088882827079b243dc01883f92290dd3952b656faddc7a2972e6d3ab47e1fc7a" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_locid", - "icu_normalizer", - "icu_properties", - "icu_provider", - "smallvec", - "utf16_iter", - "utf8_iter", - "zerovec", -] - [[package]] name = "icu_collections" -version = "1.2.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef8302d8dfd6044d3ddb3f807a5ef3d7bbca9a574959c6d6e4dc39aa7012d0d5" +checksum = "b222d891e7bd8c3fb8122cbf255c5e7763ee4824f3620d54a009077c30539fe1" dependencies = [ "displaydoc", "yoke", @@ -439,189 +383,67 @@ dependencies = [ "zerovec", ] -[[package]] -name = "icu_datetime" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72d61014bb8604505baa84ed522aa039951bd81177828d165e80ea8a0543c8a7" -dependencies = [ - "displaydoc", - "either", - "fixed_decimal", - "icu_calendar", - "icu_decimal", - "icu_locid", - "icu_plurals", - "icu_provider", - "icu_timezone", - "smallvec", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_decimal" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "839d40602460578482205f1def416a6442cf29a24dc366aa8cf8d9f95a53c9d2" -dependencies = [ - "displaydoc", - "fixed_decimal", - "icu_locid", - "icu_provider", - "writeable", -] - -[[package]] -name = "icu_list" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd7ba7442d9235b689d4fdce17c452ea229934980fd81ba50cc28275752c9f90" -dependencies = [ - "displaydoc", - "icu_provider", - "regex-automata 0.2.0", - "writeable", -] - [[package]] name = "icu_locid" -version = "1.2.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3003f85dccfc0e238ff567693248c59153a46f4e6125ba4020b973cef4d1d335" +checksum = "56b72c6de0121c00da9828eb3e2603041d563788289bb15feba7c3331de71b5f" dependencies = [ "displaydoc", "litemap", "tinystr", "writeable", - "zerovec", -] - -[[package]] -name = "icu_locid_transform" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd89f392982141a878a9321c9298cce46d14e1c17efc5f428dbfd96b443e57d0" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_provider", - "tinystr", - "zerovec", -] - -[[package]] -name = "icu_normalizer" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "652869735c9fb9f5a64ba180ee16f2c848390469c116deef517ecc53f4343598" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_properties", - "icu_provider", - "smallvec", - "utf16_iter", - "utf8_iter", - "write16", - "zerovec", -] - -[[package]] -name = "icu_plurals" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a18fbe19656b3cbae9a40a27b0303f06b2b51165e3b06d596dfdff8f06bfce9a" -dependencies = [ - "displaydoc", - "fixed_decimal", - "icu_locid", - "icu_provider", - "zerovec", -] - -[[package]] -name = "icu_properties" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce0e1aa26851f16c9e04412a5911c86b7f8768dac8f8d4c5f1c568a7e5d7a434" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_provider", - "tinystr", - "zerovec", ] [[package]] name = "icu_provider" -version = "1.2.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8dc312a7b6148f7dfe098047ae2494d12d4034f48ade58d4f353000db376e305" +checksum = "a5d3810a06fce5c900f8ace41b72abf8f6308f77c9e7647211aa5f121c0c9f43" dependencies = [ "displaydoc", "icu_locid", "icu_provider_macros", "stable_deref_trait", + "tinystr", "writeable", "yoke", "zerofrom", "zerovec", ] -[[package]] -name = "icu_provider_adapters" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4ae1e2bd0c41728b77e7c46e9afdec5e2127d1eedacc684724667d50c126bd3" -dependencies = [ - "icu_locid", - "icu_provider", - "tinystr", - "yoke", - "zerovec", -] - [[package]] name = "icu_provider_macros" -version = "1.2.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b728b9421e93eff1d9f8681101b78fa745e0748c95c655c83f337044a7e10" +checksum = "ca9be8af0b117ccf1516251daab4c9137c012646a211c2a02d2f568ea3cd0df4" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.37", ] [[package]] name = "icu_segmenter" -version = "1.2.1" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3300a7b6bf187be98a57264ad094f11f2e062c2e8263132af010ff522ee5495" +checksum = "b9221a9db12a8026cd94f3a171a6514763daafadd64bc0f127c05a5f09836ded" dependencies = [ + "core_maths", "displaydoc", "icu_collections", "icu_locid", "icu_provider", - "num-traits", + "icu_segmenter_data", "utf8_iter", "zerovec", ] [[package]] -name = "icu_timezone" -version = "1.2.0" +name = "icu_segmenter_data" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e22da75a450de2d54161838efa9e1a1f5baa7bc1fffdb015f260e0992b01977" -dependencies = [ - "displaydoc", - "icu_calendar", - "icu_locid", - "icu_provider", - "tinystr", - "zerovec", -] +checksum = "23ca0059266f591bfb7cac9ee3fce5f9861beaa4532ef3629653653acba0a94c" [[package]] name = "idna" @@ -825,16 +647,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "num-traits" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" -dependencies = [ - "autocfg", - "libm", -] - [[package]] name = "num_cpus" version = "1.16.0" @@ -982,19 +794,10 @@ checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.3.8", + "regex-automata", "regex-syntax", ] -[[package]] -name = "regex-automata" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9368763f5a9b804326f3af749e16f9abf378d227bcdee7634b13d8f17793782" -dependencies = [ - "memchr", -] - [[package]] name = "regex-automata" version = "0.3.8" @@ -1183,18 +986,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "synstructure" -version = "0.12.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", - "unicode-xid", -] - [[package]] name = "synstructure" version = "0.13.0" @@ -1306,12 +1097,11 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.7.1" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ac3f5b6856e931e15e07b478e98c8045239829a65f9156d4fa7e7788197a5ef" +checksum = "b07bb54ef1f8ff27564b08b861144d3b8d40263efe07684f64987f4c0d044e3e" dependencies = [ "displaydoc", - "zerovec", ] [[package]] @@ -1444,12 +1234,8 @@ name = "unimarkup-commons" version = "0.4.0" dependencies = [ "clap", - "icu", - "icu_collections", "icu_locid", - "icu_provider", - "icu_provider_adapters", - "icu_provider_macros", + "icu_segmenter", "insta", "itertools", "logid", @@ -1459,9 +1245,6 @@ dependencies = [ "serde_json", "serde_yaml", "thiserror", - "tinystr", - "zerovec", - "zerovec-derive", ] [[package]] @@ -1530,12 +1313,6 @@ dependencies = [ "serde", ] -[[package]] -name = "utf16_iter" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52df8b7fb78e7910d776fccf2e42ceaf3604d55e8e7eb2dbd183cb1441d8a692" - [[package]] name = "utf8_iter" version = "1.0.3" @@ -1749,12 +1526,6 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" -[[package]] -name = "write16" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" - [[package]] name = "writeable" version = "0.5.3" @@ -1791,7 +1562,7 @@ dependencies = [ "proc-macro2", "quote", "syn 2.0.37", - "synstructure 0.13.0", + "synstructure", ] [[package]] @@ -1812,14 +1583,14 @@ dependencies = [ "proc-macro2", "quote", "syn 2.0.37", - "synstructure 0.13.0", + "synstructure", ] [[package]] name = "zerovec" -version = "0.9.4" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "198f54134cd865f437820aa3b43d0ad518af4e68ee161b444cdd15d8e567c8ea" +checksum = "1194130c5b155bf8ae50ab16c86ab758cd695cf9ad176d2f870b744cbdbb572e" dependencies = [ "yoke", "zerofrom", @@ -1828,12 +1599,11 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.9.4" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "486558732d5dde10d0f8cb2936507c1bb21bc539d924c949baf5f36a58e51bac" +checksum = "acabf549809064225ff8878baedc4ce3732ac3b07e7c7ce6e5c2ccdbc485c324" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", - "synstructure 0.12.6", + "syn 2.0.37", ] diff --git a/commons/Cargo.toml b/commons/Cargo.toml index 44f677a2..4ee516ca 100644 --- a/commons/Cargo.toml +++ b/commons/Cargo.toml @@ -19,15 +19,8 @@ serde.workspace = true serde_json.workspace = true serde_yaml.workspace = true once_cell = { workspace = true, optional = true } -icu = "=1.2.0" -icu_locid = "=1.2.0" -zerovec = "=0.9.4" -zerovec-derive = "=0.9.4" -tinystr = "=0.7.1" -icu_provider = "=1.2.0" -icu_provider_adapters = "=1.2.0" -icu_provider_macros = "=1.2.0" -icu_collections = "=1.2.0" +icu_segmenter = "1.3.0" +icu_locid = "1.3.0" regex = { version = "1.8.1", optional = true } insta = {version = "1.29.0", features = ["serde"], optional = true} itertools = "0.11.0" diff --git a/commons/src/config/locale.rs b/commons/src/config/locale.rs index 190ea5f7..4208ca5e 100644 --- a/commons/src/config/locale.rs +++ b/commons/src/config/locale.rs @@ -1,5 +1,5 @@ pub mod serde { - use icu::locid::Locale; + use icu_locid::Locale; use serde::{Deserialize, Deserializer, Serializer}; pub mod single { @@ -70,7 +70,7 @@ pub mod serde { } pub mod clap { - pub fn parse_locale(input: &str) -> Result { + pub fn parse_locale(input: &str) -> Result { input.parse().map_err(|err| { clap::Error::raw( clap::error::ErrorKind::InvalidValue, diff --git a/commons/src/config/mod.rs b/commons/src/config/mod.rs index ae7a6853..6977d31c 100644 --- a/commons/src/config/mod.rs +++ b/commons/src/config/mod.rs @@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize}; use self::{log_id::ConfigErr, output::Output, preamble::Preamble}; -pub use icu::locid; +pub use icu_locid as locid; pub mod locale; pub mod log_id; diff --git a/commons/src/config/preamble.rs b/commons/src/config/preamble.rs index a1671e18..29e02950 100644 --- a/commons/src/config/preamble.rs +++ b/commons/src/config/preamble.rs @@ -4,7 +4,7 @@ use std::{ }; use clap::Args; -use icu::locid::Locale; +use icu_locid::Locale; use logid::err; use serde::{Deserialize, Serialize}; diff --git a/commons/src/scanner/icu_data/any.rs b/commons/src/scanner/icu_data/any.rs deleted file mode 100644 index 6fedf866..00000000 --- a/commons/src/scanner/icu_data/any.rs +++ /dev/null @@ -1,2 +0,0 @@ -// @generated -impl_any_provider ! (BakedDataProvider) ; \ No newline at end of file diff --git a/commons/src/scanner/icu_data/fallback/likelysubtags_v1/mod.rs b/commons/src/scanner/icu_data/fallback/likelysubtags_v1/mod.rs deleted file mode 100644 index 02592b80..00000000 --- a/commons/src/scanner/icu_data/fallback/likelysubtags_v1/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -// @generated -type DataStruct = < :: icu_provider_adapters :: fallback :: provider :: LocaleFallbackLikelySubtagsV1Marker as :: icu_provider :: DataMarker > :: Yokeable ; pub fn lookup (locale : & icu_provider :: DataLocale) -> Option < & 'static DataStruct > { locale . is_empty () . then (|| & UND) } static UND : DataStruct = include ! ("und.rs.data") ; \ No newline at end of file diff --git a/commons/src/scanner/icu_data/fallback/likelysubtags_v1/und.rs.data b/commons/src/scanner/icu_data/fallback/likelysubtags_v1/und.rs.data deleted file mode 100644 index 5dfb677f..00000000 --- a/commons/src/scanner/icu_data/fallback/likelysubtags_v1/und.rs.data +++ /dev/null @@ -1 +0,0 @@ -:: icu_provider_adapters :: fallback :: provider :: LocaleFallbackLikelySubtagsV1 { l2s : unsafe { # [allow (unused_unsafe)] :: zerovec :: ZeroMap :: from_parts_unchecked (unsafe { :: zerovec :: ZeroVec :: from_bytes_unchecked (b"am\0ar\0as\0be\0bg\0bgcbhobn\0brxchrcv\0doiel\0fa\0gu\0he\0hi\0hy\0ja\0ka\0kk\0km\0kn\0ko\0kokks\0ky\0lo\0maimk\0ml\0mn\0mnimr\0my\0ne\0or\0pa\0ps\0rajru\0sa\0satsd\0si\0sr\0ta\0te\0tg\0th\0ti\0tt\0uk\0ur\0yuezh\0") } , unsafe { :: zerovec :: ZeroVec :: from_bytes_unchecked (b"EthiArabBengCyrlCyrlDevaDevaBengDevaCherCyrlDevaGrekArabGujrHebrDevaArmnJpanGeorCyrlKhmrKndaKoreDevaArabCyrlLaooDevaCyrlMlymCyrlBengDevaMymrDevaOryaGuruArabDevaCyrlDevaOlckArabSinhCyrlTamlTeluCyrlThaiEthiCyrlCyrlArabHantHans") }) } , lr2s : unsafe { # [allow (unused_unsafe)] :: zerovec :: ZeroMap2d :: from_parts_unchecked (unsafe { :: zerovec :: ZeroVec :: from_bytes_unchecked (b"az\0ha\0kk\0ky\0mn\0ms\0pa\0sd\0sr\0tg\0uz\0yuezh\0") } , unsafe { :: zerovec :: ZeroVec :: from_bytes_unchecked (b"\x03\0\0\0\x05\0\0\0\t\0\0\0\x0B\0\0\0\x0C\0\0\0\r\0\0\0\x0E\0\0\0\x0F\0\0\0\x13\0\0\0\x14\0\0\0\x16\0\0\0\x17\0\0\0&\0\0\0") } , unsafe { :: zerovec :: ZeroVec :: from_bytes_unchecked (b"IQ\0IR\0RU\0CM\0SD\0AF\0CN\0IR\0MN\0CN\0TR\0CN\0CC\0PK\0IN\0ME\0RO\0RU\0TR\0PK\0AF\0CN\0CN\0AU\0BN\0GB\0GF\0HK\0ID\0MO\0PA\0PF\0PH\0SR\0TH\0TW\0US\0VN\0") } , unsafe { :: zerovec :: ZeroVec :: from_bytes_unchecked (b"ArabArabCyrlArabArabArabArabArabArabArabLatnMongArabArabDevaLatnLatnLatnLatnArabArabCyrlHansHantHantHantHantHantHantHantHantHantHantHantHantHantHantHant") }) } , l2r : unsafe { # [allow (unused_unsafe)] :: zerovec :: ZeroMap :: from_parts_unchecked (unsafe { :: zerovec :: ZeroVec :: from_bytes_unchecked (b"af\0am\0ar\0as\0astaz\0be\0bg\0bgcbhobn\0br\0brxbs\0ca\0cebchrcs\0cv\0cy\0da\0de\0doidsbel\0en\0es\0et\0eu\0fa\0ff\0fi\0filfo\0fr\0ga\0gd\0gl\0gu\0ha\0he\0hi\0hr\0hsbhu\0hy\0ia\0id\0ig\0is\0it\0ja\0jv\0ka\0keakgpkk\0km\0kn\0ko\0kokks\0ky\0lo\0lt\0lv\0maimi\0mk\0ml\0mn\0mnimr\0ms\0my\0ne\0nl\0nn\0no\0or\0pa\0pcmpl\0ps\0pt\0qu\0rajrm\0ro\0ru\0sa\0satsc\0sd\0si\0sk\0sl\0so\0sq\0sr\0su\0sv\0sw\0ta\0te\0tg\0th\0ti\0tk\0to\0tr\0tt\0uk\0ur\0uz\0vi\0wo\0xh\0yo\0yrlyuezh\0zu\0") } , unsafe { :: zerovec :: ZeroVec :: from_bytes_unchecked (b"ZA\0ET\0EG\0IN\0ES\0AZ\0BY\0BG\0IN\0IN\0BD\0FR\0IN\0BA\0ES\0PH\0US\0CZ\0RU\0GB\0DK\0DE\0IN\0DE\0GR\0US\0ES\0EE\0ES\0IR\0SN\0FI\0PH\0FO\0FR\0IE\0GB\0ES\0IN\0NG\0IL\0IN\0HR\0DE\0HU\0AM\x00001ID\0NG\0IS\0IT\0JP\0ID\0GE\0CV\0BR\0KZ\0KH\0IN\0KR\0IN\0IN\0KG\0LA\0LT\0LV\0IN\0NZ\0MK\0IN\0MN\0IN\0IN\0MY\0MM\0NP\0NL\0NO\0NO\0IN\0IN\0NG\0PL\0AF\0BR\0PE\0IN\0CH\0RO\0RU\0IN\0IN\0IT\0PK\0LK\0SK\0SI\0SO\0AL\0RS\0ID\0SE\0TZ\0IN\0IN\0TJ\0TH\0ET\0TM\0TO\0TR\0RU\0UA\0PK\0UZ\0VN\0SN\0ZA\0NG\0BR\0HK\0CN\0ZA\0") }) } , ls2r : unsafe { # [allow (unused_unsafe)] :: zerovec :: ZeroMap2d :: from_parts_unchecked (unsafe { :: zerovec :: ZeroVec :: from_bytes_unchecked (b"az\0en\0ff\0kk\0ky\0mn\0pa\0sd\0tg\0uz\0yuezh\0") } , unsafe { :: zerovec :: ZeroVec :: from_bytes_unchecked (b"\x01\0\0\0\x02\0\0\0\x03\0\0\0\x04\0\0\0\x06\0\0\0\x07\0\0\0\x08\0\0\0\x0B\0\0\0\x0C\0\0\0\r\0\0\0\x0E\0\0\0\x11\0\0\0") } , unsafe { :: zerovec :: ZeroVec :: from_bytes_unchecked (b"ArabShawAdlmArabArabLatnMongArabDevaKhojSindArabArabHansBopoHanbHant") } , unsafe { :: zerovec :: ZeroVec :: from_bytes_unchecked (b"IR\0GB\0GN\0CN\0CN\0TR\0CN\0PK\0IN\0IN\0IN\0PK\0AF\0CN\0TW\0TW\0TW\0") }) } , } \ No newline at end of file diff --git a/commons/src/scanner/icu_data/fallback/mod.rs b/commons/src/scanner/icu_data/fallback/mod.rs deleted file mode 100644 index 46529525..00000000 --- a/commons/src/scanner/icu_data/fallback/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -// @generated -pub mod likelysubtags_v1 ; pub mod parents_v1 ; pub mod supplement ; \ No newline at end of file diff --git a/commons/src/scanner/icu_data/fallback/parents_v1/mod.rs b/commons/src/scanner/icu_data/fallback/parents_v1/mod.rs deleted file mode 100644 index 7d2b4206..00000000 --- a/commons/src/scanner/icu_data/fallback/parents_v1/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -// @generated -type DataStruct = < :: icu_provider_adapters :: fallback :: provider :: LocaleFallbackParentsV1Marker as :: icu_provider :: DataMarker > :: Yokeable ; pub fn lookup (locale : & icu_provider :: DataLocale) -> Option < & 'static DataStruct > { locale . is_empty () . then (|| & UND) } static UND : DataStruct = include ! ("und.rs.data") ; \ No newline at end of file diff --git a/commons/src/scanner/icu_data/fallback/parents_v1/und.rs.data b/commons/src/scanner/icu_data/fallback/parents_v1/und.rs.data deleted file mode 100644 index 817c1dd1..00000000 --- a/commons/src/scanner/icu_data/fallback/parents_v1/und.rs.data +++ /dev/null @@ -1 +0,0 @@ -:: icu_provider_adapters :: fallback :: provider :: LocaleFallbackParentsV1 { parents : unsafe { # [allow (unused_unsafe)] :: zerovec :: ZeroMap :: from_parts_unchecked (unsafe { :: zerovec :: VarZeroVec :: from_bytes_unchecked (b"\x84\0\0\0\0\0\x06\0\x0B\0\x10\0\x15\0\x1A\0\x1F\0$\0)\0.\x003\08\0=\0B\0G\0L\0Q\0V\0[\0`\0e\0j\0o\0t\0y\0~\0\x83\0\x88\0\x8D\0\x92\0\x97\0\x9C\0\xA1\0\xA6\0\xAB\0\xB0\0\xB5\0\xBA\0\xBF\0\xC4\0\xC9\0\xCE\0\xD3\0\xD8\0\xDD\0\xE2\0\xE7\0\xEC\0\xF1\0\xF6\0\xFB\0\0\x01\x05\x01\n\x01\x0F\x01\x14\x01\x19\x01\x1E\x01#\x01(\x01-\x012\x017\x01<\x01A\x01F\x01K\x01P\x01U\x01Z\x01_\x01d\x01i\x01n\x01s\x01x\x01}\x01\x82\x01\x87\x01\x8C\x01\x91\x01\x96\x01\x9B\x01\xA0\x01\xA5\x01\xAA\x01\xAF\x01\xB4\x01\xB9\x01\xBE\x01\xC3\x01\xC8\x01\xCD\x01\xD2\x01\xD7\x01\xDC\x01\xE1\x01\xE6\x01\xEB\x01\xF0\x01\xF5\x01\xFA\x01\xFF\x01\x04\x02\t\x02\x0E\x02\x13\x02\x18\x02\x1D\x02\"\x02'\x02,\x021\x026\x02;\x02@\x02G\x02I\x02K\x02M\x02R\x02W\x02\\\x02a\x02f\x02k\x02p\x02u\x02z\x02\x7F\x02\x84\x02\x89\x02en-150en-AGen-AIen-ATen-AUen-BBen-BEen-BMen-BSen-BWen-BZen-CCen-CHen-CKen-CMen-CXen-CYen-DEen-DGen-DKen-DMen-ERen-FIen-FJen-FKen-FMen-GBen-GDen-GGen-GHen-GIen-GMen-GYen-HKen-IEen-ILen-IMen-INen-IOen-JEen-JMen-KEen-KIen-KNen-KYen-LCen-LRen-LSen-MGen-MOen-MSen-MTen-MUen-MVen-MWen-MYen-NAen-NFen-NGen-NLen-NRen-NUen-NZen-PGen-PKen-PNen-PWen-RWen-SBen-SCen-SDen-SEen-SGen-SHen-SIen-SLen-SSen-SXen-SZen-TCen-TKen-TOen-TTen-TVen-TZen-UGen-VCen-VGen-VUen-WSen-ZAen-ZMen-ZWes-ARes-BOes-BRes-BZes-CLes-COes-CRes-CUes-DOes-ECes-GTes-HNes-MXes-NIes-PAes-PEes-PRes-PYes-SVes-USes-UYes-VEhi-Latnhtnbnnno-NOpt-AOpt-CHpt-CVpt-FRpt-GQpt-GWpt-LUpt-MOpt-MZpt-STpt-TLzh-Hant-MO") } , unsafe { :: zerovec :: ZeroVec :: from_bytes_unchecked (b"en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419en\0\0\0\0\0\0\x01IN\0fr\0\0\0\0\0\0\x01HT\0no\0\0\0\0\0\0\0\0\0\0no\0\0\0\0\0\0\0\0\0\0no\0\0\0\0\0\0\0\0\0\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0zh\0\x01Hant\x01HK\0") }) } , } \ No newline at end of file diff --git a/commons/src/scanner/icu_data/fallback/supplement/co_v1/mod.rs b/commons/src/scanner/icu_data/fallback/supplement/co_v1/mod.rs deleted file mode 100644 index abf03206..00000000 --- a/commons/src/scanner/icu_data/fallback/supplement/co_v1/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -// @generated -type DataStruct = < :: icu_provider_adapters :: fallback :: provider :: CollationFallbackSupplementV1Marker as :: icu_provider :: DataMarker > :: Yokeable ; pub fn lookup (locale : & icu_provider :: DataLocale) -> Option < & 'static DataStruct > { locale . is_empty () . then (|| & UND) } static UND : DataStruct = include ! ("und.rs.data") ; \ No newline at end of file diff --git a/commons/src/scanner/icu_data/fallback/supplement/co_v1/und.rs.data b/commons/src/scanner/icu_data/fallback/supplement/co_v1/und.rs.data deleted file mode 100644 index 61e586d3..00000000 --- a/commons/src/scanner/icu_data/fallback/supplement/co_v1/und.rs.data +++ /dev/null @@ -1 +0,0 @@ -:: icu_provider_adapters :: fallback :: provider :: LocaleFallbackSupplementV1 { parents : unsafe { # [allow (unused_unsafe)] :: zerovec :: ZeroMap :: from_parts_unchecked (unsafe { :: zerovec :: VarZeroVec :: from_bytes_unchecked (b"\x01\0\0\0\0\0yue") } , unsafe { :: zerovec :: ZeroVec :: from_bytes_unchecked (b"zh\0\x01Hant\0\0\0\0") }) } , unicode_extension_defaults : unsafe { # [allow (unused_unsafe)] :: zerovec :: ZeroMap2d :: from_parts_unchecked (unsafe { :: zerovec :: ZeroVec :: from_bytes_unchecked (b"co") } , unsafe { :: zerovec :: ZeroVec :: from_bytes_unchecked (b"\x02\0\0\0") } , unsafe { :: zerovec :: VarZeroVec :: from_bytes_unchecked (b"\x02\0\0\0\0\0\x02\0zhzh-Hant") } , unsafe { :: zerovec :: VarZeroVec :: from_bytes_unchecked (b"\x02\0\0\0\0\0\x06\0pinyinstroke") }) } , } \ No newline at end of file diff --git a/commons/src/scanner/icu_data/fallback/supplement/mod.rs b/commons/src/scanner/icu_data/fallback/supplement/mod.rs deleted file mode 100644 index c48b8225..00000000 --- a/commons/src/scanner/icu_data/fallback/supplement/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -// @generated -pub mod co_v1 ; \ No newline at end of file diff --git a/commons/src/scanner/icu_data/mod.rs b/commons/src/scanner/icu_data/mod.rs deleted file mode 100644 index 4f6998a0..00000000 --- a/commons/src/scanner/icu_data/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -// @generated -# [clippy :: msrv = "1.61"] mod fallback ; # [clippy :: msrv = "1.61"] mod segmenter ; # [clippy :: msrv = "1.61"] use :: icu_provider :: prelude :: * ; # [doc = r" Implement [`DataProvider`] on the given struct using the data"] # [doc = r" hardcoded in this module. This allows the struct to be used with"] # [doc = r" `icu`'s `_unstable` constructors."] # [doc = r""] # [doc = r" This macro can only be called from its definition-site, i.e. right"] # [doc = r" after `include!`-ing the generated module."] # [doc = r""] # [doc = r" ```compile_fail"] # [doc = r" struct MyDataProvider;"] # [doc = r#" include!("/path/to/generated/mod.rs");"#] # [doc = r" impl_data_provider(MyDataProvider);"] # [doc = r" ```"] # [allow (unused_macros)] macro_rules ! impl_data_provider { ($ provider : path) => { # [clippy :: msrv = "1.61"] impl DataProvider < :: icu_provider_adapters :: fallback :: provider :: CollationFallbackSupplementV1Marker > for $ provider { fn load (& self , req : DataRequest ,) -> Result < DataResponse < :: icu_provider_adapters :: fallback :: provider :: CollationFallbackSupplementV1Marker > , DataError > { fallback :: supplement :: co_v1 :: lookup (& req . locale) . map (zerofrom :: ZeroFrom :: zero_from) . map (DataPayload :: from_owned) . map (| payload | { DataResponse { metadata : Default :: default () , payload : Some (payload) , } }) . ok_or_else (|| DataErrorKind :: MissingLocale . with_req (:: icu_provider_adapters :: fallback :: provider :: CollationFallbackSupplementV1Marker :: KEY , req)) } } # [clippy :: msrv = "1.61"] impl DataProvider < :: icu_provider_adapters :: fallback :: provider :: LocaleFallbackLikelySubtagsV1Marker > for $ provider { fn load (& self , req : DataRequest ,) -> Result < DataResponse < :: icu_provider_adapters :: fallback :: provider :: LocaleFallbackLikelySubtagsV1Marker > , DataError > { fallback :: likelysubtags_v1 :: lookup (& req . locale) . map (zerofrom :: ZeroFrom :: zero_from) . map (DataPayload :: from_owned) . map (| payload | { DataResponse { metadata : Default :: default () , payload : Some (payload) , } }) . ok_or_else (|| DataErrorKind :: MissingLocale . with_req (:: icu_provider_adapters :: fallback :: provider :: LocaleFallbackLikelySubtagsV1Marker :: KEY , req)) } } # [clippy :: msrv = "1.61"] impl DataProvider < :: icu_provider_adapters :: fallback :: provider :: LocaleFallbackParentsV1Marker > for $ provider { fn load (& self , req : DataRequest ,) -> Result < DataResponse < :: icu_provider_adapters :: fallback :: provider :: LocaleFallbackParentsV1Marker > , DataError > { fallback :: parents_v1 :: lookup (& req . locale) . map (zerofrom :: ZeroFrom :: zero_from) . map (DataPayload :: from_owned) . map (| payload | { DataResponse { metadata : Default :: default () , payload : Some (payload) , } }) . ok_or_else (|| DataErrorKind :: MissingLocale . with_req (:: icu_provider_adapters :: fallback :: provider :: LocaleFallbackParentsV1Marker :: KEY , req)) } } # [clippy :: msrv = "1.61"] impl DataProvider < :: icu::segmenter :: provider :: GraphemeClusterBreakDataV1Marker > for $ provider { fn load (& self , req : DataRequest ,) -> Result < DataResponse < :: icu::segmenter :: provider :: GraphemeClusterBreakDataV1Marker > , DataError > { segmenter :: grapheme_v1 :: lookup (& req . locale) . map (zerofrom :: ZeroFrom :: zero_from) . map (DataPayload :: from_owned) . map (| payload | { DataResponse { metadata : Default :: default () , payload : Some (payload) , } }) . ok_or_else (|| DataErrorKind :: MissingLocale . with_req (:: icu::segmenter :: provider :: GraphemeClusterBreakDataV1Marker :: KEY , req)) } } } } # [doc = r" Implement [`AnyProvider`] on the given struct using the data"] # [doc = r" hardcoded in this module. This allows the struct to be used with"] # [doc = r" `icu`'s `_any` constructors."] # [doc = r""] # [doc = r" This macro can only be called from its definition-site, i.e. right"] # [doc = r" after `include!`-ing the generated module."] # [doc = r" "] # [doc = r" ```compile_fail"] # [doc = r" struct MyAnyProvider;"] # [doc = r#" include!("/path/to/generated/mod.rs");"#] # [doc = r" impl_any_provider(MyAnyProvider);"] # [doc = r" ```"] # [allow (unused_macros)] macro_rules ! impl_any_provider { ($ provider : path) => { # [clippy :: msrv = "1.61"] impl AnyProvider for $ provider { fn load_any (& self , key : DataKey , req : DataRequest) -> Result < AnyResponse , DataError > { const COLLATIONFALLBACKSUPPLEMENTV1MARKER : :: icu_provider :: DataKeyHash = :: icu_provider_adapters :: fallback :: provider :: CollationFallbackSupplementV1Marker :: KEY . hashed () ; const LOCALEFALLBACKLIKELYSUBTAGSV1MARKER : :: icu_provider :: DataKeyHash = :: icu_provider_adapters :: fallback :: provider :: LocaleFallbackLikelySubtagsV1Marker :: KEY . hashed () ; const LOCALEFALLBACKPARENTSV1MARKER : :: icu_provider :: DataKeyHash = :: icu_provider_adapters :: fallback :: provider :: LocaleFallbackParentsV1Marker :: KEY . hashed () ; const GRAPHEMECLUSTERBREAKDATAV1MARKER : :: icu_provider :: DataKeyHash = :: icu::segmenter :: provider :: GraphemeClusterBreakDataV1Marker :: KEY . hashed () ; match key . hashed () { COLLATIONFALLBACKSUPPLEMENTV1MARKER => fallback :: supplement :: co_v1 :: lookup (& req . locale) . map (AnyPayload :: from_static_ref) , LOCALEFALLBACKLIKELYSUBTAGSV1MARKER => fallback :: likelysubtags_v1 :: lookup (& req . locale) . map (AnyPayload :: from_static_ref) , LOCALEFALLBACKPARENTSV1MARKER => fallback :: parents_v1 :: lookup (& req . locale) . map (AnyPayload :: from_static_ref) , GRAPHEMECLUSTERBREAKDATAV1MARKER => segmenter :: grapheme_v1 :: lookup (& req . locale) . map (AnyPayload :: from_static_ref) , _ => return Err (DataErrorKind :: MissingDataKey . with_req (key , req)) , } . map (| payload | AnyResponse { payload : Some (payload) , metadata : Default :: default () , }) . ok_or_else (|| DataErrorKind :: MissingLocale . with_req (key , req)) } } } } # [clippy :: msrv = "1.61"] pub struct BakedDataProvider ; impl_data_provider ! (BakedDataProvider) ; \ No newline at end of file diff --git a/commons/src/scanner/icu_data/segmenter/grapheme_v1/mod.rs b/commons/src/scanner/icu_data/segmenter/grapheme_v1/mod.rs deleted file mode 100644 index 2ed3b502..00000000 --- a/commons/src/scanner/icu_data/segmenter/grapheme_v1/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -// @generated -type DataStruct = < :: icu::segmenter :: provider :: GraphemeClusterBreakDataV1Marker as :: icu_provider :: DataMarker > :: Yokeable ; pub fn lookup (locale : & icu_provider :: DataLocale) -> Option < & 'static DataStruct > { locale . is_empty () . then (|| & UND) } static UND : DataStruct = include ! ("und.rs.data") ; \ No newline at end of file diff --git a/commons/src/scanner/icu_data/segmenter/grapheme_v1/und.rs.data b/commons/src/scanner/icu_data/segmenter/grapheme_v1/und.rs.data deleted file mode 100644 index 97d3187e..00000000 --- a/commons/src/scanner/icu_data/segmenter/grapheme_v1/und.rs.data +++ /dev/null @@ -1 +0,0 @@ -:: icu::segmenter :: provider :: RuleBreakDataV1 { property_table : :: icu::segmenter :: provider :: RuleBreakPropertyTable (:: icu::collections :: codepointtrie :: CodePointTrie :: from_parts (:: icu::collections :: codepointtrie :: CodePointTrieHeader { high_start : 921600u32 , shifted12_high_start : 225u16 , index3_null_offset : 407u16 , data_null_offset : 32u32 , null_value : 0u32 , trie_type : :: icu::collections :: codepointtrie :: TrieType :: Small , } , unsafe { :: zerovec :: ZeroVec :: from_bytes_unchecked (b"\0\0@\0\x7F\0 \0 \0 \0 \0 \0 \0 \0 \0 \0\xBF\0\xCF\0 \0 \0 \0 \0\x0C\x01 \0 \0 \0;\x01y\x01\xB9\x01\xEE\x01 \0\x1F\x02P\x02\x85\x02\x9F\x02\xD0\x02\x0E\x03<\x03l\x03\xA2\x03\xDF\x03\x1E\x04]\x04\x9C\x04\xDB\x04\x1A\x05\xDB\x04Z\x05\x9A\x05\xDA\x05\x18\x06V\x06\x96\x06\xD5\x06\x14\x07T\x07\x94\x07\xD3\x07\x12\x08H\x08|\x08\xB7\x08\xC6\x08\x07\x01\x03\tC\t\x83\t\x14\x06\x86\x07\xA0\x07\xAD\x07\xC6\x07\xE6\x07\x01\x08\x19\x088\x08\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07\xA0\x07X\x08\0\0\x10\0 \x000\0@\0P\0`\0p\0\x7F\0\x8F\0\x9F\0\xAF\0 \x000\0@\0P\0 \x000\0@\0P\0 \x000\0@\0P\0 \x000\0@\0P\0 \x000\0@\0P\0\xBF\0\xCF\0\xDF\0\xEF\0\xCF\0\xDF\0\xEF\0\xFF\0 \x000\0@\0P\0 \x000\0@\0P\0\x0C\x01\x1C\x01,\x01<\x01 \x000\0@\0P\0 \x000\0@\0P\0 \x000\0@\0P\0;\x01K\x01[\x01k\x01y\x01\x89\x01\x99\x01\xA9\x01\xB9\x01\xC9\x01\xD9\x01\xE9\x01\xEE\x01\xFE\x01\x0E\x02\x1E\x02 \x000\0@\0P\0\x1F\x02/\x02?\x02O\x02P\x02`\x02p\x02\x80\x02\x85\x02\x95\x02\xA5\x02\xB5\x02\x9F\x02\xAF\x02\xBF\x02\xCF\x02\xD0\x02\xE0\x02\xF0\x02\0\x03\x0E\x03\x1E\x03.\x03>\x03<\x03L\x03\\\x03l\x03l\x03|\x03\x8C\x03\x9C\x03\xA2\x03\xB2\x03\xC2\x03\xD2\x03\xDF\x03\xEF\x03\xFF\x03\x0F\x04\x1E\x04.\x04>\x04N\x04]\x04m\x04}\x04\x8D\x04\x9C\x04\xAC\x04\xBC\x04\xCC\x04\xDB\x04\xEB\x04\xFB\x04\x0B\x05\x1A\x05*\x05:\x05J\x05\xDB\x04\xEB\x04\xFB\x04\x0B\x05Z\x05j\x05z\x05\x8A\x05\x9A\x05\xAA\x05\xBA\x05\xCA\x05\xDA\x05\xEA\x05\xFA\x05\n\x06\x18\x06(\x068\x06H\x06V\x06f\x06v\x06\x86\x06\x96\x06\xA6\x06\xB6\x06\xC6\x06\xD5\x06\xE5\x06\xF5\x06\x05\x07\x14\x07$\x074\x07D\x07T\x07d\x07t\x07\x84\x07\x94\x07\xA4\x07\xB4\x07\xC4\x07\xD3\x07\xE3\x07\xF3\x07\x03\x08\x12\x08\"\x082\x08B\x08H\x08X\x08h\x08x\x08|\x08\x8C\x08\x9C\x08\xAC\x08\xB7\x08\xC7\x08\xD7\x08\xE7\x08\xC6\x08\xD6\x08\xE6\x08\xF6\x08\x07\x01\x17\x01'\x017\x01\x03\t\x13\t#\t3\tC\tS\tc\ts\t\x83\t\x93\t\xA3\t\xB3\t\x14\x06$\x064\x06D\x06 \0 \0\xB2\0\xC3\t \0\xD2\t\xFE\0H\x02\xE2\t\x11\x02 \0 \0 \0 \0 \0 \0\xF2\t\xF2\t\xF2\t\xF2\t\xF2\t\xF2\t\x02\n\x02\n\x02\n\x02\n\n\n\x12\n\x12\n\x12\n\x12\n\x12\n \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\xB2\0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\"\n \x000\n \0>\x04 \0>\x04 \0 \0 \0<\nJ\n\0\x03 \0 \0Z\n \0 \0 \0 \0 \0 \0 \0;\x04 \0\x15\x02 \0 \0 \0 \0 \0 \0 \0i\ny\n \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\x85\n \0 \0 \0\x91\n\x9F\n\xAC\n \0 \0 \0\xBF\0\xF0\0 \0 \0 \0\xDE\x03 \0 \0\xBC\n\xCA\n \0\xB4\0\xFB\0\xE0\x03 \0\xD9\n \0 \0 \0\xE7\n\x94\x07 \0 \0\xF7\n\x03\x0B \0 \0 \0 \0 \0 \0 \0 \0 \0\x98\t\x13\x0B\x1C\x0B \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\xBF\0\xBF\0\xBF\0\xBF\0&\x0B \x006\x0BE\x0BH\x0B \0\x0E\0 \0 \0 \0 \0 \0 \0\xBF\0\xBF\0\xFE\0 \0 \0O\x0BH\x0B \0 \0 \0 \0 \0[\x0Be\x0B \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0p\x0BI\x0B \0 \0 \0 \0 \0I\x0B \0 \0 \0|\x0B \0\x82\x0B\x8E\x0B \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0O\x0B \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0p\x0BK\x0B\xAD\0 \0 \0\x99\x0B\xA9\x0B\xAC\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xBA\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xCA\x0B\xD7\x0B\xDC\x0Bw\x0B\xE8\x0B\xF7\x0B\x04\x0C \0 \0\x0F\x0C\x1E\x0C\x1F\x0C \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0j\x0B \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\x0F\x0C#\x0C \0 \0 \0/\x0C \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\xB0\0\xFD\0 \0 \0 \0 \0 \0 \0 \0\xB0\0 \0 \0 \0 \0 \0 \0\xBF\0\xBF\0 \0 \0\xB5\x004\x0C \0 \0 \0 \0 \x007\x04 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0B\x0C \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\xB0\0R\x0C \0\xB1\0 \0 \0 \0 \0 \0\xFD\0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0`\x0C \0m\x0C \0 \0 \0 \0 \0_\x04 \0 \0z\x0C\x86\x0C \0\xBF\0\x8A\x0C \0 \0~\x03 \0\xB8\0\x94\x07\xF2\t\x9A\x0C\xDF\x03 \0 \0\xA7\x0C\xE2\x03 \0\x19\x02 \0 \0\xB7\x0C\xC6\x0C\xD3\x0C \0 \0\x12\x02 \0 \0 \0\xE3\x0C\x7F\x01 \0\xF3\x0C\x03\r \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\x10\r \0 \r$\r1\r(\r1\r,\r1\r \r$\r1\r(\r1\r,\r1\r \r$\r1\r(\r1\r,\r1\r \r$\r1\r(\r1\r,\r1\r \r$\r1\r(\r1\r,\r1\r \r$\r1\r(\r1\r=\r\x02\nM\r\x12\n\x12\nX\r \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\x10\x02 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\xBF\0 \0\xBF\0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0p\0 \0 \0 \0 \0 \0 \0 \0 \0 \0\xB1\0 \0 \0 \0 \0 \0\x14\0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\x11\x02 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\xFE\0 \0 \0 \0 \0 \0 \0 \0 \0d\r \0 \0 \0 \0 \0 \0 \0 \0s\r \0 \0\x83\r \0 \0 \0 \0 \0 \0 \0 \0 \0 \0;\x04 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0{\r \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0?\x05 \0 \0 \0 \0\xB2\0 \0 \0 \0 \0\xB9\0\xFE\0 \0 \0\x90\r \0 \0 \0 \0 \0 \0 \0\xA0\r \0 \0\xB7\0\xF8\0 \0 \0\xB0\r\xE0\x03 \0 \0\xC0\r\xCE\r \0 \0 \0\xFC\0 \0\xDC\r\xFA\0u\x08 \0 \0\x1B\x02\xE0\x03 \0 \0\xEC\r\xFB\r \0 \0 \0\x0B\x0E\x1A\x0E\x7F\x01 \0 \0 \0 \0 \0 \0 \0 \0\xB0\0*\x0E \0\x94\x07 \0 \0\xC4\x07:\x0E\x17\x02H\x0E\xFA\0 \0 \0 \0 \0 \0 \0 \0 \0U\x0Ee\x0E\x10\x02 \0 \0 \0 \0 \0u\x0E\x82\x0E \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\xB0\0\x92\x0E\xFE\0>\x05 \0 \0 \0\xA2\x0E\xFE\0 \0 \0 \0 \0 \0\xB2\x0E\xC2\x0E \0 \0 \0 \0 \0\xB2\0\xD0\x0E \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\x0B\x0E\xE0\x0E \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\xF0\x0E\0\x0F \0 \0 \0 \0 \0 \0 \0 \0\x0F\x0F\x1F\x0F \0.\x0F \0 \0;\x0F\x17\x02J\x0F \0 \0V\x0F`\x0F \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0x\x07p\x0F \0 \0 \0 \0 \0\xBD\0\x7F\x0F\x8E\x0F \0 \0 \0 \0 \0 \0 \0\x9D\x0F\xAC\x0F \0 \0 \0\xB4\x0F\xC4\x0F \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\x91\x07\xD4\x0F \0 \0\xE0\x0F\xF0\x0F \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\x0E\0\0\x10\xF9\0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\xFA\0 \0 \0 \0\xF8\0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\xB0\0\x10\x10\x11\x10\x11\x10\x19\x10\xFC\0 \0 \0 \0 \0\x1A\x02_\x04 \0 \0 \0 \0 \0 \0 \0 \0 \0=\x05\x1C\0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\xBF\0\xBF\0\xF1\0\xBF\0\xF8\0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0#\x101\x10>\x10 \0J\x10 \0 \0 \0 \0 \0S\x03 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\xBF\0\xBF\0\xBF\0Z\x10\xBF\0\xBF\0\xF2\0\x19\x02\x1A\x02\xB4\0\xBE\0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\x94\te\x10s\x10 \0 \0 \0 \0 \0\xB0\0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\xF8\0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\x10\x02 \0 \0 \0\xB3\0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\xB3\0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\xF8\0 \0 \0 \0 \0 \0 \0\x0B\x01 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0BR\x0B \0|\x0B \0 \0 \0S\x0B\x83\x10\x93\x10\xA2\x10R\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB2\x10\xB8\x10\xAF\x0BG\x0B|\x0B\xC8\x10\x82\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xD4\x10\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB2\x0B\xE4\x10\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B \0 \0 \0\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B \0 \0 \0 \0 \0 \0 \0\xE6\x10 \0 \0 \0 \0 \0\xE5\x10\xB0\x0B\xB0\x0BS\x0B \0 \0 \0\xF6\x10U\x0B \0 \0\xF6\x10 \0}\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0BS\x0B\xB0\x0B\xB0\x0B\xFE\x10\xA9\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB0\x0B\xB2\x0B\x0E\0\x0E\0\xBF\0\xBF\0\xBF\0\xBF\0\xBF\0\xBF\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\xBF\0\xBF\0\xBF\0\xBF\0\xBF\0\xBF\0\xBF\0\xBF\0\xBF\0\xBF\0\xBF\0\xBF\0\xBF\0\xBF\0\xBF\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0\x0E\0y\0\x89\0\xA1\0\xC1\0\xE1\0\x01\x01!\x01A\x01a\x01\x81\x01\x97\x01\xA6\x01\xC6\x01\xE5\x01\x05\x02\x97\x01%\x02@\x02`\x02\x80\x02\x9C\x02\xB0\x02\xC6\x02\x97\x01\xE6\x02\xFD\x02\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x17\x037\x03V\x03v\x03z\x03w\x03{\x03x\x03|\x03y\x03v\x03z\x03w\x03{\x03x\x03|\x03y\x03\x84\x03\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\xA4\x03\x97\x01\xC4\x03\xE4\x03\x04\x04\x97\x01\x97\x01\x97\x01$\x043\x04I\x04i\x04\x87\x04\xA4\x04\xC2\x04\xE0\x04\0\x05\x1E\x058\x05\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01U\x05\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01f\x05\x97\x01z\x05\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x9A\x05\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\x97\x01\xAA\x05\xBF\x05\xDB\x05\x97\x01\x97\x01\x97\x01\xFB\x05\x97\x01\x97\x01\x1B\x061\x06C\x06\x97\x01V\x06\x97\x01\x97\x01\x97\x01v\x06\x96\x06\xB6\x06\xD1\x06\xF1\x06\x06\x07&\x07'\x07G\x07f\x07f\x07f\x07f\x07f\x07f\x07f\x07") } , unsafe { :: zerovec :: ZeroVec :: from_bytes_unchecked (b"\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x02\x03\x03\x01\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\0\0\0\0\0\0\0\0\0\x0E\0\0\0\x03\x0E\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x04\x04\x04\x04\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\0\x04\x04\0\x04\x04\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x07\x07\x07\x07\x07\x07\0\0\0\0\0\0\0\0\0\0\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x04\x04\x04\x04\x04\x04\x07\0\x04\x04\x04\x04\x04\x04\0\0\x04\x04\0\x04\x04\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x07\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x04\x04\x04\x04\x04\x04\x04\x04\0\0\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x04\x04\x04\0\x04\x04\x04\x04\x04\x04\x04\x04\x04\0\x04\x04\x04\0\x04\x04\x04\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x07\x07\0\0\0\0\0\0\x04\x04\x04\x04\x04\x04\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x07\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x08\x04\0\x08\x08\x04\x04\x04\x04\x04\x04\x04\x04\x08\x08\x08\x08\x04\x08\x08\0\x04\x04\x04\x04\x04\x04\x04\0\0\0\0\0\0\0\0\0\0\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x08\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\x04\x08\x04\x04\x04\x04\0\0\x08\x08\0\0\x08\x08\x04\0\0\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\x04\x04\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\x08\x08\x04\x04\0\0\0\0\x04\x04\0\0\x04\x04\x04\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x04\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\x08\x04\x04\x04\x04\x04\0\x04\x04\x08\0\x08\x08\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x04\x04\x04\x04\x04\0\x04\x08\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\x04\x04\x08\x04\x04\x04\x04\0\0\x08\x08\0\0\x08\x08\x04\0\0\0\0\0\0\0\x04\x04\x04\0\0\0\0\0\0\0\0\0\0\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x08\x08\0\0\0\x08\x08\x08\0\x08\x08\x08\x04\0\0\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x08\x08\x08\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\x04\x04\x08\x08\x08\x08\0\x04\x04\x04\0\x04\x04\x04\x04\0\0\0\0\0\0\0\x04\x04\0\0\0\0\0\0\0\0\0\0\0\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x08\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\x08\x04\x08\x08\x04\x08\x08\0\x04\x08\x08\0\x08\x08\x04\x04\0\0\0\0\0\0\0\x04\x04\0\0\0\0\0\0\0\0\0\0\0\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\0\x04\x04\x08\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x04\0\x04\x08\x04\x04\x04\x04\0\x08\x08\x08\0\x08\x08\x08\x04\x07\0\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x08\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\0\0\0\x04\x08\x08\x04\x04\x04\0\x04\0\x08\x08\x08\x08\x08\x08\x08\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\x08\x04\x04\x04\x04\x04\x04\x04\0\0\0\0\0\0\0\x04\x04\x04\x04\x04\x04\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\x08\x04\x04\x04\x04\x04\x04\x04\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\x04\0\x04\0\0\0\0\x08\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x08\x04\x04\x04\x04\x04\0\x04\x04\0\0\0\0\0\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\0\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\0\0\0\x04\x08\x04\x04\x04\x04\x04\x04\0\x04\x04\x08\x08\x04\x04\0\0\0\0\0\0\x08\x08\x04\x04\0\0\0\0\x04\x04\0\0\x04\0\x08\x04\x04\0\0\0\0\0\0\x04\0\0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\0\0\x04\x04\x04\x08\0\0\0\0\0\0\0\0\0\0\x04\x04\x08\0\0\0\0\0\0\0\0\0\0\0\x04\x04\x08\x04\x04\x04\x04\x04\x04\x04\x08\x08\x08\x08\x08\x08\x04\x08\x08\x04\x04\x04\x04\x04\x04\x04\0\0\0\0\0\0\0\0\0\0\0\x04\x04\x04\x03\x04\x04\x04\x08\x08\x08\x08\x04\x04\x08\x08\x08\0\0\0\0\x08\x08\x04\x08\x08\x08\x08\x08\x08\x04\x04\x04\0\0\0\0\0\0\0\x04\x04\x08\x08\x04\0\0\0\0\0\x08\x04\x08\x04\x04\x04\x04\x04\x04\x04\0\x04\0\0\x04\x04\x04\x04\x04\x04\x04\x04\x08\x08\x08\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\0\0\x04\0\0\0\0\x04\x04\x04\x04\x04\x04\x04\x08\x04\x08\x08\x08\x04\x08\x08\0\0\0\0\0\0\0\0\0\0\0\x08\x04\x04\x04\x04\x08\x08\x04\x04\x08\x04\x04\x04\0\0\0\0\0\0\x04\x08\x04\x04\x08\x08\x08\x04\x08\x04\0\0\0\0\x08\x08\x08\x08\x08\x08\x08\x08\x04\x04\x04\x04\x08\x08\x04\x04\0\0\0\0\0\0\0\0\x04\x08\x04\x04\x04\x04\x04\x04\x04\0\0\0\0\x04\0\0\x08\x04\x04\0\0\0\0\0\0\0\0\0\0\0\x03\x04\x05\x03\x03\0\0\0\0\0\0\0\0\x03\x03\x03\x03\x03\x03\x03\0\0\0\0\0\0\0\0\0\0\0\0\x0E\0\0\0\0\0\0\0\0\0\0\0\0\0\x0E\x0E\x0E\x0E\x0E\x0E\0\0\0\0\0\0\0\0\0\x0E\x0E\0\0\0\0\0\0\0\0\0\0\x0E\x0E\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x0E\x0E\x0E\x0E\x0E\x0E\x0E\0\0\0\0\x0E\x0E\x0E\0\0\0\0\0\0\0\0\0\0\0\x0E\x0E\x0E\x0E\0\x0E\x0E\x0E\x0E\x0E\x0E\0\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\0\0\0\0\0\0\0\0\0\0\x0E\x0E\x0E\x0E\x0E\x0E\0\0\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\0\x0E\0\x0E\0\0\0\0\0\0\x0E\0\0\0\0\0\0\0\x0E\0\0\x0E\0\0\0\0\x0E\0\x0E\0\0\0\x0E\x0E\x0E\0\x0E\0\0\0\0\0\0\0\0\x0E\x0E\x0E\x0E\x0E\0\0\0\0\0\0\0\0\x0E\x0E\x0E\0\0\0\0\0\0\0\0\x0E\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x0E\x0E\0\0\0\0\x0E\0\0\0\0\0\0\0\0\0\0\0\0\x0E\0\0\0\0\0\0\0\x0E\0\x0E\0\0\0\0\0\0\x04\x04\x04\0\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\0\0\x04\0\0\0\x04\0\0\0\0\x04\0\0\0\0\x08\x08\x04\x04\x08\0\0\0\0\x04\0\0\0\0\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\t\t\t\t\t\t\t\t\t\t\t\t\t\0\0\0\x04\x08\x08\x04\x04\x04\x04\x08\x08\x04\x04\x08\x08\0\0\0\0\0\0\0\0\0\x04\x04\x04\x04\x04\x04\x08\x04\x04\x08\x08\x04\x04\0\0\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0\0\x04\x08\0\0\x04\0\x04\x04\x04\0\0\x04\x04\0\0\0\0\0\x04\x04\0\0\0\0\0\0\0\0\0\0\0\x08\x04\x04\x08\x08\0\0\0\0\0\x08\x04\0\0\0\0\0\0\0\0\0\x08\x08\x04\x08\x08\x04\x08\x08\0\x08\x04\0\0\x0C\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\x0C\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\0\0\0\0\0\0\0\0\0\0\0\0\n\n\n\n\n\n\n\0\0\0\0\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\0\0\0\0\0\0\x04\x04\x04\x04\x04\0\0\0\0\0\x04\x04\x04\0\x04\x04\0\0\0\0\0\x04\x04\x04\x04\0\0\0\0\0\0\0\0\x04\x04\x04\0\0\0\0\x04\x04\x04\x04\0\0\0\0\0\0\0\0\0\0\x08\x04\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\0\x04\x04\0\0\0\0\0\0\0\0\0\0\x04\x08\x08\x08\x04\x04\x04\x04\x08\x08\x04\x04\0\0\x07\0\0\x04\0\0\0\0\0\0\0\0\0\0\x07\0\0\0\0\0\0\0\x04\x04\x04\x04\x04\x08\x04\x04\x04\0\0\0\x08\x08\x08\x04\x04\x04\x04\x04\x04\x04\x04\x04\x08\0\x07\x07\0\0\0\0\0\x04\x04\x04\x04\0\x08\x04\0\0\0\0\0\0\0\0\0\0\0\0\x08\x08\x08\x04\x04\x08\x08\x04\x08\x04\x04\0\0\0\0\0\0\x04\0\x08\x08\x08\x04\x04\x04\x04\x04\x04\x04\x04\0\0\0\0\0\x04\x08\x08\x08\x08\0\0\x08\x08\0\0\x08\x08\x08\0\0\x08\x08\0\0\x04\x04\x04\x04\x04\x04\x04\0\0\0\0\0\x08\x08\x08\x04\x04\x04\x04\x04\x04\x04\x04\x08\x08\x04\x04\x04\x08\x04\0\0\0\0\0\0\0\0\0\x04\x08\x08\x04\x04\x04\x04\x04\x04\x08\x04\x08\x08\x04\x08\x04\x04\0\0\0\0\0\0\0\0\0\0\0\0\x08\x08\x04\x04\x04\x04\0\0\x08\x08\x08\x08\x04\x04\x08\x04\x08\x08\x08\x04\x04\x04\x04\x04\x04\x04\x04\x08\x08\x04\x08\x04\0\0\0\0\0\0\0\0\0\0\0\x04\x08\x04\x08\x08\x04\x04\x04\x04\x04\x04\x08\x04\0\0\0\0\0\0\0\0\x04\x04\x04\x04\x08\x04\x04\x04\x04\x04\0\0\0\0\x04\x04\x04\x04\x04\x04\x04\x04\x08\x04\x04\0\0\0\0\0\x04\x08\x08\x08\x08\x08\0\x08\x08\0\0\x04\x04\x08\x04\x07\x08\x07\x08\x04\0\0\0\0\0\0\0\0\0\0\0\0\x08\x08\x08\x04\x04\x04\x04\0\0\x04\x04\x08\x08\x08\x08\x04\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\0\0\0\0\0\x04\x04\x04\x04\x04\x04\x08\x07\x04\x04\x04\x04\0\x04\x04\x04\x04\x04\x04\x08\x08\x04\x04\x04\0\0\0\0\x07\x07\x07\x07\x07\x07\x04\x04\x04\x04\x04\x04\x04\x08\x04\x04\0\0\0\0\0\0\x04\x04\x04\x04\x04\x04\x04\0\x04\x04\x04\x04\x04\x04\x08\x04\x04\x04\x04\x04\x04\x04\x04\0\x08\x04\x04\x04\x04\x04\x04\x08\x04\x04\x08\x04\x04\0\0\0\0\0\0\0\0\0\x04\x04\x04\x04\x04\x04\0\0\0\x04\0\x04\x04\0\x04\x04\x04\x04\x04\x04\x07\x04\0\0\0\0\0\0\0\0\0\0\x08\x08\x08\x08\x08\0\x04\x04\0\x08\x08\x04\x08\x04\0\0\0\0\0\0\0\0\x04\x04\x07\x08\0\0\0\0\0\0\0\0\0\0\0\0\x08\x08\x04\x04\x04\x04\x04\0\0\0\x08\x08\x04\x08\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\x04\x04\x04\x04\x04\x04\x04\x04\x04\0\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\0\0\0\0\0\0\0\x04\x08\x04\x04\x04\0\0\0\x08\x04\x04\x04\x03\x03\x03\x03\x03\x03\x03\x03\x04\x04\x04\x04\x04\0\0\x04\x04\x04\x04\x04\x04\x04\0\0\0\0\0\0\0\0\0\0\x04\x04\x04\x04\0\0\x04\x04\x04\x04\x04\x04\x04\0\0\0\0\x04\x04\x04\x04\x04\x04\x04\x04\x04\0\0\x04\x04\x04\x04\x04\0\x04\x04\0\x04\x04\x04\x04\x04\0\0\0\0\0\x0E\x0E\0\0\0\0\0\0\0\0\0\0\0\0\x0E\x0E\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x0E\0\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\0\0\0\0\0\x0E\x0E\x0E\x0E\x0E\x0E\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\0\0\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\0\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x04\x04\x04\x04\x04\0\0\0\0\0\0\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\0\0\0\0\0\0\0\0\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\x0E\0\x0E\x0E\x0E\x0E\0\0") } , 0u8) ,) , break_state_table : :: icu::segmenter :: provider :: RuleBreakStateTable (unsafe { :: zerovec :: ZeroVec :: from_bytes_unchecked (b"\x80\x80\x80\x80\xFF\xFF\x80\x80\xFF\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\xFF\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\xFF\xFF\x80\x80\xFF\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\xFF\xFF\x80\x80\xFF\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\xFF\xFF\x11\x80\xFF\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\xFF\x80\x80\x80\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x80\x80\x80\x80\x80\x80\x80\x80\x80\xFF\xFF\x80\x80\xFF\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\xFF\xFF\x80\x80\xFF\xFF\xFF\x80\xFF\xFF\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\xFF\xFF\x80\x80\xFF\x80\xFF\xFF\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\xFF\xFF\x80\x80\xFF\x80\x80\xFF\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\xFF\xFF\x80\x80\xFF\x80\xFF\xFF\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\xFF\xFF\x80\x80\xFF\x80\x80\xFF\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x0F\x10\x80\x80\xFF\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x0F\x10\x80\x80\xFF\x80\x80\x80\x80\x80\x80\xFE\xFE\xFE\xFE\x80\x80\x80\x80\x80\xFF\xFF\x80\x80\xFF\x80\x80\x80\x80\x80\xFF\xFE\xFE\xFE\xFE\x80\x80\xFE\xFE\xFE\xFF\xFF\x80\xFE\xFE\xFE\xFE\xFE\xFE\xFE\xFE\xFE\xFE\xFE\xFE\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80") } ,) , rule_status_table : :: icu::segmenter :: provider :: RuleStatusTable (:: zerovec :: ZeroVec :: new () ,) , property_count : 20u8 , last_codepoint_property : 14i8 , sot_property : 18u8 , eot_property : 19u8 , complex_property : 127u8 , } \ No newline at end of file diff --git a/commons/src/scanner/icu_data/segmenter/mod.rs b/commons/src/scanner/icu_data/segmenter/mod.rs deleted file mode 100644 index 08825d5f..00000000 --- a/commons/src/scanner/icu_data/segmenter/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -// @generated -pub mod grapheme_v1 ; \ No newline at end of file diff --git a/commons/src/scanner/mod.rs b/commons/src/scanner/mod.rs index 43b8a352..bfcd8eed 100644 --- a/commons/src/scanner/mod.rs +++ b/commons/src/scanner/mod.rs @@ -1,49 +1,36 @@ //! Scanner and helper types and traits for structurization of Unimarkup input. +use icu_segmenter::GraphemeClusterSegmenter; + pub mod position; pub mod span; mod symbol; -use icu::segmenter::{GraphemeClusterSegmenter, SegmenterError}; -use icu_provider_adapters::fallback::LocaleFallbackProvider; use position::{Offset, Position as SymPos}; pub use symbol::{iterator::*, Symbol, SymbolKind}; -#[derive(Debug, Clone)] -struct IcuDataProvider; -// Generated using: `icu4x-datagen --keys-for-bin .\target\debug\unimarkup.exe --locales full --format mod --out .\commons\src\scanner\icu_data` -// Note: Run `cargo build` before re-generating the data to ensure the newest binary is inspected by icu4x-datagen. -include!("./icu_data/mod.rs"); -impl_data_provider!(IcuDataProvider); - #[derive(Debug)] pub struct Scanner { - provider: LocaleFallbackProvider, segmenter: GraphemeClusterSegmenter, } impl Clone for Scanner { fn clone(&self) -> Self { - let segmenter = GraphemeClusterSegmenter::try_new_unstable(&self.provider) - .expect("Provider is valid at this point."); + Scanner::new() + } +} - Self { - provider: self.provider.clone(), - segmenter, - } +impl Default for Scanner { + fn default() -> Self { + Self::new() } } impl Scanner { - pub fn try_new() -> Result { - let icu_data_provider = IcuDataProvider; - let fallback_provider = LocaleFallbackProvider::try_new_unstable(icu_data_provider)?; - let segmenter = GraphemeClusterSegmenter::try_new_unstable(&fallback_provider)?; - - Ok(Self { - provider: fallback_provider, - segmenter, - }) + pub fn new() -> Self { + let segmenter = GraphemeClusterSegmenter::new(); + + Self { segmenter } } pub fn scan_str<'s>(&self, input: &'s str) -> Vec> { diff --git a/commons/src/scanner/symbol/iterator/mod.rs b/commons/src/scanner/symbol/iterator/mod.rs index 70ff5463..e6235039 100644 --- a/commons/src/scanner/symbol/iterator/mod.rs +++ b/commons/src/scanner/symbol/iterator/mod.rs @@ -328,9 +328,7 @@ mod test { #[test] fn peek_while_index() { - let symbols = Scanner::try_new() - .expect("Must be valid provider.") - .scan_str("## "); + let symbols = Scanner::new().scan_str("## "); let mut iterator = SymbolIterator::from(&symbols); let hash_cnt = iterator @@ -355,9 +353,7 @@ mod test { #[test] fn peek_next() { - let symbols = Scanner::try_new() - .expect("Must be valid provider.") - .scan_str("#*"); + let symbols = Scanner::new().scan_str("#*"); let mut iterator = SymbolIterator::from(&symbols); @@ -391,9 +387,7 @@ mod test { #[test] fn reach_end() { - let symbols = Scanner::try_new() - .expect("Must be valid provider.") - .scan_str("text*"); + let symbols = Scanner::new().scan_str("text*"); let mut iterator = SymbolIterator::from(&symbols).nest( None, @@ -421,9 +415,7 @@ mod test { #[test] fn with_nested_and_parent_prefix() { - let symbols = Scanner::try_new() - .expect("Must be valid provider.") - .scan_str("a\n* *b"); + let symbols = Scanner::new().scan_str("a\n* *b"); let iterator = SymbolIterator::with( &symbols, @@ -460,9 +452,7 @@ mod test { #[test] fn depth_matcher() { - let symbols = Scanner::try_new() - .expect("Must be valid provider.") - .scan_str("[o [i]]"); + let symbols = Scanner::new().scan_str("[o [i]]"); let mut iterator = SymbolIterator::with( &symbols, diff --git a/commons/src/test_runner/mod.rs b/commons/src/test_runner/mod.rs index e0cd5a74..e1f7dfbf 100644 --- a/commons/src/test_runner/mod.rs +++ b/commons/src/test_runner/mod.rs @@ -9,6 +9,5 @@ pub use insta; /// Scans the string using the [`Scanner`] struct. pub fn scan_str(input: &str) -> Vec { - let scanner = Scanner::try_new().unwrap(); - scanner.scan_str(input) + Scanner::new().scan_str(input) } diff --git a/commons/src/test_runner/snap_test_runner.rs b/commons/src/test_runner/snap_test_runner.rs index 6ddf585b..700d2c85 100644 --- a/commons/src/test_runner/snap_test_runner.rs +++ b/commons/src/test_runner/snap_test_runner.rs @@ -123,7 +123,7 @@ macro_rules! test_parser_snap { ($paths:expr, $parser_fn:expr) => { let test_content = $crate::test_runner::test_file::get_test_content($paths.0, $paths.1); let cfg = $crate::config::Config::default(); - let scanner = $crate::scanner::Scanner::try_new().unwrap(); + let scanner = $crate::scanner::Scanner::new(); for test in &test_content.test_file.tests { let symbols = scanner.scan_str(&test.input); diff --git a/parser/src/parser.rs b/parser/src/parser.rs index c558dc01..8c78e917 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -160,9 +160,7 @@ impl MainParser { pub fn parse_unimarkup(um_content: &str, config: &mut Config) -> Document { let parser = MainParser::default(); - let symbols = Scanner::try_new() - .expect("Must be valid provider.") - .scan_str(um_content); + let symbols = Scanner::new().scan_str(um_content); let mut symbols_iter = SymbolIterator::from(&symbols); let blocks = parser.parse(&mut symbols_iter); From f31143bf782a9a65dcb565ef34642ae1f2ebc6a3 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Mon, 25 Sep 2023 20:19:14 +0200 Subject: [PATCH 28/43] fix: remove crate_authors!() due to clippy warning Behavior remains the same, because this was the default anyways. --- commons/src/config/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commons/src/config/mod.rs b/commons/src/config/mod.rs index 6977d31c..27a03cf2 100644 --- a/commons/src/config/mod.rs +++ b/commons/src/config/mod.rs @@ -1,6 +1,6 @@ use std::{collections::HashSet, path::PathBuf}; -use clap::{crate_authors, Args, Parser}; +use clap::{Args, Parser}; use logid::err; use serde::{Deserialize, Serialize}; @@ -43,7 +43,7 @@ pub trait ConfigFns { } #[derive(Parser, Debug, PartialEq, Eq, Clone, Default, Serialize, Deserialize)] -#[command(name = UNIMARKUP_NAME, help_template = HELP_TEMPLATE, author = crate_authors!(", "), version, about = ABOUT, long_about = None)] +#[command(name = UNIMARKUP_NAME, help_template = HELP_TEMPLATE, author, version, about = ABOUT, long_about = None)] pub struct Config { #[command(flatten)] pub preamble: Preamble, From 37460271f617479bab3b376115d4a629c8f47554 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Mon, 25 Sep 2023 21:47:29 +0200 Subject: [PATCH 29/43] chore: remove lock file from vc after icu bump --- .gitignore | 4 + Cargo.lock | 1609 ---------------------------------------------------- 2 files changed, 4 insertions(+), 1609 deletions(-) delete mode 100644 Cargo.lock diff --git a/.gitignore b/.gitignore index 26c6c44f..4a3a6617 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,10 @@ # will have compiled files and executables /target/ +# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries +# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html +Cargo.lock + # These are backup files generated by rustfmt **/*.rs.bk diff --git a/Cargo.lock b/Cargo.lock deleted file mode 100644 index 9a88b825..00000000 --- a/Cargo.lock +++ /dev/null @@ -1,1609 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - -[[package]] -name = "aho-corasick" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea5d730647d4fadd988536d06fecce94b7b4f2a7efdae548f1cf4b63205518ab" -dependencies = [ - "memchr", -] - -[[package]] -name = "anstream" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1f58811cfac344940f1a400b6e6231ce35171f614f26439e80f8c1465c5cc0c" -dependencies = [ - "anstyle", - "anstyle-parse", - "anstyle-query", - "anstyle-wincon", - "colorchoice", - "utf8parse", -] - -[[package]] -name = "anstyle" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b84bf0a05bbb2a83e5eb6fa36bb6e87baa08193c35ff52bbf6b38d8af2890e46" - -[[package]] -name = "anstyle-parse" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333" -dependencies = [ - "utf8parse", -] - -[[package]] -name = "anstyle-query" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" -dependencies = [ - "windows-sys 0.48.0", -] - -[[package]] -name = "anstyle-wincon" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58f54d10c6dfa51283a066ceab3ec1ab78d13fae00aa49243a45e4571fb79dfd" -dependencies = [ - "anstyle", - "windows-sys 0.48.0", -] - -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "base64" -version = "0.21.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2" - -[[package]] -name = "bincode" -version = "1.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" -dependencies = [ - "serde", -] - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bitflags" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" - -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - -[[package]] -name = "cc" -version = "1.0.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" -dependencies = [ - "libc", -] - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "clap" -version = "4.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1d7b8d5ec32af0fadc644bf1fd509a688c2103b185644bb1e29d164e0703136" -dependencies = [ - "clap_builder", - "clap_derive", -] - -[[package]] -name = "clap_builder" -version = "4.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5179bb514e4d7c2051749d8fcefa2ed6d06a9f4e6d69faf3805f5d80b8cf8d56" -dependencies = [ - "anstream", - "anstyle", - "clap_lex", - "strsim", -] - -[[package]] -name = "clap_derive" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0862016ff20d69b84ef8247369fabf5c008a7417002411897d40ee1f4532b873" -dependencies = [ - "heck 0.4.1", - "proc-macro2", - "quote", - "syn 2.0.37", -] - -[[package]] -name = "clap_lex" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961" - -[[package]] -name = "colorchoice" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" - -[[package]] -name = "colored" -version = "2.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2674ec482fbc38012cf31e6c42ba0177b431a0cb6f15fe40efa5aab1bda516f6" -dependencies = [ - "is-terminal", - "lazy_static", - "windows-sys 0.48.0", -] - -[[package]] -name = "console" -version = "0.15.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" -dependencies = [ - "encode_unicode", - "lazy_static", - "libc", - "windows-sys 0.45.0", -] - -[[package]] -name = "core_maths" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3b02505ccb8c50b0aa21ace0fc08c3e53adebd4e58caa18a36152803c7709a3" -dependencies = [ - "libm", -] - -[[package]] -name = "cpufeatures" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" -dependencies = [ - "libc", -] - -[[package]] -name = "crc32fast" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - -[[package]] -name = "deranged" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946" - -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", -] - -[[package]] -name = "displaydoc" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.37", -] - -[[package]] -name = "either" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" - -[[package]] -name = "encode_unicode" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" - -[[package]] -name = "errno" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "136526188508e25c6fef639d7927dfb3e0e3084488bf202267829cf7fc23dbdd" -dependencies = [ - "errno-dragonfly", - "libc", - "windows-sys 0.48.0", -] - -[[package]] -name = "errno-dragonfly" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" -dependencies = [ - "cc", - "libc", -] - -[[package]] -name = "evident" -version = "0.12.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3d4868bb797867d4017c78b2ff622d69925c8c79298fc80064e8f9b7af4414c" -dependencies = [ - "once_cell", - "uuid", -] - -[[package]] -name = "flate2" -version = "1.0.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010" -dependencies = [ - "crc32fast", - "miniz_oxide", -] - -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - -[[package]] -name = "form_urlencoded" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" -dependencies = [ - "percent-encoding", -] - -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - -[[package]] -name = "getrandom" -version = "0.2.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - -[[package]] -name = "heck" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" -dependencies = [ - "unicode-segmentation", -] - -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - -[[package]] -name = "hermit-abi" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" - -[[package]] -name = "icu_collections" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b222d891e7bd8c3fb8122cbf255c5e7763ee4824f3620d54a009077c30539fe1" -dependencies = [ - "displaydoc", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_locid" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56b72c6de0121c00da9828eb3e2603041d563788289bb15feba7c3331de71b5f" -dependencies = [ - "displaydoc", - "litemap", - "tinystr", - "writeable", -] - -[[package]] -name = "icu_provider" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5d3810a06fce5c900f8ace41b72abf8f6308f77c9e7647211aa5f121c0c9f43" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_provider_macros", - "stable_deref_trait", - "tinystr", - "writeable", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_provider_macros" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca9be8af0b117ccf1516251daab4c9137c012646a211c2a02d2f568ea3cd0df4" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.37", -] - -[[package]] -name = "icu_segmenter" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9221a9db12a8026cd94f3a171a6514763daafadd64bc0f127c05a5f09836ded" -dependencies = [ - "core_maths", - "displaydoc", - "icu_collections", - "icu_locid", - "icu_provider", - "icu_segmenter_data", - "utf8_iter", - "zerovec", -] - -[[package]] -name = "icu_segmenter_data" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23ca0059266f591bfb7cac9ee3fce5f9861beaa4532ef3629653653acba0a94c" - -[[package]] -name = "idna" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" -dependencies = [ - "unicode-bidi", - "unicode-normalization", -] - -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown", -] - -[[package]] -name = "insta" -version = "1.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e02c584f4595792d09509a94cdb92a3cef7592b1eb2d9877ee6f527062d0ea" -dependencies = [ - "console", - "lazy_static", - "linked-hash-map", - "serde", - "similar", - "yaml-rust", -] - -[[package]] -name = "is-terminal" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" -dependencies = [ - "hermit-abi", - "rustix", - "windows-sys 0.48.0", -] - -[[package]] -name = "itertools" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" - -[[package]] -name = "keccak" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f6d5ed8676d904364de097082f4e7d240b571b67989ced0240f08b7f966f940" -dependencies = [ - "cpufeatures", -] - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "libc" -version = "0.2.148" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" - -[[package]] -name = "libm" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" - -[[package]] -name = "libtest-mimic" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d8de370f98a6cb8a4606618e53e802f93b094ddec0f96988eaec2c27e6e9ce7" -dependencies = [ - "clap", - "termcolor", - "threadpool", -] - -[[package]] -name = "line-wrap" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30344350a2a51da54c1d53be93fade8a237e545dbcc4bdbe635413f2117cab9" -dependencies = [ - "safemem", -] - -[[package]] -name = "linked-hash-map" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" - -[[package]] -name = "linux-raw-sys" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a9bad9f94746442c783ca431b22403b519cd7fbeed0533fdd6328b2f2212128" - -[[package]] -name = "litemap" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a1a2647d5b7134127971a6de0d533c49de2159167e7f259c427195f87168a1" - -[[package]] -name = "log" -version = "0.4.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" - -[[package]] -name = "logid" -version = "0.12.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c7913b788929e0ffaef3c1e27e6342749323c126d9a4aeba490d361310f1b1e" -dependencies = [ - "colored", - "logid-core", - "logid-derive", -] - -[[package]] -name = "logid-core" -version = "0.12.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "989118724a635a3c6986f7ebee65a6a3e055a47b6f7b01313a61f88cd038369e" -dependencies = [ - "evident", - "lsp-types", -] - -[[package]] -name = "logid-derive" -version = "0.12.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bd85b3166760ef3bfa2348c3427b77f9360d25b2be57127ea3e03a83165d000" -dependencies = [ - "logid-core", - "proc-macro2", - "quote", - "syn 2.0.37", -] - -[[package]] -name = "lsp-types" -version = "0.94.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c66bfd44a06ae10647fe3f8214762e9369fd4248df1350924b4ef9e770a85ea1" -dependencies = [ - "bitflags 1.3.2", - "serde", - "serde_json", - "serde_repr", - "url", -] - -[[package]] -name = "memchr" -version = "2.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" - -[[package]] -name = "miniz_oxide" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" -dependencies = [ - "adler", -] - -[[package]] -name = "nu-ansi-term" -version = "0.46.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" -dependencies = [ - "overload", - "winapi", -] - -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi", - "libc", -] - -[[package]] -name = "once_cell" -version = "1.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" - -[[package]] -name = "onig" -version = "6.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f" -dependencies = [ - "bitflags 1.3.2", - "libc", - "once_cell", - "onig_sys", -] - -[[package]] -name = "onig_sys" -version = "69.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c35b1f086e7" -dependencies = [ - "cc", - "pkg-config", -] - -[[package]] -name = "overload" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" - -[[package]] -name = "percent-encoding" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" - -[[package]] -name = "pin-project-lite" -version = "0.2.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" - -[[package]] -name = "pkg-config" -version = "0.3.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" - -[[package]] -name = "plist" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdc0001cfea3db57a2e24bc0d818e9e20e554b5f97fabb9bc231dc240269ae06" -dependencies = [ - "base64", - "indexmap", - "line-wrap", - "quick-xml", - "serde", - "time", -] - -[[package]] -name = "ppv-lite86" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - -[[package]] -name = "proc-macro2" -version = "1.0.67" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "quick-xml" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81b9228215d82c7b61490fec1de287136b5de6f5700f6e58ea9ad61a7964ca51" -dependencies = [ - "memchr", -] - -[[package]] -name = "quote" -version = "1.0.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - -[[package]] -name = "regex" -version = "1.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" - -[[package]] -name = "rustix" -version = "0.38.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "747c788e9ce8e92b12cd485c49ddf90723550b654b32508f979b71a7b1ecda4f" -dependencies = [ - "bitflags 2.4.0", - "errno", - "libc", - "linux-raw-sys", - "windows-sys 0.48.0", -] - -[[package]] -name = "ryu" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" - -[[package]] -name = "safemem" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072" - -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "serde" -version = "1.0.188" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.188" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.37", -] - -[[package]] -name = "serde_json" -version = "1.0.107" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65" -dependencies = [ - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "serde_repr" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8725e1dfadb3a50f7e5ce0b1a540466f6ed3fe7a0fca2ac2b8b831d31316bd00" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.37", -] - -[[package]] -name = "serde_yaml" -version = "0.8.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578a7433b776b56a35785ed5ce9a7e777ac0598aac5a6dd1b4b18a307c7fc71b" -dependencies = [ - "indexmap", - "ryu", - "serde", - "yaml-rust", -] - -[[package]] -name = "sha3" -version = "0.10.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75872d278a8f37ef87fa0ddbda7802605cb18344497949862c0d4dcb291eba60" -dependencies = [ - "digest", - "keccak", -] - -[[package]] -name = "sharded-slab" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31" -dependencies = [ - "lazy_static", -] - -[[package]] -name = "similar" -version = "2.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "420acb44afdae038210c99e69aae24109f32f15500aa708e81d46c9f29d55fcf" - -[[package]] -name = "smallvec" -version = "1.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" - -[[package]] -name = "stable_deref_trait" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" - -[[package]] -name = "strsim" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" - -[[package]] -name = "strum" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7ac893c7d471c8a21f31cfe213ec4f6d9afeed25537c772e08ef3f005f8729e" - -[[package]] -name = "strum_macros" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "339f799d8b549e3744c7ac7feb216383e4005d94bdb22561b3ab8f3b808ae9fb" -dependencies = [ - "heck 0.3.3", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "syn" -version = "2.0.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7303ef2c05cd654186cb250d29049a24840ca25d2747c25c0381c8d9e2f582e8" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "synstructure" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "285ba80e733fac80aa4270fbcdf83772a79b80aa35c97075320abfee4a915b06" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.37", - "unicode-xid", -] - -[[package]] -name = "syntect" -version = "5.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e02b4b303bf8d08bfeb0445cba5068a3d306b6baece1d5582171a9bf49188f91" -dependencies = [ - "bincode", - "bitflags 1.3.2", - "flate2", - "fnv", - "once_cell", - "onig", - "plist", - "regex-syntax", - "serde", - "serde_json", - "thiserror", - "walkdir", - "yaml-rust", -] - -[[package]] -name = "termcolor" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6093bad37da69aab9d123a8091e4be0aa4a03e4d601ec641c327398315f62b64" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "thiserror" -version = "1.0.48" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d6d7a740b8a666a7e828dd00da9c0dc290dff53154ea77ac109281de90589b7" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.48" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.37", -] - -[[package]] -name = "thread_local" -version = "1.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" -dependencies = [ - "cfg-if", - "once_cell", -] - -[[package]] -name = "threadpool" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" -dependencies = [ - "num_cpus", -] - -[[package]] -name = "time" -version = "0.3.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "426f806f4089c493dcac0d24c29c01e2c38baf8e30f1b716ee37e83d200b18fe" -dependencies = [ - "deranged", - "itoa", - "serde", - "time-core", - "time-macros", -] - -[[package]] -name = "time-core" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" - -[[package]] -name = "time-macros" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20" -dependencies = [ - "time-core", -] - -[[package]] -name = "tinystr" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b07bb54ef1f8ff27564b08b861144d3b8d40263efe07684f64987f4c0d044e3e" -dependencies = [ - "displaydoc", -] - -[[package]] -name = "tinyvec" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" -dependencies = [ - "tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - -[[package]] -name = "tracing" -version = "0.1.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" -dependencies = [ - "cfg-if", - "pin-project-lite", - "tracing-attributes", - "tracing-core", -] - -[[package]] -name = "tracing-attributes" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.37", -] - -[[package]] -name = "tracing-core" -version = "0.1.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" -dependencies = [ - "once_cell", - "valuable", -] - -[[package]] -name = "tracing-log" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ddad33d2d10b1ed7eb9d1f518a5674713876e97e5bb9b7345a7984fbb4f922" -dependencies = [ - "lazy_static", - "log", - "tracing-core", -] - -[[package]] -name = "tracing-subscriber" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77" -dependencies = [ - "nu-ansi-term", - "sharded-slab", - "smallvec", - "thread_local", - "tracing-core", - "tracing-log", -] - -[[package]] -name = "typenum" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" - -[[package]] -name = "unicode-bidi" -version = "0.3.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" - -[[package]] -name = "unicode-ident" -version = "1.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" - -[[package]] -name = "unicode-normalization" -version = "0.1.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" -dependencies = [ - "tinyvec", -] - -[[package]] -name = "unicode-segmentation" -version = "1.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" - -[[package]] -name = "unicode-xid" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" - -[[package]] -name = "unimarkup" -version = "0.4.0" -dependencies = [ - "clap", - "logid", - "thiserror", - "tracing", - "tracing-subscriber", - "unimarkup-commons", - "unimarkup-core", -] - -[[package]] -name = "unimarkup-commons" -version = "0.4.0" -dependencies = [ - "clap", - "icu_locid", - "icu_segmenter", - "insta", - "itertools", - "logid", - "once_cell", - "regex", - "serde", - "serde_json", - "serde_yaml", - "thiserror", -] - -[[package]] -name = "unimarkup-core" -version = "0.4.0" -dependencies = [ - "logid", - "thiserror", - "unimarkup-commons", - "unimarkup-parser", - "unimarkup-render", -] - -[[package]] -name = "unimarkup-inline" -version = "0.4.0" -dependencies = [ - "libtest-mimic", - "logid", - "serde", - "serde_yaml", - "unicode-segmentation", - "unimarkup-commons", -] - -[[package]] -name = "unimarkup-parser" -version = "0.4.0" -dependencies = [ - "clap", - "logid", - "regex", - "serde", - "serde_json", - "serde_yaml", - "sha3", - "strum", - "strum_macros", - "thiserror", - "unimarkup-commons", - "unimarkup-inline", -] - -[[package]] -name = "unimarkup-render" -version = "0.4.0" -dependencies = [ - "logid", - "once_cell", - "syntect", - "thiserror", - "unimarkup-commons", - "unimarkup-inline", - "unimarkup-parser", -] - -[[package]] -name = "url" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", - "serde", -] - -[[package]] -name = "utf8_iter" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64a8922555b9500e3d865caed19330172cd67cbf82203f1a3311d8c305cc9f33" - -[[package]] -name = "utf8parse" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" - -[[package]] -name = "uuid" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d" -dependencies = [ - "getrandom", - "rand", -] - -[[package]] -name = "valuable" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" - -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - -[[package]] -name = "walkdir" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" -dependencies = [ - "same-file", - "winapi-util", -] - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" -dependencies = [ - "winapi", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-sys" -version = "0.45.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" -dependencies = [ - "windows-targets 0.42.2", -] - -[[package]] -name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets 0.48.5", -] - -[[package]] -name = "windows-targets" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - -[[package]] -name = "windows_i686_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" - -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - -[[package]] -name = "windows_i686_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - -[[package]] -name = "writeable" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0af0c3d13faebf8dda0b5256fa7096a2d5ccb662f7b9f54a40fe201077ab1c2" - -[[package]] -name = "yaml-rust" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" -dependencies = [ - "linked-hash-map", -] - -[[package]] -name = "yoke" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e38c508604d6bbbd292dadb3c02559aa7fff6b654a078a36217cad871636e4" -dependencies = [ - "serde", - "stable_deref_trait", - "yoke-derive", - "zerofrom", -] - -[[package]] -name = "yoke-derive" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5e19fb6ed40002bab5403ffa37e53e0e56f914a4450c8765f533018db1db35f" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.37", - "synstructure", -] - -[[package]] -name = "zerofrom" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "655b0814c5c0b19ade497851070c640773304939a6c0fd5f5fb43da0696d05b7" -dependencies = [ - "zerofrom-derive", -] - -[[package]] -name = "zerofrom-derive" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6a647510471d372f2e6c2e6b7219e44d8c574d24fdc11c610a61455782f18c3" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.37", - "synstructure", -] - -[[package]] -name = "zerovec" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1194130c5b155bf8ae50ab16c86ab758cd695cf9ad176d2f870b744cbdbb572e" -dependencies = [ - "yoke", - "zerofrom", - "zerovec-derive", -] - -[[package]] -name = "zerovec-derive" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acabf549809064225ff8878baedc4ce3732ac3b07e7c7ce6e5c2ccdbc485c324" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.37", -] From f8bab5124e0c67f1ef97edf7438e1b45cc281180 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl <49341624+mhatzl@users.noreply.github.com> Date: Fri, 29 Sep 2023 16:55:57 +0200 Subject: [PATCH 30/43] fix: add blankline for better readability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Nadir Fejzić --- commons/src/scanner/symbol/iterator/matcher.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/commons/src/scanner/symbol/iterator/matcher.rs b/commons/src/scanner/symbol/iterator/matcher.rs index 52e75926..0be22950 100644 --- a/commons/src/scanner/symbol/iterator/matcher.rs +++ b/commons/src/scanner/symbol/iterator/matcher.rs @@ -11,6 +11,7 @@ use super::SymbolIterator; /// Function type to notify an iterator if an end was reached. pub type IteratorEndFn = Rc bool)>; + /// Function type to consume prefix sequences of a new line. pub type IteratorPrefixFn = Rc bool)>; From b63b9027a5f6a14e43619cc42645389686b8a830 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl <49341624+mhatzl@users.noreply.github.com> Date: Fri, 29 Sep 2023 17:01:23 +0200 Subject: [PATCH 31/43] fix: use `debug_assert!()` instead of `cfg(debug_assertions)` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Nadir Fejzić --- commons/src/scanner/symbol/iterator/matcher.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/commons/src/scanner/symbol/iterator/matcher.rs b/commons/src/scanner/symbol/iterator/matcher.rs index 0be22950..dc063205 100644 --- a/commons/src/scanner/symbol/iterator/matcher.rs +++ b/commons/src/scanner/symbol/iterator/matcher.rs @@ -140,8 +140,7 @@ impl<'input> EndMatcher for SymbolIterator<'input> { impl<'input> PrefixMatcher for SymbolIterator<'input> { fn consumed_prefix(&mut self, sequence: &[SymbolKind]) -> bool { - #[cfg(debug_assertions)] - assert!( + debug_assert!( !sequence.contains(&SymbolKind::Newline), "Newline symbol in prefix match is not allowed." ); From 0ad2063fd011a6f82a46af55c2f32c76e0424407 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Fri, 29 Sep 2023 17:06:59 +0200 Subject: [PATCH 32/43] fix: make peeking_next() more compact --- commons/src/scanner/symbol/iterator/root.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/commons/src/scanner/symbol/iterator/root.rs b/commons/src/scanner/symbol/iterator/root.rs index f55d8efa..d18a599f 100644 --- a/commons/src/scanner/symbol/iterator/root.rs +++ b/commons/src/scanner/symbol/iterator/root.rs @@ -67,14 +67,8 @@ impl<'input> PeekingNext for SymbolIteratorRoot<'input> { Self: Sized, F: FnOnce(&Self::Item) -> bool, { - let symbol = self.symbols.get(self.peek_index)?; - - if !(accept)(&symbol) { - return None; - } - + let symbol = self.symbols.get(self.peek_index).filter(accept)?; self.peek_index += 1; - Some(symbol) } } From 17e1956b63799e5cad00715a52be3429a06e7410 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Fri, 29 Sep 2023 17:13:35 +0200 Subject: [PATCH 33/43] fix: use owned Vec to create Paragraph from --- parser/src/elements/atomic/paragraph.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/parser/src/elements/atomic/paragraph.rs b/parser/src/elements/atomic/paragraph.rs index 171e3f55..a09af705 100644 --- a/parser/src/elements/atomic/paragraph.rs +++ b/parser/src/elements/atomic/paragraph.rs @@ -28,8 +28,8 @@ pub struct Paragraph { impl Paragraph {} -impl From<&Vec<&'_ Symbol<'_>>> for Paragraph { - fn from(value: &Vec<&'_ Symbol<'_>>) -> Self { +impl From>> for Paragraph { + fn from(value: Vec<&'_ Symbol<'_>>) -> Self { let content = value .iter() .map(|&s| *s) @@ -72,7 +72,7 @@ impl ElementParser for Paragraph { } fn parse(input: Vec>) -> Option { - let block = Block::Paragraph(Paragraph::from(&input)); + let block = Block::Paragraph(Paragraph::from(input)); Some(vec![block]) } From b20952fa1d44f2e8cdc10d40c6e1ece58edf6ed5 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl <49341624+mhatzl@users.noreply.github.com> Date: Fri, 29 Sep 2023 17:18:12 +0200 Subject: [PATCH 34/43] fix: use `iter::once()` to create end sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Nadir Fejzić --- parser/src/elements/enclosed/verbatim.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/parser/src/elements/enclosed/verbatim.rs b/parser/src/elements/enclosed/verbatim.rs index 44976f36..4b62ad80 100644 --- a/parser/src/elements/enclosed/verbatim.rs +++ b/parser/src/elements/enclosed/verbatim.rs @@ -53,8 +53,7 @@ impl ElementParser for Verbatim { .take_while(|s| s.kind != SymbolKind::Newline) .collect::>(); - let end_sequence = [SymbolKind::Newline] - .into_iter() + let end_sequence = std::iter::once(SymbolKind::Newline) .chain(std::iter::repeat(SymbolKind::Tick).take(start_delim_len)) .collect::>(); let mut longer_delim_sequence = end_sequence.clone(); From 0dc18ad2f5955d6d88e6bc6f80a6dfcec418cf87 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl <49341624+mhatzl@users.noreply.github.com> Date: Fri, 29 Sep 2023 17:18:57 +0200 Subject: [PATCH 35/43] fix: remove double dot at end of sentence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Nadir Fejzić --- commons/src/scanner/symbol/iterator/root.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commons/src/scanner/symbol/iterator/root.rs b/commons/src/scanner/symbol/iterator/root.rs index d18a599f..31357ad1 100644 --- a/commons/src/scanner/symbol/iterator/root.rs +++ b/commons/src/scanner/symbol/iterator/root.rs @@ -17,7 +17,7 @@ pub struct SymbolIteratorRoot<'input> { } impl<'input> SymbolIteratorRoot<'input> { - /// Returns the remaining symbols in this iterator, or `None` if there are no symbols left.. + /// Returns the remaining symbols in this iterator, or `None` if there are no symbols left. pub(super) fn remaining_symbols(&self) -> Option<&'input [Symbol<'input>]> { self.symbols.get(self.curr_index..) } From 85f46ff677750b2bec03b45c9d6230bf6b7f25ab Mon Sep 17 00:00:00 2001 From: Manuel Hatzl <49341624+mhatzl@users.noreply.github.com> Date: Fri, 29 Sep 2023 17:22:26 +0200 Subject: [PATCH 36/43] fix: map length before unwrap of remaining_symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Nadir Fejzić --- commons/src/scanner/symbol/iterator/root.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commons/src/scanner/symbol/iterator/root.rs b/commons/src/scanner/symbol/iterator/root.rs index 31357ad1..d8da41dd 100644 --- a/commons/src/scanner/symbol/iterator/root.rs +++ b/commons/src/scanner/symbol/iterator/root.rs @@ -56,7 +56,7 @@ impl<'input> Iterator for SymbolIteratorRoot<'input> { } fn size_hint(&self) -> (usize, Option) { - let len = self.remaining_symbols().unwrap_or(&[]).len(); + let len = self.remaining_symbols().map(<[_]>::len).unwrap_or(0); (len, Some(len)) } } From 6e12f23e4f7ca4018976a35b6ea5d3c9b252bd0c Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Fri, 29 Sep 2023 17:40:31 +0200 Subject: [PATCH 37/43] fix: improve comments for SymbolIterator --- commons/src/scanner/symbol/iterator/mod.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/commons/src/scanner/symbol/iterator/mod.rs b/commons/src/scanner/symbol/iterator/mod.rs index e6235039..dd8a3842 100644 --- a/commons/src/scanner/symbol/iterator/mod.rs +++ b/commons/src/scanner/symbol/iterator/mod.rs @@ -16,6 +16,10 @@ pub use root::*; /// It allows to add matcher functions to notify the iterator, /// when an end of an element is reached, or what prefixes to strip on a new line. /// Additionaly, the iterator may be nested to enable transparent iterating for nested elements. +/// +/// *Transparent* meaning that the nested iterator does not see [`Symbol`]s consumed by the wrapped (parent) iterator. +/// In other words, wrapped iterators control which [`Symbol`]s will be passed to their nested iterator. +/// Therefore, each nested iterator only sees those [`Symbol`]s that are relevant to its scope. #[derive(Clone)] pub struct SymbolIterator<'input> { /// The [`SymbolIteratorKind`] of this iterator. @@ -33,8 +37,6 @@ pub struct SymbolIterator<'input> { } /// The [`SymbolIteratorKind`] defines the kind of a [`SymbolIterator`]. -/// -/// **Note:** This enables iterator nesting. #[derive(Clone)] pub enum SymbolIteratorKind<'input> { /// Defines an iterator as being nested. From 7235dfb8008e4024d8258b4e3edb3d0a101da1b5 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Fri, 29 Sep 2023 17:57:13 +0200 Subject: [PATCH 38/43] fix: remove Scanner struct Provide `scan_str()` as standalone function. --- commons/src/scanner/mod.rs | 131 ++++++++------------ commons/src/scanner/symbol/iterator/mod.rs | 12 +- commons/src/test_runner/mod.rs | 7 -- commons/src/test_runner/snap_test_runner.rs | 3 +- inline/tests/lexer/mod.rs | 4 +- inline/tests/parser/mod.rs | 4 +- parser/src/parser.rs | 4 +- 7 files changed, 68 insertions(+), 97 deletions(-) diff --git a/commons/src/scanner/mod.rs b/commons/src/scanner/mod.rs index bfcd8eed..515ee895 100644 --- a/commons/src/scanner/mod.rs +++ b/commons/src/scanner/mod.rs @@ -1,4 +1,5 @@ -//! Scanner and helper types and traits for structurization of Unimarkup input. +//! Functionality, iterators, helper types and traits to get [`Symbol`]s from `&str`. +//! These [`Symbol`]s and iterators are used to convert the input into a Unimarkup document. use icu_segmenter::GraphemeClusterSegmenter; @@ -9,87 +10,65 @@ mod symbol; use position::{Offset, Position as SymPos}; pub use symbol::{iterator::*, Symbol, SymbolKind}; -#[derive(Debug)] -pub struct Scanner { - segmenter: GraphemeClusterSegmenter, -} +/// Scans given input and returns vector of [`Symbol`]s needed to convert the input to Unimarkup content. +pub fn scan_str(input: &str) -> Vec> { + let segmenter = GraphemeClusterSegmenter::new(); -impl Clone for Scanner { - fn clone(&self) -> Self { - Scanner::new() - } -} - -impl Default for Scanner { - fn default() -> Self { - Self::new() - } -} - -impl Scanner { - pub fn new() -> Self { - let segmenter = GraphemeClusterSegmenter::new(); - - Self { segmenter } - } + let mut symbols: Vec = Vec::new(); + let mut curr_pos: SymPos = SymPos::default(); + let mut prev_offset = 0; - pub fn scan_str<'s>(&self, input: &'s str) -> Vec> { - let mut symbols: Vec = Vec::new(); - let mut curr_pos: SymPos = SymPos::default(); - let mut prev_offset = 0; + // skip(1) to ignore break at start of input + for offset in segmenter.segment_str(input).skip(1) { + if let Some(grapheme) = input.get(prev_offset..offset) { + let mut kind = SymbolKind::from(grapheme); - // skip(1) to ignore break at start of input - for offset in self.segmenter.segment_str(input).skip(1) { - if let Some(grapheme) = input.get(prev_offset..offset) { - let mut kind = SymbolKind::from(grapheme); - - let end_pos = if kind == SymbolKind::Newline { - SymPos { - line: (curr_pos.line + 1), - ..Default::default() - } - } else { - SymPos { - line: curr_pos.line, - col_utf8: (curr_pos.col_utf8 + grapheme.len()), - col_utf16: (curr_pos.col_utf16 + grapheme.encode_utf16().count()), - col_grapheme: (curr_pos.col_grapheme + 1), - } - }; - - if curr_pos.col_utf8 == 1 && kind == SymbolKind::Newline { - // newline at the start of line -> Blankline - kind = SymbolKind::Blankline; + let end_pos = if kind == SymbolKind::Newline { + SymPos { + line: (curr_pos.line + 1), + ..Default::default() } + } else { + SymPos { + line: curr_pos.line, + col_utf8: (curr_pos.col_utf8 + grapheme.len()), + col_utf16: (curr_pos.col_utf16 + grapheme.encode_utf16().count()), + col_grapheme: (curr_pos.col_grapheme + 1), + } + }; - symbols.push(Symbol { - input, - kind, - offset: Offset { - start: prev_offset, - end: offset, - }, - start: curr_pos, - end: end_pos, - }); - - curr_pos = end_pos; + if curr_pos.col_utf8 == 1 && kind == SymbolKind::Newline { + // newline at the start of line -> Blankline + kind = SymbolKind::Blankline; } - prev_offset = offset; - } - - symbols.push(Symbol { - input, - kind: SymbolKind::EOI, - offset: Offset { - start: prev_offset, - end: prev_offset, - }, - start: curr_pos, - end: curr_pos, - }); - // last offset not needed, because break at EOI is always available - symbols + symbols.push(Symbol { + input, + kind, + offset: Offset { + start: prev_offset, + end: offset, + }, + start: curr_pos, + end: end_pos, + }); + + curr_pos = end_pos; + } + prev_offset = offset; } + + symbols.push(Symbol { + input, + kind: SymbolKind::EOI, + offset: Offset { + start: prev_offset, + end: prev_offset, + }, + start: curr_pos, + end: curr_pos, + }); + + // last offset not needed, because break at EOI is always available + symbols } diff --git a/commons/src/scanner/symbol/iterator/mod.rs b/commons/src/scanner/symbol/iterator/mod.rs index dd8a3842..4cb75389 100644 --- a/commons/src/scanner/symbol/iterator/mod.rs +++ b/commons/src/scanner/symbol/iterator/mod.rs @@ -324,13 +324,13 @@ mod test { use itertools::{Itertools, PeekingNext}; - use crate::scanner::{PrefixMatcher, Scanner, SymbolKind}; + use crate::scanner::{PrefixMatcher, SymbolKind}; use super::SymbolIterator; #[test] fn peek_while_index() { - let symbols = Scanner::new().scan_str("## "); + let symbols = crate::scanner::scan_str("## "); let mut iterator = SymbolIterator::from(&symbols); let hash_cnt = iterator @@ -355,7 +355,7 @@ mod test { #[test] fn peek_next() { - let symbols = Scanner::new().scan_str("#*"); + let symbols = crate::scanner::scan_str("#*"); let mut iterator = SymbolIterator::from(&symbols); @@ -389,7 +389,7 @@ mod test { #[test] fn reach_end() { - let symbols = Scanner::new().scan_str("text*"); + let symbols = crate::scanner::scan_str("text*"); let mut iterator = SymbolIterator::from(&symbols).nest( None, @@ -417,7 +417,7 @@ mod test { #[test] fn with_nested_and_parent_prefix() { - let symbols = Scanner::new().scan_str("a\n* *b"); + let symbols = crate::scanner::scan_str("a\n* *b"); let iterator = SymbolIterator::with( &symbols, @@ -454,7 +454,7 @@ mod test { #[test] fn depth_matcher() { - let symbols = Scanner::new().scan_str("[o [i]]"); + let symbols = crate::scanner::scan_str("[o [i]]"); let mut iterator = SymbolIterator::with( &symbols, diff --git a/commons/src/test_runner/mod.rs b/commons/src/test_runner/mod.rs index e1f7dfbf..7dfb9bb2 100644 --- a/commons/src/test_runner/mod.rs +++ b/commons/src/test_runner/mod.rs @@ -1,13 +1,6 @@ -use crate::scanner::{Scanner, Symbol}; - pub mod as_snapshot; pub mod snap_test_runner; pub mod spec_test; pub mod test_file; pub use insta; - -/// Scans the string using the [`Scanner`] struct. -pub fn scan_str(input: &str) -> Vec { - Scanner::new().scan_str(input) -} diff --git a/commons/src/test_runner/snap_test_runner.rs b/commons/src/test_runner/snap_test_runner.rs index 700d2c85..b23f3967 100644 --- a/commons/src/test_runner/snap_test_runner.rs +++ b/commons/src/test_runner/snap_test_runner.rs @@ -123,10 +123,9 @@ macro_rules! test_parser_snap { ($paths:expr, $parser_fn:expr) => { let test_content = $crate::test_runner::test_file::get_test_content($paths.0, $paths.1); let cfg = $crate::config::Config::default(); - let scanner = $crate::scanner::Scanner::new(); for test in &test_content.test_file.tests { - let symbols = scanner.scan_str(&test.input); + let symbols = $crate::scanner::scan_str(&test.input); let mut snap_runner = SnapTestRunner::with_fn::<_, _>(&test.name, &symbols, $parser_fn) .with_info(format!( diff --git a/inline/tests/lexer/mod.rs b/inline/tests/lexer/mod.rs index 59861578..0f16aa31 100644 --- a/inline/tests/lexer/mod.rs +++ b/inline/tests/lexer/mod.rs @@ -1,7 +1,7 @@ use std::panic; use libtest_mimic::Trial; -use unimarkup_commons::test_runner::{self, snap_test_runner::SnapTestRunner}; +use unimarkup_commons::test_runner::snap_test_runner::SnapTestRunner; use unimarkup_inline::Tokenize; use crate::snapshot::Snapshot; @@ -31,7 +31,7 @@ pub fn test_lexer_snapshots() -> Vec { } fn run_test_case(case: crate::TestCase) { - let mut symbols = test_runner::scan_str(&case.input); + let mut symbols = unimarkup_commons::scanner::scan_str(&case.input); symbols.pop(); // Remove EOI symbol. TODO: handle EOI in lexer let runner = SnapTestRunner::with_fn(&case.name, &symbols, |symbols| { Snapshot::snap((case.input.as_ref(), symbols.tokens())) diff --git a/inline/tests/parser/mod.rs b/inline/tests/parser/mod.rs index a9c01f46..80d8c1f9 100644 --- a/inline/tests/parser/mod.rs +++ b/inline/tests/parser/mod.rs @@ -2,7 +2,7 @@ use std::panic; use crate::snapshot::Snapshot; use libtest_mimic::Trial; -use unimarkup_commons::test_runner::{self, snap_test_runner::SnapTestRunner}; +use unimarkup_commons::test_runner::snap_test_runner::SnapTestRunner; use unimarkup_inline::ParseInlines; mod snapshot; @@ -31,7 +31,7 @@ pub fn test_parser_snapshots() -> Vec { } fn run_test_case(case: crate::TestCase) { - let symbols = test_runner::scan_str(&case.input); + let symbols = unimarkup_commons::scanner::scan_str(&case.input); let runner = SnapTestRunner::with_fn(&case.name, &symbols, |symbols| { let inlines: Vec<_> = symbols.parse_inlines().collect(); diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 8c78e917..bff92bb8 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -1,7 +1,7 @@ //! Module for parsing of Unimarkup elements. use logid::log; -use unimarkup_commons::scanner::{Scanner, SymbolIterator, SymbolKind}; +use unimarkup_commons::scanner::{SymbolIterator, SymbolKind}; use crate::{ document::Document, @@ -160,7 +160,7 @@ impl MainParser { pub fn parse_unimarkup(um_content: &str, config: &mut Config) -> Document { let parser = MainParser::default(); - let symbols = Scanner::new().scan_str(um_content); + let symbols = unimarkup_commons::scanner::scan_str(um_content); let mut symbols_iter = SymbolIterator::from(&symbols); let blocks = parser.parse(&mut symbols_iter); From 02c4505179fe9d4e5674e9057483d268d72b47ce Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Fri, 29 Sep 2023 18:04:21 +0200 Subject: [PATCH 39/43] fix: restrict visibility of iterator index fns --- commons/src/scanner/symbol/iterator/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commons/src/scanner/symbol/iterator/mod.rs b/commons/src/scanner/symbol/iterator/mod.rs index 4cb75389..4cca9912 100644 --- a/commons/src/scanner/symbol/iterator/mod.rs +++ b/commons/src/scanner/symbol/iterator/mod.rs @@ -109,7 +109,7 @@ impl<'input> SymbolIterator<'input> { } /// Sets the current index of this iterator to the given index. - pub fn set_curr_index(&mut self, index: usize) { + pub(super) fn set_curr_index(&mut self, index: usize) { if index >= self.start_index { match self.kind.borrow_mut() { SymbolIteratorKind::Nested(parent) => parent.set_curr_index(index), @@ -130,7 +130,7 @@ impl<'input> SymbolIterator<'input> { } /// Sets the peek index of this iterator to the given index. - pub fn set_peek_index(&mut self, index: usize) { + fn set_peek_index(&mut self, index: usize) { if index >= self.curr_index() { match self.kind.borrow_mut() { SymbolIteratorKind::Nested(parent) => parent.set_peek_index(index), From 0d5c8ab50e79db511a535a2ed011e720789862cb Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Fri, 29 Sep 2023 18:38:50 +0200 Subject: [PATCH 40/43] fix: remove duplicate From<> impls for iterators --- commons/src/scanner/symbol/iterator/mod.rs | 26 +++++++-------------- commons/src/scanner/symbol/iterator/root.rs | 19 +++++---------- parser/src/parser.rs | 2 +- 3 files changed, 15 insertions(+), 32 deletions(-) diff --git a/commons/src/scanner/symbol/iterator/mod.rs b/commons/src/scanner/symbol/iterator/mod.rs index 4cca9912..62870033 100644 --- a/commons/src/scanner/symbol/iterator/mod.rs +++ b/commons/src/scanner/symbol/iterator/mod.rs @@ -235,21 +235,11 @@ impl<'input> SymbolIterator<'input> { } } -impl<'input> From<&'input [Symbol<'input>]> for SymbolIterator<'input> { - fn from(value: &'input [Symbol<'input>]) -> Self { - SymbolIterator { - kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(value)), - start_index: 0, - depth: 0, - prefix_match: None, - end_match: None, - iter_end: false, - } - } -} - -impl<'input> From<&'input Vec>> for SymbolIterator<'input> { - fn from(value: &'input Vec>) -> Self { +impl<'input, T> From for SymbolIterator<'input> +where + T: Into<&'input [Symbol<'input>]>, +{ + fn from(value: T) -> Self { SymbolIterator { kind: SymbolIteratorKind::Root(SymbolIteratorRoot::from(value)), start_index: 0, @@ -332,7 +322,7 @@ mod test { fn peek_while_index() { let symbols = crate::scanner::scan_str("## "); - let mut iterator = SymbolIterator::from(&symbols); + let mut iterator = SymbolIterator::from(&*symbols); let hash_cnt = iterator .peeking_take_while(|symbol| symbol.kind == SymbolKind::Hash) .count(); @@ -357,7 +347,7 @@ mod test { fn peek_next() { let symbols = crate::scanner::scan_str("#*"); - let mut iterator = SymbolIterator::from(&symbols); + let mut iterator = SymbolIterator::from(&*symbols); let peeked_symbol = iterator.peeking_next(|_| true); let next_symbol = iterator.next(); @@ -391,7 +381,7 @@ mod test { fn reach_end() { let symbols = crate::scanner::scan_str("text*"); - let mut iterator = SymbolIterator::from(&symbols).nest( + let mut iterator = SymbolIterator::from(&*symbols).nest( None, Some(Rc::new(|matcher| matcher.matches(&[SymbolKind::Star]))), ); diff --git a/commons/src/scanner/symbol/iterator/root.rs b/commons/src/scanner/symbol/iterator/root.rs index d8da41dd..3bb8a35e 100644 --- a/commons/src/scanner/symbol/iterator/root.rs +++ b/commons/src/scanner/symbol/iterator/root.rs @@ -23,20 +23,13 @@ impl<'input> SymbolIteratorRoot<'input> { } } -impl<'input> From<&'input [Symbol<'input>]> for SymbolIteratorRoot<'input> { - fn from(value: &'input [Symbol<'input>]) -> Self { +impl<'input, T> From for SymbolIteratorRoot<'input> +where + T: Into<&'input [Symbol<'input>]>, +{ + fn from(value: T) -> Self { SymbolIteratorRoot { - symbols: value, - curr_index: 0, - peek_index: 0, - } - } -} - -impl<'input> From<&'input Vec>> for SymbolIteratorRoot<'input> { - fn from(value: &'input Vec>) -> Self { - SymbolIteratorRoot { - symbols: value, + symbols: value.into(), curr_index: 0, peek_index: 0, } diff --git a/parser/src/parser.rs b/parser/src/parser.rs index bff92bb8..c3e273f1 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -161,7 +161,7 @@ pub fn parse_unimarkup(um_content: &str, config: &mut Config) -> Document { let parser = MainParser::default(); let symbols = unimarkup_commons::scanner::scan_str(um_content); - let mut symbols_iter = SymbolIterator::from(&symbols); + let mut symbols_iter = SymbolIterator::from(&*symbols); let blocks = parser.parse(&mut symbols_iter); let mut unimarkup = Document { From d48907605e6709b889a61d73500381a5f4590610 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Fri, 29 Sep 2023 18:42:37 +0200 Subject: [PATCH 41/43] fix: remove *curr* prefix for iterator functions --- .../src/scanner/symbol/iterator/matcher.rs | 6 +++--- commons/src/scanner/symbol/iterator/mod.rs | 20 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/commons/src/scanner/symbol/iterator/matcher.rs b/commons/src/scanner/symbol/iterator/matcher.rs index dc063205..be59190b 100644 --- a/commons/src/scanner/symbol/iterator/matcher.rs +++ b/commons/src/scanner/symbol/iterator/matcher.rs @@ -104,7 +104,7 @@ impl<'input> EndMatcher for SymbolIterator<'input> { let is_empty_line = self.is_empty_line(); if is_empty_line { - self.set_curr_index(self.peek_index()); // To consume peeked symbols + self.set_index(self.peek_index()); // To consume peeked symbols } is_empty_line @@ -127,14 +127,14 @@ impl<'input> EndMatcher for SymbolIterator<'input> { let matched = self.matches(sequence); if matched { - self.set_curr_index(self.peek_index()); // To consume peeked symbols + self.set_index(self.peek_index()); // To consume peeked symbols } matched } fn at_depth(&self, depth: usize) -> bool { - self.curr_depth() == depth + self.depth() == depth } } diff --git a/commons/src/scanner/symbol/iterator/mod.rs b/commons/src/scanner/symbol/iterator/mod.rs index 62870033..a87d2e7b 100644 --- a/commons/src/scanner/symbol/iterator/mod.rs +++ b/commons/src/scanner/symbol/iterator/mod.rs @@ -96,23 +96,23 @@ impl<'input> SymbolIterator<'input> { /// The current nested depth this iterator is at. /// The root iterator starts at 0, and every iterator created using [`Self::nest()`] is one depth higher than its parent. - pub fn curr_depth(&self) -> usize { + pub fn depth(&self) -> usize { self.depth } /// Returns the current index this iterator is in the [`Symbol`] slice of the root iterator. - pub fn curr_index(&self) -> usize { + pub fn index(&self) -> usize { match &self.kind { - SymbolIteratorKind::Nested(parent) => parent.curr_index(), + SymbolIteratorKind::Nested(parent) => parent.index(), SymbolIteratorKind::Root(root) => root.curr_index, } } /// Sets the current index of this iterator to the given index. - pub(super) fn set_curr_index(&mut self, index: usize) { + pub(super) fn set_index(&mut self, index: usize) { if index >= self.start_index { match self.kind.borrow_mut() { - SymbolIteratorKind::Nested(parent) => parent.set_curr_index(index), + SymbolIteratorKind::Nested(parent) => parent.set_index(index), SymbolIteratorKind::Root(root) => { root.curr_index = index; root.peek_index = index; @@ -131,7 +131,7 @@ impl<'input> SymbolIterator<'input> { /// Sets the peek index of this iterator to the given index. fn set_peek_index(&mut self, index: usize) { - if index >= self.curr_index() { + if index >= self.index() { match self.kind.borrow_mut() { SymbolIteratorKind::Nested(parent) => parent.set_peek_index(index), SymbolIteratorKind::Root(root) => { @@ -145,7 +145,7 @@ impl<'input> SymbolIterator<'input> { /// /// **Note:** Needed to reset peek index after using `peeking_next()`. pub fn reset_peek(&mut self) { - self.set_peek_index(self.curr_index()); + self.set_peek_index(self.index()); } /// Returns the maximal remaining symbols in this iterator. @@ -188,7 +188,7 @@ impl<'input> SymbolIterator<'input> { ) -> SymbolIterator<'input> { SymbolIterator { kind: SymbolIteratorKind::Nested(Box::new(self.clone())), - start_index: self.curr_index(), + start_index: self.index(), depth: self.depth + 1, prefix_match, end_match, @@ -328,7 +328,7 @@ mod test { .count(); let next_symbol = iterator.nth(hash_cnt); - let curr_index = iterator.curr_index(); + let curr_index = iterator.index(); assert_eq!(hash_cnt, 2, "Hash symbols in input not correctly detected."); assert_eq!(curr_index, 3, "Current index was not updated correctly."); @@ -352,7 +352,7 @@ mod test { let peeked_symbol = iterator.peeking_next(|_| true); let next_symbol = iterator.next(); let next_peeked_symbol = iterator.peeking_next(|_| true); - let curr_index = iterator.curr_index(); + let curr_index = iterator.index(); assert_eq!(curr_index, 1, "Current index was not updated correctly."); assert_eq!( From 01a148ba2a44e54469829bef02465891d6dc775e Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Fri, 29 Sep 2023 18:45:01 +0200 Subject: [PATCH 42/43] fix: remove *curr* prefix from index in root iterator --- commons/src/scanner/symbol/iterator/mod.rs | 4 ++-- commons/src/scanner/symbol/iterator/root.rs | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/commons/src/scanner/symbol/iterator/mod.rs b/commons/src/scanner/symbol/iterator/mod.rs index a87d2e7b..930ee971 100644 --- a/commons/src/scanner/symbol/iterator/mod.rs +++ b/commons/src/scanner/symbol/iterator/mod.rs @@ -104,7 +104,7 @@ impl<'input> SymbolIterator<'input> { pub fn index(&self) -> usize { match &self.kind { SymbolIteratorKind::Nested(parent) => parent.index(), - SymbolIteratorKind::Root(root) => root.curr_index, + SymbolIteratorKind::Root(root) => root.index, } } @@ -114,7 +114,7 @@ impl<'input> SymbolIterator<'input> { match self.kind.borrow_mut() { SymbolIteratorKind::Nested(parent) => parent.set_index(index), SymbolIteratorKind::Root(root) => { - root.curr_index = index; + root.index = index; root.peek_index = index; } } diff --git a/commons/src/scanner/symbol/iterator/root.rs b/commons/src/scanner/symbol/iterator/root.rs index 3bb8a35e..a2082fe6 100644 --- a/commons/src/scanner/symbol/iterator/root.rs +++ b/commons/src/scanner/symbol/iterator/root.rs @@ -11,7 +11,7 @@ pub struct SymbolIteratorRoot<'input> { /// The [`Symbol`] slice the iterator was created for. symbols: &'input [Symbol<'input>], /// The current index of the iterator inside the [`Symbol`] slice. - pub(super) curr_index: usize, + pub(super) index: usize, /// The peek index of the iterator inside the [`Symbol`] slice. pub(super) peek_index: usize, } @@ -19,7 +19,7 @@ pub struct SymbolIteratorRoot<'input> { impl<'input> SymbolIteratorRoot<'input> { /// Returns the remaining symbols in this iterator, or `None` if there are no symbols left. pub(super) fn remaining_symbols(&self) -> Option<&'input [Symbol<'input>]> { - self.symbols.get(self.curr_index..) + self.symbols.get(self.index..) } } @@ -30,7 +30,7 @@ where fn from(value: T) -> Self { SymbolIteratorRoot { symbols: value.into(), - curr_index: 0, + index: 0, peek_index: 0, } } @@ -40,10 +40,10 @@ impl<'input> Iterator for SymbolIteratorRoot<'input> { type Item = &'input Symbol<'input>; fn next(&mut self) -> Option { - let symbol = self.symbols.get(self.curr_index)?; + let symbol = self.symbols.get(self.index)?; - self.curr_index += 1; - self.peek_index = self.curr_index; + self.index += 1; + self.peek_index = self.index; Some(symbol) } From d710917f18b48b00cd69b163e2d4cb5ddd1a80f9 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Fri, 29 Sep 2023 19:01:38 +0200 Subject: [PATCH 43/43] fix: add assert to ensure update done on act parent Assert only in debug mode. --- commons/src/scanner/symbol/iterator/mod.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/commons/src/scanner/symbol/iterator/mod.rs b/commons/src/scanner/symbol/iterator/mod.rs index 930ee971..a175caba 100644 --- a/commons/src/scanner/symbol/iterator/mod.rs +++ b/commons/src/scanner/symbol/iterator/mod.rs @@ -201,6 +201,13 @@ impl<'input> SymbolIterator<'input> { /// **Note:** Only updates the parent if `self` is nested. pub fn update(self, parent: &mut Self) { if let SymbolIteratorKind::Nested(self_parent) = self.kind { + // Make sure it actually is the parent. + // It is not possible to check more precisely, because other indices are expected to be different due to `clone()`. + debug_assert_eq!( + self_parent.start_index, parent.start_index, + "Updated iterator is not the actual parent of this iterator." + ); + *parent = *self_parent; } }