Skip to content

Commit

Permalink
Add ParserIterator
Browse files Browse the repository at this point in the history
  • Loading branch information
ictrobot committed Nov 2, 2024
1 parent 85e0933 commit 52a4c5a
Show file tree
Hide file tree
Showing 10 changed files with 161 additions and 23 deletions.
67 changes: 59 additions & 8 deletions crates/utils/src/parser/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ use crate::input::{InputError, MapWithInputExt};
use crate::parser::combinator::{
Map, MapResult, Optional, Or, RepeatArrayVec, RepeatN, RepeatVec, WithPrefix, WithSuffix,
};
use crate::parser::error::WithErrorMsg;
use crate::parser::error::{ParseError, WithErrorMsg};
use crate::parser::iterator::ParserIterator;
use crate::parser::simple::{Constant, Eol};
use crate::parser::then::{Then2, Unimplemented};
use crate::parser::ParseError;

/// [`Result`] type returned by [`Parser::parse`].
pub type ParseResult<'i, T> = Result<(T, &'i [u8]), (ParseError, &'i [u8])>;
Expand Down Expand Up @@ -164,7 +164,7 @@ pub trait Parser: Sized {
/// # use utils::parser::{self, Parser};
/// assert_eq!(
/// parser::u32()
/// .with_suffix(",".optional())
/// .with_suffix(",".or(parser::eof()))
/// .repeat_n() // N = 3 is inferred
/// .parse(b"12,34,56"),
/// Ok(([12, 34, 56], &b""[..]))
Expand Down Expand Up @@ -319,7 +319,7 @@ pub trait Parser: Sized {
/// assert_eq!(
/// parser::u32()
/// .then(parser::u32().with_prefix("x"))
/// .with_suffix(",".optional())
/// .with_suffix(",".or(parser::eof()))
/// .parse_all("1x2,3x4,1234x5678")
/// .unwrap(),
/// vec![
Expand Down Expand Up @@ -360,6 +360,56 @@ pub trait Parser: Sized {
.repeat(Constant(()), 0)
.parse_complete(input)
}

/// Create an iterator which applies this parser repeatedly until the provided input is fully
/// consumed.
///
/// The returned iterator will lazily parse the provided input string, producing a sequence of
/// [`Result`] values. Once the end of input is reached, or an error is returned, the iterator
/// will always return [`None`].
///
/// # Examples
/// ```
/// # use utils::input::InputError;
/// # use utils::parser::{self, Parser};
/// let iterator = parser::u32()
/// .with_suffix(parser::eol())
/// .parse_iterator("12\n34\n56\n78");
/// for item in iterator {
/// println!("{}", item?);
/// }
/// # Ok::<(), InputError>(())
/// ```
///
/// ```
/// # use utils::parser::{self, Parser};
/// let mut iterator = parser::u32()
/// .with_suffix(parser::eol())
/// .parse_iterator("12\n34\nnot a integer");
/// assert_eq!(iterator.next().unwrap().unwrap(), 12);
/// assert_eq!(iterator.next().unwrap().unwrap(), 34);
/// assert!(iterator.next().unwrap().is_err());
/// assert!(iterator.next().is_none());
/// ```
///
/// ```
/// # use utils::input::InputError;
/// # use utils::parser::{self, Parser};
/// let filtered = parser::u32()
/// .with_suffix(parser::eol())
/// .parse_iterator("11\n22\n33\n44\n55")
/// .filter(|r| r.is_err() || r.as_ref().is_ok_and(|v| v % 2 == 0))
/// .collect::<Result<Vec<u32>, InputError>>()?;
/// assert_eq!(filtered, vec![22, 44]);
/// # Ok::<(), InputError>(())
/// ```
fn parse_iterator(self, input: &str) -> ParserIterator<Self> {
    // Nothing has been parsed yet, so the unparsed tail starts out as the whole input. The
    // original string is stored alongside it unchanged.
    let remaining = input.as_bytes();
    ParserIterator {
        parser: self,
        remaining,
        input,
    }
}
}

// Workaround to allow using methods which consume a parser in methods which take references.
Expand All @@ -382,15 +432,15 @@ impl<'a, P: Parser> Parser for ParserRef<'a, P> {
///
/// Normally used with [`with_prefix`](Parser::with_prefix)/[`with_suffix`](Parser::with_suffix).
impl Parser for &'static str {
type Output<'i> = Self;
type Output<'i> = ();
type Then<T: Parser> = Then2<Self, T>;

#[inline]
fn parse<'i>(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output<'i>> {
// This is faster than using strip_prefix for the common case where the string is a short
// string literal known at compile time.
if input.len() >= self.len() && self.bytes().zip(input).all(|(a, &b)| a == b) {
Ok((self, &input[self.len()..]))
Ok(((), &input[self.len()..]))
} else {
Err((ParseError::ExpectedLiteral(self), input))
}
Expand All @@ -405,13 +455,13 @@ impl Parser for &'static str {
///
/// Normally used with [`with_prefix`](Parser::with_prefix)/[`with_suffix`](Parser::with_suffix).
impl Parser for u8 {
type Output<'i> = Self;
type Output<'i> = ();
type Then<T: Parser> = Then2<Self, T>;

#[inline]
fn parse<'i>(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output<'i>> {
if input.first() == Some(self) {
Ok((*self, &input[1..]))
Ok(((), &input[1..]))
} else {
Err((ParseError::ExpectedByte(*self), input))
}
Expand All @@ -427,6 +477,7 @@ impl<O, F: Fn(&[u8]) -> ParseResult<O>> Parser for F {
type Output<'i> = O;
type Then<T: Parser> = Then2<Self, T>;

#[inline]
fn parse<'i>(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output<'i>> {
self(input)
}
Expand Down
38 changes: 38 additions & 0 deletions crates/utils/src/parser/iterator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
use crate::input::InputError;
use crate::parser::Parser;
use std::iter::FusedIterator;

/// An iterator that lazily parses the input using the provided parser.
///
/// Each call to `next` applies the parser to the bytes that have not been consumed yet and yields
/// either the parsed value or an [`InputError`]. Iteration stops once no bytes remain or after the
/// first error has been yielded.
///
/// See [`Parser::parse_iterator`].
#[derive(Copy, Clone)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct ParserIterator<'a, P> {
// The complete input string; passed to `InputError::new` when the parser fails.
pub(super) input: &'a str,
// The bytes which have not been consumed yet. Replaced with an empty slice after an error so
// that no further items are produced.
pub(super) remaining: &'a [u8],
// The parser applied to `remaining` on every call to `next`.
pub(super) parser: P,
}

impl<'a, P: Parser> Iterator for ParserIterator<'a, P> {
    type Item = Result<P::Output<'a>, InputError>;

    /// Parses the next item from the unconsumed input.
    ///
    /// Returns [`None`] when no input is left. Otherwise the parser is run once: on success the
    /// parsed value is yielded and the unconsumed input advances past it; on failure the error is
    /// yielded as an [`InputError`] and the iterator is exhausted.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let pending = self.remaining;
        if pending.is_empty() {
            return None;
        }

        let item = match self.parser.parse(pending) {
            Ok((value, rest)) => {
                self.remaining = rest;
                Ok(value)
            }
            Err((cause, position)) => {
                // Drop the rest of the input so every later call takes the early return above.
                self.remaining = &[];
                Err(InputError::new(self.input, position, cause))
            }
        };
        Some(item)
    }
}

// `next` returns `None` without running the parser whenever `remaining` is empty, so once the
// iterator has returned `None` it keeps returning `None`.
impl<'a, P: Parser> FusedIterator for ParserIterator<'a, P> {}
4 changes: 3 additions & 1 deletion crates/utils/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
mod base;
mod combinator;
mod error;
mod iterator;
mod macros;
mod number;
mod one_of;
Expand All @@ -11,8 +12,9 @@ mod then;

pub use base::*;
pub use error::ParseError;
pub use iterator::ParserIterator;
pub use number::{i128, i16, i32, i64, i8, number_range, u128, u16, u32, u64, u8};
pub use one_of::one_of;
pub use simple::{byte, byte_range, constant, eol, noop, take_while, take_while1};
pub use simple::{byte, byte_range, constant, eof, eol, noop, take_while, take_while1};

pub use crate::parser_literal_map as literal_map;
46 changes: 45 additions & 1 deletion crates/utils/src/parser/simple.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::parser::then::Then2;
use crate::parser::then::{Then2, Unimplemented};
use crate::parser::{ParseError, ParseResult, Parser};
use std::ops::RangeInclusive;

Expand Down Expand Up @@ -139,6 +139,50 @@ pub fn noop() -> Constant<()> {
Constant(())
}

/// Parser which succeeds only when no input remains.
///
/// Constructed by [`eof`].
#[derive(Copy, Clone)]
pub struct Eof();
impl Parser for Eof {
    type Output<'i> = ();
    type Then<T: Parser> = Unimplemented;

    /// Succeeds with `()` when `input` is empty, returning the same empty slice; any leftover
    /// input is reported as an error at the current position.
    #[inline]
    fn parse<'i>(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output<'i>> {
        if input.is_empty() {
            Ok(((), input))
        } else {
            Err((ParseError::Expected("end of input"), input))
        }
    }

    /// Always panics: a parser chained after the end of the input could never match.
    fn then<T: Parser>(self, _next: T) -> Self::Then<T> {
        panic!("chaining after eof will never match");
    }
}

/// Parser which matches the end of the input.
///
/// Succeeds with `()` when the input is empty, consuming nothing. If any input remains it fails
/// with [`ParseError::Expected`] carrying `"end of input"`.
///
/// Useful when parsing a list where each item is followed by a separator, except for the final
/// item, which is followed by the end of the input.
///
/// # Panics
/// Calling [`Parser::then`] on the returned parser panics, as nothing chained after the end of
/// the input could ever match.
///
/// # Examples
/// ```
/// # use utils::parser::{self, Parser};
/// assert_eq!(
/// parser::eof().parse(b""),
/// Ok(((), &b""[..]))
/// );
/// assert!(parser::eof().parse(b"12").is_err());
/// assert_eq!(
/// parser::u32()
/// .with_suffix(b','.or(parser::eof()))
/// .repeat_n()
/// .parse(b"12,34,56"),
/// Ok(([12, 34, 56], &b""[..]))
/// );
/// ```
#[must_use]
pub fn eof() -> Eof {
Eof()
}

#[derive(Copy, Clone)]
pub struct Eol();
impl Parser for Eol {
Expand Down
2 changes: 1 addition & 1 deletion crates/year2016/src/day01.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ impl Day01 {
Ok(Self {
instructions: parser::literal_map!("L" => Turn::L, "R" => Turn::R)
.then(parser::u16())
.with_suffix(", ".optional())
.with_suffix(", ".or(parser::eof()))
.parse_all(input)?,
})
}
Expand Down
9 changes: 5 additions & 4 deletions crates/year2016/src/day08.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,12 @@ impl Day08 {
.then(parser::u32())
.map(|(x, by)| Instruction::RotateCol { x, by });

let instructions = rect.or(rotate_row).or(rotate_col).parse_lines(input)?;

let mut grid = [[false; 50]; 6];
for &instruction in &instructions {
match instruction {
for item in parser::one_of((rect, rotate_row, rotate_col))
.with_suffix(parser::eol())
.parse_iterator(input)
{
match item? {
Instruction::Rect { width, height } => {
for row in &mut grid[..height as usize] {
row[..width as usize].fill(true);
Expand Down
2 changes: 1 addition & 1 deletion crates/year2017/src/day06.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ pub struct Day06 {
impl Day06 {
pub fn new(input: &str, _: InputType) -> Result<Self, InputError> {
let banks = parser::u32()
.with_suffix(b' '.or(b'\t').optional())
.with_suffix(parser::one_of((b' ', b'\t', parser::eof())))
.parse_all(input)?;

let (mut power, mut lambda) = (1, 1);
Expand Down
8 changes: 5 additions & 3 deletions crates/year2017/src/day08.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ pub struct Day08 {

impl Day08 {
pub fn new(input: &str, _: InputType) -> Result<Self, InputError> {
let parsed = parser::take_while1(u8::is_ascii_lowercase)
let parse_iterator = parser::take_while1(u8::is_ascii_lowercase)
.with_suffix(" ")
.then(
parser::one_of((
Expand All @@ -32,11 +32,13 @@ impl Day08 {
.with_suffix(" "),
)
.then(parser::i32())
.parse_lines(input)?;
.with_suffix(parser::eol())
.parse_iterator(input);

let mut registers = HashMap::new();
let mut max = 0;
for (reg, value, cond_reg, comparison, cond_value) in parsed {
for item in parse_iterator {
let (reg, value, cond_reg, comparison, cond_value) = item?;
if comparison(registers.entry(cond_reg).or_insert(0), &cond_value) {
let entry = registers.entry(reg).or_insert(0);
*entry += value;
Expand Down
2 changes: 1 addition & 1 deletion crates/year2017/src/day10.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ impl<'a> Day10<'a> {
#[must_use]
pub fn part1(&self) -> u32 {
let lengths = parser::u8()
.with_suffix(b','.optional())
.with_suffix(b','.or(parser::eof()))
.parse_all(self.input)
.expect("input invalid for part 1");

Expand Down
6 changes: 3 additions & 3 deletions crates/year2017/src/day11.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ impl Day11 {
"n" => Point2D::new(0, -1),
"s" => Point2D::new(0, 1),
)
.repeat(b',', 1)
.parse_complete(input)?;
.with_suffix(b','.or(parser::eof()))
.parse_iterator(input);

let mut pos = Point2D::new(0, 0);
let mut max = 0;
for step in steps {
pos += step;
pos += step?;
max = max.max(Self::hex_dist_to_origin(pos));
}

Expand Down

0 comments on commit 52a4c5a

Please sign in to comment.