From 0d79d0b085f92ddd5702556c766cd75e33f19e22 Mon Sep 17 00:00:00 2001
From: baoyachi
Date: Sun, 31 Dec 2023 00:27:16 +0800
Subject: [PATCH] feat(token): `tag` combinator support `char` type

Fixes #407
---
 src/stream/mod.rs   | 60 ++++++++++++++++++++++++++++++++++
 src/stream/tests.rs | 80 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 140 insertions(+)

diff --git a/src/stream/mod.rs b/src/stream/mod.rs
index 39a1e0d4..d5d11aa3 100644
--- a/src/stream/mod.rs
+++ b/src/stream/mod.rs
@@ -374,6 +374,13 @@ impl<'a> SliceLen for &'a str {
     }
 }
 
+impl SliceLen for char {
+    #[inline]
+    fn slice_len(&self) -> usize {
+        self.len_utf8()
+    }
+}
+
 impl<'a> SliceLen for &'a Bytes {
     #[inline]
     fn slice_len(&self) -> usize {
@@ -1717,6 +1724,32 @@ impl<'a, 'b> Compare<AsciiCaseless<&'b [u8]>> for &'a [u8] {
     }
 }
 
+impl<'a> Compare<char> for &'a [u8] {
+    #[inline(always)]
+    fn compare(&self, t: char) -> CompareResult {
+        self.compare(t.encode_utf8(&mut [0; 4]).as_bytes())
+    }
+
+    #[inline(always)]
+    #[allow(deprecated)]
+    fn compare_no_case(&self, t: char) -> CompareResult {
+        self.compare_no_case(t.encode_utf8(&mut [0; 4]).as_bytes())
+    }
+}
+
+impl<'a> Compare<AsciiCaseless<char>> for &'a [u8] {
+    #[inline]
+    fn compare(&self, t: AsciiCaseless<char>) -> CompareResult {
+        self.compare(AsciiCaseless(t.0.encode_utf8(&mut [0; 4]).as_bytes()))
+    }
+
+    #[inline(always)]
+    #[allow(deprecated)]
+    fn compare_no_case(&self, t: AsciiCaseless<char>) -> CompareResult {
+        self.compare_no_case(AsciiCaseless(t.0.encode_utf8(&mut [0; 4]).as_bytes()))
+    }
+}
+
 impl<'a, 'b> Compare<&'b str> for &'a str {
     #[inline(always)]
     fn compare(&self, t: &'b str) -> CompareResult {
@@ -1743,6 +1776,32 @@ impl<'a, 'b> Compare<AsciiCaseless<&'b str>> for &'a str {
     }
 }
 
+impl<'a> Compare<char> for &'a str {
+    #[inline(always)]
+    fn compare(&self, t: char) -> CompareResult {
+        self.compare(t.encode_utf8(&mut [0; 4]).as_bytes())
+    }
+
+    #[inline(always)]
+    #[allow(deprecated)]
+    fn compare_no_case(&self, t: char) -> CompareResult {
+        self.compare_no_case(t.encode_utf8(&mut [0; 4]).as_bytes())
+    }
+}
+
+impl<'a> Compare<AsciiCaseless<char>> for &'a str {
+    #[inline]
+    fn compare(&self, t: AsciiCaseless<char>) -> CompareResult {
+        self.compare(AsciiCaseless(t.0.encode_utf8(&mut [0; 4]).as_bytes()))
+    }
+
+    #[inline(always)]
+    #[allow(deprecated)]
+    fn compare_no_case(&self, t: AsciiCaseless<char>) -> CompareResult {
+        self.compare_no_case(AsciiCaseless(t.0.encode_utf8(&mut [0; 4]).as_bytes()))
+    }
+}
+
 impl<'a, 'b> Compare<&'b [u8]> for &'a str {
     #[inline(always)]
     fn compare(&self, t: &'b [u8]) -> CompareResult {
@@ -2620,6 +2679,7 @@ impl AsChar for u8 {
         self == b'\n'
     }
 }
+
 impl<'a> AsChar for &'a u8 {
     #[inline(always)]
     fn as_char(self) -> char {
diff --git a/src/stream/tests.rs b/src/stream/tests.rs
index 0129f6e3..06047dfd 100644
--- a/src/stream/tests.rs
+++ b/src/stream/tests.rs
@@ -1,6 +1,9 @@
 #[cfg(feature = "std")]
 use proptest::prelude::*;
 
+use crate::error::ErrMode::Backtrack;
+use crate::error::{ErrorKind, InputError};
+use crate::token::tag;
 use crate::{
     combinator::{separated, separated_pair},
     PResult, Parser,
@@ -146,3 +149,80 @@ fn test_custom_slice() {
     let offset = input.offset_from(&start);
     assert_eq!(offset, 2);
 }
+
+#[test]
+fn test_tag_support_char() {
+    assert_eq!(
+        tag::<_, _, InputError<_>>('π').parse_peek("π"),
+        Ok(("", "π"))
+    );
+    assert_eq!(
+        tag::<_, _, InputError<_>>('π').parse_peek("π3.14"),
+        Ok(("3.14", "π"))
+    );
+
+    assert_eq!(
+        tag::<_, _, InputError<_>>("π").parse_peek("π3.14"),
+        Ok(("3.14", "π"))
+    );
+
+    assert_eq!(
+        tag::<_, _, InputError<_>>('-').parse_peek("π"),
+        Err(Backtrack(InputError::new("π", ErrorKind::Tag)))
+    );
+
+    assert_eq!(
+        tag::<_, Partial<&[u8]>, InputError<_>>('π').parse_peek(Partial::new(b"\xCF\x80")),
+        Ok((Partial::new(Default::default()), "π".as_bytes()))
+    );
+    assert_eq!(
+        tag::<_, &[u8], InputError<_>>('π').parse_peek(b"\xCF\x80"),
+        Ok((Default::default(), "π".as_bytes()))
+    );
+
+    assert_eq!(
+        tag::<_, Partial<&[u8]>, InputError<_>>('π').parse_peek(Partial::new(b"\xCF\x803.14")),
+        Ok((Partial::new(&b"3.14"[..]), "π".as_bytes()))
+    );
+    assert_eq!(
+        tag::<_, &[u8], InputError<_>>('π').parse_peek(b"\xCF\x80"),
+        Ok((Default::default(), "π".as_bytes()))
+    );
+
+    assert_eq!(
+        tag::<_, &[u8], InputError<_>>('π').parse_peek(b"\xCF\x803.14"),
+        Ok((&b"3.14"[..], "π".as_bytes()))
+    );
+
+    assert_eq!(
+        tag::<_, &[u8], InputError<_>>(AsciiCaseless('a')).parse_peek(b"ABCxyz"),
+        Ok((&b"BCxyz"[..], &b"A"[..]))
+    );
+
+    assert_eq!(
+        tag::<_, &[u8], InputError<_>>('a').parse_peek(b"ABCxyz"),
+        Err(Backtrack(InputError::new(&b"ABCxyz"[..], ErrorKind::Tag)))
+    );
+
+    assert_eq!(
+        tag::<_, &[u8], InputError<_>>(AsciiCaseless('π')).parse_peek(b"\xCF\x803.14"),
+        Ok((&b"3.14"[..], "π".as_bytes()))
+    );
+
+    assert_eq!(
+        tag::<_, _, InputError<_>>(AsciiCaseless('🧑')).parse_peek("🧑你好"),
+        Ok(("你好", "🧑"))
+    );
+
+    let mut buffer = [0; 4];
+    let input = '\u{241b}'.encode_utf8(&mut buffer);
+    assert_eq!(
+        tag::<_, &[u8], InputError<_>>(AsciiCaseless('␛')).parse_peek(input.as_bytes()),
+        Ok((&b""[..], [226, 144, 155].as_slice()))
+    );
+
+    assert_eq!(
+        tag::<_, &[u8], InputError<_>>('-').parse_peek(b"\xCF\x80"),
+        Err(Backtrack(InputError::new(&b"\xCF\x80"[..], ErrorKind::Tag)))
+    );
+}