From 02710dc4126abcd9929f80c11417033afd6b646a Mon Sep 17 00:00:00 2001
From: rzvxa
Date: Wed, 28 Aug 2024 18:56:33 +0330
Subject: [PATCH] feat(regular_expression): implement `Display` for `RegularExpression` type.

---
 .../src/body_parser/mod.rs                    |   3 +-
 crates/oxc_regular_expression/src/display.rs  | 370 ++++++++++++++++++
 crates/oxc_regular_expression/src/lib.rs      |   1 +
 3 files changed, 373 insertions(+), 1 deletion(-)
 create mode 100644 crates/oxc_regular_expression/src/display.rs

diff --git a/crates/oxc_regular_expression/src/body_parser/mod.rs b/crates/oxc_regular_expression/src/body_parser/mod.rs
index be0e18bd65dcf8..0e1135778cbf05 100644
--- a/crates/oxc_regular_expression/src/body_parser/mod.rs
+++ b/crates/oxc_regular_expression/src/body_parser/mod.rs
@@ -2,9 +2,10 @@ mod diagnostics;
 mod parser;
 mod reader;
 mod state;
-mod unicode;
 mod unicode_property;
 
+pub(crate) mod unicode;
+
 pub use parser::PatternParser;
 
 #[cfg(test)]
diff --git a/crates/oxc_regular_expression/src/display.rs b/crates/oxc_regular_expression/src/display.rs
new file mode 100644
index 00000000000000..014877837853df
--- /dev/null
+++ b/crates/oxc_regular_expression/src/display.rs
@@ -0,0 +1,370 @@
+use std::fmt::{self, Display};
+
+#[allow(clippy::wildcard_imports)]
+use crate::ast::*;
+use crate::body_parser::unicode::{is_lead_surrogate, is_syntax_character, is_trail_surrogate};
+
+/// Prints the whole literal as `/pattern/flags`.
+impl<'a> Display for RegularExpression<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "/{}/{}", self.pattern, self.flags)
+    }
+}
+
+/// Prints the set flags, one character each.
+impl Display for Flags {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Flags are written in the order they are described in the `MDN`,
+        // which is also alphabetical.
+        let flags = [
+            (self.has_indices, "d"),
+            (self.global, "g"),
+            (self.ignore_case, "i"),
+            (self.multiline, "m"),
+            (self.dot_all, "s"),
+            (self.unicode, "u"),
+            (self.unicode_sets, "v"),
+            (self.sticky, "y"),
+        ];
+
+        // Write straight into the formatter; no intermediate `String` is needed.
+        for (enabled, flag) in flags {
+            if enabled {
+                f.write_str(flag)?;
+            }
+        }
+
+        Ok(())
+    }
+}
+
+/// Prints the pattern body; the surrounding `/` are added by `RegularExpression`.
+impl<'a> Display for Pattern<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.body)
+    }
+}
+
+/// Prints the alternatives separated by `|`.
+impl<'a> Display for Disjunction<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write_join(f, "|", &self.body)
+    }
+}
+
+/// Prints the terms back to back, without a separator.
+impl<'a> Display for Alternative<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write_join(f, "", &self.body)
+    }
+}
+
+/// Delegates to the `Display` impl of the wrapped node.
+impl<'a> Display for Term<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::BoundaryAssertion(it) => write!(f, "{it}"),
+            Self::LookAroundAssertion(it) => write!(f, "{}", it.as_ref()),
+            Self::Quantifier(it) => write!(f, "{}", it.as_ref()),
+            Self::Character(it) => write!(f, "{it}"),
+            Self::Dot(it) => write!(f, "{it}"),
+            Self::CharacterClassEscape(it) => write!(f, "{it}"),
+            Self::UnicodePropertyEscape(it) => write!(f, "{}", it.as_ref()),
+            Self::CharacterClass(it) => write!(f, "{}", it.as_ref()),
+            Self::CapturingGroup(it) => write!(f, "{}", it.as_ref()),
+            Self::IgnoreGroup(it) => write!(f, "{}", it.as_ref()),
+            Self::IndexedReference(it) => write!(f, "{it}"),
+            Self::NamedReference(it) => write!(f, "{}", it.as_ref()),
+        }
+    }
+}
+
+impl Display for BoundaryAssertion {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.kind)
+    }
+}
+
+impl Display for BoundaryAssertionKind {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::Start => write!(f, "^"),
+            Self::End => write!(f, "$"),
+            Self::Boundary => write!(f, r"\b"),
+            Self::NegativeBoundary => write!(f, r"\B"),
+        }
+    }
+}
+
+/// Prints `(?=…)`, `(?!…)`, `(?<=…)` or `(?<!…)` depending on the kind.
+impl<'a> Display for LookAroundAssertion<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "({}{})", self.kind, self.body)
+    }
+}
+
+impl Display for LookAroundAssertionKind {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::Lookahead => write!(f, "?="),
+            Self::NegativeLookahead => write!(f, "?!"),
+            Self::Lookbehind => write!(f, "?<="),
+            Self::NegativeLookbehind => write!(f, "?<!"),
+        }
+    }
+}
+
+/// Prints the quantified term followed by its quantifier (`*`, `+`, `?` or `{…}`).
+impl<'a> Display for Quantifier<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.body)?;
+
+        match (self.min, self.max) {
+            (0, None) => write!(f, "*")?,
+            (1, None) => write!(f, "+")?,
+            (0, Some(1)) => write!(f, "?")?,
+            (min, Some(max)) if min == max => write!(f, "{{{min}}}")?,
+            (min, Some(max)) => write!(f, "{{{min},{max}}}")?,
+            // An open upper bound is written with nothing after the comma.
+            (min, None) => write!(f, "{{{min},}}")?,
+        }
+
+        // A trailing `?` marks the quantifier as lazy.
+        if !self.greedy {
+            write!(f, "?")?;
+        }
+
+        Ok(())
+    }
+}
+
+/// Prints a single code point, escaping it where the raw character cannot be used.
+impl Display for Character {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let cp = self.value;
+        // escape syntax characters
+        if is_syntax_character(cp) {
+            write!(f, r"\")?;
+        }
+
+        if is_lead_surrogate(cp) || is_trail_surrogate(cp) {
+            // A lone surrogate is not a valid `char`, so it is written as a `\u` escape.
+            write!(f, r"\u{cp:X}")
+        } else {
+            // TODO: use `self.kind` to print the correct representation.
+            let Some(ch) = char::from_u32(cp) else { return Err(fmt::Error) };
+            write!(f, "{ch}")
+        }
+    }
+}
+
+impl Display for Dot {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, ".")
+    }
+}
+
+impl Display for CharacterClassEscape {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.kind)
+    }
+}
+
+impl Display for CharacterClassEscapeKind {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::D => write!(f, r"\d"),
+            Self::NegativeD => write!(f, r"\D"),
+            Self::S => write!(f, r"\s"),
+            Self::NegativeS => write!(f, r"\S"),
+            Self::W => write!(f, r"\w"),
+            Self::NegativeW => write!(f, r"\W"),
+        }
+    }
+}
+
+/// Prints `\p{name}` or `\p{name=value}`, with `\P` for the negated form.
+impl<'a> Display for UnicodePropertyEscape<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if self.negative {
+            write!(f, r"\P")?;
+        } else {
+            write!(f, r"\p")?;
+        }
+
+        if let Some(value) = &self.value {
+            let name = &self.name;
+            write!(f, "{{{name}={value}}}")
+        } else {
+            write!(f, "{{{}}}", self.name)
+        }
+    }
+}
+
+/// Prints `[…]`, or `[^…]` for a negated class.
+impl<'a> Display for CharacterClass<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "[")?;
+
+        // The negation marker must be kept even when the class has no contents:
+        // `[^]` and `[]` are different patterns.
+        if self.negative {
+            write!(f, "^")?;
+        }
+
+        let sep = match self.kind {
+            CharacterClassContentsKind::Union => "",
+            CharacterClassContentsKind::Subtraction => "--",
+            CharacterClassContentsKind::Intersection => "&&",
+        };
+        write_join(f, sep, &self.body)?;
+
+        write!(f, "]")
+    }
+}
+
+/// Delegates to the `Display` impl of the wrapped node.
+impl<'a> Display for CharacterClassContents<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::CharacterClassRange(it) => write!(f, "{}", it.as_ref()),
+            Self::CharacterClassEscape(it) => write!(f, "{it}"),
+            Self::UnicodePropertyEscape(it) => write!(f, "{}", it.as_ref()),
+            Self::Character(it) => write!(f, "{it}"),
+            Self::NestedCharacterClass(it) => write!(f, "{}", it.as_ref()),
+            Self::ClassStringDisjunction(it) => write!(f, "{}", it.as_ref()),
+        }
+    }
+}
+
+impl Display for CharacterClassRange {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}-{}", self.min, self.max)
+    }
+}
+
+/// Prints `\q{…}` with the class strings separated by `|`.
+impl<'a> Display for ClassStringDisjunction<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "\\q{{")?;
+        write_join(f, "|", &self.body)?;
+        write!(f, "}}")
+    }
+}
+
+impl<'a> Display for ClassString<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write_join(f, "", &self.body)
+    }
+}
+
+/// Prints `(…)`, or `(?<name>…)` for a named group.
+impl<'a> Display for CapturingGroup<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let body = &self.body;
+        if let Some(name) = &self.name {
+            write!(f, "(?<{name}>{body})")
+        } else {
+            write!(f, "({body})")
+        }
+    }
+}
+
+/// Prints `(?…:…)`: enabled modifiers, then `-` and the disabled ones if present.
+impl<'a> Display for IgnoreGroup<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        /// Writes one character per set modifier flag.
+        fn write_flags(f: &mut fmt::Formatter<'_>, flags: &ModifierFlags) -> fmt::Result {
+            if flags.ignore_case {
+                write!(f, "i")?;
+            }
+            if flags.sticky {
+                write!(f, "y")?;
+            }
+            if flags.multiline {
+                write!(f, "m")?;
+            }
+            Ok(())
+        }
+
+        write!(f, "(?")?;
+        if let Some(enabling) = &self.enabling_modifiers {
+            write_flags(f, enabling)?;
+        }
+        if let Some(disabling) = &self.disabling_modifiers {
+            // A single `-` introduces the whole list of disabled modifiers.
+            write!(f, "-")?;
+            write_flags(f, disabling)?;
+        }
+        write!(f, ":{})", self.body)
+    }
+}
+
+impl Display for IndexedReference {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "\\{}", self.index)
+    }
+}
+
+impl<'a> Display for NamedReference<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, r"\k<{}>", self.name)
+    }
+}
+
+/// Writes every item of `items` to `f`, placing `sep` between consecutive items.
+///
+/// Nothing is written for an empty iterator.
+fn write_join<S, I, E>(f: &mut fmt::Formatter<'_>, sep: S, items: I) -> fmt::Result
+where
+    S: AsRef<str>,
+    E: Display,
+    I: IntoIterator<Item = E>,
+{
+    let sep = sep.as_ref();
+    let mut iter = items.into_iter();
+
+    if let Some(first) = iter.next() {
+        write!(f, "{first}")?;
+    }
+
+    for it in iter {
+        write!(f, "{sep}{it}")?;
+    }
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod test {
+    use oxc_allocator::Allocator;
+
+    static CASES: &[(&str, /* expected display */ Option<&str>)] = &[
+        ("/ab/", None),
+        ("/abc/i", None),
+        ("/a*?/i", None),
+        ("/emo👈🏻ji/u", None),
+        ("/ab|c/i", None),
+        ("/a|b+|c/i", None),
+        ("/(?=a)|(?<=b)|(?!c)|(?<!d)/i", None),
+        (r"/(cg)(?<n>cg)(?:g)/", None),
+        (r"/^(?=ab)\b(?!cd)(?<=ef)\B(?<!gh)$/", None),
+        // An empty negated class must keep its `^`.
+        ("/[^]/", None),
+    ];
+
+    fn test_display(allocator: &Allocator, source: &str, expect: Option<&str>) {
+        use crate::{Parser, ParserOptions};
+        let expect = expect.unwrap_or(source);
+        let parsed = Parser::new(allocator, source, ParserOptions::default()).parse().unwrap();
+        assert_eq!(expect, parsed.to_string());
+    }
+
+    #[test]
+    fn test() {
+        let allocator = &Allocator::default();
+        CASES
+            .iter()
+            .for_each(|(source, expect)| test_display(allocator, source, expect.as_deref()));
+    }
+}
diff --git a/crates/oxc_regular_expression/src/lib.rs b/crates/oxc_regular_expression/src/lib.rs
index ae2f1c0a58bc0f..5b2d4758915efe 100644
--- a/crates/oxc_regular_expression/src/lib.rs
+++ b/crates/oxc_regular_expression/src/lib.rs
@@ -2,6 +2,7 @@
 
 pub mod ast;
 mod body_parser;
+mod display;
 mod flag_parser;
 mod literal_parser;
 mod options;