-
-
Notifications
You must be signed in to change notification settings - Fork 476
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(regular_expression): implement
Display
for RegularExpression
…
… type.
- Loading branch information
Showing
3 changed files
with
371 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,368 @@ | ||
use std::fmt::{self, Display}; | ||
|
||
#[allow(clippy::wildcard_imports)] | ||
use crate::ast::*; | ||
use crate::body_parser::unicode::{is_lead_surrogate, is_syntax_character, is_trail_surrogate}; | ||
|
||
impl<'a> Display for RegularExpression<'a> { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
write!(f, "/{}/{}", self.pattern, self.flags) | ||
} | ||
} | ||
|
||
impl Display for Flags { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
let mut flags = String::with_capacity(8); | ||
macro_rules! if_true_append { | ||
($flag:ident, $char:literal) => { | ||
if self.$flag { | ||
flags.push($char); | ||
} | ||
}; | ||
} | ||
|
||
// write flags in the order they are described in the `MDN` | ||
// <https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions#advanced_searching_with_flags> | ||
if_true_append!(has_indices, 'd'); | ||
if_true_append!(global, 'g'); | ||
if_true_append!(ignore_case, 'i'); | ||
if_true_append!(multiline, 'm'); | ||
if_true_append!(dot_all, 's'); | ||
if_true_append!(unicode, 'u'); | ||
if_true_append!(unicode_sets, 'v'); | ||
if_true_append!(sticky, 'y'); | ||
|
||
write!(f, "{flags}") | ||
} | ||
} | ||
|
||
impl<'a> Display for Pattern<'a> { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
write!(f, "{}", self.body) | ||
} | ||
} | ||
|
||
impl<'a> Display for Disjunction<'a> { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
write_join(f, "|", &self.body) | ||
} | ||
} | ||
|
||
impl<'a> Display for Alternative<'a> { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
write_join(f, "", &self.body) | ||
} | ||
} | ||
|
||
impl<'a> Display for Term<'a> { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
match self { | ||
Self::BoundaryAssertion(it) => write!(f, "{it}"), | ||
Self::LookAroundAssertion(it) => write!(f, "{}", it.as_ref()), | ||
Self::Quantifier(it) => write!(f, "{}", it.as_ref()), | ||
Self::Character(it) => write!(f, "{it}"), | ||
Self::Dot(it) => write!(f, "{it}"), | ||
Self::CharacterClassEscape(it) => write!(f, "{it}"), | ||
Self::UnicodePropertyEscape(it) => write!(f, "{}", it.as_ref()), | ||
Self::CharacterClass(it) => write!(f, "{}", it.as_ref()), | ||
Self::CapturingGroup(it) => write!(f, "{}", it.as_ref()), | ||
Self::IgnoreGroup(it) => write!(f, "{}", it.as_ref()), | ||
Self::IndexedReference(it) => write!(f, "{it}"), | ||
Self::NamedReference(it) => write!(f, "{}", it.as_ref()), | ||
} | ||
} | ||
} | ||
|
||
impl Display for BoundaryAssertion { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
write!(f, "{}", self.kind) | ||
} | ||
} | ||
|
||
impl Display for BoundaryAssertionKind { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
match self { | ||
Self::Start => write!(f, "^"), | ||
Self::End => write!(f, "$"), | ||
Self::Boundary => write!(f, "\\b"), | ||
Self::NegativeBoundary => write!(f, "\\B"), | ||
} | ||
} | ||
} | ||
|
||
impl<'a> Display for LookAroundAssertion<'a> { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
write!(f, "({}{})", self.kind, self.body) | ||
} | ||
} | ||
|
||
impl Display for LookAroundAssertionKind { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
match self { | ||
Self::Lookahead => write!(f, "?="), | ||
Self::NegativeLookahead => write!(f, "?!"), | ||
Self::Lookbehind => write!(f, "?<="), | ||
Self::NegativeLookbehind => write!(f, "?<!"), | ||
} | ||
} | ||
} | ||
|
||
impl<'a> Display for Quantifier<'a> { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
write!(f, "{}", self.body)?; | ||
match (self.min, self.max) { | ||
(0, None) => write!(f, "*"), | ||
(1, None) => write!(f, "+"), | ||
(0, Some(1)) => write!(f, "?"), | ||
(min, Some(max)) if min == max => { | ||
write!(f, "{{{min}}}",) | ||
} | ||
(min, max) => { | ||
let max = max.map_or_else(String::default, |it| it.to_string()); | ||
write!(f, "{{{min},{max}}}",) | ||
} | ||
} | ||
} | ||
} | ||
|
||
impl Display for Character { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
let cp = self.value; | ||
// escape syntax characters | ||
if is_syntax_character(cp) { | ||
write!(f, "\\")?; | ||
} | ||
|
||
if is_lead_surrogate(cp) || is_trail_surrogate(cp) { | ||
write!(f, "\\u{cp:X}") | ||
} else { | ||
// TODO: use `self.kind` to print the correct representation. | ||
let Some(ch) = char::from_u32(cp) else { return Err(fmt::Error) }; | ||
write!(f, "{ch}") | ||
} | ||
} | ||
} | ||
|
||
impl Display for Dot { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
write!(f, ".") | ||
} | ||
} | ||
|
||
impl Display for CharacterClassEscape { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
write!(f, "{}", self.kind) | ||
} | ||
} | ||
|
||
impl Display for CharacterClassEscapeKind { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
match self { | ||
Self::D => write!(f, "\\d"), | ||
Self::NegativeD => write!(f, "\\D"), | ||
Self::S => write!(f, "\\s"), | ||
Self::NegativeS => write!(f, "\\S"), | ||
Self::W => write!(f, "\\w"), | ||
Self::NegativeW => write!(f, "\\W"), | ||
} | ||
} | ||
} | ||
|
||
impl<'a> Display for UnicodePropertyEscape<'a> { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
if self.negative { | ||
write!(f, "\\P")?; | ||
} else { | ||
write!(f, "\\p")?; | ||
} | ||
|
||
if let Some(value) = &self.value { | ||
let name = &self.name; | ||
write!(f, "{{{name}={value}}}") | ||
} else { | ||
write!(f, "{{{}}}", self.name) | ||
} | ||
} | ||
} | ||
|
||
impl<'a> Display for CharacterClass<'a> { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
write!(f, "[")?; | ||
|
||
if !self.body.is_empty() { | ||
if self.negative { | ||
write!(f, "^")?; | ||
} | ||
let sep = match self.kind { | ||
CharacterClassContentsKind::Union => "", | ||
CharacterClassContentsKind::Subtraction => "--", | ||
CharacterClassContentsKind::Intersection => "&&", | ||
}; | ||
write_join(f, sep, &self.body)?; | ||
} | ||
|
||
write!(f, "]") | ||
} | ||
} | ||
|
||
impl<'a> Display for CharacterClassContents<'a> { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
match self { | ||
Self::CharacterClassRange(it) => write!(f, "{}", it.as_ref()), | ||
Self::CharacterClassEscape(it) => write!(f, "{it}"), | ||
Self::UnicodePropertyEscape(it) => write!(f, "{}", it.as_ref()), | ||
Self::Character(it) => write!(f, "{it}"), | ||
Self::NestedCharacterClass(it) => write!(f, "{}", it.as_ref()), | ||
Self::ClassStringDisjunction(it) => write!(f, "{}", it.as_ref()), | ||
} | ||
} | ||
} | ||
|
||
impl Display for CharacterClassRange { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
write!(f, "{}-{}", self.min, self.max) | ||
} | ||
} | ||
|
||
impl<'a> Display for ClassStringDisjunction<'a> { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
write!(f, "\\q{{")?; | ||
write_join(f, "|", &self.body)?; | ||
write!(f, "}}") | ||
} | ||
} | ||
|
||
impl<'a> Display for ClassString<'a> { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
write_join(f, "", &self.body) | ||
} | ||
} | ||
|
||
impl<'a> Display for CapturingGroup<'a> { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
let body = &self.body; | ||
if let Some(name) = &self.name { | ||
write!(f, "(?<{name}>{body})") | ||
} else { | ||
write!(f, "({body})") | ||
} | ||
} | ||
} | ||
|
||
impl<'a> Display for IgnoreGroup<'a> { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
fn write_flags( | ||
f: &mut fmt::Formatter<'_>, | ||
prefix: char, | ||
flags: &ModifierFlags, | ||
) -> fmt::Result { | ||
if flags.ignore_case { | ||
write!(f, "{prefix}i")?; | ||
} | ||
if flags.sticky { | ||
write!(f, "{prefix}y")?; | ||
} | ||
if flags.multiline { | ||
write!(f, "{prefix}m")?; | ||
} | ||
Ok(()) | ||
} | ||
|
||
write!(f, "(?")?; | ||
if let Some(enabling) = &self.enabling_modifiers { | ||
write_flags(f, '\0', enabling)?; | ||
} | ||
if let Some(disabling) = &self.disabling_modifiers { | ||
write_flags(f, '-', disabling)?; | ||
} | ||
write!(f, ":{})", self.body) | ||
} | ||
} | ||
|
||
impl Display for IndexedReference { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
write!(f, "\\{}", self.index) | ||
} | ||
} | ||
|
||
impl<'a> Display for NamedReference<'a> { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
write!(f, "\\k{}", self.name) | ||
} | ||
} | ||
|
||
/// Writes every item of `items` to `f`, placing `sep` between consecutive items.
///
/// Nothing is written for an empty sequence, and no separator is written before the first
/// item or after the last one. The first formatting error is returned immediately.
fn write_join<S, I, E>(f: &mut fmt::Formatter<'_>, sep: S, items: I) -> fmt::Result
where
    S: AsRef<str>,
    E: Display,
    I: IntoIterator<Item = E>,
{
    let separator = sep.as_ref();

    for (position, item) in items.into_iter().enumerate() {
        // Every item except the first is preceded by the separator.
        if position > 0 {
            f.write_str(separator)?;
        }
        write!(f, "{item}")?;
    }

    Ok(())
}
|
||
#[cfg(test)]
mod test {
    use oxc_allocator::Allocator;

    /// Round-trip cases: each source literal is parsed and printed again.
    /// `None` means the printed form must equal the source itself.
    static CASES: &[(&str, /* expected display */ Option<&str>)] = &[
        ("/ab/", None),
        ("/abc/i", None),
        ("/emo👈🏻ji/u", None),
        ("/ab|c/i", None),
        ("/a|b+|c/i", None),
        ("/(?=a)|(?<=b)|(?!c)|(?<!d)/i", None),
        (r"/(cg)(?<n>cg)(?:g)/", None),
        (r"/^(?=ab)\b(?!cd)(?<=ef)\B(?<!gh)$/", None),
        (r"/^(?<!ab)$/", None),
        (r"/[abc]/", None),
        (r"/[a&&b]/v", None),
        (r"/[a--b]/v", None),
        (r"/[^a--b--c]/v", None),
        (r"/[a[b[c[d[e[f[g[h[i[j[k[l]]]]]]]]]]]]/v", None),
        (r"/[\q{abc|d|e|}]/v", None),
        (r"/\p{Basic_Emoji}/v", None),
        // we lose the flags ordering
        ("/abcd/igv", Some("/abcd/giv")),
        (r"/\d/ug", Some(r"/\d/gu")),
        // we always display "syntax characters" as their escaped form.
        (r"/c]/", Some(r"/c\]/")),
        (r"/[|\]]/", Some(r"/[\|\]]/")),
        ("/a{0}|b{1,2}|c{3,}/i", None),
        // NOTE: surrogated characters can't be displayed without access to the next character so we
        // can't print them correctly via `Display` trait. Instead we print their unicode code point
        (r"/Em🥹j/", Some(r"/Em\uD83E\uDD79j/")),
        // TODO: currently we do **NOT** respect the `Character::kind` field so everything is
        // lowered to their final form.
        (r"/\n\cM\0\x41\./", Some("/\n\r\0A\\./")),
        (r"/\n\cM\0\x41\u1234\./u", Some("/\n\r\0Aሴ\\./u")),
        (r"/\n\cM\0\x41\u{1f600}\./u", Some("/\n\r\0A😀\\./u")),
        // TODO: `\b` is parsed as `\u{8}` which is wrong.
        // (r"/[\bb]/", Some(r"/[\bb]/")),
    ];

    /// Parses `source` and asserts that its `Display` output equals `expect`,
    /// falling back to `source` itself when `expect` is `None`.
    fn test_display(allocator: &Allocator, source: &str, expect: Option<&str>) {
        use crate::{Parser, ParserOptions};
        let regex = Parser::new(allocator, source, ParserOptions::default()).parse().unwrap();
        let expected = expect.unwrap_or(source);
        assert_eq!(expected, regex.to_string());
    }

    #[test]
    fn test() {
        let allocator = Allocator::default();
        for &(source, expect) in CASES {
            test_display(&allocator, source, expect);
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
|
||
pub mod ast; | ||
mod body_parser; | ||
mod display; | ||
mod flag_parser; | ||
mod literal_parser; | ||
mod options; | ||
|