Skip to content

Commit

Permalink
feat(regular_expression): implement Display for RegularExpression
Browse files Browse the repository at this point in the history
… type.
  • Loading branch information
rzvxa committed Aug 28, 2024
1 parent 08dc0ad commit 1a6a980
Show file tree
Hide file tree
Showing 3 changed files with 371 additions and 1 deletion.
3 changes: 2 additions & 1 deletion crates/oxc_regular_expression/src/body_parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@ mod diagnostics;
mod parser;
mod reader;
mod state;
mod unicode;
mod unicode_property;

pub(crate) mod unicode;

pub use parser::PatternParser;

#[cfg(test)]
Expand Down
368 changes: 368 additions & 0 deletions crates/oxc_regular_expression/src/display.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,368 @@
use std::fmt::{self, Display};

#[allow(clippy::wildcard_imports)]
use crate::ast::*;
use crate::body_parser::unicode::{is_lead_surrogate, is_syntax_character, is_trail_surrogate};

/// Renders the literal as `/<pattern>/<flags>`.
impl<'a> Display for RegularExpression<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { pattern, flags, .. } = self;
        write!(f, "/{pattern}/{flags}")
    }
}

/// Renders the set flags as their single-letter forms.
impl Display for Flags {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Letters are emitted in the order MDN lists them, which is not
        // necessarily the order they were written in the source literal.
        // <https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions#advanced_searching_with_flags>
        let table = [
            (self.has_indices, 'd'),
            (self.global, 'g'),
            (self.ignore_case, 'i'),
            (self.multiline, 'm'),
            (self.dot_all, 's'),
            (self.unicode, 'u'),
            (self.unicode_sets, 'v'),
            (self.sticky, 'y'),
        ];

        let rendered: String =
            table.iter().filter(|entry| entry.0).map(|entry| entry.1).collect();
        f.write_str(&rendered)
    }
}

/// A pattern prints as its body and nothing else.
impl<'a> Display for Pattern<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let body = &self.body;
        write!(f, "{body}")
    }
}

/// Alternatives are printed in order, separated by `|`.
impl<'a> Display for Disjunction<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let alternatives = &self.body;
        write_join(f, "|", alternatives)
    }
}

/// Terms are printed back to back with no separator.
impl<'a> Display for Alternative<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let terms = &self.body;
        write_join(f, "", terms)
    }
}

/// A term prints exactly as the node it wraps.
impl<'a> Display for Term<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        /// Writes `node` with its own `Display` implementation.
        fn emit<T: Display>(f: &mut fmt::Formatter<'_>, node: &T) -> fmt::Result {
            write!(f, "{node}")
        }

        match self {
            // Variants formatted straight from the matched binding.
            Self::BoundaryAssertion(node) => emit(f, node),
            Self::Character(node) => emit(f, node),
            Self::Dot(node) => emit(f, node),
            Self::CharacterClassEscape(node) => emit(f, node),
            Self::IndexedReference(node) => emit(f, node),
            // Variants whose payload is reached through `as_ref()`.
            Self::LookAroundAssertion(node) => emit(f, node.as_ref()),
            Self::Quantifier(node) => emit(f, node.as_ref()),
            Self::UnicodePropertyEscape(node) => emit(f, node.as_ref()),
            Self::CharacterClass(node) => emit(f, node.as_ref()),
            Self::CapturingGroup(node) => emit(f, node.as_ref()),
            Self::IgnoreGroup(node) => emit(f, node.as_ref()),
            Self::NamedReference(node) => emit(f, node.as_ref()),
        }
    }
}

/// A boundary assertion prints as its kind.
impl Display for BoundaryAssertion {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let kind = &self.kind;
        write!(f, "{kind}")
    }
}

/// Maps each boundary kind to its pattern token: `^`, `$`, `\b` or `\B`.
impl Display for BoundaryAssertionKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let token = match self {
            Self::Start => "^",
            Self::End => "$",
            Self::Boundary => "\\b",
            Self::NegativeBoundary => "\\B",
        };
        f.write_str(token)
    }
}

/// Prints `(<kind><body>)`, e.g. `(?=a)` or `(?<!b)`.
impl<'a> Display for LookAroundAssertion<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (kind, body) = (&self.kind, &self.body);
        write!(f, "({kind}{body})")
    }
}

/// Maps each look-around kind to the marker that follows the opening `(`.
impl Display for LookAroundAssertionKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let marker = match self {
            Self::Lookahead => "?=",
            Self::NegativeLookahead => "?!",
            Self::Lookbehind => "?<=",
            Self::NegativeLookbehind => "?<!",
        };
        f.write_str(marker)
    }
}

/// Prints the quantified term followed by the shortest quantifier spelling
/// for its `(min, max)` bounds.
impl<'a> Display for Quantifier<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let body = &self.body;
        write!(f, "{body}")?;

        // NOTE(review): only `min`/`max` are consulted, so no lazy `?` suffix is
        // ever printed — confirm whether the AST tracks greediness.
        match (self.min, self.max) {
            // Shorthand forms.
            (0, None) => f.write_str("*"),
            (1, None) => f.write_str("+"),
            (0, Some(1)) => f.write_str("?"),
            // Exact count: `{n}`.
            (lo, Some(hi)) if lo == hi => write!(f, "{{{lo}}}"),
            // Bounded range: `{n,m}`.
            (lo, Some(hi)) => write!(f, "{{{lo},{hi}}}"),
            // Open-ended range: `{n,}`.
            (lo, None) => write!(f, "{{{lo},}}"),
        }
    }
}

/// Prints a single code point.
///
/// Syntax characters get a leading backslash. Lone surrogates are printed as
/// `\u` followed by the code point in upper-case hex. Any other value is
/// printed as the `char` it converts to; a value that is not a valid `char`
/// yields `fmt::Error`.
impl Display for Character {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let code = self.value;

        // Escape syntax characters.
        if is_syntax_character(code) {
            f.write_str("\\")?;
        }

        if is_lead_surrogate(code) || is_trail_surrogate(code) {
            return write!(f, "\\u{code:X}");
        }

        // TODO: use `self.kind` to print the correct representation.
        match char::from_u32(code) {
            Some(ch) => write!(f, "{ch}"),
            None => Err(fmt::Error),
        }
    }
}

/// The "any character" term always prints as `.`.
impl Display for Dot {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(".")
    }
}

/// A character class escape prints as its kind.
impl Display for CharacterClassEscape {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let kind = &self.kind;
        write!(f, "{kind}")
    }
}

/// Maps each escape kind to its two-character escape sequence.
impl Display for CharacterClassEscapeKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let escape = match self {
            Self::D => "\\d",
            Self::NegativeD => "\\D",
            Self::S => "\\s",
            Self::NegativeS => "\\S",
            Self::W => "\\w",
            Self::NegativeW => "\\W",
        };
        f.write_str(escape)
    }
}

/// Prints `\p{name}` or `\p{name=value}`, using `\P` when negated.
impl<'a> Display for UnicodePropertyEscape<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let marker = if self.negative { 'P' } else { 'p' };
        let name = &self.name;

        match &self.value {
            Some(value) => write!(f, "\\{marker}{{{name}={value}}}"),
            None => write!(f, "\\{marker}{{{name}}}"),
        }
    }
}

/// Prints a bracketed character class.
///
/// The output is `[`, an optional `^` when the class is negated, the class
/// contents joined by the separator for the class kind (nothing for a union,
/// `--` for subtraction, `&&` for intersection), and a closing `]`.
impl<'a> Display for CharacterClass<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("[")?;

        // The negation marker must be printed even when the class is empty:
        // `[^]` (matches anything) and `[]` (matches nothing) are different.
        if self.negative {
            f.write_str("^")?;
        }

        if !self.body.is_empty() {
            let sep = match self.kind {
                CharacterClassContentsKind::Union => "",
                CharacterClassContentsKind::Subtraction => "--",
                CharacterClassContentsKind::Intersection => "&&",
            };
            write_join(f, sep, &self.body)?;
        }

        f.write_str("]")
    }
}

/// A class member prints exactly as the node it wraps.
impl<'a> Display for CharacterClassContents<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        /// Writes `node` with its own `Display` implementation.
        fn emit<T: Display>(f: &mut fmt::Formatter<'_>, node: &T) -> fmt::Result {
            write!(f, "{node}")
        }

        match self {
            // Variants formatted straight from the matched binding.
            Self::CharacterClassEscape(node) => emit(f, node),
            Self::Character(node) => emit(f, node),
            // Variants whose payload is reached through `as_ref()`.
            Self::CharacterClassRange(node) => emit(f, node.as_ref()),
            Self::UnicodePropertyEscape(node) => emit(f, node.as_ref()),
            Self::NestedCharacterClass(node) => emit(f, node.as_ref()),
            Self::ClassStringDisjunction(node) => emit(f, node.as_ref()),
        }
    }
}

/// Prints the range as `<min>-<max>`.
impl Display for CharacterClassRange {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (low, high) = (&self.min, &self.max);
        write!(f, "{low}-{high}")
    }
}

/// Prints `\q{...}` with the contained class strings separated by `|`.
impl<'a> Display for ClassStringDisjunction<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("\\q{")?;
        write_join(f, "|", &self.body)?;
        f.write_str("}")
    }
}

/// The characters of a class string are printed back to back.
impl<'a> Display for ClassString<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let characters = &self.body;
        write_join(f, "", characters)
    }
}

/// Prints `(<body>)`, or `(?<name><body>)` when the group is named.
impl<'a> Display for CapturingGroup<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let inner = &self.body;
        match &self.name {
            Some(label) => write!(f, "(?<{label}>{inner})"),
            None => write!(f, "({inner})"),
        }
    }
}

/// Prints a non-capturing group, including any inline modifiers.
///
/// The output is `(?`, the letters of the enabled modifiers, then — only when
/// at least one modifier is disabled — a single `-` followed by the letters of
/// the disabled modifiers, and finally `:<body>)`.
impl<'a> Display for IgnoreGroup<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        /// Writes the letter of every flag that is set, with no separators.
        // NOTE(review): `y` is the letter emitted for `sticky`; confirm it matches
        // the modifier letters the parser accepts.
        fn write_flags(f: &mut fmt::Formatter<'_>, flags: &ModifierFlags) -> fmt::Result {
            if flags.ignore_case {
                f.write_str("i")?;
            }
            if flags.sticky {
                f.write_str("y")?;
            }
            if flags.multiline {
                f.write_str("m")?;
            }
            Ok(())
        }

        /// Returns `true` when at least one flag is set.
        fn any_set(flags: &ModifierFlags) -> bool {
            flags.ignore_case || flags.sticky || flags.multiline
        }

        f.write_str("(?")?;

        // Enabled modifiers follow `(?` directly; nothing is written in front of
        // each letter.
        if let Some(enabling) = &self.enabling_modifiers {
            write_flags(f, enabling)?;
        }

        // Disabled modifiers share one leading `-` (`(?-im:...)`), which is
        // skipped entirely when no flag is set so `(?-:...)` is never produced.
        if let Some(disabling) = &self.disabling_modifiers {
            if any_set(disabling) {
                f.write_str("-")?;
                write_flags(f, disabling)?;
            }
        }

        write!(f, ":{})", self.body)
    }
}

/// Prints a numeric backreference as a backslash followed by the index.
impl Display for IndexedReference {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let index = &self.index;
        write!(f, "\\{index}")
    }
}

/// Prints a named backreference as `\k<name>`.
impl<'a> Display for NamedReference<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The group name must be wrapped in angle brackets, mirroring the
        // `(?<name>...)` form used when printing a named capturing group.
        write!(f, "\\k<{}>", self.name)
    }
}

/// Writes every item of `items` to `f`, placing `sep` between consecutive
/// items (never before the first or after the last).
///
/// Writes nothing for an empty iterator. Stops at, and returns, the first
/// formatting error.
fn write_join<S, I, E>(f: &mut fmt::Formatter<'_>, sep: S, items: I) -> fmt::Result
where
    S: AsRef<str>,
    E: Display,
    I: IntoIterator<Item = E>,
{
    let separator = sep.as_ref();

    for (position, item) in items.into_iter().enumerate() {
        if position > 0 {
            f.write_str(separator)?;
        }
        write!(f, "{item}")?;
    }

    Ok(())
}

#[cfg(test)]
mod test {
    use oxc_allocator::Allocator;

    use crate::{Parser, ParserOptions};

    /// Round-trip fixtures: each entry is a source literal paired with the
    /// expected `Display` output, or `None` when the output must equal the source.
    static CASES: &[(&str, /* expected display */ Option<&str>)] = &[
        ("/ab/", None),
        ("/abc/i", None),
        ("/emo👈🏻ji/u", None),
        ("/ab|c/i", None),
        ("/a|b+|c/i", None),
        ("/(?=a)|(?<=b)|(?!c)|(?<!d)/i", None),
        (r"/(cg)(?<n>cg)(?:g)/", None),
        (r"/^(?=ab)\b(?!cd)(?<=ef)\B(?<!gh)$/", None),
        (r"/^(?<!ab)$/", None),
        (r"/[abc]/", None),
        (r"/[a&&b]/v", None),
        (r"/[a--b]/v", None),
        (r"/[^a--b--c]/v", None),
        (r"/[a[b[c[d[e[f[g[h[i[j[k[l]]]]]]]]]]]]/v", None),
        (r"/[\q{abc|d|e|}]/v", None),
        (r"/\p{Basic_Emoji}/v", None),
        // The original ordering of the flags is not preserved.
        ("/abcd/igv", Some("/abcd/giv")),
        (r"/\d/ug", Some(r"/\d/gu")),
        // "Syntax characters" are always displayed in their escaped form.
        (r"/c]/", Some(r"/c\]/")),
        (r"/[|\]]/", Some(r"/[\|\]]/")),
        ("/a{0}|b{1,2}|c{3,}/i", None),
        // NOTE: a surrogate cannot be displayed without access to the character
        // next to it, so it can't be printed correctly through the `Display` trait.
        // Its unicode code point is printed instead.
        (r"/Em🥹j/", Some(r"/Em\uD83E\uDD79j/")),
        // TODO: the `Character::kind` field is currently **NOT** respected, so every
        // character is lowered to its final form.
        (r"/\n\cM\0\x41\./", Some("/\n\r\0A\\./")),
        (r"/\n\cM\0\x41\u1234\./u", Some("/\n\r\0Aሴ\\./u")),
        (r"/\n\cM\0\x41\u{1f600}\./u", Some("/\n\r\0A😀\\./u")),
        // TODO: `\b` is parsed as `\u{8}` which is wrong.
        // (r"/[\bb]/", Some(r"/[\bb]/")),
    ];

    /// Parses `source` and asserts that it displays as `expect`, falling back
    /// to `source` itself when no explicit expectation is given.
    fn test_display(allocator: &Allocator, source: &str, expect: Option<&str>) {
        let expected = match expect {
            Some(text) => text,
            None => source,
        };
        let ast = Parser::new(allocator, source, ParserOptions::default()).parse().unwrap();
        assert_eq!(expected, ast.to_string());
    }

    #[test]
    fn test() {
        let allocator = Allocator::default();
        for &(source, expect) in CASES {
            test_display(&allocator, source, expect);
        }
    }
}
1 change: 1 addition & 0 deletions crates/oxc_regular_expression/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

pub mod ast;
mod body_parser;
mod display;
mod flag_parser;
mod literal_parser;
mod options;
Expand Down

0 comments on commit 1a6a980

Please sign in to comment.