diff --git a/crates/oxc_regular_expression/examples/visitor.rs b/crates/oxc_regular_expression/examples/visitor.rs
new file mode 100644
index 00000000000000..39ead8818cc9b6
--- /dev/null
+++ b/crates/oxc_regular_expression/examples/visitor.rs
@@ -0,0 +1,28 @@
+#![allow(clippy::print_stdout)]
+
+use oxc_allocator::Allocator;
+use oxc_regular_expression::{
+    visit::{RegexAstKind, Visit},
+    Parser, ParserOptions,
+};
+
+struct TestVisitor;
+
+impl Visit<'_> for TestVisitor {
+    fn enter_node(&mut self, kind: RegexAstKind) {
+        println!("enter_node: {kind:?}");
+    }
+
+    fn leave_node(&mut self, kind: RegexAstKind) {
+        println!("leave_node: {kind:?}");
+    }
+}
+
+fn main() {
+    let source_text = r"/(https?:\/\/github\.com\/(([^\s]+)\/([^\s]+))\/([^\s]+\/)?(issues|pull)\/([0-9]+))|(([^\s]+)\/([^\s]+))?#([1-9][0-9]*)($|[\s\:\;\-\(\=])/";
+    let allocator = Allocator::default();
+    let parser = Parser::new(&allocator, source_text, ParserOptions::default());
+    let pattern = parser.parse().unwrap().pattern;
+    let mut visitor = TestVisitor;
+    visitor.visit_pattern(&pattern);
+}
diff --git a/crates/oxc_regular_expression/src/lib.rs b/crates/oxc_regular_expression/src/lib.rs
index d1b19e075df966..ee697a9f02b76e 100644
--- a/crates/oxc_regular_expression/src/lib.rs
+++ b/crates/oxc_regular_expression/src/lib.rs
@@ -9,6 +9,7 @@ mod literal_parser;
 mod options;
 mod span;
 mod surrogate_pair;
+pub mod visit;
 
 mod generated {
     mod derive_clone_in;
diff --git a/crates/oxc_regular_expression/src/visit.rs b/crates/oxc_regular_expression/src/visit.rs
new file mode 100644
index 00000000000000..4e21db214058c2
--- /dev/null
+++ b/crates/oxc_regular_expression/src/visit.rs
@@ -0,0 +1,406 @@
+#![allow(unused_variables, clippy::wildcard_imports)]
+use walk::walk_pattern;
+
+use crate::ast::{
+    Alternative, BoundaryAssertion, CapturingGroup, Character, CharacterClass,
+    CharacterClassContents, CharacterClassEscape, CharacterClassRange, ClassString,
+    ClassStringDisjunction, Disjunction, Dot, IgnoreGroup, IndexedReference, LookAroundAssertion,
+    NamedReference, Pattern, Quantifier, Term, UnicodePropertyEscape,
+};
+use walk::*;
+
+/// A borrowed reference to one regular expression AST node, tagged with the node's type.
+///
+/// The `walk_*` functions build one per node and pass it to [`Visit::enter_node`] and
+/// [`Visit::leave_node`].
+#[derive(Copy, Clone, Debug)]
+pub enum RegexAstKind<'a> {
+    Pattern(&'a Pattern<'a>),
+    Disjunction(&'a Disjunction<'a>),
+    Alternative(&'a Alternative<'a>),
+    Term(&'a Term<'a>),
+    LookAroundAssertion(&'a LookAroundAssertion<'a>),
+    Quantifier(&'a Quantifier<'a>),
+    CapturingGroup(&'a CapturingGroup<'a>),
+    IgnoreGroup(&'a IgnoreGroup<'a>),
+    BoundaryAssertion(&'a BoundaryAssertion),
+    Character(&'a Character),
+    Dot(&'a Dot),
+    CharacterClassEscape(&'a CharacterClassEscape),
+    UnicodePropertyEscape(&'a UnicodePropertyEscape<'a>),
+    CharacterClass(&'a CharacterClass<'a>),
+    CharacterClassContents(&'a CharacterClassContents<'a>),
+    CharacterClassRange(&'a CharacterClassRange),
+    CharacterClassStringDisjunction(&'a ClassStringDisjunction<'a>),
+    CharacterClassString(&'a ClassString<'a>),
+    IndexedReference(&'a IndexedReference),
+    NamedReference(&'a NamedReference<'a>),
+}
+
+/// RegEx syntax tree traversal.
+///
+/// Each `visit_*` method defaults to the matching `walk_*` function, which calls `enter_node`,
+/// visits any child nodes, and then calls `leave_node`.
+pub trait Visit<'a>: Sized {
+    /// Hook called by every `walk_*` function before it visits child nodes; no-op by default.
+    #[inline]
+    fn enter_node(&mut self, kind: RegexAstKind<'a>) {}
+    /// Hook called by every `walk_*` function after it visits child nodes; no-op by default.
+    #[inline]
+    fn leave_node(&mut self, kind: RegexAstKind<'a>) {}
+
+    /// Extends the borrow of `t` to the AST lifetime `'a`.
+    ///
+    /// The `walk_*` functions use this to build the [`RegexAstKind`] they pass to `enter_node`
+    /// and `leave_node`.
+    #[inline]
+    fn alloc<T>(&self, t: &T) -> &'a T {
+        // SAFETY:
+        // Sound only when `t` points into data that really lives for `'a`, e.g. a node that
+        // was handed out by the allocator backing the AST. The signature cannot enforce that, so
+        // passing a shorter-lived reference yields a `&'a T` that outlives its referent.
+        unsafe { std::mem::transmute(t) }
+    }
+
+    #[inline]
+    fn visit_pattern(&mut self, it: &Pattern<'a>) {
+        walk_pattern(self, it);
+    }
+
+    #[inline]
+    fn visit_disjunction(&mut self, it: &Disjunction<'a>) {
+        walk_disjunction(self, it);
+    }
+
+    #[inline]
+    fn visit_alternative(&mut self, it: &Alternative<'a>) {
+        walk_alternative(self, it);
+    }
+
+    #[inline]
+    fn visit_term(&mut self, it: &Term<'a>) {
+        walk_term(self, it);
+    }
+
+    #[inline]
+    fn visit_lookaround_assertion(&mut self, it: &LookAroundAssertion<'a>) {
+        walk_lookaround_assertion(self, it);
+    }
+
+    #[inline]
+    fn visit_quantifier(&mut self, it: &Quantifier<'a>) {
+        walk_quantifier(self, it);
+    }
+
+    #[inline]
+    fn visit_capturing_group(&mut self, it: &CapturingGroup<'a>) {
+        walk_capturing_group(self, it);
+    }
+
+    #[inline]
+    fn visit_ignore_group(&mut self, it: &IgnoreGroup<'a>) {
+        walk_ignore_group(self, it);
+    }
+
+    #[inline]
+    fn visit_boundary_assertion(&mut self, it: &BoundaryAssertion) {
+        walk_boundary_assertion(self, it);
+    }
+
+    #[inline]
+    fn visit_character(&mut self, it: &Character) {
+        walk_character(self, it);
+    }
+
+    #[inline]
+    fn visit_dot(&mut self, it: &Dot) {
+        walk_dot(self, it);
+    }
+
+    #[inline]
+    fn visit_character_class_escape(&mut self, it: &CharacterClassEscape) {
+        walk_character_class_escape(self, it);
+    }
+
+    #[inline]
+    fn visit_unicode_property_escape(&mut self, it: &UnicodePropertyEscape<'a>) {
+        walk_unicode_property_escape(self, it);
+    }
+
+    #[inline]
+    fn visit_character_class(&mut self, it: &CharacterClass<'a>) {
+        walk_character_class(self, it);
+    }
+
+    #[inline]
+    fn visit_character_class_contents(&mut self, it: &CharacterClassContents<'a>) {
+        walk_character_class_contents(self, it);
+    }
+
+    #[inline]
+    fn visit_character_class_range(&mut self, it: &CharacterClassRange) {
+        walk_character_class_range(self, it);
+    }
+
+    #[inline]
+    fn visit_character_class_string_disjunction(&mut self, it: &ClassStringDisjunction<'a>) {
+        walk_character_class_string_disjunction(self, it);
+    }
+
+    #[inline]
+    fn visit_character_class_string(&mut self, it: &ClassString<'a>) {
+        walk_character_class_string(self, it);
+    }
+
+    #[inline]
+    fn visit_indexed_reference(&mut self, it: &IndexedReference) {
+        walk_indexed_reference(self, it);
+    }
+
+    #[inline]
+    fn visit_named_reference(&mut self, it: &NamedReference<'a>) {
+        walk_named_reference(self, it);
+    }
+}
+
+/// Default traversal functions, one `walk_*` per node type, used by the [`Visit`] defaults.
+pub mod walk {
+    use crate::ast::UnicodePropertyEscape;
+
+    use super::*;
+
+    #[inline]
+    pub fn walk_pattern<'a, V: Visit<'a>>(visitor: &mut V, it: &Pattern<'a>) {
+        let kind = RegexAstKind::Pattern(visitor.alloc(it));
+        visitor.enter_node(kind);
+        visitor.visit_disjunction(&it.body);
+        visitor.leave_node(kind);
+    }
+
+    #[inline]
+    pub fn walk_disjunction<'a, V: Visit<'a>>(visitor: &mut V, it: &Disjunction<'a>) {
+        let kind = RegexAstKind::Disjunction(visitor.alloc(it));
+        visitor.enter_node(kind);
+        for alt in &it.body {
+            visitor.visit_alternative(alt);
+        }
+        visitor.leave_node(kind);
+    }
+
+    #[inline]
+    pub fn walk_alternative<'a, V: Visit<'a>>(visitor: &mut V, it: &Alternative<'a>) {
+        let kind = RegexAstKind::Alternative(visitor.alloc(it));
+        visitor.enter_node(kind);
+        for term in &it.body {
+            visitor.visit_term(term);
+        }
+        visitor.leave_node(kind);
+    }
+
+    #[inline]
+    pub fn walk_term<'a, V: Visit<'a>>(visitor: &mut V, it: &Term<'a>) {
+        let kind = RegexAstKind::Term(visitor.alloc(it));
+        visitor.enter_node(kind);
+        match it {
+            Term::LookAroundAssertion(lookaround) => {
+                visitor.visit_lookaround_assertion(lookaround);
+            }
+            Term::Quantifier(quant) => {
+                visitor.visit_quantifier(quant);
+            }
+            Term::CapturingGroup(group) => {
+                visitor.visit_capturing_group(group);
+            }
+            Term::IgnoreGroup(group) => {
+                visitor.visit_ignore_group(group);
+            }
+            Term::BoundaryAssertion(boundary_assertion) => {
+                visitor.visit_boundary_assertion(boundary_assertion);
+            }
+            Term::Character(character) => {
+                visitor.visit_character(character);
+            }
+            Term::Dot(dot) => {
+                visitor.visit_dot(dot);
+            }
+            Term::CharacterClassEscape(character_class_escape) => {
+                visitor.visit_character_class_escape(character_class_escape);
+            }
+            Term::UnicodePropertyEscape(unicode_property_escape) => {
+                visitor.visit_unicode_property_escape(unicode_property_escape);
+            }
+            Term::CharacterClass(character_class) => {
+                visitor.visit_character_class(character_class);
+            }
+            Term::IndexedReference(indexed_reference) => {
+                visitor.visit_indexed_reference(indexed_reference);
+            }
+            Term::NamedReference(named_reference) => {
+                visitor.visit_named_reference(named_reference);
+            }
+        }
+        visitor.leave_node(kind);
+    }
+
+    #[inline]
+    pub fn walk_lookaround_assertion<'a, V: Visit<'a>>(
+        visitor: &mut V,
+        it: &LookAroundAssertion<'a>,
+    ) {
+        let kind = RegexAstKind::LookAroundAssertion(visitor.alloc(it));
+        visitor.enter_node(kind);
+        visitor.visit_disjunction(&it.body);
+        visitor.leave_node(kind);
+    }
+
+    #[inline]
+    pub fn walk_quantifier<'a, V: Visit<'a>>(visitor: &mut V, it: &Quantifier<'a>) {
+        let kind = RegexAstKind::Quantifier(visitor.alloc(it));
+        visitor.enter_node(kind);
+        visitor.visit_term(&it.body);
+        visitor.leave_node(kind);
+    }
+
+    #[inline]
+    pub fn walk_capturing_group<'a, V: Visit<'a>>(visitor: &mut V, it: &CapturingGroup<'a>) {
+        let kind = RegexAstKind::CapturingGroup(visitor.alloc(it));
+        visitor.enter_node(kind);
+        visitor.visit_disjunction(&it.body);
+        visitor.leave_node(kind);
+    }
+
+    #[inline]
+    pub fn walk_ignore_group<'a, V: Visit<'a>>(visitor: &mut V, it: &IgnoreGroup<'a>) {
+        let kind = RegexAstKind::IgnoreGroup(visitor.alloc(it));
+        visitor.enter_node(kind);
+        visitor.visit_disjunction(&it.body);
+        visitor.leave_node(kind);
+    }
+
+    #[inline]
+    pub fn walk_boundary_assertion<'a, V: Visit<'a>>(visitor: &mut V, it: &BoundaryAssertion) {
+        let kind = RegexAstKind::BoundaryAssertion(visitor.alloc(it));
+        visitor.enter_node(kind);
+        visitor.leave_node(kind);
+    }
+
+    #[inline]
+    pub fn walk_character<'a, V: Visit<'a>>(visitor: &mut V, it: &Character) {
+        let kind = RegexAstKind::Character(visitor.alloc(it));
+        visitor.enter_node(kind);
+        visitor.leave_node(kind);
+    }
+
+    #[inline]
+    pub fn walk_dot<'a, V: Visit<'a>>(visitor: &mut V, it: &Dot) {
+        let kind = RegexAstKind::Dot(visitor.alloc(it));
+        visitor.enter_node(kind);
+        visitor.leave_node(kind);
+    }
+
+    #[inline]
+    pub fn walk_character_class_escape<'a, V: Visit<'a>>(
+        visitor: &mut V,
+        it: &CharacterClassEscape,
+    ) {
+        let kind = RegexAstKind::CharacterClassEscape(visitor.alloc(it));
+        visitor.enter_node(kind);
+        visitor.leave_node(kind);
+    }
+
+    #[inline]
+    pub fn walk_unicode_property_escape<'a, V: Visit<'a>>(
+        visitor: &mut V,
+        it: &UnicodePropertyEscape<'a>,
+    ) {
+        let kind = RegexAstKind::UnicodePropertyEscape(visitor.alloc(it));
+        visitor.enter_node(kind);
+        visitor.leave_node(kind);
+    }
+
+    #[inline]
+    pub fn walk_character_class<'a, V: Visit<'a>>(visitor: &mut V, it: &CharacterClass<'a>) {
+        let kind = RegexAstKind::CharacterClass(visitor.alloc(it));
+        visitor.enter_node(kind);
+        for content in &it.body {
+            visitor.visit_character_class_contents(content);
+        }
+        visitor.leave_node(kind);
+    }
+
+    #[inline]
+    pub fn walk_character_class_contents<'a, V: Visit<'a>>(
+        visitor: &mut V,
+        it: &CharacterClassContents<'a>,
+    ) {
+        let kind = RegexAstKind::CharacterClassContents(visitor.alloc(it));
+        visitor.enter_node(kind);
+        match it {
+            CharacterClassContents::CharacterClassRange(character_class_range) => {
+                visitor.visit_character_class_range(character_class_range);
+            }
+            CharacterClassContents::CharacterClassEscape(character_class_escape) => {
+                visitor.visit_character_class_escape(character_class_escape);
+            }
+            CharacterClassContents::UnicodePropertyEscape(unicode_property_escape) => {
+                visitor.visit_unicode_property_escape(unicode_property_escape);
+            }
+            CharacterClassContents::Character(character) => {
+                visitor.visit_character(character);
+            }
+            CharacterClassContents::NestedCharacterClass(character_class) => {
+                visitor.visit_character_class(character_class);
+            }
+            CharacterClassContents::ClassStringDisjunction(class_string_disjunction) => {
+                visitor.visit_character_class_string_disjunction(class_string_disjunction);
+            }
+        }
+        visitor.leave_node(kind);
+    }
+
+    #[inline]
+    pub fn walk_character_class_range<'a, V: Visit<'a>>(visitor: &mut V, it: &CharacterClassRange) {
+        let kind = RegexAstKind::CharacterClassRange(visitor.alloc(it));
+        visitor.enter_node(kind);
+        visitor.visit_character(&it.min);
+        visitor.visit_character(&it.max);
+        visitor.leave_node(kind);
+    }
+
+    #[inline]
+    pub fn walk_character_class_string_disjunction<'a, V: Visit<'a>>(
+        visitor: &mut V,
+        it: &ClassStringDisjunction<'a>,
+    ) {
+        let kind = RegexAstKind::CharacterClassStringDisjunction(visitor.alloc(it));
+        visitor.enter_node(kind);
+        for string in &it.body {
+            visitor.visit_character_class_string(string);
+        }
+        visitor.leave_node(kind);
+    }
+
+    #[inline]
+    pub fn walk_character_class_string<'a, V: Visit<'a>>(visitor: &mut V, it: &ClassString<'a>) {
+        let kind = RegexAstKind::CharacterClassString(visitor.alloc(it));
+        visitor.enter_node(kind);
+        for character in &it.body {
+            visitor.visit_character(character);
+        }
+        visitor.leave_node(kind);
+    }
+
+    #[inline]
+    pub fn walk_indexed_reference<'a, V: Visit<'a>>(visitor: &mut V, it: &IndexedReference) {
+        let kind = RegexAstKind::IndexedReference(visitor.alloc(it));
+        visitor.enter_node(kind);
+        visitor.leave_node(kind);
+    }
+
+    #[inline]
+    pub fn walk_named_reference<'a, V: Visit<'a>>(visitor: &mut V, it: &NamedReference<'a>) {
+        let kind = RegexAstKind::NamedReference(visitor.alloc(it));
+        visitor.enter_node(kind);
+        visitor.leave_node(kind);
+    }
+}