Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(transformer): support all /regex/ to new RegExp transforms #5387

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 8 additions & 7 deletions crates/oxc_transformer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@ test = false
doctest = false

[dependencies]
oxc_ast = { workspace = true }
oxc_span = { workspace = true }
oxc_allocator = { workspace = true }
oxc_diagnostics = { workspace = true }
oxc_syntax = { workspace = true, features = ["to_js_string"] }
oxc_traverse = { workspace = true }
oxc_semantic = { workspace = true }
oxc_ast = { workspace = true }
oxc_span = { workspace = true }
oxc_allocator = { workspace = true }
oxc_diagnostics = { workspace = true }
oxc_syntax = { workspace = true, features = ["to_js_string"] }
oxc_traverse = { workspace = true }
oxc_semantic = { workspace = true }
oxc_regular_expression = { workspace = true }

dashmap = { workspace = true }
indexmap = { workspace = true }
Expand Down
9 changes: 8 additions & 1 deletion crates/oxc_transformer/src/env/data/babel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,17 @@ use crate::env::{targets::version::Version, Versions};
fn features() -> &'static FxHashMap<String, Versions> {
static FEATURES: OnceLock<FxHashMap<String, Versions>> = OnceLock::new();
FEATURES.get_or_init(|| {
let map: FxHashMap<String, FxHashMap<String, String>> =
let mut map: FxHashMap<String, FxHashMap<String, String>> =
serde_json::from_str(include_str!("./@babel/compat_data/data/plugins.json"))
.expect("failed to parse json");

map.extend(
serde_json::from_str::<FxHashMap<String, FxHashMap<String, String>>>(include_str!(
"./esbuild/features.json"
))
.expect("failed to parse json"),
);

map.into_iter()
.map(|(feature, mut versions)| {
(feature, {
Expand Down
23 changes: 23 additions & 0 deletions crates/oxc_transformer/src/env/data/esbuild/features.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"esbuild-regexp-lookbehind-assertions": {
"chrome": "62",
"deno": "1.0",
"edge": "79",
"firefox": "78",
"hermes": "0.7",
"ios": "16.4",
"node": "8.10",
"opera": "49",
"safari": "16.4"
},
"esbuild-regexp-match-indices": {
"chrome": "90",
"deno": "1.8",
"edge": "90",
"firefox": "88",
"ios": "15.0",
"node": "16.0",
"opera": "76",
"safari": "15.0"
}
}
7 changes: 6 additions & 1 deletion crates/oxc_transformer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ mod es2019;
mod es2020;
mod es2021;
mod react;
mod regexp;
mod typescript;

mod helpers {
Expand All @@ -41,6 +42,7 @@ use oxc_diagnostics::OxcDiagnostic;
use oxc_semantic::{ScopeTree, SymbolTable};
use oxc_span::{SourceType, SPAN};
use oxc_traverse::{traverse_mut, Traverse, TraverseCtx};
use regexp::RegExp;

pub use crate::{
compiler_assumptions::CompilerAssumptions,
Expand Down Expand Up @@ -74,6 +76,7 @@ pub struct Transformer<'a> {
x2_es2018: ES2018<'a>,
x2_es2016: ES2016<'a>,
x3_es2015: ES2015<'a>,
x4_regexp: RegExp<'a>,
}

impl<'a> Transformer<'a> {
Expand Down Expand Up @@ -102,7 +105,8 @@ impl<'a> Transformer<'a> {
x2_es2019: ES2019::new(options.es2019, Rc::clone(&ctx)),
x2_es2018: ES2018::new(options.es2018, Rc::clone(&ctx)),
x2_es2016: ES2016::new(options.es2016, Rc::clone(&ctx)),
x3_es2015: ES2015::new(options.es2015, ctx),
x3_es2015: ES2015::new(options.es2015, Rc::clone(&ctx)),
x4_regexp: RegExp::new(options.regexp, ctx),
}
}

Expand Down Expand Up @@ -177,6 +181,7 @@ impl<'a> Traverse<'a> for Transformer<'a> {
self.x2_es2018.enter_expression(expr, ctx);
self.x2_es2016.enter_expression(expr, ctx);
self.x3_es2015.enter_expression(expr, ctx);
self.x4_regexp.enter_expression(expr, ctx);
}

fn exit_expression(&mut self, expr: &mut Expression<'a>, ctx: &mut TraverseCtx<'a>) {
Expand Down
27 changes: 27 additions & 0 deletions crates/oxc_transformer/src/options/transformer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use crate::{
es2021::ES2021Options,
options::babel::BabelOptions,
react::ReactOptions,
regexp::RegExpOptions,
typescript::TypeScriptOptions,
};

Expand All @@ -38,6 +39,8 @@ pub struct TransformOptions {
/// [preset-react](https://babeljs.io/docs/babel-preset-react)
pub react: ReactOptions,

pub regexp: RegExpOptions,

pub es2015: ES2015Options,

pub es2016: ES2016Options,
Expand All @@ -60,6 +63,7 @@ impl TransformOptions {
es2019: ES2019Options::from_targets_and_bugfixes(targets, bugfixes),
es2020: ES2020Options::from_targets_and_bugfixes(targets, bugfixes),
es2021: ES2021Options::from_targets_and_bugfixes(targets, bugfixes),
regexp: RegExpOptions::from_targets_and_bugfixes(targets, bugfixes),
..Default::default()
}
}
Expand Down Expand Up @@ -215,6 +219,29 @@ impl TransformOptions {
}
};

let regexp = transformer_options.regexp;
if !regexp.sticky_flag {
transformer_options.regexp.sticky_flag = options.has_plugin("transform-sticky-regex");
}
if !regexp.unicode_flag {
transformer_options.regexp.unicode_flag = options.has_plugin("transform-unicode-regex");
}
if !regexp.dot_all_flag {
transformer_options.regexp.dot_all_flag = options.has_plugin("transform-dotall-regex");
}
if !regexp.named_capture_groups {
transformer_options.regexp.named_capture_groups =
options.has_plugin("transform-named-capturing-groups-regex");
}
if !regexp.unicode_property_escapes {
transformer_options.regexp.unicode_property_escapes =
options.has_plugin("transform-unicode-property-regex");
}
if !regexp.set_notation {
transformer_options.regexp.set_notation =
options.has_plugin("transform-unicode-sets-regex");
}

transformer_options.assumptions = if options.assumptions.is_null() {
CompilerAssumptions::default()
} else {
Expand Down
218 changes: 218 additions & 0 deletions crates/oxc_transformer/src/regexp/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
//! RegExp Transformer
//!
//! This module supports various RegExp plugins to handle unsupported RegExp literal features.
//! When an unsupported feature is detected, these plugins convert the RegExp literal into
//! a `new RegExp()` constructor call to avoid syntax errors.
//!
//! Note: This transform only prevents parse-time syntax errors. To get the correct runtime behavior, you will still need to include a polyfill for the `RegExp` constructor in your code.
//!
//! ### ES2015
//!
//! #### Sticky flag (`y`)
//! - @babel/plugin-transform-sticky-regex: <https://babeljs.io/docs/en/babel-plugin-transform-sticky-regex>
//!
//! #### Unicode flag (`u`)
//! - @babel/plugin-transform-unicode-regex: <https://babeljs.io/docs/en/babel-plugin-transform-unicode-regex>
//!
//! ### ES2018
//!
//! #### DotAll flag (`s`)
//! - @babel/plugin-transform-dotall-regex: <https://babeljs.io/docs/en/babel-plugin-transform-dotall-regex>
//! - Spec: ECMAScript 2018: <https://262.ecma-international.org/9.0/#sec-get-regexp.prototype.dotAll>
//!
//! #### Lookbehind assertions (`/(?<=x)/` and `/(?<!x)/`)
//! - Implementation: Same as esbuild's handling
//!
//! #### Named capture groups (`(?<name>x)`)
//! - @babel/plugin-transform-named-capturing-groups-regex: <https://babeljs.io/docs/en/babel-plugin-transform-named-capturing-groups-regex>
//!
//! #### Unicode property escapes (`\p{...}` and `\P{...}`)
//! - @babel/plugin-transform-unicode-property-regex: <https://babeljs.io/docs/en/babel-plugin-proposal-unicode-property-regex>
//!
//! ### ES2022
//!
//! #### Match indices flag (`d`)
//! - Implementation: Same as esbuild's handling
//!
//! ### ES2024
//!
//! #### Set notation + properties of strings (`v`)
//! - @babel/plugin-transform-unicode-sets-regex: <https://babeljs.io/docs/en/babel-plugin-proposal-unicode-sets-regex>
//! - TC39 Proposal: <https://github.com/tc39/proposal-regexp-set-notation>

mod options;

use std::borrow::Cow;
use std::mem;

pub use options::RegExpOptions;
use oxc_allocator::Box;
use oxc_allocator::Vec;
use oxc_ast::ast::*;
use oxc_regular_expression::ast::{
CharacterClass, CharacterClassContents, LookAroundAssertionKind, Pattern, Term,
};
use oxc_semantic::ReferenceFlags;
use oxc_span::Atom;
use oxc_traverse::{Traverse, TraverseCtx};

use crate::context::Ctx;

/// Transformer that rewrites RegExp literals using unsupported features
/// into `new RegExp(...)` constructor calls.
pub struct RegExp<'a> {
    /// Shared transformer context. Stored at construction; not read anywhere in this module.
    _ctx: Ctx<'a>,
    /// Switches selecting which flags / pattern features are treated as unsupported.
    options: RegExpOptions,
}

impl<'a> RegExp<'a> {
pub fn new(options: RegExpOptions, ctx: Ctx<'a>) -> Self {
Self { _ctx: ctx, options }
}
}

impl<'a> Traverse<'a> for RegExp<'a> {
fn enter_expression(
&mut self,
expr: &mut Expression<'a>,
ctx: &mut oxc_traverse::TraverseCtx<'a>,
) {
let Expression::RegExpLiteral(ref mut regexp) = expr else {
return;
};

if !self.has_unsupported_regular_expression_flags(regexp.regex.flags)
&& self.requires_pattern_analysis()
{
match try_parse_pattern(regexp, ctx) {
Ok(pattern) => {
let is_unsupported = self.has_unsupported_regular_expression_pattern(&pattern);
regexp.regex.pattern = RegExpPattern::Pattern(pattern);
if !is_unsupported {
return;
}
}
Err(err) => {
regexp.regex.pattern = RegExpPattern::Invalid(err);
return;
}
}
};

let pattern_source: Cow<'_, str> = match &regexp.regex.pattern {
RegExpPattern::Raw(raw) | RegExpPattern::Invalid(raw) => Cow::Borrowed(raw),
RegExpPattern::Pattern(p) => Cow::Owned(p.to_string()),
};

let callee = {
let symbol_id = ctx.scopes().find_binding(ctx.current_scope_id(), "RegExp");
let ident = ctx.create_reference_id(
regexp.span,
Atom::from("RegExp"),
symbol_id,
ReferenceFlags::read(),
);
ctx.ast.expression_from_identifier_reference(ident)
};

let mut arguments = ctx.ast.vec_with_capacity(2);
arguments.push(
ctx.ast.argument_expression(
ctx.ast.expression_string_literal(regexp.span, pattern_source),
),
);

let flags = regexp.regex.flags.to_string();
let flags =
ctx.ast.argument_expression(ctx.ast.expression_string_literal(regexp.span, flags));
arguments.push(flags);

*expr = ctx.ast.expression_new(
regexp.span,
callee,
arguments,
None::<TSTypeParameterInstantiation>,
);
}
}

impl<'a> RegExp<'a> {
    /// Whether any pattern-level feature is enabled, i.e. whether the pattern must be parsed
    /// to decide if the literal needs transforming.
    fn requires_pattern_analysis(&self) -> bool {
        self.options.named_capture_groups
            || self.options.unicode_property_escapes
            || self.options.look_behind_assertions
    }

    /// Check if the regular expression contains any unsupported flags.
    ///
    /// A flag counts as unsupported only when the matching option is enabled.
    fn has_unsupported_regular_expression_flags(&self, flags: RegExpFlags) -> bool {
        flags.iter().any(|f| match f {
            RegExpFlags::S if self.options.dot_all_flag => true,
            RegExpFlags::Y if self.options.sticky_flag => true,
            RegExpFlags::U if self.options.unicode_flag => true,
            RegExpFlags::D if self.options.match_indices => true,
            RegExpFlags::V if self.options.set_notation => true,
            _ => false,
        })
    }

    /// Check if the regular expression contains any unsupported syntax.
    ///
    /// Based on parsed regular expression pattern. Only the direct terms of each top-level
    /// alternative are inspected.
    fn has_unsupported_regular_expression_pattern(&self, pattern: &Pattern<'a>) -> bool {
        let check_terms = |terms: &Vec<'a, Term>| {
            terms.iter().any(|element| match element {
                // Only *named* groups are the unsupported feature; a plain `(x)` group
                // must not force a rewrite.
                Term::CapturingGroup(group) => {
                    self.options.named_capture_groups && group.name.is_some()
                }
                Term::UnicodePropertyEscape(_) => self.options.unicode_property_escapes,
                Term::CharacterClass(character_class) => {
                    self.options.unicode_property_escapes
                        && has_unicode_property_escape_character_class(character_class)
                }
                // Lookahead assertions are not affected; only the two lookbehind kinds are.
                Term::LookAroundAssertion(assertion) => {
                    self.options.look_behind_assertions
                        && matches!(
                            assertion.kind,
                            LookAroundAssertionKind::Lookbehind
                                | LookAroundAssertionKind::NegativeLookbehind
                        )
                }
                _ => false,
            })
        };

        pattern.body.body.iter().any(|alternative| check_terms(&alternative.body))
    }
}

/// Returns `true` if `character_class` contains a unicode property escape, either directly
/// or inside any nested character class.
fn has_unicode_property_escape_character_class(character_class: &CharacterClass) -> bool {
    for item in character_class.body.iter() {
        let found = match item {
            CharacterClassContents::UnicodePropertyEscape(_) => true,
            CharacterClassContents::NestedCharacterClass(nested) => {
                has_unicode_property_escape_character_class(nested)
            }
            _ => false,
        };
        if found {
            return true;
        }
    }
    false
}

/// Take the pattern out of `literal` and return it in parsed form.
///
/// The literal's pattern is left as `RegExpPattern::Raw("")`; the caller is expected to
/// store a pattern back.
///
/// - `Raw` source is parsed with options derived from the literal's span and flags;
///   on parse failure the raw source is returned as the error.
/// - An already parsed `Pattern` is returned as is.
/// - An `Invalid` pattern yields its raw source as the error.
fn try_parse_pattern<'a>(
    literal: &mut RegExpLiteral<'a>,
    ctx: &mut TraverseCtx<'a>,
) -> Result<Box<'a, Pattern<'a>>, &'a str> {
    use oxc_regular_expression::{ParserOptions, PatternParser};

    // Move the current pattern out, leaving an empty raw placeholder behind.
    let taken = mem::replace(&mut literal.regex.pattern, RegExpPattern::Raw(""));

    let raw = match taken {
        RegExpPattern::Pattern(parsed) => return Ok(parsed),
        RegExpPattern::Invalid(source) => return Err(source),
        RegExpPattern::Raw(source) => source,
    };

    let flags = literal.regex.flags;
    let unicode_sets_mode = flags.contains(RegExpFlags::V);
    let options = ParserOptions {
        span_offset: literal.span.start + 1, // exclude `/`
        unicode_mode: flags.contains(RegExpFlags::U) || unicode_sets_mode,
        unicode_sets_mode,
    };

    match PatternParser::new(ctx.ast.allocator, raw, options).parse() {
        Ok(parsed) => Ok(ctx.alloc(parsed)),
        Err(_) => Err(raw),
    }
}
Loading