Skip to content

Commit

Permalink
feat(transformer): support all regexp plugins
Browse files Browse the repository at this point in the history
  • Loading branch information
Dunqing committed Sep 1, 2024
1 parent 2499cb9 commit 8140457
Show file tree
Hide file tree
Showing 29 changed files with 390 additions and 3 deletions.
9 changes: 8 additions & 1 deletion crates/oxc_transformer/src/env/data/babel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,17 @@ use crate::env::{targets::version::Version, Versions};
fn features() -> &'static FxHashMap<String, Versions> {
static FEATURES: OnceLock<FxHashMap<String, Versions>> = OnceLock::new();
FEATURES.get_or_init(|| {
let map: FxHashMap<String, FxHashMap<String, String>> =
let mut map: FxHashMap<String, FxHashMap<String, String>> =
serde_json::from_str(include_str!("./@babel/compat_data/data/plugins.json"))
.expect("failed to parse json");

map.extend(
serde_json::from_str::<FxHashMap<String, FxHashMap<String, String>>>(include_str!(
"./esbuild/features.json"
))
.expect("failed to parse json"),
);

map.into_iter()
.map(|(feature, mut versions)| {
(feature, {
Expand Down
23 changes: 23 additions & 0 deletions crates/oxc_transformer/src/env/data/esbuild/features.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"esbuild-regexp-lookbehind-assertions": {
"chrome": "62",
"deno": "1.0",
"edge": "79",
"firefox": "78",
"hermes": "0.7",
"ios": "16.4",
"node": "8.10",
"opera": "49",
"safari": "16.4"
},
"esbuild-regexp-match-indices": {
"chrome": "90",
"deno": "1.8",
"edge": "90",
"firefox": "88",
"ios": "15.0",
"node": "16.0",
"opera": "76",
"safari": "15.0"
}
}
7 changes: 6 additions & 1 deletion crates/oxc_transformer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ mod es2019;
mod es2020;
mod es2021;
mod react;
mod regexp;
mod typescript;

mod helpers {
Expand All @@ -41,6 +42,7 @@ use oxc_diagnostics::OxcDiagnostic;
use oxc_semantic::{ScopeTree, SymbolTable};
use oxc_span::{SourceType, SPAN};
use oxc_traverse::{traverse_mut, Traverse, TraverseCtx};
use regexp::RegExp;

pub use crate::{
compiler_assumptions::CompilerAssumptions,
Expand Down Expand Up @@ -74,6 +76,7 @@ pub struct Transformer<'a> {
x2_es2018: ES2018<'a>,
x2_es2016: ES2016<'a>,
x3_es2015: ES2015<'a>,
x4_regexp: RegExp<'a>,
}

impl<'a> Transformer<'a> {
Expand Down Expand Up @@ -102,7 +105,8 @@ impl<'a> Transformer<'a> {
x2_es2019: ES2019::new(options.es2019, Rc::clone(&ctx)),
x2_es2018: ES2018::new(options.es2018, Rc::clone(&ctx)),
x2_es2016: ES2016::new(options.es2016, Rc::clone(&ctx)),
x3_es2015: ES2015::new(options.es2015, ctx),
x3_es2015: ES2015::new(options.es2015, Rc::clone(&ctx)),
x4_regexp: RegExp::new(options.regexp, ctx),
}
}

Expand Down Expand Up @@ -177,6 +181,7 @@ impl<'a> Traverse<'a> for Transformer<'a> {
self.x2_es2018.enter_expression(expr, ctx);
self.x2_es2016.enter_expression(expr, ctx);
self.x3_es2015.enter_expression(expr, ctx);
self.x4_regexp.enter_expression(expr, ctx);
}

fn exit_expression(&mut self, expr: &mut Expression<'a>, ctx: &mut TraverseCtx<'a>) {
Expand Down
34 changes: 34 additions & 0 deletions crates/oxc_transformer/src/options/transformer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use crate::{
es2021::ES2021Options,
options::babel::BabelOptions,
react::ReactOptions,
regexp::RegExpOptions,
typescript::TypeScriptOptions,
};

Expand All @@ -38,6 +39,8 @@ pub struct TransformOptions {
/// [preset-react](https://babeljs.io/docs/babel-preset-react)
pub react: ReactOptions,

pub regexp: RegExpOptions,

pub es2015: ES2015Options,

pub es2016: ES2016Options,
Expand All @@ -60,6 +63,7 @@ impl TransformOptions {
es2019: ES2019Options::from_targets_and_bugfixes(targets, bugfixes),
es2020: ES2020Options::from_targets_and_bugfixes(targets, bugfixes),
es2021: ES2021Options::from_targets_and_bugfixes(targets, bugfixes),
regexp: RegExpOptions::from_targets_and_bugfixes(targets, bugfixes),
..Default::default()
}
}
Expand Down Expand Up @@ -204,6 +208,36 @@ impl TransformOptions {
})
};

let regexp = transformer_options.regexp;
if !regexp.sticky_flag {
transformer_options
.regexp
.with_sticky_flag(options.has_plugin("transform-sticky-regex"));
}
if !regexp.unicode_flag {
transformer_options
.regexp
.with_unicode_flag(options.has_plugin("transform-unicode-regex"));
}
if !regexp.dot_all_flag {
transformer_options.regexp.with_dot_all(options.has_plugin("transform-dotall-regex"));
}
if !regexp.named_capture_groups {
transformer_options.regexp.with_named_capture_groups(
options.has_plugin("transform-named-capturing-groups-regex"),
);
}
if !regexp.unicode_property_escapes {
transformer_options.regexp.with_unicode_property_escapes(
options.has_plugin("transform-unicode-property-regex"),
);
}
if !regexp.set_notation {
transformer_options
.regexp
.with_regexp_set_notation(options.has_plugin("transform-unicode-sets-regex"));
}

transformer_options.assumptions = if options.assumptions.is_null() {
CompilerAssumptions::default()
} else {
Expand Down
153 changes: 153 additions & 0 deletions crates/oxc_transformer/src/regexp/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
//! All RegExp plugins are supported in this module.
//!
//! ## References:
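//! * esbuild's regular-expression feature detection: <https://github.com/evanw/esbuild/blob/332727499e62315cff4ecaff9fa8b86336555e46/internal/js_parser/js_parser.go#L12667-L12800>
//! * Babel RegExp plugins: `transform-sticky-regex`, `transform-unicode-regex`, `transform-dotall-regex`, `transform-named-capturing-groups-regex`, `transform-unicode-property-regex`, `transform-unicode-sets-regex`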
mod options;

pub use options::RegExpOptions;
use oxc_ast::ast::*;
use oxc_semantic::ReferenceFlags;
use oxc_traverse::Traverse;

use crate::context::Ctx;

/// Transform that rewrites regular expression literals using features the
/// configured options mark as unsupported into `new RegExp(pattern, flags)` calls.
pub struct RegExp<'a> {
// Shared transformer context; stored but not read by this module (hence the `_` prefix).
_ctx: Ctx<'a>,
// Per-feature switches consulted when deciding whether a literal must be rewritten.
options: RegExpOptions,
}

impl<'a> RegExp<'a> {
pub fn new(options: RegExpOptions, ctx: Ctx<'a>) -> Self {
Self { _ctx: ctx, options }
}
}

impl<'a> Traverse<'a> for RegExp<'a> {
    /// Replaces a regular expression literal with `new RegExp(pattern, flags)`
    /// when the literal uses a feature the options mark as unsupported.
    ///
    /// Every other expression, and every supported literal, is left untouched.
    fn enter_expression(
        &mut self,
        expr: &mut Expression<'a>,
        ctx: &mut oxc_traverse::TraverseCtx<'a>,
    ) {
        let Expression::RegExpLiteral(regexp) = expr else {
            return;
        };

        if !self.is_unsupported_regular_expression(regexp) {
            return;
        }

        // Copy out everything needed from the literal up front so the borrow of
        // `expr` has ended by the time the node is overwritten below.
        let span = regexp.span;

        // Escape backslashes and quotes in the pattern.
        // NOTE(review): if the code generator already escapes string literal
        // contents when printing, this pre-escaping would double-escape the
        // pattern — confirm against the codegen.
        let escaped_pattern =
            regexp.regex.pattern.as_str().replace('\\', "\\\\").replace('\"', "\\\"");
        let flags_text = regexp.regex.flags.to_string();

        // Reference to `RegExp`, resolved against whatever binding of that name
        // (if any) is visible from the current scope.
        let binding = ctx.scopes().find_binding(ctx.current_scope_id(), "RegExp");
        let reference =
            ctx.create_reference_id(span, "RegExp".into(), binding, ReferenceFlags::read());
        let callee = ctx.ast.expression_from_identifier_reference(reference);

        // Arguments: the pattern string followed by the flags string.
        let mut arguments = ctx.ast.vec_with_capacity(2);
        let pattern_argument =
            ctx.ast.argument_expression(ctx.ast.expression_string_literal(span, escaped_pattern));
        arguments.push(pattern_argument);
        let flags_argument =
            ctx.ast.argument_expression(ctx.ast.expression_string_literal(span, flags_text));
        arguments.push(flags_argument);

        *expr = ctx.ast.expression_new(
            span,
            callee,
            arguments,
            None::<TSTypeParameterInstantiation>,
        );
    }
}

impl<'a> RegExp<'a> {
    /// Check if the regular expression contains any unsupported features.
    ///
    /// Returns `true` when the literal either carries a flag whose feature is
    /// enabled in the options, or its pattern contains one of the following
    /// with the matching option enabled:
    ///
    /// * a look-behind assertion, `(?<=` or `(?<!`
    /// * a named capture group, `(?<name>`
    /// * a unicode property escape, `\p{..}` or `\P{..}`, under the `u` flag
    ///
    /// A pattern with an unmatched `)` is also reported as unsupported.
    /// The `g`, `i` and `m` flags are always considered supported.
    ///
    /// Port from [esbuild](https://github.com/evanw/esbuild/blob/332727499e62315cff4ecaff9fa8b86336555e46/internal/js_parser/js_parser.go#L12667-L12800)
    fn is_unsupported_regular_expression(&mut self, literal: &RegExpLiteral<'a>) -> bool {
        let regex = &literal.regex;
        let flags = &regex.flags;
        let pattern = regex.pattern.as_str();

        let has_unsupported_flag = flags.iter().any(|flag| match flag {
            // Always supported, so they never force a rewrite on their own.
            RegExpFlags::G | RegExpFlags::I | RegExpFlags::M => false,
            RegExpFlags::S => self.options.dot_all_flag,
            RegExpFlags::Y => self.options.sticky_flag,
            RegExpFlags::U => self.options.unicode_flag,
            RegExpFlags::D => self.options.match_indices,
            RegExpFlags::V => self.options.set_notation,
            _ => false,
        });

        if has_unsupported_flag {
            return true;
        }

        // Scan by byte: every delimiter of interest is ASCII, and an ASCII byte
        // never occurs inside a multi-byte UTF-8 sequence. `i` is therefore a
        // byte offset throughout, and slicing `pattern` right after an ASCII
        // byte always lands on a char boundary.
        let bytes = pattern.as_bytes();
        let is_unicode = flags.contains(RegExpFlags::U);
        let mut paren_depth = 0;
        let mut i = 0;

        while i < bytes.len() {
            let byte = bytes[i];
            i += 1;

            match byte {
                b'[' => {
                    // Skip the character class; parentheses inside it are literal.
                    while i < bytes.len() {
                        let inner = bytes[i];
                        i += 1;
                        match inner {
                            b']' => break,
                            // Skip the escaped byte so `\]` does not end the class.
                            b'\\' => i += 1,
                            _ => {}
                        }
                    }
                }
                b'(' => {
                    let tail = &pattern[i..];
                    if tail.starts_with("?<=") || tail.starts_with("?<!") {
                        if self.options.look_behind_assertions {
                            return true;
                        }
                    } else if self.options.named_capture_groups
                        && tail.starts_with("?<")
                        && tail[2..].contains('>')
                    {
                        return true;
                    }
                    paren_depth += 1;
                }
                b')' => {
                    if paren_depth == 0 {
                        return true;
                    }
                    paren_depth -= 1;
                }
                b'\\' => {
                    // `i` already points just past the backslash.
                    let tail = &pattern[i..];
                    if self.options.unicode_property_escapes
                        && is_unicode
                        && (tail.starts_with("p{") || tail.starts_with("P{"))
                        && tail[2..].contains('}')
                    {
                        return true;
                    }
                    // Skip the escaped byte so `\(`, `\)` and `\[` are not
                    // treated as structural characters.
                    i += 1;
                }
                _ => {}
            }
        }

        false
    }
}
80 changes: 80 additions & 0 deletions crates/oxc_transformer/src/regexp/options.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
use crate::env::{can_enable_plugin, Versions};

/// Feature switches for the RegExp transform.
///
/// Each field set to `true` marks the corresponding regular expression feature
/// as needing a rewrite; all fields default to `false`.
#[derive(Default, Debug, Clone, Copy)]
pub struct RegExpOptions {
/// Sticky flag (`y`); derived from `transform-sticky-regex`.
pub sticky_flag: bool,
/// Unicode flag (`u`); derived from `transform-unicode-regex`.
pub unicode_flag: bool,
/// Dot-all flag (`s`); derived from `transform-dotall-regex`.
pub dot_all_flag: bool,
/// Look-behind assertions (`(?<=` / `(?<!`); derived from `esbuild-regexp-lookbehind-assertions`.
pub look_behind_assertions: bool,
/// Named capture groups (`(?<name>`); derived from `transform-named-capturing-groups-regex`.
pub named_capture_groups: bool,
/// Unicode property escapes (`\p{` / `\P{`); derived from `transform-unicode-property-regex`.
pub unicode_property_escapes: bool,
/// Match indices flag (`d`); derived from `esbuild-regexp-match-indices`.
pub match_indices: bool,
/// Set notation flag (`v`); derived from `transform-unicode-sets-regex`.
pub set_notation: bool,
}

impl RegExpOptions {
    /// Sets whether the sticky flag (`y`) is treated as unsupported.
    pub fn with_sticky_flag(&mut self, value: bool) -> &Self {
        self.sticky_flag = value;
        self
    }

    /// Sets whether the unicode flag (`u`) is treated as unsupported.
    pub fn with_unicode_flag(&mut self, value: bool) -> &Self {
        self.unicode_flag = value;
        self
    }

    /// Sets whether the dot-all flag (`s`) is treated as unsupported.
    pub fn with_dot_all(&mut self, value: bool) -> &Self {
        self.dot_all_flag = value;
        self
    }

    /// Sets whether look-behind assertions are treated as unsupported.
    pub fn with_look_behind_assertions(&mut self, value: bool) -> &Self {
        self.look_behind_assertions = value;
        self
    }

    /// Sets whether look-behind assertions are treated as unsupported.
    ///
    /// Despite its name this has always written `look_behind_assertions`, not
    /// `unicode_property_escapes`. It is kept only so existing callers keep
    /// compiling; prefer [`Self::with_look_behind_assertions`], and use
    /// [`Self::with_unicode_property_escapes`] for unicode property escapes.
    pub fn with_unicode_property_regex(&mut self, value: bool) -> &Self {
        self.with_look_behind_assertions(value)
    }

    /// Sets whether named capture groups are treated as unsupported.
    pub fn with_named_capture_groups(&mut self, value: bool) -> &Self {
        self.named_capture_groups = value;
        self
    }

    /// Sets whether unicode property escapes are treated as unsupported.
    pub fn with_unicode_property_escapes(&mut self, value: bool) -> &Self {
        self.unicode_property_escapes = value;
        self
    }

    /// Sets whether the match indices flag (`d`) is treated as unsupported.
    pub fn with_regexp_match_indices(&mut self, value: bool) -> &Self {
        self.match_indices = value;
        self
    }

    /// Sets whether the set notation flag (`v`) is treated as unsupported.
    pub fn with_regexp_set_notation(&mut self, value: bool) -> &Self {
        self.set_notation = value;
        self
    }

    /// Builds the options by asking `can_enable_plugin` about each feature's
    /// plugin name for the given `targets` and `bugfixes` setting.
    #[must_use]
    pub fn from_targets_and_bugfixes(targets: Option<&Versions>, bugfixes: bool) -> Self {
        Self {
            sticky_flag: can_enable_plugin("transform-sticky-regex", targets, bugfixes),
            unicode_flag: can_enable_plugin("transform-unicode-regex", targets, bugfixes),
            dot_all_flag: can_enable_plugin("transform-dotall-regex", targets, bugfixes),
            look_behind_assertions: can_enable_plugin(
                "esbuild-regexp-lookbehind-assertions",
                targets,
                bugfixes,
            ),
            named_capture_groups: can_enable_plugin(
                "transform-named-capturing-groups-regex",
                targets,
                bugfixes,
            ),
            unicode_property_escapes: can_enable_plugin(
                "transform-unicode-property-regex",
                targets,
                bugfixes,
            ),
            match_indices: can_enable_plugin("esbuild-regexp-match-indices", targets, bugfixes),
            set_notation: can_enable_plugin("transform-unicode-sets-regex", targets, bugfixes),
        }
    }
}
3 changes: 2 additions & 1 deletion tasks/transform_conformance/oxc.snap.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
commit: 3bcfee23

Passed: 10/38
Passed: 17/45

# All Passed:
* babel-plugin-transform-optional-catch-binding
* babel-plugin-transform-arrow-functions
* esbuild-tests


# babel-plugin-transform-nullish-coalescing-operator (0/1)
Expand Down
Loading

0 comments on commit 8140457

Please sign in to comment.