From 0a5a4a9812aaadb822818d644806009f5d42d477 Mon Sep 17 00:00:00 2001 From: camchenry <1514176+camchenry@users.noreply.github.com> Date: Mon, 23 Sep 2024 01:11:02 +0000 Subject: [PATCH] refactor(linter): use parsed patterns for `unicorn/no-hex-escape` (#5985) - part of https://github.com/oxc-project/oxc/issues/5416 This pull request includes significant improvements to the `no_hex_escape` rule in the `oxc_linter` crate. The changes enhance the detection and replacement of hexadecimal escapes within regular expressions by introducing a more comprehensive AST traversal. - Implemented a new `visit_terms` function and its helper functions to traverse the regex AST and apply checks on individual terms. - Introduced the `check_character` function to replace hexadecimal escapes with Unicode escapes within regex patterns. - Updated snapshots to reflect the new diagnostic messages and replacements for hexadecimal escapes in regex patterns. --- .../src/rules/unicorn/no_hex_escape.rs | 94 ++++++++++++++++--- .../src/snapshots/no_hex_escape.snap | 21 ++++- 2 files changed, 96 insertions(+), 19 deletions(-) diff --git a/crates/oxc_linter/src/rules/unicorn/no_hex_escape.rs b/crates/oxc_linter/src/rules/unicorn/no_hex_escape.rs index 9f7449aa0727e..2cd6dd8a1afc3 100644 --- a/crates/oxc_linter/src/rules/unicorn/no_hex_escape.rs +++ b/crates/oxc_linter/src/rules/unicorn/no_hex_escape.rs @@ -4,6 +4,9 @@ use oxc_ast::{ }; use oxc_diagnostics::OxcDiagnostic; use oxc_macros::declare_oxc_lint; +use oxc_regular_expression::ast::{ + Alternative, Character, CharacterClassContents, CharacterKind, Disjunction, Pattern, Term, +}; use oxc_span::Span; use crate::{context::LintContext, rule::Rule, AstNode}; @@ -57,7 +60,7 @@ fn check_escape(value: &str) -> Option { if matched.is_empty() { None } else { - let mut fixed = String::with_capacity(value.len() + matched.len() * 2); + let mut fixed: String = String::with_capacity(value.len() + matched.len() * 2); let mut last = 0; for index in matched { fixed.push_str(&value[last..index - 1]); @@ -90,26 +93,87 @@ impl Rule for NoHexEscape { }); } AstKind::RegExpLiteral(regex) => { - if let Some(fixed) = - check_escape(regex.regex.pattern.source_text(ctx.source_text()).as_ref()) - { - #[allow(clippy::cast_possible_truncation)] - ctx.diagnostic_with_fix(no_hex_escape_diagnostic(regex.span), |fixer| { - fixer.replace( - Span::new( - regex.span.start, - regex.span.end - regex.regex.flags.iter().count() as u32, - ), - format!("/{fixed}/"), - ) - }); - } + let Some(pattern) = regex.regex.pattern.as_pattern() else { + return; + }; + + visit_terms(pattern, &mut |term| match term { + Term::Character(ch) => { + check_character(ch, ctx); + } + Term::CharacterClass(class) => { + for term in &class.body { + match term { + CharacterClassContents::Character(ch) => { + check_character(ch, ctx); + } + CharacterClassContents::CharacterClassRange(range) => { + check_character(&range.min, ctx); + check_character(&range.max, ctx); + } + _ => (), + } + } + } + _ => (), + }); } _ => {} } } } +fn check_character(ch: &Character, ctx: &LintContext) { + if ch.kind == CharacterKind::HexadecimalEscape { + let unicode_escape = format!(r"\u00{}", &ch.span.source_text(ctx.source_text())[2..]); + ctx.diagnostic_with_fix(no_hex_escape_diagnostic(ch.span), |fixer| { + fixer.replace(ch.span, unicode_escape) + }); + } +} + +// TODO: Replace with proper regex AST visitor when available +/// Calls the given closure on every [`Term`] in the [`Pattern`]. +fn visit_terms<'a, F: FnMut(&'a Term<'a>)>(pattern: &'a Pattern, f: &mut F) { + visit_terms_disjunction(&pattern.body, f); +} + +/// Calls the given closure on every [`Term`] in the [`Disjunction`]. +fn visit_terms_disjunction<'a, F: FnMut(&'a Term<'a>)>(disjunction: &'a Disjunction, f: &mut F) { + for alternative in &disjunction.body { + visit_terms_alternative(alternative, f); + } +} + +/// Calls the given closure on every [`Term`] in the [`Alternative`]. +fn visit_terms_alternative<'a, F: FnMut(&'a Term<'a>)>(alternative: &'a Alternative, f: &mut F) { + for term in &alternative.body { + visit_term(term, f); + } +} + +fn visit_term<'a, F: FnMut(&'a Term<'a>)>(term: &'a Term<'a>, f: &mut F) { + match term { + Term::LookAroundAssertion(lookaround) => { + f(term); + visit_terms_disjunction(&lookaround.body, f); + } + Term::Quantifier(quant) => { + f(term); + visit_term(&quant.body, f); + } + Term::CapturingGroup(group) => { + f(term); + visit_terms_disjunction(&group.body, f); + } + Term::IgnoreGroup(group) => { + f(term); + visit_terms_disjunction(&group.body, f); + } + _ => f(term), + } +} + #[test] fn test() { use crate::tester::Tester; diff --git a/crates/oxc_linter/src/snapshots/no_hex_escape.snap b/crates/oxc_linter/src/snapshots/no_hex_escape.snap index e607ab2b13a21..4e7e84d5f1de8 100644 --- a/crates/oxc_linter/src/snapshots/no_hex_escape.snap +++ b/crates/oxc_linter/src/snapshots/no_hex_escape.snap @@ -9,9 +9,22 @@ source: crates/oxc_linter/src/tester.rs help: Replace `"\xb1"` with `'\u00b1'`. ⚠ eslint-plugin-unicorn(no-hex-escape): Use Unicode escapes instead of hexadecimal escapes. - ╭─[no_hex_escape.tsx:1:8] + ╭─[no_hex_escape.tsx:1:41] 1 │ wrapId(/(^|[])(?:алг|арг(?:\x20*рез)?|ввод|ВКЛЮЧИТЬ|вс[её]|выбор|вывод|выход|дано|для|до|дс|если|иначе|исп|использовать|кон(?:(?:\x20+|_)исп)?|кц(?:(?:\x20+|_)при)?|надо|нач|нс|нц|от|пауза|пока|при|раза?|рез|стоп|таб|то|утв|шаг)(?=[]|$)/.source) - · ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── + · ──── ╰──── - help: Replace `/(^|[])(?:алг|арг(?:\x20*рез)?|ввод|ВКЛЮЧИТЬ|вс[её]|выбор|вывод|выход|дано|для|до|дс|если|иначе|исп|использовать|кон(?:(?:\x20+|_)исп)?|кц(?:(?:\x20+|_)при)?|надо|нач|нс|нц|от|пауза|пока|при|раза?|рез|стоп|таб|то|утв|шаг)(?=[]|$)/...` with `/(^|[])(?:алг|арг(?:\u0020*рез)?|ввод|ВКЛЮЧИТЬ|вс[её]|выбор|вывод|выход|дано|для|до|дс|если|иначе|исп|использовать| - кон(?:(?:\u0020+|_)исп)?|кц(?:(?:\u0020+|_)при)?|надо|нач|нс|нц|от|пауза|пока|при|раза?|рез|стоп|таб|то|утв|шаг)(?=[]|$)/...`. + help: Replace `\x20` with `\u0020`. + + ⚠ eslint-plugin-unicorn(no-hex-escape): Use Unicode escapes instead of hexadecimal escapes. + ╭─[no_hex_escape.tsx:1:215] + 1 │ wrapId(/(^|[])(?:алг|арг(?:\x20*рез)?|ввод|ВКЛЮЧИТЬ|вс[её]|выбор|вывод|выход|дано|для|до|дс|если|иначе|исп|использовать|кон(?:(?:\x20+|_)исп)?|кц(?:(?:\x20+|_)при)?|надо|нач|нс|нц|от|пауза|пока|при|раза?|рез|стоп|таб|то|утв|шаг)(?=[]|$)/.source) + · ──── + ╰──── + help: Replace `\x20` with `\u0020`. + + ⚠ eslint-plugin-unicorn(no-hex-escape): Use Unicode escapes instead of hexadecimal escapes. + ╭─[no_hex_escape.tsx:1:242] + 1 │ wrapId(/(^|[])(?:алг|арг(?:\x20*рез)?|ввод|ВКЛЮЧИТЬ|вс[её]|выбор|вывод|выход|дано|для|до|дс|если|иначе|исп|использовать|кон(?:(?:\x20+|_)исп)?|кц(?:(?:\x20+|_)при)?|надо|нач|нс|нц|от|пауза|пока|при|раза?|рез|стоп|таб|то|утв|шаг)(?=[]|$)/.source) + · ──── + ╰──── + help: Replace `\x20` with `\u0020`.