Skip to content

Commit

Permalink
refactor(linter): use parsed patterns for unicorn/no-hex-escape (#5985
Browse files Browse the repository at this point in the history
)

- part of #5416

This pull request includes significant improvements to the `no_hex_escape` rule in the `oxc_linter` crate. The changes enhance the detection and replacement of hexadecimal escapes within regular expressions by introducing a more comprehensive AST traversal.

  - Implemented a new `visit_terms` function and its helper functions to traverse the regex AST and apply checks on individual terms.
  - Introduced the `check_character` function to replace hexadecimal escapes with Unicode escapes within regex patterns.
  - Updated snapshots to reflect the new diagnostic messages and replacements for hexadecimal escapes in regex patterns.
  • Loading branch information
camchenry committed Sep 23, 2024
1 parent b681c9a commit 0a5a4a9
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 19 deletions.
94 changes: 79 additions & 15 deletions crates/oxc_linter/src/rules/unicorn/no_hex_escape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ use oxc_ast::{
};
use oxc_diagnostics::OxcDiagnostic;
use oxc_macros::declare_oxc_lint;
use oxc_regular_expression::ast::{
Alternative, Character, CharacterClassContents, CharacterKind, Disjunction, Pattern, Term,
};
use oxc_span::Span;

use crate::{context::LintContext, rule::Rule, AstNode};
Expand Down Expand Up @@ -57,7 +60,7 @@ fn check_escape(value: &str) -> Option<String> {
if matched.is_empty() {
None
} else {
let mut fixed = String::with_capacity(value.len() + matched.len() * 2);
let mut fixed: String = String::with_capacity(value.len() + matched.len() * 2);
let mut last = 0;
for index in matched {
fixed.push_str(&value[last..index - 1]);
Expand Down Expand Up @@ -90,26 +93,87 @@ impl Rule for NoHexEscape {
});
}
AstKind::RegExpLiteral(regex) => {
if let Some(fixed) =
check_escape(regex.regex.pattern.source_text(ctx.source_text()).as_ref())
{
#[allow(clippy::cast_possible_truncation)]
ctx.diagnostic_with_fix(no_hex_escape_diagnostic(regex.span), |fixer| {
fixer.replace(
Span::new(
regex.span.start,
regex.span.end - regex.regex.flags.iter().count() as u32,
),
format!("/{fixed}/"),
)
});
}
let Some(pattern) = regex.regex.pattern.as_pattern() else {
return;
};

visit_terms(pattern, &mut |term| match term {
Term::Character(ch) => {
check_character(ch, ctx);
}
Term::CharacterClass(class) => {
for term in &class.body {
match term {
CharacterClassContents::Character(ch) => {
check_character(ch, ctx);
}
CharacterClassContents::CharacterClassRange(range) => {
check_character(&range.min, ctx);
check_character(&range.max, ctx);
}
_ => (),
}
}
}
_ => (),
});
}
_ => {}
}
}
}

fn check_character(ch: &Character, ctx: &LintContext) {
if ch.kind == CharacterKind::HexadecimalEscape {
let unicode_escape = format!(r"\u00{}", &ch.span.source_text(ctx.source_text())[2..]);
ctx.diagnostic_with_fix(no_hex_escape_diagnostic(ch.span), |fixer| {
fixer.replace(ch.span, unicode_escape)
});
}
}

// TODO: Replace with proper regex AST visitor when available
/// Calls the given closure on every [`Term`] in the [`Pattern`].
fn visit_terms<'a, F: FnMut(&'a Term<'a>)>(pattern: &'a Pattern, f: &mut F) {
visit_terms_disjunction(&pattern.body, f);
}

/// Calls the given closure on every [`Term`] in the [`Disjunction`].
fn visit_terms_disjunction<'a, F: FnMut(&'a Term<'a>)>(disjunction: &'a Disjunction, f: &mut F) {
for alternative in &disjunction.body {
visit_terms_alternative(alternative, f);
}
}

/// Calls the given closure on every [`Term`] in the [`Alternative`].
fn visit_terms_alternative<'a, F: FnMut(&'a Term<'a>)>(alternative: &'a Alternative, f: &mut F) {
for term in &alternative.body {
visit_term(term, f);
}
}

fn visit_term<'a, F: FnMut(&'a Term<'a>)>(term: &'a Term<'a>, f: &mut F) {
match term {
Term::LookAroundAssertion(lookaround) => {
f(term);
visit_terms_disjunction(&lookaround.body, f);
}
Term::Quantifier(quant) => {
f(term);
visit_term(&quant.body, f);
}
Term::CapturingGroup(group) => {
f(term);
visit_terms_disjunction(&group.body, f);
}
Term::IgnoreGroup(group) => {
f(term);
visit_terms_disjunction(&group.body, f);
}
_ => f(term),
}
}

#[test]
fn test() {
use crate::tester::Tester;
Expand Down
21 changes: 17 additions & 4 deletions crates/oxc_linter/src/snapshots/no_hex_escape.snap
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,22 @@ source: crates/oxc_linter/src/tester.rs
help: Replace `"\xb1"` with `'\u00b1'`.

eslint-plugin-unicorn(no-hex-escape): Use Unicode escapes instead of hexadecimal escapes.
╭─[no_hex_escape.tsx:1:8]
╭─[no_hex_escape.tsx:1:41]
1wrapId(/(^|[<nonId>])(?:алг|арг(?:\x20*рез)?|ввод|ВКЛЮЧИТЬ|вс[её]|выбор|вывод|выход|дано|для|до|дс|если|иначе|исп|использовать|кон(?:(?:\x20+|_)исп)?|кц(?:(?:\x20+|_)при)?|надо|нач|нс|нц|от|пауза|пока|при|раза?|рез|стоп|таб|то|утв|шаг)(?=[<nonId>]|$)/.source)
· ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
· ────
╰────
help: Replace `/(^|[<nonId>])(?:алг|арг(?:\x20*рез)?|ввод|ВКЛЮЧИТЬ|вс[её]|выбор|вывод|выход|дано|для|до|дс|если|иначе|исп|использовать|кон(?:(?:\x20+|_)исп)?|кц(?:(?:\x20+|_)при)?|надо|нач|нс|нц|от|пауза|пока|при|раза?|рез|стоп|таб|то|утв|шаг)(?=[<nonId>]|$)/...` with `/(^|[<nonId>])(?:алг|арг(?:\u0020*рез)?|ввод|ВКЛЮЧИТЬ|вс[её]|выбор|вывод|выход|дано|для|до|дс|если|иначе|исп|использовать|
кон(?:(?:\u0020+|_)исп)?|кц(?:(?:\u0020+|_)при)?|надо|нач|нс|нц|от|пауза|пока|при|раза?|рез|стоп|таб|то|утв|шаг)(?=[<nonId>]|$)/...`.
help: Replace `\x20` with `\u0020`.

eslint-plugin-unicorn(no-hex-escape): Use Unicode escapes instead of hexadecimal escapes.
╭─[no_hex_escape.tsx:1:215]
1wrapId(/(^|[<nonId>])(?:алг|арг(?:\x20*рез)?|ввод|ВКЛЮЧИТЬ|вс[её]|выбор|вывод|выход|дано|для|до|дс|если|иначе|исп|использовать|кон(?:(?:\x20+|_)исп)?|кц(?:(?:\x20+|_)при)?|надо|нач|нс|нц|от|пауза|пока|при|раза?|рез|стоп|таб|то|утв|шаг)(?=[<nonId>]|$)/.source)
· ────
╰────
help: Replace `\x20` with `\u0020`.

eslint-plugin-unicorn(no-hex-escape): Use Unicode escapes instead of hexadecimal escapes.
╭─[no_hex_escape.tsx:1:242]
1wrapId(/(^|[<nonId>])(?:алг|арг(?:\x20*рез)?|ввод|ВКЛЮЧИТЬ|вс[её]|выбор|вывод|выход|дано|для|до|дс|если|иначе|исп|использовать|кон(?:(?:\x20+|_)исп)?|кц(?:(?:\x20+|_)при)?|надо|нач|нс|нц|от|пауза|пока|при|раза?|рез|стоп|таб|то|утв|шаг)(?=[<nonId>]|$)/.source)
· ────
╰────
help: Replace `\x20` with `\u0020`.

0 comments on commit 0a5a4a9

Please sign in to comment.