From 2cf2edd6bf500ca0412f961c4859b6dbf4eeea2c Mon Sep 17 00:00:00 2001 From: camchenry <1514176+camchenry@users.noreply.github.com> Date: Mon, 23 Sep 2024 00:33:29 +0000 Subject: [PATCH] refactor(linter): use parsed patterns in `no-empty-character-class` rule (#5980) - part of https://github.com/oxc-project/oxc/issues/5416 Uses the parsed regular expression patterns for detecting empty character classes. This is more robust than the handwritten pattern matching from before and allows us to provide more accurate diagnostics and actually point to the empty character class in the literal. --- .../rules/eslint/no_empty_character_class.rs | 97 +++++++++++++++---- .../snapshots/no_empty_character_class.snap | 92 ++++++++++++------ 2 files changed, 136 insertions(+), 53 deletions(-) diff --git a/crates/oxc_linter/src/rules/eslint/no_empty_character_class.rs b/crates/oxc_linter/src/rules/eslint/no_empty_character_class.rs index 69e71e51e27b6..7db6cfdb20173 100644 --- a/crates/oxc_linter/src/rules/eslint/no_empty_character_class.rs +++ b/crates/oxc_linter/src/rules/eslint/no_empty_character_class.rs @@ -1,16 +1,18 @@ +use memchr::memchr2; // Ported from https://github.com/eslint/eslint/blob/main/lib/rules/no-empty-character-class.js -use lazy_static::lazy_static; use oxc_ast::AstKind; use oxc_diagnostics::OxcDiagnostic; use oxc_macros::declare_oxc_lint; +use oxc_regular_expression::ast::{ + Alternative, CharacterClass, CharacterClassContents, Disjunction, Pattern, Term, +}; use oxc_span::Span; -use regex::Regex; use crate::{context::LintContext, rule::Rule, AstNode}; fn no_empty_character_class_diagnostic(span: Span) -> OxcDiagnostic { - OxcDiagnostic::warn("Empty character class") - .with_help("Try to remove empty character class `[]` in regexp literal") + OxcDiagnostic::warn("Empty character class will not match anything") + .with_help("Remove the empty character class: `[]`") .with_label(span) } @@ -34,26 +36,77 @@ declare_oxc_lint!( impl Rule for NoEmptyCharacterClass { fn run<'a>(&self, node: &AstNode<'a>, ctx: &LintContext<'a>) { - lazy_static! { - /* - * plain-English description of the following regexp: - * 0. `^` fix the match at the beginning of the string - * 1. `([^\\[]|\\.|\[([^\\\]]|\\.)+\])*`: regexp contents; 0 or more of the following - * 1.0. `[^\\[]`: any character that's not a `\` or a `[` (anything but escape sequences and character classes) - * 1.1. `\\.`: an escape sequence - * 1.2. `\[([^\\\]]|\\.)+\]`: a character class that isn't empty - * 2. `$`: fix the match at the end of the string - */ - static ref NO_EMPTY_CLASS_REGEX_PATTERN: Regex = - Regex::new(r"^([^\\\[]|\\.|\[([^\\\]]|\\.)+\])*$").unwrap(); - } - if let AstKind::RegExpLiteral(lit) = node.kind() { - if !NO_EMPTY_CLASS_REGEX_PATTERN - .is_match(lit.regex.pattern.source_text(ctx.source_text()).as_ref()) + let Some(pattern) = lit.regex.pattern.as_pattern() else { + return; + }; + + // Skip if the pattern doesn't contain a `[` or `]` character + if memchr2(b'[', b']', lit.regex.pattern.source_text(ctx.source_text()).as_bytes()) + .is_none() { - ctx.diagnostic(no_empty_character_class_diagnostic(lit.span)); + return; + } + + visit_terms(pattern, &mut |term| { + if let Term::CharacterClass(class) = term { + check_character_class(ctx, class); + } + }); + } + } +} + +fn check_character_class(ctx: &LintContext, class: &CharacterClass) { + // Class has nothing in it, example: `/[]/` + if !class.negative && class.body.is_empty() { + ctx.diagnostic(no_empty_character_class_diagnostic(class.span)); + return; + } + + // Class has something in it, but might contain empty nested character classes, + // example: `/[[]]/` + for term in &class.body { + if let CharacterClassContents::NestedCharacterClass(class) = term { + check_character_class(ctx, class); + } + } +} + +// TODO: Replace with proper regex AST visitor when available +/// Calls the given closure on every [`Term`] in the [`Pattern`]. +fn visit_terms<'a, F: FnMut(&'a Term<'a>)>(pattern: &'a Pattern, f: &mut F) { + visit_terms_disjunction(&pattern.body, f); +} + +/// Calls the given closure on every [`Term`] in the [`Disjunction`]. +fn visit_terms_disjunction<'a, F: FnMut(&'a Term<'a>)>(disjunction: &'a Disjunction, f: &mut F) { + for alternative in &disjunction.body { + visit_terms_alternative(alternative, f); + } +} + +/// Calls the given closure on every [`Term`] in the [`Alternative`]. +fn visit_terms_alternative<'a, F: FnMut(&'a Term<'a>)>(alternative: &'a Alternative, f: &mut F) { + for term in &alternative.body { + match term { + Term::LookAroundAssertion(lookaround) => { + f(term); + visit_terms_disjunction(&lookaround.body, f); + } + Term::Quantifier(quant) => { + f(term); + f(&quant.body); + } + Term::CapturingGroup(group) => { + f(term); + visit_terms_disjunction(&group.body, f); + } + Term::IgnoreGroup(group) => { + f(term); + visit_terms_disjunction(&group.body, f); } + _ => f(term), } } } @@ -88,6 +141,8 @@ fn test() { ("var foo = /\\[[]/;", None), ("var foo = /\\[\\[\\]a-z[]/;", None), ("var foo = /[]]/d;", None), + ("var foo = /[[][]]/v;", None), + ("var foo = /[[]]|[]/v;", None), ]; Tester::new(NoEmptyCharacterClass::NAME, pass, fail).test_and_snapshot(); diff --git a/crates/oxc_linter/src/snapshots/no_empty_character_class.snap b/crates/oxc_linter/src/snapshots/no_empty_character_class.snap index 35f2a9cb7e446..2d0095eb7a998 100644 --- a/crates/oxc_linter/src/snapshots/no_empty_character_class.snap +++ b/crates/oxc_linter/src/snapshots/no_empty_character_class.snap @@ -1,58 +1,86 @@ --- source: crates/oxc_linter/src/tester.rs --- - ⚠ eslint(no-empty-character-class): Empty character class - ╭─[no_empty_character_class.tsx:1:11] + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:16] 1 │ var foo = /^abc[]/; - · ──────── + · ── ╰──── - help: Try to remove empty character class `[]` in regexp literal + help: Remove the empty character class: `[]` - ⚠ eslint(no-empty-character-class): Empty character class - ╭─[no_empty_character_class.tsx:1:11] + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:15] 1 │ var foo = /foo[]bar/; - · ────────── + · ── ╰──── - help: Try to remove empty character class `[]` in regexp literal + help: Remove the empty character class: `[]` - ⚠ eslint(no-empty-character-class): Empty character class - ╭─[no_empty_character_class.tsx:1:15] + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:20] 1 │ if (foo.match(/^abc[]/)) {} - · ──────── + · ── ╰──── - help: Try to remove empty character class `[]` in regexp literal + help: Remove the empty character class: `[]` - ⚠ eslint(no-empty-character-class): Empty character class - ╭─[no_empty_character_class.tsx:1:5] + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:10] 1 │ if (/^abc[]/.test(foo)) {} - · ──────── + · ── ╰──── - help: Try to remove empty character class `[]` in regexp literal + help: Remove the empty character class: `[]` - ⚠ eslint(no-empty-character-class): Empty character class - ╭─[no_empty_character_class.tsx:1:11] + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:12] 1 │ var foo = /[]]/; - · ───── + · ── ╰──── - help: Try to remove empty character class `[]` in regexp literal + help: Remove the empty character class: `[]` - ⚠ eslint(no-empty-character-class): Empty character class - ╭─[no_empty_character_class.tsx:1:11] + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:14] 1 │ var foo = /\[[]/; - · ────── + · ── ╰──── - help: Try to remove empty character class `[]` in regexp literal + help: Remove the empty character class: `[]` - ⚠ eslint(no-empty-character-class): Empty character class - ╭─[no_empty_character_class.tsx:1:11] + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:21] 1 │ var foo = /\[\[\]a-z[]/; - · ───────────── + · ── ╰──── - help: Try to remove empty character class `[]` in regexp literal + help: Remove the empty character class: `[]` - ⚠ eslint(no-empty-character-class): Empty character class - ╭─[no_empty_character_class.tsx:1:11] + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:12] 1 │ var foo = /[]]/d; - · ────── + · ── + ╰──── + help: Remove the empty character class: `[]` + + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:13] + 1 │ var foo = /[[][]]/v; + · ── + ╰──── + help: Remove the empty character class: `[]` + + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:15] + 1 │ var foo = /[[][]]/v; + · ── + ╰──── + help: Remove the empty character class: `[]` + + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:13] + 1 │ var foo = /[[]]|[]/v; + · ── + ╰──── + help: Remove the empty character class: `[]` + + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:17] + 1 │ var foo = /[[]]|[]/v; + · ── ╰──── - help: Try to remove empty character class `[]` in regexp literal + help: Remove the empty character class: `[]`