From d6805ddc3bc1970400159d043f4674201a19b89d Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Thu, 6 Jul 2023 12:41:35 -0400 Subject: [PATCH 1/3] ADd exemtpion parser --- Cargo.lock | 11 + crates/ruff/Cargo.toml | 8 +- crates/ruff/benches/benchmark.rs | 32 +++ crates/ruff/src/lib.rs | 2 +- crates/ruff/src/noqa.rs | 207 +++++++++++++++++- ...ff__noqa__tests__flake8_exemption_all.snap | 7 + ...flake8_exemption_all_case_insensitive.snap | 7 + ..._tests__flake8_exemption_all_no_space.snap | 7 + ...__noqa__tests__flake8_exemption_codes.snap | 7 + ...ruff__noqa__tests__ruff_exemption_all.snap | 7 + ...__ruff_exemption_all_case_insensitive.snap | 7 + ...a__tests__ruff_exemption_all_no_space.snap | 7 + ...ff__noqa__tests__ruff_exemption_codes.snap | 12 + 13 files changed, 313 insertions(+), 8 deletions(-) create mode 100644 crates/ruff/benches/benchmark.rs create mode 100644 crates/ruff/src/snapshots/ruff__noqa__tests__flake8_exemption_all.snap create mode 100644 crates/ruff/src/snapshots/ruff__noqa__tests__flake8_exemption_all_case_insensitive.snap create mode 100644 crates/ruff/src/snapshots/ruff__noqa__tests__flake8_exemption_all_no_space.snap create mode 100644 crates/ruff/src/snapshots/ruff__noqa__tests__flake8_exemption_codes.snap create mode 100644 crates/ruff/src/snapshots/ruff__noqa__tests__ruff_exemption_all.snap create mode 100644 crates/ruff/src/snapshots/ruff__noqa__tests__ruff_exemption_all_case_insensitive.snap create mode 100644 crates/ruff/src/snapshots/ruff__noqa__tests__ruff_exemption_all_no_space.snap create mode 100644 crates/ruff/src/snapshots/ruff__noqa__tests__ruff_exemption_codes.snap diff --git a/Cargo.lock b/Cargo.lock index 2376d4aab3b33..51581e2510979 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1844,6 +1844,7 @@ dependencies = [ "chrono", "clap", "colored", + "criterion", "dirs 5.0.1", "fern", "glob", @@ -1898,6 +1899,7 @@ dependencies = [ "thiserror", "toml", "typed-arena", + "unicase", "unicode-width", "unicode_names2", ] @@ -2917,6 +2919,15 @@ dependencies = [ "unic-common", ] +[[package]] +name = "unicase" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6" +dependencies = [ + "version_check", +] + [[package]] name = "unicode-bidi" version = "0.3.13" diff --git a/crates/ruff/Cargo.toml b/crates/ruff/Cargo.toml index ad1372686c47d..f2fa373a392bf 100644 --- a/crates/ruff/Cargo.toml +++ b/crates/ruff/Cargo.toml @@ -27,7 +27,6 @@ ruff_rustpython = { path = "../ruff_rustpython" } ruff_text_size = { workspace = true } ruff_textwrap = { path = "../ruff_textwrap" } -aho-corasick = { version = "1.0.2" } annotate-snippets = { version = "0.9.1", features = ["color"] } anyhow = { workspace = true } bitflags = { workspace = true } @@ -79,6 +78,9 @@ toml = { workspace = true } typed-arena = { version = "2.0.2" } unicode-width = { version = "0.1.10" } unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" } +criterion = "0.5.1" +unicase = "2.6.0" +aho-corasick = "1.0.2" [dev-dependencies] insta = { workspace = true } @@ -92,3 +94,7 @@ default = [] schemars = ["dep:schemars"] # Enables the UnreachableCode rule unreachable-code = [] + +[[bench]] +name = "benchmark" +harness = false diff --git a/crates/ruff/benches/benchmark.rs b/crates/ruff/benches/benchmark.rs new file mode 100644 index 0000000000000..337b4da02a218 --- /dev/null +++ b/crates/ruff/benches/benchmark.rs @@ -0,0 +1,32 @@ +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; + +use ruff::noqa::ParsedFileExemption; + +pub fn directive_benchmark(c: &mut Criterion) { + let mut group = c.benchmark_group("Directive"); + for i in [ + "# noqa: F401", + "# noqa: F401, F841", + "# flake8: noqa: F401, F841", + "# ruff: noqa: F401, F841", + "# flake8: noqa", + "# ruff: noqa", + "# noqa", + "# type: ignore # noqa: E501", + "# type: ignore # nosec", + "# some very long comment that # is interspersed with characters but # no directive", + ] + .iter() + { + group.bench_with_input(BenchmarkId::new("Regex", i), i, |b, _i| { + b.iter(|| ParsedFileExemption::try_regex(black_box(i))) + }); + group.bench_with_input(BenchmarkId::new("Lexer", i), i, |b, _i| { + b.iter(|| ParsedFileExemption::try_extract(black_box(i))) + }); + } + group.finish(); +} + +criterion_group!(benches, directive_benchmark); +criterion_main!(benches); diff --git a/crates/ruff/src/lib.rs b/crates/ruff/src/lib.rs index 2a69194fad4e9..155414c1bcd91 100644 --- a/crates/ruff/src/lib.rs +++ b/crates/ruff/src/lib.rs @@ -27,7 +27,7 @@ pub mod line_width; pub mod linter; pub mod logging; pub mod message; -mod noqa; +pub mod noqa; pub mod packaging; pub mod pyproject_toml; pub mod registry; diff --git a/crates/ruff/src/noqa.rs b/crates/ruff/src/noqa.rs index 176dd6d345201..3460cc4ba80f9 100644 --- a/crates/ruff/src/noqa.rs +++ b/crates/ruff/src/noqa.rs @@ -83,7 +83,7 @@ impl<'a> Directive<'a> { let mut codes = vec![]; let mut codes_end = codes_start; let mut leading_space = 0; - while let Some(code) = Directive::lex_code(&text[codes_end + leading_space..]) { + while let Some(code) = Directive::eat_code(&text[codes_end + leading_space..]) { codes.push(code); codes_end += leading_space; codes_end += code.len(); @@ -125,8 +125,8 @@ impl<'a> Directive<'a> { None } - /// Lex an individual rule code (e.g., `F401`). - fn lex_code(text: &str) -> Option<&str> { + /// Eat an individual rule code (e.g., `F401`). + fn eat_code(text: &str) -> Option<&str> { // Extract, e.g., the `F` in `F401`. let prefix = text.chars().take_while(char::is_ascii_uppercase).count(); // Extract, e.g., the `401` in `F401`. @@ -246,7 +246,7 @@ impl FileExemption { /// [`FileExemption`], but only for a single line, as opposed to an aggregated set of exemptions /// across a source file. #[derive(Debug)] -enum ParsedFileExemption<'a> { +pub enum ParsedFileExemption<'a> { /// The file-level exemption ignores all rules (e.g., `# ruff: noqa`). All, /// The file-level exemption ignores specific rules (e.g., `# ruff: noqa: F401, F841`). @@ -255,7 +255,7 @@ enum ParsedFileExemption<'a> { impl<'a> ParsedFileExemption<'a> { /// Return a [`ParsedFileExemption`] for a given comment line. - fn try_extract(line: &'a str) -> Option { + pub fn try_regex(line: &'a str) -> Option { let line = line.trim_whitespace_start(); if line.starts_with("# flake8: noqa") @@ -288,6 +288,152 @@ impl<'a> ParsedFileExemption<'a> { None } + + /// Return a [`ParsedFileExemption`] for a given comment line. + pub fn try_extract(line: &'a str) -> Option { + let line = ParsedFileExemption::eat_whitespace(line); + let line = ParsedFileExemption::eat_char(line, '#')?; + let line = ParsedFileExemption::eat_whitespace(line); + + if let Some(line) = ParsedFileExemption::eat_flake8(line) { + // Ex) `# flake8: noqa` + let line = ParsedFileExemption::eat_whitespace(line); + let line = ParsedFileExemption::eat_char(line, ':')?; + let line = ParsedFileExemption::eat_whitespace(line); + let line = ParsedFileExemption::eat_noqa(line)?; + + Some(Self::All) + } else if let Some(line) = ParsedFileExemption::eat_ruff(line) { + let line = ParsedFileExemption::eat_whitespace(line); + let line = ParsedFileExemption::eat_char(line, ':')?; + let line = ParsedFileExemption::eat_whitespace(line); + let line = ParsedFileExemption::eat_noqa(line)?; + + if line.is_empty() { + // Ex) `# ruff: noqa` + Some(Self::All) + } else { + // Ex) `# ruff: noqa: F401, F841` + let line = ParsedFileExemption::eat_whitespace(line); + let Some(line) = ParsedFileExemption::eat_char(line, ':') else { + warn!("Unexpected suffix on `noqa` directive: \"{line}\""); + return None; + }; + let line = ParsedFileExemption::eat_whitespace(line); + + // Extract the codes from the line (e.g., `F401, F841`). + let mut codes = vec![]; + let mut line = line; + while let Some(code) = ParsedFileExemption::eat_code(line) { + codes.push(code); + line = &line[code.len()..]; + + // Codes can be comma- or whitespace-delimited. + if let Some(rest) = ParsedFileExemption::eat_delimiter(line) + .map(ParsedFileExemption::eat_whitespace) + { + line = rest; + } else { + break; + } + } + + Some(Self::Codes(codes)) + } + } else { + None + } + } + + /// Eat optional leading whitespace. + #[inline] + fn eat_whitespace(line: &str) -> &str { + line.trim_start() + } + + /// Eat a specific character, or return `None` if the character is not the first character in + /// the line. + #[inline] + fn eat_char(line: &str, c: char) -> Option<&str> { + let mut chars = line.chars(); + if chars.next() == Some(c) { + Some(chars.as_str()) + } else { + None + } + } + + /// Eat the "flake8" prefix of a `noqa` directive. + #[inline] + fn eat_flake8(line: &str) -> Option<&str> { + line.strip_prefix("flake8") + } + + /// Eat the "ruff" prefix of a `noqa` directive. + #[inline] + fn eat_ruff(line: &str) -> Option<&str> { + line.strip_prefix("ruff") + } + + /// Eat a `noqa` directive with case-insensitive matching. + #[inline] + fn eat_noqa(line: &str) -> Option<&str> { + let mut chars = line.bytes(); + if chars + .next() + .map_or(false, |c| c.to_ascii_lowercase() == b'n') + { + if chars + .next() + .map_or(false, |c| c.to_ascii_lowercase() == b'o') + { + if chars + .next() + .map_or(false, |c| c.to_ascii_lowercase() == b'q') + { + if chars + .next() + .map_or(false, |c| c.to_ascii_lowercase() == b'a') + { + return Some(chars.as_str()); + } + } + } + } + None + } + + /// Eat a code delimiter, which can either be a comma or whitespace. + #[inline] + fn eat_delimiter(line: &str) -> Option<&str> { + let mut chars = line.chars(); + if let Some(c) = chars.next() { + if c == ',' || c.is_whitespace() { + Some(chars.as_str()) + } else { + None + } + } else { + None + } + } + + /// Eat an individual rule code (e.g., `F401`). + #[inline] + fn eat_code(line: &str) -> Option<&str> { + // Extract, e.g., the `F` in `F401`. + let prefix = line.chars().take_while(char::is_ascii_uppercase).count(); + // Extract, e.g., the `401` in `F401`. + let suffix = line[prefix..] + .chars() + .take_while(char::is_ascii_digit) + .count(); + if prefix > 0 && suffix > 0 { + Some(&line[..prefix + suffix]) + } else { + None + } + } } /// Adds noqa comments to suppress all diagnostics of a file. @@ -620,7 +766,7 @@ mod tests { use ruff_python_ast::source_code::Locator; use ruff_python_whitespace::LineEnding; - use crate::noqa::{add_noqa_inner, Directive, NoqaMapping}; + use crate::noqa::{add_noqa_inner, Directive, NoqaMapping, ParsedFileExemption}; use crate::rules::pycodestyle::rules::AmbiguousVariableName; use crate::rules::pyflakes::rules::UnusedVariable; @@ -750,6 +896,55 @@ mod tests { assert_debug_snapshot!(Directive::try_extract(source, TextSize::default())); } + #[test] + fn flake8_exemption_all() { + let source = "# flake8: noqa"; + assert_debug_snapshot!(ParsedFileExemption::try_extract(source)); + } + + #[test] + fn ruff_exemption_all() { + let source = "# ruff: noqa"; + assert_debug_snapshot!(ParsedFileExemption::try_extract(source)); + } + + #[test] + fn flake8_exemption_all_no_space() { + let source = "#flake8:noqa"; + assert_debug_snapshot!(ParsedFileExemption::try_extract(source)); + } + + #[test] + fn ruff_exemption_all_no_space() { + let source = "#ruff:noqa"; + assert_debug_snapshot!(ParsedFileExemption::try_extract(source)); + } + + #[test] + fn flake8_exemption_codes() { + // Note: Flake8 doesn't support this; it's treated as a blanket exemption. + let source = "# flake8: noqa: F401, F841"; + assert_debug_snapshot!(ParsedFileExemption::try_extract(source)); + } + + #[test] + fn ruff_exemption_codes() { + let source = "# ruff: noqa: F401, F841"; + assert_debug_snapshot!(ParsedFileExemption::try_extract(source)); + } + + #[test] + fn flake8_exemption_all_case_insensitive() { + let source = "# flake8: NoQa"; + assert_debug_snapshot!(ParsedFileExemption::try_extract(source)); + } + + #[test] + fn ruff_exemption_all_case_insensitive() { + let source = "# ruff: NoQa"; + assert_debug_snapshot!(ParsedFileExemption::try_extract(source)); + } + #[test] fn modification() { let contents = "x = 1"; diff --git a/crates/ruff/src/snapshots/ruff__noqa__tests__flake8_exemption_all.snap b/crates/ruff/src/snapshots/ruff__noqa__tests__flake8_exemption_all.snap new file mode 100644 index 0000000000000..30642fbd1c503 --- /dev/null +++ b/crates/ruff/src/snapshots/ruff__noqa__tests__flake8_exemption_all.snap @@ -0,0 +1,7 @@ +--- +source: crates/ruff/src/noqa.rs +expression: "ParsedFileExemption::try_extract(source)" +--- +Some( + All, +) diff --git a/crates/ruff/src/snapshots/ruff__noqa__tests__flake8_exemption_all_case_insensitive.snap b/crates/ruff/src/snapshots/ruff__noqa__tests__flake8_exemption_all_case_insensitive.snap new file mode 100644 index 0000000000000..30642fbd1c503 --- /dev/null +++ b/crates/ruff/src/snapshots/ruff__noqa__tests__flake8_exemption_all_case_insensitive.snap @@ -0,0 +1,7 @@ +--- +source: crates/ruff/src/noqa.rs +expression: "ParsedFileExemption::try_extract(source)" +--- +Some( + All, +) diff --git a/crates/ruff/src/snapshots/ruff__noqa__tests__flake8_exemption_all_no_space.snap b/crates/ruff/src/snapshots/ruff__noqa__tests__flake8_exemption_all_no_space.snap new file mode 100644 index 0000000000000..30642fbd1c503 --- /dev/null +++ b/crates/ruff/src/snapshots/ruff__noqa__tests__flake8_exemption_all_no_space.snap @@ -0,0 +1,7 @@ +--- +source: crates/ruff/src/noqa.rs +expression: "ParsedFileExemption::try_extract(source)" +--- +Some( + All, +) diff --git a/crates/ruff/src/snapshots/ruff__noqa__tests__flake8_exemption_codes.snap b/crates/ruff/src/snapshots/ruff__noqa__tests__flake8_exemption_codes.snap new file mode 100644 index 0000000000000..30642fbd1c503 --- /dev/null +++ b/crates/ruff/src/snapshots/ruff__noqa__tests__flake8_exemption_codes.snap @@ -0,0 +1,7 @@ +--- +source: crates/ruff/src/noqa.rs +expression: "ParsedFileExemption::try_extract(source)" +--- +Some( + All, +) diff --git a/crates/ruff/src/snapshots/ruff__noqa__tests__ruff_exemption_all.snap b/crates/ruff/src/snapshots/ruff__noqa__tests__ruff_exemption_all.snap new file mode 100644 index 0000000000000..30642fbd1c503 --- /dev/null +++ b/crates/ruff/src/snapshots/ruff__noqa__tests__ruff_exemption_all.snap @@ -0,0 +1,7 @@ +--- +source: crates/ruff/src/noqa.rs +expression: "ParsedFileExemption::try_extract(source)" +--- +Some( + All, +) diff --git a/crates/ruff/src/snapshots/ruff__noqa__tests__ruff_exemption_all_case_insensitive.snap b/crates/ruff/src/snapshots/ruff__noqa__tests__ruff_exemption_all_case_insensitive.snap new file mode 100644 index 0000000000000..30642fbd1c503 --- /dev/null +++ b/crates/ruff/src/snapshots/ruff__noqa__tests__ruff_exemption_all_case_insensitive.snap @@ -0,0 +1,7 @@ +--- +source: crates/ruff/src/noqa.rs +expression: "ParsedFileExemption::try_extract(source)" +--- +Some( + All, +) diff --git a/crates/ruff/src/snapshots/ruff__noqa__tests__ruff_exemption_all_no_space.snap b/crates/ruff/src/snapshots/ruff__noqa__tests__ruff_exemption_all_no_space.snap new file mode 100644 index 0000000000000..30642fbd1c503 --- /dev/null +++ b/crates/ruff/src/snapshots/ruff__noqa__tests__ruff_exemption_all_no_space.snap @@ -0,0 +1,7 @@ +--- +source: crates/ruff/src/noqa.rs +expression: "ParsedFileExemption::try_extract(source)" +--- +Some( + All, +) diff --git a/crates/ruff/src/snapshots/ruff__noqa__tests__ruff_exemption_codes.snap b/crates/ruff/src/snapshots/ruff__noqa__tests__ruff_exemption_codes.snap new file mode 100644 index 0000000000000..cbf46f63ff3dd --- /dev/null +++ b/crates/ruff/src/snapshots/ruff__noqa__tests__ruff_exemption_codes.snap @@ -0,0 +1,12 @@ +--- +source: crates/ruff/src/noqa.rs +expression: "ParsedFileExemption::try_extract(source)" +--- +Some( + Codes( + [ + "F401", + "F841", + ], + ), +) From 3348090a942601a713ca0dc725c3bf40a5647c9b Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Thu, 6 Jul 2023 14:03:24 -0400 Subject: [PATCH 2/3] Revert benches --- Cargo.lock | 11 --- crates/ruff/Cargo.toml | 8 +- crates/ruff/benches/benchmark.rs | 32 -------- crates/ruff/src/lib.rs | 2 +- crates/ruff/src/noqa.rs | 131 +++++++++++-------------------- 5 files changed, 50 insertions(+), 134 deletions(-) delete mode 100644 crates/ruff/benches/benchmark.rs diff --git a/Cargo.lock b/Cargo.lock index 51581e2510979..2376d4aab3b33 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1844,7 +1844,6 @@ dependencies = [ "chrono", "clap", "colored", - "criterion", "dirs 5.0.1", "fern", "glob", @@ -1899,7 +1898,6 @@ dependencies = [ "thiserror", "toml", "typed-arena", - "unicase", "unicode-width", "unicode_names2", ] @@ -2919,15 +2917,6 @@ dependencies = [ "unic-common", ] -[[package]] -name = "unicase" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6" -dependencies = [ - "version_check", -] - [[package]] name = "unicode-bidi" version = "0.3.13" diff --git a/crates/ruff/Cargo.toml b/crates/ruff/Cargo.toml index f2fa373a392bf..ad1372686c47d 100644 --- a/crates/ruff/Cargo.toml +++ b/crates/ruff/Cargo.toml @@ -27,6 +27,7 @@ ruff_rustpython = { path = "../ruff_rustpython" } ruff_text_size = { workspace = true } ruff_textwrap = { path = "../ruff_textwrap" } +aho-corasick = { version = "1.0.2" } annotate-snippets = { version = "0.9.1", features = ["color"] } anyhow = { workspace = true } bitflags = { workspace = true } @@ -78,9 +79,6 @@ toml = { workspace = true } typed-arena = { version = "2.0.2" } unicode-width = { version = "0.1.10" } unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" } -criterion = "0.5.1" -unicase = "2.6.0" -aho-corasick = "1.0.2" [dev-dependencies] insta = { workspace = true } @@ -94,7 +92,3 @@ default = [] schemars = ["dep:schemars"] # Enables the UnreachableCode rule unreachable-code = [] - -[[bench]] -name = "benchmark" -harness = false diff --git a/crates/ruff/benches/benchmark.rs b/crates/ruff/benches/benchmark.rs deleted file mode 100644 index 337b4da02a218..0000000000000 --- a/crates/ruff/benches/benchmark.rs +++ /dev/null @@ -1,32 +0,0 @@ -use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; - -use ruff::noqa::ParsedFileExemption; - -pub fn directive_benchmark(c: &mut Criterion) { - let mut group = c.benchmark_group("Directive"); - for i in [ - "# noqa: F401", - "# noqa: F401, F841", - "# flake8: noqa: F401, F841", - "# ruff: noqa: F401, F841", - "# flake8: noqa", - "# ruff: noqa", - "# noqa", - "# type: ignore # noqa: E501", - "# type: ignore # nosec", - "# some very long comment that # is interspersed with characters but # no directive", - ] - .iter() - { - group.bench_with_input(BenchmarkId::new("Regex", i), i, |b, _i| { - b.iter(|| ParsedFileExemption::try_regex(black_box(i))) - }); - group.bench_with_input(BenchmarkId::new("Lexer", i), i, |b, _i| { - b.iter(|| ParsedFileExemption::try_extract(black_box(i))) - }); - } - group.finish(); -} - -criterion_group!(benches, directive_benchmark); -criterion_main!(benches); diff --git a/crates/ruff/src/lib.rs b/crates/ruff/src/lib.rs index 155414c1bcd91..2a69194fad4e9 100644 --- a/crates/ruff/src/lib.rs +++ b/crates/ruff/src/lib.rs @@ -27,7 +27,7 @@ pub mod line_width; pub mod linter; pub mod logging; pub mod message; -pub mod noqa; +mod noqa; pub mod packaging; pub mod pyproject_toml; pub mod registry; diff --git a/crates/ruff/src/noqa.rs b/crates/ruff/src/noqa.rs index 3460cc4ba80f9..f137596732dfa 100644 --- a/crates/ruff/src/noqa.rs +++ b/crates/ruff/src/noqa.rs @@ -14,7 +14,7 @@ use rustpython_parser::ast::Ranged; use ruff_diagnostics::Diagnostic; use ruff_python_ast::source_code::Locator; -use ruff_python_whitespace::{LineEnding, PythonWhitespace}; +use ruff_python_whitespace::LineEnding; use crate::codes::NoqaCode; use crate::registry::{AsRule, Rule, RuleSet}; @@ -83,7 +83,7 @@ impl<'a> Directive<'a> { let mut codes = vec![]; let mut codes_end = codes_start; let mut leading_space = 0; - while let Some(code) = Directive::eat_code(&text[codes_end + leading_space..]) { + while let Some(code) = Directive::lex_code(&text[codes_end + leading_space..]) { codes.push(code); codes_end += leading_space; codes_end += code.len(); @@ -125,17 +125,18 @@ impl<'a> Directive<'a> { None } - /// Eat an individual rule code (e.g., `F401`). - fn eat_code(text: &str) -> Option<&str> { + /// Lex an individual rule code (e.g., `F401`). + #[inline] + fn lex_code(line: &str) -> Option<&str> { // Extract, e.g., the `F` in `F401`. - let prefix = text.chars().take_while(char::is_ascii_uppercase).count(); + let prefix = line.chars().take_while(char::is_ascii_uppercase).count(); // Extract, e.g., the `401` in `F401`. - let suffix = text[prefix..] + let suffix = line[prefix..] .chars() .take_while(char::is_ascii_digit) .count(); if prefix > 0 && suffix > 0 { - Some(&text[..prefix + suffix]) + Some(&line[..prefix + suffix]) } else { None } @@ -246,7 +247,7 @@ impl FileExemption { /// [`FileExemption`], but only for a single line, as opposed to an aggregated set of exemptions /// across a source file. #[derive(Debug)] -pub enum ParsedFileExemption<'a> { +enum ParsedFileExemption<'a> { /// The file-level exemption ignores all rules (e.g., `# ruff: noqa`). All, /// The file-level exemption ignores specific rules (e.g., `# ruff: noqa: F401, F841`). @@ -255,82 +256,46 @@ pub enum ParsedFileExemption<'a> { impl<'a> ParsedFileExemption<'a> { /// Return a [`ParsedFileExemption`] for a given comment line. - pub fn try_regex(line: &'a str) -> Option { - let line = line.trim_whitespace_start(); - - if line.starts_with("# flake8: noqa") - || line.starts_with("# flake8: NOQA") - || line.starts_with("# flake8: NoQA") - { - return Some(Self::All); - } + fn try_extract(line: &'a str) -> Option { + let line = ParsedFileExemption::lex_whitespace(line); + let line = ParsedFileExemption::lex_char(line, '#')?; + let line = ParsedFileExemption::lex_whitespace(line); - if let Some(remainder) = line - .strip_prefix("# ruff: noqa") - .or_else(|| line.strip_prefix("# ruff: NOQA")) - .or_else(|| line.strip_prefix("# ruff: NoQA")) - { - if remainder.is_empty() { - return Some(Self::All); - } else if let Some(codes) = remainder.strip_prefix(':') { - let codes = codes - .split(|c: char| c.is_whitespace() || c == ',') - .map(str::trim) - .filter(|code| !code.is_empty()) - .collect_vec(); - if codes.is_empty() { - warn!("Expected rule codes on `noqa` directive: \"{line}\""); - } - return Some(Self::Codes(codes)); - } - warn!("Unexpected suffix on `noqa` directive: \"{line}\""); - } - - None - } - - /// Return a [`ParsedFileExemption`] for a given comment line. - pub fn try_extract(line: &'a str) -> Option { - let line = ParsedFileExemption::eat_whitespace(line); - let line = ParsedFileExemption::eat_char(line, '#')?; - let line = ParsedFileExemption::eat_whitespace(line); - - if let Some(line) = ParsedFileExemption::eat_flake8(line) { + if let Some(line) = ParsedFileExemption::lex_flake8(line) { // Ex) `# flake8: noqa` - let line = ParsedFileExemption::eat_whitespace(line); - let line = ParsedFileExemption::eat_char(line, ':')?; - let line = ParsedFileExemption::eat_whitespace(line); - let line = ParsedFileExemption::eat_noqa(line)?; - + let line = ParsedFileExemption::lex_whitespace(line); + let line = ParsedFileExemption::lex_char(line, ':')?; + let line = ParsedFileExemption::lex_whitespace(line); + ParsedFileExemption::lex_noqa(line)?; Some(Self::All) - } else if let Some(line) = ParsedFileExemption::eat_ruff(line) { - let line = ParsedFileExemption::eat_whitespace(line); - let line = ParsedFileExemption::eat_char(line, ':')?; - let line = ParsedFileExemption::eat_whitespace(line); - let line = ParsedFileExemption::eat_noqa(line)?; + } else if let Some(line) = ParsedFileExemption::lex_ruff(line) { + let line = ParsedFileExemption::lex_whitespace(line); + let line = ParsedFileExemption::lex_char(line, ':')?; + let line = ParsedFileExemption::lex_whitespace(line); + let line = ParsedFileExemption::lex_noqa(line)?; if line.is_empty() { // Ex) `# ruff: noqa` Some(Self::All) } else { // Ex) `# ruff: noqa: F401, F841` - let line = ParsedFileExemption::eat_whitespace(line); - let Some(line) = ParsedFileExemption::eat_char(line, ':') else { + let line = ParsedFileExemption::lex_whitespace(line); + let Some(line) = ParsedFileExemption::lex_char(line, ':') else { warn!("Unexpected suffix on `noqa` directive: \"{line}\""); return None; }; - let line = ParsedFileExemption::eat_whitespace(line); + let line = ParsedFileExemption::lex_whitespace(line); // Extract the codes from the line (e.g., `F401, F841`). let mut codes = vec![]; let mut line = line; - while let Some(code) = ParsedFileExemption::eat_code(line) { + while let Some(code) = ParsedFileExemption::lex_code(line) { codes.push(code); line = &line[code.len()..]; // Codes can be comma- or whitespace-delimited. - if let Some(rest) = ParsedFileExemption::eat_delimiter(line) - .map(ParsedFileExemption::eat_whitespace) + if let Some(rest) = ParsedFileExemption::lex_delimiter(line) + .map(ParsedFileExemption::lex_whitespace) { line = rest; } else { @@ -345,16 +310,16 @@ impl<'a> ParsedFileExemption<'a> { } } - /// Eat optional leading whitespace. + /// Lex optional leading whitespace. #[inline] - fn eat_whitespace(line: &str) -> &str { + fn lex_whitespace(line: &str) -> &str { line.trim_start() } - /// Eat a specific character, or return `None` if the character is not the first character in + /// Lex a specific character, or return `None` if the character is not the first character in /// the line. #[inline] - fn eat_char(line: &str, c: char) -> Option<&str> { + fn lex_char(line: &str, c: char) -> Option<&str> { let mut chars = line.chars(); if chars.next() == Some(c) { Some(chars.as_str()) @@ -363,37 +328,37 @@ impl<'a> ParsedFileExemption<'a> { } } - /// Eat the "flake8" prefix of a `noqa` directive. + /// Lex the "flake8" prefix of a `noqa` directive. #[inline] - fn eat_flake8(line: &str) -> Option<&str> { + fn lex_flake8(line: &str) -> Option<&str> { line.strip_prefix("flake8") } - /// Eat the "ruff" prefix of a `noqa` directive. + /// Lex the "ruff" prefix of a `noqa` directive. #[inline] - fn eat_ruff(line: &str) -> Option<&str> { + fn lex_ruff(line: &str) -> Option<&str> { line.strip_prefix("ruff") } - /// Eat a `noqa` directive with case-insensitive matching. + /// Lex a `noqa` directive with case-insensitive matching. #[inline] - fn eat_noqa(line: &str) -> Option<&str> { - let mut chars = line.bytes(); + fn lex_noqa(line: &str) -> Option<&str> { + let mut chars = line.chars(); if chars .next() - .map_or(false, |c| c.to_ascii_lowercase() == b'n') + .map_or(false, |c| c.to_ascii_lowercase() == 'n') { if chars .next() - .map_or(false, |c| c.to_ascii_lowercase() == b'o') + .map_or(false, |c| c.to_ascii_lowercase() == 'o') { if chars .next() - .map_or(false, |c| c.to_ascii_lowercase() == b'q') + .map_or(false, |c| c.to_ascii_lowercase() == 'q') { if chars .next() - .map_or(false, |c| c.to_ascii_lowercase() == b'a') + .map_or(false, |c| c.to_ascii_lowercase() == 'a') { return Some(chars.as_str()); } @@ -403,9 +368,9 @@ impl<'a> ParsedFileExemption<'a> { None } - /// Eat a code delimiter, which can either be a comma or whitespace. + /// Lex a code delimiter, which can either be a comma or whitespace. #[inline] - fn eat_delimiter(line: &str) -> Option<&str> { + fn lex_delimiter(line: &str) -> Option<&str> { let mut chars = line.chars(); if let Some(c) = chars.next() { if c == ',' || c.is_whitespace() { @@ -418,9 +383,9 @@ impl<'a> ParsedFileExemption<'a> { } } - /// Eat an individual rule code (e.g., `F401`). + /// Lex an individual rule code (e.g., `F401`). #[inline] - fn eat_code(line: &str) -> Option<&str> { + fn lex_code(line: &str) -> Option<&str> { // Extract, e.g., the `F` in `F401`. let prefix = line.chars().take_while(char::is_ascii_uppercase).count(); // Extract, e.g., the `401` in `F401`. From 5f783422111d71b870678513068612121617fc7f Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Fri, 7 Jul 2023 11:33:24 -0400 Subject: [PATCH 3/3] Review feedback --- crates/ruff/src/noqa.rs | 65 ++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 43 deletions(-) diff --git a/crates/ruff/src/noqa.rs b/crates/ruff/src/noqa.rs index f137596732dfa..a34b73f57c6a3 100644 --- a/crates/ruff/src/noqa.rs +++ b/crates/ruff/src/noqa.rs @@ -83,7 +83,7 @@ impl<'a> Directive<'a> { let mut codes = vec![]; let mut codes_end = codes_start; let mut leading_space = 0; - while let Some(code) = Directive::lex_code(&text[codes_end + leading_space..]) { + while let Some(code) = Self::lex_code(&text[codes_end + leading_space..]) { codes.push(code); codes_end += leading_space; codes_end += code.len(); @@ -257,46 +257,44 @@ enum ParsedFileExemption<'a> { impl<'a> ParsedFileExemption<'a> { /// Return a [`ParsedFileExemption`] for a given comment line. fn try_extract(line: &'a str) -> Option { - let line = ParsedFileExemption::lex_whitespace(line); - let line = ParsedFileExemption::lex_char(line, '#')?; - let line = ParsedFileExemption::lex_whitespace(line); + let line = Self::lex_whitespace(line); + let line = Self::lex_char(line, '#')?; + let line = Self::lex_whitespace(line); - if let Some(line) = ParsedFileExemption::lex_flake8(line) { + if let Some(line) = Self::lex_flake8(line) { // Ex) `# flake8: noqa` - let line = ParsedFileExemption::lex_whitespace(line); - let line = ParsedFileExemption::lex_char(line, ':')?; - let line = ParsedFileExemption::lex_whitespace(line); - ParsedFileExemption::lex_noqa(line)?; + let line = Self::lex_whitespace(line); + let line = Self::lex_char(line, ':')?; + let line = Self::lex_whitespace(line); + Self::lex_noqa(line)?; Some(Self::All) - } else if let Some(line) = ParsedFileExemption::lex_ruff(line) { - let line = ParsedFileExemption::lex_whitespace(line); - let line = ParsedFileExemption::lex_char(line, ':')?; - let line = ParsedFileExemption::lex_whitespace(line); - let line = ParsedFileExemption::lex_noqa(line)?; + } else if let Some(line) = Self::lex_ruff(line) { + let line = Self::lex_whitespace(line); + let line = Self::lex_char(line, ':')?; + let line = Self::lex_whitespace(line); + let line = Self::lex_noqa(line)?; if line.is_empty() { // Ex) `# ruff: noqa` Some(Self::All) } else { // Ex) `# ruff: noqa: F401, F841` - let line = ParsedFileExemption::lex_whitespace(line); - let Some(line) = ParsedFileExemption::lex_char(line, ':') else { + let line = Self::lex_whitespace(line); + let Some(line) = Self::lex_char(line, ':') else { warn!("Unexpected suffix on `noqa` directive: \"{line}\""); return None; }; - let line = ParsedFileExemption::lex_whitespace(line); + let line = Self::lex_whitespace(line); // Extract the codes from the line (e.g., `F401, F841`). let mut codes = vec![]; let mut line = line; - while let Some(code) = ParsedFileExemption::lex_code(line) { + while let Some(code) = Self::lex_code(line) { codes.push(code); line = &line[code.len()..]; // Codes can be comma- or whitespace-delimited. - if let Some(rest) = ParsedFileExemption::lex_delimiter(line) - .map(ParsedFileExemption::lex_whitespace) - { + if let Some(rest) = Self::lex_delimiter(line).map(Self::lex_whitespace) { line = rest; } else { break; @@ -343,29 +341,10 @@ impl<'a> ParsedFileExemption<'a> { /// Lex a `noqa` directive with case-insensitive matching. #[inline] fn lex_noqa(line: &str) -> Option<&str> { - let mut chars = line.chars(); - if chars - .next() - .map_or(false, |c| c.to_ascii_lowercase() == 'n') - { - if chars - .next() - .map_or(false, |c| c.to_ascii_lowercase() == 'o') - { - if chars - .next() - .map_or(false, |c| c.to_ascii_lowercase() == 'q') - { - if chars - .next() - .map_or(false, |c| c.to_ascii_lowercase() == 'a') - { - return Some(chars.as_str()); - } - } - } + match line.as_bytes() { + [b'n' | b'N', b'o' | b'O', b'q' | b'Q', b'a' | b'A', ..] => Some(&line["noqa".len()..]), + _ => None, } - None } /// Lex a code delimiter, which can either be a comma or whitespace.