Add fast-path for comment detection

astral-sh · Feb 3, 2024 · 905202d · 905202d
1 parent c53aae0
commit 905202d
Show file tree

Hide file tree

Showing 2 changed files with 27 additions and 4 deletions.
diff --git a/crates/ruff_linter/src/rules/eradicate/detection.rs b/crates/ruff_linter/src/rules/eradicate/detection.rs
@@ -4,11 +4,12 @@ use once_cell::sync::Lazy;
 use regex::{Regex, RegexSet};
 
 use ruff_python_parser::parse_suite;
+use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
+use ruff_text_size::{Ranged, TextSize};
 
 static CODE_INDICATORS: Lazy<AhoCorasick> = Lazy::new(|| {
     AhoCorasick::new([
-        "(", ")", "[", "]", "{", "}", ":", "=", "%", "print", "return", "break", "continue",
-        "import",
+        "(", ")", "[", "]", "{", "}", ":", "=", "%", "return", "break", "continue", "import",
     ])
     .unwrap()
 });
@@ -53,6 +54,24 @@ pub(crate) fn comment_contains_code(line: &str, task_tags: &[String]) -> bool {
         return false;
     }
 
+    // Fast path: two consecutive identifiers (`Name` tokens) at the start of the comment can
+    // never parse as code; keywords are tokenized as their own kinds, so they fall through.
+    if let Some(token) = SimpleTokenizer::starts_at(TextSize::default(), line)
+        .skip_trivia()
+        .next()
+    {
+        if token.kind == SimpleTokenKind::Name {
+            if let Some(token) = SimpleTokenizer::starts_at(token.end(), line)
+                .skip_trivia()
+                .next()
+            {
+                if token.kind == SimpleTokenKind::Name {
+                    return false;
+                }
+            }
+        }
+    }
+
     // Ignore whitelisted comments.
     if ALLOWLIST_REGEX.is_match(line) {
         return false;
@@ -123,9 +142,10 @@ mod tests {
 
     #[test]
     fn comment_contains_code_with_print() {
-        assert!(comment_contains_code("#print", &[]));
         assert!(comment_contains_code("#print(1)", &[]));
 
+        assert!(!comment_contains_code("#print", &[]));
+        assert!(!comment_contains_code("#print 1", &[]));
         assert!(!comment_contains_code("#to print", &[]));
     }
 

diff --git a/crates/ruff_python_trivia/src/tokenizer.rs b/crates/ruff_python_trivia/src/tokenizer.rs
@@ -182,7 +182,7 @@ fn to_keyword_or_other(source: &str) -> SimpleTokenKind {
         "case" => SimpleTokenKind::Case,
         "with" => SimpleTokenKind::With,
         "yield" => SimpleTokenKind::Yield,
-        _ => SimpleTokenKind::Other, // Potentially an identifier, but only if it isn't a string prefix. We can ignore this for now https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
+        _ => SimpleTokenKind::Name, // Potentially an identifier, but only if it isn't a string prefix. We can ignore this for now https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
     }
 }
 
@@ -467,6 +467,9 @@ pub enum SimpleTokenKind {
     /// `yield`
     Yield,
 
+    /// An identifier or keyword.
+    Name,
+
     /// Any other non trivia token.
     Other,