diff --git a/crates/ruff_linter/resources/test/fixtures/refurb/FURB101.py b/crates/ruff_linter/resources/test/fixtures/refurb/FURB101.py new file mode 100644 index 0000000000000..6e25d2860456b --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/refurb/FURB101.py @@ -0,0 +1,105 @@ +def foo(): + ... + + +def bar(x): + ... + + +# Errors. + +# FURB101 +with open("file.txt") as f: + x = f.read() + +# FURB101 +with open("file.txt", "rb") as f: + x = f.read() + +# FURB101 +with open("file.txt", mode="rb") as f: + x = f.read() + +# FURB101 +with open("file.txt", encoding="utf8") as f: + x = f.read() + +# FURB101 +with open("file.txt", errors="ignore") as f: + x = f.read() + +# FURB101 +with open("file.txt", errors="ignore", mode="rb") as f: + x = f.read() + +# FURB101 +with open("file.txt", mode="r") as f: # noqa: FURB120 + x = f.read() + +# FURB101 +with open(foo(), "rb") as f: + # The body of `with` is non-trivial, but the recommendation holds. + bar("pre") + bar(f.read()) + bar("post") + print("Done") + +# FURB101 +with open("a.txt") as a, open("b.txt", "rb") as b: + x = a.read() + y = b.read() + +# FURB101 +with foo() as a, open("file.txt") as b, foo() as c: + # We have other things in here, multiple with items, but + # the user reads the whole file and that bit they can replace. + bar(a) + bar(bar(a + b.read())) + bar(c) + + +# Non-errors. + +f2 = open("file2.txt") +with open("file.txt") as f: + x = f2.read() + +with open("file.txt") as f: + # Path.read_text() does not support size, so ignore this + x = f.read(100) + +# mode is dynamic +with open("file.txt", foo()) as f: + x = f.read() + +# keyword mode is incorrect +with open("file.txt", mode="a+") as f: + x = f.read() + +# enables line buffering, not supported in read_text() +with open("file.txt", buffering=1) as f: + x = f.read() + +# force CRLF, not supported in read_text() +with open("file.txt", newline="\r\n") as f: + x = f.read() + +# dont mistake "newline" for "mode" +with open("file.txt", newline="b") as f: + x = f.read() + +# I guess we can possibly also report this case, but the question +# is why the user would put "r+" here in the first place. +with open("file.txt", "r+") as f: + x = f.read() + +# Even though we read the whole file, we do other things. +with open("file.txt") as f: + x = f.read() + f.seek(0) + x += f.read(100) + +open = bar +# Not a real `open`. +with open("file.txt") as f: + x = f.read() diff --git a/crates/ruff_linter/src/checkers/ast/analyze/statement.rs b/crates/ruff_linter/src/checkers/ast/analyze/statement.rs index a39fd0734355c..cabda30e9906c 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/statement.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/statement.rs @@ -1166,6 +1166,9 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) { if checker.enabled(Rule::RedefinedLoopName) { pylint::rules::redefined_loop_name(checker, stmt); } + if checker.enabled(Rule::ReadWholeFile) { + refurb::rules::read_whole_file(checker, with_stmt); + } } Stmt::While(ast::StmtWhile { body, orelse, .. }) => { if checker.enabled(Rule::FunctionUsesLoopVariable) { diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs index 5de943cd83a6b..81dbe3a6d5963 100644 --- a/crates/ruff_linter/src/codes.rs +++ b/crates/ruff_linter/src/codes.rs @@ -914,6 +914,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Flake8Slots, "002") => (RuleGroup::Unspecified, rules::flake8_slots::rules::NoSlotsInNamedtupleSubclass), // refurb + (Refurb, "101") => (RuleGroup::Preview, rules::refurb::rules::ReadWholeFile), (Refurb, "105") => (RuleGroup::Preview, rules::refurb::rules::PrintEmptyString), #[allow(deprecated)] (Refurb, "113") => (RuleGroup::Nursery, rules::refurb::rules::RepeatedAppend), diff --git a/crates/ruff_linter/src/rules/refurb/mod.rs b/crates/ruff_linter/src/rules/refurb/mod.rs index 587b0f85fae6b..27497194a7730 100644 --- a/crates/ruff_linter/src/rules/refurb/mod.rs +++ b/crates/ruff_linter/src/rules/refurb/mod.rs @@ -14,6 +14,7 @@ mod tests { use crate::test::test_path; use crate::{assert_messages, settings}; + #[test_case(Rule::ReadWholeFile, Path::new("FURB101.py"))] #[test_case(Rule::RepeatedAppend, Path::new("FURB113.py"))] #[test_case(Rule::DeleteFullSlice, Path::new("FURB131.py"))] #[test_case(Rule::CheckAndRemoveFromSet, Path::new("FURB132.py"))] diff --git a/crates/ruff_linter/src/rules/refurb/rules/mod.rs b/crates/ruff_linter/src/rules/refurb/rules/mod.rs index a3277d79f4667..021b21075ffa8 100644 --- a/crates/ruff_linter/src/rules/refurb/rules/mod.rs +++ b/crates/ruff_linter/src/rules/refurb/rules/mod.rs @@ -1,6 +1,7 @@ pub(crate) use check_and_remove_from_set::*; pub(crate) use delete_full_slice::*; pub(crate) use print_empty_string::*; +pub(crate) use read_whole_file::*; pub(crate) use reimplemented_starmap::*; pub(crate) use repeated_append::*; pub(crate) use slice_copy::*; @@ -9,6 +10,7 @@ pub(crate) use unnecessary_enumerate::*; mod check_and_remove_from_set; mod delete_full_slice; mod print_empty_string; +mod read_whole_file; mod reimplemented_starmap; mod repeated_append; mod slice_copy; diff --git a/crates/ruff_linter/src/rules/refurb/rules/read_whole_file.rs b/crates/ruff_linter/src/rules/refurb/rules/read_whole_file.rs new file mode 100644 index 0000000000000..a56535216ae9f --- /dev/null +++ b/crates/ruff_linter/src/rules/refurb/rules/read_whole_file.rs @@ -0,0 +1,312 @@ +use ruff_diagnostics::{Diagnostic, Violation}; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::visitor::{self, Visitor}; +use ruff_python_ast::{self as ast, Expr}; +use ruff_python_codegen::Generator; +use ruff_python_semantic::{BindingId, ResolvedReference, SemanticModel}; +use ruff_text_size::{Ranged, TextRange}; + +use crate::autofix::snippet::SourceCodeSnippet; +use crate::checkers::ast::Checker; + +/// ## What it does +/// Checks for uses of `open` and `read` that can be replaced by `pathlib`. +/// +/// ## Why is this bad? +/// When you just want to read the contents of a whole file, using a `with` block +/// is a bit of an overkill. A simpler alternative is to use pathlib's `read_text` +/// and `read_bytes` functions: +/// +/// ## Example +/// ```python +/// with open(filename) as f: +/// contents = f.read() +/// ``` +/// +/// Use instead: +/// ```python +/// contents = Path(filename).read_text() +/// ``` +/// +/// ## References +/// - [Python documentation: `Path.read_bytes()`](https://docs.python.org/3/library/pathlib.html#pathlib.Path.read_bytes) +/// - [Python documentation: `Path.read_text()`](https://docs.python.org/3/library/pathlib.html#pathlib.Path.read_text) +#[violation] +pub struct ReadWholeFile { + filename: SourceCodeSnippet, + suggestion: SourceCodeSnippet, +} + +impl Violation for ReadWholeFile { + #[derive_message_formats] + fn message(&self) -> String { + let filename = self.filename.truncated_display(); + let suggestion = self.suggestion.truncated_display(); + + format!("`open` and `read` should be replaced by `Path({filename}).{suggestion}`") + } +} + +/// FURB101 +pub(crate) fn read_whole_file(checker: &mut Checker, with: &ast::StmtWith) { + // async check here is more of a precaution. + if with.is_async || !checker.semantic().is_builtin("open") { + return; + } + + // First we go through all the items in the statement and find all `open` operations. + let opens = find_file_opens(checker.semantic(), with); + if opens.is_empty() { + return; + } + + // Then we need to match each `open` operation with exactly one `read` call. + let mut matcher = ReadMatcher { candidates: opens }; + visitor::walk_body(&mut matcher, &with.body); + + // All the matched operations should be reported. + let diagnostics: Vec = matcher + .candidates + .iter() + .filter(|open| open.matched) + .map(|open| { + Diagnostic::new( + ReadWholeFile { + filename: SourceCodeSnippet::from_str(&checker.generator().expr(open.filename)), + suggestion: make_suggestion(open, checker.generator()), + }, + open.item.range(), + ) + }) + .collect(); + checker.diagnostics.extend(diagnostics); +} + +#[derive(Debug)] +enum ReadMode { + /// "r" -> `read_text` + Text, + /// "rb" -> `read_bytes` + Bytes, +} + +/// A grab bag struct that joins together every piece of information we need to track +/// about a file open operation. +struct FileOpen<'a> { + /// With item where the open happens, we use it for the reporting range. + item: &'a ast::WithItem, + /// Filename expression used as the first argument in `open`, we use it in the diagnostic message. + filename: &'a Expr, + /// The type of read to choose `read_text` or `read_bytes`. + mode: ReadMode, + /// Keywords that can be used in the new read call. + keywords: Vec<&'a ast::Keyword>, + /// We only check `open` operations which file handles are used exactly once. + /// The easiest way to figure out if something IS that reference we knew all along + /// is to compare text ranges. + ref_range: TextRange, + /// A flag signifying that there is a matching `read` in the body. + matched: bool, +} + +impl<'a> FileOpen<'a> { + fn is_ref(&self, expr: &Expr) -> bool { + expr.range() == self.ref_range + } +} + +/// Find and return all `open` operations in the given `with` statement. +fn find_file_opens<'a>(semantic: &SemanticModel, with: &'a ast::StmtWith) -> Vec> { + with.items + .iter() + .filter_map(|item| find_file_open(semantic, item, with.range())) + .collect() +} + +/// Find `open` operation in the given `with` item. +fn find_file_open<'a>( + semantic: &SemanticModel, + item: &'a ast::WithItem, + body_range: TextRange, +) -> Option> { + // We want to match `open(...) as var`. + let ast::ExprCall { + func, + arguments: ast::Arguments { args, keywords, .. }, + .. + } = item.context_expr.as_call_expr()?; + + if func.as_name_expr()?.id != "open" { + return None; + } + + let var = item.optional_vars.as_deref()?.as_name_expr()?; + + // Match positional arguments, get filename and read mode. + let (filename, pos_mode) = match_open_args(args)?; + // Match keyword arguments, get keyword arguments to forward and possibly read mode. + let (keywords, kw_mode) = match_open_keywords(keywords)?; + + // `pos_mode` could've been assigned default value corresponding to "r", while + // keyword mode should override that. + let mode = kw_mode.unwrap_or(pos_mode); + + // Now we need to find what is this variable bound to... + let scope = semantic.current_scope(); + let bindings: Vec = scope.get_all(var.id.as_str()).collect(); + + let binding = bindings + .iter() + .map(|x| semantic.binding(*x)) + // We might have many bindings with the same name, but we only care + // for the one we are looking at right now. + .find(|binding| binding.range() == var.range())?; + + // Since many references can share the same binding, we can limit our attention span + // exclusively to the body of the current `with` statement. + let references: Vec<&ResolvedReference> = binding + .references + .iter() + .map(|id| semantic.reference(*id)) + .filter(|reference| body_range.contains_range(reference.range())) + .collect(); + + // And even with all these restrictions, if the file handle gets used not exactly once, + // it doesn't fit the bill. + let [reference] = references.as_slice() else { + return None; + }; + + // Range seems to be the easiest way to understand that this is the right + // reference when we'll be looking at it. + let ref_range = reference.range(); + + Some(FileOpen { + item, + filename, + mode, + keywords, + ref_range, + // matcher will set this later + matched: false, + }) +} + +/// Match positional arguments. Return expression for the file name and read mode. +fn match_open_args(args: &[Expr]) -> Option<(&Expr, ReadMode)> { + match args { + [filename] => Some((filename, ReadMode::Text)), + [filename, mode_literal] => match_open_mode(mode_literal).map(|mode| (filename, mode)), + // The third positional argument is buffering and `read_text` doesn't support it. + _ => None, + } +} + +/// Match keyword arguments. Return keywrod arguments to forward and read mode. +fn match_open_keywords( + keywords: &[ast::Keyword], +) -> Option<(Vec<&ast::Keyword>, Option)> { + let mut result: Vec<&ast::Keyword> = vec![]; + let mut mode: Option = None; + + for keyword in keywords { + match keyword.arg.as_ref()?.as_str() { + "encoding" | "errors" => result.push(keyword), + + // This might look bizarre, - why do we re-wrap this optional? + // + // The answer is quite simple, in the result of the current function + // mode being `None` is a possible and correct option meaning that there + // was NO "mode" keyword argument. + // + // The result of `match_open_mode` on the other hand is None + // in the cases when the mode is not compatible with `read_text`/`read_bytes`. + // + // So, here we return None from this whole function if the mode + // is incompatible. + "mode" => mode = Some(match_open_mode(&keyword.value)?), + + // All other keywords cannot be directly forwarded. + _ => return None, + }; + } + Some((result, mode)) +} + +/// Match open mode to see if it is supported. +fn match_open_mode(mode: &Expr) -> Option { + let ast::StringConstant { + value, + implicit_concatenated: false, + .. + } = mode.as_constant_expr()?.value.as_str()? + else { + return None; + }; + match value.as_str() { + "r" => Some(ReadMode::Text), + "rb" => Some(ReadMode::Bytes), + _ => None, + } +} + +/// AST visitor that matches `open` operations with the corresponding `read` calls. +struct ReadMatcher<'a> { + candidates: Vec>, +} + +impl<'a> Visitor<'a> for ReadMatcher<'a> { + fn visit_expr(&mut self, expr: &'a Expr) { + if let Some(read_from) = match_read_call(expr) { + if let Some(open) = self + .candidates + .iter_mut() + .find(|open| open.is_ref(read_from)) + { + open.matched = true; + } + return; + } + visitor::walk_expr(self, expr); + } +} + +/// Match `x.read()` expression and return expression `x` on success. +fn match_read_call(expr: &Expr) -> Option<&Expr> { + let call = expr.as_call_expr()?; + let attr = call.func.as_attribute_expr()?; + let method_name = &attr.attr; + + if method_name != "read" + || !attr.value.is_name_expr() + || !call.arguments.args.is_empty() + || !call.arguments.keywords.is_empty() + { + return None; + } + + Some(attr.value.as_ref()) +} + +/// Construct the replacement suggestion call. +fn make_suggestion(open: &FileOpen<'_>, generator: Generator) -> SourceCodeSnippet { + let method_name = match open.mode { + ReadMode::Text => "read_text", + ReadMode::Bytes => "read_bytes", + }; + let name = ast::ExprName { + id: method_name.to_string(), + ctx: ast::ExprContext::Load, + range: TextRange::default(), + }; + let call = ast::ExprCall { + func: Box::new(name.into()), + arguments: ast::Arguments { + args: vec![], + keywords: open.keywords.iter().copied().cloned().collect(), + range: TextRange::default(), + }, + range: TextRange::default(), + }; + SourceCodeSnippet::from_str(&generator.expr(&call.into())) +} diff --git a/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB101_FURB101.py.snap b/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB101_FURB101.py.snap new file mode 100644 index 0000000000000..6b3595549841c --- /dev/null +++ b/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB101_FURB101.py.snap @@ -0,0 +1,96 @@ +--- +source: crates/ruff_linter/src/rules/refurb/mod.rs +--- +FURB101.py:12:6: FURB101 `open` and `read` should be replaced by `Path("file.txt").read_text()` + | +11 | # FURB101 +12 | with open("file.txt") as f: + | ^^^^^^^^^^^^^^^^^^^^^ FURB101 +13 | x = f.read() + | + +FURB101.py:16:6: FURB101 `open` and `read` should be replaced by `Path("file.txt").read_bytes()` + | +15 | # FURB101 +16 | with open("file.txt", "rb") as f: + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB101 +17 | x = f.read() + | + +FURB101.py:20:6: FURB101 `open` and `read` should be replaced by `Path("file.txt").read_bytes()` + | +19 | # FURB101 +20 | with open("file.txt", mode="rb") as f: + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB101 +21 | x = f.read() + | + +FURB101.py:24:6: FURB101 `open` and `read` should be replaced by `Path("file.txt").read_text(encoding="utf8")` + | +23 | # FURB101 +24 | with open("file.txt", encoding="utf8") as f: + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB101 +25 | x = f.read() + | + +FURB101.py:28:6: FURB101 `open` and `read` should be replaced by `Path("file.txt").read_text(errors="ignore")` + | +27 | # FURB101 +28 | with open("file.txt", errors="ignore") as f: + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB101 +29 | x = f.read() + | + +FURB101.py:32:6: FURB101 `open` and `read` should be replaced by `Path("file.txt").read_bytes(errors="ignore")` + | +31 | # FURB101 +32 | with open("file.txt", errors="ignore", mode="rb") as f: + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB101 +33 | x = f.read() + | + +FURB101.py:36:6: FURB101 `open` and `read` should be replaced by `Path("file.txt").read_text()` + | +35 | # FURB101 +36 | with open("file.txt", mode="r") as f: # noqa: FURB120 + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB101 +37 | x = f.read() + | + +FURB101.py:40:6: FURB101 `open` and `read` should be replaced by `Path(foo()).read_bytes()` + | +39 | # FURB101 +40 | with open(foo(), "rb") as f: + | ^^^^^^^^^^^^^^^^^^^^^^ FURB101 +41 | # The body of `with` is non-trivial, but the recommendation holds. +42 | bar("pre") + | + +FURB101.py:48:6: FURB101 `open` and `read` should be replaced by `Path("a.txt").read_text()` + | +47 | # FURB101 +48 | with open("a.txt") as a, open("b.txt", "rb") as b: + | ^^^^^^^^^^^^^^^^^^ FURB101 +49 | x = a.read() +50 | y = b.read() + | + +FURB101.py:48:26: FURB101 `open` and `read` should be replaced by `Path("b.txt").read_bytes()` + | +47 | # FURB101 +48 | with open("a.txt") as a, open("b.txt", "rb") as b: + | ^^^^^^^^^^^^^^^^^^^^^^^^ FURB101 +49 | x = a.read() +50 | y = b.read() + | + +FURB101.py:53:18: FURB101 `open` and `read` should be replaced by `Path("file.txt").read_text()` + | +52 | # FURB101 +53 | with foo() as a, open("file.txt") as b, foo() as c: + | ^^^^^^^^^^^^^^^^^^^^^ FURB101 +54 | # We have other things in here, multiple with items, but +55 | # the user reads the whole file and that bit they can replace. + | + + diff --git a/ruff.schema.json b/ruff.schema.json index 1b0d1db27eca8..6d05c9c8e5c02 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -2652,6 +2652,7 @@ "FURB", "FURB1", "FURB10", + "FURB101", "FURB105", "FURB11", "FURB113",