diff --git a/crates/ruff_linter/resources/test/fixtures/refurb/FURB129.py b/crates/ruff_linter/resources/test/fixtures/refurb/FURB129.py new file mode 100644 index 0000000000000..1f42b1a10bfd0 --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/refurb/FURB129.py @@ -0,0 +1,67 @@ +import codecs +import io +from pathlib import Path + +# Errors +with open("FURB129.py") as f: + for _line in f.readlines(): + pass + a = [line.lower() for line in f.readlines()] + b = {line.upper() for line in f.readlines()} + c = {line.lower(): line.upper() for line in f.readlines()} + +with Path("FURB129.py").open() as f: + for _line in f.readlines(): + pass + +for _line in open("FURB129.py").readlines(): + pass + +for _line in Path("FURB129.py").open().readlines(): + pass + + +def good1(): + f = Path("FURB129.py").open() + for _line in f.readlines(): + pass + f.close() + + +def good2(f: io.BytesIO): + for _line in f.readlines(): + pass + + +# False positives +def bad(f): + for _line in f.readlines(): + pass + + +def worse(f: codecs.StreamReader): + for _line in f.readlines(): + pass + + +def foo(): + class A: + def readlines(self) -> list[str]: + return ["a", "b", "c"] + + return A() + + +for _line in foo().readlines(): + pass + +# OK +for _line in ["a", "b", "c"]: + pass +with open("FURB129.py") as f: + for _line in f: + pass + for _line in f.readlines(10): + pass + for _not_line in f.readline(): + pass diff --git a/crates/ruff_linter/src/checkers/ast/analyze/comprehension.rs b/crates/ruff_linter/src/checkers/ast/analyze/comprehension.rs index c3d368be2954a..821e62c283597 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/comprehension.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/comprehension.rs @@ -2,11 +2,14 @@ use ruff_python_ast::Comprehension; use crate::checkers::ast::Checker; use crate::codes::Rule; -use crate::rules::flake8_simplify; +use crate::rules::{flake8_simplify, refurb}; /// Run lint rules over a [`Comprehension`] syntax nodes. pub(crate) fn comprehension(comprehension: &Comprehension, checker: &mut Checker) { if checker.enabled(Rule::InDictKeys) { flake8_simplify::rules::key_in_dict_comprehension(checker, comprehension); } + if checker.enabled(Rule::ReadlinesInFor) { + refurb::rules::readlines_in_comprehension(checker, comprehension); + } } diff --git a/crates/ruff_linter/src/checkers/ast/analyze/statement.rs b/crates/ruff_linter/src/checkers/ast/analyze/statement.rs index 7786931f8fab0..37bc90fcaca9d 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/statement.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/statement.rs @@ -1317,6 +1317,9 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) { if checker.enabled(Rule::UnnecessaryDictIndexLookup) { pylint::rules::unnecessary_dict_index_lookup(checker, for_stmt); } + if checker.enabled(Rule::ReadlinesInFor) { + refurb::rules::readlines_in_for(checker, for_stmt); + } if !is_async { if checker.enabled(Rule::ReimplementedBuiltin) { flake8_simplify::rules::convert_for_loop_to_any_all(checker, stmt); diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs index d79d21dcf7a26..8e275cc3712e0 100644 --- a/crates/ruff_linter/src/codes.rs +++ b/crates/ruff_linter/src/codes.rs @@ -1025,6 +1025,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { #[allow(deprecated)] (Refurb, "113") => (RuleGroup::Nursery, rules::refurb::rules::RepeatedAppend), (Refurb, "118") => (RuleGroup::Preview, rules::refurb::rules::ReimplementedOperator), + (Refurb, "129") => (RuleGroup::Preview, rules::refurb::rules::ReadlinesInFor), #[allow(deprecated)] (Refurb, "131") => (RuleGroup::Nursery, rules::refurb::rules::DeleteFullSlice), #[allow(deprecated)] diff --git a/crates/ruff_linter/src/rules/refurb/mod.rs b/crates/ruff_linter/src/rules/refurb/mod.rs index 26ba58041933b..de5f0bdde06dd 100644 --- a/crates/ruff_linter/src/rules/refurb/mod.rs +++ b/crates/ruff_linter/src/rules/refurb/mod.rs @@ -17,6 +17,7 @@ mod tests { #[test_case(Rule::ReadWholeFile, Path::new("FURB101.py"))] #[test_case(Rule::RepeatedAppend, Path::new("FURB113.py"))] #[test_case(Rule::ReimplementedOperator, Path::new("FURB118.py"))] + #[test_case(Rule::ReadlinesInFor, Path::new("FURB129.py"))] #[test_case(Rule::DeleteFullSlice, Path::new("FURB131.py"))] #[test_case(Rule::CheckAndRemoveFromSet, Path::new("FURB132.py"))] #[test_case(Rule::IfExprMinMax, Path::new("FURB136.py"))] diff --git a/crates/ruff_linter/src/rules/refurb/rules/mod.rs b/crates/ruff_linter/src/rules/refurb/rules/mod.rs index 532003ee143df..a69a798cf5ec8 100644 --- a/crates/ruff_linter/src/rules/refurb/rules/mod.rs +++ b/crates/ruff_linter/src/rules/refurb/rules/mod.rs @@ -9,6 +9,7 @@ pub(crate) use math_constant::*; pub(crate) use metaclass_abcmeta::*; pub(crate) use print_empty_string::*; pub(crate) use read_whole_file::*; +pub(crate) use readlines_in_for::*; pub(crate) use redundant_log_base::*; pub(crate) use regex_flag_alias::*; pub(crate) use reimplemented_operator::*; @@ -30,6 +31,7 @@ mod math_constant; mod metaclass_abcmeta; mod print_empty_string; mod read_whole_file; +mod readlines_in_for; mod redundant_log_base; mod regex_flag_alias; mod reimplemented_operator; diff --git a/crates/ruff_linter/src/rules/refurb/rules/readlines_in_for.rs b/crates/ruff_linter/src/rules/refurb/rules/readlines_in_for.rs new file mode 100644 index 0000000000000..e548b499844df --- /dev/null +++ b/crates/ruff_linter/src/rules/refurb/rules/readlines_in_for.rs @@ -0,0 +1,92 @@ +use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::{Comprehension, Expr, StmtFor}; +use ruff_python_semantic::analyze::typing; +use ruff_python_semantic::analyze::typing::is_io_base_expr; +use ruff_text_size::Ranged; + +use crate::checkers::ast::Checker; + +/// ## What it does +/// Checks for uses of `readlines()` when iterating over a file line-by-line. +/// +/// ## Why is this bad? +/// Rather than iterating over all lines in a file by calling `readlines()`, +/// it's more convenient and performant to iterate over the file object +/// directly. +/// +/// ## Example +/// ```python +/// with open("file.txt") as fp: +/// for line in fp.readlines(): +/// ... +/// ``` +/// +/// Use instead: +/// ```python +/// with open("file.txt") as fp: +/// for line in fp: +/// ... +/// ``` +/// +/// ## References +/// - [Python documentation: `io.IOBase.readlines`](https://docs.python.org/3/library/io.html#io.IOBase.readlines) +#[violation] +pub(crate) struct ReadlinesInFor; + +impl AlwaysFixableViolation for ReadlinesInFor { + #[derive_message_formats] + fn message(&self) -> String { + format!("Instead of calling `readlines()`, iterate over file object directly") + } + + fn fix_title(&self) -> String { + "Remove `readlines()`".into() + } +} + +/// FURB129 +pub(crate) fn readlines_in_for(checker: &mut Checker, for_stmt: &StmtFor) { + readlines_in_iter(checker, for_stmt.iter.as_ref()); +} + +/// FURB129 +pub(crate) fn readlines_in_comprehension(checker: &mut Checker, comprehension: &Comprehension) { + readlines_in_iter(checker, &comprehension.iter); +} + +fn readlines_in_iter(checker: &mut Checker, iter_expr: &Expr) { + let Expr::Call(expr_call) = iter_expr else { + return; + }; + + let Expr::Attribute(expr_attr) = expr_call.func.as_ref() else { + return; + }; + + if expr_attr.attr.as_str() != "readlines" || !expr_call.arguments.is_empty() { + return; + } + + // Determine whether `fp` in `fp.readlines()` was bound to a file object. + if let Expr::Name(name) = expr_attr.value.as_ref() { + if !checker + .semantic() + .resolve_name(name) + .map(|id| checker.semantic().binding(id)) + .is_some_and(|binding| typing::is_io_base(binding, checker.semantic())) + { + return; + } + } else { + if !is_io_base_expr(expr_attr.value.as_ref(), checker.semantic()) { + return; + } + } + + let mut diagnostic = Diagnostic::new(ReadlinesInFor, expr_call.range()); + diagnostic.set_fix(Fix::unsafe_edit(Edit::range_deletion( + expr_call.range().add_start(expr_attr.value.range().len()), + ))); + checker.diagnostics.push(diagnostic); +} diff --git a/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB129_FURB129.py.snap b/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB129_FURB129.py.snap new file mode 100644 index 0000000000000..2d3c00155b559 --- /dev/null +++ b/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB129_FURB129.py.snap @@ -0,0 +1,186 @@ +--- +source: crates/ruff_linter/src/rules/refurb/mod.rs +--- +FURB129.py:7:18: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly + | +5 | # Errors +6 | with open("FURB129.py") as f: +7 | for _line in f.readlines(): + | ^^^^^^^^^^^^^ FURB129 +8 | pass +9 | a = [line.lower() for line in f.readlines()] + | + = help: Remove `readlines()` + +ℹ Unsafe fix +4 4 | +5 5 | # Errors +6 6 | with open("FURB129.py") as f: +7 |- for _line in f.readlines(): + 7 |+ for _line in f: +8 8 | pass +9 9 | a = [line.lower() for line in f.readlines()] +10 10 | b = {line.upper() for line in f.readlines()} + +FURB129.py:9:35: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly + | + 7 | for _line in f.readlines(): + 8 | pass + 9 | a = [line.lower() for line in f.readlines()] + | ^^^^^^^^^^^^^ FURB129 +10 | b = {line.upper() for line in f.readlines()} +11 | c = {line.lower(): line.upper() for line in f.readlines()} + | + = help: Remove `readlines()` + +ℹ Unsafe fix +6 6 | with open("FURB129.py") as f: +7 7 | for _line in f.readlines(): +8 8 | pass +9 |- a = [line.lower() for line in f.readlines()] + 9 |+ a = [line.lower() for line in f] +10 10 | b = {line.upper() for line in f.readlines()} +11 11 | c = {line.lower(): line.upper() for line in f.readlines()} +12 12 | + +FURB129.py:10:35: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly + | + 8 | pass + 9 | a = [line.lower() for line in f.readlines()] +10 | b = {line.upper() for line in f.readlines()} + | ^^^^^^^^^^^^^ FURB129 +11 | c = {line.lower(): line.upper() for line in f.readlines()} + | + = help: Remove `readlines()` + +ℹ Unsafe fix +7 7 | for _line in f.readlines(): +8 8 | pass +9 9 | a = [line.lower() for line in f.readlines()] +10 |- b = {line.upper() for line in f.readlines()} + 10 |+ b = {line.upper() for line in f} +11 11 | c = {line.lower(): line.upper() for line in f.readlines()} +12 12 | +13 13 | with Path("FURB129.py").open() as f: + +FURB129.py:11:49: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly + | + 9 | a = [line.lower() for line in f.readlines()] +10 | b = {line.upper() for line in f.readlines()} +11 | c = {line.lower(): line.upper() for line in f.readlines()} + | ^^^^^^^^^^^^^ FURB129 +12 | +13 | with Path("FURB129.py").open() as f: + | + = help: Remove `readlines()` + +ℹ Unsafe fix +8 8 | pass +9 9 | a = [line.lower() for line in f.readlines()] +10 10 | b = {line.upper() for line in f.readlines()} +11 |- c = {line.lower(): line.upper() for line in f.readlines()} + 11 |+ c = {line.lower(): line.upper() for line in f} +12 12 | +13 13 | with Path("FURB129.py").open() as f: +14 14 | for _line in f.readlines(): + +FURB129.py:14:18: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly + | +13 | with Path("FURB129.py").open() as f: +14 | for _line in f.readlines(): + | ^^^^^^^^^^^^^ FURB129 +15 | pass + | + = help: Remove `readlines()` + +ℹ Unsafe fix +11 11 | c = {line.lower(): line.upper() for line in f.readlines()} +12 12 | +13 13 | with Path("FURB129.py").open() as f: +14 |- for _line in f.readlines(): + 14 |+ for _line in f: +15 15 | pass +16 16 | +17 17 | for _line in open("FURB129.py").readlines(): + +FURB129.py:17:14: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly + | +15 | pass +16 | +17 | for _line in open("FURB129.py").readlines(): + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB129 +18 | pass + | + = help: Remove `readlines()` + +ℹ Unsafe fix +14 14 | for _line in f.readlines(): +15 15 | pass +16 16 | +17 |-for _line in open("FURB129.py").readlines(): + 17 |+for _line in open("FURB129.py"): +18 18 | pass +19 19 | +20 20 | for _line in Path("FURB129.py").open().readlines(): + +FURB129.py:20:14: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly + | +18 | pass +19 | +20 | for _line in Path("FURB129.py").open().readlines(): + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB129 +21 | pass + | + = help: Remove `readlines()` + +ℹ Unsafe fix +17 17 | for _line in open("FURB129.py").readlines(): +18 18 | pass +19 19 | +20 |-for _line in Path("FURB129.py").open().readlines(): + 20 |+for _line in Path("FURB129.py").open(): +21 21 | pass +22 22 | +23 23 | + +FURB129.py:26:18: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly + | +24 | def good1(): +25 | f = Path("FURB129.py").open() +26 | for _line in f.readlines(): + | ^^^^^^^^^^^^^ FURB129 +27 | pass +28 | f.close() + | + = help: Remove `readlines()` + +ℹ Unsafe fix +23 23 | +24 24 | def good1(): +25 25 | f = Path("FURB129.py").open() +26 |- for _line in f.readlines(): + 26 |+ for _line in f: +27 27 | pass +28 28 | f.close() +29 29 | + +FURB129.py:32:18: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly + | +31 | def good2(f: io.BytesIO): +32 | for _line in f.readlines(): + | ^^^^^^^^^^^^^ FURB129 +33 | pass + | + = help: Remove `readlines()` + +ℹ Unsafe fix +29 29 | +30 30 | +31 31 | def good2(f: io.BytesIO): +32 |- for _line in f.readlines(): + 32 |+ for _line in f: +33 33 | pass +34 34 | +35 35 | + + diff --git a/crates/ruff_python_semantic/src/analyze/typing.rs b/crates/ruff_python_semantic/src/analyze/typing.rs index 5c60b21f44e9b..48d3b597f176e 100644 --- a/crates/ruff_python_semantic/src/analyze/typing.rs +++ b/crates/ruff_python_semantic/src/analyze/typing.rs @@ -406,7 +406,7 @@ where } /// Abstraction for a type checker, conservatively checks for the intended type(s). -trait TypeChecker { +pub trait TypeChecker { /// Check annotation expression to match the intended type(s). fn match_annotation(annotation: &Expr, semantic: &SemanticModel) -> bool; /// Check initializer expression to match the intended type(s). @@ -441,6 +441,24 @@ fn check_type(binding: &Binding, semantic: &SemanticModel) -> bo _ => false, }, + BindingKind::WithItemVar => match binding.statement(semantic) { + // ```python + // with open("file.txt") as x: + // ... + // ``` + Some(Stmt::With(ast::StmtWith { items, .. })) => { + let Some(item) = items.iter().find(|item| { + item.optional_vars + .as_ref() + .is_some_and(|vars| vars.range().contains_range(binding.range)) + }) else { + return false; + }; + T::match_initializer(&item.context_expr, semantic) + } + _ => false, + }, + BindingKind::Argument => match binding.statement(semantic) { // ```python // def foo(x: annotation): @@ -565,35 +583,125 @@ impl BuiltinTypeChecker for TupleChecker { const EXPR_TYPE: PythonType = PythonType::Tuple; } -/// Test whether the given binding (and the given name) can be considered a list. +pub struct IoBaseChecker; + +impl TypeChecker for IoBaseChecker { + fn match_annotation(annotation: &Expr, semantic: &SemanticModel) -> bool { + semantic + .resolve_call_path(annotation) + .is_some_and(|call_path| { + if semantic.match_typing_call_path(&call_path, "IO") { + return true; + } + if semantic.match_typing_call_path(&call_path, "BinaryIO") { + return true; + } + if semantic.match_typing_call_path(&call_path, "TextIO") { + return true; + } + matches!( + call_path.as_slice(), + [ + "io", + "IOBase" + | "RawIOBase" + | "BufferedIOBase" + | "TextIOBase" + | "BytesIO" + | "StringIO" + | "BufferedReader" + | "BufferedWriter" + | "BufferedRandom" + | "BufferedRWPair" + | "TextIOWrapper" + ] | ["os", "Path" | "PathLike"] + | [ + "pathlib", + "Path" | "PurePath" | "PurePosixPath" | "PureWindowsPath" + ] + ) + }) + } + + fn match_initializer(initializer: &Expr, semantic: &SemanticModel) -> bool { + let Expr::Call(ast::ExprCall { func, .. }) = initializer else { + return false; + }; + + // Ex) `pathlib.Path("file.txt")` + if let Expr::Attribute(ast::ExprAttribute { value, attr, .. }) = func.as_ref() { + if attr.as_str() == "open" { + if let Expr::Call(ast::ExprCall { func, .. }) = value.as_ref() { + return semantic.resolve_call_path(func).is_some_and(|call_path| { + matches!( + call_path.as_slice(), + [ + "pathlib", + "Path" | "PurePath" | "PurePosixPath" | "PureWindowsPath" + ] + ) + }); + } + } + } + + // Ex) `open("file.txt")` + semantic + .resolve_call_path(func.as_ref()) + .is_some_and(|call_path| { + matches!( + call_path.as_slice(), + ["io", "open" | "open_code"] | ["os" | "", "open"] + ) + }) + } +} + +/// Test whether the given binding can be considered a list. +/// /// For this, we check what value might be associated with it through it's initialization and /// what annotation it has (we consider `list` and `typing.List`). pub fn is_list(binding: &Binding, semantic: &SemanticModel) -> bool { check_type::(binding, semantic) } -/// Test whether the given binding (and the given name) can be considered a dictionary. +/// Test whether the given binding can be considered a dictionary. +/// /// For this, we check what value might be associated with it through it's initialization and /// what annotation it has (we consider `dict` and `typing.Dict`). pub fn is_dict(binding: &Binding, semantic: &SemanticModel) -> bool { check_type::(binding, semantic) } -/// Test whether the given binding (and the given name) can be considered a set. +/// Test whether the given binding can be considered a set. +/// /// For this, we check what value might be associated with it through it's initialization and /// what annotation it has (we consider `set` and `typing.Set`). pub fn is_set(binding: &Binding, semantic: &SemanticModel) -> bool { check_type::(binding, semantic) } -/// Test whether the given binding (and the given name) can be considered a -/// tuple. For this, we check what value might be associated with it through +/// Test whether the given binding can be considered a tuple. +/// +/// For this, we check what value might be associated with it through /// it's initialization and what annotation it has (we consider `tuple` and /// `typing.Tuple`). pub fn is_tuple(binding: &Binding, semantic: &SemanticModel) -> bool { check_type::(binding, semantic) } +/// Test whether the given binding can be considered a file-like object (i.e., a type that +/// implements `io.IOBase`). +pub fn is_io_base(binding: &Binding, semantic: &SemanticModel) -> bool { + check_type::(binding, semantic) +} + +/// Test whether the given expression can be considered a file-like object (i.e., a type that +/// implements `io.IOBase`). +pub fn is_io_base_expr(expr: &Expr, semantic: &SemanticModel) -> bool { + IoBaseChecker::match_initializer(expr, semantic) +} + /// Find the [`ParameterWithDefault`] corresponding to the given [`Binding`]. #[inline] fn find_parameter<'a>( @@ -699,6 +807,18 @@ pub fn find_binding_value<'a>(binding: &Binding, semantic: &'a SemanticModel) -> _ => {} } } + // Ex) `with open("file.txt") as f:` + BindingKind::WithItemVar => { + let parent_id = binding.source?; + let parent = semantic.statement(parent_id); + if let Stmt::With(ast::StmtWith { items, .. }) = parent { + return items.iter().find_map(|item| { + let target = item.optional_vars.as_ref()?; + let value = &item.context_expr; + match_value(binding, target, value) + }); + } + } _ => {} } None diff --git a/ruff.schema.json b/ruff.schema.json index 179d5eb66a3ca..369c973256cc5 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -2988,6 +2988,8 @@ "FURB11", "FURB113", "FURB118", + "FURB12", + "FURB129", "FURB13", "FURB131", "FURB132",