From 0003c730e0bbf39d94ac7964c9a161d61776368a Mon Sep 17 00:00:00 2001 From: Chammika Mannakkara Date: Mon, 15 Jan 2024 01:00:59 +0900 Subject: [PATCH] [`flake8-simplify`] Implement `enumerate-for-loop` (`SIM113`) (#7777) Implements SIM113 from #998 Added tests Limitations - No fix yet - Only flag cases where index variable immediately precede `for` loop @charliermarsh please review and let me know any improvements --------- Co-authored-by: Charlie Marsh --- .../test/fixtures/flake8_simplify/SIM113.py | 168 +++++++++++++++ .../ast/analyze/deferred_for_loops.rs | 5 +- .../src/checkers/ast/analyze/statement.rs | 3 +- crates/ruff_linter/src/codes.rs | 1 + .../src/rules/flake8_simplify/mod.rs | 1 + .../flake8_simplify/rules/ast_bool_op.rs | 2 +- .../rules/enumerate_for_loop.rs | 193 ++++++++++++++++++ .../src/rules/flake8_simplify/rules/mod.rs | 2 + ...ke8_simplify__tests__SIM113_SIM113.py.snap | 47 +++++ .../flake8_trio/rules/zero_sleep_call.rs | 17 +- crates/ruff_python_ast/src/traversal.rs | 12 ++ .../src/analyze/typing.rs | 18 ++ crates/ruff_python_semantic/src/scope.rs | 5 + ruff.schema.json | 1 + 14 files changed, 464 insertions(+), 11 deletions(-) create mode 100644 crates/ruff_linter/resources/test/fixtures/flake8_simplify/SIM113.py create mode 100644 crates/ruff_linter/src/rules/flake8_simplify/rules/enumerate_for_loop.rs create mode 100644 crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__SIM113_SIM113.py.snap diff --git a/crates/ruff_linter/resources/test/fixtures/flake8_simplify/SIM113.py b/crates/ruff_linter/resources/test/fixtures/flake8_simplify/SIM113.py new file mode 100644 index 0000000000000..67ef392337b3c --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/flake8_simplify/SIM113.py @@ -0,0 +1,168 @@ +def func(): + # SIM113 + idx = 0 + for x in range(5): + g(x, idx) + idx += 1 + h(x) + + +def func(): + # SIM113 + sum = 0 + idx = 0 + for x in range(5): + if g(x): + break + idx += 1 + sum += h(x, idx) + + +def func(): + # SIM113 + idx = 0 + for x, y in zip(range(5), range(5)): + g(x) + h(x, y) + idx += 1 + + +def func(): + # SIM113 + idx = 0 + sum = 0 + for x in range(5): + sum += h(x, idx) + idx += 1 + + +def func(): + # SIM113 + sum, idx = 0, 0 + for x in range(5): + g(x, idx) + idx += 1 + h(x) + + +def func(): + # OK (index doesn't start at 0 + idx = 10 + for x in range(5): + g(x, idx) + idx += 1 + h(x) + + +def func(): + # OK (incremented by more than 1) + idx = 0 + for x in range(5): + g(x, idx) + idx += 2 + h(x) + + +def func(): + # OK (incremented in `if`) + idx = 0 + for x in range(5): + if g(x): + idx += 1 + h(x) + + +def func(): + # OK (`continue` in match-case) + idx = 0 + for x in range(5): + match g(x): + case 1: + h(x) + case 2: + continue + case _: + h(idx) + idx += 1 + + +def func(): + # OK (`continue` inside `with` clause) + idx = 0 + for x in range(5): + with context as c: + if g(x): + continue + h(x, idx, c) + idx += 1 + + +def func(): + # OK (incremented in `try` block) + idx = 0 + for x in range(5): + try: + g(x, idx) + except: + h(x) + continue + idx += 1 + + +def func(idx: 0): + # OK (index is an argument) + for x in range(5): + g(x, idx) + idx += 1 + + +def func(x: int): + # OK (index _may_ be non-zero) + if x > 0: + idx = 1 + else: + idx = 0 + for x in range(5): + g(x, idx) + idx += 1 + h(x) + + +def func(): + # OK + idx = 0 + for x in range(5): + g(x, idx) + idx += 1 + idx += 1 + h(x) + + +def func(): + # OK + idx = 0 + for x in range(5): + g(x, idx) + idx += 1 + if idx > 10: + idx *= 2 + h(x) + + +def func(): + # OK (index used after loop) + idx = 0 + for x in range(5): + g(x, idx) + idx += 1 + h(x) + print(idx) + + +def func(): + # OK (index within nested loop) + idx = 0 + for x in range(5): + for y in range(5): + g(x, idx) + idx += 1 diff --git a/crates/ruff_linter/src/checkers/ast/analyze/deferred_for_loops.rs b/crates/ruff_linter/src/checkers/ast/analyze/deferred_for_loops.rs index 5d4aa26ed2608..c336121bfdaa1 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/deferred_for_loops.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/deferred_for_loops.rs @@ -2,7 +2,7 @@ use ruff_python_ast::Stmt; use crate::checkers::ast::Checker; use crate::codes::Rule; -use crate::rules::{flake8_bugbear, perflint, pyupgrade, refurb}; +use crate::rules::{flake8_bugbear, flake8_simplify, perflint, pyupgrade, refurb}; /// Run lint rules over all deferred for-loops in the [`SemanticModel`]. pub(crate) fn deferred_for_loops(checker: &mut Checker) { @@ -27,6 +27,9 @@ pub(crate) fn deferred_for_loops(checker: &mut Checker) { if checker.enabled(Rule::UnnecessaryEnumerate) { refurb::rules::unnecessary_enumerate(checker, stmt_for); } + if checker.enabled(Rule::EnumerateForLoop) { + flake8_simplify::rules::enumerate_for_loop(checker, stmt_for); + } } } } diff --git a/crates/ruff_linter/src/checkers/ast/analyze/statement.rs b/crates/ruff_linter/src/checkers/ast/analyze/statement.rs index ff764de5f8977..af634230f4709 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/statement.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/statement.rs @@ -1258,9 +1258,10 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) { }, ) => { if checker.any_enabled(&[ - Rule::UnusedLoopControlVariable, + Rule::EnumerateForLoop, Rule::IncorrectDictIterator, Rule::UnnecessaryEnumerate, + Rule::UnusedLoopControlVariable, Rule::YieldInForLoop, ]) { checker.deferred.for_loops.push(checker.semantic.snapshot()); diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs index 1f24c4f1866fa..d417ef3af9cb5 100644 --- a/crates/ruff_linter/src/codes.rs +++ b/crates/ruff_linter/src/codes.rs @@ -454,6 +454,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Flake8Simplify, "109") => (RuleGroup::Stable, rules::flake8_simplify::rules::CompareWithTuple), (Flake8Simplify, "110") => (RuleGroup::Stable, rules::flake8_simplify::rules::ReimplementedBuiltin), (Flake8Simplify, "112") => (RuleGroup::Stable, rules::flake8_simplify::rules::UncapitalizedEnvironmentVariables), + (Flake8Simplify, "113") => (RuleGroup::Preview, rules::flake8_simplify::rules::EnumerateForLoop), (Flake8Simplify, "114") => (RuleGroup::Stable, rules::flake8_simplify::rules::IfWithSameArms), (Flake8Simplify, "115") => (RuleGroup::Stable, rules::flake8_simplify::rules::OpenFileWithContextHandler), (Flake8Simplify, "116") => (RuleGroup::Stable, rules::flake8_simplify::rules::IfElseBlockInsteadOfDictLookup), diff --git a/crates/ruff_linter/src/rules/flake8_simplify/mod.rs b/crates/ruff_linter/src/rules/flake8_simplify/mod.rs index c472b56c0d079..3c825ae16359b 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/mod.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/mod.rs @@ -27,6 +27,7 @@ mod tests { #[test_case(Rule::ReimplementedBuiltin, Path::new("SIM110.py"))] #[test_case(Rule::ReimplementedBuiltin, Path::new("SIM111.py"))] #[test_case(Rule::UncapitalizedEnvironmentVariables, Path::new("SIM112.py"))] + #[test_case(Rule::EnumerateForLoop, Path::new("SIM113.py"))] #[test_case(Rule::OpenFileWithContextHandler, Path::new("SIM115.py"))] #[test_case(Rule::MultipleWithStatements, Path::new("SIM117.py"))] #[test_case(Rule::InDictKeys, Path::new("SIM118.py"))] diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_bool_op.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_bool_op.rs index 32fe135d00910..c3d4a86d1a026 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_bool_op.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_bool_op.rs @@ -287,7 +287,7 @@ impl AlwaysFixableViolation for ExprAndFalse { } /// Return `true` if two `Expr` instances are equivalent names. -fn is_same_expr<'a>(a: &'a Expr, b: &'a Expr) -> Option<&'a str> { +pub(crate) fn is_same_expr<'a>(a: &'a Expr, b: &'a Expr) -> Option<&'a str> { if let (Expr::Name(ast::ExprName { id: a, .. }), Expr::Name(ast::ExprName { id: b, .. })) = (&a, &b) { diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/enumerate_for_loop.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/enumerate_for_loop.rs new file mode 100644 index 0000000000000..95f2237b01952 --- /dev/null +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/enumerate_for_loop.rs @@ -0,0 +1,193 @@ +use ruff_diagnostics::{Diagnostic, Violation}; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::statement_visitor::{walk_stmt, StatementVisitor}; +use ruff_python_ast::{self as ast, Expr, Int, Number, Operator, Stmt}; +use ruff_python_semantic::analyze::typing; +use ruff_text_size::Ranged; + +use crate::checkers::ast::Checker; + +/// ## What it does +/// Checks for `for` loops with explicit loop-index variables that can be replaced +/// with `enumerate()`. +/// +/// ## Why is this bad? +/// When iterating over a sequence, it's often desirable to keep track of the +/// index of each element alongside the element itself. Prefer the `enumerate` +/// builtin over manually incrementing a counter variable within the loop, as +/// `enumerate` is more concise and idiomatic. +/// +/// ## Example +/// ```python +/// fruits = ["apple", "banana", "cherry"] +/// for fruit in fruits: +/// print(f"{i + 1}. {fruit}") +/// i += 1 +/// ``` +/// +/// Use instead: +/// ```python +/// fruits = ["apple", "banana", "cherry"] +/// for i, fruit in enumerate(fruits): +/// print(f"{i + 1}. {fruit}") +/// ``` +/// +/// ## References +/// - [Python documentation: `enumerate`](https://docs.python.org/3/library/functions.html#enumerate) +#[violation] +pub struct EnumerateForLoop { + index: String, +} + +impl Violation for EnumerateForLoop { + #[derive_message_formats] + fn message(&self) -> String { + let EnumerateForLoop { index } = self; + format!("Use `enumerate()` for index variable `{index}` in `for` loop") + } +} + +/// SIM113 +pub(crate) fn enumerate_for_loop(checker: &mut Checker, for_stmt: &ast::StmtFor) { + // If the loop contains a `continue`, abort. + let mut visitor = LoopControlFlowVisitor::default(); + visitor.visit_body(&for_stmt.body); + if visitor.has_continue { + return; + } + + for stmt in &for_stmt.body { + // Find the augmented assignment expression (e.g., `i += 1`). + if let Some(index) = match_index_increment(stmt) { + // Find the binding corresponding to the initialization (e.g., `i = 1`). + let Some(id) = checker.semantic().resolve_name(index) else { + continue; + }; + + // If it's not an assignment (e.g., it's a function argument), ignore it. + let binding = checker.semantic().binding(id); + if !binding.kind.is_assignment() { + continue; + } + + // If the variable is global or nonlocal, ignore it. + if binding.is_global() || binding.is_nonlocal() { + continue; + } + + // Ensure that the index variable was initialized to 0. + let Some(value) = typing::find_binding_value(&index.id, binding, checker.semantic()) + else { + continue; + }; + if !matches!( + value, + Expr::NumberLiteral(ast::ExprNumberLiteral { + value: Number::Int(Int::ZERO), + .. + }) + ) { + continue; + } + + // If the binding is not at the same level as the `for` loop (e.g., it's in an `if`), + // ignore it. + let Some(for_loop_id) = checker.semantic().current_statement_id() else { + continue; + }; + let Some(assignment_id) = binding.source else { + continue; + }; + if checker.semantic().parent_statement_id(for_loop_id) + != checker.semantic().parent_statement_id(assignment_id) + { + continue; + } + + // Identify the binding created by the augmented assignment. + // TODO(charlie): There should be a way to go from `ExprName` to `BindingId` (like + // `resolve_name`, but for bindings rather than references). + let binding = { + let mut bindings = checker + .semantic() + .current_scope() + .get_all(&index.id) + .map(|id| checker.semantic().binding(id)) + .filter(|binding| for_stmt.range().contains_range(binding.range())); + + let Some(binding) = bindings.next() else { + continue; + }; + + // If there are multiple assignments to this variable _within_ the loop, ignore it. + if bindings.next().is_some() { + continue; + } + + binding + }; + + // If the variable is used _after_ the loop, ignore it. + // Find the binding for the augmented assignment. + if binding.references.iter().any(|id| { + let reference = checker.semantic().reference(*id); + reference.start() > for_stmt.end() + }) { + continue; + } + + let diagnostic = Diagnostic::new( + EnumerateForLoop { + index: index.id.to_string(), + }, + stmt.range(), + ); + checker.diagnostics.push(diagnostic); + } + } +} + +/// If the statement is an index increment statement (e.g., `i += 1`), return +/// the name of the index variable. +fn match_index_increment(stmt: &Stmt) -> Option<&ast::ExprName> { + let Stmt::AugAssign(ast::StmtAugAssign { + target, + op: Operator::Add, + value, + .. + }) = stmt + else { + return None; + }; + + let name = target.as_name_expr()?; + + if matches!( + value.as_ref(), + Expr::NumberLiteral(ast::ExprNumberLiteral { + value: Number::Int(Int::ONE), + .. + }) + ) { + return Some(name); + } + + None +} + +#[derive(Debug, Default)] +struct LoopControlFlowVisitor { + has_continue: bool, +} + +impl StatementVisitor<'_> for LoopControlFlowVisitor { + fn visit_stmt(&mut self, stmt: &Stmt) { + match stmt { + Stmt::Continue(_) => self.has_continue = true, + Stmt::FunctionDef(_) | Stmt::ClassDef(_) => { + // Don't recurse. + } + _ => walk_stmt(self, stmt), + } + } +} diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/mod.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/mod.rs index f91528fdc9f41..b2aa7d5259732 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/mod.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/mod.rs @@ -4,6 +4,7 @@ pub(crate) use ast_ifexp::*; pub(crate) use ast_unary_op::*; pub(crate) use ast_with::*; pub(crate) use collapsible_if::*; +pub(crate) use enumerate_for_loop::*; pub(crate) use if_else_block_instead_of_dict_get::*; pub(crate) use if_else_block_instead_of_dict_lookup::*; pub(crate) use if_else_block_instead_of_if_exp::*; @@ -23,6 +24,7 @@ mod ast_ifexp; mod ast_unary_op; mod ast_with; mod collapsible_if; +mod enumerate_for_loop; mod fix_with; mod if_else_block_instead_of_dict_get; mod if_else_block_instead_of_dict_lookup; diff --git a/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__SIM113_SIM113.py.snap b/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__SIM113_SIM113.py.snap new file mode 100644 index 0000000000000..b8cc349f0e83b --- /dev/null +++ b/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__SIM113_SIM113.py.snap @@ -0,0 +1,47 @@ +--- +source: crates/ruff_linter/src/rules/flake8_simplify/mod.rs +--- +SIM113.py:6:9: SIM113 Use `enumerate()` for index variable `idx` in `for` loop + | +4 | for x in range(5): +5 | g(x, idx) +6 | idx += 1 + | ^^^^^^^^ SIM113 +7 | h(x) + | + +SIM113.py:17:9: SIM113 Use `enumerate()` for index variable `idx` in `for` loop + | +15 | if g(x): +16 | break +17 | idx += 1 + | ^^^^^^^^ SIM113 +18 | sum += h(x, idx) + | + +SIM113.py:27:9: SIM113 Use `enumerate()` for index variable `idx` in `for` loop + | +25 | g(x) +26 | h(x, y) +27 | idx += 1 + | ^^^^^^^^ SIM113 + | + +SIM113.py:36:9: SIM113 Use `enumerate()` for index variable `idx` in `for` loop + | +34 | for x in range(5): +35 | sum += h(x, idx) +36 | idx += 1 + | ^^^^^^^^ SIM113 + | + +SIM113.py:44:9: SIM113 Use `enumerate()` for index variable `idx` in `for` loop + | +42 | for x in range(5): +43 | g(x, idx) +44 | idx += 1 + | ^^^^^^^^ SIM113 +45 | h(x) + | + + diff --git a/crates/ruff_linter/src/rules/flake8_trio/rules/zero_sleep_call.rs b/crates/ruff_linter/src/rules/flake8_trio/rules/zero_sleep_call.rs index 57caec4eec68c..20f7654ff5f86 100644 --- a/crates/ruff_linter/src/rules/flake8_trio/rules/zero_sleep_call.rs +++ b/crates/ruff_linter/src/rules/flake8_trio/rules/zero_sleep_call.rs @@ -1,6 +1,6 @@ use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::{self as ast, Expr, ExprCall, Int}; +use ruff_python_ast::{self as ast, Expr, ExprCall, Int, Number}; use ruff_python_semantic::analyze::typing::find_assigned_value; use ruff_text_size::Ranged; @@ -65,8 +65,7 @@ pub(crate) fn zero_sleep_call(checker: &mut Checker, call: &ExprCall) { match arg { Expr::NumberLiteral(ast::ExprNumberLiteral { value, .. }) => { - let Some(int) = value.as_int() else { return }; - if *int != Int::ZERO { + if !matches!(value, Number::Int(Int::ZERO)) { return; } } @@ -74,11 +73,13 @@ pub(crate) fn zero_sleep_call(checker: &mut Checker, call: &ExprCall) { let Some(value) = find_assigned_value(id, checker.semantic()) else { return; }; - let Expr::NumberLiteral(ast::ExprNumberLiteral { value: num, .. }) = value else { - return; - }; - let Some(int) = num.as_int() else { return }; - if *int != Int::ZERO { + if !matches!( + value, + Expr::NumberLiteral(ast::ExprNumberLiteral { + value: Number::Int(Int::ZERO), + .. + }) + ) { return; } } diff --git a/crates/ruff_python_ast/src/traversal.rs b/crates/ruff_python_ast/src/traversal.rs index 1e050cfa94b1e..8252da83d11d4 100644 --- a/crates/ruff_python_ast/src/traversal.rs +++ b/crates/ruff_python_ast/src/traversal.rs @@ -79,3 +79,15 @@ pub fn next_sibling<'a>(stmt: &'a Stmt, suite: &'a Suite) -> Option<&'a Stmt> { } None } + +/// Given a [`Stmt`] and its containing [`Suite`], return the previous [`Stmt`] in the [`Suite`]. +pub fn prev_sibling<'a>(stmt: &'a Stmt, suite: &'a Suite) -> Option<&'a Stmt> { + let mut prev = None; + for sibling in suite { + if sibling == stmt { + return prev; + } + prev = Some(sibling); + } + None +} diff --git a/crates/ruff_python_semantic/src/analyze/typing.rs b/crates/ruff_python_semantic/src/analyze/typing.rs index f63a4f5215f28..39eec17a53f88 100644 --- a/crates/ruff_python_semantic/src/analyze/typing.rs +++ b/crates/ruff_python_semantic/src/analyze/typing.rs @@ -645,6 +645,24 @@ pub fn resolve_assignment<'a>( pub fn find_assigned_value<'a>(symbol: &str, semantic: &'a SemanticModel<'a>) -> Option<&'a Expr> { let binding_id = semantic.lookup_symbol(symbol)?; let binding = semantic.binding(binding_id); + find_binding_value(symbol, binding, semantic) +} + +/// Find the assigned [`Expr`] for a given [`Binding`], if any. +/// +/// For example given: +/// ```python +/// foo = 42 +/// (bar, bla) = 1, "str" +/// ``` +/// +/// This function will return a `NumberLiteral` with value `Int(42)` when called with `foo` and a +/// `StringLiteral` with value `"str"` when called with `bla`. +pub fn find_binding_value<'a>( + symbol: &str, + binding: &Binding, + semantic: &'a SemanticModel, +) -> Option<&'a Expr> { match binding.kind { // Ex) `x := 1` BindingKind::NamedExprAssignment => { diff --git a/crates/ruff_python_semantic/src/scope.rs b/crates/ruff_python_semantic/src/scope.rs index 3de3b058f9192..0699d160b02fb 100644 --- a/crates/ruff_python_semantic/src/scope.rs +++ b/crates/ruff_python_semantic/src/scope.rs @@ -129,6 +129,11 @@ impl<'a> Scope<'a> { self.shadowed_bindings.get(&id).copied() } + /// Returns an iterator over all bindings that the given binding shadows, including itself. + pub fn shadowed_bindings(&self, id: BindingId) -> impl Iterator + '_ { + std::iter::successors(Some(id), |id| self.shadowed_bindings.get(id).copied()) + } + /// Adds a reference to a star import (e.g., `from sys import *`) to this scope. pub fn add_star_import(&mut self, import: StarImport<'a>) { self.star_imports.push(import); diff --git a/ruff.schema.json b/ruff.schema.json index b4146b51e7845..cd6bcee1270e9 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -3552,6 +3552,7 @@ "SIM11", "SIM110", "SIM112", + "SIM113", "SIM114", "SIM115", "SIM116",