From 312f6640b837eb1189748980707cfed70d1f5ffb Mon Sep 17 00:00:00 2001 From: Tobias Fischer <30701667+tobb10001@users.noreply.github.com> Date: Fri, 31 May 2024 23:48:36 +0200 Subject: [PATCH] [`flake8-bugbear`] Implement `return-in-generator` (`B901`) (#11644) ## Summary This PR implements the rule B901, which is part of the opinionated rules of `flake8-bugbear`. This rule seems to be desired in `ruff` as per https://github.com/astral-sh/ruff/issues/3758 and https://github.com/astral-sh/ruff/issues/2954#issuecomment-1441162976. ## Test Plan As this PR was made closely following the [CONTRIBUTING.md](https://github.com/astral-sh/ruff/blob/8a25531a7144fd4a6b62c54efde1ef28e2dc18c4/CONTRIBUTING.md), it tests using the snapshot approach, that is described there. ## Sources The implementation is inspired by [the original implementation in the `flake8-bugbear` repository](https://github.com/PyCQA/flake8-bugbear/blob/d1aec4cbef7c4a49147c428b7e4a97e497b5d163/bugbear.py#L1092). The error message and [test file](https://github.com/PyCQA/flake8-bugbear/blob/d1aec4cbef7c4a49147c428b7e4a97e497b5d163/tests/b901.py) where also copied from there. The documentation I came up with on my own and needs improvement. Maybe the example given in https://github.com/astral-sh/ruff/issues/2954#issuecomment-1441162976 could be used, but maybe they are too complex, I'm not sure. ## Open Questions - [ ] Documentation. (See above.) - [x] Can I access the parent in a visitor? The [original implementation](https://github.com/PyCQA/flake8-bugbear/blob/d1aec4cbef7c4a49147c428b7e4a97e497b5d163/bugbear.py#L1100) references the `yield` statement's parent to check if it is an expression statement. I didn't find a way to do this in `ruff` and used the `is_expresssion_statement` field on the visitor instead. What are your thoughts on this? Is it possible and / or desired to access the parent node here? - [x] Is `Option::is_some(...)` -> `...unwrap()` the right thing to do? Referring to [this piece of code](https://github.com/tobb10001/ruff/blob/9d5a280f71103ef33df5676d00a6c68c601261ac/crates/ruff_linter/src/rules/flake8_bugbear/rules/return_x_in_generator.rs?plain=1#L91-L96). From my understanding, the `.unwrap()` is safe, because it is checked that `return_` is not `None`. However, I feel like I missed a more elegant solution that does both in one. ## Other I don't know a lot about this rule, I just implemented it because I found it in a https://github.com/astral-sh/ruff/labels/good%20first%20issue. I'm new to Rust, so any constructive critisism is appreciated. --------- Co-authored-by: Charlie Marsh --- .../test/fixtures/flake8_bugbear/B901.py | 78 ++++++++++ .../src/checkers/ast/analyze/statement.rs | 3 + crates/ruff_linter/src/codes.rs | 1 + .../src/rules/flake8_bugbear/mod.rs | 1 + .../src/rules/flake8_bugbear/rules/mod.rs | 2 + .../rules/return_in_generator.rs | 137 ++++++++++++++++++ ...__flake8_bugbear__tests__B901_B901.py.snap | 21 +++ ruff.schema.json | 1 + 8 files changed, 244 insertions(+) create mode 100644 crates/ruff_linter/resources/test/fixtures/flake8_bugbear/B901.py create mode 100644 crates/ruff_linter/src/rules/flake8_bugbear/rules/return_in_generator.rs create mode 100644 crates/ruff_linter/src/rules/flake8_bugbear/snapshots/ruff_linter__rules__flake8_bugbear__tests__B901_B901.py.snap diff --git a/crates/ruff_linter/resources/test/fixtures/flake8_bugbear/B901.py b/crates/ruff_linter/resources/test/fixtures/flake8_bugbear/B901.py new file mode 100644 index 0000000000000..42fdda60d7c25 --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/flake8_bugbear/B901.py @@ -0,0 +1,78 @@ +""" +Should emit: +B901 - on lines 9, 36 +""" + + +def broken(): + if True: + return [1, 2, 3] + + yield 3 + yield 2 + yield 1 + + +def not_broken(): + if True: + return + + yield 3 + yield 2 + yield 1 + + +def not_broken2(): + return not_broken() + + +def not_broken3(): + return + + yield from not_broken() + + +def broken2(): + return [3, 2, 1] + + yield from not_broken() + + +async def not_broken4(): + import asyncio + + await asyncio.sleep(1) + return 1 + + +def not_broken5(): + def inner(): + return 2 + + yield inner() + + +def not_broken6(): + return (yield from []) + + +def not_broken7(): + x = yield from [] + return x + + +def not_broken8(): + x = None + + def inner(ex): + nonlocal x + x = ex + + inner((yield from [])) + return x + + +class NotBroken9(object): + def __await__(self): + yield from function() + return 42 diff --git a/crates/ruff_linter/src/checkers/ast/analyze/statement.rs b/crates/ruff_linter/src/checkers/ast/analyze/statement.rs index c6f30346ee052..94419de40fbb0 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/statement.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/statement.rs @@ -207,6 +207,9 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) { if checker.enabled(Rule::MutableArgumentDefault) { flake8_bugbear::rules::mutable_argument_default(checker, function_def); } + if checker.enabled(Rule::ReturnInGenerator) { + flake8_bugbear::rules::return_in_generator(checker, function_def); + } if checker.any_enabled(&[ Rule::UnnecessaryReturnNone, Rule::ImplicitReturnValue, diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs index 95361820f2002..9c8385ac17d74 100644 --- a/crates/ruff_linter/src/codes.rs +++ b/crates/ruff_linter/src/codes.rs @@ -383,6 +383,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Flake8Bugbear, "033") => (RuleGroup::Stable, rules::flake8_bugbear::rules::DuplicateValue), (Flake8Bugbear, "034") => (RuleGroup::Stable, rules::flake8_bugbear::rules::ReSubPositionalArgs), (Flake8Bugbear, "035") => (RuleGroup::Stable, rules::flake8_bugbear::rules::StaticKeyDictComprehension), + (Flake8Bugbear, "901") => (RuleGroup::Preview, rules::flake8_bugbear::rules::ReturnInGenerator), (Flake8Bugbear, "904") => (RuleGroup::Stable, rules::flake8_bugbear::rules::RaiseWithoutFromInsideExcept), (Flake8Bugbear, "905") => (RuleGroup::Stable, rules::flake8_bugbear::rules::ZipWithoutExplicitStrict), (Flake8Bugbear, "909") => (RuleGroup::Preview, rules::flake8_bugbear::rules::LoopIteratorMutation), diff --git a/crates/ruff_linter/src/rules/flake8_bugbear/mod.rs b/crates/ruff_linter/src/rules/flake8_bugbear/mod.rs index 640007b307435..2fa8d5b7a67aa 100644 --- a/crates/ruff_linter/src/rules/flake8_bugbear/mod.rs +++ b/crates/ruff_linter/src/rules/flake8_bugbear/mod.rs @@ -62,6 +62,7 @@ mod tests { #[test_case(Rule::UselessContextlibSuppress, Path::new("B022.py"))] #[test_case(Rule::UselessExpression, Path::new("B018.ipynb"))] #[test_case(Rule::UselessExpression, Path::new("B018.py"))] + #[test_case(Rule::ReturnInGenerator, Path::new("B901.py"))] #[test_case(Rule::LoopIteratorMutation, Path::new("B909.py"))] fn rules(rule_code: Rule, path: &Path) -> Result<()> { let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy()); diff --git a/crates/ruff_linter/src/rules/flake8_bugbear/rules/mod.rs b/crates/ruff_linter/src/rules/flake8_bugbear/rules/mod.rs index 111eb5f18b72c..4f7fd0eebf42b 100644 --- a/crates/ruff_linter/src/rules/flake8_bugbear/rules/mod.rs +++ b/crates/ruff_linter/src/rules/flake8_bugbear/rules/mod.rs @@ -20,6 +20,7 @@ pub(crate) use raise_literal::*; pub(crate) use raise_without_from_inside_except::*; pub(crate) use re_sub_positional_args::*; pub(crate) use redundant_tuple_in_exception_handler::*; +pub(crate) use return_in_generator::*; pub(crate) use reuse_of_groupby_generator::*; pub(crate) use setattr_with_constant::*; pub(crate) use star_arg_unpacking_after_keyword_arg::*; @@ -56,6 +57,7 @@ mod raise_literal; mod raise_without_from_inside_except; mod re_sub_positional_args; mod redundant_tuple_in_exception_handler; +mod return_in_generator; mod reuse_of_groupby_generator; mod setattr_with_constant; mod star_arg_unpacking_after_keyword_arg; diff --git a/crates/ruff_linter/src/rules/flake8_bugbear/rules/return_in_generator.rs b/crates/ruff_linter/src/rules/flake8_bugbear/rules/return_in_generator.rs new file mode 100644 index 0000000000000..9e80a2fca52b4 --- /dev/null +++ b/crates/ruff_linter/src/rules/flake8_bugbear/rules/return_in_generator.rs @@ -0,0 +1,137 @@ +use ruff_diagnostics::Diagnostic; +use ruff_diagnostics::Violation; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::statement_visitor; +use ruff_python_ast::statement_visitor::StatementVisitor; +use ruff_python_ast::{self as ast, Expr, Stmt, StmtFunctionDef}; +use ruff_text_size::TextRange; + +use crate::checkers::ast::Checker; + +/// ## What it does +/// Checks for `return {value}` statements in functions that also contain `yield` +/// or `yield from` statements. +/// +/// ## Why is this bad? +/// Using `return {value}` in a generator function was syntactically invalid in +/// Python 2. In Python 3 `return {value}` _can_ be used in a generator; however, +/// the combination of `yield` and `return` can lead to confusing behavior, as +/// the `return` statement will cause the generator to raise `StopIteration` +/// with the value provided, rather than returning the value to the caller. +/// +/// For example, given: +/// ```python +/// from collections.abc import Iterable +/// from pathlib import Path +/// +/// +/// def get_file_paths(file_types: Iterable[str] | None = None) -> Iterable[Path]: +/// dir_path = Path(".") +/// if file_types is None: +/// return dir_path.glob("*") +/// +/// for file_type in file_types: +/// yield from dir_path.glob(f"*.{file_type}") +/// ``` +/// +/// Readers might assume that `get_file_paths()` would return an iterable of +/// `Path` objects in the directory; in reality, though, `list(get_file_paths())` +/// evaluates to `[]`, since the `return` statement causes the generator to raise +/// `StopIteration` with the value `dir_path.glob("*")`: +/// +/// ```shell +/// >>> list(get_file_paths(file_types=["cfg", "toml"])) +/// [PosixPath('setup.cfg'), PosixPath('pyproject.toml')] +/// >>> list(get_file_paths()) +/// [] +/// ``` +/// +/// For intentional uses of `return` in a generator, consider suppressing this +/// diagnostic. +/// +/// ## Example +/// ```python +/// from collections.abc import Iterable +/// from pathlib import Path +/// +/// +/// def get_file_paths(file_types: Iterable[str] | None = None) -> Iterable[Path]: +/// dir_path = Path(".") +/// if file_types is None: +/// return dir_path.glob("*") +/// +/// for file_type in file_types: +/// yield from dir_path.glob(f"*.{file_type}") +/// ``` +/// +/// Use instead: +/// +/// ```python +/// from collections.abc import Iterable +/// from pathlib import Path +/// +/// +/// def get_file_paths(file_types: Iterable[str] | None = None) -> Iterable[Path]: +/// dir_path = Path(".") +/// if file_types is None: +/// yield from dir_path.glob("*") +/// else: +/// for file_type in file_types: +/// yield from dir_path.glob(f"*.{file_type}") +/// ``` +#[violation] +pub struct ReturnInGenerator; + +impl Violation for ReturnInGenerator { + #[derive_message_formats] + fn message(&self) -> String { + format!("Using `yield` and `return {{value}}` in a generator function can lead to confusing behavior") + } +} + +/// B901 +pub(crate) fn return_in_generator(checker: &mut Checker, function_def: &StmtFunctionDef) { + if function_def.name.id == "__await__" { + return; + } + + let mut visitor = ReturnInGeneratorVisitor::default(); + visitor.visit_body(&function_def.body); + + if visitor.has_yield { + if let Some(return_) = visitor.return_ { + checker + .diagnostics + .push(Diagnostic::new(ReturnInGenerator, return_)); + } + } +} + +#[derive(Default)] +struct ReturnInGeneratorVisitor { + return_: Option, + has_yield: bool, +} + +impl StatementVisitor<'_> for ReturnInGeneratorVisitor { + fn visit_stmt(&mut self, stmt: &Stmt) { + match stmt { + Stmt::Expr(ast::StmtExpr { value, .. }) => match **value { + Expr::Yield(_) | Expr::YieldFrom(_) => { + self.has_yield = true; + } + _ => {} + }, + Stmt::FunctionDef(_) => { + // Do not recurse into nested functions; they're evaluated separately. + } + Stmt::Return(ast::StmtReturn { + value: Some(_), + range, + }) => { + self.return_ = Some(*range); + } + _ => statement_visitor::walk_stmt(self, stmt), + } + } +} diff --git a/crates/ruff_linter/src/rules/flake8_bugbear/snapshots/ruff_linter__rules__flake8_bugbear__tests__B901_B901.py.snap b/crates/ruff_linter/src/rules/flake8_bugbear/snapshots/ruff_linter__rules__flake8_bugbear__tests__B901_B901.py.snap new file mode 100644 index 0000000000000..b1e65fb3c0ee2 --- /dev/null +++ b/crates/ruff_linter/src/rules/flake8_bugbear/snapshots/ruff_linter__rules__flake8_bugbear__tests__B901_B901.py.snap @@ -0,0 +1,21 @@ +--- +source: crates/ruff_linter/src/rules/flake8_bugbear/mod.rs +--- +B901.py:9:9: B901 Using `yield` and `return {value}` in a generator function can lead to confusing behavior + | + 7 | def broken(): + 8 | if True: + 9 | return [1, 2, 3] + | ^^^^^^^^^^^^^^^^ B901 +10 | +11 | yield 3 + | + +B901.py:36:5: B901 Using `yield` and `return {value}` in a generator function can lead to confusing behavior + | +35 | def broken2(): +36 | return [3, 2, 1] + | ^^^^^^^^^^^^^^^^ B901 +37 | +38 | yield from not_broken() + | diff --git a/ruff.schema.json b/ruff.schema.json index 588d7b3199c7e..b146d9b74bcc2 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -2729,6 +2729,7 @@ "B035", "B9", "B90", + "B901", "B904", "B905", "B909",