Skip to content

Commit

Permalink
Expand RUF015 to include all expression types (#5767)
Browse files Browse the repository at this point in the history
## Summary

We now allow RUF015 to fix cases like:

```python
list(range(10))[0]
list(x.y)[0]
list(x["y"])[0]
```

Further, we fix generators like:

```python
[i + 1 for i in x][0]
```

By rewriting to `next(iter(i + 1 for i in x))`.

I've retained the special-case that rewrites `[i for i in x][0]` to
`next(iter(x))`.

Closes #5764.
  • Loading branch information
charliermarsh authored Jul 21, 2023
1 parent 4e68107 commit 5f2014b
Show file tree
Hide file tree
Showing 4 changed files with 337 additions and 64 deletions.
16 changes: 14 additions & 2 deletions crates/ruff/resources/test/fixtures/ruff/RUF015.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,23 @@
[i for i in x][::2]
[i for i in x][::]

# OK (doesn't mirror the underlying list)
# RUF015 (doesn't mirror the underlying list)
[i + 1 for i in x][0]
[i for i in x if i > 5][0]
[(i, i + 1) for i in x][0]

# OK (multiple generators)
# RUF015 (multiple generators)
y = range(10)
[i + j for i in x for j in y][0]

# RUF015
list(range(10))[0]
list(x.y)[0]
list(x["y"])[0]

# RUF015 (multi-line)
revision_heads_map_ast = [
a
for a in revision_heads_map_ast_obj.body
if isinstance(a, ast.Assign) and a.targets[0].id == "REVISION_HEADS_MAP"
][0]
2 changes: 1 addition & 1 deletion crates/ruff/src/rules/ruff/rules/invalid_index_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ impl Violation for InvalidIndexType {
}
}

/// RUF015
/// RUF016
pub(crate) fn invalid_index_type(checker: &mut Checker, expr: &ExprSubscript) {
let ExprSubscript {
value,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
use std::borrow::Cow;

use num_bigint::BigInt;
use num_traits::{One, Zero};
use rustpython_parser::ast::{self, Comprehension, Constant, Expr, ExprSubscript};
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser::ast::{self, Comprehension, Constant, Expr, Ranged};
use unicode_width::UnicodeWidthStr;

use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
Expand All @@ -16,8 +20,8 @@ use crate::registry::AsRule;
/// ## Why is this bad?
/// Calling `list(...)` will create a new list of the entire collection, which
/// can be very expensive for large collections. If you only need the first
/// element of the collection, you can use `next(iter(...))` to lazily fetch
/// the first element without creating a new list.
/// element of the collection, you can use `next(...)` or `next(iter(...)` to
/// lazily fetch the first element.
///
/// Note that migrating from `list(...)[0]` to `next(iter(...))` can change
/// the behavior of your program in two ways:
Expand All @@ -26,16 +30,18 @@ use crate::registry::AsRule;
/// `next(iter(...))` will only evaluate the first element. As such, any
/// side effects that occur during iteration will be delayed.
/// 2. Second, `list(...)[0]` will raise `IndexError` if the collection is
/// empty, while `next(iter(...))` will raise `StopIteration`.
/// empty, while `next(iter(...))` will raise `StopIteration`.
///
/// ## Example
/// ```python
/// head = list(range(1000000000000))[0]
/// head = list(x)[0]
/// head = [x * x for x in range(10)][0]
/// ```
///
/// Use instead:
/// ```python
/// head = next(iter(range(1000000000000)))
/// head = next(iter(x))
/// head = next(x * x for x in range(10))
/// ```
///
/// ## References
Expand All @@ -53,12 +59,13 @@ impl AlwaysAutofixableViolation for UnnecessaryIterableAllocationForFirstElement
iterable,
subscript_kind,
} = self;
let iterable = Self::truncate(iterable);
match subscript_kind {
HeadSubscriptKind::Index => {
format!("Prefer `next(iter({iterable}))` over `list({iterable})[0]`")
format!("Prefer `next({iterable})` over single element slice")
}
HeadSubscriptKind::Slice => {
format!("Prefer `[next(iter({iterable}))]` over `list({iterable})[:1]`")
format!("Prefer `[next({iterable})]` over single element slice")
}
}
}
Expand All @@ -68,17 +75,29 @@ impl AlwaysAutofixableViolation for UnnecessaryIterableAllocationForFirstElement
iterable,
subscript_kind,
} = self;
let iterable = Self::truncate(iterable);
match subscript_kind {
HeadSubscriptKind::Index => format!("Replace with `next(iter({iterable}))`"),
HeadSubscriptKind::Slice => format!("Replace with `[next(iter({iterable}))]"),
HeadSubscriptKind::Index => format!("Replace with `next({iterable})`"),
HeadSubscriptKind::Slice => format!("Replace with `[next({iterable})]"),
}
}
}

impl UnnecessaryIterableAllocationForFirstElement {
/// If the iterable is too long, or spans multiple lines, truncate it.
fn truncate(iterable: &str) -> &str {
if iterable.width() > 40 || iterable.contains(['\r', '\n']) {
"..."
} else {
iterable
}
}
}

/// RUF015
pub(crate) fn unnecessary_iterable_allocation_for_first_element(
checker: &mut Checker,
subscript: &ExprSubscript,
subscript: &ast::ExprSubscript,
) {
let ast::ExprSubscript {
value,
Expand All @@ -91,9 +110,15 @@ pub(crate) fn unnecessary_iterable_allocation_for_first_element(
return;
};

let Some(iterable) = iterable_name(value, checker.semantic()) else {
let Some(target) = match_iteration_target(value, checker.semantic()) else {
return;
};
let iterable = checker.locator.slice(target.range);
let iterable = if target.iterable {
Cow::Borrowed(iterable)
} else {
Cow::Owned(format!("iter({iterable})"))
};

let mut diagnostic = Diagnostic::new(
UnnecessaryIterableAllocationForFirstElement {
Expand All @@ -105,8 +130,8 @@ pub(crate) fn unnecessary_iterable_allocation_for_first_element(

if checker.patch(diagnostic.kind.rule()) {
let replacement = match subscript_kind {
HeadSubscriptKind::Index => format!("next(iter({iterable}))"),
HeadSubscriptKind::Slice => format!("[next(iter({iterable}))]"),
HeadSubscriptKind::Index => format!("next({iterable})"),
HeadSubscriptKind::Slice => format!("[next({iterable})]"),
};
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(replacement, *range)));
}
Expand All @@ -127,11 +152,11 @@ enum HeadSubscriptKind {
/// first `bool` checks that the element is in fact first, the second checks if it's a slice or an
/// index.
fn classify_subscript(expr: &Expr) -> Option<HeadSubscriptKind> {
match expr {
let result = match expr {
Expr::Constant(ast::ExprConstant {
value: Constant::Int(value),
..
}) if value.is_zero() => Some(HeadSubscriptKind::Index),
}) if value.is_zero() => HeadSubscriptKind::Index,
Expr::Slice(ast::ExprSlice {
step, lower, upper, ..
}) => {
Expand All @@ -158,54 +183,116 @@ fn classify_subscript(expr: &Expr) -> Option<HeadSubscriptKind> {
}
}

Some(HeadSubscriptKind::Slice)
HeadSubscriptKind::Slice
}
_ => None,
}
_ => return None,
};

Some(result)
}

#[derive(Debug)]
struct IterationTarget {
/// The [`TextRange`] of the target.
range: TextRange,
/// Whether the target is an iterable (e.g., a generator). If not, the target must be wrapped
/// in `iter(...)` prior to calling `next(...)`.
iterable: bool,
}

/// Fetch the name of the iterable from an expression if the expression returns an unmodified list
/// which can be sliced into.
fn iterable_name<'a>(expr: &'a Expr, model: &SemanticModel) -> Option<&'a str> {
match expr {
/// Return the [`IterationTarget`] of an expression, if the expression can be sliced into (i.e.,
/// is a list comprehension, or call to `list` or `tuple`).
///
/// For example, given `list(x)`, returns the range of `x`. Given `[x * x for x in y]`, returns the
/// range of `x * x for x in y`.
///
/// As a special-case, given `[x for x in y]`, returns the range of `y` (rather than the
/// redundant comprehension).
fn match_iteration_target(expr: &Expr, model: &SemanticModel) -> Option<IterationTarget> {
let result = match expr {
Expr::Call(ast::ExprCall { func, args, .. }) => {
let ast::ExprName { id, .. } = func.as_name_expr()?;

if !matches!(id.as_str(), "tuple" | "list") {
return None;
}

let [arg] = args.as_slice() else {
return None;
};

if !model.is_builtin(id.as_str()) {
return None;
}

match args.first() {
Some(Expr::Name(ast::ExprName { id: arg_name, .. })) => Some(arg_name.as_str()),
Some(Expr::GeneratorExp(ast::ExprGeneratorExp {
match arg {
Expr::GeneratorExp(ast::ExprGeneratorExp {
elt, generators, ..
}) => match match_simple_comprehension(elt, generators) {
Some(range) => IterationTarget {
range,
iterable: false,
},
None => IterationTarget {
range: arg.range(),
iterable: true,
},
},
Expr::ListComp(ast::ExprListComp {
elt, generators, ..
})) => generator_iterable(elt, generators),
_ => None,
}) => match match_simple_comprehension(elt, generators) {
Some(range) => IterationTarget {
range,
iterable: false,
},
None => IterationTarget {
range: arg
.range()
// Remove the `[`
.add_start(TextSize::from(1))
// Remove the `]`
.sub_end(TextSize::from(1)),
iterable: true,
},
},
_ => IterationTarget {
range: arg.range(),
iterable: false,
},
}
}

Expr::ListComp(ast::ExprListComp {
elt, generators, ..
}) => generator_iterable(elt, generators),
_ => None,
}
}
}) => match match_simple_comprehension(elt, generators) {
Some(range) => IterationTarget {
range,
iterable: false,
},
None => IterationTarget {
range: expr
.range()
// Remove the `[`
.add_start(TextSize::from(1))
// Remove the `]`
.sub_end(TextSize::from(1)),
iterable: true,
},
},

/// Given a comprehension, returns the name of the iterable over which it iterates, if it's
/// a simple comprehension (e.g., `x` for `[i for i in x]`).
fn generator_iterable<'a>(elt: &'a Expr, generators: &'a Vec<Comprehension>) -> Option<&'a str> {
// If the `elt` field is anything other than a [`Expr::Name`], we can't be sure that it
// doesn't modify the elements of the underlying iterator (e.g., `[i + 1 for i in x][0]`).
if !elt.is_name_expr() {
return None;
}
_ => return None,
};

Some(result)
}

// If there's more than 1 generator, we can't safely say that it fits the diagnostic conditions
// (e.g., `[(i, j) for i in x for j in y][0]`).
let [generator] = generators.as_slice() else {
/// Returns the [`Expr`] target for a comprehension, if the comprehension is "simple"
/// (e.g., `x` for `[i for i in x]`).
fn match_simple_comprehension(elt: &Expr, generators: &[Comprehension]) -> Option<TextRange> {
let [generator @ Comprehension {
is_async: false, ..
}] = generators
else {
return None;
};

Expand All @@ -214,8 +301,14 @@ fn generator_iterable<'a>(elt: &'a Expr, generators: &'a Vec<Comprehension>) ->
return None;
}

let ast::ExprName { id, .. } = generator.iter.as_name_expr()?;
Some(id.as_str())
// Verify that the generator is, e.g. `i for i in x`, as opposed to `i for j in x`.
let elt = elt.as_name_expr()?;
let target = generator.target.as_name_expr()?;
if elt.id != target.id {
return None;
}

Some(generator.iter.range())
}

/// If an expression is a constant integer, returns the value of that integer; otherwise,
Expand Down
Loading

0 comments on commit 5f2014b

Please sign in to comment.