Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add RUF016: Detection of invalid index types #5602

Merged
merged 21 commits into from
Jul 12, 2023
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 115 additions & 0 deletions crates/ruff/resources/test/fixtures/ruff/RUF016.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# Should not emit for valid access with index
var = "abc"[0]
var = f"abc"[0]
var = [1, 2, 3][0]
var = (1, 2, 3)[0]
var = b"abc"[0]

# Should not emit for valid access with slice
var = "abc"[0:2]
var = f"abc"[0:2]
var = b"abc"[0:2]
var = [1, 2, 3][0:2]
var = (1, 2, 3)[0:2]
var = [1, 2, 3][None:2]
var = [1, 2, 3][0:None]
var = [1, 2, 3][:2]
var = [1, 2, 3][0:]

# Should emit for invalid access on strings
var = "abc"["x"]
var = f"abc"["x"]

# Should emit for invalid access on bytes
var = b"abc"["x"]

# Should emit for invalid access on lists and tuples
var = [1, 2, 3]["x"]
var = (1, 2, 3)["x"]

# Should emit for invalid access on list comprehensions
var = [x for x in range(10)]["x"]

# Should emit for invalid access using tuple
var = "abc"[1, 2]

# Should emit for invalid access using string
var = [1, 2]["x"]

# Should emit for invalid access using float
var = [1, 2][0.25]

# Should emit for invalid access using dict
var = [1, 2][{"x": "y"}]

# Should emit for invalid access using dict comp
var = [1, 2][{x: "y" for x in range(2)}]

# Should emit for invalid access using tuple (on a list)
var = [1, 2][2, 3]

# Should emit for invalid access using list comp
var = [1, 2][[x for x in range(2)]]

# Should emit on invalid access using set
var = [1, 2][{"x", "y"}]

# Should emit on invalid access using set comp
var = [1, 2][{x for x in range(2)}]

# Should emit on invalid access using bytes
var = [1, 2][b"x"]

# Should emit for non-integer slice start
var = [1, 2, 3]["x":2]
var = [1, 2, 3][f"x":2]
var = [1, 2, 3][1.2:2]
var = [1, 2, 3][{"x"}:2]
var = [1, 2, 3][{x for x in range(2)}:2]
var = [1, 2, 3][{"x": x for x in range(2)}:2]
var = [1, 2, 3][[x for x in range(2)]:2]

# Should emit for non-integer slice end
var = [1, 2, 3][0:"x"]
var = [1, 2, 3][0:f"x"]
var = [1, 2, 3][0:1.2]
var = [1, 2, 3][0:{"x"}]
var = [1, 2, 3][0:{x for x in range(2)}]
var = [1, 2, 3][0:{"x": x for x in range(2)}]
var = [1, 2, 3][0:[x for x in range(2)]]

# Should emit for non-integer slice step
var = [1, 2, 3][0:1:"x"]
var = [1, 2, 3][0:1:f"x"]
var = [1, 2, 3][0:1:1.2]
var = [1, 2, 3][0:1:{"x"}]
var = [1, 2, 3][0:1:{x for x in range(2)}]
var = [1, 2, 3][0:1:{"x": x for x in range(2)}]
var = [1, 2, 3][0:1:[x for x in range(2)]]

# Should emit for non-integer slice start and end; should emit twice with specific ranges
var = [1, 2, 3]["x":"y"]

# Should emit once for repeated invalid access
var = [1, 2, 3]["x"]["y"]["z"]

# Cannot emit on invalid access using variable in index
x = "x"
var = "abc"[x]

# Cannot emit on invalid access using call
def func():  # Helper whose call result is used as an index below
    return 1
var = "abc"[func()]

# Cannot emit on invalid access using a variable in parent
x = [1, 2, 3]
var = x["y"]

# Cannot emit for invalid access on byte array
var = bytearray(b"abc")["x"]

# Cannot emit for slice bound using variable
x = "x"
var = [1, 2, 3][0:x]
var = [1, 2, 3][x:1]
6 changes: 5 additions & 1 deletion crates/ruff/src/checkers/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2142,7 +2142,7 @@ where

// Pre-visit.
match expr {
subscript @ Expr::Subscript(ast::ExprSubscript { value, slice, .. }) => {
Expr::Subscript(subscript @ ast::ExprSubscript { value, slice, .. }) => {
// Ex) Optional[...], Union[...]
if self.any_enabled(&[
Rule::FutureRewritableTypeAnnotation,
Expand Down Expand Up @@ -2235,6 +2235,10 @@ where
ruff::rules::unnecessary_iterable_allocation_for_first_element(self, subscript);
}

if self.enabled(Rule::InvalidIndexType) {
ruff::rules::invalid_index_type(self, subscript);
}

pandas_vet::rules::subscript(self, value, expr);
}
Expr::Tuple(ast::ExprTuple {
Expand Down
1 change: 1 addition & 0 deletions crates/ruff/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
#[cfg(feature = "unreachable-code")]
(Ruff, "014") => (RuleGroup::Nursery, rules::ruff::rules::UnreachableCode),
(Ruff, "015") => (RuleGroup::Unspecified, rules::ruff::rules::UnnecessaryIterableAllocationForFirstElement),
(Ruff, "016") => (RuleGroup::Unspecified, rules::ruff::rules::InvalidIndexType),
(Ruff, "100") => (RuleGroup::Unspecified, rules::ruff::rules::UnusedNOQA),
(Ruff, "200") => (RuleGroup::Unspecified, rules::ruff::rules::InvalidPyprojectToml),

Expand Down
1 change: 1 addition & 0 deletions crates/ruff/src/rules/ruff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ mod tests {
Rule::UnnecessaryIterableAllocationForFirstElement,
Path::new("RUF015.py")
)]
#[test_case(Rule::InvalidIndexType, Path::new("RUF016.py"))]
#[cfg_attr(
feature = "unreachable-code",
test_case(Rule::UnreachableCode, Path::new("RUF014.py"))
Expand Down
212 changes: 212 additions & 0 deletions crates/ruff/src/rules/ruff/rules/invalid_index_type.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
use rustpython_parser::ast::{Constant, Expr, ExprConstant, ExprSlice, ExprSubscript, Ranged};

use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use std::fmt;

use crate::checkers::ast::Checker;

/// ## What it does
/// Checks for indexed access to lists, strings, tuples, bytes, and list
/// comprehensions using a type other than an integer or slice.
///
/// ## Why is this bad?
/// Only integers or slices can be used as indices to these types. Using
/// other types will result in a `TypeError` at runtime and a `SyntaxWarning` at
/// import time.
///
/// ## Example
/// ```python
/// var = [1, 2, 3]["x"]
/// ```
///
/// Use instead:
/// ```python
/// var = [1, 2, 3][0]
/// ```
#[violation]
pub struct InvalidIndexType {
    // Display name of the type being indexed (e.g. `list`).
    value_type: String,
    // Display name of the type of the offending index or slice bound (e.g. `str`).
    index_type: String,
    // `true` when the offending expression is a slice bound rather than the index itself;
    // selects which of the two messages is rendered.
    is_slice: bool,
}

impl Violation for InvalidIndexType {
    // Renders the diagnostic text: slice-bound wording when the offending expression
    // is a bound of a slice, plain index wording otherwise.
    #[derive_message_formats]
    fn message(&self) -> String {
        let InvalidIndexType {
            value_type,
            index_type,
            is_slice,
        } = self;
        if *is_slice {
            format!("Slice in indexed access to type `{value_type}` uses type `{index_type}` instead of an integer.")
        } else {
            format!(
                "Indexed access to type `{value_type}` uses type `{index_type}` instead of an integer or slice."
            )
        }
    }
}

/// RUF016
pub(crate) fn invalid_index_type<'a>(checker: &mut Checker, expr: &'a ExprSubscript) {
let ExprSubscript {
value,
slice: index,
..
} = expr;

// Check the value being indexed is a list, tuple, string, f-string, bytes, or comprehension
if !matches!(
value.as_ref(),
Expr::List(_)
| Expr::ListComp(_)
| Expr::Tuple(_)
| Expr::JoinedStr(_)
| Expr::Constant(ExprConstant {
value: Constant::Str(_) | Constant::Bytes(_),
..
})
) {
return;
}

// The types supported by this rule should always be checkable
let value_type = CheckableExprType::try_from(&value)
.expect("Expected indexed expression to be a checkable type.");
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@charliermarsh I retained this expect because I'd rather not silently return in this case. A developer has broken this rule if the type is not checkable.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To me it would be a bit more natural to do this before the `if !matches!` (like `let Some(value_type) = ... else { return; };`), then have an `if !matches!(value_type, CheckableExprType::List | ...)` to enforce the further narrowing. That way the `expect` is unnecessary as the condition is impossible. But it's not blocking.

Copy link
Member Author

@zanieb zanieb Jul 12, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did this before then moved it back to an expect, I feel like it confuses the logic of the rule. Like...

If the value is a list, byte, string, or tuple then we want to enforce this rule.

To display a violation, we happen to use CheckableExprType to generate a type name for the value. However, this is just for display purposes not a limitation of the rule itself.

If someone were to extend the rule to apply to a new type, e.g. byte arrays, I would not expect this function to exit early because there is not an implementation for displaying the type name. In this case, the expect would point a developer to the next step in their implementation.

🤷‍♀️ it does feel like a subtle stylistic choice in the end — maybe you shouldn't have sent me that BurntSushi article 😝

I'm kind of curious to see what lessons I learn from making my own stylistic choices but I'm happy to follow precedent for the project too!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes sense. I'd say run with what you have here! I tend to be pretty defensive around expect and unwrap (which are ~equivalent) because they're not recoverable in any way, so e.g. if we shipped a release that had an oversight here, Ruff would bail entirely on files that hit this codepath.

The "right" answer here may be to extend CheckableExprType to instead implement all expression types (so it doesn't return Option), or perhaps to use a debug assertion so that we see this failure in debug builds but fail silently in releases. But it's also ok for now, we know that it's a superset of expressions on the match anyway. (And we're obviously overdoing it on this one point just for educational / discussion purposes, it's not a sticking point in practice :))

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> or perhaps to use a debug assertion so that we see this failure in debug builds but fail silently in releases.

I'm pretty happy with that as an approach.

Although... I'd also want a user to report a failure here because it's a bug in Ruff. I guess it's a matter of if we want to panic and make the rule entirely unusable for their code or fail silently and hope they notice that the rule isn't raising a violation. I can understand preferring the second as a safer user experience :)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted `debug_panic!(...)` but I guess `debug_assert!(false, ...)` will do (e058e24).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm happy with whatever! If it were me on a desert island, I guess I'd write something like `let Some(value_type) = CheckableExprType::try_from(value) else { return; };` but I'm not convinced that's the best option. This looks good to me.

(Again just to complete the picture, I think if we hit a panic here, the behavior the user would see is that we'd throw up a message asking them to file an issue and wouldn't print any violations for the failing file, but would print out violations for all other files as usual.)


// If the index is not a checkable type then we can't easily determine if there is a violation
let Some(index_type) = CheckableExprType::try_from(&index) else {
return;
};

// Then check the contents of the index
match index.as_ref() {
Expr::Constant(ExprConstant {
value: index_value, ..
}) => {
// If the index is a constant, require an integer
if !index_value.is_int() {
checker.diagnostics.push(Diagnostic::new(
InvalidIndexType {
value_type: value_type.to_string(),
index_type: constant_type_name(index_value).to_string(),
is_slice: false,
},
index.range(),
));
}
}
Expr::Slice(ExprSlice {
lower, upper, step, ..
}) => {
// If the index is a slice, require each bound to be an integer or `None`
for is_slice in [lower, upper, step].into_iter().flatten() {
if let Expr::Constant(ExprConstant {
value: index_value, ..
}) = is_slice.as_ref()
{
if !(index_value.is_int() || index_value.is_none()) {
checker.diagnostics.push(Diagnostic::new(
InvalidIndexType {
value_type: value_type.to_string(),
index_type: constant_type_name(index_value).to_string(),
is_slice: true,
},
is_slice.range(),
));
}
} else if let Some(is_slice_type) = CheckableExprType::try_from(is_slice.as_ref()) {
checker.diagnostics.push(Diagnostic::new(
InvalidIndexType {
value_type: value_type.to_string(),
index_type: is_slice_type.to_string(),
is_slice: true,
},
is_slice.range(),
));
}
}
}
_ => {
// If it's some other checkable data type, it's a violation
checker.diagnostics.push(Diagnostic::new(
InvalidIndexType {
value_type: value_type.to_string(),
index_type: index_type.to_string(),
is_slice: false,
},
index.range(),
));
}
}
}

/// The statically-known kind of an expression, used when checking type compatibility.
///
/// Only "literal-like" expressions are represented: their concrete type is evident
/// from the syntax alone, unlike e.g. a function call, whose result type cannot be
/// determined without additional analysis.
#[derive(Debug)]
enum CheckableExprType<'a> {
    // Literal values.
    Constant(&'a Constant),
    JoinedStr,
    // Container displays.
    List,
    Tuple,
    Set,
    Dict,
    // Comprehensions.
    ListComp,
    SetComp,
    DictComp,
    // Slice expressions.
    Slice,
}

/// Formats the checkable type as the type name shown in diagnostics.
impl fmt::Display for CheckableExprType<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Every variant maps to a static name, so resolve it first and write once.
        let name = match self {
            // Constants carry their own value; the name depends on the constant's kind.
            Self::Constant(constant) => constant_type_name(constant),
            // An f-string is reported as a plain `str`.
            Self::JoinedStr => "str",
            Self::List => "list",
            Self::SetComp => "set comprehension",
            Self::ListComp => "list comprehension",
            Self::DictComp => "dict comprehension",
            Self::Set => "set",
            Self::Slice => "slice",
            Self::Dict => "dict",
            Self::Tuple => "tuple",
        };
        f.write_str(name)
    }
}

impl<'a> CheckableExprType<'a> {
    /// Classifies `expr` as a checkable type.
    ///
    /// Returns `None` for any expression kind not listed below, i.e. one whose type
    /// is not evident from its syntax.
    fn try_from(expr: &'a Expr) -> Option<Self> {
        let kind = match expr {
            Expr::Constant(ExprConstant { value, .. }) => Self::Constant(value),
            Expr::JoinedStr(_) => Self::JoinedStr,
            Expr::List(_) => Self::List,
            Expr::ListComp(_) => Self::ListComp,
            Expr::SetComp(_) => Self::SetComp,
            Expr::DictComp(_) => Self::DictComp,
            Expr::Set(_) => Self::Set,
            Expr::Dict(_) => Self::Dict,
            Expr::Tuple(_) => Self::Tuple,
            Expr::Slice(_) => Self::Slice,
            // Anything else is not checkable.
            _ => return None,
        };
        Some(kind)
    }
}

/// Returns the type name used in diagnostics for a constant.
///
/// The match is exhaustive over `Constant`, so adding a variant there forces an
/// update here.
fn constant_type_name(constant: &Constant) -> &'static str {
    match constant {
        // Singletons.
        Constant::None => "None",
        Constant::Ellipsis => "ellipsis",
        // Numeric kinds.
        Constant::Bool(_) => "bool",
        Constant::Int(_) => "int",
        Constant::Float(_) => "float",
        Constant::Complex { .. } => "complex",
        // Text, binary, and sequence kinds.
        Constant::Str(_) => "str",
        Constant::Bytes(_) => "bytes",
        Constant::Tuple(_) => "tuple",
    }
}
2 changes: 2 additions & 0 deletions crates/ruff/src/rules/ruff/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ pub(crate) use collection_literal_concatenation::*;
pub(crate) use explicit_f_string_type_conversion::*;
pub(crate) use function_call_in_dataclass_default::*;
pub(crate) use implicit_optional::*;
pub(crate) use invalid_index_type::*;
pub(crate) use invalid_pyproject_toml::*;
pub(crate) use mutable_class_default::*;
pub(crate) use mutable_dataclass_default::*;
Expand All @@ -22,6 +23,7 @@ mod explicit_f_string_type_conversion;
mod function_call_in_dataclass_default;
mod helpers;
mod implicit_optional;
mod invalid_index_type;
mod invalid_pyproject_toml;
mod mutable_class_default;
mod mutable_dataclass_default;
Expand Down
Loading