-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add RUF016: Detection of invalid index types (#5602)
Detects invalid types for tuple, list, bytes, string indices. For example, the following will raise a `TypeError` at runtime and when imported Python will display a `SyntaxWarning` ```python var = [1, 2, 3]["x"] ``` ``` example.py:1: SyntaxWarning: list indices must be integers or slices, not str; perhaps you missed a comma? var = [1, 2, 3]["x"] Traceback (most recent call last): File "example.py", line 1, in <module> var = [1, 2, 3]["x"] ~~~~~~~~~^^^^^ TypeError: list indices must be integers or slices, not str ``` Previously, Ruff would not report the invalid syntax but now a violation will be reported. This does not apply to cases where a variable, call, or complex expression is used in the index — detection is roughly limited to static definitions, which matches Python's warnings. ``` ❯ ./target/debug/ruff example.py --select RUF015 --show-source --no-cache example.py:1:17: RUF015 Indexed access to type `list` uses type `str` instead of an integer or slice. | 1 | var = [1, 2, 3]["x"] | ^^^ RUF015 | ``` Closes #5082 xref python/cpython@ffff144
- Loading branch information
Showing
9 changed files
with
722 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
# Should not emit for valid access with index | ||
var = "abc"[0] | ||
var = f"abc"[0] | ||
var = [1, 2, 3][0] | ||
var = (1, 2, 3)[0] | ||
var = b"abc"[0] | ||
|
||
# Should not emit for valid access with slice | ||
var = "abc"[0:2] | ||
var = f"abc"[0:2] | ||
var = b"abc"[0:2] | ||
var = [1, 2, 3][0:2] | ||
var = (1, 2, 3)[0:2] | ||
var = [1, 2, 3][None:2] | ||
var = [1, 2, 3][0:None] | ||
var = [1, 2, 3][:2] | ||
var = [1, 2, 3][0:] | ||
|
||
# Should emit for invalid access on strings | ||
var = "abc"["x"] | ||
var = f"abc"["x"] | ||
|
||
# Should emit for invalid access on bytes | ||
var = b"abc"["x"] | ||
|
||
# Should emit for invalid access on lists and tuples | ||
var = [1, 2, 3]["x"] | ||
var = (1, 2, 3)["x"] | ||
|
||
# Should emit for invalid access on list comprehensions | ||
var = [x for x in range(10)]["x"] | ||
|
||
# Should emit for invalid access using tuple | ||
var = "abc"[1, 2] | ||
|
||
# Should emit for invalid access using string | ||
var = [1, 2]["x"] | ||
|
||
# Should emit for invalid access using float | ||
var = [1, 2][0.25] | ||
|
||
# Should emit for invalid access using dict | ||
var = [1, 2][{"x": "y"}] | ||
|
||
# Should emit for invalid access using dict comp | ||
var = [1, 2][{x: "y" for x in range(2)}] | ||
|
||
# Should emit for invalid access using list | ||
var = [1, 2][2, 3] | ||
|
||
# Should emit for invalid access using list comp | ||
var = [1, 2][[x for x in range(2)]] | ||
|
||
# Should emit on invalid access using set | ||
var = [1, 2][{"x", "y"}] | ||
|
||
# Should emit on invalid access using set comp | ||
var = [1, 2][{x for x in range(2)}] | ||
|
||
# Should emit on invalid access using bytes | ||
var = [1, 2][b"x"] | ||
|
||
# Should emit for non-integer slice start | ||
var = [1, 2, 3]["x":2] | ||
var = [1, 2, 3][f"x":2] | ||
var = [1, 2, 3][1.2:2] | ||
var = [1, 2, 3][{"x"}:2] | ||
var = [1, 2, 3][{x for x in range(2)}:2] | ||
var = [1, 2, 3][{"x": x for x in range(2)}:2] | ||
var = [1, 2, 3][[x for x in range(2)]:2] | ||
|
||
# Should emit for non-integer slice end | ||
var = [1, 2, 3][0:"x"] | ||
var = [1, 2, 3][0:f"x"] | ||
var = [1, 2, 3][0:1.2] | ||
var = [1, 2, 3][0:{"x"}] | ||
var = [1, 2, 3][0:{x for x in range(2)}] | ||
var = [1, 2, 3][0:{"x": x for x in range(2)}] | ||
var = [1, 2, 3][0:[x for x in range(2)]] | ||
|
||
# Should emit for non-integer slice step | ||
var = [1, 2, 3][0:1:"x"] | ||
var = [1, 2, 3][0:1:f"x"] | ||
var = [1, 2, 3][0:1:1.2] | ||
var = [1, 2, 3][0:1:{"x"}] | ||
var = [1, 2, 3][0:1:{x for x in range(2)}] | ||
var = [1, 2, 3][0:1:{"x": x for x in range(2)}] | ||
var = [1, 2, 3][0:1:[x for x in range(2)]] | ||
|
||
# Should emit for non-integer slice start and end; should emit twice with specific ranges | ||
var = [1, 2, 3]["x":"y"] | ||
|
||
# Should emit once for repeated invalid access | ||
var = [1, 2, 3]["x"]["y"]["z"] | ||
|
||
# Cannot emit on invalid access using variable in index | ||
x = "x" | ||
var = "abc"[x] | ||
|
||
# Cannot emit on invalid access using call | ||
def func(): | ||
return 1 | ||
var = "abc"[func()] | ||
|
||
# Cannot emit on invalid access using a variable in parent | ||
x = [1, 2, 3] | ||
var = x["y"] | ||
|
||
# Cannot emit for invalid access on byte array | ||
var = bytearray(b"abc")["x"] | ||
|
||
# Cannot emit for slice bound using variable | ||
x = "x" | ||
var = [1, 2, 3][0:x] | ||
var = [1, 2, 3][x:1] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,214 @@ | ||
use rustpython_parser::ast::{Constant, Expr, ExprConstant, ExprSlice, ExprSubscript, Ranged}; | ||
|
||
use ruff_diagnostics::{Diagnostic, Violation}; | ||
use ruff_macros::{derive_message_formats, violation}; | ||
use std::fmt; | ||
|
||
use crate::checkers::ast::Checker; | ||
|
||
/// ## What it does | ||
/// Checks for indexed access to lists, strings, tuples, bytes, and comprehensions | ||
/// using a type other than an integer or slice. | ||
/// | ||
/// ## Why is this bad? | ||
/// Only integers or slices can be used as indices to these types. Using | ||
/// other types will result in a `TypeError` at runtime and a `SyntaxWarning` at | ||
/// import time. | ||
/// | ||
/// ## Example | ||
/// ```python | ||
/// var = [1, 2, 3]["x"] | ||
/// ``` | ||
/// | ||
/// Use instead: | ||
/// ```python | ||
/// var = [1, 2, 3][0] | ||
/// ``` | ||
#[violation] | ||
pub struct InvalidIndexType { | ||
value_type: String, | ||
index_type: String, | ||
is_slice: bool, | ||
} | ||
|
||
impl Violation for InvalidIndexType { | ||
#[derive_message_formats] | ||
fn message(&self) -> String { | ||
let InvalidIndexType { | ||
value_type, | ||
index_type, | ||
is_slice, | ||
} = self; | ||
if *is_slice { | ||
format!("Slice in indexed access to type `{value_type}` uses type `{index_type}` instead of an integer.") | ||
} else { | ||
format!( | ||
"Indexed access to type `{value_type}` uses type `{index_type}` instead of an integer or slice." | ||
) | ||
} | ||
} | ||
} | ||
|
||
/// RUF015 | ||
pub(crate) fn invalid_index_type(checker: &mut Checker, expr: &ExprSubscript) { | ||
let ExprSubscript { | ||
value, | ||
slice: index, | ||
.. | ||
} = expr; | ||
|
||
// Check the value being indexed is a list, tuple, string, f-string, bytes, or comprehension | ||
if !matches!( | ||
value.as_ref(), | ||
Expr::List(_) | ||
| Expr::ListComp(_) | ||
| Expr::Tuple(_) | ||
| Expr::JoinedStr(_) | ||
| Expr::Constant(ExprConstant { | ||
value: Constant::Str(_) | Constant::Bytes(_), | ||
.. | ||
}) | ||
) { | ||
return; | ||
} | ||
|
||
// The value types supported by this rule should always be checkable | ||
let Some(value_type) = CheckableExprType::try_from(value) else { | ||
debug_assert!(false, "Index value must be a checkable type to generate a violation message."); | ||
return; | ||
}; | ||
|
||
// If the index is not a checkable type then we can't easily determine if there is a violation | ||
let Some(index_type) = CheckableExprType::try_from(index) else { | ||
return; | ||
}; | ||
|
||
// Then check the contents of the index | ||
match index.as_ref() { | ||
Expr::Constant(ExprConstant { | ||
value: index_value, .. | ||
}) => { | ||
// If the index is a constant, require an integer | ||
if !index_value.is_int() { | ||
checker.diagnostics.push(Diagnostic::new( | ||
InvalidIndexType { | ||
value_type: value_type.to_string(), | ||
index_type: constant_type_name(index_value).to_string(), | ||
is_slice: false, | ||
}, | ||
index.range(), | ||
)); | ||
} | ||
} | ||
Expr::Slice(ExprSlice { | ||
lower, upper, step, .. | ||
}) => { | ||
// If the index is a slice, require integer or null bounds | ||
for is_slice in [lower, upper, step].into_iter().flatten() { | ||
if let Expr::Constant(ExprConstant { | ||
value: index_value, .. | ||
}) = is_slice.as_ref() | ||
{ | ||
if !(index_value.is_int() || index_value.is_none()) { | ||
checker.diagnostics.push(Diagnostic::new( | ||
InvalidIndexType { | ||
value_type: value_type.to_string(), | ||
index_type: constant_type_name(index_value).to_string(), | ||
is_slice: true, | ||
}, | ||
is_slice.range(), | ||
)); | ||
} | ||
} else if let Some(is_slice_type) = CheckableExprType::try_from(is_slice.as_ref()) { | ||
checker.diagnostics.push(Diagnostic::new( | ||
InvalidIndexType { | ||
value_type: value_type.to_string(), | ||
index_type: is_slice_type.to_string(), | ||
is_slice: true, | ||
}, | ||
is_slice.range(), | ||
)); | ||
} | ||
} | ||
} | ||
_ => { | ||
// If it's some other checkable data type, it's a violation | ||
checker.diagnostics.push(Diagnostic::new( | ||
InvalidIndexType { | ||
value_type: value_type.to_string(), | ||
index_type: index_type.to_string(), | ||
is_slice: false, | ||
}, | ||
index.range(), | ||
)); | ||
} | ||
} | ||
} | ||
|
||
/// An expression that can be checked for type compatibility. | ||
/// | ||
/// These are generally "literal" type expressions in that we know their concrete type | ||
/// without additional analysis; opposed to expressions like a function call where we | ||
/// cannot determine what type it may return. | ||
#[derive(Debug)] | ||
enum CheckableExprType<'a> { | ||
Constant(&'a Constant), | ||
JoinedStr, | ||
List, | ||
ListComp, | ||
SetComp, | ||
DictComp, | ||
Set, | ||
Dict, | ||
Tuple, | ||
Slice, | ||
} | ||
|
||
impl fmt::Display for CheckableExprType<'_> { | ||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
match self { | ||
Self::Constant(constant) => f.write_str(constant_type_name(constant)), | ||
Self::JoinedStr => f.write_str("str"), | ||
Self::List => f.write_str("list"), | ||
Self::SetComp => f.write_str("set comprehension"), | ||
Self::ListComp => f.write_str("list comprehension"), | ||
Self::DictComp => f.write_str("dict comprehension"), | ||
Self::Set => f.write_str("set"), | ||
Self::Slice => f.write_str("slice"), | ||
Self::Dict => f.write_str("dict"), | ||
Self::Tuple => f.write_str("tuple"), | ||
} | ||
} | ||
} | ||
|
||
impl<'a> CheckableExprType<'a> { | ||
fn try_from(expr: &'a Expr) -> Option<Self> { | ||
match expr { | ||
Expr::Constant(ExprConstant { value, .. }) => Some(Self::Constant(value)), | ||
Expr::JoinedStr(_) => Some(Self::JoinedStr), | ||
Expr::List(_) => Some(Self::List), | ||
Expr::ListComp(_) => Some(Self::ListComp), | ||
Expr::SetComp(_) => Some(Self::SetComp), | ||
Expr::DictComp(_) => Some(Self::DictComp), | ||
Expr::Set(_) => Some(Self::Set), | ||
Expr::Dict(_) => Some(Self::Dict), | ||
Expr::Tuple(_) => Some(Self::Tuple), | ||
Expr::Slice(_) => Some(Self::Slice), | ||
_ => None, | ||
} | ||
} | ||
} | ||
|
||
fn constant_type_name(constant: &Constant) -> &'static str { | ||
match constant { | ||
Constant::None => "None", | ||
Constant::Bool(_) => "bool", | ||
Constant::Str(_) => "str", | ||
Constant::Bytes(_) => "bytes", | ||
Constant::Int(_) => "int", | ||
Constant::Tuple(_) => "tuple", | ||
Constant::Float(_) => "float", | ||
Constant::Complex { .. } => "complex", | ||
Constant::Ellipsis => "ellipsis", | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.