Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ruff] Extend unnecessary-regular-expression to non-literal strings (RUF055) #14679

Merged
merged 22 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/ruff/RUF055_1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Test that RUF055 can follow a single str assignment for both the pattern and
the replacement argument to re.sub
"""

import re

pat1 = "needle"

re.sub(pat1, "", haystack)

# aliases are not followed, so this one should not trigger the rule
if pat4 := pat1:
re.sub(pat4, "", haystack)

# also works for the `repl` argument in sub
repl = "new"
re.sub(r"abc", repl, haystack)

# fixes should not be proposed for `sub` replacements with backreferences or
# most other escapes
re.sub(r"a", r"\g<0>\g<0>\g<0>", "a")
re.sub(r"a", r"\1", "a")

# but escapes like \n should work
re.sub(r"a", "\n", "a")
3 changes: 2 additions & 1 deletion crates/ruff_linter/src/rules/ruff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,8 @@ mod tests {
#[test_case(Rule::MapIntVersionParsing, Path::new("RUF048_1.py"))]
#[test_case(Rule::UnrawRePattern, Path::new("RUF039.py"))]
#[test_case(Rule::UnrawRePattern, Path::new("RUF039_concat.py"))]
#[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055.py"))]
#[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055_0.py"))]
#[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055_1.py"))]
fn preview_rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!(
"preview__{}_{}",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
use itertools::Itertools;
use ruff_diagnostics::{AlwaysFixableViolation, Applicability, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, ViolationMetadata};
use ruff_python_ast::ExprStringLiteral;
use ruff_python_ast::{
Arguments, CmpOp, Expr, ExprAttribute, ExprCall, ExprCompare, ExprContext, Identifier,
};
use ruff_python_semantic::analyze::typing::find_binding_value;
use ruff_python_semantic::{Modules, SemanticModel};
use ruff_text_size::TextRange;

Expand Down Expand Up @@ -90,8 +93,8 @@ pub(crate) fn unnecessary_regular_expression(checker: &mut Checker, call: &ExprC
return;
};

// For now, restrict this rule to string literals
let Some(string_lit) = re_func.pattern.as_string_literal_expr() else {
// For now, restrict this rule to string literals and variables that can be resolved to literals
let Some(string_lit) = resolve_string_literal(re_func.pattern, semantic) else {
return;
};

Expand Down Expand Up @@ -173,8 +176,13 @@ impl<'a> ReFunc<'a> {
// version
("sub", 3) => {
let repl = call.arguments.find_argument("repl", 1)?;
if !repl.is_string_literal_expr() {
return None;
// make sure repl can be resolved to a string literal without
// backslash escapes other than ascii control characters
let lit = resolve_string_literal(repl, semantic)?;
for (c, next) in lit.value.chars().tuple_windows() {
if c == '\\' && !"abfnrtv".contains(next) {
return None;
}
}
Some(ReFunc {
kind: ReFuncKind::Sub { repl },
Expand Down Expand Up @@ -248,3 +256,23 @@ impl<'a> ReFunc<'a> {
})
}
}

/// Try to resolve `name` to an [`ExprStringLiteral`] in `semantic`.
fn resolve_string_literal<'a>(
name: &'a Expr,
semantic: &'a SemanticModel,
) -> Option<&'a ExprStringLiteral> {
if name.is_string_literal_expr() {
return name.as_string_literal_expr();
}

if let Some(name_expr) = name.as_name_expr() {
let binding = semantic.binding(semantic.only_binding(name_expr)?);
let value = find_binding_value(binding, semantic)?;
if value.is_string_literal_expr() {
return value.as_string_literal_expr();
}
}

None
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
source: crates/ruff_linter/src/rules/ruff/mod.rs
snapshot_kind: text
---
RUF055.py:6:1: RUF055 [*] Plain string pattern passed to `re` function
RUF055_0.py:6:1: RUF055 [*] Plain string pattern passed to `re` function
|
5 | # this should be replaced with s.replace("abc", "")
6 | re.sub("abc", "", s)
Expand All @@ -20,7 +20,7 @@ RUF055.py:6:1: RUF055 [*] Plain string pattern passed to `re` function
8 8 |
9 9 | # this example, adapted from https://docs.python.org/3/library/re.html#re.sub,

RUF055.py:22:4: RUF055 [*] Plain string pattern passed to `re` function
RUF055_0.py:22:4: RUF055 [*] Plain string pattern passed to `re` function
|
20 | # this one should be replaced with s.startswith("abc") because the Match is
21 | # used in an if context for its truth value
Expand All @@ -41,7 +41,7 @@ RUF055.py:22:4: RUF055 [*] Plain string pattern passed to `re` function
24 24 | if m := re.match("abc", s): # this should *not* be replaced
25 25 | pass

RUF055.py:29:4: RUF055 [*] Plain string pattern passed to `re` function
RUF055_0.py:29:4: RUF055 [*] Plain string pattern passed to `re` function
|
28 | # this should be replaced with "abc" in s
29 | if re.search("abc", s):
Expand All @@ -61,7 +61,7 @@ RUF055.py:29:4: RUF055 [*] Plain string pattern passed to `re` function
31 31 | re.search("abc", s) # this should not be replaced
32 32 |

RUF055.py:34:4: RUF055 [*] Plain string pattern passed to `re` function
RUF055_0.py:34:4: RUF055 [*] Plain string pattern passed to `re` function
|
33 | # this should be replaced with "abc" == s
34 | if re.fullmatch("abc", s):
Expand All @@ -81,7 +81,7 @@ RUF055.py:34:4: RUF055 [*] Plain string pattern passed to `re` function
36 36 | re.fullmatch("abc", s) # this should not be replaced
37 37 |

RUF055.py:39:1: RUF055 [*] Plain string pattern passed to `re` function
RUF055_0.py:39:1: RUF055 [*] Plain string pattern passed to `re` function
|
38 | # this should be replaced with s.split("abc")
39 | re.split("abc", s)
Expand All @@ -101,7 +101,7 @@ RUF055.py:39:1: RUF055 [*] Plain string pattern passed to `re` function
41 41 | # these currently should not be modified because the patterns contain regex
42 42 | # metacharacters

RUF055.py:70:1: RUF055 [*] Plain string pattern passed to `re` function
RUF055_0.py:70:1: RUF055 [*] Plain string pattern passed to `re` function
|
69 | # this should trigger an unsafe fix because of the presence of comments
70 | / re.sub(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
---
source: crates/ruff_linter/src/rules/ruff/mod.rs
snapshot_kind: text
---
RUF055_1.py:9:1: RUF055 [*] Plain string pattern passed to `re` function
|
7 | pat1 = "needle"
8 |
9 | re.sub(pat1, "", haystack)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF055
10 |
11 | # aliases are not followed, so this one should not trigger the rule
|
= help: Replace with `haystack.replace(pat1, "")`

ℹ Safe fix
6 6 |
7 7 | pat1 = "needle"
8 8 |
9 |-re.sub(pat1, "", haystack)
9 |+haystack.replace(pat1, "")
10 10 |
11 11 | # aliases are not followed, so this one should not trigger the rule
12 12 | if pat4 := pat1:

RUF055_1.py:17:1: RUF055 [*] Plain string pattern passed to `re` function
|
15 | # also works for the `repl` argument in sub
16 | repl = "new"
17 | re.sub(r"abc", repl, haystack)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF055
18 |
19 | # fixes should not be proposed for `sub` replacements with backreferences or
|
= help: Replace with `haystack.replace("abc", repl)`

ℹ Safe fix
14 14 |
15 15 | # also works for the `repl` argument in sub
16 16 | repl = "new"
17 |-re.sub(r"abc", repl, haystack)
17 |+haystack.replace("abc", repl)
18 18 |
19 19 | # fixes should not be proposed for `sub` replacements with backreferences or
20 20 | # most other escapes

RUF055_1.py:25:1: RUF055 [*] Plain string pattern passed to `re` function
|
24 | # but escapes like \n should work
25 | re.sub(r"a", "\n", "a")
| ^^^^^^^^^^^^^^^^^^^^^^^ RUF055
|
= help: Replace with `"a".replace("a", "\n")`

ℹ Safe fix
22 22 | re.sub(r"a", r"\1", "a")
23 23 |
24 24 | # but escapes like \n should work
25 |-re.sub(r"a", "\n", "a")
25 |+"a".replace("a", "\n")