-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Co-authored-by: Micha Reiser <[email protected]> Co-authored-by: Alex Waygood <[email protected]> Co-authored-by: Simon Brugman <[email protected]>
- Loading branch information
1 parent
dc29f52
commit 224fe75
Showing
8 changed files
with
464 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import re | ||
|
||
s = "str" | ||
|
||
# this should be replaced with s.replace("abc", "") | ||
re.sub("abc", "", s) | ||
|
||
|
||
# this example, adapted from https://docs.python.org/3/library/re.html#re.sub, | ||
# should *not* be replaced because repl is a function, not a string | ||
def dashrepl(matchobj): | ||
if matchobj.group(0) == "-": | ||
return " " | ||
else: | ||
return "-" | ||
|
||
|
||
re.sub("-", dashrepl, "pro----gram-files") | ||
|
||
# this one should be replaced with s.startswith("abc") because the Match is | ||
# used in an if context for its truth value | ||
if re.match("abc", s): | ||
pass | ||
if m := re.match("abc", s): # this should *not* be replaced | ||
pass | ||
re.match("abc", s) # this should not be replaced because match returns a Match | ||
|
||
# this should be replaced with "abc" in s | ||
if re.search("abc", s): | ||
pass | ||
re.search("abc", s) # this should not be replaced | ||
|
||
# this should be replaced with "abc" == s | ||
if re.fullmatch("abc", s): | ||
pass | ||
re.fullmatch("abc", s) # this should not be replaced | ||
|
||
# this should be replaced with s.split("abc") | ||
re.split("abc", s) | ||
|
||
# these currently should not be modified because the patterns contain regex | ||
# metacharacters | ||
re.sub("ab[c]", "", s) | ||
re.match("ab[c]", s) | ||
re.search("ab[c]", s) | ||
re.fullmatch("ab[c]", s) | ||
re.split("ab[c]", s) | ||
|
||
# test that all of the metacharacters prevent the rule from triggering, also | ||
# use raw strings in line with RUF039 | ||
re.sub(r"abc.", "", s) | ||
re.sub(r"^abc", "", s) | ||
re.sub(r"abc$", "", s) | ||
re.sub(r"abc*", "", s) | ||
re.sub(r"abc+", "", s) | ||
re.sub(r"abc?", "", s) | ||
re.sub(r"abc{2,3}", "", s) | ||
re.sub(r"abc\n", "", s) # this one could be fixed but is not currently | ||
re.sub(r"abc|def", "", s) | ||
re.sub(r"(a)bc", "", s) | ||
|
||
# and these should not be modified because they have extra arguments | ||
re.sub("abc", "", s, flags=re.A) | ||
re.match("abc", s, flags=re.I) | ||
re.search("abc", s, flags=re.L) | ||
re.fullmatch("abc", s, flags=re.M) | ||
re.split("abc", s, maxsplit=2) | ||
|
||
# this should trigger an unsafe fix because of the presence of comments | ||
re.sub( | ||
# pattern | ||
"abc", | ||
# repl | ||
"", | ||
s, # string | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
250 changes: 250 additions & 0 deletions
250
crates/ruff_linter/src/rules/ruff/rules/unnecessary_regular_expression.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,250 @@ | ||
use ruff_diagnostics::{AlwaysFixableViolation, Applicability, Diagnostic, Edit, Fix}; | ||
use ruff_macros::{derive_message_formats, ViolationMetadata}; | ||
use ruff_python_ast::{ | ||
Arguments, CmpOp, Expr, ExprAttribute, ExprCall, ExprCompare, ExprContext, Identifier, | ||
}; | ||
use ruff_python_semantic::{Modules, SemanticModel}; | ||
use ruff_text_size::TextRange; | ||
|
||
use crate::checkers::ast::Checker; | ||
|
||
/// ## What it does | ||
/// | ||
/// Checks for uses of the `re` module that can be replaced with builtin `str` methods. | ||
/// | ||
/// ## Why is this bad? | ||
/// | ||
/// Performing checks on strings directly can make the code simpler, may require | ||
/// less escaping, and will often be faster. | ||
/// | ||
/// ## Example | ||
/// | ||
/// ```python | ||
/// re.sub("abc", "", s) | ||
/// ``` | ||
/// | ||
/// Use instead: | ||
/// | ||
/// ```python | ||
/// s.replace("abc", "") | ||
/// ``` | ||
/// | ||
/// ## Details | ||
/// | ||
/// The rule reports the following calls when the first argument to the call is | ||
/// a plain string literal, and no additional flags are passed: | ||
/// | ||
/// - `re.sub` | ||
/// - `re.match` | ||
/// - `re.search` | ||
/// - `re.fullmatch` | ||
/// - `re.split` | ||
/// | ||
/// For `re.sub`, the `repl` (replacement) argument must also be a string literal, | ||
/// not a function. For `re.match`, `re.search`, and `re.fullmatch`, the return | ||
/// value must also be used only for its truth value. | ||
/// | ||
/// ## Fix safety | ||
/// | ||
/// This rule's fix is marked as unsafe if the affected expression contains comments. Otherwise, | ||
/// the fix can be applied safely. | ||
/// | ||
/// ## References | ||
/// - [Python Regular Expression HOWTO: Common Problems - Use String Methods](https://docs.python.org/3/howto/regex.html#use-string-methods) | ||
#[derive(ViolationMetadata)] | ||
pub(crate) struct UnnecessaryRegularExpression { | ||
replacement: String, | ||
} | ||
|
||
impl AlwaysFixableViolation for UnnecessaryRegularExpression { | ||
#[derive_message_formats] | ||
fn message(&self) -> String { | ||
"Plain string pattern passed to `re` function".to_string() | ||
} | ||
|
||
fn fix_title(&self) -> String { | ||
format!("Replace with `{}`", self.replacement) | ||
} | ||
} | ||
|
||
/// RUF055 | ||
pub(crate) fn unnecessary_regular_expression(checker: &mut Checker, call: &ExprCall) { | ||
// adapted from unraw_re_pattern | ||
let semantic = checker.semantic(); | ||
|
||
if !semantic.seen_module(Modules::RE) { | ||
return; | ||
} | ||
|
||
let Some(qualified_name) = semantic.resolve_qualified_name(&call.func) else { | ||
return; | ||
}; | ||
|
||
let ["re", func] = qualified_name.segments() else { | ||
return; | ||
}; | ||
|
||
// skip calls with more than `pattern` and `string` arguments (and `repl` | ||
// for `sub`) | ||
let Some(re_func) = ReFunc::from_call_expr(semantic, call, func) else { | ||
return; | ||
}; | ||
|
||
// For now, restrict this rule to string literals | ||
let Some(string_lit) = re_func.pattern.as_string_literal_expr() else { | ||
return; | ||
}; | ||
|
||
// For now, reject any regex metacharacters. Compare to the complete list | ||
// from https://docs.python.org/3/howto/regex.html#matching-characters | ||
let has_metacharacters = string_lit | ||
.value | ||
.to_str() | ||
.contains(['.', '^', '$', '*', '+', '?', '{', '[', '\\', '|', '(']); | ||
|
||
if has_metacharacters { | ||
return; | ||
} | ||
|
||
// Here we know the pattern is a string literal with no metacharacters, so | ||
// we can proceed with the str method replacement | ||
let new_expr = re_func.replacement(); | ||
|
||
let repl = checker.generator().expr(&new_expr); | ||
let diagnostic = Diagnostic::new( | ||
UnnecessaryRegularExpression { | ||
replacement: repl.clone(), | ||
}, | ||
call.range, | ||
); | ||
|
||
let fix = Fix::applicable_edit( | ||
Edit::range_replacement(repl, call.range), | ||
if checker | ||
.comment_ranges() | ||
.has_comments(call, checker.source()) | ||
{ | ||
Applicability::Unsafe | ||
} else { | ||
Applicability::Safe | ||
}, | ||
); | ||
|
||
checker.diagnostics.push(diagnostic.with_fix(fix)); | ||
} | ||
|
||
/// The `re` functions supported by this rule. | ||
#[derive(Debug)] | ||
enum ReFuncKind<'a> { | ||
Sub { repl: &'a Expr }, | ||
Match, | ||
Search, | ||
Fullmatch, | ||
Split, | ||
} | ||
|
||
#[derive(Debug)] | ||
struct ReFunc<'a> { | ||
kind: ReFuncKind<'a>, | ||
pattern: &'a Expr, | ||
string: &'a Expr, | ||
} | ||
|
||
impl<'a> ReFunc<'a> { | ||
fn from_call_expr( | ||
semantic: &SemanticModel, | ||
call: &'a ExprCall, | ||
func_name: &str, | ||
) -> Option<Self> { | ||
// the proposed fixes for match, search, and fullmatch rely on the | ||
// return value only being used for its truth value | ||
let in_if_context = semantic.in_boolean_test(); | ||
|
||
match (func_name, call.arguments.len()) { | ||
// `split` is the safest of these to fix, as long as metacharacters | ||
// have already been filtered out from the `pattern` | ||
("split", 2) => Some(ReFunc { | ||
kind: ReFuncKind::Split, | ||
pattern: call.arguments.find_argument("pattern", 0)?, | ||
string: call.arguments.find_argument("string", 1)?, | ||
}), | ||
// `sub` is only safe to fix if `repl` is a string. `re.sub` also | ||
// allows it to be a function, which will *not* work in the str | ||
// version | ||
("sub", 3) => { | ||
let repl = call.arguments.find_argument("repl", 1)?; | ||
if !repl.is_string_literal_expr() { | ||
return None; | ||
} | ||
Some(ReFunc { | ||
kind: ReFuncKind::Sub { repl }, | ||
pattern: call.arguments.find_argument("pattern", 0)?, | ||
string: call.arguments.find_argument("string", 2)?, | ||
}) | ||
} | ||
("match", 2) if in_if_context => Some(ReFunc { | ||
kind: ReFuncKind::Match, | ||
pattern: call.arguments.find_argument("pattern", 0)?, | ||
string: call.arguments.find_argument("string", 1)?, | ||
}), | ||
("search", 2) if in_if_context => Some(ReFunc { | ||
kind: ReFuncKind::Search, | ||
pattern: call.arguments.find_argument("pattern", 0)?, | ||
string: call.arguments.find_argument("string", 1)?, | ||
}), | ||
("fullmatch", 2) if in_if_context => Some(ReFunc { | ||
kind: ReFuncKind::Fullmatch, | ||
pattern: call.arguments.find_argument("pattern", 0)?, | ||
string: call.arguments.find_argument("string", 1)?, | ||
}), | ||
_ => None, | ||
} | ||
} | ||
|
||
fn replacement(&self) -> Expr { | ||
match self.kind { | ||
// string.replace(pattern, repl) | ||
ReFuncKind::Sub { repl } => { | ||
self.method_expr("replace", vec![self.pattern.clone(), repl.clone()]) | ||
} | ||
// string.startswith(pattern) | ||
ReFuncKind::Match => self.method_expr("startswith", vec![self.pattern.clone()]), | ||
// pattern in string | ||
ReFuncKind::Search => self.compare_expr(CmpOp::In), | ||
// string == pattern | ||
ReFuncKind::Fullmatch => self.compare_expr(CmpOp::Eq), | ||
// string.split(pattern) | ||
ReFuncKind::Split => self.method_expr("split", vec![self.pattern.clone()]), | ||
} | ||
} | ||
|
||
/// Return a new compare expr of the form `self.pattern op self.string` | ||
fn compare_expr(&self, op: CmpOp) -> Expr { | ||
Expr::Compare(ExprCompare { | ||
left: Box::new(self.pattern.clone()), | ||
ops: Box::new([op]), | ||
comparators: Box::new([self.string.clone()]), | ||
range: TextRange::default(), | ||
}) | ||
} | ||
|
||
/// Return a new method call expression on `self.string` with `args` like | ||
/// `self.string.method(args...)` | ||
fn method_expr(&self, method: &str, args: Vec<Expr>) -> Expr { | ||
let method = Expr::Attribute(ExprAttribute { | ||
value: Box::new(self.string.clone()), | ||
attr: Identifier::new(method, TextRange::default()), | ||
ctx: ExprContext::Load, | ||
range: TextRange::default(), | ||
}); | ||
Expr::Call(ExprCall { | ||
func: Box::new(method), | ||
arguments: Arguments { | ||
args: args.into_boxed_slice(), | ||
keywords: Box::new([]), | ||
range: TextRange::default(), | ||
}, | ||
range: TextRange::default(), | ||
}) | ||
} | ||
} |
Oops, something went wrong.