Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[flake8-pyi] Implement duplicate types in unions (PYI016) #3922

Merged
merged 19 commits into from
Apr 12, 2023
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions crates/ruff/resources/test/fixtures/flake8_pyi/PYI016.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Shouldn't affect non-union field types
field1: str

# Should emit for duplicate field types
field2: str | str # PYI016 Duplicate union member `str`


# Should emit for union types in arguments
def func1(arg1: int | int): # PYI016 Duplicate union member `int`
print(arg1)


# Should emit for unions in return types
def func2() -> str | str: # PYI016 Duplicate union member `str`
return "my string"


# Should emit in longer unions, even if not directly adjacent
field3: str | str | int # PYI016 Duplicate union member `str`
field4: int | int | str # PYI016 Duplicate union member `int`
field5: str | int | str # PYI016 Duplicate union member `str`
field6: int | bool | str | int # PYI016 Duplicate union member `int`

# Shouldn't emit for non-type unions
field7 = str | str

# Should emit for strangely-bracketed unions
field8: int | (str | int) # PYI016 Duplicate union member `int`

# Should handle user brackets when autorixing
field9: int | (int | str) # PYI016 Duplicate union member `int`
field10: (str | int) | str # PYI016 Duplicate union member `str`
29 changes: 29 additions & 0 deletions crates/ruff/resources/test/fixtures/flake8_pyi/PYI016.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Shouldn't affect non-union field types
field1: str

# Should emit for duplicate field types
field2: str | str # PYI016 Duplicate union member `str`

# Should emit for union types in arguments
def func1(arg1: int | int): # PYI016 Duplicate union member `int`
print(arg1)

# Should emit for unions in return types
def func2() -> str | str: # PYI016 Duplicate union member `str`
return "my string"

# Should emit in longer unions, even if not directly adjacent
field3: str | str | int # PYI016 Duplicate union member `str`
field4: int | int | str # PYI016 Duplicate union member `int`
field5: str | int | str # PYI016 Duplicate union member `str`
field6: int | bool | str | int # PYI016 Duplicate union member `int`

# Shouldn't emit for non-type unions
field7 = str | str

# Should emit for strangely-bracketed unions
field8: int | (str | int) # PYI016 Duplicate union member `int`

# Should handle user brackets when autorixing
field9: int | (int | str) # PYI016 Duplicate union member `int`
field10: (str | int) | str # PYI016 Duplicate union member `str`
13 changes: 13 additions & 0 deletions crates/ruff/src/checkers/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3214,6 +3214,19 @@ where
flake8_bandit::rules::hardcoded_sql_expression(self, expr);
}
}
ExprKind::BinOp {
op: Operator::BitOr,
..
} => {
if self.is_stub {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh sorry -- I also changed this to .pyi only for consistency with the other flake8-pyi rules. I want to enable these for non-.pyi files, but I want to do it all at once for the plugin.

if self.settings.rules.enabled(Rule::DuplicateUnionMember)
&& self.ctx.in_type_definition
&& self.ctx.current_expr_parent().is_none()
{
flake8_pyi::rules::duplicate_union_member(self, expr);
}
}
}
ExprKind::UnaryOp { op, operand } => {
let check_not_in = self.settings.rules.enabled(Rule::NotInTest);
let check_not_is = self.settings.rules.enabled(Rule::NotIsTest);
Expand Down
1 change: 1 addition & 0 deletions crates/ruff/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<Rule> {
(Flake8Pyi, "012") => Rule::PassInClassBody,
(Flake8Pyi, "014") => Rule::ArgumentDefaultInStub,
(Flake8Pyi, "015") => Rule::AssignmentDefaultInStub,
(Flake8Pyi, "016") => Rule::DuplicateUnionMember,
(Flake8Pyi, "021") => Rule::DocstringInStub,
(Flake8Pyi, "033") => Rule::TypeCommentInStub,

Expand Down
1 change: 1 addition & 0 deletions crates/ruff/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,7 @@ ruff_macros::register_rules!(
rules::flake8_pyi::rules::UnrecognizedPlatformCheck,
rules::flake8_pyi::rules::UnrecognizedPlatformName,
rules::flake8_pyi::rules::PassInClassBody,
rules::flake8_pyi::rules::DuplicateUnionMember,
// flake8-pytest-style
rules::flake8_pytest_style::rules::PytestFixtureIncorrectParenthesesStyle,
rules::flake8_pytest_style::rules::PytestFixturePositionalArgs,
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff/src/rules/flake8_pyi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ mod tests {
#[test_case(Rule::ArgumentDefaultInStub, Path::new("PYI014.pyi"))]
#[test_case(Rule::AssignmentDefaultInStub, Path::new("PYI015.py"))]
#[test_case(Rule::AssignmentDefaultInStub, Path::new("PYI015.pyi"))]
#[test_case(Rule::DuplicateUnionMember, Path::new("PYI016.py"))]
#[test_case(Rule::DuplicateUnionMember, Path::new("PYI016.pyi"))]
#[test_case(Rule::DocstringInStub, Path::new("PYI021.py"))]
#[test_case(Rule::DocstringInStub, Path::new("PYI021.pyi"))]
#[test_case(Rule::TypeCommentInStub, Path::new("PYI033.py"))]
Expand Down
92 changes: 92 additions & 0 deletions crates/ruff/src/rules/flake8_pyi/rules/duplicate_union_member.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
use rustc_hash::FxHashSet;
use rustpython_parser::ast::{Expr, ExprKind, Operator};

use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::comparable::ComparableExpr;
use ruff_python_ast::helpers::unparse_expr;
use ruff_python_ast::types::Range;

use crate::checkers::ast::Checker;
use crate::registry::AsRule;

#[violation]
pub struct DuplicateUnionMember {
pub duplicate_name: String,
}

impl AlwaysAutofixableViolation for DuplicateUnionMember {
#[derive_message_formats]
fn message(&self) -> String {
format!("Duplicate union member `{}`", self.duplicate_name)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed this to use the same message as flake8-pyi, I think.

}

fn autofix_title(&self) -> String {
format!("Remove duplicate union member `{}`", self.duplicate_name)
}
}

/// PYI016
pub fn duplicate_union_member(checker: &mut Checker, expr: &Expr) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tweaked this to use an inner helper, so that the caller doesn't have to worry or know about (e.g.) creating a hash set for internal use.

let mut seen_nodes = FxHashSet::default();
traverse_union(&mut seen_nodes, checker, expr, None);
}

fn traverse_union<'a>(
seen_nodes: &mut FxHashSet<ComparableExpr<'a>>,
checker: &mut Checker,
expr: &'a Expr,
parent: Option<&'a Expr>,
) {
// The union data structure usually looks like this:
// a | b | c -> (a | b) | c
//
// However, parenthesized expressions can coerce it into any structure:
// a | (b | c)
//
// So we have to traverse both branches in order (left, then right), to report duplicates
// in the order they appear in the source code.
if let ExprKind::BinOp {
op: Operator::BitOr,
left,
right,
} = &expr.node
{
// Traverse left subtree, then the right subtree, propagating the previous node.
traverse_union(seen_nodes, checker, left, Some(expr));
traverse_union(seen_nodes, checker, right, Some(expr));
}

// If we've already seen this union member, raise a violation.
if !seen_nodes.insert(expr.into()) {
let mut diagnostic = Diagnostic::new(
DuplicateUnionMember {
duplicate_name: unparse_expr(expr, checker.stylist),
charliermarsh marked this conversation as resolved.
Show resolved Hide resolved
},
Range::from(expr),
);
if checker.patch(diagnostic.kind.rule()) {
// Delete the "|" character as well as the duplicate value by reconstructing the
// parent without the duplicate

// SAFETY: impossible to have a duplicate without a `parent` node.
let parent = parent.unwrap();

// SAFETY: Parent node must have been a BinOp in order for us to have traversed it
let ExprKind::BinOp { left, right, .. } = &parent.node
else {
unreachable!();
};
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know if there's a better way to do this unwrapping.


// Replace parent with the non-duplicate of its children
let fixed_parent = if expr.node == left.node { right } else { left };

diagnostic.set_fix(Edit::replacement(
unparse_expr(fixed_parent, checker.stylist),
parent.location,
parent.end_location.unwrap(),
));
Copy link
Member

@charliermarsh charliermarsh Apr 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this can lead to syntax errors in the presence of parentheses. E.g., if you try to autofix this, we throw a syntax error:

field2: (str | int) | str  # PYI016 Duplicate name in union

I'm guessing the issue is that the previous end_location won't include the parenthesis -- so we change to:

field2: (str | int  # PYI016 Duplicate name in union

(I'm guessing here, I haven't looked at the erroneous source code.)

As an alternative strategy, we could use unparse_expr, and re-create the source expr, but with the duplicate expression removed. (That function takes an AST node and generates source code from the AST.) It'll be guaranteed to be correct and valid syntax, but we will lose trivia (comments, optional user-added parentheses) -- that seems ok here, since comments and such are pretty rare within these unions?

Copy link
Contributor Author

@USER-5 USER-5 Apr 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, and there is also:

field: str | (str | int)

Which would presumably get corrected to

field: str | int)

In the above case, we would either want to:

  • delete the | character after the duplicate
  • reconstruct from unparse - removing all brackets in the process
  • delete as we do currently, then re-introduce a bracket at the correct location (this might be the easiest?)

I'm not sure what other "fix" tools I have at my disposal to help achieve this.

I just saw your edit about unparse_expr so I think I have a way forward.

Copy link
Contributor Author

@USER-5 USER-5 Apr 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've hopefully fixed this issue, and I've added a couple test cases to the fixture for parentheses cases, which work as I'd expect:

field9: int | (int | str)  # PYI016 Duplicate union member `int`
field10: (str | int) | str  # PYI016 Duplicate union member `str`
# Gets auto fixed to
field9: int | (str)  # PYI016 Duplicate union member `int`
field10: str | int  # PYI016 Duplicate union member `str`

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool, this looks good to me!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, the approach here makes sense. I guess I was suggesting taking the top-level expression (the thing we pass into duplicate_union_member) and recursively filtering to find the duplicated child, but what you settled on here seems better.

}
checker.diagnostics.push(diagnostic);
}
}
2 changes: 2 additions & 0 deletions crates/ruff/src/rules/flake8_pyi/rules/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
pub use bad_version_info_comparison::{bad_version_info_comparison, BadVersionInfoComparison};
pub use docstring_in_stubs::{docstring_in_stubs, DocstringInStub};
pub use duplicate_union_member::{duplicate_union_member, DuplicateUnionMember};
pub use non_empty_stub_body::{non_empty_stub_body, NonEmptyStubBody};
pub use pass_in_class_body::{pass_in_class_body, PassInClassBody};
pub use pass_statement_stub_body::{pass_statement_stub_body, PassStatementStubBody};
Expand All @@ -15,6 +16,7 @@ pub use unrecognized_platform::{

mod bad_version_info_comparison;
mod docstring_in_stubs;
mod duplicate_union_member;
mod non_empty_stub_body;
mod pass_in_class_body;
mod pass_statement_stub_body;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
source: crates/ruff/src/rules/flake8_pyi/mod.rs
expression: diagnostics
---
[]

Loading