Skip to content

Commit

Permalink
Add list terminator kind for error recovery (#11843)
Browse files Browse the repository at this point in the history
## Summary

This PR adds a new enum to determine the kind of terminator token i.e.,
is it actually terminates the list or is it used for error recovery.

This is important because the parser should take the error recovery
route in case the terminator token is used for better error recovery.
This will then try to re-lex the token if it's the case.

I haven't updated any reference to use this new enum as otherwise it'll
update the snapshots. I plan to do that in a follow-up PR so that it's
easier to reason about.

## Test plan

`cargo insta test`
  • Loading branch information
dhruvmanila authored Jun 12, 2024
1 parent a525b4b commit 60ea72a
Showing 1 changed file with 73 additions and 31 deletions.
104 changes: 73 additions & 31 deletions crates/ruff_python_parser/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,14 @@ impl Parenthesized {
}
}

#[derive(Copy, Clone, Debug)]
enum ListTerminatorKind {
/// The current token terminates the list.
Regular,
/// The current token doesn't terminate the list, but is useful for better error recovery.
ErrorRecovery,
}

#[derive(Copy, Clone, Debug)]
enum RecoveryContextKind {
/// When parsing a list of statements at the module level i.e., at the top level of a file.
Expand Down Expand Up @@ -876,35 +884,49 @@ impl RecoveryContextKind {
)
}

/// Returns `true` if the parser is at a token that terminates the list as per the context.
fn is_list_terminator(self, p: &Parser) -> bool {
self.list_terminator_kind(p).is_some()
}

/// Checks the current token the parser is at and returns the list terminator kind if the token
/// terminates the list as per the context.
fn list_terminator_kind(self, p: &Parser) -> Option<ListTerminatorKind> {
match self {
// The parser must consume all tokens until the end
RecoveryContextKind::ModuleStatements => false,
RecoveryContextKind::BlockStatements => p.at(TokenKind::Dedent),
RecoveryContextKind::ModuleStatements => None,
RecoveryContextKind::BlockStatements => p
.at(TokenKind::Dedent)
.then_some(ListTerminatorKind::Regular),

RecoveryContextKind::Elif => p.at(TokenKind::Else),
RecoveryContextKind::Elif => {
p.at(TokenKind::Else).then_some(ListTerminatorKind::Regular)
}
RecoveryContextKind::Except => {
matches!(p.current_token_kind(), TokenKind::Finally | TokenKind::Else)
.then_some(ListTerminatorKind::Regular)
}
RecoveryContextKind::AssignmentTargets => {
// test_ok assign_targets_terminator
// x = y = z = 1; a, b
// x = y = z = 1
// a, b
matches!(p.current_token_kind(), TokenKind::Newline | TokenKind::Semi)
.then_some(ListTerminatorKind::Regular)
}

// Tokens other than `]` are for better error recovery. For example, recover when we
// find the `:` of a clause header or the equal of a type assignment.
RecoveryContextKind::TypeParams => {
matches!(
p.current_token_kind(),
TokenKind::Rsqb
| TokenKind::Newline
| TokenKind::Colon
| TokenKind::Equal
| TokenKind::Lpar
)
if p.at(TokenKind::Rsqb) {
Some(ListTerminatorKind::Regular)
} else {
matches!(
p.current_token_kind(),
TokenKind::Newline | TokenKind::Colon | TokenKind::Equal | TokenKind::Lpar
)
.then_some(ListTerminatorKind::ErrorRecovery)
}
}
// The names of an import statement cannot be parenthesized, so `)` is not a
// terminator.
Expand All @@ -914,6 +936,7 @@ impl RecoveryContextKind {
// import a, b
// c, d
matches!(p.current_token_kind(), TokenKind::Semi | TokenKind::Newline)
.then_some(ListTerminatorKind::Regular)
}
RecoveryContextKind::ImportFromAsNames(_) => {
// test_ok from_import_stmt_terminator
Expand All @@ -926,20 +949,21 @@ impl RecoveryContextKind {
p.current_token_kind(),
TokenKind::Rpar | TokenKind::Semi | TokenKind::Newline
)
.then_some(ListTerminatorKind::Regular)
}
// The elements in a container expression cannot end with a newline
// as all of them are actually non-logical newlines.
RecoveryContextKind::Slices | RecoveryContextKind::ListElements => {
p.at(TokenKind::Rsqb)
}
RecoveryContextKind::SetElements | RecoveryContextKind::DictElements => {
p.at(TokenKind::Rbrace)
p.at(TokenKind::Rsqb).then_some(ListTerminatorKind::Regular)
}
RecoveryContextKind::SetElements | RecoveryContextKind::DictElements => p
.at(TokenKind::Rbrace)
.then_some(ListTerminatorKind::Regular),
RecoveryContextKind::TupleElements(parenthesized) => {
if parenthesized.is_yes() {
p.at(TokenKind::Rpar)
p.at(TokenKind::Rpar).then_some(ListTerminatorKind::Regular)
} else {
p.at_sequence_end()
p.at_sequence_end().then_some(ListTerminatorKind::Regular)
}
}
RecoveryContextKind::SequenceMatchPattern(parentheses) => match parentheses {
Expand All @@ -951,42 +975,58 @@ impl RecoveryContextKind {
// case a, b: ...
// case a, b if x: ...
matches!(p.current_token_kind(), TokenKind::Colon | TokenKind::If)
.then_some(ListTerminatorKind::Regular)
}
Some(parentheses) => {
// test_ok match_sequence_pattern_parentheses_terminator
// match subject:
// case [a, b]: ...
// case (a, b): ...
p.at(parentheses.closing_kind())
.then_some(ListTerminatorKind::Regular)
}
},
RecoveryContextKind::MatchPatternMapping => p.at(TokenKind::Rbrace),
RecoveryContextKind::MatchPatternClassArguments => p.at(TokenKind::Rpar),
RecoveryContextKind::Arguments => p.at(TokenKind::Rpar),
RecoveryContextKind::MatchPatternMapping => p
.at(TokenKind::Rbrace)
.then_some(ListTerminatorKind::Regular),
RecoveryContextKind::MatchPatternClassArguments => {
p.at(TokenKind::Rpar).then_some(ListTerminatorKind::Regular)
}
RecoveryContextKind::Arguments => {
p.at(TokenKind::Rpar).then_some(ListTerminatorKind::Regular)
}
RecoveryContextKind::DeleteTargets | RecoveryContextKind::Identifiers => {
// test_ok del_targets_terminator
// del a, b; c, d
// del a, b
// c, d
matches!(p.current_token_kind(), TokenKind::Semi | TokenKind::Newline)
.then_some(ListTerminatorKind::Regular)
}
RecoveryContextKind::Parameters(function_kind) => {
// `lambda x, y: ...` or `def f(x, y): ...`
p.at(function_kind.list_terminator())
if p.at(function_kind.list_terminator()) {
Some(ListTerminatorKind::Regular)
} else {
// To recover from missing closing parentheses
|| p.at(TokenKind::Rarrow)
|| p.at_compound_stmt()
(p.at(TokenKind::Rarrow) || p.at_compound_stmt())
.then_some(ListTerminatorKind::ErrorRecovery)
}
}
RecoveryContextKind::WithItems(with_item_kind) => match with_item_kind {
WithItemKind::Parenthesized => {
matches!(p.current_token_kind(), TokenKind::Rpar | TokenKind::Colon)
}
WithItemKind::Unparenthesized | WithItemKind::ParenthesizedExpression => {
p.at(TokenKind::Colon)
}
WithItemKind::Parenthesized => match p.current_token_kind() {
TokenKind::Rpar => Some(ListTerminatorKind::Regular),
TokenKind::Colon => Some(ListTerminatorKind::ErrorRecovery),
_ => None,
},
WithItemKind::Unparenthesized | WithItemKind::ParenthesizedExpression => p
.at(TokenKind::Colon)
.then_some(ListTerminatorKind::Regular),
},
RecoveryContextKind::FStringElements(kind) => {
p.at(kind.list_terminator())
if p.at(kind.list_terminator()) {
Some(ListTerminatorKind::Regular)
} else {
// test_err unterminated_fstring_newline_recovery
// f"hello
// 1 + 1
Expand All @@ -996,7 +1036,9 @@ impl RecoveryContextKind {
// 3 + 3
// f"hello {x}
// 4 + 4
|| p.at(TokenKind::Newline)
p.at(TokenKind::Newline)
.then_some(ListTerminatorKind::ErrorRecovery)
}
}
}
}
Expand Down

0 comments on commit 60ea72a

Please sign in to comment.