Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Parser::look_ahead #127636

Merged
merged 3 commits into from
Jul 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 26 additions & 30 deletions compiler/rustc_parse/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1118,41 +1118,37 @@ impl<'a> Parser<'a> {
return looker(&self.token);
}

if let Some(&(_, span, _, delim)) = self.token_cursor.stack.last()
&& delim != Delimiter::Invisible
{
// We are not in the outermost token stream, and the token stream
// we are in has non-skipped delimiters. Look for skipped
// delimiters in the lookahead range.
let tree_cursor = &self.token_cursor.tree_cursor;
let all_normal = (0..dist).all(|i| {
let token = tree_cursor.look_ahead(i);
!matches!(token, Some(TokenTree::Delimited(.., Delimiter::Invisible, _)))
});
if all_normal {
// There were no skipped delimiters. Do lookahead by plain indexing.
return match tree_cursor.look_ahead(dist - 1) {
Some(tree) => {
// Indexing stayed within the current token stream.
match tree {
TokenTree::Token(token, _) => looker(token),
TokenTree::Delimited(dspan, _, delim, _) => {
looker(&Token::new(token::OpenDelim(*delim), dspan.open))
}
// Typically around 98% of the `dist > 0` cases have `dist == 1`, so we
// have a fast special case for that.
if dist == 1 {
// The index is zero because the tree cursor's index always points
// to the next token to be gotten.
match self.token_cursor.tree_cursor.look_ahead(0) {
Some(tree) => {
// Indexing stayed within the current token tree.
return match tree {
TokenTree::Token(token, _) => looker(token),
TokenTree::Delimited(dspan, _, delim, _) => {
looker(&Token::new(token::OpenDelim(*delim), dspan.open))
}
};
}
None => {
// The tree cursor lookahead went (one) past the end of the
// current token tree. Try to return a close delimiter.
if let Some(&(_, span, _, delim)) = self.token_cursor.stack.last()
&& delim != Delimiter::Invisible
{
// We are not in the outermost token stream, so we have
// delimiters. Also, those delimiters are not skipped.
return looker(&Token::new(token::CloseDelim(delim), span.close));
}
None => {
// Indexing went past the end of the current token
// stream. Use the close delimiter, no matter how far
// ahead `dist` went.
looker(&Token::new(token::CloseDelim(delim), span.close))
}
};
}
}
}

// We are in a more complex case. Just clone the token cursor and use
// `next`, skipping delimiters as necessary. Slow but simple.
// Just clone the token cursor and use `next`, skipping delimiters as
// necessary. Slow but simple.
let mut cursor = self.token_cursor.clone();
let mut i = 0;
let mut token = Token::dummy();
Expand Down
121 changes: 121 additions & 0 deletions compiler/rustc_parse/src/parser/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1376,6 +1376,127 @@ fn ttdelim_span() {
});
}

// Uses a macro rather than a function so that failure messages mention the
// correct line in the test function.
fmease marked this conversation as resolved.
Show resolved Hide resolved
macro_rules! look {
    ($parser:ident, $distance:literal, $expected:expr) => {
        // Peek `$distance` tokens ahead of the parser's current position and
        // require that the token found there has exactly the expected kind.
        // The expected kind is the left-hand operand of the assertion.
        $parser.look_ahead($distance, |peeked| {
            assert_eq!($expected, peeked.kind);
        });
    };
}

/// Exercises `look_ahead` at several parser positions within a flat item
/// sequence, including lookahead distances that run past the final token.
#[test]
fn look_ahead() {
    create_default_session_globals_then(|| {
        // Interned names that appear in the source text parsed below.
        let fn_name = Symbol::intern("f");
        let param_name = Symbol::intern("x");
        let struct_name = Symbol::intern("S");
        let not_raw = IdentIsRaw::No;

        let sess = psess();
        let mut parser = string_to_parser(&sess, String::from("fn f(x: u32) { x } struct S;"));

        // The parser starts out positioned on `fn`.
        look!(parser, 0, token::Ident(kw::Fn, not_raw));
        look!(parser, 1, token::Ident(fn_name, not_raw));
        look!(parser, 2, token::OpenDelim(Delimiter::Parenthesis));
        look!(parser, 3, token::Ident(param_name, not_raw));
        look!(parser, 4, token::Colon);
        look!(parser, 5, token::Ident(sym::u32, not_raw));
        look!(parser, 6, token::CloseDelim(Delimiter::Parenthesis));
        look!(parser, 7, token::OpenDelim(Delimiter::Brace));
        look!(parser, 8, token::Ident(param_name, not_raw));
        look!(parser, 9, token::CloseDelim(Delimiter::Brace));
        look!(parser, 10, token::Ident(kw::Struct, not_raw));
        look!(parser, 11, token::Ident(struct_name, not_raw));
        look!(parser, 12, token::Semi);
        // Every distance beyond the last token is expected to yield `Eof`.
        look!(parser, 13, token::Eof);
        look!(parser, 14, token::Eof);
        look!(parser, 15, token::Eof);
        look!(parser, 100, token::Eof);

        // Advance three tokens so the current token is the first `x`.
        (0..3).for_each(|_| {
            parser.bump();
        });
        look!(parser, 0, token::Ident(param_name, not_raw));
        look!(parser, 1, token::Colon);
        look!(parser, 2, token::Ident(sym::u32, not_raw));
        look!(parser, 3, token::CloseDelim(Delimiter::Parenthesis));
        look!(parser, 4, token::OpenDelim(Delimiter::Brace));
        look!(parser, 5, token::Ident(param_name, not_raw));
        look!(parser, 6, token::CloseDelim(Delimiter::Brace));
        look!(parser, 7, token::Ident(kw::Struct, not_raw));
        look!(parser, 8, token::Ident(struct_name, not_raw));
        look!(parser, 9, token::Semi);
        look!(parser, 10, token::Eof);
        look!(parser, 11, token::Eof);
        look!(parser, 100, token::Eof);

        // Advance nine more tokens so the current token is the `;`.
        (0..9).for_each(|_| {
            parser.bump();
        });
        look!(parser, 0, token::Semi);
        // Every distance beyond the last token is expected to yield `Eof`.
        look!(parser, 1, token::Eof);
        look!(parser, 100, token::Eof);

        // Step over the `;`, leaving the parser past the end of the stream.
        parser.bump();
        look!(parser, 0, token::Eof);
        look!(parser, 1, token::Eof);
        look!(parser, 100, token::Eof);

        // A further bump once at `Eof` must leave the results unchanged.
        parser.bump();
        look!(parser, 0, token::Eof);
        look!(parser, 1, token::Eof);
        look!(parser, 100, token::Eof);
    });
}

/// Regression test: `look_ahead` used to misbehave when the parser was
/// positioned somewhere other than the outermost token stream. Here the
/// lookahead starts inside the braces of a `mod` item.
#[test]
fn look_ahead_non_outermost_stream() {
    create_default_session_globals_then(|| {
        // Interned names that appear in the source text parsed below.
        let fn_name = Symbol::intern("f");
        let param_name = Symbol::intern("x");
        let struct_name = Symbol::intern("S");
        let not_raw = IdentIsRaw::No;

        let sess = psess();
        let mut parser =
            string_to_parser(&sess, String::from("mod m { fn f(x: u32) { x } struct S; }"));

        // Advance three tokens to reach `fn`. That token lives inside the
        // `mod m { ... }` body, so it is not in the outermost token stream.
        (0..3).for_each(|_| {
            parser.bump();
        });
        look!(parser, 0, token::Ident(kw::Fn, not_raw));
        look!(parser, 1, token::Ident(fn_name, not_raw));
        look!(parser, 2, token::OpenDelim(Delimiter::Parenthesis));
        look!(parser, 3, token::Ident(param_name, not_raw));
        look!(parser, 4, token::Colon);
        look!(parser, 5, token::Ident(sym::u32, not_raw));
        look!(parser, 6, token::CloseDelim(Delimiter::Parenthesis));
        look!(parser, 7, token::OpenDelim(Delimiter::Brace));
        look!(parser, 8, token::Ident(param_name, not_raw));
        look!(parser, 9, token::CloseDelim(Delimiter::Brace));
        look!(parser, 10, token::Ident(kw::Struct, not_raw));
        look!(parser, 11, token::Ident(struct_name, not_raw));
        look!(parser, 12, token::Semi);
        // The closing brace of the enclosing `mod` body.
        look!(parser, 13, token::CloseDelim(Delimiter::Brace));
        // Every distance beyond the last token is expected to yield `Eof`.
        look!(parser, 14, token::Eof);
        look!(parser, 15, token::Eof);
        look!(parser, 100, token::Eof);
    });
}

// This tests that when parsing a string (rather than a file) we don't try
// and read in a file for a module declaration and just parse a stub.
// See `recurse_into_file_modules` in the parser.
Expand Down
Loading