Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix double handling in collect_tokens #129346

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4172,6 +4172,7 @@ dependencies = [
"rustc_errors",
"rustc_feature",
"rustc_fluent_macro",
"rustc_index",
"rustc_lexer",
"rustc_macros",
"rustc_session",
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_index/src/interval.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ mod tests;
#[derive(Debug, Clone)]
pub struct IntervalSet<I> {
// Start, end
map: SmallVec<[(u32, u32); 4]>,
map: SmallVec<[(u32, u32); 2]>,
domain: usize,
_data: PhantomData<I>,
}
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_parse/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ rustc_data_structures = { path = "../rustc_data_structures" }
rustc_errors = { path = "../rustc_errors" }
rustc_feature = { path = "../rustc_feature" }
rustc_fluent_macro = { path = "../rustc_fluent_macro" }
rustc_index = { path = "../rustc_index" }
rustc_lexer = { path = "../rustc_lexer" }
rustc_macros = { path = "../rustc_macros" }
rustc_session = { path = "../rustc_session" }
Expand Down
133 changes: 79 additions & 54 deletions compiler/rustc_parse/src/parser/attr_wrapper.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::{iter, mem};

use rustc_ast::token::{Delimiter, Token, TokenKind};
Expand All @@ -6,6 +7,7 @@ use rustc_ast::tokenstream::{
Spacing, ToAttrTokenStream,
};
use rustc_ast::{self as ast, AttrVec, Attribute, HasAttrs, HasTokens};
use rustc_data_structures::fx::FxHashSet;
use rustc_errors::PResult;
use rustc_session::parse::ParseSess;
use rustc_span::{sym, Span, DUMMY_SP};
Expand Down Expand Up @@ -134,30 +136,17 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
node_replacements.array_windows()
{
assert!(
node_range.0.end <= next_node_range.0.start
|| node_range.0.end >= next_node_range.0.end,
"Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
node_range.0.end <= next_node_range.0.start,
"Node ranges should be disjoint: ({:?}, {:?}) ({:?}, {:?})",
node_range,
tokens,
next_node_range,
next_tokens,
);
}

// Process the replace ranges, starting from the highest start
// position and working our way back. If have tokens like:
//
// `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
//
// Then we will generate replace ranges for both
// the `#[cfg(FALSE)] field: bool` and the entire
// `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
//
// By starting processing from the replace range with the greatest
// start position, we ensure that any (outer) replace range which
// encloses another (inner) replace range will fully overwrite the
// inner range's replacement.
for (node_range, target) in node_replacements.into_iter().rev() {
// Process the replace ranges.
for (node_range, target) in node_replacements.into_iter() {
assert!(
!node_range.0.is_empty(),
"Cannot replace an empty node range: {:?}",
Expand Down Expand Up @@ -234,6 +223,8 @@ impl<'a> Parser<'a> {
force_collect: ForceCollect,
f: impl FnOnce(&mut Self, AttrVec) -> PResult<'a, (R, Trailing, UsePreAttrPos)>,
) -> PResult<'a, R> {
let possible_capture_mode = self.capture_cfg;

// We must collect if anything could observe the collected tokens, i.e.
// if any of the following conditions hold.
// - We are force collecting tokens (because force collection requires
Expand All @@ -244,9 +235,9 @@ impl<'a> Parser<'a> {
// - Our target supports custom inner attributes (custom
// inner attribute invocation might require token capturing).
|| R::SUPPORTS_CUSTOM_INNER_ATTRS
// - We are in `capture_cfg` mode (which requires tokens if
// - We are in "possible capture mode" (which requires tokens if
// the parsed node has `#[cfg]` or `#[cfg_attr]` attributes).
|| self.capture_cfg;
|| possible_capture_mode;
if !needs_collection {
return Ok(f(self, attrs.attrs)?.0);
}
Expand All @@ -267,18 +258,48 @@ impl<'a> Parser<'a> {
res?
};

// When we're not in `capture_cfg` mode, then skip collecting and
// return early if either of the following conditions hold.
// - `None`: Our target doesn't support tokens at all (e.g. `NtIdent`).
// - `Some(None)`: Our target supports tokens and has none.
// - `Some(Some(_))`: Our target already has tokens set (e.g. we've
// parsed something like `#[my_attr] $item`). The actual parsing code
// takes care of prepending any attributes to the nonterminal, so we
// don't need to modify the already captured tokens.
// parsed something like `#[my_attr] $item`).
let ret_can_hold_tokens = matches!(ret.tokens_mut(), Some(None));

// Ignore any attributes we've previously processed. This happens when
// an inner call to `collect_tokens` returns an AST node and then an
// outer call ends up with the same AST node without any additional
// wrapping layer.
let mut seen_indices = FxHashSet::default();
for (i, attr) in ret.attrs().iter().enumerate() {
let is_unseen = self.capture_state.seen_attrs.insert(attr.id);
if !is_unseen {
seen_indices.insert(i);
}
}
let ret_attrs: Cow<'_, [Attribute]> =
if seen_indices.is_empty() {
Cow::Borrowed(ret.attrs())
} else {
let ret_attrs =
ret.attrs()
.iter()
.enumerate()
.filter_map(|(i, attr)| {
if seen_indices.contains(&i) { None } else { Some(attr.clone()) }
})
.collect();
Cow::Owned(ret_attrs)
};

// When we're not in "definite capture mode", then skip collecting and
// return early if `ret` doesn't support tokens or already has some.
//
// Note that this check is independent of `force_collect`. There's no
// need to collect tokens when we don't support tokens or already have
// tokens.
if !self.capture_cfg && matches!(ret.tokens_mut(), None | Some(Some(_))) {
let definite_capture_mode = self.capture_cfg
&& matches!(self.capture_state.capturing, Capturing::Yes)
&& has_cfg_or_cfg_attr(&ret_attrs);
if !definite_capture_mode && !ret_can_hold_tokens {
return Ok(ret);
}

Expand All @@ -297,12 +318,12 @@ impl<'a> Parser<'a> {
// outer and inner attributes. So this check is more precise than
// the earlier `needs_tokens` check, and we don't need to
// check `R::SUPPORTS_CUSTOM_INNER_ATTRS`.)
|| needs_tokens(ret.attrs())
// - We are in `capture_cfg` mode and there are `#[cfg]` or
// `#[cfg_attr]` attributes. (During normal non-`capture_cfg`
// parsing, we don't need any special capturing for those
// attributes, because they're builtin.)
|| (self.capture_cfg && has_cfg_or_cfg_attr(ret.attrs()));
|| needs_tokens(&ret_attrs)
// - We are in "definite capture mode", which requires that there
// are `#[cfg]` or `#[cfg_attr]` attributes. (During normal
// non-`capture_cfg` parsing, we don't need any special capturing
// for those attributes, because they're builtin.)
|| definite_capture_mode;
if !needs_collection {
return Ok(ret);
}
Expand Down Expand Up @@ -336,7 +357,7 @@ impl<'a> Parser<'a> {
// `Parser::parse_inner_attributes`, and pair them in a `ParserReplacement` with `None`,
// which means the relevant tokens will be removed. (More details below.)
let mut inner_attr_parser_replacements = Vec::new();
for attr in ret.attrs() {
for attr in ret_attrs.iter() {
if attr.style == ast::AttrStyle::Inner {
if let Some(inner_attr_parser_range) =
self.capture_state.inner_attr_parser_ranges.remove(&attr.id)
Expand All @@ -359,11 +380,10 @@ impl<'a> Parser<'a> {
// from `ParserRange` form to `NodeRange` form. We will perform the actual
// replacement only when we convert the `LazyAttrTokenStream` to an
// `AttrTokenStream`.
self.capture_state.parser_replacements
[parser_replacements_start..parser_replacements_end]
.iter()
.cloned()
.chain(inner_attr_parser_replacements.iter().cloned())
self.capture_state
.parser_replacements
.drain(parser_replacements_start..parser_replacements_end)
.chain(inner_attr_parser_replacements.into_iter())
.map(|(parser_range, data)| {
(NodeRange::new(parser_range, collect_pos.start_pos), data)
})
Expand Down Expand Up @@ -399,20 +419,12 @@ impl<'a> Parser<'a> {
break_last_token: self.break_last_token,
node_replacements,
});
let mut tokens_used = false;

// If we support tokens and don't already have them, store the newly captured tokens.
if let Some(target_tokens @ None) = ret.tokens_mut() {
*target_tokens = Some(tokens.clone());
}

// If `capture_cfg` is set and we're inside a recursive call to
// `collect_tokens`, then we need to register a replace range if we
// have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager
// cfg-expansion on the captured token stream.
if self.capture_cfg
&& matches!(self.capture_state.capturing, Capturing::Yes)
&& has_cfg_or_cfg_attr(ret.attrs())
{
// If in "definite capture mode" we need to register a replace range
// for the `#[cfg]` and/or `#[cfg_attr]` attrs. This allows us to run
// eager cfg-expansion on the captured token stream.
if definite_capture_mode {
assert!(!self.break_last_token, "Should not have unglued last token with cfg attr");

// What is the status here when parsing the example code at the top of this method?
Expand All @@ -429,7 +441,9 @@ impl<'a> Parser<'a> {
// cfg-expand this AST node.
let start_pos =
if has_outer_attrs { attrs.start_pos.unwrap() } else { collect_pos.start_pos };
let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens };
let target =
AttrsTarget { attrs: ret_attrs.iter().cloned().collect(), tokens: tokens.clone() };
tokens_used = true;
self.capture_state
.parser_replacements
.push((ParserRange(start_pos..end_pos), Some(target)));
Expand All @@ -438,7 +452,16 @@ impl<'a> Parser<'a> {
// the outermost call to this method.
self.capture_state.parser_replacements.clear();
self.capture_state.inner_attr_parser_ranges.clear();
self.capture_state.seen_attrs.clear();
}

// If we support tokens and don't already have them, store the newly captured tokens.
if let Some(target_tokens @ None) = ret.tokens_mut() {
tokens_used = true;
*target_tokens = Some(tokens);
}

assert!(tokens_used); // check we didn't create `tokens` unnecessarily
Ok(ret)
}
}
Expand Down Expand Up @@ -510,9 +533,11 @@ fn make_attr_token_stream(
}

/// Tokens are needed if:
/// - any non-single-segment attributes (other than doc comments) are present; or
/// - any `cfg_attr` attributes are present;
/// - any single-segment, non-builtin attributes are present.
/// - any non-single-segment attributes (other than doc comments) are present,
/// e.g. `rustfmt::skip`; or
/// - any `cfg_attr` attributes are present; or
/// - any single-segment, non-builtin attributes are present, e.g. `derive`,
/// `test`, `global_allocator`.
fn needs_tokens(attrs: &[ast::Attribute]) -> bool {
attrs.iter().any(|attr| match attr.ident() {
None => !attr.is_doc_comment(),
Expand Down
10 changes: 8 additions & 2 deletions compiler/rustc_parse/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ use rustc_ast_pretty::pprust;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sync::Lrc;
use rustc_errors::{Applicability, Diag, FatalError, MultiSpan, PResult};
use rustc_index::interval::IntervalSet;
use rustc_session::parse::ParseSess;
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use rustc_span::{Span, DUMMY_SP};
Expand Down Expand Up @@ -183,7 +184,7 @@ pub struct Parser<'a> {
// This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure
// it doesn't unintentionally get bigger.
#[cfg(target_pointer_width = "64")]
rustc_data_structures::static_assert_size!(Parser<'_>, 256);
rustc_data_structures::static_assert_size!(Parser<'_>, 288);

/// Stores span information about a closure.
#[derive(Clone, Debug)]
Expand Down Expand Up @@ -238,7 +239,8 @@ impl NodeRange {
// is the position of the function's start token. This gives
// `NodeRange(10..15)`.
fn new(ParserRange(parser_range): ParserRange, start_pos: u32) -> NodeRange {
assert!(parser_range.start >= start_pos && parser_range.end >= start_pos);
assert!(!parser_range.is_empty());
assert!(parser_range.start >= start_pos);
NodeRange((parser_range.start - start_pos)..(parser_range.end - start_pos))
}
}
Expand All @@ -260,6 +262,9 @@ struct CaptureState {
capturing: Capturing,
parser_replacements: Vec<ParserReplacement>,
inner_attr_parser_ranges: FxHashMap<AttrId, ParserRange>,
// `IntervalSet` is good for perf because attrs are mostly added to this
// set in contiguous ranges.
seen_attrs: IntervalSet<AttrId>,
}

/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
Expand Down Expand Up @@ -457,6 +462,7 @@ impl<'a> Parser<'a> {
capturing: Capturing::No,
parser_replacements: Vec::new(),
inner_attr_parser_ranges: Default::default(),
seen_attrs: IntervalSet::new(u32::MAX as usize),
},
current_closure: None,
recovery: Recovery::Allowed,
Expand Down
7 changes: 0 additions & 7 deletions tests/crashes/129166.rs

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// This was triggering an assertion failure in `NodeRange::new`.
nnethercote marked this conversation as resolved.
Show resolved Hide resolved

#![feature(cfg_eval)]
#![feature(stmt_expr_attributes)]

fn f() -> u32 {
#[cfg_eval] #[cfg(not(FALSE))] 0
//~^ ERROR removing an expression is not supported in this position
}

fn main() {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
error: removing an expression is not supported in this position
--> $DIR/invalid-node-range-issue-129166.rs:7:17
|
LL | #[cfg_eval] #[cfg(not(FALSE))] 0
| ^^^^^^^^^^^^^^^^^^

error: aborting due to 1 previous error

12 changes: 5 additions & 7 deletions tests/ui/proc-macro/macro-rules-derive-cfg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,15 @@ extern crate test_macros;
macro_rules! produce_it {
($expr:expr) => {
#[derive(Print)]
struct Foo {
val: [bool; {
let a = #[cfg_attr(not(FALSE), rustc_dummy(first))] $expr;
0
}]
}
Comment on lines -17 to -22
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This case stopped working?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in #132587.

struct Foo(
[bool; #[cfg_attr(not(FALSE), rustc_dummy(first))] $expr]
);
}
}

produce_it!(#[cfg_attr(not(FALSE), rustc_dummy(second))] {
#![cfg_attr(not(FALSE), allow(unused))]
#![cfg_attr(not(FALSE), rustc_dummy(third))]
#[cfg_attr(not(FALSE), rustc_dummy(fourth))]
30
});

Expand Down
Loading
Loading