From 934edc50bb0387bfef48de3c1154ca03c32caed8 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 21 Aug 2024 15:01:10 +1000 Subject: [PATCH 1/6] Tweak `tests/ui/proc-macro/macro-rules-derive-cfg.rs`. - Trim some unnecessary fat from the type declaration. - Add another attribute, to make it a stronger test of `cfg_attr` processing. --- tests/ui/proc-macro/macro-rules-derive-cfg.rs | 12 +- .../proc-macro/macro-rules-derive-cfg.stdout | 185 ++++++++---------- 2 files changed, 87 insertions(+), 110 deletions(-) diff --git a/tests/ui/proc-macro/macro-rules-derive-cfg.rs b/tests/ui/proc-macro/macro-rules-derive-cfg.rs index 68026a60be686..c34f1695761cf 100644 --- a/tests/ui/proc-macro/macro-rules-derive-cfg.rs +++ b/tests/ui/proc-macro/macro-rules-derive-cfg.rs @@ -14,17 +14,15 @@ extern crate test_macros; macro_rules! produce_it { ($expr:expr) => { #[derive(Print)] - struct Foo { - val: [bool; { - let a = #[cfg_attr(not(FALSE), rustc_dummy(first))] $expr; - 0 - }] - } + struct Foo( + [bool; #[cfg_attr(not(FALSE), rustc_dummy(first))] $expr] + ); } } produce_it!(#[cfg_attr(not(FALSE), rustc_dummy(second))] { - #![cfg_attr(not(FALSE), allow(unused))] + #![cfg_attr(not(FALSE), rustc_dummy(third))] + #[cfg_attr(not(FALSE), rustc_dummy(fourth))] 30 }); diff --git a/tests/ui/proc-macro/macro-rules-derive-cfg.stdout b/tests/ui/proc-macro/macro-rules-derive-cfg.stdout index fadf210127e4d..c1e46b50d40c2 100644 --- a/tests/ui/proc-macro/macro-rules-derive-cfg.stdout +++ b/tests/ui/proc-macro/macro-rules-derive-cfg.stdout @@ -1,21 +1,9 @@ -PRINT-DERIVE INPUT (DISPLAY): struct Foo -{ - val : - [bool; - { - let a = #[rustc_dummy(first)] #[rustc_dummy(second)] - { #![allow(unused)] 30 }; 0 - }] -} -PRINT-DERIVE DEEP-RE-COLLECTED (DISPLAY): struct Foo -{ - val : - [bool; - { - let a = #[rustc_dummy(first)] #[rustc_dummy(second)] - { #! [allow(unused)] 30 }; 0 - }] -} +PRINT-DERIVE INPUT (DISPLAY): struct +Foo([bool; #[rustc_dummy(first)] #[rustc_dummy(second)] +{ #![rustc_dummy(third)] #[rustc_dummy(fourth)] 30 }]); +PRINT-DERIVE DEEP-RE-COLLECTED (DISPLAY): struct +Foo([bool; #[rustc_dummy(first)] #[rustc_dummy(second)] +{ #! 
[rustc_dummy(third)] #[rustc_dummy(fourth)] 30 }]); PRINT-DERIVE INPUT (DEBUG): TokenStream [ Ident { ident: "struct", @@ -26,155 +14,146 @@ PRINT-DERIVE INPUT (DEBUG): TokenStream [ span: $DIR/macro-rules-derive-cfg.rs:17:16: 17:19 (#3), }, Group { - delimiter: Brace, + delimiter: Parenthesis, stream: TokenStream [ - Ident { - ident: "val", - span: $DIR/macro-rules-derive-cfg.rs:18:13: 18:16 (#3), - }, - Punct { - ch: ':', - spacing: Alone, - span: $DIR/macro-rules-derive-cfg.rs:18:16: 18:17 (#3), - }, Group { delimiter: Bracket, stream: TokenStream [ Ident { ident: "bool", - span: $DIR/macro-rules-derive-cfg.rs:18:19: 18:23 (#3), + span: $DIR/macro-rules-derive-cfg.rs:18:14: 18:18 (#3), }, Punct { ch: ';', spacing: Alone, - span: $DIR/macro-rules-derive-cfg.rs:18:23: 18:24 (#3), + span: $DIR/macro-rules-derive-cfg.rs:18:18: 18:19 (#3), + }, + Punct { + ch: '#', + spacing: Alone, + span: $DIR/macro-rules-derive-cfg.rs:18:20: 18:21 (#3), }, Group { - delimiter: Brace, + delimiter: Bracket, stream: TokenStream [ Ident { - ident: "let", - span: $DIR/macro-rules-derive-cfg.rs:19:17: 19:20 (#3), + ident: "rustc_dummy", + span: $DIR/macro-rules-derive-cfg.rs:18:43: 18:54 (#3), }, + Group { + delimiter: Parenthesis, + stream: TokenStream [ + Ident { + ident: "first", + span: $DIR/macro-rules-derive-cfg.rs:18:55: 18:60 (#3), + }, + ], + span: $DIR/macro-rules-derive-cfg.rs:18:54: 18:61 (#3), + }, + ], + span: $DIR/macro-rules-derive-cfg.rs:18:21: 18:63 (#3), + }, + Punct { + ch: '#', + spacing: Alone, + span: $DIR/macro-rules-derive-cfg.rs:23:13: 23:14 (#0), + }, + Group { + delimiter: Bracket, + stream: TokenStream [ Ident { - ident: "a", - span: $DIR/macro-rules-derive-cfg.rs:19:21: 19:22 (#3), + ident: "rustc_dummy", + span: $DIR/macro-rules-derive-cfg.rs:23:36: 23:47 (#0), }, - Punct { - ch: '=', - spacing: Alone, - span: $DIR/macro-rules-derive-cfg.rs:19:23: 19:24 (#3), + Group { + delimiter: Parenthesis, + stream: TokenStream [ + Ident { + ident: "second", + span: $DIR/macro-rules-derive-cfg.rs:23:48: 23:54 (#0), + }, + ], + span: $DIR/macro-rules-derive-cfg.rs:23:47: 23:55 (#0), }, + ], + span: $DIR/macro-rules-derive-cfg.rs:23:14: 23:57 (#0), + }, + Group { + delimiter: Brace, + stream: TokenStream [ Punct { ch: '#', + spacing: Joint, + span: $DIR/macro-rules-derive-cfg.rs:24:5: 24:6 (#0), + }, + Punct { + ch: '!', spacing: Alone, - span: $DIR/macro-rules-derive-cfg.rs:19:25: 19:26 (#3), + span: $DIR/macro-rules-derive-cfg.rs:24:6: 24:7 (#0), }, Group { delimiter: Bracket, stream: TokenStream [ Ident { ident: "rustc_dummy", - span: $DIR/macro-rules-derive-cfg.rs:19:48: 19:59 (#3), + span: $DIR/macro-rules-derive-cfg.rs:24:29: 24:40 (#0), }, Group { delimiter: Parenthesis, stream: TokenStream [ Ident { - ident: "first", - span: $DIR/macro-rules-derive-cfg.rs:19:60: 19:65 (#3), + ident: "third", + span: $DIR/macro-rules-derive-cfg.rs:24:41: 24:46 (#0), }, ], - span: $DIR/macro-rules-derive-cfg.rs:19:59: 19:66 (#3), + span: $DIR/macro-rules-derive-cfg.rs:24:40: 24:47 (#0), }, ], - span: $DIR/macro-rules-derive-cfg.rs:19:26: 19:68 (#3), + span: $DIR/macro-rules-derive-cfg.rs:24:7: 24:49 (#0), }, Punct { ch: '#', spacing: Alone, - span: $DIR/macro-rules-derive-cfg.rs:26:13: 26:14 (#0), + span: $DIR/macro-rules-derive-cfg.rs:25:5: 25:6 (#0), }, Group { delimiter: Bracket, stream: TokenStream [ Ident { ident: "rustc_dummy", - span: $DIR/macro-rules-derive-cfg.rs:26:36: 26:47 (#0), + span: $DIR/macro-rules-derive-cfg.rs:25:28: 25:39 (#0), }, Group { delimiter: Parenthesis, stream: TokenStream [ 
Ident { - ident: "second", - span: $DIR/macro-rules-derive-cfg.rs:26:48: 26:54 (#0), + ident: "fourth", + span: $DIR/macro-rules-derive-cfg.rs:25:40: 25:46 (#0), }, ], - span: $DIR/macro-rules-derive-cfg.rs:26:47: 26:55 (#0), + span: $DIR/macro-rules-derive-cfg.rs:25:39: 25:47 (#0), }, ], - span: $DIR/macro-rules-derive-cfg.rs:26:14: 26:57 (#0), - }, - Group { - delimiter: Brace, - stream: TokenStream [ - Punct { - ch: '#', - spacing: Joint, - span: $DIR/macro-rules-derive-cfg.rs:27:5: 27:6 (#0), - }, - Punct { - ch: '!', - spacing: Alone, - span: $DIR/macro-rules-derive-cfg.rs:27:6: 27:7 (#0), - }, - Group { - delimiter: Bracket, - stream: TokenStream [ - Ident { - ident: "allow", - span: $DIR/macro-rules-derive-cfg.rs:27:29: 27:34 (#0), - }, - Group { - delimiter: Parenthesis, - stream: TokenStream [ - Ident { - ident: "unused", - span: $DIR/macro-rules-derive-cfg.rs:27:35: 27:41 (#0), - }, - ], - span: $DIR/macro-rules-derive-cfg.rs:27:34: 27:42 (#0), - }, - ], - span: $DIR/macro-rules-derive-cfg.rs:27:7: 27:44 (#0), - }, - Literal { - kind: Integer, - symbol: "30", - suffix: None, - span: $DIR/macro-rules-derive-cfg.rs:28:5: 28:7 (#0), - }, - ], - span: $DIR/macro-rules-derive-cfg.rs:26:58: 29:2 (#0), - }, - Punct { - ch: ';', - spacing: Alone, - span: $DIR/macro-rules-derive-cfg.rs:19:74: 19:75 (#3), + span: $DIR/macro-rules-derive-cfg.rs:25:6: 25:49 (#0), }, Literal { kind: Integer, - symbol: "0", + symbol: "30", suffix: None, - span: $DIR/macro-rules-derive-cfg.rs:20:17: 20:18 (#3), + span: $DIR/macro-rules-derive-cfg.rs:26:5: 26:7 (#0), }, ], - span: $DIR/macro-rules-derive-cfg.rs:18:25: 21:14 (#3), + span: $DIR/macro-rules-derive-cfg.rs:23:58: 27:2 (#0), }, ], - span: $DIR/macro-rules-derive-cfg.rs:18:18: 21:15 (#3), + span: $DIR/macro-rules-derive-cfg.rs:18:13: 18:70 (#3), }, ], - span: $DIR/macro-rules-derive-cfg.rs:17:20: 22:10 (#3), + span: $DIR/macro-rules-derive-cfg.rs:17:19: 19:10 (#3), + }, + Punct { + ch: ';', + spacing: Alone, + span: $DIR/macro-rules-derive-cfg.rs:19:10: 19:11 (#3), }, ] From 2d6ecec1dff4e48c541dfedd24b04d1ea786062e Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 21 Aug 2024 15:32:57 +1000 Subject: [PATCH 2/6] Clarify a comment. --- compiler/rustc_parse/src/parser/attr_wrapper.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index 49df2811d525b..cfbac4876d537 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -510,9 +510,11 @@ fn make_attr_token_stream( } /// Tokens are needed if: -/// - any non-single-segment attributes (other than doc comments) are present; or -/// - any `cfg_attr` attributes are present; -/// - any single-segment, non-builtin attributes are present. +/// - any non-single-segment attributes (other than doc comments) are present, +/// e.g. `rustfmt::skip`; or +/// - any `cfg_attr` attributes are present; or +/// - any single-segment, non-builtin attributes are present, e.g. `derive`, +/// `test`, `global_allocator`. fn needs_tokens(attrs: &[ast::Attribute]) -> bool { attrs.iter().any(|attr| match attr.ident() { None => !attr.is_doc_comment(), From b5574c4a7626031fb891ae0cb34bca1e50891ff4 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 20 Aug 2024 12:25:31 +1000 Subject: [PATCH 3/6] Avoid unnecessary `cloned`. 
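The pattern being cleaned up is ordinary Rust rather than anything parser-specific: when the operand passed to `chain` is an owned collection that is no longer needed, it can be consumed directly instead of cloning every element. A minimal sketch of the idea, with invented names (this is illustrative only, not the compiler code):

```rust
// `shared` is only borrowed, so its elements must still be cloned, but
// `owned` can be moved into the chain, avoiding a clone per element.
fn collect_both(shared: &[String], owned: Vec<String>) -> Vec<String> {
    shared
        .iter()
        .cloned()
        // Before: owned.iter().cloned(), which clones each String unnecessarily.
        .chain(owned)
        .collect()
}

fn main() {
    let combined = collect_both(&["a".to_string()], vec!["b".to_string()]);
    assert_eq!(combined, ["a", "b"]);
}
```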
--- compiler/rustc_parse/src/parser/attr_wrapper.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index cfbac4876d537..67d1e410c589d 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -363,7 +363,7 @@ impl<'a> Parser<'a> { [parser_replacements_start..parser_replacements_end] .iter() .cloned() - .chain(inner_attr_parser_replacements.iter().cloned()) + .chain(inner_attr_parser_replacements.into_iter()) .map(|(parser_range, data)| { (NodeRange::new(parser_range, collect_pos.start_pos), data) })
From 889ed021961adecb423c226d0aa170f505111902 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 20 Aug 2024 17:47:53 +1000 Subject: [PATCH 4/6] Return earlier in some cases in `collect_tokens`.
This example triggers an assertion failure: ``` fn f() -> u32 { #[cfg_eval] #[cfg(not(FALSE))] 0 } ``` The sequence of events: - `configure_annotatable` calls `parse_expr_force_collect`, which calls `collect_tokens`. - Within that, we end up in `parse_expr_dot_or_call`, which again calls `collect_tokens`. - The return value of the `f` call is the expression `0`. - This inner call collects tokens for `0` (parser range 10..11) and creates a replacement covering `#[cfg(not(FALSE))] 0` (parser range 0..11). - We return to the outer `collect_tokens` call. The return value of the `f` call is *again* the expression `0`, again with the range 10..11, but the replacement from earlier covers the range 0..11.
The code mistakenly assumes that any attributes from an inner `collect_tokens` call fit entirely within the body of the result of an outer `collect_tokens` call. So it adjusts the replacement parser range 0..11 to a node range by subtracting 10, resulting in -10..1. This is an invalid range and triggers an assertion failure. It's tricky to follow, but basically things get complicated when an AST node is returned from an inner `collect_tokens` call and then returned again from an outer `collect_tokens` call without being wrapped in any kind of additional layer.
This commit changes `collect_tokens` to return early in some extra cases, avoiding the construction of lazy tokens. In the example above, the outer `collect_tokens` returns earlier because the `0` expression already has tokens and `self.capture_state.capturing` is `Capturing::No`. This early return avoids the creation of the invalid range and the assertion failure.
Fixes #129166.
Note: these invalid ranges have been happening for a long time. #128725 looks like it's at fault only because it introduced the assertion that catches the invalid ranges.
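The failing arithmetic can be shown in isolation. The following is a self-contained sketch of the range adjustment described above, not the real `NodeRange::new`; the names and types are invented for illustration:

```rust
use std::ops::Range;

// Convert a parser-relative range to a node-relative range by subtracting the
// node's start position. This is only valid if the range lies entirely within
// the collected node; checked subtraction makes the failure explicit.
fn to_node_range(parser_range: Range<u32>, node_start: u32) -> Range<u32> {
    let start = parser_range
        .start
        .checked_sub(node_start)
        .expect("replacement range starts before the collected node");
    let end = parser_range
        .end
        .checked_sub(node_start)
        .expect("replacement range ends before the collected node");
    start..end
}

fn main() {
    // The inner `0` expression: parser range 10..11, node starting at 10.
    assert_eq!(to_node_range(10..11, 10), 0..1);
    // The replacement covering `#[cfg(not(FALSE))] 0` has parser range 0..11.
    // Subtracting 10 would have to produce "-10..1", which is not representable,
    // mirroring the assertion failure described above.
    // to_node_range(0..11, 10); // would panic
}
```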
--- .../rustc_parse/src/parser/attr_wrapper.rs | 39 +++++++++++-------- tests/crashes/129166.rs | 7 ---- .../invalid-node-range-issue-129166.rs | 11 ++++++ .../invalid-node-range-issue-129166.stderr | 8 ++++ 4 files changed, 41 insertions(+), 24 deletions(-) delete mode 100644 tests/crashes/129166.rs create mode 100644 tests/ui/conditional-compilation/invalid-node-range-issue-129166.rs create mode 100644 tests/ui/conditional-compilation/invalid-node-range-issue-129166.stderr diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index 67d1e410c589d..81b683705f308 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -234,6 +234,8 @@ impl<'a> Parser<'a> { force_collect: ForceCollect, f: impl FnOnce(&mut Self, AttrVec) -> PResult<'a, (R, Trailing, UsePreAttrPos)>, ) -> PResult<'a, R> { + let possible_capture_mode = self.capture_cfg; + // We must collect if anything could observe the collected tokens, i.e. // if any of the following conditions hold. // - We are force collecting tokens (because force collection requires @@ -244,9 +246,9 @@ impl<'a> Parser<'a> { // - Our target supports custom inner attributes (custom // inner attribute invocation might require token capturing). || R::SUPPORTS_CUSTOM_INNER_ATTRS - // - We are in `capture_cfg` mode (which requires tokens if + // - We are in "possible capture mode" (which requires tokens if // the parsed node has `#[cfg]` or `#[cfg_attr]` attributes). - || self.capture_cfg; + || possible_capture_mode; if !needs_collection { return Ok(f(self, attrs.attrs)?.0); } @@ -267,7 +269,7 @@ impl<'a> Parser<'a> { res? }; - // When we're not in `capture_cfg` mode, then skip collecting and + // When we're not in "definite capture mode", then skip collecting and // return early if either of the following conditions hold. // - `None`: Our target doesn't support tokens at all (e.g. `NtIdent`). // - `Some(Some(_))`: Our target already has tokens set (e.g. we've @@ -278,7 +280,10 @@ impl<'a> Parser<'a> { // Note that this check is independent of `force_collect`. There's no // need to collect tokens when we don't support tokens or already have // tokens. - if !self.capture_cfg && matches!(ret.tokens_mut(), None | Some(Some(_))) { + let definite_capture_mode = self.capture_cfg + && matches!(self.capture_state.capturing, Capturing::Yes) + && has_cfg_or_cfg_attr(ret.attrs()); + if !definite_capture_mode && matches!(ret.tokens_mut(), None | Some(Some(_))) { return Ok(ret); } @@ -298,11 +303,11 @@ impl<'a> Parser<'a> { // the earlier `needs_tokens` check, and we don't need to // check `R::SUPPORTS_CUSTOM_INNER_ATTRS`.) || needs_tokens(ret.attrs()) - // - We are in `capture_cfg` mode and there are `#[cfg]` or - // `#[cfg_attr]` attributes. (During normal non-`capture_cfg` - // parsing, we don't need any special capturing for those - // attributes, because they're builtin.) - || (self.capture_cfg && has_cfg_or_cfg_attr(ret.attrs())); + // - We are in "definite capture mode", which requires that there + // are `#[cfg]` or `#[cfg_attr]` attributes. (During normal + // non-`capture_cfg` parsing, we don't need any special capturing + // for those attributes, because they're builtin.) 
+ || definite_capture_mode; if !needs_collection { return Ok(ret); } @@ -399,20 +404,18 @@ impl<'a> Parser<'a> { break_last_token: self.break_last_token, node_replacements, }); + let mut tokens_used = false; // If we support tokens and don't already have them, store the newly captured tokens. if let Some(target_tokens @ None) = ret.tokens_mut() { + tokens_used = true; *target_tokens = Some(tokens.clone()); } - // If `capture_cfg` is set and we're inside a recursive call to - // `collect_tokens`, then we need to register a replace range if we - // have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager - // cfg-expansion on the captured token stream. - if self.capture_cfg - && matches!(self.capture_state.capturing, Capturing::Yes) - && has_cfg_or_cfg_attr(ret.attrs()) - { + // If in "definite capture mode" we need to register a replace range + // for the `#[cfg]` and/or `#[cfg_attr]` attrs. This allows us to run + // eager cfg-expansion on the captured token stream. + if definite_capture_mode { assert!(!self.break_last_token, "Should not have unglued last token with cfg attr"); // What is the status here when parsing the example code at the top of this method? @@ -430,6 +433,7 @@ impl<'a> Parser<'a> { let start_pos = if has_outer_attrs { attrs.start_pos.unwrap() } else { collect_pos.start_pos }; let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens }; + tokens_used = true; self.capture_state .parser_replacements .push((ParserRange(start_pos..end_pos), Some(target))); @@ -439,6 +443,7 @@ impl<'a> Parser<'a> { self.capture_state.parser_replacements.clear(); self.capture_state.inner_attr_parser_ranges.clear(); } + assert!(tokens_used); // check we didn't create `tokens` unnecessarily Ok(ret) } } diff --git a/tests/crashes/129166.rs b/tests/crashes/129166.rs deleted file mode 100644 index d3635d410db5e..0000000000000 --- a/tests/crashes/129166.rs +++ /dev/null @@ -1,7 +0,0 @@ -//@ known-bug: rust-lang/rust#129166 - -fn main() { - #[cfg_eval] - #[cfg] - 0 -} diff --git a/tests/ui/conditional-compilation/invalid-node-range-issue-129166.rs b/tests/ui/conditional-compilation/invalid-node-range-issue-129166.rs new file mode 100644 index 0000000000000..794e6fad3fc22 --- /dev/null +++ b/tests/ui/conditional-compilation/invalid-node-range-issue-129166.rs @@ -0,0 +1,11 @@ +// This was triggering an assertion failure in `NodeRange::new`. + +#![feature(cfg_eval)] +#![feature(stmt_expr_attributes)] + +fn f() -> u32 { + #[cfg_eval] #[cfg(not(FALSE))] 0 + //~^ ERROR removing an expression is not supported in this position +} + +fn main() {} diff --git a/tests/ui/conditional-compilation/invalid-node-range-issue-129166.stderr b/tests/ui/conditional-compilation/invalid-node-range-issue-129166.stderr new file mode 100644 index 0000000000000..41d43abdbd8e1 --- /dev/null +++ b/tests/ui/conditional-compilation/invalid-node-range-issue-129166.stderr @@ -0,0 +1,8 @@ +error: removing an expression is not supported in this position + --> $DIR/invalid-node-range-issue-129166.rs:5:17 + | +LL | #[cfg_eval] #[cfg(not(FALSE))] 0 + | ^^^^^^^^^^^^^^^^^^ + +error: aborting due to 1 previous error + From d7455a25932d2f7c62ca48c048cf79e7efd4fff8 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 20 Aug 2024 12:28:39 +1000 Subject: [PATCH 5/6] Avoid nested replacement ranges. 
In a case like this: ``` mod a { mod b { #[cfg_attr(unix, inline)] fn f() { #[cfg_attr(linux, inline)] fn g1() {} #[cfg_attr(linux, inline)] fn g2() {} } } } ``` We currently end up with the following replacement ranges. - The lazy tokens for `f` have replacement ranges for `g1` and `g2`. - The lazy tokens for `a` have replacement ranges for `f`, `g1`, and `g2`. I.e. the replacement ranges for `g1` and `g2` are duplicated.
In general, replacement ranges for inner AST nodes are duplicated up the chain for each nested `collect_tokens` call. And the code that processes the replacements is careful about the ordering in which the replacements are applied, to ensure that inner replacements are applied before outer replacements.
But all of this is unnecessary. If you apply an inner replacement and then an outer replacement, the outer replacement completely overwrites the inner replacement. This commit avoids the duplication by removing replacements from `self.capture_state.parser_replacements` when they are used. (The effect on the example above is that the lazy tokens for `a` no longer include replacement ranges for `g1` and `g2`.) This eliminates the possibility of nested replacements on individual AST nodes, which avoids the need for careful ordering of replacements.
--- .../rustc_parse/src/parser/attr_wrapper.rs | 28 +++++-------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index 81b683705f308..a74c87ca2a71e 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -134,9 +134,8 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl { node_replacements.array_windows() { assert!( - node_range.0.end <= next_node_range.0.start - || node_range.0.end >= next_node_range.0.end, - "Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})", + node_range.0.end <= next_node_range.0.start, + "Node ranges should be disjoint: ({:?}, {:?}) ({:?}, {:?})", node_range, tokens, next_node_range, ); } - // Process the replace ranges, starting from the highest start - // position and working our way back. If have tokens like: - // - // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }` - // - // Then we will generate replace ranges for both - // the `#[cfg(FALSE)] field: bool` and the entire - // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }` - // - // By starting processing from the replace range with the greatest - // start position, we ensure that any (outer) replace range which - // encloses another (inner) replace range will fully overwrite the - // inner range's replacement. - for (node_range, target) in node_replacements.into_iter().rev() { + // Process the replace ranges. + for (node_range, target) in node_replacements.into_iter() { assert!( !node_range.0.is_empty(), "Cannot replace an empty node range: {:?}", @@ -364,10 +351,9 @@ impl<'a> Parser<'a> { // from `ParserRange` form to `NodeRange` form. We will perform the actual // replacement only when we convert the `LazyAttrTokenStream` to an // `AttrTokenStream`. 
- self.capture_state.parser_replacements - [parser_replacements_start..parser_replacements_end] - .iter() - .cloned() + self.capture_state + .parser_replacements + .drain(parser_replacements_start..parser_replacements_end) .chain(inner_attr_parser_replacements.into_iter()) .map(|(parser_range, data)| { (NodeRange::new(parser_range, collect_pos.start_pos), data) From 7d6580971e8af6eb25b5fd670cfe0308795df041 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 21 Aug 2024 14:16:42 +1000 Subject: [PATCH 6/6] Avoid double-handling of attributes in `collect_tokens`. By keeping track of attributes that have been previously processed. This has no external effect for now, but is necessary for #124141, which removes nonterminals. --- .../rustc_parse/src/parser/attr_wrapper.rs | 23 +++++++++++++++---- compiler/rustc_parse/src/parser/mod.rs | 6 +++-- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index a74c87ca2a71e..b2abe5464b463 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -256,6 +256,20 @@ impl<'a> Parser<'a> { res? }; + // Ignore any attributes we've previously processed. This happens when + // an inner call to `collect_tokens` returns an AST node and then an + // outer call ends up with the same AST node without any additional + // wrapping layer. + let ret_attrs: AttrVec = ret + .attrs() + .iter() + .cloned() + .filter(|attr| { + let is_unseen = self.capture_state.seen_attrs.insert(attr.id); + is_unseen + }) + .collect(); + // When we're not in "definite capture mode", then skip collecting and // return early if either of the following conditions hold. // - `None`: Our target doesn't support tokens at all (e.g. `NtIdent`). @@ -269,7 +283,7 @@ impl<'a> Parser<'a> { // tokens. let definite_capture_mode = self.capture_cfg && matches!(self.capture_state.capturing, Capturing::Yes) - && has_cfg_or_cfg_attr(ret.attrs()); + && has_cfg_or_cfg_attr(&ret_attrs); if !definite_capture_mode && matches!(ret.tokens_mut(), None | Some(Some(_))) { return Ok(ret); } @@ -289,7 +303,7 @@ impl<'a> Parser<'a> { // outer and inner attributes. So this check is more precise than // the earlier `needs_tokens` check, and we don't need to // check `R::SUPPORTS_CUSTOM_INNER_ATTRS`.) - || needs_tokens(ret.attrs()) + || needs_tokens(&ret_attrs) // - We are in "definite capture mode", which requires that there // are `#[cfg]` or `#[cfg_attr]` attributes. (During normal // non-`capture_cfg` parsing, we don't need any special capturing @@ -328,7 +342,7 @@ impl<'a> Parser<'a> { // `Parser::parse_inner_attributes`, and pair them in a `ParserReplacement` with `None`, // which means the relevant tokens will be removed. (More details below.) let mut inner_attr_parser_replacements = Vec::new(); - for attr in ret.attrs() { + for attr in ret_attrs.iter() { if attr.style == ast::AttrStyle::Inner { if let Some(inner_attr_parser_range) = self.capture_state.inner_attr_parser_ranges.remove(&attr.id) @@ -418,7 +432,7 @@ impl<'a> Parser<'a> { // cfg-expand this AST node. let start_pos = if has_outer_attrs { attrs.start_pos.unwrap() } else { collect_pos.start_pos }; - let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens }; + let target = AttrsTarget { attrs: ret_attrs, tokens }; tokens_used = true; self.capture_state .parser_replacements @@ -428,6 +442,7 @@ impl<'a> Parser<'a> { // the outermost call to this method. 
self.capture_state.parser_replacements.clear(); self.capture_state.inner_attr_parser_ranges.clear(); + self.capture_state.seen_attrs.clear(); } assert!(tokens_used); // check we didn't create `tokens` unnecessarily Ok(ret) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 61e3fa2e6c560..db4ebec8d7e23 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -32,7 +32,7 @@ use rustc_ast::{ VisibilityKind, DUMMY_NODE_ID, }; use rustc_ast_pretty::pprust; -use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_data_structures::sync::Lrc; use rustc_errors::{Applicability, Diag, FatalError, MultiSpan, PResult}; use rustc_session::parse::ParseSess; @@ -183,7 +183,7 @@ pub struct Parser<'a> { // This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure // it doesn't unintentionally get bigger. #[cfg(target_pointer_width = "64")] -rustc_data_structures::static_assert_size!(Parser<'_>, 256); +rustc_data_structures::static_assert_size!(Parser<'_>, 288); /// Stores span information about a closure. #[derive(Clone, Debug)] @@ -260,6 +260,7 @@ struct CaptureState { capturing: Capturing, parser_replacements: Vec, inner_attr_parser_ranges: FxHashMap, + seen_attrs: FxHashSet, } /// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that @@ -457,6 +458,7 @@ impl<'a> Parser<'a> { capturing: Capturing::No, parser_replacements: Vec::new(), inner_attr_parser_ranges: Default::default(), + seen_attrs: Default::default(), }, current_closure: None, recovery: Recovery::Allowed,