diff --git a/.changeset/pink-dryers-begin.md b/.changeset/pink-dryers-begin.md new file mode 100644 index 0000000000..dc5416d551 --- /dev/null +++ b/.changeset/pink-dryers-begin.md @@ -0,0 +1,5 @@ +--- +"@nomicfoundation/slang": minor +--- + +Add `TokenKind::is_trivia` diff --git a/crates/codegen/grammar/src/parser_definition.rs b/crates/codegen/grammar/src/parser_definition.rs index bc4da37edc..4efcf12f88 100644 --- a/crates/codegen/grammar/src/parser_definition.rs +++ b/crates/codegen/grammar/src/parser_definition.rs @@ -131,8 +131,8 @@ impl Visitable for ParserDefinitionNode { terminator.accept_visitor(visitor); } - Self::ScannerDefinition(_) - | Self::KeywordScannerDefinition(_) + Self::ScannerDefinition(def) => def.accept_visitor(visitor), + Self::KeywordScannerDefinition(_) | Self::TriviaParserDefinition(_) | Self::ParserDefinition(_) | Self::PrecedenceParserDefinition(_) => {} diff --git a/crates/codegen/parser/generator/src/rust_generator.rs b/crates/codegen/parser/generator/src/rust_generator.rs index 6121aad6e0..29530d2347 100644 --- a/crates/codegen/parser/generator/src/rust_generator.rs +++ b/crates/codegen/parser/generator/src/rust_generator.rs @@ -33,6 +33,7 @@ pub struct RustGenerator { rule_kinds: BTreeSet<&'static str>, token_kinds: BTreeSet<&'static str>, trivia_kinds: BTreeSet<&'static str>, + trivia_scanner_names: BTreeSet<&'static str>, labels: BTreeSet, scanner_functions: BTreeMap<&'static str, String>, // (name of scanner, code) @@ -307,6 +308,26 @@ impl GrammarVisitor for RustGenerator { self.set_current_context(parser.context()); self.rule_kinds.insert(parser.name()); self.trivia_kinds.insert(parser.name()); + let trivia_scanners = { + use codegen_grammar::Visitable as _; + // TODO(#737): This will be cleaned up once we don't emit rule kinds for trivia parsers + // Visit each node and only collect the scanner definition names: + #[derive(Default)] + struct CollectTriviaScanners { + scanner_names: BTreeSet<&'static str>, + } + impl codegen_grammar::GrammarVisitor for CollectTriviaScanners { + fn scanner_definition_enter(&mut self, node: &ScannerDefinitionRef) { + self.scanner_names.insert(node.name()); + } + } + + let mut visitor = CollectTriviaScanners::default(); + parser.node().accept_visitor(&mut visitor); + visitor.scanner_names + }; + self.trivia_scanner_names.extend(trivia_scanners); + self.parser_functions.insert( parser.name(), { diff --git a/crates/codegen/parser/runtime/src/cst.rs b/crates/codegen/parser/runtime/src/cst.rs index d81a96b881..7110b2fa13 100644 --- a/crates/codegen/parser/runtime/src/cst.rs +++ b/crates/codegen/parser/runtime/src/cst.rs @@ -157,6 +157,13 @@ impl Node { pub fn as_token_with_kinds(&self, kinds: &[TokenKind]) -> Option<&Rc> { self.as_token().filter(|token| kinds.contains(&token.kind)) } + + pub fn is_trivia(&self) -> bool { + match self { + Self::Rule(rule) => rule.kind.is_trivia(), + Self::Token(token) => token.kind.is_trivia(), + } + } } impl From> for Node { diff --git a/crates/codegen/parser/runtime/src/kinds.rs b/crates/codegen/parser/runtime/src/kinds.rs index 1cce7d11b8..28027677c1 100644 --- a/crates/codegen/parser/runtime/src/kinds.rs +++ b/crates/codegen/parser/runtime/src/kinds.rs @@ -24,6 +24,12 @@ pub enum TokenKind { Token3, } +impl TokenKind { + pub fn is_trivia(&self) -> bool { + unreachable!("Expanded by the template") + } +} + #[derive( Debug, Eq, diff --git a/crates/codegen/parser/runtime/src/napi_interface/templates/ast_selectors.rs.jinja2 b/crates/codegen/parser/runtime/src/napi_interface/templates/ast_selectors.rs.jinja2 index 1216bc8cf0..90a3854d1f 100644 --- a/crates/codegen/parser/runtime/src/napi_interface/templates/ast_selectors.rs.jinja2 +++ b/crates/codegen/parser/runtime/src/napi_interface/templates/ast_selectors.rs.jinja2 @@ -277,10 +277,7 @@ impl Selector { fn try_select(&mut self, filter: impl FnOnce(&RustNode) -> bool) -> Result> { while let Some(child) = self.node.children.get(self.index) { match child { - RustLabeledNode { - label: _, - node: RustNode::Rule(rule), - } if rule.kind.is_trivia() => { + node if node.is_trivia() => { // skip trivia, since it's not part of the AST self.index += 1; continue; diff --git a/crates/codegen/parser/runtime/src/parser_support/parser_result.rs b/crates/codegen/parser/runtime/src/parser_support/parser_result.rs index e74efbea5b..5c705125a2 100644 --- a/crates/codegen/parser/runtime/src/parser_support/parser_result.rs +++ b/crates/codegen/parser/runtime/src/parser_support/parser_result.rs @@ -88,14 +88,6 @@ impl ParserResult { /// Returns a significant (non-trivia) node if there is exactly one. pub(crate) fn significant_node_mut(&mut self) -> Option<&mut cst::LabeledNode> { - fn is_significant(labeled: &cst::LabeledNode) -> bool { - match &labeled.node { - cst::Node::Rule(rule) => !rule.kind.is_trivia(), - // FIXME: Some tokens are in fact trivia - cst::Node::Token(_) => true, - } - } - let nodes = match self { ParserResult::Match(r#match) => &mut r#match.nodes[..], ParserResult::IncompleteMatch(incomplete_match) => &mut incomplete_match.nodes[..], @@ -105,9 +97,9 @@ impl ParserResult { let result = nodes.iter_mut().try_fold(None, |acc, next| match acc { // Two significant nodes, bail - Some(_) if is_significant(next) => ControlFlow::Break(None), + Some(_) if !next.is_trivia() => ControlFlow::Break(None), Some(_) => ControlFlow::Continue(acc), - None => ControlFlow::Continue(is_significant(next).then_some(next)), + None => ControlFlow::Continue((!next.is_trivia()).then_some(next)), }); match result { diff --git a/crates/codegen/parser/runtime/src/templates/kinds.rs.jinja2 b/crates/codegen/parser/runtime/src/templates/kinds.rs.jinja2 index d635221808..0e124a99c4 100644 --- a/crates/codegen/parser/runtime/src/templates/kinds.rs.jinja2 +++ b/crates/codegen/parser/runtime/src/templates/kinds.rs.jinja2 @@ -86,6 +86,18 @@ pub enum TokenKind { {%- endfor -%} } +impl TokenKind { + pub fn is_trivia(&self) -> bool { + #[allow(clippy::match_like_matches_macro)] + match self { + {%- for variant in generator.trivia_scanner_names -%} + Self::{{ variant }} => true, + {%- endfor -%} + _ => false, + } + } +} + /// The lexical context of the scanner. #[derive(strum_macros::FromRepr, Clone, Copy)] pub(crate) enum LexicalContext { diff --git a/crates/solidity/outputs/cargo/slang_solidity/src/generated/cst.rs b/crates/solidity/outputs/cargo/slang_solidity/src/generated/cst.rs index 9b4281aa40..fe454e0a31 100644 --- a/crates/solidity/outputs/cargo/slang_solidity/src/generated/cst.rs +++ b/crates/solidity/outputs/cargo/slang_solidity/src/generated/cst.rs @@ -159,6 +159,13 @@ impl Node { pub fn as_token_with_kinds(&self, kinds: &[TokenKind]) -> Option<&Rc> { self.as_token().filter(|token| kinds.contains(&token.kind)) } + + pub fn is_trivia(&self) -> bool { + match self { + Self::Rule(rule) => rule.kind.is_trivia(), + Self::Token(token) => token.kind.is_trivia(), + } + } } impl From> for Node { diff --git a/crates/solidity/outputs/cargo/slang_solidity/src/generated/kinds.rs b/crates/solidity/outputs/cargo/slang_solidity/src/generated/kinds.rs index eb44abc228..4fa045e30e 100644 --- a/crates/solidity/outputs/cargo/slang_solidity/src/generated/kinds.rs +++ b/crates/solidity/outputs/cargo/slang_solidity/src/generated/kinds.rs @@ -775,6 +775,21 @@ pub enum TokenKind { YulYearsKeyword, } +impl TokenKind { + pub fn is_trivia(&self) -> bool { + #[allow(clippy::match_like_matches_macro)] + match self { + Self::EndOfLine => true, + Self::MultiLineComment => true, + Self::MultiLineNatSpecComment => true, + Self::SingleLineComment => true, + Self::SingleLineNatSpecComment => true, + Self::Whitespace => true, + _ => false, + } + } +} + /// The lexical context of the scanner. #[derive(strum_macros::FromRepr, Clone, Copy)] pub(crate) enum LexicalContext { diff --git a/crates/solidity/outputs/cargo/slang_solidity/src/generated/napi_interface/ast_selectors.rs b/crates/solidity/outputs/cargo/slang_solidity/src/generated/napi_interface/ast_selectors.rs index 06ee87b627..256e0bd650 100644 --- a/crates/solidity/outputs/cargo/slang_solidity/src/generated/napi_interface/ast_selectors.rs +++ b/crates/solidity/outputs/cargo/slang_solidity/src/generated/napi_interface/ast_selectors.rs @@ -3143,10 +3143,7 @@ impl Selector { fn try_select(&mut self, filter: impl FnOnce(&RustNode) -> bool) -> Result> { while let Some(child) = self.node.children.get(self.index) { match child { - RustLabeledNode { - label: _, - node: RustNode::Rule(rule), - } if rule.kind.is_trivia() => { + node if node.is_trivia() => { // skip trivia, since it's not part of the AST self.index += 1; continue; diff --git a/crates/solidity/outputs/cargo/slang_solidity/src/generated/parser_support/parser_result.rs b/crates/solidity/outputs/cargo/slang_solidity/src/generated/parser_support/parser_result.rs index 138ba829a5..9a885edd44 100644 --- a/crates/solidity/outputs/cargo/slang_solidity/src/generated/parser_support/parser_result.rs +++ b/crates/solidity/outputs/cargo/slang_solidity/src/generated/parser_support/parser_result.rs @@ -90,14 +90,6 @@ impl ParserResult { /// Returns a significant (non-trivia) node if there is exactly one. pub(crate) fn significant_node_mut(&mut self) -> Option<&mut cst::LabeledNode> { - fn is_significant(labeled: &cst::LabeledNode) -> bool { - match &labeled.node { - cst::Node::Rule(rule) => !rule.kind.is_trivia(), - // FIXME: Some tokens are in fact trivia - cst::Node::Token(_) => true, - } - } - let nodes = match self { ParserResult::Match(r#match) => &mut r#match.nodes[..], ParserResult::IncompleteMatch(incomplete_match) => &mut incomplete_match.nodes[..], @@ -107,9 +99,9 @@ impl ParserResult { let result = nodes.iter_mut().try_fold(None, |acc, next| match acc { // Two significant nodes, bail - Some(_) if is_significant(next) => ControlFlow::Break(None), + Some(_) if !next.is_trivia() => ControlFlow::Break(None), Some(_) => ControlFlow::Continue(acc), - None => ControlFlow::Continue(is_significant(next).then_some(next)), + None => ControlFlow::Continue((!next.is_trivia()).then_some(next)), }); match result { diff --git a/crates/solidity/testing/utils/src/node_extensions/mod.rs b/crates/solidity/testing/utils/src/node_extensions/mod.rs index e9fe0707cc..5132a4e389 100644 --- a/crates/solidity/testing/utils/src/node_extensions/mod.rs +++ b/crates/solidity/testing/utils/src/node_extensions/mod.rs @@ -2,21 +2,12 @@ mod tests; use slang_solidity::cst::{Node, RuleNode, TokenNode}; -use slang_solidity::kinds::RuleKind; pub trait NodeExtensions { - fn is_trivia(&self) -> bool; fn extract_non_trivia(&self) -> String; } impl NodeExtensions for Node { - fn is_trivia(&self) -> bool { - match self { - Node::Token(token) => token.is_trivia(), - Node::Rule(rule) => rule.is_trivia(), - } - } - fn extract_non_trivia(&self) -> String { match self { Node::Token(token) => token.extract_non_trivia(), @@ -26,28 +17,16 @@ impl NodeExtensions for Node { } impl NodeExtensions for RuleNode { - fn is_trivia(&self) -> bool { - matches!( - self.kind, - RuleKind::LeadingTrivia | RuleKind::TrailingTrivia - ) - } - fn extract_non_trivia(&self) -> String { - return self - .children + self.children .iter() .filter(|child| !child.is_trivia()) .map(|child| child.extract_non_trivia()) - .collect(); + .collect() } } impl NodeExtensions for TokenNode { - fn is_trivia(&self) -> bool { - false - } - fn extract_non_trivia(&self) -> String { self.text.clone() } diff --git a/crates/testlang/outputs/cargo/slang_testlang/src/generated/cst.rs b/crates/testlang/outputs/cargo/slang_testlang/src/generated/cst.rs index 9b4281aa40..fe454e0a31 100644 --- a/crates/testlang/outputs/cargo/slang_testlang/src/generated/cst.rs +++ b/crates/testlang/outputs/cargo/slang_testlang/src/generated/cst.rs @@ -159,6 +159,13 @@ impl Node { pub fn as_token_with_kinds(&self, kinds: &[TokenKind]) -> Option<&Rc> { self.as_token().filter(|token| kinds.contains(&token.kind)) } + + pub fn is_trivia(&self) -> bool { + match self { + Self::Rule(rule) => rule.kind.is_trivia(), + Self::Token(token) => token.kind.is_trivia(), + } + } } impl From> for Node { diff --git a/crates/testlang/outputs/cargo/slang_testlang/src/generated/kinds.rs b/crates/testlang/outputs/cargo/slang_testlang/src/generated/kinds.rs index 9701c5288d..56c813112b 100644 --- a/crates/testlang/outputs/cargo/slang_testlang/src/generated/kinds.rs +++ b/crates/testlang/outputs/cargo/slang_testlang/src/generated/kinds.rs @@ -114,6 +114,19 @@ pub enum TokenKind { Whitespace, } +impl TokenKind { + pub fn is_trivia(&self) -> bool { + #[allow(clippy::match_like_matches_macro)] + match self { + Self::EndOfLine => true, + Self::MultiLineComment => true, + Self::SingleLineComment => true, + Self::Whitespace => true, + _ => false, + } + } +} + /// The lexical context of the scanner. #[derive(strum_macros::FromRepr, Clone, Copy)] pub(crate) enum LexicalContext { diff --git a/crates/testlang/outputs/cargo/slang_testlang/src/generated/napi_interface/ast_selectors.rs b/crates/testlang/outputs/cargo/slang_testlang/src/generated/napi_interface/ast_selectors.rs index 824e941076..c3460e9f9f 100644 --- a/crates/testlang/outputs/cargo/slang_testlang/src/generated/napi_interface/ast_selectors.rs +++ b/crates/testlang/outputs/cargo/slang_testlang/src/generated/napi_interface/ast_selectors.rs @@ -297,10 +297,7 @@ impl Selector { fn try_select(&mut self, filter: impl FnOnce(&RustNode) -> bool) -> Result> { while let Some(child) = self.node.children.get(self.index) { match child { - RustLabeledNode { - label: _, - node: RustNode::Rule(rule), - } if rule.kind.is_trivia() => { + node if node.is_trivia() => { // skip trivia, since it's not part of the AST self.index += 1; continue; diff --git a/crates/testlang/outputs/cargo/slang_testlang/src/generated/parser_support/parser_result.rs b/crates/testlang/outputs/cargo/slang_testlang/src/generated/parser_support/parser_result.rs index 138ba829a5..9a885edd44 100644 --- a/crates/testlang/outputs/cargo/slang_testlang/src/generated/parser_support/parser_result.rs +++ b/crates/testlang/outputs/cargo/slang_testlang/src/generated/parser_support/parser_result.rs @@ -90,14 +90,6 @@ impl ParserResult { /// Returns a significant (non-trivia) node if there is exactly one. pub(crate) fn significant_node_mut(&mut self) -> Option<&mut cst::LabeledNode> { - fn is_significant(labeled: &cst::LabeledNode) -> bool { - match &labeled.node { - cst::Node::Rule(rule) => !rule.kind.is_trivia(), - // FIXME: Some tokens are in fact trivia - cst::Node::Token(_) => true, - } - } - let nodes = match self { ParserResult::Match(r#match) => &mut r#match.nodes[..], ParserResult::IncompleteMatch(incomplete_match) => &mut incomplete_match.nodes[..], @@ -107,9 +99,9 @@ impl ParserResult { let result = nodes.iter_mut().try_fold(None, |acc, next| match acc { // Two significant nodes, bail - Some(_) if is_significant(next) => ControlFlow::Break(None), + Some(_) if !next.is_trivia() => ControlFlow::Break(None), Some(_) => ControlFlow::Continue(acc), - None => ControlFlow::Continue(is_significant(next).then_some(next)), + None => ControlFlow::Continue((!next.is_trivia()).then_some(next)), }); match result {