Skip to content

Commit

Permalink
Emit trivia as sibling tokens and don't generate a RuleKind for it (#…
Browse files Browse the repository at this point in the history
…885)

Closes #737 



Functionally it's done but needs a rebase and a final polish. For now
I'm using the existing PG infra until we clean it up (#638, slated for
this sprint as well).
  • Loading branch information
Xanewok authored Mar 7, 2024
1 parent 0125717 commit a9bd8da
Show file tree
Hide file tree
Showing 483 changed files with 3,295 additions and 3,370 deletions.
5 changes: 5 additions & 0 deletions .changeset/soft-ties-sort.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@nomicfoundation/slang": minor
---

Flatten the trivia syntax nodes into sibling tokens
43 changes: 34 additions & 9 deletions crates/codegen/grammar/src/constructor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,12 @@ impl GrammarConstructorDslV2 for Grammar {

let leading_trivia = Rc::new(NamedTriviaParser {
name: "LeadingTrivia",
def: resolve_trivia(lang.leading_trivia.clone(), &mut ctx),
def: resolve_trivia(lang.leading_trivia.clone(), TriviaKind::Leading, &mut ctx),
}) as Rc<dyn TriviaParserDefinition>;

let trailing_trivia = Rc::new(NamedTriviaParser {
name: "TrailingTrivia",
def: resolve_trivia(lang.trailing_trivia.clone(), &mut ctx),
def: resolve_trivia(lang.trailing_trivia.clone(), TriviaKind::Trailing, &mut ctx),
}) as Rc<dyn TriviaParserDefinition>;

for (_lex_ctx, item) in items.values() {
Expand Down Expand Up @@ -507,35 +507,43 @@ fn resolve_keyword_value(value: model::KeywordValue) -> KeywordScannerDefinition
}
}

fn resolve_trivia(parser: model::TriviaParser, ctx: &mut ResolveCtx<'_>) -> ParserDefinitionNode {
fn resolve_trivia(
parser: model::TriviaParser,
kind: TriviaKind,
ctx: &mut ResolveCtx<'_>,
) -> ParserDefinitionNode {
match parser {
model::TriviaParser::Optional { parser } => {
ParserDefinitionNode::Optional(Box::new(resolve_trivia(*parser, ctx)))
ParserDefinitionNode::Optional(Box::new(resolve_trivia(*parser, kind, ctx)))
}
model::TriviaParser::OneOrMore { parser } => ParserDefinitionNode::OneOrMore(
Labeled::anonymous(Box::new(resolve_trivia(*parser, ctx))),
Labeled::anonymous(Box::new(resolve_trivia(*parser, kind, ctx))),
),
model::TriviaParser::ZeroOrMore { parser } => ParserDefinitionNode::ZeroOrMore(
Labeled::anonymous(Box::new(resolve_trivia(*parser, ctx))),
Labeled::anonymous(Box::new(resolve_trivia(*parser, kind, ctx))),
),
model::TriviaParser::Sequence { parsers } => ParserDefinitionNode::Sequence(
parsers
.into_iter()
.map(|scanner| Labeled::anonymous(resolve_trivia(scanner, ctx)))
.map(|scanner| Labeled::anonymous(resolve_trivia(scanner, kind, ctx)))
.collect(),
),
model::TriviaParser::Choice { parsers } => {
ParserDefinitionNode::Choice(Labeled::anonymous(
parsers
.into_iter()
.map(|scanner| resolve_trivia(scanner, ctx))
.map(|scanner| resolve_trivia(scanner, kind, ctx))
.collect(),
))
}
model::TriviaParser::Trivia { reference } => {
match resolve_grammar_element(&reference, ctx) {
GrammarElement::ScannerDefinition(parser) => {
ParserDefinitionNode::ScannerDefinition(parser)
// Hack: This is a sequence of a single scanner in order to emit the names
ParserDefinitionNode::Sequence(vec![Labeled::with_builtin_label(
kind.label(),
ParserDefinitionNode::ScannerDefinition(parser),
)])
}
_ => panic!("Expected {reference} to be a ScannerDefinition"),
}
Expand Down Expand Up @@ -793,6 +801,21 @@ fn resolve_precedence(
}
}

/// Identifies whether a trivia parser is being resolved for leading or
/// trailing trivia.
#[derive(Clone, Copy)]
enum TriviaKind {
    /// The parser is resolved as the leading trivia parser.
    Leading,
    /// The parser is resolved as the trailing trivia parser.
    Trailing,
}

impl TriviaKind {
    /// Returns the built-in label that corresponds to this kind of trivia.
    fn label(self) -> BuiltInLabel {
        match self {
            Self::Leading => BuiltInLabel::LeadingTrivia,
            Self::Trailing => BuiltInLabel::TrailingTrivia,
        }
    }
}

/// Conversion of a value into a `ParserDefinitionNode`.
trait IntoParserDefNode {
/// Consumes `self` and returns the resulting `ParserDefinitionNode`.
fn into_parser_def_node(self) -> ParserDefinitionNode;
}
Expand Down Expand Up @@ -861,6 +884,8 @@ enum BuiltInLabel {
Operand,
LeftOperand,
RightOperand,
LeadingTrivia,
TrailingTrivia,
}

impl<T> LabeledExt<T> for Labeled<T> {
Expand Down
20 changes: 6 additions & 14 deletions crates/codegen/parser/generator/src/rust_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ pub struct RustGenerator {

rule_kinds: BTreeSet<&'static str>,
token_kinds: BTreeSet<&'static str>,
trivia_kinds: BTreeSet<&'static str>,
trivia_scanner_names: BTreeSet<&'static str>,
labels: BTreeSet<String>,

Expand All @@ -41,6 +40,7 @@ pub struct RustGenerator {
keyword_compound_scanners: BTreeMap<&'static str, String>, // (name of the KW scanner, code)

parser_functions: BTreeMap<&'static str, String>, // (name of parser, code)
trivia_parser_functions: BTreeMap<&'static str, String>, // (name of parser, code)

#[serde(skip)]
top_level_scanner_names: BTreeSet<&'static str>,
Expand Down Expand Up @@ -280,6 +280,8 @@ impl GrammarVisitor for RustGenerator {
self.labels.remove("operand");
self.labels.remove("left_operand");
self.labels.remove("right_operand");
self.labels.remove("leading_trivia");
self.labels.remove("trailing_trivia");

// Just being anal about tidying up :)
self.all_scanners.clear();
Expand All @@ -306,12 +308,9 @@ impl GrammarVisitor for RustGenerator {

fn trivia_parser_definition_enter(&mut self, parser: &TriviaParserDefinitionRef) {
self.set_current_context(parser.context());
self.rule_kinds.insert(parser.name());
self.trivia_kinds.insert(parser.name());
let trivia_scanners = {
use codegen_grammar::Visitable as _;
// TODO(#737): This will be cleaned up once we don't emit rule kinds for trivia parsers
// Visit each node and only collect the scanner definition names:

#[derive(Default)]
struct CollectTriviaScanners {
scanner_names: BTreeSet<&'static str>,
Expand All @@ -328,15 +327,8 @@ impl GrammarVisitor for RustGenerator {
};
self.trivia_scanner_names.extend(trivia_scanners);

self.parser_functions.insert(
parser.name(),
{
let code = parser.to_parser_code();
let rule_kind = format_ident!("{}", parser.name());
quote! { #code.with_kind(RuleKind::#rule_kind) }
}
.to_string(),
);
self.trivia_parser_functions
.insert(parser.name(), parser.to_parser_code().to_string());
}

fn parser_definition_enter(&mut self, parser: &ParserDefinitionRef) {
Expand Down
2 changes: 1 addition & 1 deletion crates/codegen/parser/runtime/src/cst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ impl Node {

pub fn is_trivia(&self) -> bool {
match self {
Self::Rule(rule) => rule.kind.is_trivia(),
Self::Rule(_) => false,
Self::Token(token) => token.kind.is_trivia(),
}
}
Expand Down
8 changes: 2 additions & 6 deletions crates/codegen/parser/runtime/src/kinds.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,6 @@ pub enum RuleKind {
Rule3,
}

impl RuleKind {
pub fn is_trivia(&self) -> bool {
unreachable!("Expanded by the template")
}
}

#[derive(
Debug,
Eq,
Expand All @@ -82,6 +76,8 @@ pub enum NodeLabel {
Operand,
LeftOperand,
RightOperand,
LeadingTrivia,
TrailingTrivia,
// Used for testing this crate, this is generated in the client code
Label1,
Label2,
Expand Down
41 changes: 19 additions & 22 deletions crates/codegen/parser/runtime/src/parser_support/parser_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,41 +13,38 @@ pub trait ParserFunction<L>
where
Self: Fn(&L, &mut ParserContext<'_>) -> ParserResult,
{
fn parse(&self, language: &L, input: &str, collect_trivia: bool) -> ParseOutput;
fn parse(&self, language: &L, input: &str) -> ParseOutput;
}

impl<L, F> ParserFunction<L> for F
where
L: Lexer,
F: Fn(&L, &mut ParserContext<'_>) -> ParserResult,
{
fn parse(&self, language: &L, input: &str, collect_trivia: bool) -> ParseOutput {
fn parse(&self, language: &L, input: &str) -> ParseOutput {
let mut stream = ParserContext::new(input);
let mut result = self(language, &mut stream);

// For a successful/recovered parse, collect any remaining trivia as part of the parse result
// TODO(#737): Remove this once we unconditionally collect trivia
if collect_trivia {
if let ParserResult::Match(r#match) = &mut result {
let [topmost] = r#match.nodes.as_mut_slice() else {
unreachable!(
"Match at the top level of a parse does not have exactly one Rule node"
)
};

let eof_trivia = match Lexer::leading_trivia(language, &mut stream) {
ParserResult::Match(eof_trivia) if !eof_trivia.nodes.is_empty() => {
Some(eof_trivia.nodes)
}
_ => None,
};
if let ParserResult::Match(r#match) = &mut result {
let [topmost] = r#match.nodes.as_mut_slice() else {
unreachable!(
"Match at the top level of a parse does not have exactly one Rule node"
)
};

let eof_trivia = match Lexer::leading_trivia(language, &mut stream) {
ParserResult::Match(eof_trivia) if !eof_trivia.nodes.is_empty() => {
Some(eof_trivia.nodes)
}
_ => None,
};

if let (cst::Node::Rule(rule), Some(eof_trivia)) = (&mut topmost.node, eof_trivia) {
let mut new_children = rule.children.clone();
new_children.extend(eof_trivia);
if let (cst::Node::Rule(rule), Some(eof_trivia)) = (&mut topmost.node, eof_trivia) {
let mut new_children = rule.children.clone();
new_children.extend(eof_trivia);

topmost.node = cst::Node::rule(rule.kind, new_children);
}
topmost.node = cst::Node::rule(rule.kind, new_children);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,14 @@ impl ParserResult {
{
*prev_label = Some(label);
}
// Also allow naming a single trivia token node
else if let ParserResult::Match(Match { nodes, .. }) = &mut self {
if let [node] = nodes.as_mut_slice() {
if node.as_token().is_some_and(|tok| tok.kind.is_trivia()) {
node.label = Some(label);
}
}
}

self
}
Expand Down
32 changes: 21 additions & 11 deletions crates/codegen/parser/runtime/src/parser_support/sequence_helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,17 +123,27 @@ impl SequenceHelper {
return;
}

let tokens: Vec<_> =
next.nodes.iter().filter_map(|node| node.as_token()).collect();
let mut rules = next.nodes.iter().filter_map(|node| node.as_rule());

let is_single_token_with_trivia =
tokens.len() == 1 && rules.all(|rule| rule.kind.is_trivia());
let next_token = tokens.first().map(|token| token.kind);

// NOTE: We only support skipping to a single token (optionally with trivia)
debug_assert!(is_single_token_with_trivia);
debug_assert_eq!(next_token, Some(running.found));
// We only support skipping to a single, significant token.
// Sanity check that we are recovering to the expected one.
let next_token = next.nodes.iter().try_fold(None, |acc, node| {
match &**node {
cst::Node::Token(token) if token.kind.is_trivia() => Ok(acc),
cst::Node::Token(token) => {
match acc {
None => Ok(Some(token.kind)),
Some(..) => {
debug_assert!(false, "Recovery skipped to multiple tokens: {acc:?}, {token:?}");
Err(())
}
}
}
cst::Node::Rule(rule) => {
debug_assert!(false, "Recovery skipped to a rule: {rule:?}");
Err(())
}
}
});
debug_assert_eq!(next_token, Ok(Some(running.found)));

running.nodes.push(LabeledNode::anonymous(cst::Node::token(
TokenKind::SKIPPED,
Expand Down
14 changes: 2 additions & 12 deletions crates/codegen/parser/runtime/src/templates/kinds.rs.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,6 @@ pub enum RuleKind {
{%- endfor -%}
}

impl RuleKind {
pub fn is_trivia(&self) -> bool {
#[allow(clippy::match_like_matches_macro)]
match self {
{%- for variant in generator.trivia_kinds -%}
Self::{{ variant }} => true,
{%- endfor -%}
_ => false,
}
}
}

#[derive(
Debug,
Eq,
Expand All @@ -58,6 +46,8 @@ pub enum NodeLabel {
Operand,
LeftOperand,
RightOperand,
LeadingTrivia,
TrailingTrivia,
// Generated
{% for variant in generator.labels -%}
{{ variant | pascal_case }},
Expand Down
12 changes: 6 additions & 6 deletions crates/codegen/parser/runtime/src/templates/language.rs.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ impl Language {
fn {{ parser_name | snake_case }}(&self, input: &mut ParserContext<'_>) -> ParserResult { {{ parser_code }} }
{% endfor %}

{% for parser_name, parser_code in generator.trivia_parser_functions %}
#[allow(unused_assignments, unused_parens)]
fn {{ parser_name | snake_case }}(&self, input: &mut ParserContext<'_>) -> ParserResult { {{ parser_code }} }
{% endfor %}

/********************************************
* Scanner Functions
********************************************/
Expand All @@ -102,12 +107,7 @@ impl Language {
pub fn parse(&self, kind: RuleKind, input: &str) -> ParseOutput {
match kind {
{%- for parser_name, _ in generator.parser_functions -%}
{# TODO(#737): Remove the special case once we stop generating RuleKind for trivia #}
{%- if parser_name is ending_with("Trivia") -%}
RuleKind::{{ parser_name }} => Self::{{ parser_name | snake_case }}.parse(self, input, false),
{%- else -%}
RuleKind::{{ parser_name }} => Self::{{ parser_name | snake_case }}.parse(self, input, true),
{%- endif -%}
RuleKind::{{ parser_name }} => Self::{{ parser_name | snake_case }}.parse(self, input),
{%- endfor -%}
}
}
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit a9bd8da

Please sign in to comment.