From f94454a586cda0f33e35273e0b519c36fa0bdc26 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Mon, 3 Jun 2024 23:35:52 +0200 Subject: [PATCH 1/3] allow "(@foo x\n y)" --- Project.toml | 2 +- src/parser.jl | 131 ++++++++++++++++++++++++++------------------------ 2 files changed, 69 insertions(+), 64 deletions(-) diff --git a/Project.toml b/Project.toml index 6ffbaa40..89de4864 100644 --- a/Project.toml +++ b/Project.toml @@ -4,7 +4,7 @@ authors = ["Claire Foster and contributors"] version = "0.4.6" [compat] -julia = "1.0" +julia = "1.11" [deps] diff --git a/src/parser.jl b/src/parser.jl index 34666b59..7308eb72 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -41,6 +41,9 @@ function ParseState(ps::ParseState; range_colon_enabled=nothing, where_enabled === nothing ? ps.where_enabled : where_enabled) end +using Base.ScopedValues +const in_parens = ScopedValue(false) + # Functions to change parse state function normal_context(ps::ParseState) @@ -56,7 +59,7 @@ end function with_space_sensitive(ps::ParseState) ParseState(ps, space_sensitive=true, - whitespace_newline=false) + whitespace_newline=in_parens[]) end # Convenient wrappers for ParseStream @@ -3007,70 +3010,72 @@ function parse_paren(ps::ParseState, check_identifiers=true) space_sensitive=false, where_enabled=true, whitespace_newline=true) - mark = position(ps) - @check peek(ps) == K"(" - bump(ps, TRIVIA_FLAG) # K"(" - after_paren_mark = position(ps) - k = peek(ps) - if k == K")" - # () ==> (tuple-p) - bump(ps, TRIVIA_FLAG) - emit(ps, mark, K"tuple", PARENS_FLAG) - elseif is_syntactic_operator(k) - # allow :(=) etc in unchecked contexts, eg quotes - # :(=) ==> (quote-: (parens =)) - parse_atom(ps, check_identifiers) - bump_closing_token(ps, K")") - emit(ps, mark, K"parens") - elseif !check_identifiers && k == K"::" && - peek(ps, 2, skip_newlines=true) == K")" - # allow :(::) as a special case - # :(::) ==> (quote-: (parens ::)) - bump(ps) - bump(ps, TRIVIA_FLAG, skip_newlines=true) - emit(ps, mark, K"parens") - else - # Deal with all other cases of tuple or block syntax via the generic - # parse_brackets - initial_semi = peek(ps) == K";" - opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs - is_tuple = had_commas || (had_splat && num_semis >= 1) || - (initial_semi && (num_semis == 1 || num_subexprs > 0)) - return (needs_parameters=is_tuple, - is_tuple=is_tuple, - is_block=num_semis > 0) - end - if opts.is_tuple - # Tuple syntax with commas - # (x,) ==> (tuple-p x) - # (x,y) ==> (tuple-p x y) - # (x=1, y=2) ==> (tuple-p (= x 1) (= y 2)) - # - # Named tuple with initial semicolon - # (;) ==> (tuple-p (parameters)) - # (; a=1) ==> (tuple-p (parameters (= a 1))) - # - # Extra credit: nested parameters and frankentuples - # (x...;) ==> (tuple-p (... x) (parameters)) - # (x...; y) ==> (tuple-p (... x) (parameters y)) - # (; a=1; b=2) ==> (tuple-p (parameters (= a 1)) (parameters (= b 2))) - # (a; b; c,d) ==> (tuple-p a (parameters b) (parameters c d)) - # (a=1, b=2; c=3) ==> (tuple-p (= a 1) (= b 2) (parameters (= c 3))) + with(in_parens => true) do + mark = position(ps) + @check peek(ps) == K"(" + bump(ps, TRIVIA_FLAG) # K"(" + after_paren_mark = position(ps) + k = peek(ps) + if k == K")" + # () ==> (tuple-p) + bump(ps, TRIVIA_FLAG) emit(ps, mark, K"tuple", PARENS_FLAG) - elseif opts.is_block - # Blocks - # (;;) ==> (block-p) - # (a=1;) ==> (block-p (= a 1)) - # (a;b;;c) ==> (block-p a b c) - # (a=1; b=2) ==> (block-p (= a 1) (= b 2)) - emit(ps, mark, K"block", PARENS_FLAG) - else - # Parentheses used for grouping - # (a * b) ==> (parens (call-i * a b)) - # (a=1) ==> (parens (= a 1)) - # (x) ==> (parens x) - # (a...) ==> (parens (... a)) + elseif is_syntactic_operator(k) + # allow :(=) etc in unchecked contexts, eg quotes + # :(=) ==> (quote-: (parens =)) + parse_atom(ps, check_identifiers) + bump_closing_token(ps, K")") emit(ps, mark, K"parens") + elseif !check_identifiers && k == K"::" && + peek(ps, 2, skip_newlines=true) == K")" + # allow :(::) as a special case + # :(::) ==> (quote-: (parens ::)) + bump(ps) + bump(ps, TRIVIA_FLAG, skip_newlines=true) + emit(ps, mark, K"parens") + else + # Deal with all other cases of tuple or block syntax via the generic + # parse_brackets + initial_semi = peek(ps) == K";" + opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs + is_tuple = had_commas || (had_splat && num_semis >= 1) || + (initial_semi && (num_semis == 1 || num_subexprs > 0)) + return (needs_parameters=is_tuple, + is_tuple=is_tuple, + is_block=num_semis > 0) + end + if opts.is_tuple + # Tuple syntax with commas + # (x,) ==> (tuple-p x) + # (x,y) ==> (tuple-p x y) + # (x=1, y=2) ==> (tuple-p (= x 1) (= y 2)) + # + # Named tuple with initial semicolon + # (;) ==> (tuple-p (parameters)) + # (; a=1) ==> (tuple-p (parameters (= a 1))) + # + # Extra credit: nested parameters and frankentuples + # (x...;) ==> (tuple-p (... x) (parameters)) + # (x...; y) ==> (tuple-p (... x) (parameters y)) + # (; a=1; b=2) ==> (tuple-p (parameters (= a 1)) (parameters (= b 2))) + # (a; b; c,d) ==> (tuple-p a (parameters b) (parameters c d)) + # (a=1, b=2; c=3) ==> (tuple-p (= a 1) (= b 2) (parameters (= c 3))) + emit(ps, mark, K"tuple", PARENS_FLAG) + elseif opts.is_block + # Blocks + # (;;) ==> (block-p) + # (a=1;) ==> (block-p (= a 1)) + # (a;b;;c) ==> (block-p a b c) + # (a=1; b=2) ==> (block-p (= a 1) (= b 2)) + emit(ps, mark, K"block", PARENS_FLAG) + else + # Parentheses used for grouping + # (a * b) ==> (parens (call-i * a b)) + # (a=1) ==> (parens (= a 1)) + # (x) ==> (parens x) + # (a...) ==> (parens (... a)) + emit(ps, mark, K"parens") + end end end end From 2090e860547c94fcf3f1a141a0291a834925f16c Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Fri, 26 Jul 2024 11:42:01 +0200 Subject: [PATCH 2/3] fixup implementation --- src/parse_stream.jl | 4 +- src/parser.jl | 141 ++++++++++++++++++++++---------------------- 2 files changed, 74 insertions(+), 71 deletions(-) diff --git a/src/parse_stream.jl b/src/parse_stream.jl index dcbb52af..c91d78da 100644 --- a/src/parse_stream.jl +++ b/src/parse_stream.jl @@ -262,7 +262,7 @@ mutable struct ParseStream # May be different from VERSION! version::Tuple{Int,Int} - function ParseStream(text_buf::Vector{UInt8}, text_root, next_byte::Integer, + function ParseStream(text_buf::Union{Vector{UInt8}, Memory{UInt8}}, text_root, next_byte::Integer, version::VersionNumber) io = IOBuffer(text_buf) seek(io, next_byte-1) @@ -404,7 +404,7 @@ function _buffer_lookahead_tokens(lexer, lookahead) end end end - + # Return the index of the next byte of the input function _next_byte(stream) last(stream.tokens).next_byte diff --git a/src/parser.jl b/src/parser.jl index 7308eb72..da0c838d 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -21,24 +21,27 @@ struct ParseState whitespace_newline::Bool # Enable parsing `where` with high precedence where_enabled::Bool + in_parens::Bool end # Normal context function ParseState(stream::ParseStream) - ParseState(stream, true, false, false, false, false, true) + ParseState(stream, true, false, false, false, false, true, false) end function ParseState(ps::ParseState; range_colon_enabled=nothing, space_sensitive=nothing, for_generator=nothing, end_symbol=nothing, whitespace_newline=nothing, - where_enabled=nothing) + where_enabled=nothing, + in_parens=nothing) ParseState(ps.stream, range_colon_enabled === nothing ? ps.range_colon_enabled : range_colon_enabled, space_sensitive === nothing ? ps.space_sensitive : space_sensitive, for_generator === nothing ? ps.for_generator : for_generator, end_symbol === nothing ? ps.end_symbol : end_symbol, whitespace_newline === nothing ? ps.whitespace_newline : whitespace_newline, - where_enabled === nothing ? ps.where_enabled : where_enabled) + where_enabled === nothing ? ps.where_enabled : where_enabled, + in_parens === nothing ? ps.in_parens : in_parens) end using Base.ScopedValues @@ -59,7 +62,7 @@ end function with_space_sensitive(ps::ParseState) ParseState(ps, space_sensitive=true, - whitespace_newline=in_parens[]) + whitespace_newline=ps.in_parens) end # Convenient wrappers for ParseStream @@ -3009,73 +3012,73 @@ function parse_paren(ps::ParseState, check_identifiers=true) ps = ParseState(ps, range_colon_enabled=true, space_sensitive=false, where_enabled=true, - whitespace_newline=true) - with(in_parens => true) do - mark = position(ps) - @check peek(ps) == K"(" - bump(ps, TRIVIA_FLAG) # K"(" - after_paren_mark = position(ps) - k = peek(ps) - if k == K")" - # () ==> (tuple-p) - bump(ps, TRIVIA_FLAG) + whitespace_newline=true, + in_parens=peek(ps, 2) == K"@") + @show peek(ps, 2) + mark = position(ps) + @check peek(ps) == K"(" + bump(ps, TRIVIA_FLAG) # K"(" + after_paren_mark = position(ps) + k = peek(ps) + if k == K")" + # () ==> (tuple-p) + bump(ps, TRIVIA_FLAG) + emit(ps, mark, K"tuple", PARENS_FLAG) + elseif is_syntactic_operator(k) + # allow :(=) etc in unchecked contexts, eg quotes + # :(=) ==> (quote-: (parens =)) + parse_atom(ps, check_identifiers) + bump_closing_token(ps, K")") + emit(ps, mark, K"parens") + elseif !check_identifiers && k == K"::" && + peek(ps, 2, skip_newlines=true) == K")" + # allow :(::) as a special case + # :(::) ==> (quote-: (parens ::)) + bump(ps) + bump(ps, TRIVIA_FLAG, skip_newlines=true) + emit(ps, mark, K"parens") + else + # Deal with all other cases of tuple or block syntax via the generic + # parse_brackets + initial_semi = peek(ps) == K";" + opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs + is_tuple = had_commas || (had_splat && num_semis >= 1) || + (initial_semi && (num_semis == 1 || num_subexprs > 0)) + return (needs_parameters=is_tuple, + is_tuple=is_tuple, + is_block=num_semis > 0) + end + if opts.is_tuple + # Tuple syntax with commas + # (x,) ==> (tuple-p x) + # (x,y) ==> (tuple-p x y) + # (x=1, y=2) ==> (tuple-p (= x 1) (= y 2)) + # + # Named tuple with initial semicolon + # (;) ==> (tuple-p (parameters)) + # (; a=1) ==> (tuple-p (parameters (= a 1))) + # + # Extra credit: nested parameters and frankentuples + # (x...;) ==> (tuple-p (... x) (parameters)) + # (x...; y) ==> (tuple-p (... x) (parameters y)) + # (; a=1; b=2) ==> (tuple-p (parameters (= a 1)) (parameters (= b 2))) + # (a; b; c,d) ==> (tuple-p a (parameters b) (parameters c d)) + # (a=1, b=2; c=3) ==> (tuple-p (= a 1) (= b 2) (parameters (= c 3))) emit(ps, mark, K"tuple", PARENS_FLAG) - elseif is_syntactic_operator(k) - # allow :(=) etc in unchecked contexts, eg quotes - # :(=) ==> (quote-: (parens =)) - parse_atom(ps, check_identifiers) - bump_closing_token(ps, K")") - emit(ps, mark, K"parens") - elseif !check_identifiers && k == K"::" && - peek(ps, 2, skip_newlines=true) == K")" - # allow :(::) as a special case - # :(::) ==> (quote-: (parens ::)) - bump(ps) - bump(ps, TRIVIA_FLAG, skip_newlines=true) - emit(ps, mark, K"parens") + elseif opts.is_block + # Blocks + # (;;) ==> (block-p) + # (a=1;) ==> (block-p (= a 1)) + # (a;b;;c) ==> (block-p a b c) + # (a=1; b=2) ==> (block-p (= a 1) (= b 2)) + emit(ps, mark, K"block", PARENS_FLAG) else - # Deal with all other cases of tuple or block syntax via the generic - # parse_brackets - initial_semi = peek(ps) == K";" - opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs - is_tuple = had_commas || (had_splat && num_semis >= 1) || - (initial_semi && (num_semis == 1 || num_subexprs > 0)) - return (needs_parameters=is_tuple, - is_tuple=is_tuple, - is_block=num_semis > 0) - end - if opts.is_tuple - # Tuple syntax with commas - # (x,) ==> (tuple-p x) - # (x,y) ==> (tuple-p x y) - # (x=1, y=2) ==> (tuple-p (= x 1) (= y 2)) - # - # Named tuple with initial semicolon - # (;) ==> (tuple-p (parameters)) - # (; a=1) ==> (tuple-p (parameters (= a 1))) - # - # Extra credit: nested parameters and frankentuples - # (x...;) ==> (tuple-p (... x) (parameters)) - # (x...; y) ==> (tuple-p (... x) (parameters y)) - # (; a=1; b=2) ==> (tuple-p (parameters (= a 1)) (parameters (= b 2))) - # (a; b; c,d) ==> (tuple-p a (parameters b) (parameters c d)) - # (a=1, b=2; c=3) ==> (tuple-p (= a 1) (= b 2) (parameters (= c 3))) - emit(ps, mark, K"tuple", PARENS_FLAG) - elseif opts.is_block - # Blocks - # (;;) ==> (block-p) - # (a=1;) ==> (block-p (= a 1)) - # (a;b;;c) ==> (block-p a b c) - # (a=1; b=2) ==> (block-p (= a 1) (= b 2)) - emit(ps, mark, K"block", PARENS_FLAG) - else - # Parentheses used for grouping - # (a * b) ==> (parens (call-i * a b)) - # (a=1) ==> (parens (= a 1)) - # (x) ==> (parens x) - # (a...) ==> (parens (... a)) - emit(ps, mark, K"parens") - end + # Parentheses used for grouping + # (a * b) ==> (parens (call-i * a b)) + # (a=1) ==> (parens (= a 1)) + # (x) ==> (parens x) + # (a...) ==> (parens (... a)) + emit(ps, mark, K"parens") end end end From 4b1f7662ee82df020220d3e1aea1fd9232f458c5 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Fri, 26 Jul 2024 11:43:11 +0200 Subject: [PATCH 3/3] don't use new features --- Project.toml | 2 +- src/parser.jl | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/Project.toml b/Project.toml index 89de4864..6ffbaa40 100644 --- a/Project.toml +++ b/Project.toml @@ -4,7 +4,7 @@ authors = ["Claire Foster and contributors"] version = "0.4.6" [compat] -julia = "1.11" +julia = "1.0" [deps] diff --git a/src/parser.jl b/src/parser.jl index da0c838d..020ab241 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -44,9 +44,6 @@ function ParseState(ps::ParseState; range_colon_enabled=nothing, in_parens === nothing ? ps.in_parens : in_parens) end -using Base.ScopedValues -const in_parens = ScopedValue(false) - # Functions to change parse state function normal_context(ps::ParseState)