From 296cd5ee4cded19cec84d8328da0b5ee1ce98f05 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 9 Jul 2023 07:17:45 +1000 Subject: [PATCH] AST: parse `a.b` as `(. a b)` (#325) I was working a bit on macro expansion - particularly `quote` (quasiquote) expansion with `$` interpolations - and I've found that it's weird and inconvenient that we parse `a.b` into `(. a (quote b))`. Specifically, the part that's weird here is that we emit `(quote b)` for the field name even though this is "not quote syntax": this should not yield a syntax literal during lowering, and is thus a semantic mismatch with actual quote syntax of the form `:(a + b)` or `quote a+b end`. * Why is this a problem? It means we need special rules to distinguish actual syntax literals from field names. * But can we really change this? Surely this AST form had a purpose? Yes! A long time ago Julia supported `a.(b)` syntax to mean `getfield(a, b)`, which would naturally have been parsed as `(. a b)`. However this was deprecated as part of adding broadcast syntax in https://github.com/JuliaLang/julia/pull/15032 Here we simplify by parsing `a.b` as `(. a b)` instead, with the second argument implied to be a field name. --- docs/src/reference.md | 1 + src/expr.jl | 15 +++++++--- src/parser.jl | 66 +++++++++++++++++++------------------------ test/expr.jl | 13 +++++++-- test/parser.jl | 60 +++++++++++++++++++-------------------- 5 files changed, 81 insertions(+), 74 deletions(-) diff --git a/docs/src/reference.md b/docs/src/reference.md index 22322b55..67ced3f1 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -64,6 +64,7 @@ class of tokenization errors and lets the parser deal with them. ### Improvements for AST inconsistencies +* Field access syntax like `a.b` is parsed as `(. a b)` rather than `(. 
a (quote b))` to avoid the inconsistency between this and actual quoted syntax literals like `:(b)` and `quote b end` ([#324](https://github.com/JuliaLang/JuliaSyntax.jl/issues/324)) * Dotted call syntax like `f.(a,b)` and `a .+ b` has been made consistent with the `K"dotcall"` head (#90) * Standalone dotted operators are always parsed as `(. op)`. For example `.*(x,y)` is parsed as `(call (. *) x y)` (#240) * The `K"="` kind is used for keyword syntax rather than `kw`, to avoid various inconsistencies and ambiguities (#103) diff --git a/src/expr.jl b/src/expr.jl index bfc2f391..f674b984 100644 --- a/src/expr.jl +++ b/src/expr.jl @@ -259,10 +259,17 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args[1] = Symbol(".", args[1]) end end - elseif k == K"." && length(args) == 1 && is_operator(childheads[1]) - # Hack: Here we preserve the head of the operator to determine whether - # we need to coalesce it with the dot into a single symbol later on. - args[1] = (childheads[1], args[1]) + elseif k == K"." + if length(args) == 2 + a2 = args[2] + if !@isexpr(a2, :quote) && !(a2 isa QuoteNode) + args[2] = QuoteNode(a2) + end + elseif length(args) == 1 && is_operator(childheads[1]) + # Hack: Here we preserve the head of the operator to determine whether + # we need to coalesce it with the dot into a single symbol later on. + args[1] = (childheads[1], args[1]) + end elseif k == K"ref" || k == K"curly" # Move parameters blocks to args[2] _reorder_parameters!(args, 2) diff --git a/src/parser.jl b/src/parser.jl index 9ca609d8..042e811d 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -1437,7 +1437,7 @@ end # * Adjoint suffix like a' # * String macros like a"str" b"""str""" c`str` d```str``` # -# f(a).g(b) ==> (call (. (call f a) (quote g)) b) +# f(a).g(b) ==> (call (. 
(call f a) g) b) # # flisp: parse-call-chain, parse-call-with-initial-ex function parse_call_chain(ps::ParseState, mark, is_macrocall=false) @@ -1448,7 +1448,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) end # source range of the @-prefixed part of a macro macro_atname_range = nothing - # $A.@x ==> (macrocall (. ($ A) (quote @x))) + # $A.@x ==> (macrocall (. ($ A) @x)) maybe_strmac = true # We record the last component of chains of dot-separated identifiers so we # know which identifier was the macro name. @@ -1470,22 +1470,22 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # [@foo x] ==> (vect (macrocall @foo x)) # [@foo] ==> (vect (macrocall @foo)) # @var"#" a ==> (macrocall (var @#) a) - # A.@x y ==> (macrocall (. A (quote @x)) y) - # A.@var"#" a ==> (macrocall (. A (quote (var @#))) a) + # A.@x y ==> (macrocall (. A @x) y) + # A.@var"#" a ==> (macrocall (. A (var @#)) a) # @+x y ==> (macrocall @+ x y) - # A.@.x ==> (macrocall (. A (quote @.)) x) + # A.@.x ==> (macrocall (. A @.) x) fix_macro_name_kind!(ps, macro_name_position) let ps = with_space_sensitive(ps) # Space separated macro arguments - # A.@foo a b ==> (macrocall (. A (quote @foo)) a b) - # @A.foo a b ==> (macrocall (. A (quote @foo)) a b) + # A.@foo a b ==> (macrocall (. A @foo) a b) + # @A.foo a b ==> (macrocall (. A @foo) a b) n_args = parse_space_separated_exprs(ps) is_doc_macro = peek_behind(ps, macro_name_position).orig_kind == K"doc" if is_doc_macro && n_args == 1 # Parse extended @doc args on next line # @doc x\ny ==> (macrocall @doc x y) - # A.@doc x\ny ==> (macrocall (. A (quote @doc)) doc x y) - # @A.doc x\ny ==> (macrocall (. A (quote @doc)) doc x y) + # A.@doc x\ny ==> (macrocall (. A @doc) x y) + # @A.doc x\ny ==> (macrocall (. 
A @doc) x y) # @doc x y\nz ==> (macrocall @doc x y) # # Excluded cases @@ -1518,8 +1518,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) end if is_macrocall # @x(a, b) ==> (macrocall-p @x a b) - # A.@x(y) ==> (macrocall-p (. A (quote @x)) y) - # A.@x(y).z ==> (. (macrocall-p (. A (quote @x)) y) (quote z)) + # A.@x(y) ==> (macrocall-p (. A @x) y) + # A.@x(y).z ==> (. (macrocall-p (. A @x) y) z) fix_macro_name_kind!(ps, macro_name_position) is_macrocall = false macro_atname_range = nothing @@ -1535,8 +1535,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # @S[a,b] ==> (macrocall @S (vect a b)) # @S[a b] ==> (macrocall @S (hcat a b)) # @S[a; b] ==> (macrocall @S (vcat a b)) - # A.@S[a] ==> (macrocall (. A (quote @S)) (vect a)) - # @S[a].b ==> (. (macrocall @S (vect a)) (quote b)) + # A.@S[a] ==> (macrocall (. A @S) (vect a)) + # @S[a].b ==> (. (macrocall @S (vect a)) b) #v1.7: @S[a ;; b] ==> (macrocall @S (ncat-2 a b)) #v1.6: @S[a ;; b] ==> (macrocall @S (error (ncat-2 a b))) fix_macro_name_kind!(ps, macro_name_position) @@ -1565,14 +1565,14 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) check_ncat_compat(ps, mark, ckind) end elseif k == K"." - # x .y ==> (. x (error-t) (quote y)) + # x .y ==> (. x (error-t) y) bump_disallowed_space(ps) emark = position(ps) if !isnothing(macro_atname_range) # Allow `@` in macrocall only in first and last position - # A.B.@x ==> (macrocall (. (. A (quote B)) (quote @x))) - # @A.B.x ==> (macrocall (. (. A (quote B)) (quote @x))) - # A.@B.x ==> (macrocall (. (. A (error-t) B) (quote @x))) + # A.B.@x ==> (macrocall (. (. A B) @x)) + # @A.B.x ==> (macrocall (. (. A B) @x)) + # A.@B.x ==> (macrocall (. (. 
A B) (error-t) @x)) emit_diagnostic(ps, macro_atname_range..., error="`@` must appear on first or last macro name component") bump(ps, TRIVIA_FLAG, error="Unexpected `.` after macro name") @@ -1603,28 +1603,23 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) emit(ps, m, K"quote", COLON_QUOTE) emit(ps, mark, K".") elseif k == K"$" - # f.$x ==> (. f (inert ($ x))) - # f.$(x+y) ==> (. f (inert ($ (call + x y)))) - # A.$B.@x ==> (macrocall (. (. A (inert ($ B))) (quote @x))) - # @A.$x a ==> (macrocall (. A (inert (error x))) a) + # f.$x ==> (. f ($ x)) + # f.$(x+y) ==> (. f ($ (parens (call-i x + y)))) + # A.$B.@x ==> (macrocall (. (. A ($ B)) @x)) + # @A.$x a ==> (macrocall (. A (error x)) a) m = position(ps) bump(ps, TRIVIA_FLAG) parse_atom(ps) emit(ps, m, K"$") macro_name_position = position(ps) - # We need `inert` rather than `quote` here for subtle reasons: - # We need the expression expander to "see through" the quote - # around the `$x` in `:(f.$x)`, so that the `$x` is expanded - # even though it's double quoted. - emit(ps, m, K"inert") emit(ps, mark, K".") elseif k == K"@" # A macro call after some prefix A has been consumed - # A.@x ==> (macrocall (. A (quote @x))) - # A.@x a ==> (macrocall (. A (quote @x)) a) + # A.@x ==> (macrocall (. A @x)) + # A.@x a ==> (macrocall (. A @x) a) m = position(ps) if is_macrocall - # @A.B.@x a ==> (macrocall (. (. A (quote B)) (quote (error-t) @x)) a) + # @A.B.@x a ==> (macrocall (. (. A B) (error-t) @x) a) bump(ps, TRIVIA_FLAG, error="repeated `@` in macro module path") else bump(ps, TRIVIA_FLAG) @@ -1633,7 +1628,6 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) parse_macro_name(ps) macro_name_position = position(ps) macro_atname_range = (m, position(ps)) - emit(ps, m, K"quote") emit(ps, mark, K".") elseif k == K"'" # TODO: Reclaim dotted postfix operators :-) @@ -1643,12 +1637,10 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) error="the .' 
operator for transpose is discontinued") else # Field/property syntax - # f.x.y ==> (. (. f (quote x)) (quote y)) - m = position(ps) + # f.x.y ==> (. (. f x) y) parse_atom(ps, false) macro_name_position = position(ps) maybe_strmac_1 = true - emit(ps, m, K"quote") emit(ps, mark, K".") end elseif k == K"'" && !preceding_whitespace(t) @@ -1665,8 +1657,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) parse_call_arglist(ps, K"}") if is_macrocall # @S{a,b} ==> (macrocall S (braces a b)) - # A.@S{a} ==> (macrocall (. A (quote @S)) (braces a)) - # @S{a}.b ==> (. (macrocall @S (braces a)) (quote b)) + # A.@S{a} ==> (macrocall (. A @S) (braces a)) + # @S{a}.b ==> (. (macrocall @S (braces a)) b) fix_macro_name_kind!(ps, macro_name_position) emit(ps, m, K"braces") emit(ps, mark, K"macrocall") @@ -2118,7 +2110,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool) # function ()(x) end ==> (function (call (tuple-p) x) (block)) emit(ps, mark, K"tuple", PARENS_FLAG) else - # function (A).f() end ==> (function (call (. (parens A) (quote f))) (block)) + # function (A).f() end ==> (function (call (. (parens A) f)) (block)) # function (:)() end ==> (function (call (parens :)) (block)) # function (x::T)() end ==> (function (call (parens (::-i x T))) (block)) # function (::T)() end ==> (function (call (parens (::-pre T))) (block)) @@ -2147,7 +2139,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool) # Parse function argument list # function f(x,y) end ==> (function (call f x y) (block)) # function f{T}() end ==> (function (call (curly f T)) (block)) - # function A.f() end ==> (function (call (. A (quote f))) (block)) + # function A.f() end ==> (function (call (. 
A f)) (block)) parse_call_chain(ps, mark) if peek_behind(ps).kind != K"call" # function f body end ==> (function (error f) (block body)) diff --git a/test/expr.jl b/test/expr.jl index 8e6e37ea..96e711d7 100644 --- a/test/expr.jl +++ b/test/expr.jl @@ -17,9 +17,6 @@ # Compatibility hack for VERSION >= v"1.4" # https://github.com/JuliaLang/julia/pull/34077 @test parseatom(":true") == Expr(:quote, true) - - # Handling of K"inert" - @test parsestmt("a.\$b") == Expr(:., :a, QuoteNode(Expr(:$, :b))) end @testset "Line numbers" begin @@ -386,6 +383,16 @@ Expr(:tuple, Expr(:parameters, Expr(:kw, :a, 1))) end + @testset "Field access syntax" begin + @test parsestmt("a.b") == Expr(:., :a, QuoteNode(:b)) + @test parsestmt("a.\$b") == Expr(:., :a, QuoteNode(Expr(:$, :b))) + @test parsestmt("a.:b") == Expr(:., :a, QuoteNode(:b)) + @test parsestmt("a.@b x") == Expr(:macrocall, + Expr(:., :a, QuoteNode(Symbol("@b"))), + LineNumberNode(1), + :x) + end + @testset "dotcall / dotted operators" begin @test parsestmt("f.(x,y)") == Expr(:., :f, Expr(:tuple, :x, :y)) @test parsestmt("f.(x=1)") == Expr(:., :f, Expr(:tuple, Expr(:kw, :x, 1))) diff --git a/test/parser.jl b/test/parser.jl index 161323fc..9291c7f6 100644 --- a/test/parser.jl +++ b/test/parser.jl @@ -312,8 +312,8 @@ tests = [ "\$f(x)" => "(call (\$ f) x)" ".&(x,y)" => "(call (. &) x y)" # parse_call_chain - "f(a).g(b)" => "(call (. (call f a) (quote g)) b)" - "\$A.@x" => "(macrocall (. (\$ A) (quote @x)))" + "f(a).g(b)" => "(call (. (call f a) g) b)" + "\$A.@x" => "(macrocall (. (\$ A) @x))" # non-errors in space sensitive contexts "[f (x)]" => "(hcat f (parens x))" @@ -322,16 +322,16 @@ tests = [ "@foo a b" => "(macrocall @foo a b)" "@foo (x)" => "(macrocall @foo (parens x))" "@foo (x,y)" => "(macrocall @foo (tuple-p x y))" - "A.@foo a b" => "(macrocall (. A (quote @foo)) a b)" - "@A.foo a b" => "(macrocall (. A (quote @foo)) a b)" + "A.@foo a b" => "(macrocall (. A @foo) a b)" + "@A.foo a b" => "(macrocall (. 
A @foo) a b)" "[@foo x]" => "(vect (macrocall @foo x))" "[@foo]" => "(vect (macrocall @foo))" "@var\"#\" a" => "(macrocall (var @#) a)" "@(A) x" => "(macrocall (parens @A) x)" - "A.@x y" => "(macrocall (. A (quote @x)) y)" - "A.@var\"#\" a"=> "(macrocall (. A (quote (var @#))) a)" + "A.@x y" => "(macrocall (. A @x) y)" + "A.@var\"#\" a"=> "(macrocall (. A (var @#)) a)" "@+x y" => "(macrocall @+ x y)" - "A.@.x" => "(macrocall (. A (quote @.)) x)" + "A.@.x" => "(macrocall (. A @.) x)" # Macro names "@! x" => "(macrocall @! x)" "@.. x" => "(macrocall @.. x)" @@ -339,8 +339,8 @@ tests = [ "@[x] y z" => "(macrocall (error (vect x)) y z)" # Special @doc parsing rules "@doc x\ny" => "(macrocall @doc x y)" - "A.@doc x\ny" => "(macrocall (. A (quote @doc)) x y)" - "@A.doc x\ny" => "(macrocall (. A (quote @doc)) x y)" + "A.@doc x\ny" => "(macrocall (. A @doc) x y)" + "@A.doc x\ny" => "(macrocall (. A @doc) x y)" "@doc x y\nz" => "(macrocall @doc x y)" "@doc x\n\ny" => "(macrocall @doc x)" "@doc x\nend" => "(macrocall @doc x)" @@ -352,8 +352,8 @@ tests = [ "(a=1)()" => "(call (parens (= a 1)))" "f (a)" => "(call f (error-t) a)" "@x(a, b)" => "(macrocall-p @x a b)" - "A.@x(y)" => "(macrocall-p (. A (quote @x)) y)" - "A.@x(y).z" => "(. (macrocall-p (. A (quote @x)) y) (quote z))" + "A.@x(y)" => "(macrocall-p (. A @x) y)" + "A.@x(y).z" => "(. (macrocall-p (. A @x) y) z)" # do "f() do\nend" => "(do (call f) (tuple) (block))" "f() do ; body end" => "(do (call f) (tuple) (block body))" @@ -364,8 +364,8 @@ tests = [ "@S[a,b]" => "(macrocall @S (vect a b))" "@S[a b]" => "(macrocall @S (hcat a b))" "@S[a; b]" => "(macrocall @S (vcat a b))" - "A.@S[a]" => "(macrocall (. A (quote @S)) (vect a))" - "@S[a].b" => "(. (macrocall @S (vect a)) (quote b))" + "A.@S[a]" => "(macrocall (. A @S) (vect a))" + "@S[a].b" => "(. 
(macrocall @S (vect a)) b)" ((v=v"1.7",), "@S[a ;; b]") => "(macrocall @S (ncat-2 a b))" ((v=v"1.6",), "@S[a ;; b]") => "(macrocall @S (error (ncat-2 a b)))" "a[i]" => "(ref a i)" @@ -383,9 +383,9 @@ tests = [ # Dotted forms # Allow `@` in macrocall only in first and last position - "A.B.@x" => "(macrocall (. (. A (quote B)) (quote @x)))" - "@A.B.x" => "(macrocall (. (. A (quote B)) (quote @x)))" - "A.@B.x" => "(macrocall (. (. A (quote B)) (error-t) (quote @x)))" + "A.B.@x" => "(macrocall (. (. A B) @x))" + "@A.B.x" => "(macrocall (. (. A B) @x))" + "A.@B.x" => "(macrocall (. (. A B) (error-t) @x))" "@M.(x)" => "(macrocall (dotcall @M (error-t) x))" "f.(a,b)" => "(dotcall f a b)" "f.(a=1; b=2)" => "(dotcall f (= a 1) (parameters (= b 2)))" @@ -395,27 +395,27 @@ tests = [ "A.:+" => "(. A (quote-: +))" "A.:.+" => "(. A (quote-: (. +)))" "A.: +" => "(. A (quote-: (error-t) +))" - "f.\$x" => "(. f (inert (\$ x)))" - "f.\$(x+y)" => "(. f (inert (\$ (parens (call-i x + y)))))" - "A.\$B.@x" => "(macrocall (. (. A (inert (\$ B))) (quote @x)))" - "@A.\$x a" => "(macrocall (. A (inert (error x))) a)" - "A.@x" => "(macrocall (. A (quote @x)))" - "A.@x a" => "(macrocall (. A (quote @x)) a)" - "@A.B.@x a" => "(macrocall (. (. A (quote B)) (quote (error-t) @x)) a)" + "f.\$x" => "(. f (\$ x))" + "f.\$(x+y)" => "(. f (\$ (parens (call-i x + y))))" + "A.\$B.@x" => "(macrocall (. (. A (\$ B)) @x))" + "@A.\$x a" => "(macrocall (. A (error x)) a)" + "A.@x" => "(macrocall (. A @x))" + "A.@x a" => "(macrocall (. A @x) a)" + "@A.B.@x a" => "(macrocall (. (. A B) (error-t) @x) a)" # .' discontinued "f.'" => "(wrapper f (error-t '))" # Field/property syntax - "f.x.y" => "(. (. f (quote x)) (quote y))" - "x .y" => "(. x (error-t) (quote y))" + "f.x.y" => "(. (. f x) y)" + "x .y" => "(. x (error-t) y)" # Adjoint "f'" => "(call-post f ')" "f'ᵀ" => "(call-post f 'ᵀ)" # Curly calls "S {a}" => "(curly S (error-t) a)" - "A.@S{a}" => "(macrocall (. 
A (quote @S)) (braces a))" + "A.@S{a}" => "(macrocall (. A @S) (braces a))" "@S{a,b}" => "(macrocall @S (braces a b))" - "A.@S{a}" => "(macrocall (. A (quote @S)) (braces a))" - "@S{a}.b" => "(. (macrocall @S (braces a)) (quote b))" + "A.@S{a}" => "(macrocall (. A @S) (braces a))" + "@S{a}.b" => "(. (macrocall @S (braces a)) b)" "S{a,b}" => "(curly S a b)" # String macros "x\"str\"" => """(macrocall @x_str (string-r "str"))""" @@ -554,7 +554,7 @@ tests = [ "function (x=1) end" => "(function (tuple-p (= x 1)) (block))" "function (;x=1) end" => "(function (tuple-p (parameters (= x 1))) (block))" "function ()(x) end" => "(function (call (tuple-p) x) (block))" - "function (A).f() end" => "(function (call (. (parens A) (quote f))) (block))" + "function (A).f() end" => "(function (call (. (parens A) f)) (block))" "function (:)() end" => "(function (call (parens :)) (block))" "function (x::T)() end"=> "(function (call (parens (::-i x T))) (block))" "function (::g(x))() end" => "(function (call (parens (::-pre (call g x)))) (block))" @@ -575,7 +575,7 @@ tests = [ # Function argument list "function f(x,y) end" => "(function (call f x y) (block))" "function f{T}() end" => "(function (call (curly f T)) (block))" - "function A.f() end" => "(function (call (. A (quote f))) (block))" + "function A.f() end" => "(function (call (. A f)) (block))" "function f body end" => "(function (error f) (block body))" "function f()::T end" => "(function (::-i (call f) T) (block))" "function f()::g(T) end" => "(function (::-i (call f) (call g T)) (block))"