diff --git a/src/expr.jl b/src/expr.jl index 748f3516..4d536ef2 100644 --- a/src/expr.jl +++ b/src/expr.jl @@ -99,46 +99,46 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, args[1] = _to_expr(node_args[1], need_linenodes=false) args[2] = _to_expr(node_args[2]) else - eq_to_kw = headsym == :call && !has_flags(node, INFIX_FLAG) || - headsym == :ref || - (headsym == :parameters && !inside_vect_or_braces) || - (headsym == :tuple && inside_dot_expr) + eq_to_kw_in_call = + headsym == :call && is_prefix_call(node) || + headsym == :ref + eq_to_kw_all = headsym == :parameters && !inside_vect_or_braces || + (headsym == :tuple && inside_dot_expr) in_dot = headsym == :. in_vb = headsym == :vect || headsym == :braces - if insert_linenums - if isempty(node_args) - push!(args, source_location(LineNumberNode, node.source, node.position)) - else - for i in 1:length(node_args) - n = node_args[i] - args[2*i-1] = source_location(LineNumberNode, n.source, n.position) - args[2*i] = _to_expr(n, - eq_to_kw=eq_to_kw, - inside_dot_expr=in_dot, - inside_vect_or_braces=in_vb) - end - end + if insert_linenums && isempty(node_args) + push!(args, source_location(LineNumberNode, node.source, node.position)) else for i in 1:length(node_args) - args[i] = _to_expr(node_args[i], - eq_to_kw=eq_to_kw, - inside_dot_expr=in_dot, - inside_vect_or_braces=in_vb) + n = node_args[i] + if insert_linenums + args[2*i-1] = source_location(LineNumberNode, n.source, n.position) + end + eq_to_kw = eq_to_kw_in_call && i > 1 || eq_to_kw_all + args[insert_linenums ? 2*i : i] = + _to_expr(n, eq_to_kw=eq_to_kw, + inside_dot_expr=in_dot, + inside_vect_or_braces=in_vb) end end end - # Julia's standard `Expr` ASTs have children stored in a canonical - # order which is often not always source order. We permute the children - # here as necessary to get the canonical order. 
- if is_infix(node.raw) - args[2], args[1] = args[1], args[2] - end # Special cases for various expression heads loc = source_location(LineNumberNode, node.source, node.position) if headsym == :macrocall insert!(args, 2, loc) elseif headsym in (:call, :ref) + # Julia's standard `Expr` ASTs have children stored in a canonical + # order which is not always source order. We permute the children + # here as necessary to get the canonical order. + if is_infix_op_call(node) || is_postfix_op_call(node) + args[2], args[1] = args[1], args[2] + end + # Lower (call x ') to special ' head + if is_postfix_op_call(node) && args[1] == Symbol("'") + popfirst!(args) + headsym = Symbol("'") + end # Move parameters block to args[2] if length(args) > 1 && Meta.isexpr(args[end], :parameters) insert!(args, 2, args[end]) diff --git a/src/parse_stream.jl b/src/parse_stream.jl index 00790f1d..ec50ef76 100644 --- a/src/parse_stream.jl +++ b/src/parse_stream.jl @@ -5,18 +5,25 @@ # TODO: Use `primitive type SyntaxFlags 16 end` rather than an alias? const RawFlags = UInt16 const EMPTY_FLAGS = RawFlags(0) +# Applied to tokens which are syntax trivia after parsing const TRIVIA_FLAG = RawFlags(1<<0) -# Some of the following flags are head-specific and could probably be allowed -# to cover the same bits... 
-const INFIX_FLAG = RawFlags(1<<1) -# Record whether syntactic operators were dotted -const DOTOP_FLAG = RawFlags(1<<2) + +# Record whether operators are dotted +const DOTOP_FLAG = RawFlags(1<<1) +# Record whether operator has a suffix +const SUFFIXED_FLAG = RawFlags(1<<2) + +# Distinguish various syntaxes which are mapped to K"call" +const PREFIX_CALL_FLAG = RawFlags(0<<3) +const INFIX_FLAG = RawFlags(1<<3) +const PREFIX_OP_FLAG = RawFlags(2<<3) +const POSTFIX_OP_FLAG = RawFlags(3<<3) + +# The next two bits could overlap with the previous two if necessary # Set when kind == K"String" was triple-delimited as with """ or ``` -const TRIPLE_STRING_FLAG = RawFlags(1<<3) +const TRIPLE_STRING_FLAG = RawFlags(1<<5) # Set when a string or identifier needs "raw string" unescaping -const RAW_STRING_FLAG = RawFlags(1<<4) -# Record whether operator has a suffix -const SUFFIXED_FLAG = RawFlags(1<<6) +const RAW_STRING_FLAG = RawFlags(1<<6) # Token-only flag # Record whether a token had preceding whitespace @@ -34,6 +41,10 @@ function set_numeric_flags(n::Integer) f end +function call_type_flags(f::RawFlags) + f & 0b11000 +end + function numeric_flags(f::RawFlags) Int((f >> 8) % UInt8) end @@ -70,7 +81,9 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) if include_flag_suff && suffix_flags != EMPTY_FLAGS str = str*"-" is_trivia(head) && (str = str*"t") - is_infix(head) && (str = str*"i") + is_infix_op_call(head) && (str = str*"i") + is_prefix_op_call(head) && (str = str*"pre") + is_postfix_op_call(head) && (str = str*"post") has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"s") has_flags(head, RAW_STRING_FLAG) && (str = str*"r") is_suffixed(head) && (str = str*"S") @@ -90,8 +103,13 @@ flags(x) = flags(head(x)) # Predicates based on flags() has_flags(x, test_flags) = has_flags(flags(x), test_flags) +call_type_flags(x) = call_type_flags(flags(x)) + is_trivia(x) = has_flags(x, TRIVIA_FLAG) -is_infix(x) = has_flags(x, INFIX_FLAG) +is_prefix_call(x) = 
call_type_flags(x) == PREFIX_CALL_FLAG +is_infix_op_call(x) = call_type_flags(x) == INFIX_FLAG +is_prefix_op_call(x) = call_type_flags(x) == PREFIX_OP_FLAG +is_postfix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG is_dotted(x) = has_flags(x, DOTOP_FLAG) is_suffixed(x) = has_flags(x, SUFFIXED_FLAG) is_decorated(x) = is_dotted(x) || is_suffixed(x) diff --git a/src/parser.jl b/src/parser.jl index 9f968d03..fa7e8f6e 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -544,7 +544,7 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where { if k == K"~" if ps.space_sensitive && !preceding_whitespace(peek_token(ps, 2)) # Unary ~ in space sensitive context is not assignment precedence - # [a ~b] ==> (hcat a (call ~ b)) + # [a ~b] ==> (hcat a (call-pre ~ b)) return end # ~ is the only non-syntactic assignment-precedence operator. @@ -885,8 +885,8 @@ function parse_with_chains(ps::ParseState, down, is_op, chain_ops) is_both_unary_and_binary(t) && !preceding_whitespace(peek_token(ps, 2)) # The following is two elements of a hcat - # [x +y] ==> (hcat x (call + y)) - # [x+y +z] ==> (hcat (call-i x + y) (call + z)) + # [x +y] ==> (hcat x (call-pre + y)) + # [x+y +z] ==> (hcat (call-i x + y) (call-pre + z)) # Conversely the following are infix calls # [x +₁y] ==> (vect (call-i x +₁ y)) # [x+y+z] ==> (vect (call-i x + y z)) @@ -914,7 +914,7 @@ function parse_chain(ps::ParseState, down, op_kind) if ps.space_sensitive && preceding_whitespace(t) && is_both_unary_and_binary(t) && !preceding_whitespace(peek_token(ps, 2)) - # [x +y] ==> (hcat x (call + y)) + # [x +y] ==> (hcat x (call-pre + y)) break end bump(ps, TRIVIA_FLAG) @@ -948,16 +948,16 @@ function parse_unary_subtype(ps::ParseState) elseif k2 in KSet"{ (" # parse <:{T}(x::T) or <:(x::T) like other unary operators # <:{T}(x::T) ==> (call (curly <: T) (:: x T)) - # <:(x::T) ==> (<: (:: x T)) + # <:(x::T) ==> (<:-pre (:: x T)) parse_where(ps, parse_juxtapose) else - # <: A where B ==> (<: (where A B)) + 
# <: A where B ==> (<:-pre (where A B)) mark = position(ps) bump(ps, TRIVIA_FLAG) parse_where(ps, parse_juxtapose) # Flisp parser handled this, but I don't know how it can happen... @check peek_behind(ps).kind != K"tuple" - emit(ps, mark, k) + emit(ps, mark, k, PREFIX_OP_FLAG) end else parse_where(ps, parse_juxtapose) @@ -1015,7 +1015,7 @@ function is_juxtapose(ps, prev_k, t) # Not juxtaposition - parse_juxtapose will consume only the first token. # x.3 ==> x # sqrt(2)2 ==> (call sqrt 2) - # x' y ==> x + # x' y ==> (call-post x ') # x 'y ==> x return !preceding_whitespace(t) && @@ -1039,7 +1039,7 @@ end # 2(x) ==> (call-i 2 * x) # (2)(3)x ==> (call-i 2 * 3 x) # (x-1)y ==> (call-i (call-i x - 1) * y) -# x'y ==> x +# x'y ==> (call-i (call-post x ') * y) # # flisp: parse-juxtapose function parse_juxtapose(ps::ParseState) @@ -1098,11 +1098,11 @@ function parse_unary(ps::ParseState) if is_prec_power(k3) || k3 in KSet"[ {" # `[`, `{` (issue #18851) and `^` have higher precedence than # unary negation - # -2^x ==> (call - (call-i 2 ^ x)) - # -2[1, 3] ==> (call - (ref 2 1 3)) + # -2^x ==> (call-pre - (call-i 2 ^ x)) + # -2[1, 3] ==> (call-pre - (ref 2 1 3)) bump(ps) parse_factor(ps) - emit(ps, mark, K"call") + emit(ps, mark, K"call", PREFIX_OP_FLAG) else # We have a signed numeric literal. 
Glue the operator to the # next token to create a signed literal: @@ -1115,17 +1115,17 @@ function parse_unary(ps::ParseState) end end # Things which are not quite negative literals result in a unary call instead - # -0x1 ==> (call - 0x01) - # - 2 ==> (call - 2) - # .-2 ==> (call .- 2) + # -0x1 ==> (call-pre - 0x01) + # - 2 ==> (call-pre - 2) + # .-2 ==> (call-pre .- 2) parse_unary_call(ps) end # Parse calls to unary operators and prefix calls involving arbitrary operators # with bracketed arglists (as opposed to infix notation) # -# +a ==> (call + a) -# +(a,b) ==> (call + a b) +# +a ==> (call-pre + a) +# +(a,b) ==> (call-pre + a b) # # flisp: parse-unary-call function parse_unary_call(ps::ParseState) @@ -1208,33 +1208,33 @@ function parse_unary_call(ps::ParseState) else # Unary function calls with brackets as grouping, not an arglist if opts.is_block - # +(a;b) ==> (call + (block a b)) + # +(a;b) ==> (call-pre + (block a b)) emit(ps, mark_before_paren, K"block") end # Not a prefix operator call but a block; `=` is not `kw` - # +(a=1) ==> (call + (= a 1)) + # +(a=1) ==> (call-pre + (= a 1)) # Unary operators have lower precedence than ^ - # +(a)^2 ==> (call + (call-i a ^ 2)) - # +(a)(x,y)^2 ==> (call + (call-i (call a x y) ^ 2)) + # +(a)^2 ==> (call-pre + (call-i a ^ 2)) + # +(a)(x,y)^2 ==> (call-pre + (call-i (call a x y) ^ 2)) parse_call_chain(ps, mark_before_paren) parse_factor_with_initial_ex(ps, mark_before_paren) - emit(ps, mark, op_node_kind) + emit(ps, mark, op_node_kind, PREFIX_OP_FLAG) end else if is_unary_op(op_t) # Normal unary calls - # +x ==> (call + x) - # √x ==> (call √ x) - # ±x ==> (call ± x) + # +x ==> (call-pre + x) + # √x ==> (call-pre √ x) + # ±x ==> (call-pre ± x) bump(ps, op_tok_flags) else - # /x ==> (call (error /) x) - # +₁ x ==> (call (error +₁) x) - # .<: x ==> (call (error .<:) x) + # /x ==> (call-pre (error /) x) + # +₁ x ==> (call-pre (error +₁) x) + # .<: x ==> (call-pre (error .<:) x) bump(ps, error="not a unary operator") end 
parse_unary(ps) - emit(ps, mark, op_node_kind) + emit(ps, mark, op_node_kind, PREFIX_OP_FLAG) end end @@ -1433,6 +1433,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) finish_macroname(ps, mark, valid_macroname, macro_name_position) end # f(a,b) ==> (call f a b) + # f(a; b=1) ==> (call f a (parameters (= b 1))) + # (a=1)() ==> (call (= a 1)) # f (a) ==> (call f (error-t) a b) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) @@ -1457,6 +1459,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) K"]", ps.end_symbol) # a[i] ==> (ref a i) # a[i,j] ==> (ref a i j) + # (a=1)[] ==> (ref (= a 1)) # T[x y] ==> (typed_hcat T x y) # T[x ; y] ==> (typed_vcat T x y) # T[a b; c d] ==> (typed_vcat T (row a b) (row c d)) @@ -1562,15 +1565,10 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) this_iter_valid_macroname = true end elseif k == K"'" && !preceding_whitespace(t) - if !is_suffixed(t) - # f' ==> (' f) - bump(ps, TRIVIA_FLAG) - emit(ps, mark, k) - else - # f'ᵀ ==> (call 'ᵀ f) - bump(ps) - emit(ps, mark, K"call", INFIX_FLAG) - end + # f' ==> (call-post f ') + # f'ᵀ ==> (call-post f 'ᵀ) + bump(ps) + emit(ps, mark, K"call", POSTFIX_OP_FLAG) elseif k == K"{" # Type parameter curlies and macro calls if is_macrocall diff --git a/src/tokenize.jl b/src/tokenize.jl index 9bbcb7d7..ba3c782b 100644 --- a/src/tokenize.jl +++ b/src/tokenize.jl @@ -945,7 +945,6 @@ function lex_backslash(l::Lexer) return emit(l, K"\\") end -# TODO .op function lex_dot(l::Lexer) if accept(l, '.') if accept(l, '.') diff --git a/test/parser.jl b/test/parser.jl index 082198fa..1a050dad 100644 --- a/test/parser.jl +++ b/test/parser.jl @@ -48,7 +48,7 @@ tests = [ "a .+= b" => "(.+= a b)" "a, b = c, d" => "(= (tuple a b) (tuple c d))" "x, = xs" => "(= (tuple x) xs)" - "[a ~b]" => "(hcat a (call ~ b))" + "[a ~b]" => "(hcat a (call-pre ~ b))" "a ~ b" => "(call-i a ~ b)" "[a ~ b c]" => "(hcat (call-i a ~ b) c)" ], @@ -122,8 +122,8 @@ tests = [ "a + b .+ c" 
=> "(call-i (call-i a + b) .+ c)" # parse_with_chains: # The following is two elements of a hcat - "[x +y]" => "(hcat x (call + y))" - "[x+y +z]" => "(hcat (call-i x + y) (call + z))" + "[x +y]" => "(hcat x (call-pre + y))" + "[x+y +z]" => "(hcat (call-i x + y) (call-pre + z))" # Conversely the following are infix calls "[x +₁y]" => "(vect (call-i x +₁ y))" "[x+y+z]" => "(vect (call-i x + y z))" @@ -142,14 +142,14 @@ tests = [ "2(x)" => "(call-i 2 * x)" "(2)(3)x" => "(call-i 2 * 3 x)" "(x-1)y" => "(call-i (call-i x - 1) * y)" - "x'y" => "(call-i (' x) * y)" + "x'y" => "(call-i (call-post x ') * y)" # errors "\"a\"\"b\"" => "(call-i (string \"a\") * (error-t) (string \"b\"))" "\"a\"x" => "(call-i (string \"a\") * (error-t) x)" # Not juxtaposition - parse_juxtapose will consume only the first token. "x.3" => "x" "sqrt(2)2" => "(call sqrt 2)" - "x' y" => "(' x)" + "x' y" => "(call-post x ')" "x 'y" => "x" "0xenomorph" => "0x0e" ], @@ -157,13 +157,13 @@ tests = [ ":T" => "(quote T)" "in::T" => "(:: in T)" "isa::T" => "(:: isa T)" - "-2^x" => "(call - (call-i 2 ^ x))" - "-2[1, 3]" => "(call - (ref 2 1 3))" + "-2^x" => "(call-pre - (call-i 2 ^ x))" + "-2[1, 3]" => "(call-pre - (ref 2 1 3))" "-2" => "-2" "+2.0" => "2.0" - "-0x1" => "(call - 0x01)" - "- 2" => "(call - 2)" - ".-2" => "(call .- 2)" + "-0x1" => "(call-pre - 0x01)" + "- 2" => "(call-pre - 2)" + ".-2" => "(call-pre .- 2)" ], JuliaSyntax.parse_unary_call => [ # Standalone dotted operators are parsed as (|.| op) @@ -179,7 +179,7 @@ tests = [ "*(x)" => "(call * x)" # Prefix function calls for operators which are both binary and unary "+(a,b)" => "(call + a b)" - "+(a=1,)" => "(call + (= a 1))" + "+(a=1,)" => "(call + (= a 1))" => Expr(:call, :+, Expr(:kw, :a, 1)) "+(a...)" => "(call + (... 
a))" "+(a;b,c)" => "(call + a (parameters b c))" "+(;a)" => "(call + (parameters a))" @@ -189,19 +189,19 @@ tests = [ "+(a,b)^2" => "(call-i (call + a b) ^ 2)" "+(a,b)(x)^2" => "(call-i (call (call + a b) x) ^ 2)" # Unary function calls with brackets as grouping, not an arglist - "+(a;b)" => "(call + (block a b))" - "+(a=1)" => "(call + (= a 1))" + "+(a;b)" => "(call-pre + (block a b))" + "+(a=1)" => "(call-pre + (= a 1))" => Expr(:call, :+, Expr(:(=), :a, 1)) # Unary operators have lower precedence than ^ - "+(a)^2" => "(call + (call-i a ^ 2))" - "+(a)(x,y)^2" => "(call + (call-i (call a x y) ^ 2))" + "+(a)^2" => "(call-pre + (call-i a ^ 2))" + "+(a)(x,y)^2" => "(call-pre + (call-i (call a x y) ^ 2))" # Normal unary calls (see parse_unary) - "+x" => "(call + x)" - "√x" => "(call √ x)" - "±x" => "(call ± x)" + "+x" => "(call-pre + x)" + "√x" => "(call-pre √ x)" + "±x" => "(call-pre ± x)" # Not a unary operator - "/x" => "(call (error /) x)" - "+₁ x" => "(call (error +₁) x)" - ".<: x" => "(call (error .<:) x)" + "/x" => "(call-pre (error /) x)" + "+₁ x" => "(call-pre (error +₁) x)" + ".<: x" => "(call-pre (error .<:) x)" ], JuliaSyntax.parse_factor => [ "x^y" => "(call-i x ^ y)" @@ -218,8 +218,8 @@ tests = [ "<: \n" => "<:" "<: =" => "<:" "<:{T}(x::T)" => "(call (curly <: T) (:: x T))" - "<:(x::T)" => "(<: (:: x T))" - "<: A where B" => "(<: (where A B))" + "<:(x::T)" => "(<:-pre (:: x T))" + "<: A where B" => "(<:-pre (where A B))" # Really for parse_where "x where \n {T}" => "(where x T)" "x where {T,S}" => "(where x T S)" @@ -242,6 +242,9 @@ tests = [ "f(x)" => "(call f x)" "\$f(x)" => "(call (\$ f) x)" "f(a,b)" => "(call f a b)" + "f(a=1; b=2)" => "(call f (= a 1) (parameters (= b 2)))" => + Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1)) + "(a=1)()" => "(call (= a 1))" => Expr(:call, Expr(:(=), :a, 1)) "f (a)" => "(call f (error-t) a)" "f(a).g(b)" => "(call (. (call f a) (quote g)) b)" "\$A.@x" => "(macrocall (. 
(\$ A) (quote @x)))" @@ -284,12 +287,16 @@ tests = [ "a[i]" => "(ref a i)" "a [i]" => "(ref a (error-t) i)" "a[i,j]" => "(ref a i j)" + "(a=1)[]" => "(ref (= a 1))" => Expr(:ref, Expr(:(=), :a, 1)) "T[x y]" => "(typed_hcat T x y)" "T[x ; y]" => "(typed_vcat T x y)" "T[a b; c d]" => "(typed_vcat T (row a b) (row c d))" "T[x for x in xs]" => "(typed_comprehension T (generator x (= x xs)))" ((v=v"1.8",), "T[a ; b ;; c ; d]") => "(typed_ncat-2 T (nrow-1 a b) (nrow-1 c d))" "f.(a,b)" => "(. f (tuple a b))" + "f.(a=1; b=2)" => "(. f (tuple (= a 1) (parameters (= b 2))))" => + Expr(:., :f, Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) + "(a=1).()" => "(. (= a 1) (tuple))" => Expr(:., Expr(:(=), :a, 1), Expr(:tuple)) "f. (x)" => "(. f (error-t) (tuple x))" # Other dotted syntax "A.:+" => "(. A (quote +))" @@ -301,8 +308,8 @@ tests = [ "f.x.y" => "(. (. f (quote x)) (quote y))" "x .y" => "(. x (error-t) (quote y))" # Adjoint - "f'" => "(' f)" - "f'ᵀ" => "(call-i f 'ᵀ)" + "f'" => "(call-post f ')" + "f'ᵀ" => "(call-post f 'ᵀ)" # Curly calls "@S{a,b}" => "(macrocall @S (braces a b))" "S{a,b}" => "(curly S a b)" @@ -322,6 +329,7 @@ tests = [ "x\"s\"in" => """(macrocall @x_str (string-r "s") "in")""" "x\"s\"2" => """(macrocall @x_str (string-r "s") 2)""" "x\"s\"10.0" => """(macrocall @x_str (string-r "s") 10.0)""" + # ], JuliaSyntax.parse_resword => [ # In normal_context