diff --git a/README.md b/README.md index e91fc4bf..713ac441 100644 --- a/README.md +++ b/README.md @@ -375,7 +375,7 @@ The children of our trees are strictly in source order. This has many consequences in places where `Expr` reorders child expressions. * Infix and postfix operator calls have the operator name in the *second* child position. `a + b` is parsed as `(call-i a + b)` - where the infix `-i` flag indicates infix child position - rather than `Expr(:call, :+, :a, :b)`. -* Flattened generators are represented in source order +* Generators are represented in source order as a single node rather than multiple nested flatten and generator expressions. ### No `LineNumberNode`s @@ -434,15 +434,16 @@ class of tokenization errors and lets the parser deal with them. * Using `try catch else finally end` is parsed with `K"catch"` `K"else"` and `K"finally"` children to avoid the awkwardness of the optional child nodes in the `Expr` representation (#234) * The dotted import path syntax as in `import A.b.c` is parsed with a `K"importpath"` kind rather than `K"."`, because a bare `A.b.c` has a very different nested/quoted expression representation (#244) * We use flags rather than child nodes to represent the difference between `struct` and `mutable struct`, `module` and `baremodule` (#220) +* Multiple iterations within the header of a `for`, as in `for a=as, b=bs body end` are represented with a `cartesian_iterator` head rather than a `block`, as these lists of iterators are neither semantically nor syntactically a sequence of statements. Unlike other uses of `block` (see also generators). ## More detail on tree differences -### Flattened generators +### Generators Flattened generators are uniquely problematic because the Julia AST doesn't respect a key rule we normally expect: that the children of an AST node are a -*contiguous* range in the source text. This is because the `for`s in +*contiguous* range in the source text. For example, the `for`s in `[xy for x in xs for y in ys]` are parsed in the normal order of a for loop to mean @@ -469,16 +470,30 @@ however, note that if this tree were flattened, the order would be source order. However, our green tree is strictly source-ordered, so we must deviate from the -Julia AST. The natural representation seems to be to remove the generators and -use a flattened structure: +Julia AST. We deal with this by grouping cartesian products of iterators +(separated by commas) within `cartesian_iterator` blocks as in `for` loops, and +use the presence of multiple iterator blocks rather than the `flatten` head to +distinguish flattened iterators. The nested flattens and generators of `Expr` +forms are reconstructed later. In this form the tree structure resembles the +source much more closely. For example, `(xy for x in xs for y in ys)` is parsed as ``` -(flatten +(generator xy (= x xs) (= y ys)) ``` +And the cartesian iteration `(xy for x in xs, y in ys)` is parsed as + +``` +(generator + xy + (cartesian_iterator + (= x xs) + (= y ys))) +``` + ### Whitespace trivia inside strings For triple quoted strings, the indentation isn't part of the string data so diff --git a/src/expr.jl b/src/expr.jl index 8978eb3e..632266dc 100644 --- a/src/expr.jl +++ b/src/expr.jl @@ -126,41 +126,34 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, # Convert children insert_linenums = (headsym === :block || headsym === :toplevel) && need_linenodes args = Vector{Any}(undef, length(node_args)*(insert_linenums ? 2 : 1)) - if headsym === :for && length(node_args) == 2 - # No line numbers in for loop iteration spec - args[1] = _to_expr(node_args[1], iteration_spec=true, need_linenodes=false) - args[2] = _to_expr(node_args[2]) - elseif headsym === :let && length(node_args) == 2 - # No line numbers in let statement binding list - args[1] = _to_expr(node_args[1], need_linenodes=false) - args[2] = _to_expr(node_args[2]) + eq_to_kw_in_call = + ((headsym === :call || headsym === :dotcall) && is_prefix_call(node)) || + headsym === :ref + eq_to_kw_all = (headsym === :parameters && !map_kw_in_params) || + (headsym === :parens && eq_to_kw) + in_vcbr = headsym === :vect || headsym === :curly || headsym === :braces || headsym === :ref + if insert_linenums && isempty(node_args) + push!(args, source_location(LineNumberNode, node.source, node.position)) else - eq_to_kw_in_call = - ((headsym === :call || headsym === :dotcall) && is_prefix_call(node)) || - headsym === :ref - eq_to_kw_all = (headsym === :parameters && !map_kw_in_params) || - (headsym === :parens && eq_to_kw) - in_vcbr = headsym === :vect || headsym === :curly || headsym === :braces || headsym === :ref - if insert_linenums && isempty(node_args) - push!(args, source_location(LineNumberNode, node.source, node.position)) - else - for i in 1:length(node_args) - n = node_args[i] - if insert_linenums - args[2*i-1] = source_location(LineNumberNode, n.source, n.position) - end - eq_to_kw = eq_to_kw_in_call && i > 1 || eq_to_kw_all - coalesce_dot_with_ops = i==1 && - (nodekind in KSet"call dotcall curly" || - nodekind == K"quote" && flags(node) == COLON_QUOTE) - args[insert_linenums ? 2*i : i] = - _to_expr(n, eq_to_kw=eq_to_kw, - map_kw_in_params=in_vcbr, - coalesce_dot=coalesce_dot_with_ops) - end - if nodekind == K"block" && has_flags(node, PARENS_FLAG) - popfirst!(args) + for i in 1:length(node_args) + n = node_args[i] + if insert_linenums + args[2*i-1] = source_location(LineNumberNode, n.source, n.position) end + eq_to_kw = eq_to_kw_in_call && i > 1 || eq_to_kw_all + coalesce_dot_with_ops = i==1 && + (nodekind in KSet"call dotcall curly" || + nodekind == K"quote" && flags(node) == COLON_QUOTE) + args[insert_linenums ? 2*i : i] = + _to_expr(n, eq_to_kw=eq_to_kw, + map_kw_in_params=in_vcbr, + coalesce_dot=coalesce_dot_with_ops, + iteration_spec=(headsym == :for && i == 1), + need_linenodes=!(headsym == :let && i == 1) + ) + end + if nodekind == K"block" && has_flags(node, PARENS_FLAG) + popfirst!(args) end end @@ -202,6 +195,10 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, elseif headsym in (:ref, :curly) # Move parameters blocks to args[2] reorder_parameters!(args, 2) + elseif headsym === :for + if Meta.isexpr(args[1], :cartesian_iterator) + args[1] = Expr(:block, args[1].args...) + end elseif headsym in (:tuple, :vect, :braces) # Move parameters blocks to args[1] reorder_parameters!(args, 1) @@ -258,18 +255,32 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true, push!(args, else_) end end - elseif headsym === :filter - pushfirst!(args, last(args)) - pop!(args) - elseif headsym === :flatten - # The order of nodes inside the generators in Julia's flatten AST - # is noncontiguous in the source text, so need to reconstruct - # Julia's AST here from our alternative `flatten` expression. - gen = Expr(:generator, args[1], args[end]) - for i in length(args)-1:-1:2 - gen = Expr(:flatten, Expr(:generator, gen, args[i])) + elseif headsym === :generator + # Reconstruct the nested Expr form for generator from our flatter + # source-ordered `generator` format. + gen = args[1] + for j = length(args):-1:2 + if Meta.isexpr(args[j], :cartesian_iterator) + gen = Expr(:generator, gen, args[j].args...) + else + gen = Expr(:generator, gen, args[j]) + end + if j < length(args) + # Additional `for`s flatten the inner generator + gen = Expr(:flatten, gen) + end end return gen + elseif headsym == :filter + @assert length(args) == 2 + iterspec = args[1] + outargs = Any[args[2]] + if Meta.isexpr(iterspec, :cartesian_iterator) + append!(outargs, iterspec.args) + else + push!(outargs, iterspec) + end + args = outargs elseif headsym in (:nrow, :ncat) # For lack of a better place, the dimension argument to nrow/ncat # is stored in the flags diff --git a/src/kinds.jl b/src/kinds.jl index 60e2979f..f32ccf56 100644 --- a/src/kinds.jl +++ b/src/kinds.jl @@ -905,7 +905,7 @@ const _kind_names = # Comprehensions "generator" "filter" - "flatten" + "cartesian_iterator" "comprehension" "typed_comprehension" "END_SYNTAX_KINDS" diff --git a/src/parser.jl b/src/parser.jl index 8abb272c..f013934c 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -1785,13 +1785,9 @@ function parse_resword(ps::ParseState) emit(ps, mark, K"while") elseif word == K"for" # for x in xs end ==> (for (= x xs) (block)) - # for x in xs, y in ys \n a \n end ==> (for (block (= x xs) (= y ys)) (block a)) + # for x in xs, y in ys \n a \n end ==> (for (cartesian_iterator (= x xs) (= y ys)) (block a)) bump(ps, TRIVIA_FLAG) - m = position(ps) - n_subexprs = parse_comma_separated(ps, parse_iteration_spec) - if n_subexprs > 1 - emit(ps, m, K"block") - end + parse_iteration_specs(ps) parse_block(ps) bump_closing_token(ps, K"end") emit(ps, mark, K"for") @@ -2627,6 +2623,16 @@ function parse_iteration_spec(ps::ParseState) emit(ps, mark, K"=") end +# Parse an iteration spec, or a comma separate list of such for for loops and +# generators +function parse_iteration_specs(ps::ParseState) + mark = position(ps) + n_iters = parse_comma_separated(ps, parse_iteration_spec) + if n_iters > 1 + emit(ps, mark, K"cartesian_iterator") + end +end + # flisp: parse-space-separated-exprs function parse_space_separated_exprs(ps::ParseState) ps = with_space_sensitive(ps) @@ -2671,61 +2677,41 @@ function parse_vect(ps::ParseState, closer) return (K"vect", EMPTY_FLAGS) end -# Flattened generators are hard because the Julia AST doesn't respect a key -# rule we normally expect: that the children of an AST node are a contiguous -# range in the source text. This is because the `for`s in -# `[xy for x in xs for y in ys]` are parsed in the normal order of a for as +# Parse generators # -# (flatten -# (generator -# (generator -# xy -# y in ys) -# x in xs)) +# We represent generators quite differently from `Expr`: +# * Cartesian products of iterators are grouped within cartesian_iterator +# nodes, as in the short form of `for` loops. +# * The `generator` kind is used for both cartesian and flattened generators # -# We deal with this by only emitting the flatten: -# -# (flatten xy (= x xs) (= y ys)) -# -# then reconstructing the nested flattens and generators when converting to Expr. -# -# [x for a = as for b = bs if cond1 for c = cs if cond2] ==> (comprehension (flatten x (= a as) (filter (= b bs) cond1) (filter (= c cs) cond2))) -# [x for a = as if begin cond2 end] => (comprehension (generator x (filter (= a as) (block cond2)))) +# (x for a in as for b in bs) ==> (parens (generator x (= a as) (= b bs))) +# (x for a in as, b in bs) ==> (parens (generator x (cartesian_iterator (= a as) (= b bs)))) +# (x for a in as, b in bs if z) ==> (parens (generator x (filter (cartesian_iterator (= a as) (= b bs)) z))) # # flisp: parse-generator -function parse_generator(ps::ParseState, mark, flatten=false) - t = peek_token(ps) - if !preceding_whitespace(t) - # [(x)for x in xs] ==> (comprehension (generator (parens x) (error) (= x xs))) - bump_invisible(ps, K"error", TRIVIA_FLAG, - error="Expected space before `for` in generator") - end - @check kind(t) == K"for" - bump(ps, TRIVIA_FLAG) - filter_mark = position(ps) - parse_comma_separated(ps, parse_iteration_spec) - if peek(ps) == K"if" - # (a for x in xs if cond) ==> (parens (generator a (filter (= x xs) cond))) +function parse_generator(ps::ParseState, mark) + while (t = peek_token(ps); kind(t) == K"for") + if !preceding_whitespace(t) + # ((x)for x in xs) ==> (parens (generator (parens x) (error) (= x xs))) + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="Expected space before `for` in generator") + end bump(ps, TRIVIA_FLAG) - parse_cond(ps) - emit(ps, filter_mark, K"filter") - end - t = peek_token(ps) - if kind(t) == K"for" - # (xy for x in xs for y in ys) ==> (parens (flatten xy (= x xs) (= y ys))) - # (xy for x in xs for y in ys for z in zs) ==> (parens (flatten xy (= x xs) (= y ys) (= z zs))) - parse_generator(ps, mark, true) - if !flatten - emit(ps, mark, K"flatten") + iter_mark = position(ps) + parse_iteration_specs(ps) + if peek(ps) == K"if" + # (x for a in as if z) ==> (parens (generator x (filter (= a as) z))) + bump(ps, TRIVIA_FLAG) + parse_cond(ps) + emit(ps, iter_mark, K"filter") end - elseif !flatten - # (x for a in as) ==> (parens (generator x (= a as))) - emit(ps, mark, K"generator") end + emit(ps, mark, K"generator") end # flisp: parse-comprehension function parse_comprehension(ps::ParseState, mark, closer) + # [x for a in as] ==> (comprehension (generator x a in as)) ps = ParseState(ps, whitespace_newline=true, space_sensitive=false, end_symbol=false) diff --git a/test/expr.jl b/test/expr.jl index 47043253..afa03b20 100644 --- a/test/expr.jl +++ b/test/expr.jl @@ -98,18 +98,7 @@ ) end - @testset "No line numbers in for/let bindings" begin - @test parsestmt(Expr, "for i=is, j=js\nbody\nend") == - Expr(:for, - Expr(:block, - Expr(:(=), :i, :is), - Expr(:(=), :j, :js), - ), - Expr(:block, - LineNumberNode(2), - :body - ) - ) + @testset "No line numbers in let bindings" begin @test parsestmt(Expr, "let i=is, j=js\nbody\nend") == Expr(:let, Expr(:block, @@ -142,6 +131,28 @@ )) end + @testset "for" begin + @test parsestmt(Expr, "for i=is body end") == + Expr(:for, + Expr(:(=), :i, :is), + Expr(:block, + LineNumberNode(1), + :body + ) + ) + @test parsestmt(Expr, "for i=is, j=js\nbody\nend") == + Expr(:for, + Expr(:block, + Expr(:(=), :i, :is), + Expr(:(=), :j, :js), + ), + Expr(:block, + LineNumberNode(2), + :body + ) + ) + end + @testset "Long form anonymous functions" begin @test parsestmt(Expr, "function (xs...)\nbody end") == Expr(:function, @@ -352,6 +363,53 @@ @test parsestmt(Expr, "[x,y ; z]") == Expr(:vect, Expr(:parameters, :z), :x, :y) end + @testset "generators" begin + @test parsestmt(Expr, "(x for a in as for b in bs)") == + Expr(:flatten, Expr(:generator, + Expr(:generator, :x, Expr(:(=), :b, :bs)), + Expr(:(=), :a, :as))) + @test parsestmt(Expr, "(x for a in as, b in bs)") == + Expr(:generator, :x, Expr(:(=), :a, :as), Expr(:(=), :b, :bs)) + @test parsestmt(Expr, "(x for a in as, b in bs if z)") == + Expr(:generator, :x, + Expr(:filter, :z, Expr(:(=), :a, :as), Expr(:(=), :b, :bs))) + @test parsestmt(Expr, "(x for a in as, b in bs for c in cs, d in ds)") == + Expr(:flatten, + Expr(:generator, + Expr(:generator, :x, Expr(:(=), :c, :cs), Expr(:(=), :d, :ds)), + Expr(:(=), :a, :as), Expr(:(=), :b, :bs))) + @test parsestmt(Expr, "(x for a in as for b in bs if z)") == + Expr(:flatten, Expr(:generator, + Expr(:generator, :x, Expr(:filter, :z, Expr(:(=), :b, :bs))), + Expr(:(=), :a, :as))) + @test parsestmt(Expr, "(x for a in as if z for b in bs)") == + Expr(:flatten, Expr(:generator, + Expr(:generator, :x, Expr(:(=), :b, :bs)), + Expr(:filter, :z, Expr(:(=), :a, :as)))) + @test parsestmt(Expr, "[x for a = as for b = bs if cond1 for c = cs if cond2]" ) == + Expr(:comprehension, + Expr(:flatten, + Expr(:generator, + Expr(:flatten, + Expr(:generator, + Expr(:generator, + :x, + Expr(:filter, + :cond2, + Expr(:(=), :c, :cs))), + Expr(:filter, + :cond1, + Expr(:(=), :b, :bs)))), + Expr(:(=), :a, :as)))) + @test parsestmt(Expr, "[x for a = as if begin cond2 end]" ) == + Expr(:comprehension, Expr(:generator, :x, + Expr(:filter, + Expr(:block, LineNumberNode(1), :cond2), + Expr(:(=), :a, :as)))) + @test parsestmt(Expr, "(x for a in as if z)") == + Expr(:generator, :x, Expr(:filter, :z, Expr(:(=), :a, :as))) + end + @testset "try" begin @test parsestmt(Expr, "try x catch e; y end") == Expr(:try, diff --git a/test/parser.jl b/test/parser.jl index e4d6604c..b0550595 100644 --- a/test/parser.jl +++ b/test/parser.jl @@ -451,7 +451,7 @@ tests = [ "while x < y \n a \n b \n end" => "(while (call-i x < y) (block a b))" # for "for x in xs end" => "(for (= x xs) (block))" - "for x in xs, y in ys \n a \n end" => "(for (block (= x xs) (= y ys)) (block a))" + "for x in xs, y in ys \n a \n end" => "(for (cartesian_iterator (= x xs) (= y ys)) (block a))" # let "let x=1\n end" => "(let (block (= x 1)) (block))" "let x=1 ; end" => "(let (block (= x 1)) (block))" @@ -766,13 +766,16 @@ tests = [ "[x for a in as]" => "(comprehension (generator x (= a as)))" "[x \n\n for a in as]" => "(comprehension (generator x (= a as)))" # parse_generator - "[x for a = as for b = bs if cond1 for c = cs if cond2]" => "(comprehension (flatten x (= a as) (filter (= b bs) cond1) (filter (= c cs) cond2)))" + "(x for a in as for b in bs)" => "(parens (generator x (= a as) (= b bs)))" + "(x for a in as, b in bs)" => "(parens (generator x (cartesian_iterator (= a as) (= b bs))))" + "(x for a in as, b in bs if z)" => "(parens (generator x (filter (cartesian_iterator (= a as) (= b bs)) z)))" + "(x for a in as, b in bs for c in cs, d in ds)" => "(parens (generator x (cartesian_iterator (= a as) (= b bs)) (cartesian_iterator (= c cs) (= d ds))))" + "(x for a in as for b in bs if z)" => "(parens (generator x (= a as) (filter (= b bs) z)))" + "(x for a in as if z for b in bs)" => "(parens (generator x (filter (= a as) z) (= b bs)))" + "[x for a = as for b = bs if cond1 for c = cs if cond2]" => "(comprehension (generator x (= a as) (filter (= b bs) cond1) (filter (= c cs) cond2)))" "[x for a = as if begin cond2 end]" => "(comprehension (generator x (filter (= a as) (block cond2))))" "[(x)for x in xs]" => "(comprehension (generator (parens x) (error-t) (= x xs)))" - "(a for x in xs if cond)" => "(parens (generator a (filter (= x xs) cond)))" - "(xy for x in xs for y in ys)" => "(parens (flatten xy (= x xs) (= y ys)))" - "(xy for x in xs for y in ys for z in zs)" => "(parens (flatten xy (= x xs) (= y ys) (= z zs)))" - "(x for a in as)" => "(parens (generator x (= a as)))" + "(x for a in as if z)" => "(parens (generator x (filter (= a as) z)))" # parse_vect "[x, y]" => "(vect x y)" "[x, y]" => "(vect x y)"