Skip to content

Commit

Permalink
Record fixity of call type in flags (#124)
Browse files Browse the repository at this point in the history
We now record which precise call syntax was used out of the four
options:
* Prefix calls with parens
* Prefix operator calls
* Infix operator calls
* Postfix operator calls

This allows us to distinguish keyword arguments from assignment, fixing
several bugs with = to kw conversion.

Also, change to emit unadorned postfix adjoint as `(call-post x ')` rather
than as a syntactic operator `(' x)`, for consistency with suffixed
versions like `x'ᵀ`.
  • Loading branch information
c42f authored Oct 14, 2022
1 parent 384f745 commit 700101e
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 102 deletions.
54 changes: 27 additions & 27 deletions src/expr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -99,46 +99,46 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true,
args[1] = _to_expr(node_args[1], need_linenodes=false)
args[2] = _to_expr(node_args[2])
else
eq_to_kw = headsym == :call && !has_flags(node, INFIX_FLAG) ||
headsym == :ref ||
(headsym == :parameters && !inside_vect_or_braces) ||
(headsym == :tuple && inside_dot_expr)
eq_to_kw_in_call =
headsym == :call && is_prefix_call(node) ||
headsym == :ref
eq_to_kw_all = headsym == :parameters && !inside_vect_or_braces ||
(headsym == :tuple && inside_dot_expr)
in_dot = headsym == :.
in_vb = headsym == :vect || headsym == :braces
if insert_linenums
if isempty(node_args)
push!(args, source_location(LineNumberNode, node.source, node.position))
else
for i in 1:length(node_args)
n = node_args[i]
args[2*i-1] = source_location(LineNumberNode, n.source, n.position)
args[2*i] = _to_expr(n,
eq_to_kw=eq_to_kw,
inside_dot_expr=in_dot,
inside_vect_or_braces=in_vb)
end
end
if insert_linenums && isempty(node_args)
push!(args, source_location(LineNumberNode, node.source, node.position))
else
for i in 1:length(node_args)
args[i] = _to_expr(node_args[i],
eq_to_kw=eq_to_kw,
inside_dot_expr=in_dot,
inside_vect_or_braces=in_vb)
n = node_args[i]
if insert_linenums
args[2*i-1] = source_location(LineNumberNode, n.source, n.position)
end
eq_to_kw = eq_to_kw_in_call && i > 1 || eq_to_kw_all
args[insert_linenums ? 2*i : i] =
_to_expr(n, eq_to_kw=eq_to_kw,
inside_dot_expr=in_dot,
inside_vect_or_braces=in_vb)
end
end
end
# Julia's standard `Expr` ASTs have children stored in a canonical
# order which is not always source order. We permute the children
# here as necessary to get the canonical order.
if is_infix(node.raw)
args[2], args[1] = args[1], args[2]
end

# Special cases for various expression heads
loc = source_location(LineNumberNode, node.source, node.position)
if headsym == :macrocall
insert!(args, 2, loc)
elseif headsym in (:call, :ref)
# Julia's standard `Expr` ASTs have children stored in a canonical
# order which is not always source order. We permute the children
# here as necessary to get the canonical order.
if is_infix_op_call(node) || is_postfix_op_call(node)
args[2], args[1] = args[1], args[2]
end
# Lower (call x ') to special ' head
if is_postfix_op_call(node) && args[1] == Symbol("'")
popfirst!(args)
headsym = Symbol("'")
end
# Move parameters block to args[2]
if length(args) > 1 && Meta.isexpr(args[end], :parameters)
insert!(args, 2, args[end])
Expand Down
40 changes: 29 additions & 11 deletions src/parse_stream.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,25 @@
# TODO: Use `primitive type SyntaxFlags 16 end` rather than an alias?
const RawFlags = UInt16
const EMPTY_FLAGS = RawFlags(0)
# Applied to tokens which are syntax trivia after parsing
const TRIVIA_FLAG = RawFlags(1<<0)
# Some of the following flags are head-specific and could probably be allowed
# to cover the same bits...
const INFIX_FLAG = RawFlags(1<<1)
# Record whether syntactic operators were dotted
const DOTOP_FLAG = RawFlags(1<<2)

# Record whether operators are dotted
const DOTOP_FLAG = RawFlags(1<<1)
# Record whether operator has a suffix
const SUFFIXED_FLAG = RawFlags(1<<2)

# Distinguish various syntaxes which are mapped to K"call"
const PREFIX_CALL_FLAG = RawFlags(0<<3)
const INFIX_FLAG = RawFlags(1<<3)
const PREFIX_OP_FLAG = RawFlags(2<<3)
const POSTFIX_OP_FLAG = RawFlags(3<<3)

# The next two bits could overlap with the previous two if necessary
# Set when kind == K"String" was triple-delimited as with """ or ```
const TRIPLE_STRING_FLAG = RawFlags(1<<3)
const TRIPLE_STRING_FLAG = RawFlags(1<<5)
# Set when a string or identifier needs "raw string" unescaping
const RAW_STRING_FLAG = RawFlags(1<<4)
# Record whether operator has a suffix
const SUFFIXED_FLAG = RawFlags(1<<6)
const RAW_STRING_FLAG = RawFlags(1<<6)

# Token-only flag
# Record whether a token had preceding whitespace
Expand All @@ -34,6 +41,10 @@ function set_numeric_flags(n::Integer)
f
end

"""
    call_type_flags(f::RawFlags)

Return `f` with every bit cleared except bits 3 and 4, the two-bit field in
which the call-syntax flags (`PREFIX_CALL_FLAG`, `INFIX_FLAG`,
`PREFIX_OP_FLAG`, `POSTFIX_OP_FLAG`) are stored.
"""
function call_type_flags(f::RawFlags)
    # Two adjacent bits starting at bit 3, i.e. 0b11000.
    call_type_mask = RawFlags(0b11) << 3
    return f & call_type_mask
end

"""
    numeric_flags(f::RawFlags)

Return, as an `Int`, the value held in the bits of `f` above the low eight
(`f >> 8`, truncated to a `UInt8`).
"""
function numeric_flags(f::RawFlags)
    upper_byte = (f >> 8) % UInt8
    return Int(upper_byte)
end
Expand Down Expand Up @@ -70,7 +81,9 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
if include_flag_suff && suffix_flags != EMPTY_FLAGS
str = str*"-"
is_trivia(head) && (str = str*"t")
is_infix(head) && (str = str*"i")
is_infix_op_call(head) && (str = str*"i")
is_prefix_op_call(head) && (str = str*"pre")
is_postfix_op_call(head) && (str = str*"post")
has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"s")
has_flags(head, RAW_STRING_FLAG) && (str = str*"r")
is_suffixed(head) && (str = str*"S")
Expand All @@ -90,8 +103,13 @@ flags(x) = flags(head(x))

# Predicates based on flags()
# Generic fallbacks: obtain the flags of `x` with `flags(x)` and forward to the
# method which takes the flags value directly.
has_flags(x, test_flags) = has_flags(flags(x), test_flags)
call_type_flags(x) = call_type_flags(flags(x))

is_trivia(x) = has_flags(x, TRIVIA_FLAG)
is_infix(x) = has_flags(x, INFIX_FLAG)
# Call-syntax predicates. Each one compares the whole call-type field returned
# by `call_type_flags(x)` against a single constant with `==`, so at most one
# of the four can hold for a given `x`.
# NOTE(review): `PREFIX_CALL_FLAG` is zero, so `is_prefix_call` presumably also
# holds for anything which never sets call-type bits - confirm callers check
# the head kind as well.
is_prefix_call(x) = call_type_flags(x) == PREFIX_CALL_FLAG
is_infix_op_call(x) = call_type_flags(x) == INFIX_FLAG
is_prefix_op_call(x) = call_type_flags(x) == PREFIX_OP_FLAG
is_postfix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG
is_dotted(x) = has_flags(x, DOTOP_FLAG)
is_suffixed(x) = has_flags(x, SUFFIXED_FLAG)
# Decorated means dotted or suffixed (or both).
is_decorated(x) = is_dotted(x) || is_suffixed(x)
Expand Down
74 changes: 36 additions & 38 deletions src/parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,7 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where {
if k == K"~"
if ps.space_sensitive && !preceding_whitespace(peek_token(ps, 2))
# Unary ~ in space sensitive context is not assignment precedence
# [a ~b] ==> (hcat a (call ~ b))
# [a ~b] ==> (hcat a (call-pre ~ b))
return
end
# ~ is the only non-syntactic assignment-precedence operator.
Expand Down Expand Up @@ -885,8 +885,8 @@ function parse_with_chains(ps::ParseState, down, is_op, chain_ops)
is_both_unary_and_binary(t) &&
!preceding_whitespace(peek_token(ps, 2))
# The following is two elements of a hcat
# [x +y] ==> (hcat x (call + y))
# [x+y +z] ==> (hcat (call-i x + y) (call + z))
# [x +y] ==> (hcat x (call-pre + y))
# [x+y +z] ==> (hcat (call-i x + y) (call-pre + z))
# Conversely the following are infix calls
# [x +₁y] ==> (vect (call-i x +₁ y))
# [x+y+z] ==> (vect (call-i x + y z))
Expand Down Expand Up @@ -914,7 +914,7 @@ function parse_chain(ps::ParseState, down, op_kind)
if ps.space_sensitive && preceding_whitespace(t) &&
is_both_unary_and_binary(t) &&
!preceding_whitespace(peek_token(ps, 2))
# [x +y] ==> (hcat x (call + y))
# [x +y] ==> (hcat x (call-pre + y))
break
end
bump(ps, TRIVIA_FLAG)
Expand Down Expand Up @@ -948,16 +948,16 @@ function parse_unary_subtype(ps::ParseState)
elseif k2 in KSet"{ ("
# parse <:{T}(x::T) or <:(x::T) like other unary operators
# <:{T}(x::T) ==> (call (curly <: T) (:: x T))
# <:(x::T) ==> (<: (:: x T))
# <:(x::T) ==> (<:-pre (:: x T))
parse_where(ps, parse_juxtapose)
else
# <: A where B ==> (<: (where A B))
# <: A where B ==> (<:-pre (where A B))
mark = position(ps)
bump(ps, TRIVIA_FLAG)
parse_where(ps, parse_juxtapose)
# Flisp parser handled this, but I don't know how it can happen...
@check peek_behind(ps).kind != K"tuple"
emit(ps, mark, k)
emit(ps, mark, k, PREFIX_OP_FLAG)
end
else
parse_where(ps, parse_juxtapose)
Expand Down Expand Up @@ -1015,7 +1015,7 @@ function is_juxtapose(ps, prev_k, t)
# Not juxtaposition - parse_juxtapose will consume only the first token.
# x.3 ==> x
# sqrt(2)2 ==> (call sqrt 2)
# x' y ==> x
# x' y ==> (call-post x ')
# x 'y ==> x

return !preceding_whitespace(t) &&
Expand All @@ -1039,7 +1039,7 @@ end
# 2(x) ==> (call-i 2 * x)
# (2)(3)x ==> (call-i 2 * 3 x)
# (x-1)y ==> (call-i (call-i x - 1) * y)
# x'y ==> x
# x'y ==> (call-i (call-post x ') * y)
#
# flisp: parse-juxtapose
function parse_juxtapose(ps::ParseState)
Expand Down Expand Up @@ -1098,11 +1098,11 @@ function parse_unary(ps::ParseState)
if is_prec_power(k3) || k3 in KSet"[ {"
# `[`, `{` (issue #18851) and `^` have higher precedence than
# unary negation
# -2^x ==> (call - (call-i 2 ^ x))
# -2[1, 3] ==> (call - (ref 2 1 3))
# -2^x ==> (call-pre - (call-i 2 ^ x))
# -2[1, 3] ==> (call-pre - (ref 2 1 3))
bump(ps)
parse_factor(ps)
emit(ps, mark, K"call")
emit(ps, mark, K"call", PREFIX_OP_FLAG)
else
# We have a signed numeric literal. Glue the operator to the
# next token to create a signed literal:
Expand All @@ -1115,17 +1115,17 @@ function parse_unary(ps::ParseState)
end
end
# Things which are not quite negative literals result in a unary call instead
# -0x1 ==> (call - 0x01)
# - 2 ==> (call - 2)
# .-2 ==> (call .- 2)
# -0x1 ==> (call-pre - 0x01)
# - 2 ==> (call-pre - 2)
# .-2 ==> (call-pre .- 2)
parse_unary_call(ps)
end

# Parse calls to unary operators and prefix calls involving arbitrary operators
# with bracketed arglists (as opposed to infix notation)
#
# +a ==> (call + a)
# +(a,b) ==> (call + a b)
# +a ==> (call-pre + a)
# +(a,b) ==> (call-pre + a b)
#
# flisp: parse-unary-call
function parse_unary_call(ps::ParseState)
Expand Down Expand Up @@ -1208,33 +1208,33 @@ function parse_unary_call(ps::ParseState)
else
# Unary function calls with brackets as grouping, not an arglist
if opts.is_block
# +(a;b) ==> (call + (block a b))
# +(a;b) ==> (call-pre + (block a b))
emit(ps, mark_before_paren, K"block")
end
# Not a prefix operator call but a block; `=` is not `kw`
# +(a=1) ==> (call + (= a 1))
# +(a=1) ==> (call-pre + (= a 1))
# Unary operators have lower precedence than ^
# +(a)^2 ==> (call + (call-i a ^ 2))
# +(a)(x,y)^2 ==> (call + (call-i (call a x y) ^ 2))
# +(a)^2 ==> (call-pre + (call-i a ^ 2))
# +(a)(x,y)^2 ==> (call-pre + (call-i (call a x y) ^ 2))
parse_call_chain(ps, mark_before_paren)
parse_factor_with_initial_ex(ps, mark_before_paren)
emit(ps, mark, op_node_kind)
emit(ps, mark, op_node_kind, PREFIX_OP_FLAG)
end
else
if is_unary_op(op_t)
# Normal unary calls
# +x ==> (call + x)
# √x ==> (call √ x)
# ±x ==> (call ± x)
# +x ==> (call-pre + x)
# √x ==> (call-pre √ x)
# ±x ==> (call-pre ± x)
bump(ps, op_tok_flags)
else
# /x ==> (call (error /) x)
# +₁ x ==> (call (error +₁) x)
# .<: x ==> (call (error .<:) x)
# /x ==> (call-pre (error /) x)
# +₁ x ==> (call-pre (error +₁) x)
# .<: x ==> (call-pre (error .<:) x)
bump(ps, error="not a unary operator")
end
parse_unary(ps)
emit(ps, mark, op_node_kind)
emit(ps, mark, op_node_kind, PREFIX_OP_FLAG)
end
end

Expand Down Expand Up @@ -1433,6 +1433,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
finish_macroname(ps, mark, valid_macroname, macro_name_position)
end
# f(a,b) ==> (call f a b)
# f(a; b=1) ==> (call f a (parameters (= b 1)))
# (a=1)() ==> (call (= a 1))
# f (a) ==> (call f (error-t) a)
bump_disallowed_space(ps)
bump(ps, TRIVIA_FLAG)
Expand All @@ -1457,6 +1459,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
K"]", ps.end_symbol)
# a[i] ==> (ref a i)
# a[i,j] ==> (ref a i j)
# (a=1)[] ==> (ref (= a 1))
# T[x y] ==> (typed_hcat T x y)
# T[x ; y] ==> (typed_vcat T x y)
# T[a b; c d] ==> (typed_vcat T (row a b) (row c d))
Expand Down Expand Up @@ -1562,15 +1565,10 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
this_iter_valid_macroname = true
end
elseif k == K"'" && !preceding_whitespace(t)
if !is_suffixed(t)
# f' ==> (' f)
bump(ps, TRIVIA_FLAG)
emit(ps, mark, k)
else
# f'ᵀ ==> (call 'ᵀ f)
bump(ps)
emit(ps, mark, K"call", INFIX_FLAG)
end
# f' ==> (call-post f ')
# f'ᵀ ==> (call-post f 'ᵀ)
bump(ps)
emit(ps, mark, K"call", POSTFIX_OP_FLAG)
elseif k == K"{"
# Type parameter curlies and macro calls
if is_macrocall
Expand Down
1 change: 0 additions & 1 deletion src/tokenize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -945,7 +945,6 @@ function lex_backslash(l::Lexer)
return emit(l, K"\\")
end

# TODO .op
function lex_dot(l::Lexer)
if accept(l, '.')
if accept(l, '.')
Expand Down
Loading

0 comments on commit 700101e

Please sign in to comment.