Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Record fixity of call type in flags #124

Merged
merged 2 commits into from
Oct 14, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 27 additions & 27 deletions src/expr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -99,46 +99,46 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true,
args[1] = _to_expr(node_args[1], need_linenodes=false)
args[2] = _to_expr(node_args[2])
else
eq_to_kw = headsym == :call && !has_flags(node, INFIX_FLAG) ||
headsym == :ref ||
(headsym == :parameters && !inside_vect_or_braces) ||
(headsym == :tuple && inside_dot_expr)
eq_to_kw_in_call =
headsym == :call && is_prefix_call(node) ||
headsym == :ref
eq_to_kw_all = headsym == :parameters && !inside_vect_or_braces ||
(headsym == :tuple && inside_dot_expr)
in_dot = headsym == :.
in_vb = headsym == :vect || headsym == :braces
if insert_linenums
if isempty(node_args)
push!(args, source_location(LineNumberNode, node.source, node.position))
else
for i in 1:length(node_args)
n = node_args[i]
args[2*i-1] = source_location(LineNumberNode, n.source, n.position)
args[2*i] = _to_expr(n,
eq_to_kw=eq_to_kw,
inside_dot_expr=in_dot,
inside_vect_or_braces=in_vb)
end
end
if insert_linenums && isempty(node_args)
push!(args, source_location(LineNumberNode, node.source, node.position))
else
for i in 1:length(node_args)
args[i] = _to_expr(node_args[i],
eq_to_kw=eq_to_kw,
inside_dot_expr=in_dot,
inside_vect_or_braces=in_vb)
n = node_args[i]
if insert_linenums
args[2*i-1] = source_location(LineNumberNode, n.source, n.position)
end
eq_to_kw = eq_to_kw_in_call && i > 1 || eq_to_kw_all
args[insert_linenums ? 2*i : i] =
_to_expr(n, eq_to_kw=eq_to_kw,
inside_dot_expr=in_dot,
inside_vect_or_braces=in_vb)
end
end
end
# Julia's standard `Expr` ASTs have children stored in a canonical
# order which is not always source order. We permute the children
# here as necessary to get the canonical order.
if is_infix(node.raw)
args[2], args[1] = args[1], args[2]
end

# Special cases for various expression heads
loc = source_location(LineNumberNode, node.source, node.position)
if headsym == :macrocall
insert!(args, 2, loc)
elseif headsym in (:call, :ref)
# Julia's standard `Expr` ASTs have children stored in a canonical
# order which is not always source order. We permute the children
# here as necessary to get the canonical order.
if is_infix_op_call(node) || is_suffix_op_call(node)
args[2], args[1] = args[1], args[2]
end
# Lower (call x ') to special ' head
if is_suffix_op_call(node) && args[1] == Symbol("'")
popfirst!(args)
headsym = Symbol("'")
end
# Move parameters block to args[2]
if length(args) > 1 && Meta.isexpr(args[end], :parameters)
insert!(args, 2, args[end])
Expand Down
42 changes: 31 additions & 11 deletions src/parse_stream.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,25 @@
# TODO: Use `primitive type SyntaxFlags 16 end` rather than an alias?
const RawFlags = UInt16
const EMPTY_FLAGS = RawFlags(0)
# Applied to tokens which are syntax trivia after parsing
const TRIVIA_FLAG = RawFlags(1<<0)
# Some of the following flags are head-specific and could probably be allowed
# to cover the same bits...
const INFIX_FLAG = RawFlags(1<<1)
# Record whether syntactic operators were dotted
const DOTOP_FLAG = RawFlags(1<<2)

# Record whether operators are dotted
const DOTOP_FLAG = RawFlags(1<<1)
# Record whether operator has a suffix
const SUFFIXED_FLAG = RawFlags(1<<2)

# Distinguish various syntaxes which are mapped to K"call"
const PREFIX_CALL_FLAG = RawFlags(0<<3)
const INFIX_FLAG = RawFlags(1<<3)
const PREFIX_OP_FLAG = RawFlags(2<<3)
const POSTFIX_OP_FLAG = RawFlags(3<<3)

# The next two bits could overlap with the previous two if necessary
# Set when kind == K"String" was triple-delimited as with """ or ```
const TRIPLE_STRING_FLAG = RawFlags(1<<3)
const TRIPLE_STRING_FLAG = RawFlags(1<<5)
# Set when a string or identifier needs "raw string" unescaping
const RAW_STRING_FLAG = RawFlags(1<<4)
# Record whether operator has a suffix
const SUFFIXED_FLAG = RawFlags(1<<6)
const RAW_STRING_FLAG = RawFlags(1<<6)

# Token-only flag
# Record whether a token had preceding whitespace
Expand All @@ -34,6 +41,10 @@ function set_numeric_flags(n::Integer)
f
end

# Extract the call-type field from `f`: only bits 3 and 4 are kept, every
# other flag bit is masked out so the result can be compared with `==`.
function call_type_flags(f::RawFlags)
    call_type_mask = 0b11000
    return f & call_type_mask
end

# Return the numeric value held in the upper bits of `f`: shift out the low
# eight bits, truncate what remains to a `UInt8` and widen it to `Int`.
function numeric_flags(f::RawFlags)
    upper_byte = (f >> 8) % UInt8
    return Int(upper_byte)
end
Expand Down Expand Up @@ -70,7 +81,11 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
if include_flag_suff && suffix_flags != EMPTY_FLAGS
str = str*"-"
is_trivia(head) && (str = str*"t")
is_infix(head) && (str = str*"i")
is_infix_op_call(head) && (str = str*"i")
# call op flag name mnemonic: i-infix, h,j - left and right of `i` in
# alphabetic order
is_prefix_op_call(head) && (str = str*"h")
is_suffix_op_call(head) && (str = str*"j")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're not limited to one-char suffixes here, right? So -pre/-suf or -pre/-post would probably be the more obvious choices.

Copy link
Member Author

@c42f c42f Oct 14, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Flags can be combined (e.g. raw and triple string), so I limited it to one-char suffixes on purpose.

Could be a good idea though to make these clearer. But if we do, let's do all the flags at once.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(ie, in a separate PR)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually I'll do this now after all: These particular flags aren't combined with others so this will be clearer and the lack of consistency shouldn't be a big deal.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"s")
has_flags(head, RAW_STRING_FLAG) && (str = str*"r")
is_suffixed(head) && (str = str*"S")
Expand All @@ -90,8 +105,13 @@ flags(x) = flags(head(x))

# Predicates based on flags()
# The two methods below read the flags of `x` via `flags(x)` and forward them
# to the method of the same name that takes the flags value directly.
has_flags(x, test_flags) = has_flags(flags(x), test_flags)
call_type_flags(x) = call_type_flags(flags(x))

# Tests for TRIVIA_FLAG.
is_trivia(x) = has_flags(x, TRIVIA_FLAG)
# NOTE(review): this tests INFIX_FLAG with `has_flags`, whereas
# `is_infix_op_call` below compares the whole call-type field with `==`.
# Presumably this older predicate is superseded - confirm before relying on it.
is_infix(x) = has_flags(x, INFIX_FLAG)
# Call-type predicates: each compares the complete call-type field of `x`
# against one constant using `==`, so they are mutually exclusive.
is_prefix_call(x) = call_type_flags(x) == PREFIX_CALL_FLAG
is_infix_op_call(x) = call_type_flags(x) == INFIX_FLAG
is_prefix_op_call(x) = call_type_flags(x) == PREFIX_OP_FLAG
is_suffix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG
# Operator decoration predicates: dotted, suffixed, or either of the two.
is_dotted(x) = has_flags(x, DOTOP_FLAG)
is_suffixed(x) = has_flags(x, SUFFIXED_FLAG)
is_decorated(x) = is_dotted(x) || is_suffixed(x)
Expand Down
74 changes: 36 additions & 38 deletions src/parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,7 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where {
if k == K"~"
if ps.space_sensitive && !preceding_whitespace(peek_token(ps, 2))
# Unary ~ in space sensitive context is not assignment precedence
# [a ~b] ==> (hcat a (call ~ b))
# [a ~b] ==> (hcat a (call-h ~ b))
return
end
# ~ is the only non-syntactic assignment-precedence operator.
Expand Down Expand Up @@ -885,8 +885,8 @@ function parse_with_chains(ps::ParseState, down, is_op, chain_ops)
is_both_unary_and_binary(t) &&
!preceding_whitespace(peek_token(ps, 2))
# The following is two elements of a hcat
# [x +y] ==> (hcat x (call + y))
# [x+y +z] ==> (hcat (call-i x + y) (call + z))
# [x +y] ==> (hcat x (call-h + y))
# [x+y +z] ==> (hcat (call-i x + y) (call-h + z))
# Conversely the following are infix calls
# [x +₁y] ==> (vect (call-i x +₁ y))
# [x+y+z] ==> (vect (call-i x + y z))
Expand Down Expand Up @@ -914,7 +914,7 @@ function parse_chain(ps::ParseState, down, op_kind)
if ps.space_sensitive && preceding_whitespace(t) &&
is_both_unary_and_binary(t) &&
!preceding_whitespace(peek_token(ps, 2))
# [x +y] ==> (hcat x (call + y))
# [x +y] ==> (hcat x (call-h + y))
break
end
bump(ps, TRIVIA_FLAG)
Expand Down Expand Up @@ -948,16 +948,16 @@ function parse_unary_subtype(ps::ParseState)
elseif k2 in KSet"{ ("
# parse <:{T}(x::T) or <:(x::T) like other unary operators
# <:{T}(x::T) ==> (call (curly <: T) (:: x T))
# <:(x::T) ==> (<: (:: x T))
# <:(x::T) ==> (<:-h (:: x T))
parse_where(ps, parse_juxtapose)
else
# <: A where B ==> (<: (where A B))
# <: A where B ==> (<:-h (where A B))
mark = position(ps)
bump(ps, TRIVIA_FLAG)
parse_where(ps, parse_juxtapose)
# Flisp parser handled this, but I don't know how it can happen...
@check peek_behind(ps).kind != K"tuple"
emit(ps, mark, k)
emit(ps, mark, k, PREFIX_OP_FLAG)
end
else
parse_where(ps, parse_juxtapose)
Expand Down Expand Up @@ -1015,7 +1015,7 @@ function is_juxtapose(ps, prev_k, t)
# Not juxtaposition - parse_juxtapose will consume only the first token.
# x.3 ==> x
# sqrt(2)2 ==> (call sqrt 2)
# x' y ==> x
# x' y ==> (call-j x ')
# x 'y ==> x

return !preceding_whitespace(t) &&
Expand All @@ -1039,7 +1039,7 @@ end
# 2(x) ==> (call-i 2 * x)
# (2)(3)x ==> (call-i 2 * 3 x)
# (x-1)y ==> (call-i (call-i x - 1) * y)
# x'y ==> x
# x'y ==> (call-i (call-j x ') * y)
#
# flisp: parse-juxtapose
function parse_juxtapose(ps::ParseState)
Expand Down Expand Up @@ -1098,11 +1098,11 @@ function parse_unary(ps::ParseState)
if is_prec_power(k3) || k3 in KSet"[ {"
# `[`, `{` (issue #18851) and `^` have higher precedence than
# unary negation
# -2^x ==> (call - (call-i 2 ^ x))
# -2[1, 3] ==> (call - (ref 2 1 3))
# -2^x ==> (call-h - (call-i 2 ^ x))
# -2[1, 3] ==> (call-h - (ref 2 1 3))
bump(ps)
parse_factor(ps)
emit(ps, mark, K"call")
emit(ps, mark, K"call", PREFIX_OP_FLAG)
else
# We have a signed numeric literal. Glue the operator to the
# next token to create a signed literal:
Expand All @@ -1115,17 +1115,17 @@ function parse_unary(ps::ParseState)
end
end
# Things which are not quite negative literals result in a unary call instead
# -0x1 ==> (call - 0x01)
# - 2 ==> (call - 2)
# .-2 ==> (call .- 2)
# -0x1 ==> (call-h - 0x01)
# - 2 ==> (call-h - 2)
# .-2 ==> (call-h .- 2)
parse_unary_call(ps)
end

# Parse calls to unary operators and prefix calls involving arbitrary operators
# with bracketed arglists (as opposed to infix notation)
#
# +a ==> (call + a)
# +(a,b) ==> (call + a b)
# +a ==> (call-h + a)
# +(a,b) ==> (call-h + a b)
#
# flisp: parse-unary-call
function parse_unary_call(ps::ParseState)
Expand Down Expand Up @@ -1208,33 +1208,33 @@ function parse_unary_call(ps::ParseState)
else
# Unary function calls with brackets as grouping, not an arglist
if opts.is_block
# +(a;b) ==> (call + (block a b))
# +(a;b) ==> (call-h + (block a b))
emit(ps, mark_before_paren, K"block")
end
# Not a prefix operator call but a block; `=` is not `kw`
# +(a=1) ==> (call + (= a 1))
# +(a=1) ==> (call-h + (= a 1))
# Unary operators have lower precedence than ^
# +(a)^2 ==> (call + (call-i a ^ 2))
# +(a)(x,y)^2 ==> (call + (call-i (call a x y) ^ 2))
# +(a)^2 ==> (call-h + (call-i a ^ 2))
# +(a)(x,y)^2 ==> (call-h + (call-i (call a x y) ^ 2))
parse_call_chain(ps, mark_before_paren)
parse_factor_with_initial_ex(ps, mark_before_paren)
emit(ps, mark, op_node_kind)
emit(ps, mark, op_node_kind, PREFIX_OP_FLAG)
end
else
if is_unary_op(op_t)
# Normal unary calls
# +x ==> (call + x)
# √x ==> (call √ x)
# ±x ==> (call ± x)
# +x ==> (call-h + x)
# √x ==> (call-h √ x)
# ±x ==> (call-h ± x)
bump(ps, op_tok_flags)
else
# /x ==> (call (error /) x)
# +₁ x ==> (call (error +₁) x)
# .<: x ==> (call (error .<:) x)
# /x ==> (call-h (error /) x)
# +₁ x ==> (call-h (error +₁) x)
# .<: x ==> (call-h (error .<:) x)
bump(ps, error="not a unary operator")
end
parse_unary(ps)
emit(ps, mark, op_node_kind)
emit(ps, mark, op_node_kind, PREFIX_OP_FLAG)
end
end

Expand Down Expand Up @@ -1433,6 +1433,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
finish_macroname(ps, mark, valid_macroname, macro_name_position)
end
# f(a,b) ==> (call f a b)
# f(a; b=1) ==> (call f a (parameters (b 1)))
# (a=1)() ==> (call (= a 1))
# f (a) ==> (call f (error-t) a)
bump_disallowed_space(ps)
bump(ps, TRIVIA_FLAG)
Expand All @@ -1457,6 +1459,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
K"]", ps.end_symbol)
# a[i] ==> (ref a i)
# a[i,j] ==> (ref a i j)
# (a=1)[] ==> (ref (= a 1))
# T[x y] ==> (typed_hcat T x y)
# T[x ; y] ==> (typed_vcat T x y)
# T[a b; c d] ==> (typed_vcat T (row a b) (row c d))
Expand Down Expand Up @@ -1562,15 +1565,10 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
this_iter_valid_macroname = true
end
elseif k == K"'" && !preceding_whitespace(t)
if !is_suffixed(t)
# f' ==> (' f)
bump(ps, TRIVIA_FLAG)
emit(ps, mark, k)
else
# f'ᵀ ==> (call 'ᵀ f)
bump(ps)
emit(ps, mark, K"call", INFIX_FLAG)
end
# f' ==> (call-j f ')
# f'ᵀ ==> (call-j f 'ᵀ)
bump(ps)
emit(ps, mark, K"call", POSTFIX_OP_FLAG)
elseif k == K"{"
# Type parameter curlies and macro calls
if is_macrocall
Expand Down
1 change: 0 additions & 1 deletion src/tokenize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -945,7 +945,6 @@ function lex_backslash(l::Lexer)
return emit(l, K"\\")
end

# TODO .op
function lex_dot(l::Lexer)
if accept(l, '.')
if accept(l, '.')
Expand Down
Loading