Skip to content

Commit

Permalink
Replace K"true" and K"false" with K"Bool" (#488)
Browse files Browse the repository at this point in the history
Use a single `K"Bool"` Kind for booleans. This is both more convenient
and more consistent with other literal kinds such as K"Integer" which
group all integers under a single kind.

Replace the use of the invisible `K"false"` token in catch blocks with a
new kind `K"Placeholder"` - this removes the last of the invisible
tokens, other than error tokens! K"Placeholder" will also be really
useful for JuliaLowering as a kind for all-underscore identifiers.
  • Loading branch information
c42f authored Aug 9, 2024
1 parent abf099e commit b92fc5e
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 34 deletions.
4 changes: 4 additions & 0 deletions src/expr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,10 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
args = Any[args[1], a2a...]
end
end
elseif k == K"catch"
if kind(childheads[1]) == K"Placeholder"
args[1] = false
end
elseif k == K"try"
# Try children in source order:
# try_block catch_var catch_block else_block finally_block
Expand Down
9 changes: 6 additions & 3 deletions src/kinds.jl
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ register_kinds!(JuliaSyntax, 0, [
# Identifiers
"BEGIN_IDENTIFIERS"
"Identifier"
"Placeholder" # Used for empty catch variables, and all-underscore identifiers in lowering
# Macro names are modelled as special kinds of identifiers because the full
# macro name may not appear as characters in the source: The `@` may be
# detached from the macro name as in `@A.x` (ugh!!), or have a _str or _cmd
Expand Down Expand Up @@ -253,6 +254,7 @@ register_kinds!(JuliaSyntax, 0, [
"END_KEYWORDS"

"BEGIN_LITERAL"
"Bool"
"Integer"
"BinInt"
"HexInt"
Expand All @@ -262,8 +264,6 @@ register_kinds!(JuliaSyntax, 0, [
"String"
"Char"
"CmdString"
"true"
"false"
"END_LITERAL"

"BEGIN_DELIMITERS"
Expand Down Expand Up @@ -1067,7 +1067,7 @@ register_kinds!(JuliaSyntax, 0, [

# Special tokens
"TOMBSTONE" # Empty placeholder for kind to be filled later
"None" # Placeholder; never emitted by lexer
"None" # Never emitted by lexer/parser
"EndMarker" # EOF

"BEGIN_ERRORS"
Expand Down Expand Up @@ -1097,6 +1097,7 @@ const _nonunique_kind_names = Set([
K"Whitespace"
K"NewlineWs"
K"Identifier"
K"Placeholder"

K"ErrorEofMultiComment"
K"ErrorInvalidNumericConstant"
Expand Down Expand Up @@ -1169,6 +1170,7 @@ const _token_error_descriptions = Dict{Kind, String}(

#-------------------------------------------------------------------------------
# Predicates
# Range predicates over `Kind`. Each one is an inclusive comparison of `k`
# against a pair of `BEGIN_*` / `END_*` marker kinds.
function is_identifier(k::Kind)
    return K"BEGIN_IDENTIFIERS" <= k <= K"END_IDENTIFIERS"
end

function is_contextual_keyword(k::Kind)
    return K"BEGIN_CONTEXTUAL_KEYWORDS" <= k <= K"END_CONTEXTUAL_KEYWORDS"
end

# In addition to the `BEGIN_ERRORS`..`END_ERRORS` range, two specific kinds are
# treated as errors regardless of the outcome of the range check.
function is_error(k::Kind)
    in_error_range = K"BEGIN_ERRORS" <= k <= K"END_ERRORS"
    return in_error_range || k == K"ErrorInvalidOperator" || k == K"Error**"
end

function is_keyword(k::Kind)
    return K"BEGIN_KEYWORDS" <= k <= K"END_KEYWORDS"
end
Expand All @@ -1177,6 +1179,7 @@ is_literal(k::Kind) = K"BEGIN_LITERAL" <= k <= K"END_LITERAL"
# True when `k` lies between the `BEGIN_OPS` and `END_OPS` marker kinds
# (inclusive).
function is_operator(k::Kind)
    return K"BEGIN_OPS" <= k <= K"END_OPS"
end

# True for the operators that are spelled as words: `in`, `isa` and `where`.
function is_word_operator(k::Kind)
    return k == K"in" || k == K"isa" || k == K"where"
end

# Convenience methods: take any value `kind` can be applied to and pass the
# result of `kind(k)` on to the same predicate.
function is_identifier(k)
    return is_identifier(kind(k))
end

function is_contextual_keyword(k)
    return is_contextual_keyword(kind(k))
end

function is_error(k)
    return is_error(kind(k))
end

function is_keyword(k)
    return is_keyword(kind(k))
end
Expand Down
8 changes: 3 additions & 5 deletions src/literal_parsing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -406,10 +406,8 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
last(srcrange)+1, Diagnostic[])
end
return had_error ? ErrorVal() : String(take!(io))
elseif k == K"true"
return true
elseif k == K"false"
return false
elseif k == K"Bool"
return txtbuf[first(srcrange)] == u8"t"
end

# TODO: Avoid allocating temporary String here
Expand All @@ -418,7 +416,7 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
parse_int_literal(val_str)
elseif k in KSet"BinInt OctInt HexInt"
parse_uint_literal(val_str, k)
elseif k == K"Identifier"
elseif k == K"Identifier" || k == K"Placeholder"
if has_flags(head, RAW_STRING_FLAG)
io = IOBuffer()
unescape_raw_string(io, txtbuf, first(srcrange), last(srcrange)+1, false)
Expand Down
12 changes: 6 additions & 6 deletions src/parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2254,15 +2254,15 @@ function parse_try(ps)
if peek(ps) == K"else"
# catch-else syntax: https://github.com/JuliaLang/julia/pull/42211
#
#v1.8: try catch ; else end ==> (try (block) (catch false (block)) (else (block)))
#v1.8: try catch ; else end ==> (try (block) (catch □ (block)) (else (block)))
else_mark = position(ps)
bump(ps, TRIVIA_FLAG)
parse_block(ps)
if !has_catch
#v1.8: try else x finally y end ==> (try (block) (else (error (block x))) (finally (block y)))
emit(ps, else_mark, K"error", error="Expected `catch` before `else`")
end
#v1.7: try catch ; else end ==> (try (block) (catch false (block)) (else (error (block))))
#v1.7: try catch ; else end ==> (try (block) (catch □ (block)) (else (error (block))))
min_supported_version(v"1.8", ps, else_mark, "`else` after `catch`")
emit(ps, else_mark, K"else")
end
Expand Down Expand Up @@ -2302,10 +2302,10 @@ function parse_catch(ps::ParseState)
bump(ps, TRIVIA_FLAG)
k = peek(ps)
if k in KSet"NewlineWs ;" || is_closing_token(ps, k)
# try x catch end ==> (try (block x) (catch false (block)))
# try x catch ; y end ==> (try (block x) (catch false (block y)))
# try x catch \n y end ==> (try (block x) (catch false (block y)))
bump_invisible(ps, K"false")
# try x catch end ==> (try (block x) (catch □ (block)))
# try x catch ; y end ==> (try (block x) (catch □ (block y)))
# try x catch \n y end ==> (try (block x) (catch □ (block y)))
bump_invisible(ps, K"Placeholder")
else
# try x catch e y end ==> (try (block x) (catch e (block y)))
# try x catch $e y end ==> (try (block x) (catch ($ e) (block y)))
Expand Down
19 changes: 15 additions & 4 deletions src/syntax_tree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,19 @@ byte_range(node::AbstractSyntaxNode) = node.position:(node.position + span(node)

sourcefile(node::AbstractSyntaxNode) = node.source

"""
    leaf_string(ex)

Return the text used to display the leaf node `ex` in the tree and
s-expression printers.

The formatting is chosen from `kind(ex)` and the node's value `ex.val`:

* `K"Placeholder"`: `"□"` followed by `string(ex.val)`
* any other identifier kind: `string(ex.val)`
* any other node whose value is a `Symbol`: `string(ex.val)`
* everything else: `repr(ex.val)`

Throws an `ArgumentError` if `ex` is not a leaf node.
"""
function leaf_string(ex)
    if !is_leaf(ex)
        throw(ArgumentError("leaf_string should be used for leaf nodes only"))
    end
    k = kind(ex)
    value = ex.val
    # TODO: Dispatch on kind extension module (??)
    # K"Placeholder" is tested first, ahead of the general identifier branch,
    # so that it is the branch which adds the "□" marker.
    return k == K"Placeholder" ? "□"*string(value) :
           is_identifier(k)    ? string(value)     :
           value isa Symbol    ? string(value)     : # see parse_julia_literal for other cases which go here
           repr(value)
end

function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode,
indent, show_byte_offsets)
fname = filename(node)
Expand All @@ -134,8 +147,7 @@ function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode,
posstr *= "$(lpad(first_byte(node),6)):$(rpad(last_byte(node),6))│"
end
val = node.val
nodestr = !is_leaf(node) ? "[$(untokenize(head(node)))]" :
isa(val, Symbol) ? string(val) : repr(val)
nodestr = is_leaf(node) ? leaf_string(node) : "[$(untokenize(head(node)))]"
treestr = string(indent, nodestr)
# Add filename if it's changed from the previous node
if fname != current_filename[]
Expand All @@ -157,8 +169,7 @@ function _show_syntax_node_sexpr(io, node::AbstractSyntaxNode)
if is_error(node)
print(io, "(", untokenize(head(node)), ")")
else
val = node.val
print(io, val isa Symbol ? string(val) : repr(val))
print(io, leaf_string(node))
end
else
print(io, "(", untokenize(head(node)))
Expand Down
10 changes: 6 additions & 4 deletions src/tokenize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1319,8 +1319,10 @@ function lex_identifier(l::Lexer, c)

if n > MAX_KW_LENGTH
emit(l, K"Identifier")
elseif h == _true_hash || h == _false_hash
emit(l, K"Bool")
else
emit(l, get(kw_hash, h, K"Identifier"))
emit(l, get(_kw_hash, h, K"Identifier"))
end
end

Expand Down Expand Up @@ -1374,8 +1376,6 @@ K"while",
K"in",
K"isa",
K"where",
K"true",
K"false",

K"abstract",
K"as",
Expand All @@ -1387,6 +1387,8 @@ K"type",
K"var",
]

const kw_hash = Dict(simple_hash(lowercase(string(kw))) => kw for kw in kws)
const _true_hash = simple_hash("true")
const _false_hash = simple_hash("false")
const _kw_hash = Dict(simple_hash(lowercase(string(kw))) => kw for kw in kws)

end # module
10 changes: 5 additions & 5 deletions test/parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -635,18 +635,18 @@ tests = [
"(try (block x) (catch e (block y)) (finally (block z)))"
((v=v"1.8",), "try \n x \n catch e \n y \n else z finally \n w end") =>
"(try (block x) (catch e (block y)) (else (block z)) (finally (block w)))"
"try x catch end" => "(try (block x) (catch false (block)))"
"try x catch ; y end" => "(try (block x) (catch false (block y)))"
"try x catch \n y end" => "(try (block x) (catch false (block y)))"
"try x catch end" => "(try (block x) (catch (block)))"
"try x catch ; y end" => "(try (block x) (catch (block y)))"
"try x catch \n y end" => "(try (block x) (catch (block y)))"
"try x catch e y end" => "(try (block x) (catch e (block y)))"
"try x catch \$e y end" => "(try (block x) (catch (\$ e) (block y)))"
"try x catch var\"#\" y end" => "(try (block x) (catch (var #) (block y)))"
"try x catch e+3 y end" => "(try (block x) (catch (error (call-i e + 3)) (block y)))"
"try x finally y end" => "(try (block x) (finally (block y)))"
# v1.8 only
((v=v"1.8",), "try catch ; else end") => "(try (block) (catch false (block)) (else (block)))"
((v=v"1.8",), "try catch ; else end") => "(try (block) (catch (block)) (else (block)))"
((v=v"1.8",), "try else x finally y end") => "(try (block) (else (error (block x))) (finally (block y)))"
((v=v"1.7",), "try catch ; else end") => "(try (block) (catch false (block)) (else (error (block))))"
((v=v"1.7",), "try catch ; else end") => "(try (block) (catch (block)) (else (error (block))))"
# finally before catch :-(
"try x finally y catch e z end" => "(try (block x) (finally (block y)) (catch e (block z)))"
"try x end" => "(try (block x) (error-t))"
Expand Down
12 changes: 5 additions & 7 deletions test/tokenize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,10 @@ end
end

@testset "tokenizing true/false literals" begin
@test tok("somtext true", 3).kind == K"true"
@test tok("somtext false", 3).kind == K"false"
@test tok("somtext true", 3).kind == K"Bool"
@test tok("somtext false", 3).kind == K"Bool"
@test tok("somtext tr", 3).kind == K"Identifier"
@test tok("somtext truething", 3).kind == K"Identifier"
@test tok("somtext falsething", 3).kind == K"Identifier"
end

Expand Down Expand Up @@ -962,9 +963,6 @@ const all_kws = Set([
"primitive",
"type",
"var",
# Literals
"true",
"false",
# Word-like operators
"in",
"isa",
Expand All @@ -974,14 +972,14 @@ const all_kws = Set([
function check_kw_hashes(iter)
for cs in iter
str = String([cs...])
if Tokenize.simple_hash(str) in keys(Tokenize.kw_hash)
if Tokenize.simple_hash(str) in keys(Tokenize._kw_hash)
@test str in all_kws
end
end
end

@testset "simple_hash" begin
@test length(all_kws) == length(Tokenize.kw_hash)
@test length(all_kws) == length(Tokenize._kw_hash)

@testset "Length $len keywords" for len in 1:5
check_kw_hashes(String([cs...]) for cs in Iterators.product(['a':'z' for _ in 1:len]...))
Expand Down

0 comments on commit b92fc5e

Please sign in to comment.