Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace K"true" and K"false" with K"Bool" #488

Merged
merged 1 commit into from
Aug 9, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Replace K"true" and K"false" with K"Bool"
Use a single `K"Bool"` Kind for booleans. This is both more convenient
and more consistent with other literal kinds such as K"Integer" which
group all integers under a single kind.

Replace the use of the invisible `K"false"` token in catch blocks with a
new kind `K"Placeholder"` - this removes the last of the invisible
tokens, other than error tokens! K"Placeholder" will also be really
useful for JuliaLowering as a kind for all-underscore identifiers.
  • Loading branch information
c42f committed Aug 9, 2024
commit 9ac2353c8c651368f950fa997b659195e0e3db39
4 changes: 4 additions & 0 deletions src/expr.jl
Original file line number Diff line number Diff line change
@@ -326,6 +326,10 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
args = Any[args[1], a2a...]
end
end
elseif k == K"catch"
if kind(childheads[1]) == K"Placeholder"
args[1] = false
end
elseif k == K"try"
# Try children in source order:
# try_block catch_var catch_block else_block finally_block
9 changes: 6 additions & 3 deletions src/kinds.jl
Original file line number Diff line number Diff line change
@@ -197,6 +197,7 @@ register_kinds!(JuliaSyntax, 0, [
# Identifiers
"BEGIN_IDENTIFIERS"
"Identifier"
"Placeholder" # Used for empty catch variables, and all-underscore identifiers in lowering
# Macro names are modelled as special kinds of identifiers because the full
# macro name may not appear as characters in the source: The `@` may be
# detached from the macro name as in `@A.x` (ugh!!), or have a _str or _cmd
@@ -253,6 +254,7 @@ register_kinds!(JuliaSyntax, 0, [
"END_KEYWORDS"

"BEGIN_LITERAL"
"Bool"
"Integer"
"BinInt"
"HexInt"
@@ -262,8 +264,6 @@ register_kinds!(JuliaSyntax, 0, [
"String"
"Char"
"CmdString"
"true"
"false"
"END_LITERAL"

"BEGIN_DELIMITERS"
@@ -1067,7 +1067,7 @@ register_kinds!(JuliaSyntax, 0, [

# Special tokens
"TOMBSTONE" # Empty placeholder for kind to be filled later
"None" # Placeholder; never emitted by lexer
"None" # Never emitted by lexer/parser
"EndMarker" # EOF

"BEGIN_ERRORS"
@@ -1097,6 +1097,7 @@ const _nonunique_kind_names = Set([
K"Whitespace"
K"NewlineWs"
K"Identifier"
K"Placeholder"

K"ErrorEofMultiComment"
K"ErrorInvalidNumericConstant"
@@ -1169,6 +1170,7 @@ const _token_error_descriptions = Dict{Kind, String}(

#-------------------------------------------------------------------------------
# Predicates
# Range predicates on `Kind`: each one tests whether `k` lies between the
# corresponding BEGIN_*/END_* marker kinds (bounds inclusive).
is_identifier(k::Kind) = K"BEGIN_IDENTIFIERS" <= k <= K"END_IDENTIFIERS"
is_contextual_keyword(k::Kind) = K"BEGIN_CONTEXTUAL_KEYWORDS" <= k <= K"END_CONTEXTUAL_KEYWORDS"
# In addition to the BEGIN_ERRORS..END_ERRORS range, two error kinds are
# checked explicitly by equality.
is_error(k::Kind) = K"BEGIN_ERRORS" <= k <= K"END_ERRORS" || k == K"ErrorInvalidOperator" || k == K"Error**"
is_keyword(k::Kind) = K"BEGIN_KEYWORDS" <= k <= K"END_KEYWORDS"
@@ -1177,6 +1179,7 @@ is_literal(k::Kind) = K"BEGIN_LITERAL" <= k <= K"END_LITERAL"
# True when `k` lies between the BEGIN_OPS and END_OPS marker kinds (inclusive).
is_operator(k::Kind) = K"BEGIN_OPS" <= k <= K"END_OPS"
# True for exactly the three word-like operator kinds `in`, `isa` and `where`.
is_word_operator(k::Kind) = (k == K"in" || k == K"isa" || k == K"where")

# Convenience methods: for an argument that is not itself a `Kind`, look up its
# kind with `kind(k)` and forward to the `Kind` method of the same predicate.
is_identifier(k) = is_identifier(kind(k))
is_contextual_keyword(k) = is_contextual_keyword(kind(k))
is_error(k) = is_error(kind(k))
is_keyword(k) = is_keyword(kind(k))
8 changes: 3 additions & 5 deletions src/literal_parsing.jl
Original file line number Diff line number Diff line change
@@ -406,10 +406,8 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
last(srcrange)+1, Diagnostic[])
end
return had_error ? ErrorVal() : String(take!(io))
elseif k == K"true"
return true
elseif k == K"false"
return false
elseif k == K"Bool"
return txtbuf[first(srcrange)] == u8"t"
end

# TODO: Avoid allocating temporary String here
@@ -418,7 +416,7 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
parse_int_literal(val_str)
elseif k in KSet"BinInt OctInt HexInt"
parse_uint_literal(val_str, k)
elseif k == K"Identifier"
elseif k == K"Identifier" || k == K"Placeholder"
if has_flags(head, RAW_STRING_FLAG)
io = IOBuffer()
unescape_raw_string(io, txtbuf, first(srcrange), last(srcrange)+1, false)
12 changes: 6 additions & 6 deletions src/parser.jl
Original file line number Diff line number Diff line change
@@ -2254,15 +2254,15 @@ function parse_try(ps)
if peek(ps) == K"else"
# catch-else syntax: https://github.com/JuliaLang/julia/pull/42211
#
#v1.8: try catch ; else end ==> (try (block) (catch false (block)) (else (block)))
#v1.8: try catch ; else end ==> (try (block) (catch (block)) (else (block)))
else_mark = position(ps)
bump(ps, TRIVIA_FLAG)
parse_block(ps)
if !has_catch
#v1.8: try else x finally y end ==> (try (block) (else (error (block x))) (finally (block y)))
emit(ps, else_mark, K"error", error="Expected `catch` before `else`")
end
#v1.7: try catch ; else end ==> (try (block) (catch false (block)) (else (error (block))))
#v1.7: try catch ; else end ==> (try (block) (catch (block)) (else (error (block))))
min_supported_version(v"1.8", ps, else_mark, "`else` after `catch`")
emit(ps, else_mark, K"else")
end
@@ -2302,10 +2302,10 @@ function parse_catch(ps::ParseState)
bump(ps, TRIVIA_FLAG)
k = peek(ps)
if k in KSet"NewlineWs ;" || is_closing_token(ps, k)
# try x catch end ==> (try (block x) (catch false (block)))
# try x catch ; y end ==> (try (block x) (catch false (block y)))
# try x catch \n y end ==> (try (block x) (catch false (block y)))
bump_invisible(ps, K"false")
# try x catch end ==> (try (block x) (catch (block)))
# try x catch ; y end ==> (try (block x) (catch (block y)))
# try x catch \n y end ==> (try (block x) (catch (block y)))
bump_invisible(ps, K"Placeholder")
else
# try x catch e y end ==> (try (block x) (catch e (block y)))
# try x catch $e y end ==> (try (block x) (catch ($ e) (block y)))
19 changes: 15 additions & 4 deletions src/syntax_tree.jl
Original file line number Diff line number Diff line change
@@ -125,6 +125,19 @@ byte_range(node::AbstractSyntaxNode) = node.position:(node.position + span(node)

sourcefile(node::AbstractSyntaxNode) = node.source

"""
    leaf_string(ex)

Return the string used to display the leaf node `ex` when printing a syntax
tree.

The result depends on the node's kind and on its value `ex.val`:
* `K"Placeholder"` nodes are shown as `"□"` followed by `string(ex.val)`.
* Other identifier kinds, and any other leaf whose value is a `Symbol`, are
  shown with `string(ex.val)`.
* Everything else is shown with `repr(ex.val)`.

Throws an `ArgumentError` if `ex` is not a leaf node.
"""
function leaf_string(ex)
    if !is_leaf(ex)
        # The message names this function so the error points at the right call.
        throw(ArgumentError("leaf_string should be used for leaf nodes only"))
    end
    k = kind(ex)
    value = ex.val
    # TODO: Dispatch on kind extension module (??)
    return k == K"Placeholder" ? "□"*string(value) :
           is_identifier(k)    ? string(value) :
           value isa Symbol    ? string(value) : # see parse_julia_literal for other cases which go here
           repr(value)
end

function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode,
indent, show_byte_offsets)
fname = filename(node)
@@ -134,8 +147,7 @@ function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode,
posstr *= "$(lpad(first_byte(node),6)):$(rpad(last_byte(node),6))│"
end
val = node.val
nodestr = !is_leaf(node) ? "[$(untokenize(head(node)))]" :
isa(val, Symbol) ? string(val) : repr(val)
nodestr = is_leaf(node) ? leaf_string(node) : "[$(untokenize(head(node)))]"
treestr = string(indent, nodestr)
# Add filename if it's changed from the previous node
if fname != current_filename[]
@@ -157,8 +169,7 @@ function _show_syntax_node_sexpr(io, node::AbstractSyntaxNode)
if is_error(node)
print(io, "(", untokenize(head(node)), ")")
else
val = node.val
print(io, val isa Symbol ? string(val) : repr(val))
print(io, leaf_string(node))
end
else
print(io, "(", untokenize(head(node)))
10 changes: 6 additions & 4 deletions src/tokenize.jl
Original file line number Diff line number Diff line change
@@ -1319,8 +1319,10 @@ function lex_identifier(l::Lexer, c)

if n > MAX_KW_LENGTH
emit(l, K"Identifier")
elseif h == _true_hash || h == _false_hash
emit(l, K"Bool")
else
emit(l, get(kw_hash, h, K"Identifier"))
emit(l, get(_kw_hash, h, K"Identifier"))
end
end

@@ -1374,8 +1376,6 @@ K"while",
K"in",
K"isa",
K"where",
K"true",
K"false",

K"abstract",
K"as",
@@ -1387,6 +1387,8 @@ K"type",
K"var",
]

const kw_hash = Dict(simple_hash(lowercase(string(kw))) => kw for kw in kws)
const _true_hash = simple_hash("true")
const _false_hash = simple_hash("false")
const _kw_hash = Dict(simple_hash(lowercase(string(kw))) => kw for kw in kws)

end # module
10 changes: 5 additions & 5 deletions test/parser.jl
Original file line number Diff line number Diff line change
@@ -635,18 +635,18 @@ tests = [
"(try (block x) (catch e (block y)) (finally (block z)))"
((v=v"1.8",), "try \n x \n catch e \n y \n else z finally \n w end") =>
"(try (block x) (catch e (block y)) (else (block z)) (finally (block w)))"
"try x catch end" => "(try (block x) (catch false (block)))"
"try x catch ; y end" => "(try (block x) (catch false (block y)))"
"try x catch \n y end" => "(try (block x) (catch false (block y)))"
"try x catch end" => "(try (block x) (catch (block)))"
"try x catch ; y end" => "(try (block x) (catch (block y)))"
"try x catch \n y end" => "(try (block x) (catch (block y)))"
"try x catch e y end" => "(try (block x) (catch e (block y)))"
"try x catch \$e y end" => "(try (block x) (catch (\$ e) (block y)))"
"try x catch var\"#\" y end" => "(try (block x) (catch (var #) (block y)))"
"try x catch e+3 y end" => "(try (block x) (catch (error (call-i e + 3)) (block y)))"
"try x finally y end" => "(try (block x) (finally (block y)))"
# v1.8 only
((v=v"1.8",), "try catch ; else end") => "(try (block) (catch false (block)) (else (block)))"
((v=v"1.8",), "try catch ; else end") => "(try (block) (catch (block)) (else (block)))"
((v=v"1.8",), "try else x finally y end") => "(try (block) (else (error (block x))) (finally (block y)))"
((v=v"1.7",), "try catch ; else end") => "(try (block) (catch false (block)) (else (error (block))))"
((v=v"1.7",), "try catch ; else end") => "(try (block) (catch (block)) (else (error (block))))"
# finally before catch :-(
"try x finally y catch e z end" => "(try (block x) (finally (block y)) (catch e (block z)))"
"try x end" => "(try (block x) (error-t))"
12 changes: 5 additions & 7 deletions test/tokenize.jl
Original file line number Diff line number Diff line change
@@ -198,9 +198,10 @@ end
end

@testset "tokenizing true/false literals" begin
@test tok("somtext true", 3).kind == K"true"
@test tok("somtext false", 3).kind == K"false"
@test tok("somtext true", 3).kind == K"Bool"
@test tok("somtext false", 3).kind == K"Bool"
@test tok("somtext tr", 3).kind == K"Identifier"
@test tok("somtext truething", 3).kind == K"Identifier"
@test tok("somtext falsething", 3).kind == K"Identifier"
end

@@ -962,9 +963,6 @@ const all_kws = Set([
"primitive",
"type",
"var",
# Literals
"true",
"false",
# Word-like operators
"in",
"isa",
@@ -974,14 +972,14 @@ const all_kws = Set([
function check_kw_hashes(iter)
for cs in iter
str = String([cs...])
if Tokenize.simple_hash(str) in keys(Tokenize.kw_hash)
if Tokenize.simple_hash(str) in keys(Tokenize._kw_hash)
@test str in all_kws
end
end
end

@testset "simple_hash" begin
@test length(all_kws) == length(Tokenize.kw_hash)
@test length(all_kws) == length(Tokenize._kw_hash)

@testset "Length $len keywords" for len in 1:5
check_kw_hashes(String([cs...]) for cs in Iterators.product(['a':'z' for _ in 1:len]...))
Loading