Skip to content

Commit

Permalink
Fix crashes due to lexing ambiguity of string delimiters (#394)
Browse files Browse the repository at this point in the history
There are some lexing ambituities in primes vs cmd delimiters. We break
these with a simple rule in the lexer but there's edge cases of invalid
or extremely strange syntax where this can be inconsistent with the
parser. The following were some such cases which caused an assertion
error in the parser.

    "var\"#\"``\$"
    "x in'``\$"

This change avoids crashing in those cases, emitting an error instead.

See also #25
  • Loading branch information
c42f authored Nov 23, 2023
1 parent 5769755 commit acb609d
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 7 deletions.
25 changes: 20 additions & 5 deletions src/parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,10 @@ function is_both_unary_and_binary(t)
)
end

function is_string_macro_suffix(k)
k == K"Identifier" || is_keyword(k) || is_word_operator(k) || is_number(k)
end

# flisp: invalid-identifier?
function is_valid_identifier(k)
k = kind(k)
Expand Down Expand Up @@ -1707,7 +1711,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
parse_string(ps, true)
t = peek_token(ps)
k = kind(t)
if !preceding_whitespace(t) && (k == K"Identifier" || is_keyword(k) || is_word_operator(k) || is_number(k))
if !preceding_whitespace(t) && is_string_macro_suffix(k)
# Macro sufficies can include keywords and numbers
# x"s"y ==> (macrocall @x_str (string-r "s") "y")
# x"s"end ==> (macrocall @x_str (string-r "s") "end")
Expand Down Expand Up @@ -2344,7 +2348,7 @@ function parse_macro_name(ps::ParseState)
# @! x ==> (macrocall @! x)
# @.. x ==> (macrocall @.. x)
# @$ x ==> (macrocall @$ x)
# @var"#" x ==> (macrocall (var #) @$ x)
# @var"#" x ==> (macrocall (var @#) x)
bump_disallowed_space(ps)
mark = position(ps)
parse_atom(ps, false)
Expand Down Expand Up @@ -3182,7 +3186,13 @@ function parse_string(ps::ParseState, raw::Bool)
t = peek_full_token(ps)
k = kind(t)
if k == K"$"
@assert !raw # The lexer detects raw strings separately
if raw
# FIXME: This case is actually a tokenization error:
# The `K"$"` token should not occur when a raw string
# is being parsed, but this would require the lexer to know
# about the parse state. (see also parse_atom)
break
end
if prev_chunk_newline
# """\n$x\n a""" ==> (string-s x "\n" " a")
indent_ref_i = first_byte(t)
Expand Down Expand Up @@ -3526,11 +3536,16 @@ function parse_atom(ps::ParseState, check_identifiers=true)
# var"x"+ ==> x
# var"x") ==> x
# var"x"( ==> x
else
elseif is_string_macro_suffix(k)
# var"x"end ==> (var x (error-t))
# var"x"1 ==> (var x (error-t))
# var"x"y ==> (var x (error-t))
bump(ps, TRIVIA_FLAG, error="suffix not allowed after var\"...\" syntax")
bump(ps, TRIVIA_FLAG, error="suffix not allowed after `var\"...\"` syntax")
elseif k == K"`" || k == K"\"" || k == K"\"\"\"" || k == K"```"
# Disallow `var"#""str". To allow this we'd need to fix `raw`
# detection in lex_quote to be consistent with the parser.
bump_invisible(ps, K"error", TRIVIA_FLAG,
error="`var\"...\"` syntax not supported as string macro name")
end
emit(ps, mark, K"var")
elseif check_identifiers && is_closing_token(ps, leading_kind)
Expand Down
8 changes: 6 additions & 2 deletions test/parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1012,9 +1012,13 @@ parsestmt_test_specs = [
"(abstract\ntype X end)" => "(wrapper (parens abstract (error-t type X)) (error-t end ✘))"
"(mutable\nstruct X end)" => "(wrapper (parens mutable (error-t struct X)) (error-t end ✘))"

# The following is currently broken but at least the parser shouldn't
# crash.
# Lexer vs parser: issues detecting which tokens are string delimiters and
# detecting raw vs non-raw strings. The old parser was tightly coupled to
# the lexer and the parser state was used to disambiguate these cases.
"x in' '" => "(call-i x in (char (error)))"
"x in'``\$" => "(call-i x in (call-i (juxtapose (char '`' (error-t)) (macrocall core_@cmd (cmdstring-r (error-t)))) \$ (error)))"
"var\"#\"`str`" => "(juxtapose (var # (error-t)) (macrocall core_@cmd (cmdstring-r \"str\")))"
"var\"#\"\"str\"" => "(juxtapose (var # (error-t)) (error-t) (string \"str\"))"
]

@testset "Parser does not crash on broken code" begin
Expand Down

0 comments on commit acb609d

Please sign in to comment.