Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

properly support malformed char literals #44765

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/ast.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ JL_DLLEXPORT jl_sym_t *jl_acquire_sym;
JL_DLLEXPORT jl_sym_t *jl_release_sym;
JL_DLLEXPORT jl_sym_t *jl_acquire_release_sym;
JL_DLLEXPORT jl_sym_t *jl_sequentially_consistent_sym;
JL_DLLEXPORT jl_sym_t *jl_julia_char_sym;


static const uint8_t flisp_system_image[] = {
Expand Down Expand Up @@ -366,6 +367,7 @@ void jl_init_common_symbols(void)
jl_release_sym = jl_symbol("release");
jl_acquire_release_sym = jl_symbol("acquire_release");
jl_sequentially_consistent_sym = jl_symbol("sequentially_consistent");
jl_julia_char_sym = jl_symbol("julia_char");
}

JL_DLLEXPORT void jl_lisp_prompt(void)
Expand Down Expand Up @@ -575,6 +577,13 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m
ex = scm_to_julia_(fl_ctx, car_(e), mod);
temp = jl_new_struct(jl_quotenode_type, ex);
}
else if (sym == jl_julia_char_sym) {
value_t v = car_(e);
if (!(iscprim(v) && cp_class((cprim_t*)ptr(v)) == fl_ctx->uint32type))
jl_error("malformed julia char");
uint32_t c = *(uint32_t*)cp_data((cprim_t*)ptr(v));
temp = jl_box_char(c);
}
if (temp) {
JL_GC_POP();
return temp;
Expand Down
1 change: 1 addition & 0 deletions src/flisp/flisp.c
Original file line number Diff line number Diff line change
Expand Up @@ -2402,6 +2402,7 @@ static void lisp_init(fl_context_t *fl_ctx, size_t initial_heapsize)
#endif

fl_ctx->jl_sym = symbol(fl_ctx, "julia_value");
fl_ctx->jl_char_sym = symbol(fl_ctx, "julia_char");

fl_ctx->the_empty_vector = tagptr(alloc_words(fl_ctx, 1), TAG_VECTOR);
vector_setsize(fl_ctx->the_empty_vector, 0);
Expand Down
1 change: 1 addition & 0 deletions src/flisp/flisp.h
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,7 @@ struct _fl_context_t {
value_t apply_func, apply_v, apply_e;

value_t jl_sym;
value_t jl_char_sym;
// persistent buffer (avoid repeated malloc/free)
// for julia_extensions.c: normalize
size_t jlbuflen;
Expand Down
25 changes: 25 additions & 0 deletions src/flisp/julia_extensions.c
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,30 @@ value_t fl_string2normsymbol(fl_context_t *fl_ctx, value_t *args, uint32_t nargs
return symbol(fl_ctx, normalize(fl_ctx, (char*)cvalue_data(args[0])));
}

// flisp builtin `string.only-julia-char`.
//
// Takes exactly one argument, which must be a string (otherwise a type error is raised).
// If the string is between 1 and 4 bytes long, its bytes are packed into a uint32 with
// the first byte in the most significant position and any unused low-order bytes left
// as zero, and the two-element list `(julia_char <uint32>)` is returned. For any other
// length the result is false.
//
// The bytes are not validated here, so byte sequences such as "abcd" are accepted as
// long as they do not exceed 4 bytes.
value_t fl_string_only_julia_char(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
{
    argcount(fl_ctx, "string.only-julia-char", nargs, 1);
    if (!fl_isstring(fl_ctx, args[0]))
        type_error(fl_ctx, "string.only-julia-char", "string", args[0]);

    size_t nbytes = cv_len((cvalue_t*)ptr(args[0]));
    if (nbytes == 0 || nbytes > 4)
        return fl_ctx->F;

    uint8_t *bytes = (uint8_t*)cvalue_data(args[0]);
    uint32_t packed = 0;
    // byte i lands at bit offset 24, 16, 8, 0 for i = 0, 1, 2, 3
    for (size_t i = 0; i < nbytes; i++)
        packed |= (uint32_t)bytes[i] << (8 * (3 - i));

    return fl_list2(fl_ctx, fl_ctx->jl_char_sym, mk_uint32(fl_ctx, packed));
}

static const builtinspec_t julia_flisp_func_info[] = {
{ "skip-ws", fl_skipws },
{ "accum-julia-symbol", fl_accum_julia_symbol },
Expand All @@ -371,6 +395,7 @@ static const builtinspec_t julia_flisp_func_info[] = {
{ "strip-op-suffix", fl_julia_strip_op_suffix },
{ "underscore-symbol?", fl_julia_underscore_symbolp },
{ "string->normsymbol", fl_string2normsymbol },
{ "string.only-julia-char", fl_string_only_julia_char },
{ NULL, NULL }
};

Expand Down
61 changes: 38 additions & 23 deletions src/julia-parser.scm
Original file line number Diff line number Diff line change
Expand Up @@ -2465,29 +2465,44 @@
(cond ;; char literal
((eq? t '|'|)
(take-token s)
(let ((firstch (read-char (ts:port s))))
(if (and (not (eqv? firstch #\\))
(not (eof-object? firstch))
(eqv? (peek-char (ts:port s)) #\'))
;; easy case: 1 character, no \
(begin (read-char (ts:port s)) firstch)
(let ((b (open-output-string)))
(let loop ((c firstch))
(if (not (eqv? c #\'))
(begin (if (eqv? c #\") ;; issue 14683
(error "invalid character literal"))
(write-char (not-eof-1 c) b)
(if (eqv? c #\\)
(write-char (not-eof-1 (read-char (ts:port s)))
b))
(loop (read-char (ts:port s))))))
(let ((str (unescape-string (io.tostring! b))))
(let ((len (string-length str)))
(if (= len 1)
(string.char str 0)
(if (= len 0)
(error "invalid empty character literal")
(error "character literal contains multiple characters")))))))))
(let ((firstch (read-char (ts:port s)))
(b (open-output-string)))
;; need to account for escape codes. In the case of `\x12` or `\12`, we even
;; allow multiple codes in a single char literal to represent malformed chars
(let loop ((c firstch) (allowed-digits 0) (oct? #f) (first? #t) (only-raw? #t))
(cond
((eof-object? c)
(error "incomplete: invalid character syntax"))
((= c #\')
(and first? (eqv? (peek-char (ts:port s)) #\')
(write-char (read-char (ts:port s)) b)))
((= c #\\)
(let* ((c (not-eof-1 (read-char (ts:port s))))
(only-raw? (and only-raw? (or (= c #\x) (char-oct? c))))
(allowed-digits (case c (#\x 2)
(#\u 4)
(#\U 8)
(else (if (char-oct? c) 2 0)))))
(or first? only-raw?
(error "character literal contains multiple characters"))
(write-char #\\ b)
(write-char c b)
(loop (read-char (ts:port s)) allowed-digits (char-oct? c) #f only-raw?)))
((and (> allowed-digits 0) ((if oct? char-oct? char-hex?) c))
(write-char c b)
(loop (read-char (ts:port s)) (- allowed-digits 1) oct? #f only-raw?))
;; only allow one char if it's not an escape code
(first?
(if (= c #\") (write-char #\\ b)) ;; need to escape double quote
(write-char c b)
(loop (read-char (ts:port s)) 0 #f #f #f))
(else (error "character literal contains multiple characters"))))
(let* ((str (unescape-string (io.tostring! b)))
(c (string.only-julia-char str)))
(or c
(if (= (string-length str) 0)
(error "invalid empty character literal")
(error "character literal contains multiple characters"))))))

;; symbol/expression quote
((eq? t ':)
Expand Down
1 change: 1 addition & 0 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1476,6 +1476,7 @@ extern JL_DLLEXPORT jl_sym_t *jl_acquire_sym;
extern JL_DLLEXPORT jl_sym_t *jl_release_sym;
extern JL_DLLEXPORT jl_sym_t *jl_acquire_release_sym;
extern JL_DLLEXPORT jl_sym_t *jl_sequentially_consistent_sym;
extern JL_DLLEXPORT jl_sym_t *jl_julia_char_sym;

JL_DLLEXPORT enum jl_memory_order jl_get_atomic_order(jl_sym_t *order, char loading, char storing);
JL_DLLEXPORT enum jl_memory_order jl_get_atomic_order_checked(jl_sym_t *order, char loading, char storing);
Expand Down
54 changes: 35 additions & 19 deletions test/syntax.jl
Original file line number Diff line number Diff line change
Expand Up @@ -276,9 +276,6 @@ end
@test Meta.parse("'\"'") == Meta.parse("'\\\"'") == '"' == "\""[1] == '\42'
simeonschaub marked this conversation as resolved.
Show resolved Hide resolved

# issue #24558
@test_throws ParseError Meta.parse("'\\xff'")
@test_throws ParseError Meta.parse("'\\x80'")
@test_throws ParseError Meta.parse("'ab'")
@test '\u2200' == "\u2200"[1]

@test_throws ParseError Meta.parse("f(2x for x=1:10, y")
Expand Down Expand Up @@ -317,19 +314,16 @@ let p = 15
@test 2p+1 == 31 # not a hex float literal
end

function test_parseerror(str, msg)
try
Meta.parse(str)
@test false
catch e
@test isa(e,ParseError) && e.msg == msg
end
# `@test_parseerror str msg` expands to
# `@test_throws ParseError(msg) Meta.parse(str)`, i.e. it asserts that parsing `str`
# throws a `ParseError` constructed from `msg`. Both arguments are escaped so they are
# evaluated in the caller's scope.
macro test_parseerror(str, msg)
ex = :(@test_throws ParseError($(esc(msg))) Meta.parse($(esc(str))))
# The second argument of a macro-call expression is its source location; replace it
# with the location of the `@test_parseerror` invocation so that the inner
# `@test_throws` is attributed to the caller's line instead of this definition.
ex.args[2] = __source__
return ex
end
test_parseerror("0x", "invalid numeric constant \"0x\"")
test_parseerror("0b", "invalid numeric constant \"0b\"")
test_parseerror("0o", "invalid numeric constant \"0o\"")
test_parseerror("0x0.1", "hex float literal must contain \"p\" or \"P\"")
test_parseerror("0x1.0p", "invalid numeric constant \"0x1.0\"")
@test_parseerror("0x", "invalid numeric constant \"0x\"")
@test_parseerror("0b", "invalid numeric constant \"0b\"")
@test_parseerror("0o", "invalid numeric constant \"0o\"")
@test_parseerror("0x0.1", "hex float literal must contain \"p\" or \"P\"")
@test_parseerror("0x1.0p", "invalid numeric constant \"0x1.0\"")

# issue #15798
@test Meta.lower(Main, Base.parse_input_line("""
Expand All @@ -345,8 +339,8 @@ test_parseerror("0x1.0p", "invalid numeric constant \"0x1.0\"")
""")::Expr) == 23341

# issue #15763
test_parseerror("if\nfalse\nend", "missing condition in \"if\" at none:1")
test_parseerror("if false\nelseif\nend", "missing condition in \"elseif\" at none:2")
@test_parseerror("if\nfalse\nend", "missing condition in \"if\" at none:1")
@test_parseerror("if false\nelseif\nend", "missing condition in \"elseif\" at none:2")

# issue #15828
@test Meta.lower(Main, Meta.parse("x...")) == Expr(:error, "\"...\" expression outside call")
Expand Down Expand Up @@ -2054,8 +2048,8 @@ end == 1
# issue #29982
@test Meta.parse("'a'") == 'a'
@test Meta.parse("'\U0061'") == 'a'
test_parseerror("''", "invalid empty character literal")
test_parseerror("'abc'", "character literal contains multiple characters")
@test_parseerror("''", "invalid empty character literal")
@test_parseerror("'abc'", "character literal contains multiple characters")

# optional soft scope: #28789, #33864

Expand Down Expand Up @@ -3280,3 +3274,25 @@ end
# issue 44723
demo44723()::Any = Base.Experimental.@opaque () -> true ? 1 : 2
@test demo44723()() == 1

# Character literals built from raw byte escapes (`\x..` and octal), including ones that
# are not valid UTF-8, must parse to a `Char`, while literals that contain more than one
# character must be rejected with a `ParseError`.
@testset "issue 25072" begin
# Accepted literals: each escaped byte occupies the next-highest byte of the 32-bit
# value the `Char` is compared against, starting from the most significant byte.
@test '\xc0\x80' == reinterpret(Char, 0xc0800000)
@test '\x80' == reinterpret(Char, 0x80000000)
@test '\xff' == reinterpret(Char, 0xff000000)
@test '\xff\xff\xff\xff' == reinterpret(Char, 0xffffffff)
# Unicode escapes still denote the code point they name.
@test '\uffff' == Char(0xffff)
@test '\U00002014' == Char(0x2014)
# Octal escapes behave like `\x` escapes: one raw byte each.
@test '\100' == reinterpret(Char, UInt32(0o100) << 24)
@test '\100\42' == reinterpret(Char, (UInt32(0o100) << 24) | (UInt32(0o42) << 16))
# Rejected literals: empty, more than four raw bytes, several plain characters,
# a `\u`/`\U` escape combined with anything else, or trailing characters after an
# escape.
@test_parseerror "''" "invalid empty character literal"
@test_parseerror "'\\xff\\xff\\xff\\xff\\xff'" "character literal contains multiple characters"
@test_parseerror "'abcd'" "character literal contains multiple characters"
@test_parseerror "'\\uff\\xff'" "character literal contains multiple characters"
@test_parseerror "'\\xff\\uff'" "character literal contains multiple characters"
@test_parseerror "'\\xffa'" "character literal contains multiple characters"
@test_parseerror "'\\uffffa'" "character literal contains multiple characters"
@test_parseerror "'\\U00002014a'" "character literal contains multiple characters"
@test_parseerror "'\\1000'" "character literal contains multiple characters"
# An unterminated literal is reported as incomplete input rather than a hard error.
@test Meta.isexpr(Meta.parse("'a"), :incomplete)
# A single quote directly between the delimiters is the quote character itself.
@test ''' == "'"[1]
end