From 034c5dfce132c0b375a8503277d482db7900f0d2 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Thu, 11 Nov 2021 18:03:23 -0500 Subject: [PATCH] disallow unbalanced bidirectional formatting in strings and comments (#42918) (cherry picked from commit 2cfebad7a9e6cf48d036c7a4893c845431707834) --- src/julia-parser.scm | 102 ++++++++++++++++++++++++++----------------- test/syntax.jl | 18 ++++++++ 2 files changed, 79 insertions(+), 41 deletions(-) diff --git a/src/julia-parser.scm b/src/julia-parser.scm index dbd9c35ff4ff4a..4c4d0a47fd42ec 100644 --- a/src/julia-parser.scm +++ b/src/julia-parser.scm @@ -219,13 +219,6 @@ (define (newline? c) (eqv? c #\newline)) -(define (skip-to-eol port) - (let ((c (peek-char port))) - (cond ((eof-object? c) c) - ((eqv? c #\newline) c) - (else (read-char port) - (skip-to-eol port))))) - (define (op-or-sufchar? c) (or (op-suffix-char? c) (opchar? c))) (define (read-operator port c0 (postfix? #f)) @@ -486,33 +479,56 @@ (pair? (cadr t)) (eq? (car (cadr t)) 'core) (memq (cadadr t) '(@int128_str @uint128_str @big_str)))) +(define (make-bidi-state) '(0 . 0)) + +(define (update-bidi-state st c) + (case c + ((#\U202A #\U202B #\U202D #\U202E) (cons (+ (car st) 1) (cdr st))) ;; LRE RLE LRO RLO + ((#\U2066 #\U2067 #\U2068) (cons (car st) (+ (cdr st) 1))) ;; LRI RLI FSI + ((#\U202C) (cons (- (car st) 1) (cdr st))) ;; PDF + ((#\U2069) (cons (car st) (- (cdr st) 1))) ;; PDI + ((#\newline) '(0 . 0)) + (else st))) + +(define (bidi-state-terminated? st) (equal? st '(0 . 0))) + +(define (skip-line-comment port) + (let ((c (peek-char port))) + (cond ((eof-object? c) c) + ((eqv? c #\newline) c) + (else (read-char port) + (skip-line-comment port))))) + +(define (skip-multiline-comment port count bds) + (let ((c (read-char port))) + (if (eof-object? c) + (error "incomplete: unterminated multi-line comment #= ... =#") ; NOTE: changing this may affect code in base/client.jl + (if (eqv? c #\=) + (let ((c (peek-char port))) + (if (eqv? c #\#) + (begin + (read-char port) + (if (> count 1) + (skip-multiline-comment port (- count 1) bds) + (if (not (bidi-state-terminated? bds)) + (error "unbalanced bidirectional formatting in comment")))) + (skip-multiline-comment port count (update-bidi-state bds c)))) + (if (eqv? c #\#) + (skip-multiline-comment port + (if (eqv? (peek-char port) #\=) + (begin (read-char port) + (+ count 1)) + count) + bds) + (skip-multiline-comment port count (update-bidi-state bds c))))))) + ;; skip to end of comment, starting at #: either #... or #= .... =#. (define (skip-comment port) - (define (skip-multiline-comment port count) - (let ((c (read-char port))) - (if (eof-object? c) - (error "incomplete: unterminated multi-line comment #= ... =#") ; NOTE: changing this may affect code in base/client.jl - (begin (if (eqv? c #\=) - (let ((c (peek-char port))) - (if (eqv? c #\#) - (begin - (read-char port) - (if (> count 1) - (skip-multiline-comment port (- count 1)))) - (skip-multiline-comment port count))) - (if (eqv? c #\#) - (skip-multiline-comment port - (if (eqv? (peek-char port) #\=) - (begin (read-char port) - (+ count 1)) - count)) - (skip-multiline-comment port count))))))) - (read-char port) ; read # that was already peeked (if (eqv? (peek-char port) #\=) (begin (read-char port) ; read initial = - (skip-multiline-comment port 1)) - (skip-to-eol port))) + (skip-multiline-comment port 1 (make-bidi-state))) + (skip-line-comment port))) (define (skip-ws-and-comments port) (skip-ws port #t) @@ -2221,24 +2237,28 @@ (let loop ((c (read-char p)) (b (open-output-string)) (e ()) - (quotes 0)) + (quotes 0) + (bds (make-bidi-state))) (cond ((eqv? c delim) (if (< quotes n) - (loop (read-char p) b e (+ quotes 1)) - (reverse (cons (io.tostring! b) e)))) + (loop (read-char p) b e (+ quotes 1) bds) + (begin + (if (not (bidi-state-terminated? bds)) + (error "unbalanced bidirectional formatting in string literal")) + (reverse (cons (io.tostring! b) e))))) ((= quotes 1) (if (not raw) (write-char #\\ b)) (write-char delim b) - (loop c b e 0)) + (loop c b e 0 (update-bidi-state bds c))) ((= quotes 2) (if (not raw) (write-char #\\ b)) (write-char delim b) (if (not raw) (write-char #\\ b)) (write-char delim b) - (loop c b e 0)) + (loop c b e 0 (update-bidi-state bds c))) ((eqv? c #\\) (if raw @@ -2251,16 +2271,16 @@ (io.write b (string.rep "\\" (div count 2))) (if (odd? count) (begin (write-char delim b) - (loop (read-char p) b e 0)) - (loop nxch b e 0))) + (loop (read-char p) b e 0 bds)) + (loop nxch b e 0 bds))) (else (io.write b (string.rep "\\" count)) (write-char nxch b) - (loop (read-char p) b e 0)))) + (loop (read-char p) b e 0 (update-bidi-state bds nxch))))) (let ((nxch (not-eof-for delim (read-char p)))) (write-char #\\ b) (write-char nxch b) - (loop (read-char p) b e 0)))) + (loop (read-char p) b e 0 (update-bidi-state bds nxch))))) ((and (eqv? c #\$) (not raw)) (let* ((ex (parse-interpolate s)) @@ -2270,7 +2290,7 @@ (loop (read-char p) (open-output-string) (list* ex (io.tostring! b) e) - 0))) + 0 bds))) ; convert literal \r and \r\n in strings to \n (issue #11988) ((eqv? c #\return) ; \r @@ -2278,11 +2298,11 @@ (if (eqv? (peek-char p) #\linefeed) ; \r\n (read-char p)) (write-char #\newline b) - (loop (read-char p) b e 0))) + (loop (read-char p) b e 0 bds))) (else (write-char (not-eof-for delim c) b) - (loop (read-char p) b e 0))))) + (loop (read-char p) b e 0 (update-bidi-state bds c)))))) (define (not-eof-1 c) (if (eof-object? c) diff --git a/test/syntax.jl b/test/syntax.jl index bfc479b4bb2e6c..f188fe3494baa2 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -2719,3 +2719,21 @@ macro m42220() end @test @m42220()() isa Vector{Float64} @test @m42220()(Bool) isa Vector{Bool} + +@test_throws ParseError Meta.parse(""" +function checkUserAccess(u::User) + if u.accessLevel != "user\u202e \u2066# users are not allowed\u2069\u2066" + return true + end + return false +end +""") + +@test_throws ParseError Meta.parse(""" +function checkUserAccess(u::User) + #=\u202e \u2066if (u.isAdmin)\u2069 \u2066 begin admins only =# + return true + #= end admin only \u202e \u2066end\u2069 \u2066=# + return false +end +""")