Skip to content

Commit

Permalink
Add the changes file and revert the modification of the lexer
Browse files Browse the repository at this point in the history
Signed-off-by: Alpha DIALLO <[email protected]>
  • Loading branch information
moyodiallo committed Mar 19, 2024
1 parent 672eeb9 commit 8cb27fc
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 36 deletions.
2 changes: 2 additions & 0 deletions doc/changes/10113.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
- fix: handle utf8 characters in the dune files. (#10113, fixes #9728,
@moyodiallo)
14 changes: 7 additions & 7 deletions src/dune_sexp/escape.ml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ module Utf8 = struct
| 1 -> true
| 2 ->
let b1 = unsafe_get s (i + 1) in
if b1 lsr 6 != 0b10 then false else true
not (b1 lsr 6 != 0b10)
| 3 ->
let b0 = unsafe_get s i in
let b1 = unsafe_get s (i + 1) in
Expand All @@ -37,9 +37,9 @@ module Utf8 = struct
then false
else (
match b0 with
| 0xE0 -> if b1 < 0xA0 || 0xBF < b1 then false else true
| 0xED -> if b1 < 0x80 || 0x9F < b1 then false else true
| _ -> if b1 lsr 6 != 0b10 then false else true)
| 0xE0 -> not (b1 < 0xA0 || 0xBF < b1)
| 0xED -> not (b1 < 0x80 || 0x9F < b1)
| _ -> not (b1 lsr 6 != 0b10))
| 4 ->
let b0 = unsafe_get s i in
let b1 = unsafe_get s (i + 1) in
Expand All @@ -49,9 +49,9 @@ module Utf8 = struct
then false
else (
match b0 with
| 0xF0 -> if b1 < 0x90 || 0xBF < b1 then false else true
| 0xF4 -> if b1 < 0x80 || 0x8F < b1 then false else true
| _ -> if b1 lsr 6 != 0b10 then false else true)
| 0xF0 -> not (b1 < 0x90 || 0xBF < b1)
| 0xF4 -> not (b1 < 0x80 || 0x8F < b1)
| _ -> not (b1 lsr 6 != 0b10))
| _ -> false
;;
end
Expand Down
36 changes: 8 additions & 28 deletions src/dune_sexp/lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,11 @@ end

type t = with_comments:bool -> Lexing.lexbuf -> Token.t

let error ?(delta = 0) ?(delta_stop = 0) lexbuf message =
let error ?(delta = 0) lexbuf message =
let start = Lexing.lexeme_start_p lexbuf in
let stop = Lexing.lexeme_end_p lexbuf in
let loc =
Loc.create ~start:{ start with pos_cnum = start.pos_cnum + delta }
~stop:{ stop with pos_cnum = stop.pos_cnum + delta_stop }
~stop:(Lexing.lexeme_end_p lexbuf)
in
User_error.raise ~loc [ Pp.text message ]

Expand Down Expand Up @@ -145,8 +144,6 @@ let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F']
let atom_char = [^ ';' '(' ')' '"' '\000'-'\032' '\127'-'\255']
let varname_char = atom_char # [ ':' '%' '{' '}' ]

let non_ascii = ['\128'-'\255']

rule token with_comments = parse
| newline
{ Lexing.new_line lexbuf; token with_comments lexbuf }
Expand Down Expand Up @@ -355,30 +352,13 @@ and template_variable = parse
}
| '}' | eof
{ error lexbuf "%{...} forms cannot be empty" }
| (varname_char* as skip) (non_ascii* as maybe_utf) (_ as other)
| (varname_char+ ':' ((':' | varname_char)*) as skip) (non_ascii* as maybe_utf) (_ as other)
| (varname_char* as skip) (_ as other)
| (varname_char+ ':' ((':' | varname_char)*) as skip) (_ as other)
{
let utf_len = String.length maybe_utf in
let uchar =
if utf_len > 1 then
let uchar_len = Escape.Utf8.next_utf8_length maybe_utf 0 in
if uchar_len <= utf_len && Escape.Utf8.is_utf8_valid maybe_utf 0 uchar_len then
Some (String.sub maybe_utf ~pos:0 ~len:uchar_len, uchar_len)
else None
else None
in
match uchar with
| Some (uchar, len) ->
error
~delta:(String.length skip)
~delta_stop:(-len)
lexbuf
(Printf.sprintf "The character %s is not allowed inside %%{...} forms" uchar)
| _ ->
error
~delta:(String.length skip)
lexbuf
(Printf.sprintf "The character %C is not allowed inside %%{...} forms" other)
error
~delta:(String.length skip)
lexbuf
(Printf.sprintf "The character %C is not allowed inside %%{...} forms" other)
}
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Utf8 characters are handled for now, this is also related to the issue #9728
> (run foo %{bin:é})
> EOF
File "", line 1, characters 15-16:
Error: The character é is not allowed inside %{...} forms
Error: The character '\195' is not allowed inside %{...} forms
[1]

$ dune format-dune-file <<EOF
Expand Down

0 comments on commit 8cb27fc

Please sign in to comment.