From c53e4849dd50b794f4d2d6afc6834420685202ef Mon Sep 17 00:00:00 2001 From: Daniel Hilst Date: Sun, 25 Sep 2022 09:42:04 -0300 Subject: [PATCH] Add JSON5 code --- dune-project | 11 ++ lib/json5/ast.ml | 37 ++++++ lib/json5/basic.ml | 7 ++ lib/json5/dune | 6 +- lib/json5/let_syntax.ml | 3 + lib/json5/lexer.ml | 224 +++++++++++++++++++++++++++---------- lib/json5/parser.ml | 132 +++++++++++++--------- lib/json5/read.ml | 34 ++++++ lib/json5/safe.ml | 7 ++ lib/json5/types.ml | 18 --- lib/json5/yojson_json5.ml | 2 + lib/json5/yojson_json5.mli | 71 ++++++++++++ test/json5/dune | 5 - test/json5/json5_test.ml | 127 --------------------- test_json5/dune | 4 + test_json5/test.ml | 92 +++++++++++++++ yojson-json5.opam | 37 ++++++ yojson_json5.opam | 32 ------ 18 files changed, 546 insertions(+), 303 deletions(-) create mode 100644 lib/json5/ast.ml create mode 100644 lib/json5/basic.ml create mode 100644 lib/json5/let_syntax.ml create mode 100644 lib/json5/read.ml create mode 100644 lib/json5/safe.ml delete mode 100644 lib/json5/types.ml create mode 100644 lib/json5/yojson_json5.ml create mode 100644 lib/json5/yojson_json5.mli delete mode 100644 test/json5/dune delete mode 100644 test/json5/json5_test.ml create mode 100644 test_json5/dune create mode 100644 test_json5/test.ml create mode 100644 yojson-json5.opam delete mode 100644 yojson_json5.opam diff --git a/dune-project b/dune-project index 242aac15..f766e1eb 100644 --- a/dune-project +++ b/dune-project @@ -35,3 +35,14 @@ meant for developers that are worried about performance changes in Yojson.") (core (>= v0.14.0)) (core_unix (>= v0.14.0)) (sexplib (>= v0.9.0)))) + +(package + (name yojson-json5) + (synopsis "Yojson_json5 is a parsing and printing library for the JSON5 format") + (description "Yojson_json5 is a parsing and printing library for the JSON5 format. +It supports parsing JSON5 to Yojson.Basic.t and Yojson.Safe.t types.") + (depends + (ocaml (>= 4.08)) + (sedlex (>= 2.5)) + (alcotest (and :with-test (>= 0.8.5))))) + diff --git a/lib/json5/ast.ml b/lib/json5/ast.ml new file mode 100644 index 00000000..8864a8fc --- /dev/null +++ b/lib/json5/ast.ml @@ -0,0 +1,37 @@ +type internal = + | Assoc of (string * internal) list + | List of internal list + | StringLit of string + | IntLit of string + | FloatLit of string + | Bool of bool + | Null + +let strip_quotes s = String.(sub s 1 (length s - 2)) + +let safe_strip_quotes s = + if String.(get s 0 = '"' && get s (length s - 1) = '"') then strip_quotes s + else s + +let rec to_basic = function + | Assoc l -> + `Assoc + (List.map (fun (name, obj) -> (safe_strip_quotes name, to_basic obj)) l) + | List l -> `List (List.map to_basic l) + | StringLit s -> `String (strip_quotes s) + | FloatLit s -> `Float (float_of_string s) + | IntLit s -> `Int (int_of_string s) + | Bool b -> `Bool b + | Null -> `Null + +let rec to_safe = function + | Assoc l -> + `Assoc + (List.map (fun (name, obj) -> (safe_strip_quotes name, to_safe obj)) l) + | List l -> `List (List.map to_safe l) + | StringLit s -> `String (strip_quotes s) + | FloatLit s -> `Float (float_of_string s) + | IntLit s -> ( + match int_of_string_opt s with Some i -> `Int i | None -> `Intlit s) + | Bool b -> `Bool b + | Null -> `Null diff --git a/lib/json5/basic.ml b/lib/json5/basic.ml new file mode 100644 index 00000000..02998593 --- /dev/null +++ b/lib/json5/basic.ml @@ -0,0 +1,7 @@ +include Yojson.Basic + +include Read.Make (struct + type t = Yojson.Basic.t + + let convert = Ast.to_basic +end) diff --git a/lib/json5/dune b/lib/json5/dune index 63b082c9..f0e63ab8 100644 --- a/lib/json5/dune +++ b/lib/json5/dune @@ -1,8 +1,6 @@ (library (name yojson_json5) - (public_name yojson_json5) + (public_name yojson-json5) (libraries yojson sedlex) (preprocess - (pps ppx_deriving.show sedlex.ppx ppx_deriving.eq) - ) -) + (pps sedlex.ppx))) diff --git a/lib/json5/let_syntax.ml b/lib/json5/let_syntax.ml new file mode 100644 index 00000000..83931e28 --- /dev/null +++ b/lib/json5/let_syntax.ml @@ -0,0 +1,3 @@ +module Result = struct + let ( let* ) = Result.bind +end diff --git a/lib/json5/lexer.ml b/lib/json5/lexer.ml index 859fb1d4..ebadfd7b 100644 --- a/lib/json5/lexer.ml +++ b/lib/json5/lexer.ml @@ -1,100 +1,200 @@ -open Types +type token = + | OPEN_PAREN + | CLOSE_PAREN + | OPEN_BRACE + | CLOSE_BRACE + | OPEN_BRACKET + | CLOSE_BRACKET + | COLON + | COMMA + | COMMENT of string + | TRUE + | FALSE + | NULL + | FLOAT of string + | INT_OR_FLOAT of string + | INT of string + | STRING of string + | IDENTIFIER_NAME of string -(* From https://www.ecma-international.org/ecma-262/5.1/#sec-7 *) +let pp_token ppf = function + | OPEN_PAREN -> Format.fprintf ppf "'('" + | CLOSE_PAREN -> Format.fprintf ppf "')'" + | OPEN_BRACE -> Format.fprintf ppf "'{'" + | CLOSE_BRACE -> Format.fprintf ppf "'}'" + | OPEN_BRACKET -> Format.fprintf ppf "'['" + | CLOSE_BRACKET -> Format.fprintf ppf "']'" + | COLON -> Format.fprintf ppf "':'" + | COMMA -> Format.fprintf ppf "','" + | COMMENT s -> Format.fprintf ppf "COMMENT '%s'" s + | TRUE -> Format.fprintf ppf "'true'" + | FALSE -> Format.fprintf ppf "'false'" + | NULL -> Format.fprintf ppf "'null'" + | FLOAT s -> Format.fprintf ppf "FLOAT '%s'" s + | INT_OR_FLOAT s -> Format.fprintf ppf "INT_OR_FLOAT '%s'" s + | INT s -> Format.fprintf ppf "INT '%s'" s + | STRING s -> Format.fprintf ppf "STRING '%s'" s + | IDENTIFIER_NAME s -> Format.fprintf ppf "IDENTIFIER_NAME '%s'" s -(* -let digit = [%sedlex.regexp? '0'..'9'] -let number = [%sedlex.regexp? Plus digit] -*) let source_character = [%sedlex.regexp? any] let line_terminator = [%sedlex.regexp? 0x000A | 0x000D | 0x2028 | 0x2029] -let line_terminator_sequence = [%sedlex.regexp? 0x000A | 0x000D, Opt 0x000A | 0x2028 | 0x2029] + +let line_terminator_sequence = + [%sedlex.regexp? 0x000A | 0x000D, Opt 0x000A | 0x2028 | 0x2029] (* NUMBERS, 7.8.3 *) -let non_zero_digit = [%sedlex.regexp? '1'..'9'] -let decimal_digit = [%sedlex.regexp? '0'..'9'] +let non_zero_digit = [%sedlex.regexp? '1' .. '9'] +let decimal_digit = [%sedlex.regexp? '0' .. '9'] let decimal_digits = [%sedlex.regexp? Plus decimal_digit] -let hex_digit = [%sedlex.regexp? '0'..'9'|'a'..'f'|'A'..'F'] -let exponent_indicator = [%sedlex.regexp? 'e'|'E'] -let signed_integer = [%sedlex.regexp? decimal_digits | '+', decimal_digits | '-', decimal_digits] +let hex_digit = [%sedlex.regexp? '0' .. '9' | 'a' .. 'f' | 'A' .. 'F'] +let exponent_indicator = [%sedlex.regexp? 'e' | 'E'] + +let signed_integer = + [%sedlex.regexp? decimal_digits | '+', decimal_digits | '-', decimal_digits] + let exponent_part = [%sedlex.regexp? exponent_indicator, signed_integer] -let decimal_integer_literal = [%sedlex.regexp? '0' | non_zero_digit, Opt decimal_digits] -let hex_integer_literal = [%sedlex.regexp? "0x", Plus hex_digit | "0X", Plus hex_digit] + +let decimal_integer_literal = + [%sedlex.regexp? '0' | non_zero_digit, Opt decimal_digits] + +let hex_integer_literal = + [%sedlex.regexp? "0x", Plus hex_digit | "0X", Plus hex_digit] + (* float *) -let float_literal = [%sedlex.regexp? decimal_integer_literal, '.', Opt decimal_digits, Opt exponent_part | '.', decimal_digits, Opt exponent_part] -let json5_float = [%sedlex.regexp? float_literal | '+', float_literal | '-', float_literal] +let float_literal = + [%sedlex.regexp? + ( decimal_integer_literal, '.', Opt decimal_digits, Opt exponent_part + | '.', decimal_digits, Opt exponent_part )] + +let json5_float = + [%sedlex.regexp? float_literal | '+', float_literal | '-', float_literal] + (* int_or_float *) -let int_or_float_literal = [%sedlex.regexp? decimal_integer_literal, Opt exponent_part] -let json5_int_or_float = [%sedlex.regexp? int_or_float_literal | '+', int_or_float_literal | '-', int_or_float_literal] +let int_or_float_literal = + [%sedlex.regexp? decimal_integer_literal, Opt exponent_part] + +let json5_int_or_float = + [%sedlex.regexp? + int_or_float_literal | '+', int_or_float_literal | '-', int_or_float_literal] + (* int/hex *) -let json5_int = [%sedlex.regexp? hex_integer_literal | '+', hex_integer_literal | '-', hex_integer_literal] +let int_literal = + [%sedlex.regexp? decimal_digits | '+', decimal_digits | '-', decimal_digits] + +let json5_int = + [%sedlex.regexp? + ( hex_integer_literal + | '+', hex_integer_literal + | '-', hex_integer_literal + | int_literal )] (* STRING LITERALS, 7.8.4 *) -let unicode_escape_sequence = [%sedlex.regexp? 'u', hex_digit, hex_digit, hex_digit, hex_digit] +let unicode_escape_sequence = + [%sedlex.regexp? 'u', hex_digit, hex_digit, hex_digit, hex_digit] + let single_escape_character = [%sedlex.regexp? Chars {|'"\\bfnrtv|}] -let escape_character = [%sedlex.regexp? single_escape_character | decimal_digit | 'x' | 'u' ] -let non_escape_character = [%sedlex.regexp? Sub (source_character, ( escape_character | line_terminator ) ) ] -let character_escape_sequence = [%sedlex.regexp? single_escape_character | non_escape_character ] -let line_continuation = [%sedlex.regexp? '\\', line_terminator_sequence ] -let escape_sequence = [%sedlex.regexp? character_escape_sequence | '0' | unicode_escape_sequence ] (* TODO *) -let single_string_character = [%sedlex.regexp? Sub (source_character, ('\'' | '\\' | line_terminator)) | '\\', escape_sequence | line_continuation ] -let double_string_character = [%sedlex.regexp? Sub (source_character, ('"' | '\\' | line_terminator)) | '\\', escape_sequence | line_continuation ] -let string_literal = [%sedlex.regexp? '"', Star double_string_character, '"' | '\'', Star single_string_character, '\'' ] +let escape_character = + [%sedlex.regexp? single_escape_character | decimal_digit | 'x' | 'u'] + +let non_escape_character = + [%sedlex.regexp? Sub (source_character, (escape_character | line_terminator))] + +let character_escape_sequence = + [%sedlex.regexp? single_escape_character | non_escape_character] +let line_continuation = [%sedlex.regexp? '\\', line_terminator_sequence] + +let escape_sequence = + [%sedlex.regexp? character_escape_sequence | '0' | unicode_escape_sequence] + +let single_string_character = + [%sedlex.regexp? + ( Sub (source_character, ('\'' | '\\' | line_terminator)) + | '\\', escape_sequence + | line_continuation )] + +let double_string_character = + [%sedlex.regexp? + ( Sub (source_character, ('"' | '\\' | line_terminator)) + | '\\', escape_sequence + | line_continuation )] + +let string_literal = + [%sedlex.regexp? + ( '"', Star double_string_character, '"' + | '\'', Star single_string_character, '\'' )] (* IDENTIFIER_NAME (keys in objects) *) -let unicode_combining_mark =[%sedlex.regexp? mn | mc] +let unicode_combining_mark = [%sedlex.regexp? mn | mc] let unicode_digit = [%sedlex.regexp? nd] let unicode_connector_punctuation = [%sedlex.regexp? pc] let unicode_letter = [%sedlex.regexp? lu | ll | lt | lm | lo | nl] let zwnj = [%sedlex.regexp? 0x200C] let zwj = [%sedlex.regexp? 0x200D] -let identifier_start = [%sedlex.regexp? unicode_letter | '$' | '_' | '\\', unicode_escape_sequence] -let identifier_part = [%sedlex.regexp? identifier_start | unicode_combining_mark | unicode_digit | unicode_connector_punctuation | zwnj | zwj] + +let identifier_start = + [%sedlex.regexp? unicode_letter | '$' | '_' | '\\', unicode_escape_sequence] + +let identifier_part = + [%sedlex.regexp? + ( identifier_start | unicode_combining_mark | unicode_digit + | unicode_connector_punctuation | zwnj | zwj )] + let identifier_name = [%sedlex.regexp? identifier_start, Star identifier_part] (* COMMENTS 7.4 *) -let single_line_comment_char = [%sedlex.regexp? Sub (source_character, line_terminator)] +let single_line_comment_char = + [%sedlex.regexp? Sub (source_character, line_terminator)] + let single_line_comment = [%sedlex.regexp? "//", Star single_line_comment_char] let multi_line_not_asterisk_char = [%sedlex.regexp? Sub (source_character, '*')] let multi_line_not_slash_char = [%sedlex.regexp? Sub (source_character, '/')] -let multi_line_comment_char = [%sedlex.regexp? multi_line_not_asterisk_char | '*', Plus multi_line_not_slash_char] -let multi_line_comment = [%sedlex.regexp? "/*", Star multi_line_comment_char, Opt '*', "*/"] + +let multi_line_comment_char = + [%sedlex.regexp? + multi_line_not_asterisk_char | '*', Plus multi_line_not_slash_char] + +let multi_line_comment = + [%sedlex.regexp? "/*", Star multi_line_comment_char, Opt '*', "*/"] + let comment = [%sedlex.regexp? multi_line_comment | single_line_comment] -let white_space = [%sedlex.regexp? 0x0009 | 0x000B | 0x000C | 0x0020 | 0x00A0 | 0xFEFF | zs] +let white_space = + [%sedlex.regexp? 0x0009 | 0x000B | 0x000C | 0x0020 | 0x00A0 | 0xFEFF | zs] -let rec lex tokens buf = +let rec lex : token list -> Sedlexing.lexbuf -> (token list, string) result = + fun tokens buf -> let lexeme = Sedlexing.Utf8.lexeme in match%sedlex buf with - | '{' -> lex (OPEN_BRACE::tokens) buf - | '}' -> lex (CLOSE_BRACE::tokens) buf - | '[' -> lex (OPEN_BRACKET::tokens) buf - | ']' -> lex (CLOSE_BRACKET::tokens) buf - | ':' -> lex (COLON::tokens) buf - | ',' -> lex (COMMA::tokens) buf - | comment - | white_space - | line_terminator -> lex tokens buf - | "true" -> lex (TRUE::tokens) buf - | "false" -> lex (FALSE::tokens) buf - | "null" -> lex (NULL::tokens) buf - | string_literal -> let s = lexeme buf in - lex (STRING s::tokens) buf + | '(' -> lex (OPEN_PAREN :: tokens) buf + | ')' -> lex (CLOSE_PAREN :: tokens) buf + | '{' -> lex (OPEN_BRACE :: tokens) buf + | '}' -> lex (CLOSE_BRACE :: tokens) buf + | '[' -> lex (OPEN_BRACKET :: tokens) buf + | ']' -> lex (CLOSE_BRACKET :: tokens) buf + | ':' -> lex (COLON :: tokens) buf + | ',' -> lex (COMMA :: tokens) buf + | multi_line_comment | single_line_comment | white_space | line_terminator -> + lex tokens buf + | "true" -> lex (TRUE :: tokens) buf + | "false" -> lex (FALSE :: tokens) buf + | "null" -> lex (NULL :: tokens) buf | json5_float -> - let s = float_of_string @@ lexeme buf in - lex (FLOAT s::tokens) buf - | json5_int_or_float -> - let s = lexeme buf in - lex (INT_OR_FLOAT s::tokens) buf + let s = lexeme buf in + lex (FLOAT s :: tokens) buf | json5_int -> - let s = int_of_string @@ lexeme buf in - lex (INT s::tokens) buf + let s = lexeme buf in + lex (INT s :: tokens) buf + | json5_int_or_float -> + let s = lexeme buf in + lex (INT_OR_FLOAT s :: tokens) buf | identifier_name -> - let s = lexeme buf in - lex (IDENTIFIER_NAME s::tokens) buf - | eof -> List.rev tokens + let s = lexeme buf in + lex (IDENTIFIER_NAME s :: tokens) buf + | string_literal -> + let s = lexeme buf in + lex (STRING s :: tokens) buf + | eof -> Ok (List.rev tokens) | _ -> - let s = lexeme buf in - failwith @@ "Unexpected character: '" ^ s ^ "'" + lexeme buf |> Format.asprintf "Unexpected character: '%s'" |> Result.error diff --git a/lib/json5/parser.ml b/lib/json5/parser.ml index f3293bac..815c02e0 100644 --- a/lib/json5/parser.ml +++ b/lib/json5/parser.ml @@ -1,62 +1,84 @@ -open Types - -let escape_string x = x +open Let_syntax.Result let rec parse_list acc = function - | [] -> failwith "List never ends" - | CLOSE_BRACKET::xs - | COMMA::CLOSE_BRACKET::xs -> (acc, xs) - | xs -> ( - let (v, xs) = parse xs in - match xs with - | [] -> failwith "List was not closed" - | CLOSE_BRACKET::xs - | COMMA::CLOSE_BRACKET::xs -> (v::acc, xs) - | COMMA::xs -> parse_list (v::acc) xs - | x::_ -> - let s = Format.asprintf "Unexpected list token: %a" pp_token x in - failwith s) - + | [] -> Error "List never ends" + | Lexer.CLOSE_BRACKET :: xs | COMMA :: CLOSE_BRACKET :: xs -> Ok (acc, xs) + | xs -> ( + let* v, xs = parse xs in + match xs with + | [] -> Error "List was not closed" + | Lexer.CLOSE_BRACKET :: xs | COMMA :: CLOSE_BRACKET :: xs -> + Ok (v :: acc, xs) + | COMMA :: xs -> parse_list (v :: acc) xs + | x :: _ -> + let s = + Format.asprintf "Unexpected list token: %a" Lexer.pp_token x + in + Error s) + and parse_assoc acc = function - | [] -> failwith "Assoc never ends" - | CLOSE_BRACE::xs - | COMMA::CLOSE_BRACE::xs -> (acc, xs) - | (STRING k)::COLON::xs - | (IDENTIFIER_NAME k)::COLON::xs -> ( - let (v, xs) = parse xs in - let item = (k, v) in - match xs with - | [] -> failwith "Object was not closed" - | CLOSE_BRACE::xs - | COMMA::CLOSE_BRACE::xs -> (item::acc, xs) - | COMMA::xs -> parse_assoc (item::acc) xs - | x::_ -> - let s = Format.asprintf "Unexpected assoc list token: %a" pp_token x in - failwith s) - | x::_ -> - let s = Format.asprintf "Unexpected assoc list token: %a" pp_token x in - failwith s + | [] -> Error "Assoc never ends" + | Lexer.CLOSE_BRACE :: xs | COMMA :: CLOSE_BRACE :: xs -> Ok (acc, xs) + | STRING k :: COLON :: xs | IDENTIFIER_NAME k :: COLON :: xs -> ( + let* v, xs = parse xs in + let item = (k, v) in + match xs with + | [] -> Error "Object was not closed" + | Lexer.CLOSE_BRACE :: xs | COMMA :: CLOSE_BRACE :: xs -> + Ok (item :: acc, xs) + | COMMA :: xs -> parse_assoc (item :: acc) xs + | x :: _ -> + let s = + Format.asprintf "Unexpected assoc list token: %a" Lexer.pp_token x + in + Error s) + | x :: _ -> + let s = + Format.asprintf "Unexpected assoc list token: %a" Lexer.pp_token x + in + Error s + +and parse = function + | [] -> Error "empty list of tokens" + | token :: xs -> ( + match token with + | TRUE -> Ok (Ast.Bool true, xs) + | FALSE -> Ok (Bool false, xs) + | NULL -> Ok (Null, xs) + | INT v -> Ok (IntLit v, xs) + | FLOAT v -> Ok (FloatLit v, xs) + | INT_OR_FLOAT v -> Ok (FloatLit v, xs) + | STRING s -> Ok (StringLit s, xs) + | OPEN_BRACKET -> + let* l, xs = parse_list [] xs in + Ok (Ast.List (List.rev l), xs) + | OPEN_BRACE -> + let* a, xs = parse_assoc [] xs in + Ok (Ast.Assoc (List.rev a), xs) + | x -> + let s = Format.asprintf "Unexpected token: %a" Lexer.pp_token x in + Error s) -and parse : token list -> (t * token list) = function - | [] -> failwith "empty list of tokens" - | token::xs -> - match token with - | TRUE -> (`Bool true, xs) - | FALSE -> (`Bool false, xs) - | NULL -> (`Null, xs) - | INT v -> (`Int v, xs) - | FLOAT v -> (`Float v, xs) - | INT_OR_FLOAT v -> (`String v, xs) - | STRING s -> (`String (escape_string s), xs) - | OPEN_BRACKET -> - let (l, xs) = parse_list [] xs in - (`List (List.rev l), xs) - | OPEN_BRACE -> - let (a, xs) = parse_assoc [] xs in - (`Assoc (List.rev a), xs) - | x -> - let s = Format.asprintf "Unexpected token: %a" pp_token x in - failwith s +let parse_from_lexbuf ?fname ?lnum lexbuffer = + let fname = Option.value fname ~default:"" in + Sedlexing.set_filename lexbuffer fname; + let lnum = Option.value lnum ~default:1 in + let pos = + { Lexing.pos_fname = fname; pos_lnum = lnum; pos_bol = 0; pos_cnum = 0 } + in + Sedlexing.set_position lexbuffer pos; + let* tokens = Lexer.lex [] lexbuffer in + let* ast = parse tokens in + Ok (fst ast) +let parse_from_string ?fname ?lnum input = + parse_from_lexbuf (Sedlexing.Utf8.from_string input) ?fname ?lnum +let parse_from_channel ?fname ?lnum ic = + parse_from_lexbuf (Sedlexing.Utf8.from_channel ic) ?fname ?lnum +let parse_from_file ?fname ?lnum filename = + let ic = open_in filename in + let out = parse_from_channel ?fname ?lnum ic in + close_in ic; + out diff --git a/lib/json5/read.ml b/lib/json5/read.ml new file mode 100644 index 00000000..4f4cacbf --- /dev/null +++ b/lib/json5/read.ml @@ -0,0 +1,34 @@ +open Let_syntax.Result + +module type S = sig + type t + + val convert : Ast.internal -> t +end + +module type Out = sig + type t + + val from_string : ?fname:string -> ?lnum:int -> string -> (t, string) result + + val from_channel : + ?fname:string -> ?lnum:int -> in_channel -> (t, string) result + + val from_file : ?fname:string -> ?lnum:int -> string -> (t, string) result +end + +module Make (F : S) : Out with type t = F.t = struct + type t = F.t + + let from_string ?fname ?lnum input = + let* ast = Parser.parse_from_string ?fname ?lnum input in + Ok (F.convert ast) + + let from_channel ?fname ?lnum ic = + let* ast = Parser.parse_from_channel ?fname ?lnum ic in + Ok (F.convert ast) + + let from_file ?fname ?lnum file = + let* ast = Parser.parse_from_file ?fname ?lnum file in + Ok (F.convert ast) +end diff --git a/lib/json5/safe.ml b/lib/json5/safe.ml new file mode 100644 index 00000000..1eace77c --- /dev/null +++ b/lib/json5/safe.ml @@ -0,0 +1,7 @@ +include Yojson.Safe + +include Read.Make (struct + type t = Yojson.Safe.t + + let convert = Ast.to_safe +end) diff --git a/lib/json5/types.ml b/lib/json5/types.ml deleted file mode 100644 index ac98404d..00000000 --- a/lib/json5/types.ml +++ /dev/null @@ -1,18 +0,0 @@ -type token = - | OPEN_BRACE - | CLOSE_BRACE - | OPEN_BRACKET - | CLOSE_BRACKET - | COLON - | COMMA - | TRUE - | FALSE - | NULL - | FLOAT of float - | INT_OR_FLOAT of string - | INT of int - | STRING of string - | IDENTIFIER_NAME of string - [@@deriving show, eq] - -type t = Yojson.Safe.t diff --git a/lib/json5/yojson_json5.ml b/lib/json5/yojson_json5.ml new file mode 100644 index 00000000..58606838 --- /dev/null +++ b/lib/json5/yojson_json5.ml @@ -0,0 +1,2 @@ +module Safe = Safe +module Basic = Basic diff --git a/lib/json5/yojson_json5.mli b/lib/json5/yojson_json5.mli new file mode 100644 index 00000000..8e069cde --- /dev/null +++ b/lib/json5/yojson_json5.mli @@ -0,0 +1,71 @@ +module Safe : sig + type t = Yojson.Safe.t + + val from_string : ?fname:string -> ?lnum:int -> string -> (t, string) result + + val from_channel : + ?fname:string -> ?lnum:int -> in_channel -> (t, string) result + + val from_file : ?fname:string -> ?lnum:int -> string -> (t, string) result + + val to_string : + ?buf:Buffer.t -> ?len:int -> ?suf:string -> ?std:bool -> t -> string + + val to_channel : + ?buf:Stdlib.Buffer.t -> + ?len:int -> + ?suf:string -> + ?std:bool -> + Stdlib.out_channel -> + t -> + unit + + val to_output : + ?buf:Stdlib.Buffer.t -> + ?len:int -> + ?suf:string -> + ?std:bool -> + < output : string -> int -> int -> int > -> + t -> + unit + + val to_file : ?len:int -> ?std:bool -> ?suf:string -> string -> t -> unit + val pp : Format.formatter -> t -> unit + val equal : t -> t -> bool +end + +module Basic : sig + type t = Yojson.Basic.t + + val from_string : ?fname:string -> ?lnum:int -> string -> (t, string) result + + val from_channel : + ?fname:string -> ?lnum:int -> in_channel -> (t, string) result + + val from_file : ?fname:string -> ?lnum:int -> string -> (t, string) result + + val to_string : + ?buf:Buffer.t -> ?len:int -> ?suf:string -> ?std:bool -> t -> string + + val to_channel : + ?buf:Stdlib.Buffer.t -> + ?len:int -> + ?suf:string -> + ?std:bool -> + Stdlib.out_channel -> + t -> + unit + + val to_output : + ?buf:Stdlib.Buffer.t -> + ?len:int -> + ?suf:string -> + ?std:bool -> + < output : string -> int -> int -> int > -> + t -> + unit + + val to_file : ?len:int -> ?std:bool -> ?suf:string -> string -> t -> unit + val pp : Format.formatter -> t -> unit + val equal : t -> t -> bool +end diff --git a/test/json5/dune b/test/json5/dune deleted file mode 100644 index 85583f9a..00000000 --- a/test/json5/dune +++ /dev/null @@ -1,5 +0,0 @@ -(executable - (name json5_test) - (libraries yojson_json5 alcotest) - (preprocess - (pps ppx_deriving.show ppx_deriving.eq))) diff --git a/test/json5/json5_test.ml b/test/json5/json5_test.ml deleted file mode 100644 index be6b6614..00000000 --- a/test/json5/json5_test.ml +++ /dev/null @@ -1,127 +0,0 @@ -module Lexer = Yojson_json5.Lexer -open Yojson_json5.Types - -let tokenize_json5 (json_string) = - let buf = Sedlexing.Utf8.from_string json_string in - Lexer.lex [] buf - -let token = Alcotest.testable pp_token equal_token - -let test_float () = - Alcotest.(check (list token)) "Simple" [FLOAT 23.52] (tokenize_json5 "23.52"); - Alcotest.(check (list token)) "No leading number" [FLOAT 0.52] (tokenize_json5 ".52"); - Alcotest.(check (list token)) "With exponent" [FLOAT 210.; FLOAT 210.] (tokenize_json5 "2.1e2 2.1E2") - -let test_int_or_float () = - Alcotest.(check (list token)) "Int or float" [INT_OR_FLOAT "42"] (tokenize_json5 "42") - -let test_int () = - Alcotest.(check (list token)) "Hex/Int" [INT 16] (tokenize_json5 "0x10") - -let test_string () = - Alcotest.(check (list token)) "Doublequoted simple" [STRING "\"hello\""] (tokenize_json5 "\"hello\""); - Alcotest.(check (list token)) "Doublequoted single-character escape sequence" [STRING {|"\'\"\\\b\f\n\r\t\v"|}] (tokenize_json5 {|"\'\"\\\b\f\n\r\t\v"|}); - Alcotest.(check (list token)) "Doublequoted non-escape-character escape sequence" [STRING {|"foo\z"|}] (tokenize_json5 {|"foo\z"|}); - Alcotest.(check (list token)) "Doublequoted zero escape sequence" [STRING {|"\0"|}] (tokenize_json5 {|"\0"|}); - (* Alcotest.check_raises "Doublequoted zero then one escape sequence" (Failure "Unexpected character: ''") (fun () -> ignore @@ tokenize_json5 {|"\01"|}); *) - Alcotest.(check (list token)) "Doublequoted unicode escape" [STRING "\"\\uD83D\\uDC2A\""] (tokenize_json5 "\"\\uD83D\\uDC2A\""); - Alcotest.(check (list token)) "Doublequoted line continuation" [STRING "\"hel\\\nlo\""] (tokenize_json5 "\"hel\\\nlo\""); - Alcotest.(check (list token)) "Singlequoted simple" [STRING "'hello'"] (tokenize_json5 "'hello'"); - Alcotest.(check (list token)) "Singlequoted single-character escape sequence" [STRING {|'\'\"\\\b\f\n\r\t\v'|}] (tokenize_json5 {|'\'\"\\\b\f\n\r\t\v'|}); - Alcotest.(check (list token)) "Singlequoted non-escape-character escape sequence" [STRING {|'\z'|}] (tokenize_json5 {|'\z'|}); - Alcotest.(check (list token)) "Singlequoted zero escape sequence" [STRING {|'\0'|}] (tokenize_json5 {|'\0'|}); - Alcotest.(check (list token)) "Singlequoted unicode escape" [STRING "'\\uD83D\\uDC2A'"] (tokenize_json5 "'\\uD83D\\uDC2A'"); - Alcotest.(check (list token)) "Singlequoted line continuation" [STRING "'hel\\\nlo'"] (tokenize_json5 "'hel\\\nlo'"); - (* Alcotest.(check (list token)) "Singlequoted one escape sequence" [STRING {|'\1'|}] (tokenize_json5 {|'\1'|}); *) - () - -let test_identifier () = - Alcotest.(check (list token)) - "Identifer name in an object" - [OPEN_BRACE; IDENTIFIER_NAME "hj"; COLON; INT_OR_FLOAT "42"; CLOSE_BRACE] - (tokenize_json5 "{hj: 42}") - -let test_multi_line_comments () = - Alcotest.(check (list token)) "Simple case" [] (tokenize_json5 "/* hello\nworld */"); - Alcotest.(check (list token)) "Between numbers" [INT_OR_FLOAT "1"; INT_OR_FLOAT "1"] (tokenize_json5 "1/* hello\nworld */1"); - Alcotest.(check (list token)) "Empty" [INT_OR_FLOAT "1"; INT_OR_FLOAT "1"] (tokenize_json5 "1/**/1"); - Alcotest.(check (list token)) "Contains slash" [INT_OR_FLOAT "1"; INT_OR_FLOAT "1"] (tokenize_json5 "1/*/*/1"); - Alcotest.(check (list token)) "Contains asterisk" [INT_OR_FLOAT "1"; INT_OR_FLOAT "1"] (tokenize_json5 "1/***/1"); - Alcotest.(check (list token)) "Contains double asterisk" [INT_OR_FLOAT "1"; INT_OR_FLOAT "1"] (tokenize_json5 "1/****/1"); - Alcotest.check_raises "Contains comment end" (Failure "Unexpected character: ''") (fun () -> ignore @@ tokenize_json5 "/* */ */") - -let test_single_line_comments () = - Alcotest.(check (list token)) "Simple case" [] (tokenize_json5 "//foo\n"); - Alcotest.(check (list token)) "Between numbers" [INT_OR_FLOAT "1"; INT_OR_FLOAT "1"] (tokenize_json5 "1//foo\n1") - - -(** - * PARSING - *) - -let yojson = Alcotest.testable Yojson.Safe.pp Yojson.Safe.equal - -let parse tokens = - let (json, _) = Yojson_json5.Parser.parse tokens in - json - -let test_parser_simple () = - Alcotest.(check yojson) "Simple null" `Null (parse [NULL]); - Alcotest.(check yojson) "Simple true" (`Bool true) (parse [TRUE]); - Alcotest.(check yojson) "Simple false" (`Bool false) (parse [FALSE]); - Alcotest.(check yojson) "Simple int" (`Int 3) (parse [INT 3]); - Alcotest.(check yojson) "Simple float" (`Float 3.4) (parse [FLOAT 3.4]); - () - -let test_parser_string () = - Alcotest.(check yojson) "Simple string" (`String "hello") (parse [STRING "hello"]); - Alcotest.(check yojson) "Escape sequences" (`String "a\'\"\\\b\n\r\ta") (parse [STRING "a\'\"\\\b\n\r\ta"]); - () - -let test_parser_list () = - Alcotest.(check yojson) "Empty list" (`List []) (parse [OPEN_BRACKET; CLOSE_BRACKET]); - Alcotest.(check yojson) "List with bools" (`List [`Bool false; `Bool true]) (parse [OPEN_BRACKET; FALSE; COMMA; TRUE; CLOSE_BRACKET]); - Alcotest.(check yojson) "List of lists" (`List [ `List []; `Null ]) (parse [OPEN_BRACKET; OPEN_BRACKET; CLOSE_BRACKET; COMMA; NULL; CLOSE_BRACKET]); - Alcotest.(check yojson) "List with trailing comma" (`List [ `Null ]) (parse [OPEN_BRACKET; NULL; COMMA; CLOSE_BRACKET]); - Alcotest.(check yojson) "List of list with content" (`List [ `List [ `Bool true ] ]) (parse [OPEN_BRACKET; OPEN_BRACKET; TRUE; CLOSE_BRACKET; CLOSE_BRACKET]); - () - -let test_parser_object () = - Alcotest.(check yojson) "Empty object" (`Assoc []) (parse [OPEN_BRACE; CLOSE_BRACE]); - Alcotest.(check yojson) "String key" (`Assoc [ ("foo", `Null) ]) (parse [OPEN_BRACE; STRING "foo"; COLON; NULL; CLOSE_BRACE]); - Alcotest.(check yojson) "Identifer key" (`Assoc [ ("foo", `Null) ]) (parse [OPEN_BRACE; IDENTIFIER_NAME "foo"; COLON; NULL; CLOSE_BRACE]); - Alcotest.(check yojson) "Trailing comma" (`Assoc [ ("foo", `Null) ]) (parse [OPEN_BRACE; STRING "foo"; COLON; NULL; COMMA; CLOSE_BRACE]); - Alcotest.(check yojson) "Nested object" (`Assoc [ ("foo", `Assoc [ ("bar", `Null) ]) ]) (parse [OPEN_BRACE; STRING "foo"; COLON; OPEN_BRACE; STRING "bar"; COLON; NULL; CLOSE_BRACE; COMMA; CLOSE_BRACE]); - Alcotest.(check yojson) "Mixed keys and values" (`Assoc [ ("foo", `Bool true); ("bar", `Null); ("baz", `Bool false) ]) (parse [OPEN_BRACE; STRING "foo"; COLON; TRUE; COMMA; IDENTIFIER_NAME "bar"; COLON; NULL; COMMA; IDENTIFIER_NAME "baz"; COLON; FALSE; CLOSE_BRACE]); - () - - -(** - * RUN - *) - -let () = - let open Alcotest in - run "JSON5" [ - "Numbers", [ - test_case "Float" `Quick test_float; - test_case "Int or float" `Quick test_int_or_float; - test_case "Int" `Quick test_int; - ]; - "Strings", [ - test_case "String" `Quick test_string; - ]; - "Objects", [ - test_case "Identifiers" `Quick test_identifier; - ]; - "Comments", [ - test_case "Multi-line comments" `Quick test_multi_line_comments; - test_case "Single-line comments" `Quick test_single_line_comments; - ]; - "Parse", [ - test_case "Simple parsing" `Quick test_parser_simple; - test_case "Strings" `Quick test_parser_string; - test_case "Simple list parsing" `Quick test_parser_list; - test_case "Simple object parsing" `Quick test_parser_object; - ]; - ] diff --git a/test_json5/dune b/test_json5/dune new file mode 100644 index 00000000..46b63c12 --- /dev/null +++ b/test_json5/dune @@ -0,0 +1,4 @@ +(test + (name test) + (package yojson-json5) + (libraries alcotest yojson_json5)) diff --git a/test_json5/test.ml b/test_json5/test.ml new file mode 100644 index 00000000..3488a54c --- /dev/null +++ b/test_json5/test.ml @@ -0,0 +1,92 @@ +module M = struct + include Yojson_json5.Safe + + let from_string s = + match from_string s with + | Ok t -> t + | Error e -> raise (Yojson.Json_error e) +end + +let yojson_json5 = Alcotest.testable M.pp M.equal + +let test_from_string () = + Alcotest.(check yojson_json5) "Empty object" (`Assoc []) (M.from_string "{}"); + Alcotest.(check yojson_json5) "Empty list" (`List []) (M.from_string "[]"); + Alcotest.(check yojson_json5) + "List" + (`List [ `Int 1; `String "2"; `Float 3. ]) + (M.from_string "[1, \"2\", 3.0]"); + Alcotest.(check yojson_json5) "true" (`Bool true) (M.from_string "true"); + Alcotest.(check yojson_json5) "false" (`Bool false) (M.from_string "false"); + Alcotest.(check yojson_json5) "null" `Null (M.from_string "null"); + Alcotest.(check yojson_json5) + "double quotes string" (`String "hello world") + (M.from_string {|"hello world"|}); + Alcotest.(check yojson_json5) + "single quotes string" (`String "hello world") + (M.from_string {|'hello world'|}); + Alcotest.(check yojson_json5) + "float" (`Float 12345.67890) + (M.from_string "12345.67890"); + Alcotest.(check yojson_json5) "hex" (`Int 0x1) (M.from_string "0x1"); + Alcotest.(check yojson_json5) "int" (`Int 1) (M.from_string "1"); + Alcotest.(check yojson_json5) + "line break" (`String "foo\\\nbar") + (M.from_string "\"foo\\\nbar\""); + Alcotest.(check yojson_json5) + "string and comment" (`String "bar") + (M.from_string "\"bar\" //foo"); + let expected = + `Assoc + [ + ("unquoted", `String "and you can quote me on that"); + ("singleQuotes", `String "I can use \"double quotes\" here"); + ("lineBreaks", `String {|Look, Mom! \ +No \\n's!|}); + ("hexadecimal", `Int 0xdecaf); + ("leadingDecimalPoint", `Float 0.8675309); + ("andTrailing", `Float 8675309.0); + ("positiveSign", `Int 1); + ("trailingComma", `String "in objects"); + ("andIn", `List [ `String "arrays" ]); + ("backwardsCompatible", `String "with JSON"); + ] + in + Alcotest.(check yojson_json5) + "More elaborated" expected + (M.from_string + {|{ + // comments + unquoted: 'and you can quote me on that', + singleQuotes: 'I can use "double quotes" here', + lineBreaks: "Look, Mom! \ +No \\n's!", + hexadecimal: 0xdecaf, + leadingDecimalPoint: .8675309, andTrailing: 8675309., + positiveSign: +1, + trailingComma: 'in objects', andIn: ['arrays',], + "backwardsCompatible": "with JSON", +}|}) + +let test_to_string () = + Alcotest.(check string) "Empty object" "{}" (M.to_string (`Assoc [])); + Alcotest.(check string) "Empty list" "[]" (M.to_string (`List [])); + Alcotest.(check string) "true" "true" (M.to_string (`Bool true)); + Alcotest.(check string) "false" "false" (M.to_string (`Bool false)); + Alcotest.(check string) "null" "null" (M.to_string `Null); + Alcotest.(check string) + "string" "\"hello world\"" + (M.to_string (`String "hello world")); + Alcotest.(check string) "float" "12345.6789" (M.to_string (`Float 12345.6789)); + Alcotest.(check string) "hex" "1" (M.to_string (`Int 0x1)); + Alcotest.(check string) "int" "1" (M.to_string (`Int 1)) + +(* Run it *) +let () = + let open Alcotest in + run "JSON5" + [ + ( "from_string", + [ test_case "reading from string" `Quick test_from_string ] ); + ("to_string", [ test_case "write to string" `Quick test_to_string ]); + ] diff --git a/yojson-json5.opam b/yojson-json5.opam new file mode 100644 index 00000000..c458ef00 --- /dev/null +++ b/yojson-json5.opam @@ -0,0 +1,37 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: + "Yojson_json5 is a parsing and printing library for the JSON5 format" +description: """ +Yojson_json5 is a parsing and printing library for the JSON5 format. +It supports parsing JSON5 to Yojson.Basic.t and Yojson.Safe.t types.""" +maintainer: [ + "paul-elliot@tarides.com" "nathan@tarides.com" "marek@tarides.com" +] +authors: ["Martin Jambon"] +license: "BSD-3-Clause" +homepage: "https://github.com/ocaml-community/yojson" +doc: "https://ocaml-community.github.io/yojson" +bug-reports: "https://github.com/ocaml-community/yojson/issues" +depends: [ + "dune" {>= "2.7"} + "ocaml" {>= "4.08"} + "sedlex" {>= "2.5"} + "alcotest" {with-test & >= "0.8.5"} + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/ocaml-community/yojson.git" diff --git a/yojson_json5.opam b/yojson_json5.opam deleted file mode 100644 index 712d4c4d..00000000 --- a/yojson_json5.opam +++ /dev/null @@ -1,32 +0,0 @@ -opam-version: "2.0" -maintainer: ["nathan@cryptosense.com" "marek@xivilization.net"] -authors: ["Martin Jambon"] -homepage: "https://github.com/ocaml-community/yojson" -bug-reports: "https://github.com/ocaml-community/yojson/issues" -dev-repo: "git+https://github.com/ocaml-community/yojson.git" -doc: "https://ocaml-community.github.io/yojson/" -build: [ - ["dune" "subst"] {pinned} - ["dune" "build" "-p" name "-j" jobs] -] -run-test: [["dune" "runtest" "-p" name "-j" jobs]] -depends: [ - "ocaml" {>= "4.02.3"} - "dune" - "sedlex" - "alcotest" {with-test & >= "0.8.5"} - "ppx_deriving" -] -synopsis: - "Yojson is an optimized parsing and printing library for the JSON format" -description: """ -Yojson is an optimized parsing and printing library for the JSON format. - -It addresses a few shortcomings of json-wheel including 2x speedup, -polymorphic variants and optional syntax for tuples and variants. - -ydump is a pretty-printing command-line program provided with the -yojson package. - -The program atdgen can be used to derive OCaml-JSON serializers and -deserializers from type definitions."""