From ce3b8cbf8d000df7cc62e914cb0476294ca0e4a6 Mon Sep 17 00:00:00 2001 From: Roman Leshchinskiy Date: Mon, 22 May 2023 11:18:52 +0100 Subject: [PATCH] fix: Improve JSON string quoting in Dune tracing We want to include more things in the trace file and we should always produce valid JSON here. Signed-off-by: Roman Leshchinskiy --- CHANGES.md | 3 +++ src/dune_stats/dune_stats.ml | 41 ++++++++++++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 3745e7ca72a..05d633b00ae 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,9 @@ Unreleased ---------- +- Fix string quoting in the json file written by `--trace-file` (#7773, + @rleshchinskiy) + - Read `pkg-config` arguments from the `PKG_CONFIG_ARGN` environment variable (#1492, #7734, @anmonteiro) diff --git a/src/dune_stats/dune_stats.ml b/src/dune_stats/dune_stats.ml index 2b128c9acd9..3add0858af8 100644 --- a/src/dune_stats/dune_stats.ml +++ b/src/dune_stats/dune_stats.ml @@ -11,9 +11,46 @@ end module Json = struct include Chrome_trace.Json + let copy_substring s buf start pos = + if pos > start then Buffer.add_substring buf s start (pos - start) + + let rec quote_characters_to_buf s buf n start pos = + (* check if a character is a valid utf-8 continuation byte *) + let is_cb i = i < n && Char.code s.[i] land 0xc0 = 0x80 [@@inline] in + if pos < n then + match s.[pos] with + | '\b' -> escape s buf n start pos "\\b" + | '\t' -> escape s buf n start pos "\\t" + | '\n' -> escape s buf n start pos "\\n" + | '\012' -> escape s buf n start pos "\\f" + | '\r' -> escape s buf n start pos "\\r" + | '\\' -> escape s buf n start pos "\\\\" + | '"' -> escape s buf n start pos "\\\"" + | '\000' .. '\031' as c -> + escape s buf n start pos (sprintf "\\u%04x" (Char.code c)) + | '\032' .. '\127' -> quote_characters_to_buf s buf n start (pos + 1) + (* Check for valid UTF-8 *) + | '\xc0' .. '\xdf' when is_cb (pos + 1) -> + quote_characters_to_buf s buf n start (pos + 2) + | '\xe0' .. '\xef' when is_cb (pos + 1) && is_cb (pos + 2) -> + quote_characters_to_buf s buf n start (pos + 3) + | '\xf0' .. '\xf7' + when is_cb (pos + 1) && is_cb (pos + 2) && is_cb (pos + 3) -> + quote_characters_to_buf s buf n start (pos + 4) + (* Replace unrepresentable bytes by the Unicode replacement character (0xFFFD), + encoded in UTF-8 *) + | _ -> escape s buf n start pos "\xef\xbf\xbd" + else copy_substring s buf start pos + + and escape s buf n start pos e = + copy_substring s buf start pos; + Buffer.add_string buf e; + quote_characters_to_buf s buf n (pos + 1) (pos + 1) + let quote_string_to_buf s buf = - (* TODO: escaping is wrong here, in particular for control characters *) - Buffer.add_string buf (sprintf "%S" s) + Buffer.add_char buf '"'; + quote_characters_to_buf s buf (String.length s) 0 0; + Buffer.add_char buf '"' let rec to_buf t buf = match t with