From 8647872f4db7041c1422ee953c0b72a5b7cf8893 Mon Sep 17 00:00:00 2001 From: Olivier Nicole Date: Tue, 3 Sep 2024 17:53:19 +0200 Subject: [PATCH] Decode sourcemap mappings only when necessary --- CHANGES.md | 1 + compiler/bin-js_of_ocaml/cmd_arg.ml | 4 +- compiler/bin-js_of_ocaml/link.ml | 2 +- compiler/lib/js_output.ml | 4 +- compiler/lib/source_map.ml | 358 +++++++++++---------- compiler/lib/source_map.mli | 32 +- compiler/tests-compiler/sourcemap.ml | 15 +- compiler/tests-sourcemap/dump_sourcemap.ml | 3 +- 8 files changed, 230 insertions(+), 189 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 1d9de3f2a6..d72933d484 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,6 +6,7 @@ * Misc: yojson is no longer optional * Compiler: speedup global_flow/global_deadcode pass on large bytecode * Compiler: speedup json parsing, relying on Yojson.Raw (#1640) +* Compiler: Decode sourcemap mappings only when necessary (#1664) * Compiler: make indirect call using sequence instead of using the call method [f.call(null, args)] becomes [(0,f)(args)] * Runtime: change Sys.os_type on windows (Cygwin -> Win32) diff --git a/compiler/bin-js_of_ocaml/cmd_arg.ml b/compiler/bin-js_of_ocaml/cmd_arg.ml index 5ffe0fd7f2..fbaf4411ce 100644 --- a/compiler/bin-js_of_ocaml/cmd_arg.ml +++ b/compiler/bin-js_of_ocaml/cmd_arg.ml @@ -314,7 +314,7 @@ let options = ; sources = [] ; sources_content = (if sourcemap_don't_inline_content then None else Some []) ; names = [] - ; mappings = [] + ; mappings = Source_map.Mappings.empty } ) else None in @@ -543,7 +543,7 @@ let options_runtime_only = ; sources = [] ; sources_content = (if sourcemap_don't_inline_content then None else Some []) ; names = [] - ; mappings = [] + ; mappings = Source_map.Mappings.empty } ) else None in diff --git a/compiler/bin-js_of_ocaml/link.ml b/compiler/bin-js_of_ocaml/link.ml index cebb9e53d7..45c2dd0c0e 100644 --- a/compiler/bin-js_of_ocaml/link.ml +++ b/compiler/bin-js_of_ocaml/link.ml @@ -108,7 +108,7 @@ let options = ; sources = [] ; sources_content = Some [] ; names = [] - ; mappings = [] + ; mappings = Source_map.Mappings.empty } ) else None in diff --git a/compiler/lib/js_output.ml b/compiler/lib/js_output.ml index 33c57f23f0..64dcdcab64 100644 --- a/compiler/lib/js_output.ml +++ b/compiler/lib/js_output.ml @@ -1996,8 +1996,9 @@ let program ?(accept_unnamed_var = false) f ?source_map p = | None -> filename | Some _ -> Filename.concat "/builtin" filename) in + let sm_mappings = Source_map.Mappings.decode sm.mappings in let mappings = - List.rev_append_map !temp_mappings sm.mappings ~f:(fun (pos, m) -> + List.rev_append_map !temp_mappings sm_mappings ~f:(fun (pos, m) -> let gen_line = pos.PP.p_line + 1 in let gen_col = pos.PP.p_col in match m with @@ -2012,6 +2013,7 @@ let program ?(accept_unnamed_var = false) f ?source_map p = Source_map.Gen_Ori_Name { gen_line; gen_col; ori_source; ori_line; ori_col; ori_name }) in + let mappings = Source_map.Mappings.encode mappings in Some { sm with Source_map.sources; names; sources_content; mappings } in PP.check f; diff --git a/compiler/lib/source_map.ml b/compiler/lib/source_map.ml index 179c9b021d..a2d30d9f85 100644 --- a/compiler/lib/source_map.ml +++ b/compiler/lib/source_map.ml @@ -50,7 +50,165 @@ type map = ; ori_name : int } -type mapping = map list +let gen_line = function + | Gen { gen_line; _ } | Gen_Ori { gen_line; _ } | Gen_Ori_Name { gen_line; _ } -> + gen_line + +let gen_col = function + | Gen { gen_col; _ } | Gen_Ori { gen_col; _ } | Gen_Ori_Name { gen_col; _ } -> gen_col + +module Mappings = struct + type t = Uninterpreted of string [@@unboxed] + + let empty = Uninterpreted "" + + let of_string : string -> t = fun s -> Uninterpreted s + + let to_string : t -> string = fun (Uninterpreted s) -> s + + let encode mapping = + let a = Array.of_list mapping in + let len = Array.length a in + Array.stable_sort + ~cmp:(fun t1 t2 -> + match compare (gen_line t1) (gen_line t2) with + | 0 -> compare (gen_col t1) (gen_col t2) + | n -> n) + a; + let buf = Buffer.create 1024 in + (* The binary format encodes lines starting at zero, but + [ori_line] and [gen_line] are 1 based. *) + let gen_line_r = ref 1 in + let gen_col_r = ref 0 in + let ori_source_r = ref 0 in + let ori_line_r = ref 1 in + let ori_col_r = ref 0 in + let ori_name_r = ref 0 in + let rec loop prev i = + if i < len + then + let c = a.(i) in + if i + 1 < len && gen_line c = gen_line a.(i + 1) && gen_col c = gen_col a.(i + 1) + then (* Only keep one source location per generated location *) + loop prev (i + 1) + else ( + if !gen_line_r <> gen_line c + then ( + assert (!gen_line_r < gen_line c); + for _i = !gen_line_r to gen_line c - 1 do + Buffer.add_char buf ';' + done; + gen_col_r := 0; + gen_line_r := gen_line c) + else if i > 0 + then Buffer.add_char buf ','; + let l = + match c with + | Gen { gen_line = _; gen_col } -> + let res = [ gen_col - !gen_col_r ] in + gen_col_r := gen_col; + res + | Gen_Ori { gen_line = _; gen_col; ori_source; ori_line; ori_col } -> + let res = + [ gen_col - !gen_col_r + ; ori_source - !ori_source_r + ; ori_line - !ori_line_r + ; ori_col - !ori_col_r + ] + in + gen_col_r := gen_col; + ori_col_r := ori_col; + ori_line_r := ori_line; + ori_source_r := ori_source; + res + | Gen_Ori_Name + { gen_line = _; gen_col; ori_source; ori_line; ori_col; ori_name } -> + let res = + [ gen_col - !gen_col_r + ; ori_source - !ori_source_r + ; ori_line - !ori_line_r + ; ori_col - !ori_col_r + ; ori_name - !ori_name_r + ] + in + gen_col_r := gen_col; + ori_col_r := ori_col; + ori_line_r := ori_line; + ori_source_r := ori_source; + ori_name_r := ori_name; + res + in + Vlq64.encode_l buf l; + loop i (i + 1)) + in + loop (-1) 0; + Uninterpreted (Buffer.contents buf) + + let decode (Uninterpreted str) = + let total_len = String.length str in + let gen_col = ref 0 in + let ori_source = ref 0 in + let ori_line = ref 1 in + let ori_col = ref 0 in + let ori_name = ref 0 in + let rec readline line pos acc = + if pos >= total_len + then List.rev acc + else + let last = try String.index_from str pos ';' with Not_found -> total_len in + gen_col := 0; + let pos, acc = + if pos = last then pos + 1, acc else read_tokens line pos last acc + in + readline (succ line) pos acc + and read_tokens line start stop acc = + let last = + try min (String.index_from str start ',') stop with Not_found -> stop + in + let v = Vlq64.decode_l str ~pos:start ~len:(last - start) in + match v with + | [] -> last + 1, acc + | v -> + let v = + match v with + | [ g ] -> + gen_col := !gen_col + g; + Gen { gen_line = line; gen_col = !gen_col } + | [ g; os; ol; oc ] -> + gen_col := !gen_col + g; + ori_source := !ori_source + os; + ori_line := !ori_line + ol; + ori_col := !ori_col + oc; + Gen_Ori + { gen_line = line + ; gen_col = !gen_col + ; ori_source = !ori_source + ; ori_line = !ori_line + ; ori_col = !ori_col + } + | [ g; os; ol; oc; on ] -> + gen_col := !gen_col + g; + ori_source := !ori_source + os; + ori_line := !ori_line + ol; + ori_col := !ori_col + oc; + ori_name := !ori_name + on; + Gen_Ori_Name + { gen_line = line + ; gen_col = !gen_col + ; ori_source = !ori_source + ; ori_line = !ori_line + ; ori_col = !ori_col + ; ori_name = !ori_name + } + | _ -> invalid_arg "Source_map.mapping_of_string" + in + let acc = v :: acc in + if last = stop then last + 1, acc else read_tokens line (last + 1) stop acc + in + (* The binary format encodes lines starting at zero, but + [ori_line] and [gen_line] are 1 based. *) + readline 1 0 [] +end type t = { version : int @@ -59,7 +217,7 @@ type t = ; sources : string list ; sources_content : Source_content.t option list option ; names : string list - ; mappings : mapping + ; mappings : Mappings.t } let empty ~filename = @@ -69,155 +227,9 @@ let empty ~filename = ; sources = [] ; sources_content = None ; names = [] - ; mappings = [] + ; mappings = Mappings.empty } -let gen_line = function - | Gen { gen_line; _ } | Gen_Ori { gen_line; _ } | Gen_Ori_Name { gen_line; _ } -> - gen_line - -let gen_col = function - | Gen { gen_col; _ } | Gen_Ori { gen_col; _ } | Gen_Ori_Name { gen_col; _ } -> gen_col - -let string_of_mapping mapping = - let a = Array.of_list mapping in - let len = Array.length a in - Array.stable_sort - ~cmp:(fun t1 t2 -> - match compare (gen_line t1) (gen_line t2) with - | 0 -> compare (gen_col t1) (gen_col t2) - | n -> n) - a; - let buf = Buffer.create 1024 in - (* The binary format encodes lines starting at zero, but - [ori_line] and [gen_line] are 1 based. *) - let gen_line_r = ref 1 in - let gen_col_r = ref 0 in - let ori_source_r = ref 0 in - let ori_line_r = ref 1 in - let ori_col_r = ref 0 in - let ori_name_r = ref 0 in - let rec loop prev i = - if i < len - then - let c = a.(i) in - if i + 1 < len && gen_line c = gen_line a.(i + 1) && gen_col c = gen_col a.(i + 1) - then (* Only keep one source location per generated location *) - loop prev (i + 1) - else ( - if !gen_line_r <> gen_line c - then ( - assert (!gen_line_r < gen_line c); - for _i = !gen_line_r to gen_line c - 1 do - Buffer.add_char buf ';' - done; - gen_col_r := 0; - gen_line_r := gen_line c) - else if i > 0 - then Buffer.add_char buf ','; - let l = - match c with - | Gen { gen_line = _; gen_col } -> - let res = [ gen_col - !gen_col_r ] in - gen_col_r := gen_col; - res - | Gen_Ori { gen_line = _; gen_col; ori_source; ori_line; ori_col } -> - let res = - [ gen_col - !gen_col_r - ; ori_source - !ori_source_r - ; ori_line - !ori_line_r - ; ori_col - !ori_col_r - ] - in - gen_col_r := gen_col; - ori_col_r := ori_col; - ori_line_r := ori_line; - ori_source_r := ori_source; - res - | Gen_Ori_Name - { gen_line = _; gen_col; ori_source; ori_line; ori_col; ori_name } -> - let res = - [ gen_col - !gen_col_r - ; ori_source - !ori_source_r - ; ori_line - !ori_line_r - ; ori_col - !ori_col_r - ; ori_name - !ori_name_r - ] - in - gen_col_r := gen_col; - ori_col_r := ori_col; - ori_line_r := ori_line; - ori_source_r := ori_source; - ori_name_r := ori_name; - res - in - Vlq64.encode_l buf l; - loop i (i + 1)) - in - loop (-1) 0; - Buffer.contents buf - -let mapping_of_string str = - let total_len = String.length str in - let gen_col = ref 0 in - let ori_source = ref 0 in - let ori_line = ref 1 in - let ori_col = ref 0 in - let ori_name = ref 0 in - let rec readline line pos acc = - if pos >= total_len - then List.rev acc - else - let last = try String.index_from str pos ';' with Not_found -> total_len in - gen_col := 0; - let pos, acc = if pos = last then pos + 1, acc else read_tokens line pos last acc in - readline (succ line) pos acc - and read_tokens line start stop acc = - let last = try min (String.index_from str start ',') stop with Not_found -> stop in - let v = Vlq64.decode_l str ~pos:start ~len:(last - start) in - match v with - | [] -> last + 1, acc - | v -> - let v = - match v with - | [ g ] -> - gen_col := !gen_col + g; - Gen { gen_line = line; gen_col = !gen_col } - | [ g; os; ol; oc ] -> - gen_col := !gen_col + g; - ori_source := !ori_source + os; - ori_line := !ori_line + ol; - ori_col := !ori_col + oc; - Gen_Ori - { gen_line = line - ; gen_col = !gen_col - ; ori_source = !ori_source - ; ori_line = !ori_line - ; ori_col = !ori_col - } - | [ g; os; ol; oc; on ] -> - gen_col := !gen_col + g; - ori_source := !ori_source + os; - ori_line := !ori_line + ol; - ori_col := !ori_col + oc; - ori_name := !ori_name + on; - Gen_Ori_Name - { gen_line = line - ; gen_col = !gen_col - ; ori_source = !ori_source - ; ori_line = !ori_line - ; ori_col = !ori_col - ; ori_name = !ori_name - } - | _ -> invalid_arg "Source_map.mapping_of_string" - in - let acc = v :: acc in - if last = stop then last + 1, acc else read_tokens line (last + 1) stop acc - in - (* The binary format encodes lines starting at zero, but - [ori_line] and [gen_line] are 1 based. *) - readline 1 0 [] - let maps ~sources_offset ~names_offset x = match x with | Gen _ -> x @@ -230,7 +242,7 @@ let maps ~sources_offset ~names_offset x = Gen_Ori_Name { gen_line; gen_col; ori_source; ori_line; ori_col; ori_name } let filter_map sm ~f = - let a = Array.of_list sm.mappings in + let a = Array.of_list (Mappings.decode sm.mappings) in Array.stable_sort ~cmp:(fun t1 t2 -> match compare (gen_line t1) (gen_line t2) with @@ -264,46 +276,48 @@ let filter_map sm ~f = loop acc xs in let mappings = loop [] l in - { sm with mappings } + { sm with mappings = Mappings.encode mappings } let merge = function | [] -> None | _ :: _ as l -> - let rec loop acc_rev ~sources_offset ~names_offset l = + let rec loop acc_rev mappings_rev ~sources_offset ~names_offset l = match l with - | [] -> acc_rev + | [] -> acc_rev, mappings_rev | sm :: rest -> - let acc_rev = - { acc_rev with - sources = List.rev_append sm.sources acc_rev.sources - ; names = List.rev_append sm.names acc_rev.names - ; sources_content = - (match sm.sources_content, acc_rev.sources_content with - | Some x, Some acc_rev -> Some (List.rev_append x acc_rev) - | None, _ | _, None -> None) - ; mappings = - List.rev_append_map - ~f:(maps ~sources_offset ~names_offset) - sm.mappings - acc_rev.mappings - } + let acc_rev, mappings_rev = + ( { acc_rev with + sources = List.rev_append sm.sources acc_rev.sources + ; names = List.rev_append sm.names acc_rev.names + ; sources_content = + (match sm.sources_content, acc_rev.sources_content with + | Some x, Some acc_rev -> Some (List.rev_append x acc_rev) + | None, _ | _, None -> None) + ; mappings = Mappings.empty + } + , List.rev_append_map + ~f:(maps ~sources_offset ~names_offset) + (Mappings.decode sm.mappings) + mappings_rev ) in loop acc_rev + mappings_rev ~sources_offset:(sources_offset + List.length sm.sources) ~names_offset:(names_offset + List.length sm.names) rest in - let acc_rev = + let acc_rev, mappings_rev = loop { (empty ~filename:"") with sources_content = Some [] } + [] ~sources_offset:0 ~names_offset:0 l in Some { acc_rev with - mappings = List.rev acc_rev.mappings + mappings = Mappings.encode (List.rev mappings_rev) ; sources = List.rev acc_rev.sources ; names = List.rev acc_rev.names ; sources_content = Option.map ~f:List.rev acc_rev.sources_content @@ -331,7 +345,7 @@ let json t = | Some s -> rewrite_path s) ) ; "names", `List (List.map t.names ~f:(fun s -> stringlit s)) ; "sources", `List (List.map t.sources ~f:(fun s -> stringlit (rewrite_path s))) - ; "mappings", stringlit (string_of_mapping t.mappings) + ; "mappings", stringlit (Mappings.to_string t.mappings) ; ( "sourcesContent" , `List (match t.sources_content with @@ -411,8 +425,8 @@ let of_json (json : Yojson.Raw.t) = in let mappings = match string "mappings" rest with - | None -> mapping_of_string "" - | Some s -> mapping_of_string s + | None -> Mappings.empty + | Some s -> Mappings.of_string s in { version = int_of_float (float_of_string version) ; file diff --git a/compiler/lib/source_map.mli b/compiler/lib/source_map.mli index 6c68720c43..a5d6278329 100644 --- a/compiler/lib/source_map.mli +++ b/compiler/lib/source_map.mli @@ -44,7 +44,28 @@ type map = ; ori_name : int } -type mapping = map list +module Mappings : sig + type t + + val empty : t + (** Represents the empty mapping. *) + + val of_string : string -> t + (** By default, mappings are left uninterpreted, since many operations can be + performed efficiently directly on the encoded form. Therefore this + function is mostly a no-op and very cheap. It does not perform any + validation of its argument, unlike {!val:decode}. It is guaranteed that + {!val:of_string} and {!val:to_string} are inverse functions. *) + + val decode : t -> map list + (** Parse the mappings. *) + + val encode : map list -> t + + val to_string : t -> string + (** Returns the mappings as a string in the Source map v3 format. This + function is mostly a no-op and is very cheap. *) +end type t = { version : int @@ -53,17 +74,16 @@ type t = ; sources : string list ; sources_content : Source_content.t option list option ; names : string list - ; mappings : mapping + ; mappings : Mappings.t + (** Left uninterpreted, since most useful operations can be performed efficiently + directly on the encoded form, and a full decoding can be costly for big + sourcemaps. *) } val filter_map : t -> f:(int -> int option) -> t val merge : t list -> t option -val mapping_of_string : string -> mapping - -val string_of_mapping : mapping -> string - val empty : filename:string -> t val to_string : t -> string diff --git a/compiler/tests-compiler/sourcemap.ml b/compiler/tests-compiler/sourcemap.ml index 791cb94d27..6625996539 100644 --- a/compiler/tests-compiler/sourcemap.ml +++ b/compiler/tests-compiler/sourcemap.ml @@ -23,7 +23,8 @@ open Util let print_mapping (sm : Source_map.t) = let sources = Array.of_list sm.sources in let _names = Array.of_list sm.names in - List.iter sm.mappings ~f:(fun (m : Source_map.map) -> + let mappings = Source_map.Mappings.decode sm.mappings in + List.iter mappings ~f:(fun (m : Source_map.map) -> match m with | Gen_Ori { gen_line; gen_col; ori_line; ori_col; ori_source } | Gen_Ori_Name { gen_line; gen_col; ori_line; ori_col; ori_source; ori_name = _ } -> @@ -110,8 +111,8 @@ function x (a, b) { let%expect_test _ = let map_str = ";;;;EAEE,EAAE,EAAC,CAAE;ECQY,UACC" in - let map = Source_map.mapping_of_string map_str in - let map_str' = Source_map.string_of_mapping map in + let map = Source_map.Mappings.(decode (of_string map_str)) in + let map_str' = Source_map.Mappings.(to_string (encode map)) in print_endline map_str; print_endline map_str'; [%expect @@ -128,21 +129,23 @@ let%expect_test _ = { (Source_map.empty ~filename:"1.map") with names = [ "na"; "nb"; "nc" ] ; sources = [ "sa"; "sb" ] - ; mappings = [ gen (1, 1) (10, 10) 0; gen (3, 3) (20, 20) 1 ] + ; mappings = + Source_map.Mappings.encode [ gen (1, 1) (10, 10) 0; gen (3, 3) (20, 20) 1 ] } in let s2 : Source_map.t = { (Source_map.empty ~filename:"2.map") with names = [ "na2"; "nb2" ] ; sources = [ "sa2" ] - ; mappings = [ gen (3, 3) (5, 5) 0 ] + ; mappings = Source_map.Mappings.encode [ gen (3, 3) (5, 5) 0 ] } in let m = Source_map.merge [ s1; Source_map.filter_map s2 ~f:(fun x -> Some (x + 20)) ] in (match m with | None -> () | Some sm -> - print_endline (Source_map.string_of_mapping sm.mappings); + let encoded_mappings = sm.Source_map.mappings in + print_endline (Source_map.Mappings.to_string encoded_mappings); print_mapping sm); [%expect {| diff --git a/compiler/tests-sourcemap/dump_sourcemap.ml b/compiler/tests-sourcemap/dump_sourcemap.ml index 1d69de2fe9..27dfa136f3 100644 --- a/compiler/tests-sourcemap/dump_sourcemap.ml +++ b/compiler/tests-sourcemap/dump_sourcemap.ml @@ -40,7 +40,8 @@ let print_mapping lines (sm : Source_map.t) = let lines = Array.of_list lines in let sources = Array.of_list sm.sources in let _names = Array.of_list sm.names in - List.iter sm.mappings ~f:(fun (m : Source_map.map) -> + let mappings = Source_map.Mappings.decode sm.mappings in + List.iter mappings ~f:(fun (m : Source_map.map) -> let file = function | -1 -> "null" | n -> normalize_path sources.(n)