diff --git a/controller/rq_completion.ml b/controller/rq_completion.ml index dd7e1243b..502dc27b0 100644 --- a/controller/rq_completion.ml +++ b/controller/rq_completion.ml @@ -78,9 +78,11 @@ let validate_line ~(doc : Fleche.Doc.t) ~line = (* This returns a byte-based char offset for the line *) let validate_position ~doc ~point = let line, char = point in - Option.bind (validate_line ~doc ~line) (fun line -> + Option.map + (fun line -> let char = Coq.Utf8.get_byte_offset_from_utf16_pos line char in - Option.bind char (fun index -> Some (String.get line index))) + String.get line char) + (validate_line ~doc ~line) let get_char_at_point ~(doc : Fleche.Doc.t) ~point = let line, char = point in diff --git a/controller/rq_hover.ml b/controller/rq_hover.ml index 3bbf7e9bf..c2724a26d 100644 --- a/controller/rq_hover.ml +++ b/controller/rq_hover.ml @@ -243,8 +243,11 @@ end let () = List.iter Register.add [ Loc_info.h; Stats.h; Type.h; Notation.h ] let hover ~token ~doc ~point = + let point = lsp_point_to_doc_point ~doc point in let node = Info.LC.node ~doc ~point Exact in let range = Option.map Doc.Node.range node in + (* XXX: EJGA, ranges should be already converted, why is this needed? *) + let range = Option.map (doc_range_to_lsp_range ~doc) range in let hovers = Register.fire ~token ~doc ~point ~node in match hovers with | [] -> `Null diff --git a/coq/utf8.ml b/coq/utf8.ml index 9051e7178..7f1cf029c 100644 --- a/coq/utf8.ml +++ b/coq/utf8.ml @@ -179,18 +179,54 @@ let string_get_utf_8_uchar s i = let get_byte_offset_from_utf16_pos line (char : int) = let byte_idx = ref 0 in let utf16_char_count = ref 0 in - while !byte_idx < String.length line && !utf16_char_count < char do - let ch = string_get_utf_8_uchar line !byte_idx in - byte_idx := next line !byte_idx; - let code_unit_count = - uchar_utf_16_byte_length (uchar_utf_decode_uchar ch) / 2 - in - utf16_char_count := !utf16_char_count + code_unit_count; - () + let len = String.length line in + (try + while !utf16_char_count < char do + let ch = string_get_utf_8_uchar line !byte_idx in + let next_idx = next line !byte_idx in + if next_idx >= len then raise Not_found else byte_idx := next_idx; + let code_unit_count = + uchar_utf_16_byte_length (uchar_utf_decode_uchar ch) / 2 + in + utf16_char_count := !utf16_char_count + code_unit_count; + () + done + with _ -> ()); + !byte_idx + +let get_unicode_offset_from_utf16_pos line (char : int) = + let byte_idx = ref 0 in + let count = ref 0 in + let utf16_char_count = ref 0 in + let len = String.length line in + (try + while !utf16_char_count < char do + let ch = string_get_utf_8_uchar line !byte_idx in + let next_idx = next line !byte_idx in + if next_idx >= len then raise Not_found else byte_idx := next_idx; + let code_unit_count = + uchar_utf_16_byte_length (uchar_utf_decode_uchar ch) / 2 + in + utf16_char_count := !utf16_char_count + code_unit_count; + count := !count + 1; + () + done + with _ -> ()); + !count + +let get_utf16_offset_from_unicode_offset line (char : int) = + let offset16 = ref 0 in + let idx = ref 0 in + for _ = 0 to char - 1 do + let ch = string_get_utf_8_uchar line !idx in + let byte_len = uchar_utf_16_byte_length (uchar_utf_decode_uchar ch) in + offset16 := !offset16 + (byte_len / 2); + idx := next line !idx done; - if !byte_idx < String.length line then Some !byte_idx else None + !offset16 -let%test_unit "utf16 offsets" = +let%test_unit "utf16 byte offsets" = + let check_last s i = i < String.length s && next s i == String.length s in let testcases_x = [ ("aax", 2, true) ; (" xoo", 2, true) @@ -205,15 +241,52 @@ let%test_unit "utf16 offsets" = List.iter (fun (s, i, b) -> let res = get_byte_offset_from_utf16_pos s i in - if b then - let res = Option.map (fun i -> s.[i]) res in - match res with - | Some x when x = 'x' -> () - | Some x -> + if b then ( + let res = s.[res] in + if res != 'x' then failwith - (Printf.sprintf "Didn't find x in test %s : %d, instead: %c" s i x) - | None -> - failwith (Printf.sprintf "Didn't not find x in test %s : %d" s i) - else if res != None then - failwith (Printf.sprintf "Shouldn't find x in test %s : %d" s i)) + (Printf.sprintf "Didn't find x in test %s : %d, instead: %c" s i res)) + else if not (check_last s res) then + failwith + (Printf.sprintf "Shouldn't find x in test %s / %d got %d" s i res)) testcases_x + +let%test_unit "utf16 unicode offsets" = + let testcases = + [ ("aax", 2, 2) + ; (" xoo", 2, 2) + ; ("0123", 4, 3) + ; (" 𝒞x", 4, 3) + ; (" 𝒞x𝒞", 4, 3) + ; (" 𝒞∫x", 5, 4) + ; (" 𝒞", 4, 2) + ; ("∫x.dy", 1, 1) + ] + in + List.iter + (fun (s, i, e) -> + let res = get_unicode_offset_from_utf16_pos s i in + if res != e then + failwith + (Printf.sprintf "Wrong result: got %d expected %d in test %s" res e s)) + testcases + +let%test_unit "unicode utf16 offsets" = + let testcases = + [ ("aax", 2, 2) + ; (" xoo", 2, 2) + ; ("0123", 3, 3) + ; (" 𝒞x", 3, 4) + ; (" 𝒞x𝒞", 3, 4) + ; (" 𝒞∫x", 4, 5) + ; (" 𝒞", 2, 2) + ; ("∫x.dy", 1, 1) + ] + in + List.iter + (fun (s, i, e) -> + let res = get_utf16_offset_from_unicode_offset s i in + if res != e then + failwith + (Printf.sprintf "Wrong result: got %d expected %d in test %s" res e s)) + testcases diff --git a/coq/utf8.mli b/coq/utf8.mli index 2c66e66e6..5d69292e6 100644 --- a/coq/utf8.mli +++ b/coq/utf8.mli @@ -48,5 +48,15 @@ val index_of_char : line:string -> char:char -> index option val length : string -> char (** Get the byte potition of a code point indexed in UTF-16 code units in a - UTF-8 encoded string. *) -val get_byte_offset_from_utf16_pos : string -> int -> int option + UTF-8 encoded string. Returns the position of the last character if the + UTF-16 position was out of bounds. *) +val get_byte_offset_from_utf16_pos : string -> int -> int + +(** Get the unicode potition of a code point indexed in UTF-16 code units in a + utf-8 encoded string. Returns the position of the last character if the + utf-16 position was out of bounds. *) +val get_unicode_offset_from_utf16_pos : string -> int -> int + +(** Get the utf16 potition of a code point indexed in unicode code points in a + UTF-8 encoded string. The position must be in bounds. *) +val get_utf16_offset_from_unicode_offset : string -> int -> int diff --git a/lsp/core.ml b/lsp/core.ml index 8d8b61644..828388322 100644 --- a/lsp/core.ml +++ b/lsp/core.ml @@ -160,3 +160,25 @@ module DocumentDiagnosticReportPartialResult = struct } [@@deriving to_yojson] end + +(* Utility functions to translate between coordinate systems *) + +let lsp_point_to_doc_point ~(doc : Fleche.Doc.t) point = + let line, char = point in + let line_count = Array.length doc.contents.lines in + (* lines cannot be empty *) + let line = min (line_count - 1) line in + let s = Array.get doc.contents.lines line in + let char = Coq.Utf8.get_unicode_offset_from_utf16_pos s char in + (line, char) + +let doc_point_to_lsp_point ~(doc : Fleche.Doc.t) (point : Lang.Point.t) = + let s = Array.get doc.contents.lines point.line in + let char = Coq.Utf8.get_utf16_offset_from_unicode_offset s point.character in + { point with character = char } + +let doc_range_to_lsp_range ~(doc : Fleche.Doc.t) (range : Lang.Range.t) : + Lang.Range.t = + { start = doc_point_to_lsp_point ~doc range.start + ; end_ = doc_point_to_lsp_point ~doc range.end_ + } diff --git a/lsp/core.mli b/lsp/core.mli index 7a6cdd8b3..6f083eb0f 100644 --- a/lsp/core.mli +++ b/lsp/core.mli @@ -163,3 +163,14 @@ module DocumentDiagnosticReportPartialResult : sig } [@@deriving to_yojson] end + +(** Translate an LSP position into a Fleche position, in Unicode code points. If + the LSP positon is out of bounds, the result is capped to the last + line/character. *) +val lsp_point_to_doc_point : doc:Fleche.Doc.t -> int * int -> int * int + +(** Translate a Fleche position into an UTF-16 LSP position. *) +val doc_point_to_lsp_point : doc:Fleche.Doc.t -> Lang.Point.t -> Lang.Point.t + +(** Translate a Fleche range into an UTF-16 LSP range. *) +val doc_range_to_lsp_range : doc:Fleche.Doc.t -> Lang.Range.t -> Lang.Range.t