Skip to content

Commit

Permalink
[Hover] Follow LSP and treat positions as UTF16 code units
Browse files Browse the repository at this point in the history
  • Loading branch information
ineol committed Nov 24, 2023
1 parent 9a96682 commit feda594
Show file tree
Hide file tree
Showing 8 changed files with 148 additions and 25 deletions.
6 changes: 4 additions & 2 deletions controller/rq_completion.ml
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,11 @@ let validate_line ~(doc : Fleche.Doc.t) ~line =
(* This returns a byte-based char offset for the line *)
let validate_position ~doc ~point =
let line, char = point in
Option.bind (validate_line ~doc ~line) (fun line ->
Option.map
(fun line ->
let char = Coq.Utf8.get_byte_offset_from_utf16_pos line char in
Option.bind char (fun index -> Some (String.get line index)))
String.get line char)
(validate_line ~doc ~line)

let get_char_at_point ~(doc : Fleche.Doc.t) ~point =
let line, char = point in
Expand Down
2 changes: 2 additions & 0 deletions controller/rq_hover.ml
Original file line number Diff line number Diff line change
Expand Up @@ -230,8 +230,10 @@ end
let () = List.iter Register.add [ Loc_info.h; Stats.h; Type.h; Notation.h ]

let hover ~doc ~point =
let point = lsp_point_to_doc_point ~doc point in
let node = Info.LC.node ~doc ~point Exact in
let range = Option.map Doc.Node.range node in
let range = Option.map (doc_range_to_lsp_range ~doc) range in
let hovers = Register.fire ~doc ~point ~node in
match hovers with
| [] -> `Null
Expand Down
3 changes: 2 additions & 1 deletion coq/dune
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@
; Unfortunate we have to link the STM due to the LTAC plugin
; depending on it, we should fix this upstream
(inline_tests)
(preprocess (pps ppx_inline_test))
(preprocess
(pps ppx_inline_test))
(libraries lang coq-core.vernac coq-serapi.serlib))
113 changes: 93 additions & 20 deletions coq/utf8.ml
Original file line number Diff line number Diff line change
Expand Up @@ -179,18 +179,54 @@ let string_get_utf_8_uchar s i =
let get_byte_offset_from_utf16_pos line (char : int) =
let byte_idx = ref 0 in
let utf16_char_count = ref 0 in
while !byte_idx < String.length line && !utf16_char_count < char do
let ch = string_get_utf_8_uchar line !byte_idx in
byte_idx := next line !byte_idx;
let code_unit_count =
uchar_utf_16_byte_length (uchar_utf_decode_uchar ch) / 2
in
utf16_char_count := !utf16_char_count + code_unit_count;
()
let len = String.length line in
(try
while !utf16_char_count < char do
let ch = string_get_utf_8_uchar line !byte_idx in
let next_idx = next line !byte_idx in
if next_idx >= len then raise Not_found else byte_idx := next_idx;
let code_unit_count =
uchar_utf_16_byte_length (uchar_utf_decode_uchar ch) / 2
in
utf16_char_count := !utf16_char_count + code_unit_count;
()
done
with _ -> ());
!byte_idx

let get_unicode_offset_from_utf16_pos line (char : int) =
let byte_idx = ref 0 in
let count = ref 0 in
let utf16_char_count = ref 0 in
let len = String.length line in
(try
while !utf16_char_count < char do
let ch = string_get_utf_8_uchar line !byte_idx in
let next_idx = next line !byte_idx in
if next_idx >= len then raise Not_found else byte_idx := next_idx;
let code_unit_count =
uchar_utf_16_byte_length (uchar_utf_decode_uchar ch) / 2
in
utf16_char_count := !utf16_char_count + code_unit_count;
count := !count + 1;
()
done
with _ -> ());
!count

let get_utf16_offset_from_unicode_offset line (char : int) =
let offset16 = ref 0 in
let idx = ref 0 in
for _ = 0 to char - 1 do
let ch = string_get_utf_8_uchar line !idx in
let byte_len = uchar_utf_16_byte_length (uchar_utf_decode_uchar ch) in
offset16 := !offset16 + (byte_len / 2);
idx := next line !idx
done;
if !byte_idx < String.length line then Some !byte_idx else None
!offset16

let%test_unit "utf16 offsets" =
let%test_unit "utf16 byte offsets" =
let check_last s i = i < String.length s && next s i == String.length s in
let testcases_x =
[ ("aax", 2, true)
; (" xoo", 2, true)
Expand All @@ -205,15 +241,52 @@ let%test_unit "utf16 offsets" =
List.iter
(fun (s, i, b) ->
let res = get_byte_offset_from_utf16_pos s i in
if b then
let res = Option.map (fun i -> s.[i]) res in
match res with
| Some x when x = 'x' -> ()
| Some x ->
if b then (
let res = s.[res] in
if res != 'x' then
failwith
(Printf.sprintf "Didn't find x in test %s : %d, instead: %c" s i x)
| None ->
failwith (Printf.sprintf "Didn't not find x in test %s : %d" s i)
else if res != None then
failwith (Printf.sprintf "Shouldn't find x in test %s : %d" s i))
(Printf.sprintf "Didn't find x in test %s : %d, instead: %c" s i res))
else if not (check_last s res) then
failwith
(Printf.sprintf "Shouldn't find x in test %s / %d got %d" s i res))
testcases_x

let%test_unit "utf16 unicode offsets" =
let testcases =
[ ("aax", 2, 2)
; (" xoo", 2, 2)
; ("0123", 4, 3)
; (" 𝒞x", 4, 3)
; (" 𝒞x𝒞", 4, 3)
; (" 𝒞∫x", 5, 4)
; (" 𝒞", 4, 2)
; ("∫x.dy", 1, 1)
]
in
List.iter
(fun (s, i, e) ->
let res = get_unicode_offset_from_utf16_pos s i in
if res != e then
failwith
(Printf.sprintf "Wrong result: got %d expected %d in test %s" res e s))
testcases

let%test_unit "unicode utf16 offsets" =
let testcases =
[ ("aax", 2, 2)
; (" xoo", 2, 2)
; ("0123", 3, 3)
; (" 𝒞x", 3, 4)
; (" 𝒞x𝒞", 3, 4)
; (" 𝒞∫x", 4, 5)
; (" 𝒞", 2, 2)
; ("∫x.dy", 1, 1)
]
in
List.iter
(fun (s, i, e) ->
let res = get_utf16_offset_from_unicode_offset s i in
if res != e then
failwith
(Printf.sprintf "Wrong result: got %d expected %d in test %s" res e s))
testcases
14 changes: 12 additions & 2 deletions coq/utf8.mli
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,15 @@ val index_of_char : line:string -> char:char -> index option
val length : string -> char

(** Get the byte potition of a code point indexed in UTF-16 code units in a
UTF-8 encoded string. *)
val get_byte_offset_from_utf16_pos : string -> int -> int option
UTF-8 encoded string. Returns the position of the last character if the
UTF-16 position was out of bounds. *)
val get_byte_offset_from_utf16_pos : string -> int -> int

(** Get the unicode potition of a code point indexed in UTF-16 code units in a
utf-8 encoded string. Returns the position of the last character if the
utf-16 position was out of bounds. *)
val get_unicode_offset_from_utf16_pos : string -> int -> int

(** Get the utf16 potition of a code point indexed in unicode code points in a
UTF-8 encoded string. The position must be in bounds. *)
val get_utf16_offset_from_unicode_offset : string -> int -> int
23 changes: 23 additions & 0 deletions lsp/core.ml
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,26 @@ module DocumentDiagnosticReportPartialResult = struct
}
[@@deriving to_yojson]
end


(* Utility functions to translate between coordinate systems *)

let lsp_point_to_doc_point ~(doc : Fleche.Doc.t) point =
let line, char = point in
let line_count = Array.length doc.contents.lines in
(* lines cannot be empty *)
let line = min (line_count - 1) line in
let s = Array.get doc.contents.lines line in
let char = Coq.Utf8.get_unicode_offset_from_utf16_pos s char in
(line, char)

let doc_point_to_lsp_point ~(doc : Fleche.Doc.t) (point : Lang.Point.t) =
let s = Array.get doc.contents.lines point.line in
let char = Coq.Utf8.get_utf16_offset_from_unicode_offset s point.character in
{ point with character = char }

let doc_range_to_lsp_range ~(doc : Fleche.Doc.t) (range : Lang.Range.t) :
Lang.Range.t =
{ start = doc_point_to_lsp_point ~doc range.start
; end_ = doc_point_to_lsp_point ~doc range.end_
}
11 changes: 11 additions & 0 deletions lsp/core.mli
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,14 @@ module DocumentDiagnosticReportPartialResult : sig
}
[@@deriving to_yojson]
end

(** Translate an LSP position into a Fleche position, in Unicode code points. If
the LSP positon is out of bounds, the result is capped to the last
line/character. *)
val lsp_point_to_doc_point : doc:Fleche.Doc.t -> int * int -> int * int

(** Translate a Fleche position into an UTF-16 LSP position. *)
val doc_point_to_lsp_point : doc:Fleche.Doc.t -> Lang.Point.t -> Lang.Point.t

(** Translate a Fleche range into an UTF-16 LSP range. *)
val doc_range_to_lsp_range : doc:Fleche.Doc.t -> Lang.Range.t -> Lang.Range.t
1 change: 1 addition & 0 deletions lsp/jLang.mli
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ end

val mk_diagnostics :
uri:Lang.LUri.File.t -> version:int -> Lang.Diagnostic.t list -> Yojson.Safe.t

0 comments on commit feda594

Please sign in to comment.