Skip to content

Commit

Permalink
Merge pull request #756 from ejgallego/input_help_unicode
Browse files Browse the repository at this point in the history
[hover] Show input help for Unicode characters
  • Loading branch information
ejgallego authored Jun 5, 2024
2 parents 9bcc395 + 91c79ca commit d46cfd6
Show file tree
Hide file tree
Showing 11 changed files with 288 additions and 138 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#754)
- [vscode] Fix focus race when a Coq file is in column 2 (@ejgallego,
#755, cc: #722, #725)
- [hover] Show input howto for unicode characters on hover
(@ejgallego, Léo Stefanesco, #756)

# coq-lsp 0.1.10: Hasta el 40 de Mayo _en effect_...
----------------------------------------------------
Expand Down
44 changes: 33 additions & 11 deletions controller/rq_common.ml
Original file line number Diff line number Diff line change
Expand Up @@ -55,22 +55,44 @@ let validate_line ~(contents : Fleche.Contents.t) ~line =
Some (Array.get contents.lines line)
else None

let validate_column char line =
let validate_column ~get char line =
let length = Lang.Utf.length_utf16 line in
if char < length then
let char = Lang.Utf.utf8_offset_of_utf16_offset ~line ~offset:char in
Some (String.get line char)
get line char
else None

(* This returns a byte-based char offset for the line *)
let validate_position ~contents ~point =
let validate_position ~get ~contents ~point =
let line, char = point in
validate_line ~contents ~line |> fun l -> Option.bind l (validate_column char)
validate_line ~contents ~line |> fun l ->
Option.bind l (validate_column ~get char)

let get_char_at_point ~contents ~point =
let line, char = point in
if char >= 1 then
let point = (line, char - 1) in
validate_position ~contents ~point
else (* Can't get previous char *)
None
let get_char_at_point_gen ~prev ~get ~contents ~point =
if prev then
let line, char = point in
if char >= 1 then
let point = (line, char - 1) in
validate_position ~get ~contents ~point
else (* Can't get previous char *)
None
else validate_position ~get ~contents ~point

let get_char_at_point ~prev ~contents ~point =
let get line utf8_offset = Some (String.get line utf8_offset) in
get_char_at_point_gen ~prev ~get ~contents ~point

let get_uchar_at_point ~prev ~contents ~point =
let get line utf8_offset =
let decode =
Lang.Compat.OCaml4_14.String.get_utf_8_uchar line utf8_offset
in
if Lang.Compat.OCaml4_14.Uchar.utf_decode_is_valid decode then
let str =
String.sub line utf8_offset
(Lang.Compat.OCaml4_14.Uchar.utf_decode_length decode)
in
Some (Lang.Compat.OCaml4_14.Uchar.utf_decode_uchar decode, str)
else None
in
get_char_at_point_gen ~prev ~get ~contents ~point
9 changes: 8 additions & 1 deletion controller/rq_common.mli
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,11 @@ val get_id_at_point :
contents:Fleche.Contents.t -> point:int * int -> string option

val get_char_at_point :
contents:Fleche.Contents.t -> point:int * int -> char option
prev:bool -> contents:Fleche.Contents.t -> point:int * int -> char option

(* Get both the uchar and its utf-8 string representation *)
val get_uchar_at_point :
prev:bool
-> contents:Fleche.Contents.t
-> point:int * int
-> (Uchar.t * string) option
10 changes: 2 additions & 8 deletions controller/rq_completion.ml
Original file line number Diff line number Diff line change
Expand Up @@ -60,21 +60,15 @@ let mk_unicode_completion_item point (label, newText) =
mk_completion ~label ~labelDetails ~textEdit ~commitCharacters ()

let unicode_list point : Yojson.Safe.t list =
let ulist =
match !Fleche.Config.v.unicode_completion with
| Off -> []
| Internal_small -> Unicode_bindings.small
| Normal -> Unicode_bindings.normal
| Extended -> Unicode_bindings.extended
in
let ulist = Unicode_bindings.from_config () in
(* Coq's CList.map is tail-recursive *)
CList.map (mk_unicode_completion_item point) ulist

let completion ~token:_ ~(doc : Fleche.Doc.t) ~point =
(* Instead of get_char_at_point we should have a CompletionContext.t, to be
addressed in further completion PRs *)
let contents = doc.contents in
(match Rq_common.get_char_at_point ~contents ~point with
(match Rq_common.get_char_at_point ~prev:true ~contents ~point with
| None ->
let incomplete = true in
let items = [] in
Expand Down
27 changes: 26 additions & 1 deletion controller/rq_hover.ml
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,29 @@ module Notation : HoverProvider = struct
let h = Handler.WithNode info_notation
end

module InputHelp : HoverProvider = struct
let mk_map map =
List.fold_left
(fun m (tex, uni) -> CString.Map.add uni tex m)
CString.Map.empty map

(* A bit hackish, but OK *)
let unimap =
Lazy.from_fun (fun () -> mk_map (Unicode_bindings.from_config ()))

let input_help ~token:_ ~contents ~point ~node:_ =
(* check if contents at point match *)
match Rq_common.get_uchar_at_point ~prev:false ~contents ~point with
| Some (uchar, uchar_str)
when Lang.Compat.OCaml4_14.Uchar.utf_8_byte_length uchar > 1 ->
Option.map
(fun tex -> Format.asprintf "Input %s with %s" uchar_str tex)
(CString.Map.find_opt uchar_str (Lazy.force unimap))
| Some _ | None -> None

let h = Handler.MaybeNode input_help
end

module Register = struct
let handlers : Handler.t list ref = ref []
let add fn = handlers := fn :: !handlers
Expand All @@ -240,7 +263,9 @@ module Register = struct
end

(* Register in-file hover plugins *)
let () = List.iter Register.add [ Loc_info.h; Stats.h; Type.h; Notation.h ]
let () =
List.iter Register.add
[ Loc_info.h; Stats.h; Type.h; Notation.h; InputHelp.h ]

let hover ~token ~(doc : Fleche.Doc.t) ~point =
let node = Info.LC.node ~doc ~point Exact in
Expand Down
7 changes: 7 additions & 0 deletions controller/unicode_bindings.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1697,3 +1697,10 @@ let extended =
; ("\\_v", "")
; ("\\_x", "")
]

let from_config () =
match !Fleche.Config.v.unicode_completion with
| Off -> []
| Internal_small -> small
| Normal -> normal
| Extended -> extended
3 changes: 3 additions & 0 deletions controller/unicode_bindings.mli
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@ val normal : (string * string) list

(** All the supported bindings for unicode characters in a table. *)
val extended : (string * string) list

(** Return the list selected in config *)
val from_config : unit -> (string * string) list
176 changes: 176 additions & 0 deletions lang/compat.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
(* OCaml compat *)

(* The following is copied from Ocaml's standard library Bytes and Uchar
modules. We use the public safe variant of various functions, so it should be
slower.
TODO: when our minimum supported Ocaml version is >= 4.14 we shoud switch to
the standard library. *)
module Uchar_ = Uchar

module OCaml4_14 = struct
module Uchar = struct
type utf_decode = int

(* From Uchar.ml *)
let rep = 0xFFFD
let valid_bit = 27
let decode_bits = 24
let[@inline] utf_decode_is_valid d = d lsr valid_bit = 1
let[@inline] utf_decode_length d = (d lsr decode_bits) land 0b111
let[@inline] utf_decode_uchar d = Uchar.unsafe_of_int (d land 0xFFFFFF)
let[@inline] utf_decode n u = ((8 lor n) lsl decode_bits) lor Uchar.to_int u
let[@inline] utf_decode_invalid n = (n lsl decode_bits) lor rep

let utf_8_byte_length u =
match Uchar.to_int u with
| u when u < 0 -> assert false
| u when u <= 0x007F -> 1
| u when u <= 0x07FF -> 2
| u when u <= 0xFFFF -> 3
| u when u <= 0x10FFFF -> 4
| _ -> assert false

let utf_16_byte_length u =
match Uchar.to_int u with
| u when u < 0 -> assert false
| u when u <= 0xFFFF -> 2
| u when u <= 0x10FFFF -> 4
| _ -> assert false
end

module String = struct
let[@inline] not_in_x80_to_xBF b = b lsr 6 <> 0b10
let[@inline] not_in_xA0_to_xBF b = b lsr 5 <> 0b101
let[@inline] not_in_x80_to_x9F b = b lsr 5 <> 0b100
let[@inline] not_in_x90_to_xBF b = b < 0x90 || 0xBF < b
let[@inline] not_in_x80_to_x8F b = b lsr 4 <> 0x8
let[@inline] utf_8_uchar_2 b0 b1 = ((b0 land 0x1F) lsl 6) lor (b1 land 0x3F)

let[@inline] utf_8_uchar_3 b0 b1 b2 =
((b0 land 0x0F) lsl 12) lor ((b1 land 0x3F) lsl 6) lor (b2 land 0x3F)

let[@inline] utf_8_uchar_4 b0 b1 b2 b3 =
((b0 land 0x07) lsl 18)
lor ((b1 land 0x3F) lsl 12)
lor ((b2 land 0x3F) lsl 6)
lor (b3 land 0x3F)

let[@inline] dec_ret n u = Uchar.utf_decode n (Uchar_.unsafe_of_int u)
let dec_invalid = Uchar.utf_decode_invalid

let get_utf_8_uchar s i =
let b = Bytes.unsafe_of_string s in
let b0 = Bytes.get_uint8 b i in
(* raises if [i] is not a valid index. *)
let get = Bytes.get_uint8 in
let max = Bytes.length b - 1 in
match Char.unsafe_chr b0 with
(* See The Unicode Standard, Table 3.7 *)
| '\x00' .. '\x7F' -> dec_ret 1 b0
| '\xC2' .. '\xDF' ->
let i = i + 1 in
if i > max then dec_invalid 1
else
let b1 = get b i in
if not_in_x80_to_xBF b1 then dec_invalid 1
else dec_ret 2 (utf_8_uchar_2 b0 b1)
| '\xE0' ->
let i = i + 1 in
if i > max then dec_invalid 1
else
let b1 = get b i in
if not_in_xA0_to_xBF b1 then dec_invalid 1
else
let i = i + 1 in
if i > max then dec_invalid 2
else
let b2 = get b i in
if not_in_x80_to_xBF b2 then dec_invalid 2
else dec_ret 3 (utf_8_uchar_3 b0 b1 b2)
| '\xE1' .. '\xEC' | '\xEE' .. '\xEF' ->
let i = i + 1 in
if i > max then dec_invalid 1
else
let b1 = get b i in
if not_in_x80_to_xBF b1 then dec_invalid 1
else
let i = i + 1 in
if i > max then dec_invalid 2
else
let b2 = get b i in
if not_in_x80_to_xBF b2 then dec_invalid 2
else dec_ret 3 (utf_8_uchar_3 b0 b1 b2)
| '\xED' ->
let i = i + 1 in
if i > max then dec_invalid 1
else
let b1 = get b i in
if not_in_x80_to_x9F b1 then dec_invalid 1
else
let i = i + 1 in
if i > max then dec_invalid 2
else
let b2 = get b i in
if not_in_x80_to_xBF b2 then dec_invalid 2
else dec_ret 3 (utf_8_uchar_3 b0 b1 b2)
| '\xF0' ->
let i = i + 1 in
if i > max then dec_invalid 1
else
let b1 = get b i in
if not_in_x90_to_xBF b1 then dec_invalid 1
else
let i = i + 1 in
if i > max then dec_invalid 2
else
let b2 = get b i in
if not_in_x80_to_xBF b2 then dec_invalid 2
else
let i = i + 1 in
if i > max then dec_invalid 3
else
let b3 = get b i in
if not_in_x80_to_xBF b3 then dec_invalid 3
else dec_ret 4 (utf_8_uchar_4 b0 b1 b2 b3)
| '\xF1' .. '\xF3' ->
let i = i + 1 in
if i > max then dec_invalid 1
else
let b1 = get b i in
if not_in_x80_to_xBF b1 then dec_invalid 1
else
let i = i + 1 in
if i > max then dec_invalid 2
else
let b2 = get b i in
if not_in_x80_to_xBF b2 then dec_invalid 2
else
let i = i + 1 in
if i > max then dec_invalid 3
else
let b3 = get b i in
if not_in_x80_to_xBF b3 then dec_invalid 3
else dec_ret 4 (utf_8_uchar_4 b0 b1 b2 b3)
| '\xF4' ->
let i = i + 1 in
if i > max then dec_invalid 1
else
let b1 = get b i in
if not_in_x80_to_x8F b1 then dec_invalid 1
else
let i = i + 1 in
if i > max then dec_invalid 2
else
let b2 = get b i in
if not_in_x80_to_xBF b2 then dec_invalid 2
else
let i = i + 1 in
if i > max then dec_invalid 3
else
let b3 = get b i in
if not_in_x80_to_xBF b3 then dec_invalid 3
else dec_ret 4 (utf_8_uchar_4 b0 b1 b2 b3)
| _ -> dec_invalid 1
end
end
16 changes: 16 additions & 0 deletions lang/compat.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
module OCaml4_14 : sig
module Uchar : sig
type utf_decode

val utf_decode_is_valid : utf_decode -> bool
val utf_decode_uchar : utf_decode -> Uchar.t
val utf_decode_length : utf_decode -> int
val utf_decode : int -> Uchar.t -> int
val utf_8_byte_length : Uchar.t -> int
val utf_16_byte_length : Uchar.t -> int
end

module String : sig
val get_utf_8_uchar : string -> int -> Uchar.utf_decode
end
end
Loading

0 comments on commit d46cfd6

Please sign in to comment.