Fix unicode classification of non-spacing marks.

This commit adds the ~2000 non-spacing marks to the IdentPart category. This includes all the combining marks, and thus fixes coq#19512. It also means that characters in the range 1DC0-1DFF can no longer appear at the start of an identifier (which would not make sense anyway, as they are combining marks).

This commit also removes a few exceptions that were not actually exceptions:
- the dot is already in Symbol,
- phonetic extensions are already in Letter.
silene · Oct 15, 2024 · b1ba7e7 · b1ba7e7
1 parent a79fb09
commit b1ba7e7
Showing 1 changed file with 2 additions and 11 deletions.
diff --git a/clib/unicode.ml b/clib/unicode.ml
@@ -98,25 +98,16 @@ let classify =
         Unicodetable.nd;           (* Number, decimal digits.           *)
         Unicodetable.nl;           (* Number, letter.                   *)
         Unicodetable.no;           (* Number, other.                    *)
+        Unicodetable.mn;           (* Non-spacing marks.                *)
       ];
 
-    (* Workaround. Some characters seems to be missing in
-       Camomile's category tables. We add them manually. *)
-    mk_lookup_table_from_unicode_tables_for Letter
-      [
-        [(0x01D00, 0x01D7F)];      (* Phonetic Extensions.              *)
-        [(0x01D80, 0x01DBF)];      (* Phonetic Extensions Suppl.        *)
-        [(0x01DC0, 0x01DFF)];      (* Combining Diacritical Marks Suppl.*)
-      ];
-
-    (* Exceptions (from a previous version of this function).           *)
+    (* Exceptions from Number, other.                                   *)
     mk_lookup_table_from_unicode_tables_for Symbol
       [
         [(0x000B2, 0x000B3)];      (* Superscript 2-3.                  *)
         single 0x000B9;            (* Superscript 1.                    *)
         single 0x02070;            (* Superscript 0.                    *)
         [(0x02074, 0x02079)];      (* Superscript 4-9.                  *)
-        single 0x0002E;            (* Dot.                              *)
       ];
     mk_lookup_table_from_unicode_tables_for Separator
       [