Overhaul unicode parsing

It now iterates over the actual Unicode code points instead. This is better than what it was doing previously, but it is still not entirely correct with respect to Unicode sequences. Handling Unicode code points does, however, make it slightly easier to support UTF-16 if needed in the future. This also adds some long-needed tests for buffer methods.
datagubbe · Aug 29, 2024 · c7cb5ee · c7cb5ee
1 parent 991283f
commit c7cb5ee
Show file tree

Hide file tree

Showing 18 changed files with 779 additions and 662 deletions.
diff --git a/dged.nix b/dged.nix
@@ -10,6 +10,8 @@
 , valgrind
 , linkFarm
 , fetchFromGitHub
+, glibcLocalesUtf8
+, strace
 }:
 stdenv.mkDerivation {
   name = "dged";
@@ -32,6 +34,10 @@ stdenv.mkDerivation {
     bmake docs
   '';
 
+  # needed for tests to work in sandboxed builds
+  LANG = "en_US.UTF-8";
+  LOCALE_ARCHIVE = "${glibcLocalesUtf8}/lib/locale/locale-archive";
+
   TREESITTER_GRAMMARS = with tree-sitter-grammars;
     linkFarm "tree-sitter-grammars" rec {
       "bash" = tree-sitter-bash;