From d0f7dd7c842c3b21df37222555984ca44f21b15f Mon Sep 17 00:00:00 2001 From: bfredl Date: Mon, 16 Sep 2024 19:28:37 +0200 Subject: [PATCH] refactor(multibyte): neo-casefolding without allocation fixes #30400 --- src/nvim/mbyte.c | 17 +++-------------- test/unit/mbyte_spec.lua | 6 ++++++ 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index 05f81c48a9d512..01e720283e4cbf 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -1379,22 +1379,11 @@ int utf_fold(int a) return a; } - utf8proc_uint8_t input_str[16] = { 0 }; - if (utf8proc_encode_char(a, input_str) <= 0) { - return a; - } - - utf8proc_uint8_t *fold_str_utf; - if (utf8proc_map((utf8proc_uint8_t *)input_str, 0, &fold_str_utf, - UTF8PROC_NULLTERM | UTF8PROC_CASEFOLD) < 0) { - return a; - } - - int fold_codepoint_utf = utf_ptr2char((char *)fold_str_utf); + utf8proc_int32_t result[1]; - xfree(fold_str_utf); + utf8proc_ssize_t res = utf8proc_decompose_char(a, result, 1, UTF8PROC_CASEFOLD, NULL); - return fold_codepoint_utf; + return (res == 1) ? result[0] : a; } // Vim's own character class functions. These exist because many library diff --git a/test/unit/mbyte_spec.lua b/test/unit/mbyte_spec.lua index e0c0244989e9fd..0a322ce651af7e 100644 --- a/test/unit/mbyte_spec.lua +++ b/test/unit/mbyte_spec.lua @@ -351,6 +351,12 @@ describe('mbyte', function() describe('utf_fold', function() itp('does not crash with surrogates #30527', function() eq(0xDDFB, lib.utf_fold(0xDDFB)) + eq(0xd800, lib.utf_fold(0xd800)) -- high surrogate, invalid as a character + end) + + itp("doesn't crash on invalid codepoints", function() + eq(9000000, lib.utf_fold(9000000)) + eq(0, lib.utf_fold(0)) end) end) end)