From b41af4e1928fce3cf6e67cad17dfa9e6b45788ae Mon Sep 17 00:00:00 2001 From: nick black Date: Wed, 5 Jun 2024 09:47:59 -0400 Subject: [PATCH] use encoding_is_utf8() for ncdirect --- src/lib/direct.c | 3 ++- src/lib/internal.h | 27 +++++++++++++++++++++++++++ src/lib/metric.c | 25 +++++++++++++------------ src/lib/notcurses.c | 27 +++------------------------ 4 files changed, 45 insertions(+), 37 deletions(-) diff --git a/src/lib/direct.c b/src/lib/direct.c index c77fd0d44f..1667acd245 100644 --- a/src/lib/direct.c +++ b/src/lib/direct.c @@ -893,8 +893,9 @@ ncdirect* ncdirect_core_init(const char* termtype, FILE* outfp, uint64_t flags){ } const char* encoding = nl_langinfo(CODESET); bool utf8 = false; - if(encoding && strcmp(encoding, "UTF-8") == 0){ + if(encoding && encoding_is_utf8(encoding)){ utf8 = true; + ncmetric_use_utf8(); } if(setup_signals(ret, (flags & NCDIRECT_OPTION_NO_QUIT_SIGHANDLERS), true, ncdirect_stop_minimal)){ diff --git a/src/lib/internal.h b/src/lib/internal.h index 168d3f1a9a..49a08bbbaf 100644 --- a/src/lib/internal.h +++ b/src/lib/internal.h @@ -1889,6 +1889,33 @@ int putenv_term(const char* termname) __attribute__ ((nonnull (1))); int set_loglevel_from_env(ncloglevel_e* loglevel) __attribute__ ((nonnull (1))); +// glibc's _nl_normalize_charset() converts to lowercase, removing everything +// but alnums. furthermore, "cs" is a valid prefix meaning "character set". +static inline bool +encoding_is_utf8(const char *enc){ + if(tolower(enc[0]) == 'c' && tolower(enc[1]) == 's'){ // strncasecmp() isn't ansi/iso + enc += 2; // skip initial "cs" if present. + } + const char utfstr[] = "utf8"; + const char* match = utfstr; + while(*enc){ + if(isalnum(*enc)){ // we only care about alnums + if(tolower(*enc) != tolower(*match)){ + return false; + } + ++match; + } + ++enc; + } + if(*match){ + return false; + } + return true; +} + +// tell ncmetric that utf8 is available. should be per-context, but isn't. 
+void ncmetric_use_utf8(void); + #undef API #undef ALLOC diff --git a/src/lib/metric.c b/src/lib/metric.c index ab78ed7511..26bb9d6f9b 100644 --- a/src/lib/metric.c +++ b/src/lib/metric.c @@ -8,26 +8,27 @@ static const wchar_t UTF8_SUBPREFIX[] = L"mµnpfazy"; // 10^24-1 static const wchar_t ASCII_SUBPREFIX[] = L"munpfazy"; // 10^24-1 + +// we want to use UTF8_SUBPREFIX if we have utf8 available to us. we could +// pull this out of const struct notcurses*, except that ncnmetric() doesn't +// take one, and we don't want to break the API. instead, we call this from +// notcurses_init() when we create a utf8 context. a gross hack =\. +static pthread_once_t utf8_verdict = PTHREAD_ONCE_INIT; static const wchar_t* SUBPREFIXES = ASCII_SUBPREFIX; -static pthread_once_t utf8_detector = PTHREAD_ONCE_INIT; -// sure hope we've called setlocale() by the time we hit this! static void -detect_utf8(void){ - const char* encoding = nl_langinfo(CODESET); - if(encoding){ - if(strcmp(encoding, "UTF-8") == 0){ - SUBPREFIXES = UTF8_SUBPREFIX; - } - } +ncmetric_use_utf8_internal(void){ + SUBPREFIXES = UTF8_SUBPREFIX; +} + +void ncmetric_use_utf8(void){ + pthread_once(&utf8_verdict, ncmetric_use_utf8_internal); } const char* ncnmetric(uintmax_t val, size_t s, uintmax_t decimal, char* buf, int omitdec, uintmax_t mult, int uprefix){ - // FIXME this is global to the process...ick :/ - fesetround(FE_TONEAREST); - pthread_once(&utf8_detector, detect_utf8); + fesetround(FE_TONEAREST); // FIXME global to the process...ick :/ // these two must have the same number of elements const wchar_t* subprefixes = SUBPREFIXES; const wchar_t prefixes[] = L"KMGTPEZY"; // 10^21-1 encompasses 2^64-1 diff --git a/src/lib/notcurses.c b/src/lib/notcurses.c index c5b96c7cbd..95ad3a6938 100644 --- a/src/lib/notcurses.c +++ b/src/lib/notcurses.c @@ -1095,30 +1095,6 @@ int ncplane_destroy_family(ncplane *ncp){ return ret; } -// glibc's _nl_normalize_charset() converts to lowercase, removing everything -// but alnums. 
furthermore, "cs" is a valid prefix meaning "character set". -static bool -encoding_is_utf8(const char *enc){ - if(tolower(enc[0]) == 'c' && tolower(enc[1]) == 's'){ // strncasecmp() isn't ansi/iso - enc += 2; // skip initial "cs" if present. - } - const char utfstr[] = "utf8"; - const char* match = utfstr; - while(*enc){ - if(isalnum(*enc)){ // we only care about alnums - if(tolower(*enc) != tolower(*match)){ - return false; - } - ++match; - } - ++enc; - } - if(*match){ - return false; - } - return true; -} - // it's critical that we're using UTF-8 encoding if at all possible. since the // client might not have called setlocale(2) (if they weren't reading the // directions...), go ahead and try calling setlocale(LC_ALL, "") and then @@ -1271,6 +1247,9 @@ notcurses_early_init(const struct notcurses_options* opts, FILE* fp, unsigned* u free(ret); return NULL; } + if(utf8){ + ncmetric_use_utf8(); + } return ret; }