Skip to content

Commit

Permalink
Update utf8proc to v2.6.0
Browse files Browse the repository at this point in the history
  • Loading branch information
koutcher committed Dec 13, 2020
1 parent 29b04e9 commit 9e67e01
Show file tree
Hide file tree
Showing 3 changed files with 7,423 additions and 5,933 deletions.
21 changes: 18 additions & 3 deletions compat/utf8proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
* Unicode data files.
*
* The original data files are available at
* http://www.unicode.org/Public/UNIDATA/
* https://www.unicode.org/Public/UNIDATA/
*
* Please notice the copyright statement in the file "utf8proc_data.c".
*/
Expand Down Expand Up @@ -290,8 +290,11 @@ static utf8proc_bool grapheme_break_simple(int lbc, int tbc) {

static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t *state)
{
int lbc_override = ((state && *state != UTF8PROC_BOUNDCLASS_START)
? *state : lbc);
int lbc_override;
if (*state == UTF8PROC_BOUNDCLASS_START)
*state = lbc_override = lbc;
else
lbc_override = *state;
utf8proc_bool break_permitted = grapheme_break_simple(lbc_override, tbc);
if (state) {
// Special support for GB 12/13 made possible by GB999. After two RI
Expand Down Expand Up @@ -384,6 +387,18 @@ UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c)
return cu != UINT16_MAX ? seqindex_decode_index(cu) : c;
}

/* Return 1 if codepoint `c` is treated as lower-case, 0 otherwise.
 *
 * The decision is made purely from the case-mapping indices in the
 * property record of `c`: the codepoint counts as lower-case when its
 * lowercase index is the UINT16_MAX sentinel (utf8proc_totitle treats that
 * value as "no mapping") while its uppercase index holds a different value.
 */
UTF8PROC_DLLEXPORT int utf8proc_islower(utf8proc_int32_t c)
{
  const utf8proc_property_t *prop = utf8proc_get_property(c);

  /* A codepoint that still has a lowercase mapping is not lower-case. */
  if (prop->lowercase_seqindex != UINT16_MAX) {
    return 0;
  }

  /* Both indices equal (i.e. both UINT16_MAX here) means the codepoint is caseless. */
  return prop->uppercase_seqindex != prop->lowercase_seqindex;
}

/* Return 1 if codepoint `c` is treated as upper-case, 0 otherwise.
 *
 * The decision is made from the property record of `c`: the codepoint
 * counts as upper-case when its uppercase index is the UINT16_MAX sentinel
 * (utf8proc_totitle treats that value as "no mapping"), its lowercase index
 * holds a different value, and its category is not UTF8PROC_CATEGORY_LT
 * (presumably the title-case letter category -- confirm in utf8proc.h).
 */
UTF8PROC_DLLEXPORT int utf8proc_isupper(utf8proc_int32_t c)
{
  const utf8proc_property_t *prop = utf8proc_get_property(c);

  /* A codepoint that still has an uppercase mapping is not upper-case. */
  if (prop->uppercase_seqindex != UINT16_MAX) {
    return 0;
  }

  /* Identical indices (both UINT16_MAX here) means the codepoint is caseless. */
  if (prop->lowercase_seqindex == prop->uppercase_seqindex) {
    return 0;
  }

  /* Codepoints in the LT category are excluded even if the indices match the pattern. */
  return prop->category != UTF8PROC_CATEGORY_LT;
}

/* return a character width analogous to wcwidth (except portable and
hopefully less buggy than most system wcwidth functions). */
UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t c) {
Expand Down
16 changes: 14 additions & 2 deletions compat/utf8proc.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
/** The MAJOR version number (increased when backwards API compatibility is broken). */
#define UTF8PROC_VERSION_MAJOR 2
/** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */
#define UTF8PROC_VERSION_MINOR 5
#define UTF8PROC_VERSION_MINOR 6
/** The PATCH version (increased for fixes that do not change the API). */
#define UTF8PROC_VERSION_PATCH 0
/** @} */
Expand Down Expand Up @@ -502,7 +502,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(
* string and orders the decomposed sequences correctly.
*
* If the @ref UTF8PROC_NULLTERM flag in `options` is set, processing
* will be stopped, when a NULL byte is encounted, otherwise `strlen`
* will be stopped, when a NULL byte is encountered, otherwise `strlen`
* bytes are processed. The result (in the form of 32-bit unicode
* codepoints) is written into the buffer being pointed to by
* `buffer` (which must contain at least `bufsize` entries). In case of
Expand Down Expand Up @@ -635,6 +635,18 @@ UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c);
*/
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c);

/**
* Given a codepoint `c`, return `1` if the codepoint corresponds to a lower-case character
* and `0` otherwise.
*
* The answer is derived from the codepoint's case-mapping property data: a
* codepoint is reported as lower-case when it has no lowercase mapping of
* its own but does have an uppercase mapping. Codepoints with neither
* mapping are reported as `0`.
*/
UTF8PROC_DLLEXPORT int utf8proc_islower(utf8proc_int32_t c);

/**
* Given a codepoint `c`, return `1` if the codepoint corresponds to an upper-case character
* and `0` otherwise.
*
* The answer is derived from the codepoint's case-mapping property data: a
* codepoint is reported as upper-case when it has no uppercase mapping of
* its own but does have a lowercase mapping, and its category is not
* `UTF8PROC_CATEGORY_LT`. Codepoints with neither mapping are reported as `0`.
*/
UTF8PROC_DLLEXPORT int utf8proc_isupper(utf8proc_int32_t c);

/**
* Given a codepoint, return a character width analogous to `wcwidth(codepoint)`,
* except that a width of 0 is returned for non-printable codepoints
Expand Down
Loading

0 comments on commit 9e67e01

Please sign in to comment.