Skip to content

Commit

Permalink
prepare multi-byte support
Browse files Browse the repository at this point in the history
and fix isalpha crashes on bad libc's. Fixes GH kokke#70.
e.g. UTF-8.
  • Loading branch information
Reinhard Urban authored and rurban committed Jun 11, 2022
1 parent 6c8467e commit 6b00d75
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 3 deletions.
7 changes: 4 additions & 3 deletions re.c
Original file line number Diff line number Diff line change
Expand Up @@ -296,15 +296,15 @@ void re_print(regex_t* pattern)
/* Private functions: */
/*
 * Returns non-zero when c is a decimal digit as classified by isdigit(),
 * zero otherwise.
 *
 * Plain char may be signed, and the <ctype.h> classifiers are only defined
 * for values representable as unsigned char (or EOF). Converting first keeps
 * bytes >= 0x80 (e.g. parts of a UTF-8 sequence) from becoming negative
 * arguments, which is undefined behaviour and crashes on some libcs.
 */
static int matchdigit(char c)
{
  return isdigit((unsigned char)c);
}
/*
 * Returns non-zero when c is alphabetic as classified by isalpha(),
 * zero otherwise.
 *
 * The argument is converted to unsigned char before the call: passing a
 * negative plain char (any byte >= 0x80 where char is signed) to a <ctype.h>
 * classifier is undefined behaviour and crashes on some libcs.
 */
static int matchalpha(char c)
{
  return isalpha((unsigned char)c);
}
/*
 * Returns non-zero when c is white-space as classified by isspace(),
 * zero otherwise.
 *
 * The argument is converted to unsigned char before the call: passing a
 * negative plain char (any byte >= 0x80 where char is signed) to a <ctype.h>
 * classifier is undefined behaviour and crashes on some libcs.
 */
static int matchwhitespace(char c)
{
  return isspace((unsigned char)c);
}
static int matchalphanum(char c)
{
Expand Down Expand Up @@ -407,6 +407,7 @@ static int matchstar(regex_t p, regex_t* pattern, const char* text, int* matchle
{
int prelen = *matchlength;
const char* prepoint = text;
// TODO check if multibyte, and use mbtowc() then
while ((text[0] != '\0') && matchone(p, *text))
{
text++;
Expand Down
5 changes: 5 additions & 0 deletions tests/test1.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include <stdio.h>
#include <string.h>
//#include <locale.h>
#include "re.h"


Expand Down Expand Up @@ -90,6 +91,8 @@ char* test_vector[][4] =
{ NOK, "a\\", "a\\", (char*) 0 },
{ NOK, "\\", "\\", (char*) 0 },
{ OK, "\\\\", "\\", (char*) 1 },
// no multibyte support yet
//{ OK, "\\w+", "Çüéâ", (char*) 4 },
};


Expand All @@ -106,6 +109,8 @@ int main()
size_t nfailed = 0;
size_t i;

//setlocale(LC_CTYPE, "en_US.UTF-8");

for (i = 0; i < ntests; ++i)
{
pattern = test_vector[i][1];
Expand Down

0 comments on commit 6b00d75

Please sign in to comment.