Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up identifier detection #89

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions slimcc.h
Original file line number Diff line number Diff line change
Expand Up @@ -601,8 +601,11 @@ extern bool dont_reuse_stack;

int encode_utf8(char *buf, uint32_t c);
uint32_t decode_utf8(char **new_pos, char *p);
bool is_ident1(uint32_t c);
bool is_ident2(uint32_t c);
// Inline ASCII fast paths for identifier classification.
//
// is_ident1_ascii(c): true if c is an ASCII letter, '_' or '$'.
// is_ident2_ascii(c): true if c satisfies is_ident1_ascii or is an ASCII digit.
//
// These are macros, so the argument is evaluated more than once: pass only
// side-effect-free expressions. Every use of the argument is parenthesized
// so that an operator expression such as `x | y` is classified as a whole
// instead of being split apart by the comparison operators.
#define is_ident1_ascii(c) \
  (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_' || (c) == '$')
#define is_ident2_ascii(c) (is_ident1_ascii(c) || ((c) >= '0' && (c) <= '9'))

// Values below 0x80 are tested inline; everything else is handed to the
// out-of-line is_ident1_non_ascii().
#define is_ident1(c) (((c) < 0x80) ? is_ident1_ascii(c) : is_ident1_non_ascii(c))

bool is_ident1_non_ascii(uint32_t c);
bool is_ident2_non_ascii(uint32_t c);
int display_width(char *p, int len);

//
Expand Down
12 changes: 6 additions & 6 deletions tokenize.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,15 +129,15 @@ static Token *new_token(TokenKind kind, char *start, char *end) {
static int read_ident(char *p) {
char *start = p;

for (bool is_first = true;; is_first = false) {
if (Isalnum(*p) || *p == '_' || *p == '$') {
for (;;) {
if (p == start ? is_ident1_ascii(*p) : is_ident2_ascii(*p)) {
p++;
continue;
}
if ((unsigned char)*p >= 128) {
if ((uint32_t)*p >= 128) {
char *pos;
uint32_t c = decode_utf8(&pos, p);
if (is_first ? is_ident1(c) : is_ident2(c)) {
if (p == start ? is_ident1_non_ascii(c) : is_ident2_non_ascii(c)) {
p = pos;
continue;
}
Expand Down Expand Up @@ -424,13 +424,13 @@ static Token *new_pp_number(char *start, char *p) {
p += 2;
continue;
}
if (Isalnum(*p) || *p == '_' || *p == '$') {
if (is_ident2_ascii(*p)) {
p++;
continue;
}
if ((unsigned char)*p >= 128) {
char *pos;
if (is_ident2(decode_utf8(&pos, p))) {
if (is_ident2_non_ascii(decode_utf8(&pos, p))) {
p = pos;
continue;
}
Expand Down
9 changes: 4 additions & 5 deletions unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,8 @@ static bool in_range(uint32_t c, UTF32Range *range, int len) {
// the first character of an identifier.
//
// Non-ASCII characters correspond to XID_Start set of Unicode 15.1.
bool is_ident1(uint32_t c) {
bool is_ident1_non_ascii(uint32_t c) {
static UTF32Range range[] = {
{'$', '$'}, {'A', 'Z'}, {'_', '_'}, {'a', 'z'},
{0x00AA, 0x00AA}, {0x00B5, 0x00B5}, {0x00BA, 0x00BA}, {0x00C0, 0x00D6},
{0x00D8, 0x00F6}, {0x00F8, 0x02C1}, {0x02C6, 0x02D1}, {0x02E0, 0x02E4},
{0x02EC, 0x02EC}, {0x02EE, 0x02EE}, {0x0370, 0x0374}, {0x0376, 0x0377},
Expand Down Expand Up @@ -269,9 +268,9 @@ bool is_ident1(uint32_t c) {
// character of an identifier.
//
// Non-ASCII characters correspond to XID_Continue set of Unicode 15.1.
bool is_ident2(uint32_t c) {
bool is_ident2_non_ascii(uint32_t c) {
static UTF32Range range[] = {
{'0', '9'}, {0x00B7, 0x00B7}, {0x0300, 0x036F}, {0x0387, 0x0387},
{0x00B7, 0x00B7}, {0x0300, 0x036F}, {0x0387, 0x0387},
{0x0483, 0x0487}, {0x0591, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2},
{0x05C4, 0x05C5}, {0x05C7, 0x05C7}, {0x0610, 0x061A}, {0x064B, 0x0669},
{0x0670, 0x0670}, {0x06D6, 0x06DC}, {0x06DF, 0x06E4}, {0x06E7, 0x06E8},
Expand Down Expand Up @@ -368,7 +367,7 @@ bool is_ident2(uint32_t c) {
{0xE0100, 0xE01EF}
};

return is_ident1(c) || in_range(c, range, sizeof(range) / sizeof(UTF32Range));
return is_ident1(c) || in_range(c, range, sizeof(range) / sizeof(UTF32Range));
}

// Returns the number of columns needed to display a given
Expand Down