Skip to content

Commit

Permalink
std: add ascii with C ASCII character classes
Browse files Browse the repository at this point in the history
Does NOT look at the locale the way the C functions do.

       int isalnum(int c);
       int isalpha(int c);
       int iscntrl(int c);
       int isdigit(int c);
       int isgraph(int c);
       int islower(int c);
       int isprint(int c);
       int ispunct(int c);
       int isspace(int c);
       int isupper(int c);
       int isxdigit(int c);

       int isascii(int c);
       int isblank(int c);

       int toupper(int c);
       int tolower(int c);

Tested to match glibc (when using C locale) with this program:

const c = @cImport({
    // See ziglang#515
    @cdefine("_NO_CRT_STDIO_INLINE", "1");
    @Cinclude("stdio.h");
    @Cinclude("string.h");
    @Cinclude("ctype.h");
});

const std = @import("std");
const ascii = std.ascii;
const abort = std.os.abort;

export fn main(argc: c_int, argv: **u8) c_int {
    var i: u8 = undefined;
    i = 0;
    while (true) {
        if (ascii.isAlNum(i) != (c.isalnum(i) > 0)) { abort(); }
        if (ascii.isAlpha(i) != (c.isalpha(i) > 0)) { abort(); }
        if (ascii.isCtrl(i) != (c.iscntrl(i) > 0)) { abort(); }
        if (ascii.isDigit(i) != (c.isdigit(i) > 0)) { abort(); }
        if (ascii.isGraph(i) != (c.isgraph(i) > 0)) { abort(); }
        if (ascii.isLower(i) != (c.islower(i) > 0)) { abort(); }
        if (ascii.isPrint(i) != (c.isprint(i) > 0)) { abort(); }
        if (ascii.isPunct(i) != (c.ispunct(i) > 0)) { abort(); }
        if (ascii.isSpace(i) != (c.isspace(i) > 0)) { abort(); }
        if (ascii.isUpper(i) != (c.isupper(i) > 0)) { abort(); }
        if (ascii.isXDigit(i) != (c.isxdigit(i) > 0)) { abort(); }
        if (i == 255) { break; }
        i += 1;
    }

    _ = c.printf(c"Success!\n");
    return 0;
}
  • Loading branch information
shawnl committed Mar 22, 2019
1 parent b5cc92f commit 08fa521
Show file tree
Hide file tree
Showing 3 changed files with 234 additions and 0 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,7 @@ set(ZIG_CPP_SOURCES

set(ZIG_STD_FILES
"array_list.zig"
"ascii.zig"
"atomic.zig"
"atomic/int.zig"
"atomic/queue.zig"
Expand Down
232 changes: 232 additions & 0 deletions std/ascii.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
// Does NOT look at the locale the way C89's toupper(3), isspace() et cetera does.
// I could have taken only a u7 to make this clear, but it would be slower
// It is my opinion that encodings other than UTF-8 should not be supported.
//
// (and 128 bytes is not much to pay).
// Also does not handle Unicode character classes.
//
// https://upload.wikimedia.org/wikipedia/commons/thumb/c/cf/USASCII_code_chart.png/1200px-USASCII_code_chart.png

const tIndex = enum(u3) {
Alpha,
Hex,
Space,
Digit,
Lower,
Upper,
// Ctrl, < 0x20 || == DEL
// Print, = Graph || == ' '. NOT '\t' et cetera
Punct,
Graph,
//ASCII, | ~0b01111111
//isBlank, == ' ' || == '\x09'
};

const combinedTable = init: {
comptime var table: [256]u8 = undefined;

const std = @import("std");
const mem = std.mem;

const alpha = []u1{
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
};
const lower = []u1{
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
};
const upper = []u1{
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
const digit = []u1{
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
const hex = []u1{
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,

0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
const space = []u1{
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
const punct = []u1{
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,

1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
};
const graph = []u1{
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
};

comptime var i = 0;
inline while (i < 128) : (i += 1) {
table[i] =
u8(alpha[i]) << @enumToInt(tIndex.Alpha) |
u8(hex[i]) << @enumToInt(tIndex.Hex) |
u8(space[i]) << @enumToInt(tIndex.Space) |
u8(digit[i]) << @enumToInt(tIndex.Digit) |
u8(lower[i]) << @enumToInt(tIndex.Lower) |
u8(upper[i]) << @enumToInt(tIndex.Upper) |
u8(punct[i]) << @enumToInt(tIndex.Punct) |
u8(graph[i]) << @enumToInt(tIndex.Graph);
}
mem.set(u8, table[128..256], 0);
break :init table;
};

fn inTable(c: u8, t: tIndex) bool {
return (combinedTable[c] & (u8(1) << @enumToInt(t))) != 0;
}

pub fn isAlNum(c: u8) bool {
return (combinedTable[c] & ((u8(1) << @enumToInt(tIndex.Alpha)) |
u8(1) << @enumToInt(tIndex.Digit))) != 0;
}

pub fn isAlpha(c: u8) bool {
return inTable(c, tIndex.Alpha);
}

pub fn isCtrl(c: u8) bool {
return c < 0x20 or c == 127; //DEL
}

pub fn isCntrl(c: u8) bool {
return isCtrl(c);
}

pub fn isDigit(c: u8) bool {
return inTable(c, tIndex.Digit);
}

pub fn isGraph(c: u8) bool {
return inTable(c, tIndex.Graph);
}

pub fn isLower(c: u8) bool {
return inTable(c, tIndex.Lower);
}

pub fn isPrint(c: u8) bool {
return inTable(c, tIndex.Graph) or c == ' ';
}

pub fn isPunct(c: u8) bool {
return inTable(c, tIndex.Punct);
}

pub fn isSpace(c: u8) bool {
return inTable(c, tIndex.Space);
}

pub fn isUpper(c: u8) bool {
return inTable(c, tIndex.Upper);
}

pub fn isXDigit(c: u8) bool {
return inTable(c, tIndex.Hex);
}

pub fn isASCII(c: u8) bool {
return c < 128;
}

pub fn isBlank(c: u8) bool {
return (c == ' ') or (c == '\x09');
}

pub fn toUpper(c: u8) u8 {
if (isLower(c)) {
return c - 0x20;
} else {
return c;
}
}

pub fn toLower(c: u8) u8 {
if (isUpper(c)) {
return c + 0x20;
} else {
return c;
}
}

test "ascii character classes" {
const std = @import("std");
const testing = std.testing;

testing.expect('C' == toUpper('c'));
testing.expect(':' == toUpper(':'));
testing.expect('\xab' == toUpper('\xab'));
testing.expect('c' == toLower('C'));
testing.expect(isAlpha('c'));
testing.expect(!isAlpha('5'));
testing.expect(isSpace(' '));
}
1 change: 1 addition & 0 deletions std/std.zig
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ pub const pdb = @import("pdb.zig");
pub const rand = @import("rand.zig");
pub const rb = @import("rb.zig");
pub const sort = @import("sort.zig");
pub const ascii = @import("ascii.zig");
pub const testing = @import("testing.zig");
pub const unicode = @import("unicode.zig");
pub const valgrind = @import("valgrind.zig");
Expand Down

0 comments on commit 08fa521

Please sign in to comment.