-
Notifications
You must be signed in to change notification settings - Fork 0
/
module.jai
59 lines (52 loc) · 1.72 KB
/
module.jai
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
// Reports whether `b` has the bit pattern 10xxxxxx, i.e. whether it is a
// UTF-8 continuation byte.
is_utf_cont :: inline (b: u8) -> bool {
    // The two most significant bits of a continuation byte are exactly `10`.
    top_bits := b >> 6;
    return top_bits == 0x2;
}
// Parses `str` as a base-16 integer and returns it as a 16-bit code unit.
//
// Returns (value, true) only when the whole string was consumed by the
// conversion and the value lies in 0..0xFFFF. In every other case it
// returns (0, false).
parse_unicode :: (str: string) -> result: u16, success: bool {
    val, success, remainder := string_to_int(str, base = 16);

    // Conversion failed, or trailing characters were left unparsed.
    if !success || remainder.count  return 0, false;

    // Range-check on both sides before narrowing to u16. The lower bound
    // guards against a negative value reaching the cast below
    // (NOTE(review): string_to_int presumably accepts a leading '-'; confirm).
    if val < 0 || val > 0xFFFF  return 0, false;

    return xx val, true;
}
// Writes the UTF-8 encoding of the 16-bit value `val` to `result` and
// returns the number of bytes written (1, 2 or 3).
//
// `result` must point to storage for at least as many bytes as are written;
// up to three bytes may be stored.
encode_utf8 :: (val: u16, result: *u8) -> len: u8 {
    // Below 0x80: a single byte carrying the value itself.
    if val < 0x80 {
        result[0] = xx (val & 0x7F);
        return 1;
    }

    low_six := val & 0x3F;

    // 0x80..0x7FF: 110xxxxx 10xxxxxx
    if val < 0x800 {
        result[0] = xx (0xC0 | (val >> 6));
        result[1] = xx (0x80 | low_six);
        return 2;
    }

    // 0x800..0xFFFF: 1110xxxx 10xxxxxx 10xxxxxx
    mid_six := (val >> 6) & 0x3F;
    result[0] = xx (0xE0 | (val >> 12));
    result[1] = xx (0x80 | mid_six);
    result[2] = xx (0x80 | low_six);
    return 3;
}
// Returns true when every byte of `str` belongs to a well-formed UTF-8
// sequence, and false at the first malformed one. An empty string is valid.
//
// Rejected input:
//   - stray continuation bytes and lead bytes outside 0xc2..0xf4,
//   - sequences truncated by the end of the string,
//   - lead bytes followed by non-continuation bytes,
//   - overlong 3- and 4-byte forms,
//   - UTF-16 surrogates (U+D800..U+DFFF),
//   - code points above U+10FFFF.
is_valid_utf8 :: (str: string) -> valid:bool {
    // Explicit cursor: each iteration consumes one whole sequence.
    i := 0;
    while i < str.count {
        cur := str[i];

        // Bytes below 0x80 stand alone.
        if cur < 0x80 {
            i += 1;
            continue;
        }

        // A lead byte must be between 0xc2 and 0xf4 inclusive:
        // 0x80..0xbf are continuation bytes, 0xc0/0xc1 could only start an
        // overlong 2-byte form, and 0xf5+ would encode beyond U+10FFFF.
        if cur < 0xc2 || cur > 0xf4 return false;

        if cur < 0xe0 { // 2-byte sequence
            if i + 1 >= str.count || !is_utf_cont(str[i+1]) return false;
            i += 2;
        } else if cur < 0xf0 { // 3-byte sequence
            if i + 2 >= str.count || !is_utf_cont(str[i+1]) || !is_utf_cont(str[i+2]) return false;
            // Reject overlong forms: 0xe0 followed by 0x80..0x9f would
            // encode a code point below U+0800.
            if cur == 0xe0 && str[i+1] < 0xa0 return false;
            // Reject surrogates: 0xed followed by 0xa0..0xbf is U+D800..U+DFFF.
            if cur == 0xed && str[i+1] > 0x9f return false;
            i += 3;
        } else { // 4-byte sequence
            if i + 3 >= str.count || !is_utf_cont(str[i+1]) || !is_utf_cont(str[i+2]) || !is_utf_cont(str[i+3]) return false;
            // Make sure it is in the valid range (0x10000 - 0x10ffff).
            if cur == 0xf0 && str[i+1] < 0x90 return false; // overlong, below U+10000
            if cur == 0xf4 && str[i+1] > 0x8f return false; // above U+10FFFF
            i += 4;
        }
    }
    return true;
}
#scope_file
#import "Basic";