-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathparser.py
executable file
·88 lines (66 loc) · 2.31 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import re
import sys
import os.path
import requests
# str.format() template for the Rust-syntax function text that main() prints.
# Doubled braces ({{ and }}) are format escapes that come out as literal
# braces; {range_matches} is the only placeholder and receives the
# concatenated match arms built from RANGE_MATCH_TEMPLATE.
# NOTE(review): UNICODE is referenced by the generated text but not defined
# in this script -- presumably a lookup map provided by the consuming code.
GET_NAME_CHECKED_TEMPLATE = \
'''
pub fn get_name_checked(c: u32) -> Option<&'static str> {{
Some(match UNICODE.get(&c) {{
Some(s) => s,
None => match c {{
{range_matches}
_ => return None,
}},
}})
}}
'''
# str.format() template for a single match arm covering an inclusive range:
# {first} and {last} are rendered as uppercase hexadecimal integers, {name}
# is inserted between double quotes, and the arm ends with a newline.
RANGE_MATCH_TEMPLATE = ' 0x{first:X}..=0x{last:X} => "{name}",\n'
def get_data(line):
    """Parse one semicolon-separated record into ``(code_point, name)``.

    The first field is read as a hexadecimal integer and the second field is
    the name.  When the name is the placeholder ``<control>`` and the
    eleventh field (index 10) is non-empty, that field is returned as the
    name instead.

    Args:
        line: A single record, already stripped of its line ending.

    Returns:
        A ``(int, str)`` tuple of the code point and the chosen name.
    """
    fields = line.split(';')
    code_point = int(fields[0], 16)
    # Control characters carry only a placeholder in the name field; fall
    # back to field 10 whenever it has something to offer.
    if fields[1] == '<control>' and fields[10] != '':
        return (code_point, fields[10])
    return (code_point, fields[1])
def main(filename, dstfilename):
    """Rewrite a UnicodeData-style file and print the range-lookup function.

    Every non-blank line of ``filename`` is parsed with ``get_data`` and
    written to ``dstfilename`` as ``<HEX CODE POINT>;<NAME>``.  Names of the
    form ``<Range Name, First>`` / ``<Range Name, Last>`` are additionally
    recorded as range boundaries.  Once the whole file has been processed,
    ``GET_NAME_CHECKED_TEMPLATE`` is filled with one ``RANGE_MATCH_TEMPLATE``
    arm per recorded range and printed to stdout.

    Args:
        filename: Path of the semicolon-separated input file.
        dstfilename: Path of the output file; it is overwritten.

    Raises:
        KeyError: If a range has a ``First`` entry but no ``Last`` entry.
    """
    # Compiled once instead of being looked up for every input line.
    range_pattern = re.compile(r'<([\w\s]+), (First|Last)>')
    range_firsts = {}
    range_lasts = {}
    with open(filename, encoding='utf-8') as fin, \
            open(dstfilename, 'w', encoding='utf-8') as fout:
        for raw_line in fin:
            line = raw_line.strip()
            if not line:
                # A blank line (e.g. a trailing empty line) holds no record
                # and would otherwise crash get_data() in int('', 16).
                continue
            num, name = get_data(line)
            fout.write(f'{num:X};{name}\n')
            if match := range_pattern.search(name):
                range_name, boundary = match.groups()
                if boundary == 'First':
                    range_firsts[range_name] = num
                else:
                    range_lasts[range_name] = num

    # One match arm per range, in the order the First entries were seen.
    range_matches = ''.join(
        RANGE_MATCH_TEMPLATE.format(
            first=first, last=range_lasts[range_name], name=range_name)
        for range_name, first in range_firsts.items()
    )
    print(GET_NAME_CHECKED_TEMPLATE.format(range_matches=range_matches))
if __name__ == '__main__':
    # Command-line entry point.
    #   no arguments  -> read ./UnicodeData.txt (downloading it first when it
    #                    does not exist) and write ./UnicodeDataFixed.txt
    #   one argument  -> custom source file, default destination
    #   two arguments -> custom source and destination files
    #   anything else -> print usage and exit with status 1
    latest = "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt"
    src = 'UnicodeData.txt'
    dst = 'UnicodeDataFixed.txt'
    if len(sys.argv) == 1:
        if not os.path.exists(src):
            print(f'Downloading {src} from {latest}')
            # A timeout keeps the script from hanging forever on a stalled
            # connection.
            r = requests.get(latest, timeout=60)
            # Stop on an HTTP error instead of saving the error page as
            # UnicodeData.txt, which later runs would reuse without
            # downloading again.
            r.raise_for_status()
            with open(src, 'w', encoding='utf-8') as src_f:
                src_f.write(r.text)
    elif len(sys.argv) == 2:
        src = sys.argv[1]
    elif len(sys.argv) == 3:
        src = sys.argv[1]
        dst = sys.argv[2]
    else:
        print(f'Heck, use it like this.\n {sys.argv[0]} sourcefile.txt destfile.txt')
        # sys.exit is always available; the bare exit() helper is injected by
        # the site module and may be missing (e.g. under python -S).
        sys.exit(1)
    main(src, dst)