forked from kylewm/brevity
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfetch_list.py
46 lines (33 loc) · 958 Bytes
/
fetch_list.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import pprint
import requests
import json
# Download the IANA list of top-level domains. The parsing below treats the
# payload as one name per line, with '#' lines being comments.
r = requests.get('http://data.iana.org/TLD/tlds-alpha-by-domain.txt',
                 timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as TLDs.
r.raise_for_status()
lines = r.text.splitlines()

# Bare lower-case names; comment lines and blank lines are skipped.
stripped = (line.strip() for line in lines)
names = [s.lower() for s in stripped if s and not s.startswith('#')]

# Quoted copies, used only to print a paste-ready Python list literal.
tlds = ["'" + name + "'" for name in names]
print('[' + ', '.join(tlds) + ']')

# make a prefix tree
# Each node is a dict mapping one character to its child node; the key '$'
# marks that a complete name ends at that node.
# The tree is built from the bare names: feeding it the quoted strings would
# turn the quote characters into tree nodes and leak them into the regex.
tree = {}
for name in names:
    branch = tree
    for letter in name:
        branch = branch.setdefault(letter, {})
    branch['$'] = {}

# Uncomment to dump the tree for inspection:
# with open('tree.json', 'w') as f:
#     json.dump(tree, f, indent=True)
# build a regex
def build_regex(branch):
    """Return a regex fragment matching every word stored under ``branch``.

    ``branch`` is a prefix-tree node: a dict mapping a single character to
    its child node, where the special key ``'$'`` means a word ends at this
    node. Children are visited in sorted key order, so the output is
    deterministic.

    A node without character children yields ``''``. A node with exactly one
    child and no end marker yields that child's fragment unwrapped. Anything
    else is wrapped in a non-capturing group, which is made optional (``?``)
    when a word may also end at this node.
    """
    alternatives = [
        key + build_regex(branch[key])
        for key in sorted(branch)
        if key != '$'
    ]
    if not alternatives:
        return ''

    word_ends_here = '$' in branch
    if not word_ends_here and len(alternatives) == 1:
        # A lone mandatory continuation needs no grouping.
        return alternatives[0]

    group = '(?:' + '|'.join(alternatives) + ')'
    if word_ends_here:
        # The continuation is optional because a shorter word stops here.
        return group + '?'
    return group
# Turn the whole prefix tree into a single pattern and print it.
# The result is deliberately not named `re`: that is the name of the standard
# regex module, and binding it here would clobber a later `import re`.
tld_regex = build_regex(tree)
print(tld_regex)