-
Notifications
You must be signed in to change notification settings - Fork 1
/
publicsuffix.py
106 lines (76 loc) · 2.52 KB
/
publicsuffix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
"""Public Suffix List module for Python.
"""
import codecs
import os.path
class PublicSuffixList(object):
def __init__(self, input_file=None):
"""Reads and parses public suffix list.
input_file is a file object or another iterable that returns
lines of a public suffix list file. If input_file is None, an
UTF-8 encoded file named "publicsuffix.txt" in the same
directory as this Python module is used.
The file format is described at http://publicsuffix.org/list/
"""
if input_file is None:
input_path = os.path.join(os.path.dirname(__file__), 'publicsuffix.txt')
input_file = codecs.open(input_path, "r", "utf8")
root = self._build_structure(input_file)
self.root = self._simplify(root)
def _find_node(self, parent, parts):
if not parts:
return parent
if len(parent) == 1:
parent.append({})
assert len(parent) == 2
negate, children = parent
child = parts.pop()
child_node = children.get(child, None)
if not child_node:
children[child] = child_node = [0]
return self._find_node(child_node, parts)
def _add_rule(self, root, rule):
if rule.startswith('!'):
negate = 1
rule = rule[1:]
else:
negate = 0
parts = rule.split('.')
self._find_node(root, parts)[0] = negate
def _simplify(self, node):
if len(node) == 1:
return node[0]
return (node[0], dict((k, self._simplify(v)) for (k, v) in node[1].items()))
def _build_structure(self, fp):
root = [0]
for line in fp:
line = line.strip()
if line.startswith('//') or not line:
continue
self._add_rule(root, line.split()[0].lstrip('.'))
return root
def _lookup_node(self, matches, depth, parent, parts):
if parent in (0, 1):
negate = parent
children = None
else:
negate, children = parent
matches[-depth] = negate
if depth < len(parts) and children:
for name in ('*', parts[-depth]):
child = children.get(name, None)
if child is not None:
self._lookup_node(matches, depth+1, child, parts)
def get_public_suffix(self, domain):
"""get_public_suffix("www.example.com") -> "example.com"
Calling this function with a DNS name will return the
public suffix for that name.
Note that for internationalized domains the list at
http://publicsuffix.org uses decoded names, so it is
up to the caller to decode any Punycode-encoded names.
"""
parts = domain.lower().strip('.').split('.')
hits = [None] * len(parts)
self._lookup_node(hits, 1, self.root, parts)
for i, what in enumerate(hits):
if what is not None and what == 0:
return '.'.join(parts[i:])