-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlink_parser.py
53 lines (43 loc) · 1.33 KB
/
link_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import re
# Matches one link tag: a literal "[[", the shortest possible run of characters
# (non-greedy, and not spanning newlines since DOTALL is not set), then "]]".
link_tags = re.compile(r'\[\[.*?\]\]')
def extract_link_tags(text):
    """Return every ``[[...]]`` tag found in ``text``, brackets included.

    The tags are returned as a list of strings in the order they occur.
    """
    # The pattern has no capture groups, so group(0) is the whole tag.
    return [match.group(0) for match in link_tags.finditer(text)]
def non_empty_links(link):
    """Return True unless ``link`` is exactly the empty string."""
    is_non_empty = link != ''
    return is_non_empty
def remove_enclosing_brackets(link):
    """Drop the first two and last two characters of ``link``.

    For a ``[[...]]`` tag this leaves only the text between the brackets.
    No check is made that the dropped characters really are brackets.
    """
    bracket_width = 2  # Width of both "[[" and "]]".
    return link[bracket_width:-bracket_width]
"""
Split the link by the separator and return the field in the
given index. Negative indices start from the end of the array.
"""
def get_field(link, seperator, index):
    """Split ``link`` on ``seperator`` and return the field at ``index``.

    Negative indices count from the end of the field list, following the
    usual Python sequence convention.

    Args:
        link: The string to split.
        seperator: The divider to split on. The misspelled name is kept
            because callers may pass it by keyword.
        index: Position of the field to return.

    Returns:
        The selected field (possibly an empty string), or ``False`` when
        ``seperator`` does not occur in ``link``.

    Raises:
        IndexError: If ``index`` is outside the range of the split fields.
    """
    fields = link.split(seperator)
    if len(fields) == 1:
        # Separator not present.
        return False
    # Native indexing already supports negative indices. Adding len(fields)
    # by hand would let an index that is too negative wrap around a second
    # time and silently return the wrong field instead of raising.
    return fields[index]
def extract_link(tag_content):
    """Return the link target from the text inside a ``[[...]]`` tag.

    Supported formats, tried in this order:

    * ``display text->link`` -- the rightmost ``->`` is the divider.
    * ``link<-display text`` -- the leftmost ``<-`` is the divider.
    * ``display text|link`` -- the rightmost ``|`` is the divider.
    * ``link`` -- no divider; the whole content is the target.

    A format whose divider is present but whose target side is empty is
    skipped and the next format is tried, so ``text->`` is returned
    unchanged rather than as an empty target.

    Args:
        tag_content: The tag text with the enclosing brackets removed.

    Returns:
        The link target as a string.
    """
    # Each divider is searched for once. The middle element of the
    # partition result is empty when the divider is absent.
    _, divider, target = tag_content.rpartition('->')
    if divider and target:
        return target

    target, divider, _ = tag_content.partition('<-')
    if divider and target:
        return target

    _, divider, target = tag_content.rpartition('|')
    if divider and target:
        return target

    return tag_content
def link_parser(text):
    """Return the set of distinct, non-empty link targets found in ``text``.

    Every ``[[...]]`` tag is located, stripped of its enclosing brackets and
    reduced to its link target; empty targets are discarded.
    """
    targets = set()
    for tag in extract_link_tags(text):
        target = extract_link(remove_enclosing_brackets(tag))
        if non_empty_links(target):
            targets.add(target)
    return targets