This repository has been archived by the owner on Feb 24, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
clean_stop_names.py
104 lines (97 loc) · 3.09 KB
/
clean_stop_names.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import csvio
import sys
import re
# This cleans up stop names and tries to regularize them as much as possible
RULES = (
    # Flat sequence of (regex pattern, replacement) pairs, applied in order.

    # replace every period with a space, e.g. "N.Canal" -> "N Canal"
    r'\.', ' ',
    # collapse each run of whitespace into a single space
    r'\s+', ' ',
    # remove a trailing one-word parenthetical, e.g. "Foo (A)" -> "Foo "
    r'\(\w+\)$', '',
    # trim again in case the rules above left trailing or leading whitespace
    r'\s+$', '',
    r'^\s+', '',
)
SPELLING = (
    # Flat sequence of (text as found, replacement) pairs.  normalize()
    # escapes each "found" entry and matches it between word boundaries.

    # NOTE(review): 'Annunciation' is the usual English spelling, so this
    # first pair looks reversed -- confirm against the feed data before
    # changing it.
    'Annunciation', 'Annunication',
    'Behman', 'Behrman',
    'Carondolet', 'Carondelet',
    'Derbingy', 'Derbigny',
    'Downmam', 'Downman',
    'Dumanie', 'Dumaine',
    'Elks Place', 'Elk Place',
    'Esplanande', 'Esplanade',
    'Esplande', 'Esplanade',
    'Lonely Oaks', 'Lonely Oak',
    'Majestic Oak', 'Majestic Oaks',
    'Newcastle', 'New Castle',
    'Opeloousas', 'Opelousas',
    'Preiur', 'Prieur',
    'Wiliams', 'Williams',

    # wording variants folded into a single form
    'First', '1st',
    'Loyola/Tulane', 'Tulane/Loyola',
    "D' Hemecourt", "D'Hemecourt",
    "F C Williams", "Fannie C Williams",
    "Saint Paul's Church", "Saint Paul's",
    "Majestic Oakss", "Majestic Oaks",
)
REMAP = (
    # Flat sequence of (exact stop name, replacement) pairs.  normalize()
    # compares the whole cleaned name against the first entry of each pair,
    # so the tuple must always hold an even number of strings.  Keep the
    # trailing comma on every line: two adjacent string literals without a
    # comma are silently concatenated and shift every following pair.
    "Napoleon at St Charles Ave", "Napoleon at St Charles",
    "Paris Ave at Aviator", "Paris Ave at Aviators",
    "Downman at Fillmore", "Downman at Filmore",
    "Monroe at Mark", "Monroe at Marks",
    "Woodlawn at Tall Timbers", "Woodland at Tall Timbers",
    # Carrollton
    "Canal and Carrollton", "Canal and S Carrollton",
    "Canal at Carrollton", "Canal at S Carrollton",
    "Carrollton at Canal", "S Carrollton at Canal",
    "Carrollton at S Claiborne", "S Carrollton at S Claiborne",
    "Carrollton at Willow", "S Carrollton at Willow",
    "Claiborne at Carrollton", "S Claiborne at S Carrollton",
    # Claiborne
    "Claiborne at Washington", "S Claiborne at Washington",
    "Esplanade at Claiborne", "Esplanade at N Claiborne",
    "M L King at Claiborne", "M L King at S Claiborne",
    "Napoleon at Claiborne", "Napoleon at S Claiborne",
    "Washington at Claiborne", "Washington at S Claiborne",
    # Broad
    "St Bernard at Broad", "St Bernard at N Broad",
    "Tulane at Broad", "Tulane at S Broad",
    "Washington at Broad", "Washington at S Broad",
    "Broad at Washington", "S Broad at Washington",
    # Paris
    "Paris at Burbank", "Paris Ave at Burbank",
    "Paris at Mirabeau", "Paris Ave at Mirabeau",
    "Mirabeau at Paris", "Mirabeau at Paris Ave",
)
def normalize(name):
    """
    Return a regularized version of a stop name.

    The name is stripped of surrounding whitespace and then passed through
    three tables, in this order:

    1. RULES    - regex substitutions applied anywhere in the name
    2. SPELLING - literal replacements matched between word boundaries
    3. REMAP    - replacements applied only when the whole name matches

    Each table is a flat tuple of alternating (source, replacement) entries.
    """
    name = name.strip()
    # Pair even-indexed entries with their odd-indexed replacements.  Slicing
    # plus zip works on both Python 2 and 3 (xrange exists only on 2) and
    # ignores a dangling last entry instead of raising IndexError.
    for pattern, replacement in zip(RULES[::2], RULES[1::2]):
        name = re.sub(pattern, replacement, name)
    for found, corrected in zip(SPELLING[::2], SPELLING[1::2]):
        # re.escape: SPELLING entries are literal text, not regex patterns
        name = re.sub(r'\b' + re.escape(found) + r'\b', corrected, name)
    for exact, target in zip(REMAP[::2], REMAP[1::2]):
        # no break: a remapped name may itself match a later REMAP entry
        if name == exact:
            name = target
    return name
def strip_stop_code_prefix(stop_code):
    """
    Return stop_code without its leading "N", if it has one.

    Starting with the 2014/06/01 release the stop codes are prefixed with a
    single letter character (N) which can safely be removed.  Codes that do
    not start with "N" are returned unchanged.
    """
    if not stop_code.startswith("N"):
        return stop_code
    # drop only the first character; any further "N" is kept
    return stop_code[1:]
def process(stops):
    """
    Clean the stop_name and stop_code columns of stops and return stops.

    Each column is passed to csvio.transform together with its cleaning
    function: stop_name first, then stop_code.
    """
    column_cleaners = (
        ('stop_name', normalize),
        ('stop_code', strip_stop_code_prefix),
    )
    for column, cleaner in column_cleaners:
        csvio.transform(stops, column, cleaner)
    return stops
if __name__ == '__main__':
    # The first command-line argument is the directory holding stops.txt;
    # the file is read, cleaned and written back to the same path.
    stops_path = '%s/stops.txt' % sys.argv[1]
    cleaned = process(csvio.read(stops_path))
    csvio.write(stops_path, cleaned)