-
Notifications
You must be signed in to change notification settings - Fork 0
/
sched.py
executable file
·133 lines (117 loc) · 4.31 KB
/
sched.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/env python
# input: static_trip_id, arrived, departed, stop_id, ...
# static_trip_id is decoded as in this example:
# A20111204SAT_021150_2..N08R is decoded as follows:
# A sub-Division identifier. A=IRT, B=BMT+IND
# 20111204 schedule effective date
# SAT service code. Typically it will be WKD-Weekday, SAT-Saturday or SUN-Sunday
# 021150 *origin time in hundredths of a minute past midnight
# 2.. *route id
# N *direction
# 08R path identifier
# The *'d fields define trip uniquely
# arrival and depart are in local time
import os, sys, sys, time, csv, datetime, re, pymongo
from math import *
tau = 900.  # seconds; e-folding time of the exponentially decayed arrival counter
n_bulk = 1000  # a pending batch is flushed once it holds more than this many documents

# Compiled once instead of on every row; .match() anchors at the start of the
# string exactly like re.match() did.
_TRIP_ID_RE = re.compile(r'([AB])(\d{8})(\w{3})_(\d{6})_(\w+)\.*([NS]).*')
_CLOCK_RE = re.compile(r'(\d\d):(\d\d):(\d\d)')


def _parse_trip_id(trip_id):
    """Split a static trip id into its fields.

    Returns (eff_date, service_code, origin_time, route_id, direction), or
    None when the id does not match the expected layout.  The leading
    sub-division letter is matched but not returned.
    """
    m = _TRIP_ID_RE.match(trip_id)
    if not m:
        return None
    return m.groups()[1:]


def _seconds_past_midnight(clock):
    """Convert a string starting with 'HH:MM:SS' to seconds, or None if malformed."""
    m = _CLOCK_RE.match(clock)
    if not m:
        return None
    hour, minute, second = [int(x) for x in m.groups()]
    return hour * 3600 + minute * 60 + second


def _hourly_rate(rate):
    """Turn the raw decayed arrival counter into an arrivals-per-hour figure."""
    # Correct for discreteness
    if rate > 1.0:
        rate = -1.0 / log(1.0 - 1.0 / rate)
    # Scale to hourly
    return rate * 3600 / tau


def _sequence_key(stop_sequence):
    """Sort key that orders stop sequences numerically.

    stop_sequence arrives as a CSV string, so a plain string sort would put
    "10" before "2".  Values that are not integers sort after all numeric
    ones, in string order, instead of raising.
    """
    try:
        return (0, int(stop_sequence))
    except (TypeError, ValueError):
        return (1, str(stop_sequence))


def iter_sched_docs(rows):
    """Yield (doc, combo, stop_sequence) for every usable schedule row.

    rows: iterable of sequences whose first five items are
        trip_id, arrived, departed, stop_id, stop_sequence.
    doc: the dictionary destined for the sched collection.
    combo: the (stop_id, route_id, direction) tuple of the row.
    stop_sequence: the row's stop_sequence, unchanged (a string for CSV input).

    Rows with fewer than five fields, an unparseable trip id or an
    unparseable arrival time are skipped.

    We're counting on effective date being in decreasing order, arrival
    times in increasing order: as soon as the arrival time for a
    (stop_id, route_id, service_code) key goes backwards, that key is marked
    done and every later row for it is ignored.
    """
    rates = {}  # (stop_id, route_id, service_code) -> (raw rate, last arrival in seconds)
    done = set()  # keys for which we are done
    for row in rows:
        # Five fields are unpacked below, so anything shorter cannot be used.
        if len(row) < 5:
            continue
        trip_id, arrived, departed, stop_id, stop_sequence = row[:5]
        parsed = _parse_trip_id(trip_id)
        if parsed is None:
            continue
        eff_date, service_code, origin_time, route_id, direction = parsed
        key = (stop_id, route_id, service_code)
        if key in done:
            continue
        s_arrived = arrived
        now = _seconds_past_midnight(arrived)
        if now is None:
            continue
        if key in rates:
            rate, prev_arrived = rates[key]
            if prev_arrived > now:
                done.add(key)
                continue
            rate = rate * exp(-(now - prev_arrived) / tau) + 1.0
        else:
            rate = 1.0
            prev_arrived = now
        # Remember the uncorrected counter; the correction below is only
        # applied to the value that gets stored in the document.
        rates[key] = (rate, now)
        doc = {'now': now,
               't_day': now,
               's_arrived': s_arrived,
               'service_code': service_code,
               'route_id': route_id,
               'eff_date': eff_date,
               'stop_id': stop_id,
               'since': now - prev_arrived,
               'rate': _hourly_rate(rate),
               'tau': tau}
        yield doc, (stop_id, route_id, direction), stop_sequence


def route_stop_docs(seqs):
    """Build one routestops document per (route_id, direction).

    seqs: mapping (stop_id, route_id, direction) -> stop_sequence.
    Returns a list of {'route_id', 'direction', 'stops'} dictionaries in
    which 'stops' holds the stop ids ordered by numeric stop_sequence.
    """
    route2stop = {}
    for (stop_id, route_id, direction), stop_sequence in seqs.items():
        route2stop.setdefault((route_id, direction), []).append(
            (stop_id, stop_sequence))
    docs = []
    for (route_id, direction), stops in route2stop.items():
        stops.sort(key=lambda pair: _sequence_key(pair[1]))
        docs.append({'route_id': route_id,
                     'direction': direction,
                     'stops': [pair[0] for pair in stops]})
    return docs


def main():
    """Reload the mta.sched and mta.routestops collections from CSV on stdin."""
    from pymongo import MongoClient
    client = MongoClient('localhost', 3333)
    db = client.mta
    sched = db.sched
    # NOTE(review): remove()/insert() are the legacy PyMongo collection calls
    # this script was written against; confirm the driver version before
    # switching to delete_many()/insert_many().
    # empty
    sched.remove()
    reader = csv.reader(sys.stdin)
    # For loading the main sched collection
    batch = []
    n = 0
    # (stop_id, route_id, direction) => first stop_sequence seen
    seqs = {}
    for doc, combo, stop_sequence in iter_sched_docs(reader):
        batch.append(doc)
        if len(batch) > n_bulk:
            sched.insert(batch)
            n = n + len(batch)
            batch = []
            sys.stderr.write("Inserted %d records\n" % n)
        seqs.setdefault(combo, stop_sequence)
    if batch:
        sched.insert(batch)
    route_docs = route_stop_docs(seqs)
    db.routestops.remove()
    # Nothing to write when no row was usable; skip the bulk insert rather
    # than hand the driver an empty list.
    if route_docs:
        db.routestops.insert(route_docs)


if __name__ == "__main__":
    main()
# sched.update({'now' : now,
# 'route_id' : route_id,
# 'stop_id' : stop_id,
# 'service_code' : service_code},
# {'now' : now,
# 's_arrived' : s_arrived,
# 'service_code' : service_code,
# 'route_id' : route_id,
# 'eff_date' : eff_date,
# 'stop_id' : stop_id,
# 'since' : now-prev_arrived,
# 'rate' : rate,
# 'tau' : tau},
# upsert=True)
# print "%d, %s, %s, %s, %s, %d, %f" % (now, s_arrived, service_code, route_id, stop_id, now-prev_arrived, rate)