Radius by frequency #61

Merged 4 commits on Feb 12, 2023
2 changes: 1 addition & 1 deletion aggregated_stops.qml

@@ -38,7 +38,7 @@
       <Option name="properties" type="Map">
         <Option name="size" type="Map">
           <Option name="active" type="bool" value="true"/>
-          <Option name="expression" type="QString" value="1.2 * &quot;count&quot; ^ 1.2"/>
+          <Option name="expression" type="QString" value="1 + 0.3 * &quot;count&quot; ^ 0.5"/>
           <Option name="type" type="int" value="3"/>
         </Option>
       </Option>
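This is the data-defined marker-size rule for aggregated stops in QGIS. The old radius, `1.2 * "count" ^ 1.2`, grows superlinearly with the number of trips serving a stop, so busy stops balloon; the new rule, `1 + 0.3 * "count" ^ 0.5`, grows with the square root of the count above a floor of 1. A minimal Python sketch of both curves (QGIS evaluates `^` as exponentiation; the sample counts are illustrative, not from the PR):

def old_radius(count: int) -> float:
    # previous QML expression: 1.2 * "count" ^ 1.2
    return 1.2 * count ** 1.2

def new_radius(count: int) -> float:
    # new QML expression: 1 + 0.3 * "count" ^ 0.5
    return 1 + 0.3 * count ** 0.5

for count in (1, 10, 100, 1000):
    print(f"count={count:>4}: old={old_radius(count):7.1f}  new={new_radius(count):4.1f}")
# count=   1: old=    1.2  new= 1.3
# count=  10: old=   19.0  new= 1.9
# count= 100: old=  301.4  new= 4.0
# count=1000: old= 4777.3  new=10.5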
23 changes: 11 additions & 12 deletions gtfs_go_dialog.py

@@ -233,14 +233,8 @@ def execution(self):
             "aggregated_csv": "",
         }
 
-        gtfs_parser = GTFSParser(
-            extracted_dir,
-            as_frequency=self.ui.aggregateCheckbox.isChecked(),
-            as_unify_stops=self.ui.unifyCheckBox.isChecked(),
-            delimiter=self.get_delimiter(),
-        )
-
         if self.ui.simpleCheckbox.isChecked():
+            gtfs_parser = GTFSParser(extracted_dir)
             routes_geojson = {
                 "type": "FeatureCollection",
                 "features": gtfs_parser.read_routes(
@@ -271,13 +265,18 @@ def execution(self):
                 json.dump(stops_geojson, f, ensure_ascii=False)
 
         if self.ui.aggregateCheckbox.isChecked():
+            gtfs_parser = GTFSParser(
+                extracted_dir,
+                as_frequency=self.ui.aggregateCheckbox.isChecked(),
+                as_unify_stops=self.ui.unifyCheckBox.isChecked(),
+                delimiter=self.get_delimiter(),
+                yyyymmdd=self.get_yyyymmdd(),
+                begin_time=self.get_time_filter(self.ui.beginTimeLineEdit),
+                end_time=self.get_time_filter(self.ui.endTimeLineEdit),
+            )
             aggregated_routes_geojson = {
                 "type": "FeatureCollection",
-                "features": gtfs_parser.read_route_frequency(
-                    yyyymmdd=self.get_yyyymmdd(),
-                    begin_time=self.get_time_filter(self.ui.beginTimeLineEdit),
-                    end_time=self.get_time_filter(self.ui.endTimeLineEdit),
-                ),
+                "features": gtfs_parser.read_route_frequency(),
             }
             aggregated_stops_geojson = {
                 "type": "FeatureCollection",
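The net effect in the dialog: the date and time-window filters now go to the GTFSParser constructor instead of to read_route_frequency(). A minimal sketch of the resulting call pattern, with an illustrative directory path and filter values (not taken from the PR; the dialog reads these from its UI widgets):

from gtfs_parser.__main__ import GTFSParser

gtfs_parser = GTFSParser(
    "/tmp/extracted_gtfs",  # hypothetical directory containing the GTFS .txt files
    as_frequency=True,
    as_unify_stops=True,
    delimiter="",
    yyyymmdd="20230210",    # service-date filter, applied once at load time
    begin_time="070000",    # hhmmss, inclusive lower bound
    end_time="100000",      # hhmmss, exclusive upper bound
)

# After this PR the reader takes no filter arguments:
features = gtfs_parser.read_route_frequency()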
70 changes: 46 additions & 24 deletions gtfs_parser/__main__.py

@@ -27,6 +27,9 @@ def __init__(
         as_unify_stops=False,
         delimiter="",
         max_distance_degree=0.01,
+        yyyymmdd="",
+        begin_time="",
+        end_time="",
     ):
 
         txts = glob.glob(os.path.join(src_dir, "**", "*.txt"), recursive=True)

@@ -35,7 +38,12 @@
         self.similar_stops_df = None
         if as_frequency:
             self.__aggregate_similar_stops(
-                delimiter, max_distance_degree, as_unify_stops
+                delimiter,
+                max_distance_degree,
+                as_unify_stops,
+                yyyymmdd=yyyymmdd,
+                begin_time=begin_time,
+                end_time=end_time,
             )
 
     @staticmethod
@@ -254,8 +262,41 @@ def __get_shapes_coordinates(self):
         return shapes_df.groupby("shape_id")["pt"].apply(tuple)
 
     def __aggregate_similar_stops(
-        self, delimiter: str, max_distance_degree: float, as_unify_stops: bool
+        self,
+        delimiter: str,
+        max_distance_degree: float,
+        as_unify_stops: bool,
+        yyyymmdd="",
+        begin_time="",
+        end_time="",
     ):
+        # filter stop_times by whether serviced or not
+        if yyyymmdd:
+            trips_filtered_by_day = self.__get_trips_on_a_date(yyyymmdd)
+            self.dataframes["stop_times"] = pd.merge(
+                self.dataframes["stop_times"],
+                trips_filtered_by_day,
+                on="trip_id",
+                how="left",
+            )
+            self.dataframes["stop_times"] = self.dataframes["stop_times"][
+                self.dataframes["stop_times"]["service_flag"] == 1
+            ]
+
+        # time filter
+        if begin_time and end_time:
+            # departure_time is nullable and expressed in "hh:mm:ss" or "h:mm:ss" format.
+            # Hours can be more than 24.
+            # Therefore, drop null records and convert times to integers.
+            int_dep_times = (
+                self.dataframes["stop_times"]
+                .departure_time.str.replace(":", "")
+                .astype(int)
+            )
+            self.dataframes["stop_times"] = self.dataframes["stop_times"][
+                self.dataframes["stop_times"].departure_time != ""
+            ][(int_dep_times >= int(begin_time)) & (int_dep_times < int(end_time))]
+
         if as_unify_stops:
             parent_ids = self.dataframes["stops"]["parent_station"].unique()
             self.dataframes["stops"]["is_parent"] = self.dataframes["stops"][
@@ -287,7 +328,8 @@ def __aggregate_similar_stops(
             ].map(lambda val: val if type(val) == str else val.stop_name)
 
             position_count = (
-                self.dataframes["stops"]
+                self.dataframes["stop_times"]
+                .merge(self.dataframes["stops"], on="stop_id", how="left")
                 .groupby("position_id")
                 .size()
                 .to_frame()
@@ -468,7 +510,7 @@ def read_interpolated_stops(self):
                 for stop in stop_dicts
             ]
 
-    def read_route_frequency(self, yyyymmdd="", begin_time="", end_time=""):
+    def read_route_frequency(self):
         """
         By grouped stops, aggregate route frequency.
         By filtering trips on a date, you can aggregate the frequency of only the routes serviced on that date.
@@ -489,14 +531,6 @@ def read_route_frequency(self, yyyymmdd="", begin_time="", end_time=""):
             .copy()
         )
 
-        # filter stop_times by whether serviced or not
-        if yyyymmdd:
-            trips_filtered_by_day = self.__get_trips_on_a_date(yyyymmdd)
-            stop_times_df = pd.merge(
-                stop_times_df, trips_filtered_by_day, on="trip_id", how="left"
-            )
-            stop_times_df = stop_times_df[stop_times_df["service_flag"] == 1]
-
         # join agency info
         stop_times_df = pd.merge(
             stop_times_df,
@@ -549,18 +583,6 @@ def read_route_frequency(self, yyyymmdd="", begin_time="", end_time=""):
             index=stop_times_df.query("prev_trip_id != next_trip_id").index
         )
 
-        # time filter
-        if begin_time and end_time:
-            # departure_time is nullable and expressed in "hh:mm:ss" or "h:mm:ss" format.
-            # Hours can be more than 24.
-            # Therefore, drop null records and convert times to integers.
-            int_dep_times = stop_times_df.departure_time.str.replace(":", "").astype(
-                int
-            )
-            stop_times_df = stop_times_df[stop_times_df.departure_time != ""][
-                (int_dep_times >= int(begin_time)) & (int_dep_times < int(end_time))
-            ]
-
         # define path_id by prev-stops-centroid and next-stops-centroid
         stop_times_df["path_id"] = (
            stop_times_df["prev_stop_id"]
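The relocated time filter relies on a compact trick: stripping the colons from GTFS departure_time strings yields integers that order correctly, both for the "h:mm:ss" variant (6:59:00 becomes 65900, numerically equal to 065900) and for service past midnight (25:10:00 becomes 251000). A minimal standalone reproduction with toy data, and with the null-drop reordered before the cast for clarity:

import pandas as pd

# Toy stop_times; real data comes from the GTFS stop_times.txt table.
stop_times_df = pd.DataFrame(
    {"departure_time": ["6:59:00", "07:00:00", "09:59:59", "25:10:00", ""]}
)

# Drop nullable (empty) departure times, then convert "hh:mm:ss"/"h:mm:ss" to int.
stop_times_df = stop_times_df[stop_times_df.departure_time != ""]
int_dep_times = stop_times_df.departure_time.str.replace(":", "").astype(int)

begin_time, end_time = "070000", "100000"  # hhmmss strings, as the plugin passes them
filtered = stop_times_df[
    (int_dep_times >= int(begin_time)) & (int_dep_times < int(end_time))
]
print(filtered.departure_time.tolist())  # ['07:00:00', '09:59:59']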
72 changes: 30 additions & 42 deletions gtfs_parser/tests/test_gtfs_parser.py

@@ -4,62 +4,66 @@
 
 from gtfs_parser.__main__ import GTFSParser  # nopep8
 
-FIXTURE_DIR = os.path.join(os.path.dirname(
-    __file__), 'fixture')
+FIXTURE_DIR = os.path.join(os.path.dirname(__file__), "fixture")
 
 
 class TestGtfsParser(unittest.TestCase):
     gtfs_parser = GTFSParser(FIXTURE_DIR)
     gtfs_parser_frequency = GTFSParser(FIXTURE_DIR, as_frequency=True)
-    gtfs_parser_frequency_unify = GTFSParser(FIXTURE_DIR,
-                                             as_frequency=True,
-                                             as_unify_stops=True)
+    gtfs_parser_frequency_unify = GTFSParser(
+        FIXTURE_DIR, as_frequency=True, as_unify_stops=True
+    )
 
     def test_init(self):
         # 13 txt files are in ./fixture
-        self.assertEqual(
-            13, len(glob.glob(os.path.join(FIXTURE_DIR, '*.txt'))))
+        self.assertEqual(13, len(glob.glob(os.path.join(FIXTURE_DIR, "*.txt"))))
         # read tables in constants.py
         self.assertEqual(12, len(self.gtfs_parser.dataframes.keys()))
 
         # as_frequency: some columns regarding frequency aggregation
         self.assertFalse(
-            "similar_stop_id" in self.gtfs_parser.dataframes["stops"].columns)
+            "similar_stop_id" in self.gtfs_parser.dataframes["stops"].columns
+        )
         self.assertTrue(
-            "similar_stop_id" in self.gtfs_parser_frequency.dataframes["stops"].columns)
+            "similar_stop_id" in self.gtfs_parser_frequency.dataframes["stops"].columns
+        )
         self.assertTrue(
-            "similar_stop_id" in self.gtfs_parser_frequency_unify.dataframes["stops"].columns)
+            "similar_stop_id"
+            in self.gtfs_parser_frequency_unify.dataframes["stops"].columns
+        )
 
         # as_unify: some columns regarding stop-grouping added
-        self.assertFalse(
-            "position_id" in self.gtfs_parser.dataframes["stops"].columns)
+        self.assertFalse("position_id" in self.gtfs_parser.dataframes["stops"].columns)
         self.assertFalse(
-            "position_id" in self.gtfs_parser_frequency.dataframes["stops"].columns)
+            "position_id" in self.gtfs_parser_frequency.dataframes["stops"].columns
+        )
         self.assertTrue(
-            "position_id" in self.gtfs_parser_frequency_unify.dataframes["stops"].columns)
+            "position_id"
+            in self.gtfs_parser_frequency_unify.dataframes["stops"].columns
+        )
 
     def test_read_stops(self):
         # list of geojson features
         self.assertEqual(899, len(self.gtfs_parser.read_stops()))
         # the number of stops is not changed by aggregation
         self.assertEqual(899, len(self.gtfs_parser_frequency.read_stops()))
         # the number of stops is not changed by as_unify_stops
-        self.assertEqual(
-            899, len(self.gtfs_parser_frequency_unify.read_stops()))
+        self.assertEqual(899, len(self.gtfs_parser_frequency_unify.read_stops()))
 
         # remove no-route stops
-        self.assertEqual(
-            896, len(self.gtfs_parser.read_stops(ignore_no_route=True)))
+        self.assertEqual(896, len(self.gtfs_parser.read_stops(ignore_no_route=True)))
 
     def test_read_routes(self):
         # the number of features in routes.geojson depends on routes.txt, not shapes.txt
         self.assertEqual(32, len(self.gtfs_parser.read_routes()))
         self.assertEqual(32, len(self.gtfs_parser.read_routes(no_shapes=True)))
         # as_frequency and as_unify have no effect on read_routes()
         self.assertEqual(
-            32, len(self.gtfs_parser_frequency.read_routes(no_shapes=True)))
+            32, len(self.gtfs_parser_frequency.read_routes(no_shapes=True))
+        )
         self.assertEqual(
-            32, len(self.gtfs_parser_frequency_unify.read_routes(no_shapes=True)))
+            32, len(self.gtfs_parser_frequency_unify.read_routes(no_shapes=True))
+        )
 
     def test_read_interpolated_stops(self):
         with self.assertRaises(TypeError):
@@ -68,36 +72,20 @@ def test_read_interpolated_stops(self):
 
         # read_interpolated_stops unifies stops having the same lat-lon into one feature.
         # there are no stops having the same lat-lon in the fixture
-        self.assertEqual(
-            899, len(self.gtfs_parser_frequency.read_interpolated_stops()))
+        self.assertEqual(899, len(self.gtfs_parser_frequency.read_interpolated_stops()))
 
         # as_unify moves near, similarly named stops to the same lat-lon (their centroid)
         self.assertEqual(
-            518, len(self.gtfs_parser_frequency_unify.read_interpolated_stops()))
+            518, len(self.gtfs_parser_frequency_unify.read_interpolated_stops())
+        )
 
     def test_read_route_frequency(self):
         with self.assertRaises(KeyError):
             self.gtfs_parser.read_route_frequency()
 
         # each route_frequency feature is drawn between 2 stops
-        self.assertEqual(
-            956, len(self.gtfs_parser_frequency.read_route_frequency()))
+        self.assertEqual(956, len(self.gtfs_parser_frequency.read_route_frequency()))
         # unifying 'similar' stops into the same position decreases the number of route_frequency features
         self.assertEqual(
-            918, len(self.gtfs_parser_frequency_unify.read_route_frequency()))
-
-        # a date out of the GTFS service period -> 0
-        self.assertEqual(0, len(
-            self.gtfs_parser_frequency_unify.read_route_frequency(yyyymmdd="20210530")))
-
-        # some routes are not in service on 20210730, a Friday
-        freq20210730 = self.gtfs_parser_frequency_unify.read_route_frequency(
-            yyyymmdd="20210730")
-        self.assertEqual(916, len(freq20210730))
-        self.assertEqual(114, freq20210730[0]["properties"]["frequency"])
-
-        # 20210801 - Sunday
-        freq20210801 = self.gtfs_parser_frequency_unify.read_route_frequency(
-            yyyymmdd="20210801")
-        self.assertEqual(736, len(freq20210801))
-        self.assertEqual(62, freq20210801[0]["properties"]["frequency"])
+            918, len(self.gtfs_parser_frequency_unify.read_route_frequency())
+        )