diff --git a/.github/workflows/test_gtfs_parser.yml b/.github/workflows/test_gtfs_parser.yml
index fba9f17..b1cb0ac 100644
--- a/.github/workflows/test_gtfs_parser.yml
+++ b/.github/workflows/test_gtfs_parser.yml
@@ -27,5 +27,4 @@ jobs:
run: python -m pip install pandas
- name: unittest
- run: python -m unittest discover gtfs_parser
- continue-on-error: true
\ No newline at end of file
+ run: python -m unittest discover gtfs_parser
\ No newline at end of file
diff --git a/README.md b/README.md
index 7f8f759..2546921 100644
--- a/README.md
+++ b/README.md
@@ -101,5 +101,5 @@ pip install pandas
```
cd GTFS-GO
-python -m unittest discover
+python -m unittest discover gtfs_parser/tests
```
diff --git a/frequency.qml b/aggregated_routes.qml
similarity index 100%
rename from frequency.qml
rename to aggregated_routes.qml
diff --git a/aggregated_stops.qml b/aggregated_stops.qml
new file mode 100644
index 0000000..07ed7ac
--- /dev/null
+++ b/aggregated_stops.qml
@@ -0,0 +1,181 @@
+[QGIS layer-style XML, 181 lines: markup stripped in this view; only a few residual "0" values survived]
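The two style files above are QGIS layer styles (.qml) that the plugin ships and applies when layers are loaded. A minimal PyQGIS sketch of how such a style is attached to a layer; the GeoJSON path is illustrative, and `__file__` stands in for the plugin directory:

```
import os

from qgis.core import QgsProject, QgsVectorLayer

# illustrative path; in the plugin the layer comes from the exported GeoJSON
layer = QgsVectorLayer("/tmp/aggregated_stops.geojson", "aggregated_stops", "ogr")

# apply the shipped style, as show_geojson does in gtfs_go_dialog.py below
layer.loadNamedStyle(os.path.join(os.path.dirname(__file__), "aggregated_stops.qml"))
QgsProject.instance().addMapLayer(layer)
```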
diff --git a/frequency_stops.qml b/frequency_stops.qml
deleted file mode 100644
index df3165b..0000000
--- a/frequency_stops.qml
+++ /dev/null
@@ -1,122 +0,0 @@
-[QGIS layer-style XML, 122 lines: markup stripped in this view]
diff --git a/gtfs_go_dialog.py b/gtfs_go_dialog.py
index 2125674..376be45 100755
--- a/gtfs_go_dialog.py
+++ b/gtfs_go_dialog.py
@@ -50,35 +50,32 @@
FILENAME_RESULT_CSV,
STOPS_MINIMUM_VISIBLE_SCALE,
)
-DATALIST_JSON_PATH = os.path.join(
- os.path.dirname(__file__), 'gtfs_go_datalist.json')
-TEMP_DIR = os.path.join(tempfile.gettempdir(), 'GTFSGo')
-REPOSITORY_ENUM = {
- "preset": 0,
- "japanDpf": 1
-}
+DATALIST_JSON_PATH = os.path.join(os.path.dirname(__file__), "gtfs_go_datalist.json")
+TEMP_DIR = os.path.join(tempfile.gettempdir(), "GTFSGo")
+REPOSITORY_ENUM = {"preset": 0, "japanDpf": 1}
-class GTFSGoDialog(QDialog):
+class GTFSGoDialog(QDialog):
def __init__(self, iface):
"""Constructor."""
super().__init__()
- self.ui = uic.loadUi(os.path.join(os.path.dirname(
- __file__), 'gtfs_go_dialog_base.ui'), self)
- with open(DATALIST_JSON_PATH, encoding='utf-8') as f:
+ self.ui = uic.loadUi(
+ os.path.join(os.path.dirname(__file__), "gtfs_go_dialog_base.ui"), self
+ )
+ with open(DATALIST_JSON_PATH, encoding="utf-8") as f:
self.datalist = json.load(f)
self.iface = iface
- self.combobox_zip_text = self.tr('---Load local ZipFile---')
+ self.combobox_zip_text = self.tr("---Load local ZipFile---")
self.init_gui()
def init_gui(self):
# repository combobox
+ self.repositoryCombobox.addItem(self.tr("Preset"), REPOSITORY_ENUM["preset"])
self.repositoryCombobox.addItem(
- self.tr('Preset'), REPOSITORY_ENUM['preset'])
- self.repositoryCombobox.addItem(
- self.tr('[Japan]GTFS data repository'), REPOSITORY_ENUM['japanDpf'])
+ self.tr("[Japan]GTFS data repository"), REPOSITORY_ENUM["japanDpf"]
+ )
# local repository data select combobox
self.ui.comboBox.addItem(self.combobox_zip_text, None)
@@ -93,19 +90,20 @@ def init_gui(self):
self.ui.outputDirFileWidget.fileChanged.connect(self.refresh)
self.ui.unifyCheckBox.stateChanged.connect(self.refresh)
self.ui.timeFilterCheckBox.stateChanged.connect(self.refresh)
- self.ui.simpleRadioButton.clicked.connect(self.refresh)
- self.ui.freqRadioButton.clicked.connect(self.refresh)
+ self.ui.simpleCheckbox.clicked.connect(self.refresh)
+ self.ui.aggregateCheckbox.clicked.connect(self.refresh)
# time filter - validate user input
self.ui.beginTimeLineEdit.editingFinished.connect(
- lambda: self.validate_time_lineedit(self.ui.beginTimeLineEdit))
+ lambda: self.validate_time_lineedit(self.ui.beginTimeLineEdit)
+ )
self.ui.endTimeLineEdit.editingFinished.connect(
- lambda: self.validate_time_lineedit(self.ui.endTimeLineEdit))
+ lambda: self.validate_time_lineedit(self.ui.endTimeLineEdit)
+ )
# set today DateEdit
now = datetime.datetime.now()
- self.ui.filterByDateDateEdit.setDate(
- QDate(now.year, now.month, now.day))
+ self.ui.filterByDateDateEdit.setDate(QDate(now.year, now.month, now.day))
self.refresh()
@@ -118,8 +116,7 @@ def init_local_repository_gui(self):
def init_japan_dpf_gui(self):
self.japanDpfResultTableView.clicked.connect(self.refresh)
- self.japanDpfResultTableView.setSelectionBehavior(
- QAbstractItemView.SelectRows)
+ self.japanDpfResultTableView.setSelectionBehavior(QAbstractItemView.SelectRows)
self.japan_dpf_set_table([])
for idx, header in enumerate(repository.japan_dpf.table.HEADERS):
if header in repository.japan_dpf.table.HEADERS_TO_HIDE:
@@ -127,16 +124,15 @@ def init_japan_dpf_gui(self):
self.japanDpfPrefectureCombobox.addItem(self.tr("any"), None)
for prefname in constants.JAPAN_PREFS:
- self.japanDpfPrefectureCombobox.addItem(
- prefname, prefname)
+ self.japanDpfPrefectureCombobox.addItem(prefname, prefname)
now = datetime.datetime.now()
- self.ui.japanDpfTargetDateEdit.setDate(
- QDate(now.year, now.month, now.day))
+ self.ui.japanDpfTargetDateEdit.setDate(QDate(now.year, now.month, now.day))
self.japanDpfExtentGroupBox.setMapCanvas(iface.mapCanvas())
self.japanDpfExtentGroupBox.setOutputCrs(
- QgsCoordinateReferenceSystem("EPSG:4326"))
+ QgsCoordinateReferenceSystem("EPSG:4326")
+ )
# TODO: APIでextentパラメータが未実装なので一時的にUIを非表示
self.japanDpfExtentGroupBox.setVisible(False)
@@ -158,18 +154,18 @@ def make_combobox_text(self, data):
Returns:
str: combobox-text
"""
- return '[' + data["country"] + ']' + '[' + data["region"] + ']' + data["name"]
+ return "[" + data["country"] + "]" + "[" + data["region"] + "]" + data["name"]
def download_zip(self, url: str) -> str:
data = urllib.request.urlopen(url).read()
- download_path = os.path.join(TEMP_DIR, str(uuid.uuid4()) + '.zip')
- with open(download_path, mode='wb') as f:
+ download_path = os.path.join(TEMP_DIR, str(uuid.uuid4()) + ".zip")
+ with open(download_path, mode="wb") as f:
f.write(data)
return download_path
def extract_zip(self, zip_path: str) -> str:
- extracted_dir = os.path.join(TEMP_DIR, 'extract', str(uuid.uuid4()))
+ extracted_dir = os.path.join(TEMP_DIR, "extract", str(uuid.uuid4()))
os.makedirs(extracted_dir, exist_ok=True)
with zipfile.ZipFile(zip_path) as z:
z.extractall(extracted_dir)
@@ -177,29 +173,41 @@ def extract_zip(self, zip_path: str) -> str:
def get_target_feed_infos(self):
feed_infos = []
- if self.repositoryCombobox.currentData() == REPOSITORY_ENUM['preset']:
+ if self.repositoryCombobox.currentData() == REPOSITORY_ENUM["preset"]:
if self.ui.comboBox.currentData():
- feed_infos.append({
- "path": self.ui.comboBox.currentData().get("url"),
- "group": self.ui.comboBox.currentData().get("name"),
- "dir": self.ui.comboBox.currentData().get("name")
- })
- elif self.ui.comboBox.currentData() is None and self.ui.zipFileWidget.filePath():
- feed_infos.append({
- "path": self.ui.zipFileWidget.filePath(),
- "group": os.path.basename(self.ui.zipFileWidget.filePath()).split(".")[0],
- "dir": os.path.basename(self.ui.zipFileWidget.filePath()).split(".")[0]
- })
- elif self.repositoryCombobox.currentData() == REPOSITORY_ENUM['japanDpf']:
+ feed_infos.append(
+ {
+ "path": self.ui.comboBox.currentData().get("url"),
+ "group": self.ui.comboBox.currentData().get("name"),
+ "dir": self.ui.comboBox.currentData().get("name"),
+ }
+ )
+ elif (
+ self.ui.comboBox.currentData() is None
+ and self.ui.zipFileWidget.filePath()
+ ):
+ feed_infos.append(
+ {
+ "path": self.ui.zipFileWidget.filePath(),
+ "group": os.path.basename(
+ self.ui.zipFileWidget.filePath()
+ ).split(".")[0],
+ "dir": os.path.basename(self.ui.zipFileWidget.filePath()).split(
+ "."
+ )[0],
+ }
+ )
+ elif self.repositoryCombobox.currentData() == REPOSITORY_ENUM["japanDpf"]:
selected_rows = self.japanDpfResultTableView.selectionModel().selectedRows()
for row in selected_rows:
- row_data = self.get_selected_row_data_in_japan_dpf_table(
- row.row())
- feed_infos.append({
- "path": row_data["gtfs_url"],
- "group": row_data["agency_name"] + "-" + row_data["gtfs_name"],
- "dir": row_data["agency_id"] + "-" + row_data["gtfs_id"],
- })
+ row_data = self.get_selected_row_data_in_japan_dpf_table(row.row())
+ feed_infos.append(
+ {
+ "path": row_data["gtfs_url"],
+ "group": row_data["agency_name"] + "-" + row_data["gtfs_name"],
+ "dir": row_data["agency_id"] + "-" + row_data["gtfs_id"],
+ }
+ )
return feed_infos
def execution(self):
@@ -208,67 +216,115 @@ def execution(self):
os.makedirs(TEMP_DIR, exist_ok=True)
for feed_info in self.get_target_feed_infos():
- if feed_info["path"].startswith('http'):
+ if feed_info["path"].startswith("http"):
feed_info["path"] = self.download_zip(feed_info["path"])
extracted_dir = self.extract_zip(feed_info["path"])
- output_dir = os.path.join(self.outputDirFileWidget.filePath(),
- feed_info["dir"])
+ output_dir = os.path.join(
+ self.outputDirFileWidget.filePath(), feed_info["dir"]
+ )
os.makedirs(output_dir, exist_ok=True)
- if self.ui.simpleRadioButton.isChecked():
+ written_files = {
+ "routes": "",
+ "stops": "",
+ "aggregated_routes": "",
+ "aggregated_stops": "",
+ "aggregated_csv": "",
+ }
+
+ if self.ui.simpleCheckbox.isChecked():
gtfs_parser = GTFSParser(extracted_dir)
routes_geojson = {
- 'type': 'FeatureCollection',
- 'features': gtfs_parser.read_routes(no_shapes=self.ui.ignoreShapesCheckbox.isChecked())
+ "type": "FeatureCollection",
+ "features": gtfs_parser.read_routes(
+ no_shapes=self.ui.ignoreShapesCheckbox.isChecked()
+ ),
}
stops_geojson = {
- 'type': 'FeatureCollection',
- 'features': gtfs_parser.read_stops(ignore_no_route=self.ui.ignoreNoRouteStopsCheckbox.isChecked())
+ "type": "FeatureCollection",
+ "features": gtfs_parser.read_stops(
+ ignore_no_route=self.ui.ignoreNoRouteStopsCheckbox.isChecked()
+ ),
}
- route_filename = 'route.geojson'
- stops_filename = 'stops.geojson'
- else:
+ # write
+ written_files["routes"] = os.path.join(output_dir, "routes.geojson")
+ written_files["stops"] = os.path.join(output_dir, "stops.geojson")
+ with open(
+ written_files["routes"],
+ mode="w",
+ encoding="utf-8",
+ ) as f:
+ json.dump(routes_geojson, f, ensure_ascii=False)
+
+ with open(
+ written_files["stops"],
+ mode="w",
+ encoding="utf-8",
+ ) as f:
+ json.dump(stops_geojson, f, ensure_ascii=False)
+
+ if self.ui.aggregateCheckbox.isChecked():
gtfs_parser = GTFSParser(
extracted_dir,
- as_frequency=True,
+ as_frequency=self.ui.aggregateCheckbox.isChecked(),
as_unify_stops=self.ui.unifyCheckBox.isChecked(),
- delimiter=self.get_delimiter()
+ delimiter=self.get_delimiter(),
+ yyyymmdd=self.get_yyyymmdd(),
+ begin_time=self.get_time_filter(self.ui.beginTimeLineEdit),
+ end_time=self.get_time_filter(self.ui.endTimeLineEdit),
)
-
- routes_geojson = {
- 'type': 'FeatureCollection',
- 'features': gtfs_parser.read_route_frequency(yyyymmdd=self.get_yyyymmdd(),
- begin_time=self.get_time_filter(
- self.ui.beginTimeLineEdit),
- end_time=self.get_time_filter(self.ui.endTimeLineEdit))
+ aggregated_routes_geojson = {
+ "type": "FeatureCollection",
+ "features": gtfs_parser.read_route_frequency(),
}
- stops_geojson = {
- 'type': 'FeatureCollection',
- 'features': gtfs_parser.read_interpolated_stops()
+ aggregated_stops_geojson = {
+ "type": "FeatureCollection",
+ "features": gtfs_parser.read_interpolated_stops(),
}
- route_filename = 'frequency.geojson'
- stops_filename = 'frequency_stops.geojson'
-
- # write stop_id conversion result csv
- with open(os.path.join(output_dir, FILENAME_RESULT_CSV), mode="w", encoding="cp932", errors="ignore")as f:
- gtfs_parser.dataframes['stops'][[
- 'stop_id', 'stop_name', 'similar_stop_id', 'similar_stop_name']].to_csv(f, index=False)
-
- with open(os.path.join(output_dir, route_filename), mode='w', encoding='utf-8') as f:
- json.dump(routes_geojson, f, ensure_ascii=False)
- with open(os.path.join(output_dir, stops_filename), mode='w', encoding='utf-8') as f:
- json.dump(stops_geojson, f, ensure_ascii=False)
-
- self.show_geojson(output_dir,
- stops_filename,
- route_filename,
- feed_info["group"])
+ # write
+ written_files["aggregated_routes"] = os.path.join(
+ output_dir, "aggregated_routes.geojson"
+ )
+ written_files["aggregated_stops"] = os.path.join(
+ output_dir, "aggregated_stops.geojson"
+ )
+ written_files["aggregated_csv"] = os.path.join(output_dir, "result.csv")
+ with open(
+ written_files["aggregated_stops"],
+ mode="w",
+ encoding="utf-8",
+ ) as f:
+ json.dump(aggregated_stops_geojson, f, ensure_ascii=False)
+ with open(
+ written_files["aggregated_routes"],
+ mode="w",
+ encoding="utf-8",
+ ) as f:
+ json.dump(aggregated_routes_geojson, f, ensure_ascii=False)
+ with open(
+ written_files["aggregated_csv"],
+ mode="w",
+ encoding="cp932",
+ errors="ignore",
+ ) as f:
+ gtfs_parser.dataframes["stops"][
+ ["stop_id", "stop_name", "similar_stop_id", "similar_stop_name"]
+ ].to_csv(f, index=False)
+
+ self.show_geojson(
+ feed_info["group"],
+ written_files["stops"],
+ written_files["routes"],
+ written_files["aggregated_stops"],
+ written_files["aggregated_routes"],
+ written_files["aggregated_csv"],
+ )
def get_yyyymmdd(self):
if not self.ui.filterByDateCheckBox.isChecked():
- return ''
+ return ""
date = self.ui.filterByDateDateEdit.date()
yyyy = str(date.year()).zfill(4)
mm = str(date.month()).zfill(2)
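execution() above tracks every artifact in a written_files dict, with an empty string marking an output that was not generated; show_geojson later skips those sentinel entries. A standalone sketch of the write pattern, with an illustrative output directory and feature:

```
import json
import os

output_dir = "/tmp/gtfsgo-demo"  # illustrative
os.makedirs(output_dir, exist_ok=True)

stops_geojson = {
    "type": "FeatureCollection",
    "features": [{
        "type": "Feature",
        "geometry": {"type": "Point", "coordinates": (139.7671, 35.6812)},
        "properties": {"stop_id": "S1", "stop_name": "demo", "route_ids": []},
    }],
}

# "" means "not generated"; show_geojson checks for that sentinel
written_files = {"routes": "", "stops": "", "aggregated_routes": "",
                 "aggregated_stops": "", "aggregated_csv": ""}
written_files["stops"] = os.path.join(output_dir, "stops.geojson")
with open(written_files["stops"], mode="w", encoding="utf-8") as f:
    json.dump(stops_geojson, f, ensure_ascii=False)
```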
@@ -277,75 +333,124 @@ def get_yyyymmdd(self):
def get_delimiter(self):
if not self.ui.unifyCheckBox.isChecked():
- return ''
+ return ""
if not self.ui.delimiterCheckBox.isChecked():
- return ''
+ return ""
return self.ui.delimiterLineEdit.text()
def get_time_filter(self, lineEdit):
if not self.ui.timeFilterCheckBox.isChecked():
- return ''
- return lineEdit.text().replace(':', '')
-
- def show_geojson(self, geojson_dir: str, stops_filename: str, route_filename: str, group_name: str):
- # these geojsons will already have been generated
- stops_geojson = os.path.join(geojson_dir, stops_filename)
- routes_geojson = os.path.join(geojson_dir, route_filename)
-
- stops_vlayer = QgsVectorLayer(
- stops_geojson, stops_filename.split('.')[0], 'ogr')
- routes_vlayer = QgsVectorLayer(
- routes_geojson, route_filename.split('.')[0], 'ogr')
-
- # make and set labeling for stops
- stops_labeling = get_labeling_for_stops(
- target_field_name="stop_name" if self.ui.simpleRadioButton.isChecked() else "similar_stop_name")
- stops_vlayer.setLabelsEnabled(True)
- stops_vlayer.setLabeling(stops_labeling)
-
- # adjust layer visibility
- stops_vlayer.setMinimumScale(STOPS_MINIMUM_VISIBLE_SCALE)
- stops_vlayer.setScaleBasedVisibility(True)
-
- # there are two type route renderer, normal, frequency
- if self.ui.simpleRadioButton.isChecked():
- routes_renderer = Renderer(routes_vlayer, 'route_name')
+ return ""
+ return lineEdit.text().replace(":", "")
+
+ def show_geojson(
+ self,
+ group_name: str,
+ stops_geojson: str,
+ routes_geojson: str,
+ aggregated_stops_geojson: str,
+ aggregated_routes_geojson: str,
+ aggregated_csv: str,
+ ):
+        root = QgsProject.instance().layerTreeRoot()
+ group = root.insertGroup(0, group_name)
+ group.setExpanded(True)
+
+ if routes_geojson != "":
+ routes_vlayer = QgsVectorLayer(
+ routes_geojson, os.path.basename(routes_geojson).split(".")[0], "ogr"
+ )
+ routes_renderer = Renderer(routes_vlayer, "route_name")
routes_vlayer.setRenderer(routes_renderer.make_renderer())
- added_layers = [routes_vlayer, stops_vlayer]
- stops_renderer = Renderer(stops_vlayer, 'stop_name')
+
+ QgsProject.instance().addMapLayer(routes_vlayer, False)
+ group.insertLayer(0, routes_vlayer)
+
+ if stops_geojson != "":
+ stops_vlayer = QgsVectorLayer(
+ stops_geojson, os.path.basename(stops_geojson).split(".")[0], "ogr"
+ )
+ # make and set labeling for stops
+        stops_labeling = get_labeling_for_stops("stop_name")
+ stops_vlayer.setLabelsEnabled(True)
+ stops_vlayer.setLabeling(stops_labeling)
+
+ # adjust layer visibility
+ stops_vlayer.setMinimumScale(STOPS_MINIMUM_VISIBLE_SCALE)
+ stops_vlayer.setScaleBasedVisibility(True)
+
+ stops_renderer = Renderer(stops_vlayer, "stop_name")
stops_vlayer.setRenderer(stops_renderer.make_renderer())
- else:
- # frequency mode
- routes_vlayer.loadNamedStyle(os.path.join(
- os.path.dirname(__file__), 'frequency.qml'))
- stops_vlayer.loadNamedStyle(os.path.join(
- os.path.dirname(__file__), 'frequency_stops.qml'))
- csv_vlayer = QgsVectorLayer(os.path.join(
- geojson_dir, FILENAME_RESULT_CSV), FILENAME_RESULT_CSV, 'ogr')
- added_layers = [routes_vlayer, stops_vlayer, csv_vlayer]
-
- # add two layers as a group
- self.add_layers_as_group(group_name, added_layers)
+
+ QgsProject.instance().addMapLayer(stops_vlayer, False)
+ group.insertLayer(0, stops_vlayer)
+
+ if aggregated_routes_geojson != "":
+ aggregated_routes_vlayer = QgsVectorLayer(
+ aggregated_routes_geojson,
+ os.path.basename(aggregated_routes_geojson).split(".")[0],
+ "ogr",
+ )
+ aggregated_routes_vlayer.loadNamedStyle(
+ os.path.join(os.path.dirname(__file__), "aggregated_routes.qml")
+ )
+
+ QgsProject.instance().addMapLayer(aggregated_routes_vlayer, False)
+ group.insertLayer(0, aggregated_routes_vlayer)
+
+ if aggregated_stops_geojson != "":
+ aggregated_stops_vlayer = QgsVectorLayer(
+ aggregated_stops_geojson,
+ os.path.basename(aggregated_stops_geojson).split(".")[0],
+ "ogr",
+ )
+ aggregated_stops_vlayer.loadNamedStyle(
+ os.path.join(os.path.dirname(__file__), "aggregated_stops.qml")
+ )
+
+ QgsProject.instance().addMapLayer(aggregated_stops_vlayer, False)
+ group.insertLayer(0, aggregated_stops_vlayer)
+
+ if aggregated_csv != "":
+ aggregated_csv_vlayer = QgsVectorLayer(
+ aggregated_csv,
+ os.path.basename(aggregated_csv).split(".")[0],
+ "ogr",
+ )
+
+ QgsProject.instance().addMapLayer(aggregated_csv_vlayer, False)
+ group.insertLayer(0, aggregated_csv_vlayer)
self.iface.messageBar().pushInfo(
- self.tr('finish'),
- self.tr('generated geojson files: ') + geojson_dir)
+            self.tr("finish"), self.tr("generated geojson files")
+ )
self.ui.close()
def refresh(self):
self.localDataSelectAreaWidget.setVisible(
- self.repositoryCombobox.currentData() == REPOSITORY_ENUM['preset'])
+ self.repositoryCombobox.currentData() == REPOSITORY_ENUM["preset"]
+ )
self.japanDpfDataSelectAreaWidget.setVisible(
- self.repositoryCombobox.currentData() == REPOSITORY_ENUM['japanDpf'])
+ self.repositoryCombobox.currentData() == REPOSITORY_ENUM["japanDpf"]
+ )
# idiom to shrink window to fit its content
self.resize(0, 0)
self.adjustSize()
self.ui.zipFileWidget.setEnabled(
- self.ui.comboBox.currentText() == self.combobox_zip_text)
- self.ui.pushButton.setEnabled((len(self.get_target_feed_infos()) > 0) and
- (self.ui.outputDirFileWidget.filePath() != ''))
+ self.ui.comboBox.currentText() == self.combobox_zip_text
+ )
+
+ # set executable
+ self.ui.pushButton.setEnabled(
+ (len(self.get_target_feed_infos()) > 0)
+ and (self.ui.outputDirFileWidget.filePath() != "")
+ and (
+ self.ui.simpleCheckbox.isChecked()
+ or self.ui.aggregateCheckbox.isChecked()
+ )
+ )
# stops unify mode
is_unify = self.ui.unifyCheckBox.isChecked()
@@ -357,35 +462,15 @@ def refresh(self):
self.ui.beginTimeLineEdit.setEnabled(has_time_filter)
self.ui.endTimeLineEdit.setEnabled(has_time_filter)
- # radio button - mode toggle
- self.ui.simpleFrame.setEnabled(self.ui.simpleRadioButton.isChecked())
- self.ui.freqFrame.setEnabled(self.ui.freqRadioButton.isChecked())
-
- def add_layers_as_group(self, group_name: str, layers: [QgsMapLayer]):
- """
- add layers into project as a group.
- the order of layers is reverse to layers list order.
- if layers: [layer_A, layer_B, layer_C]
- then in tree:
- - layer_C
- - layer_B
- - layer_A
-
- Args:
- group_name (str): [description]
- layers ([type]): [description]
- """
- root = QgsProject().instance().layerTreeRoot()
- group = root.insertGroup(0, group_name)
- group.setExpanded(True)
- for layer in layers:
- QgsProject.instance().addMapLayer(layer, False)
- group.insertLayer(0, layer)
+ # mode toggle
+ self.ui.simpleFrame.setEnabled(self.ui.simpleCheckbox.isChecked())
+ self.ui.freqFrame.setEnabled(self.ui.aggregateCheckbox.isChecked())
@staticmethod
def validate_time_lineedit(lineedit):
- digits = ''.join(
- list(filter(lambda char: char.isdigit(), list(lineedit.text())))).ljust(6, "0")[-6:]
+ digits = "".join(
+ list(filter(lambda char: char.isdigit(), list(lineedit.text())))
+ ).ljust(6, "0")[-6:]
# limit to 29:59:59
hh = str(min(29, int(digits[0:2]))).zfill(2)
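validate_time_lineedit normalizes free-form input into hh:mm:ss by keeping only digits, right-padding/truncating to six characters, and clamping to the 29:59:59 ceiling (GTFS times may run past 24:00 for after-midnight service). A pure-Python sketch of that logic; the mm/ss clamping is assumed symmetric to the hh line shown above:

```
def normalize_hhmmss(text: str) -> str:
    # keep digits only, pad/trim to exactly six characters
    digits = "".join(ch for ch in text if ch.isdigit()).ljust(6, "0")[-6:]
    hh = str(min(29, int(digits[0:2]))).zfill(2)  # limit to 29:59:59
    mm = str(min(59, int(digits[2:4]))).zfill(2)  # assumed, by symmetry
    ss = str(min(59, int(digits[4:6]))).zfill(2)  # assumed, by symmetry
    return f"{hh}:{mm}:{ss}"

assert normalize_hhmmss("25:99:99") == "25:59:59"
assert normalize_hhmmss("310000") == "29:00:00"  # clamped to the ceiling
```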
@@ -405,20 +490,35 @@ def japan_dpf_search(self):
mm = str(target_date.month()).zfill(2)
dd = str(target_date.day()).zfill(2)
- extent = None if self.japanDpfExtentGroupBox.outputExtent().isEmpty(
- ) else self.japanDpfExtentGroupBox.outputExtent().toString().replace(" : ", ",")
+ extent = (
+ None
+ if self.japanDpfExtentGroupBox.outputExtent().isEmpty()
+ else self.japanDpfExtentGroupBox.outputExtent()
+ .toString()
+ .replace(" : ", ",")
+ )
- pref = None if self.japanDpfPrefectureCombobox.currentData(
- ) is None else urllib.parse.quote(self.japanDpfPrefectureCombobox.currentData())
+ pref = (
+ None
+ if self.japanDpfPrefectureCombobox.currentData() is None
+ else urllib.parse.quote(self.japanDpfPrefectureCombobox.currentData())
+ )
try:
- results = repository.japan_dpf.api.get_feeds(yyyy+mm+dd,
- extent=extent,
- pref=pref)
+ results = repository.japan_dpf.api.get_feeds(
+ yyyy + mm + dd, extent=extent, pref=pref
+ )
self.japan_dpf_set_table(results)
except Exception as e:
QMessageBox.information(
- self, self.tr('Error'), self.tr('Error occured, please check:\n- Internet connection.\n- Repository-server') + "\n\n" + e)
+ self,
+ self.tr("Error"),
+ self.tr(
+                "Error occurred, please check:\n- Internet connection.\n- Repository-server"
+ )
+ + "\n\n"
+            + str(e),
+ )
finally:
self.japanDpfSearchButton.setEnabled(True)
self.japanDpfSearchButton.setText(self.tr("Search"))
@@ -438,6 +538,7 @@ def japan_dpf_set_table(self, results: list):
def get_selected_row_data_in_japan_dpf_table(self, row: int):
data = {}
for col_idx, col_name in enumerate(repository.japan_dpf.table.HEADERS):
- data[col_name] = self.japanDpfResultTableView.model().index(row,
- col_idx).data()
+ data[col_name] = (
+ self.japanDpfResultTableView.model().index(row, col_idx).data()
+ )
return data
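show_geojson now inlines the grouping logic that add_layers_as_group used to provide: register each layer without adding it to the legend root, then insert it at index 0 of a per-feed group, so each new layer stacks above the previous one. A condensed PyQGIS sketch of the pattern; memory layers stand in for the exported GeoJSON:

```
from qgis.core import QgsProject, QgsVectorLayer

# memory layers as stand-ins for the exported GeoJSON files
layers = [
    QgsVectorLayer("LineString", "routes", "memory"),
    QgsVectorLayer("Point", "stops", "memory"),
]

root = QgsProject.instance().layerTreeRoot()
group = root.insertGroup(0, "demo-feed")  # one group per feed
group.setExpanded(True)
for layer in layers:
    QgsProject.instance().addMapLayer(layer, False)  # False: keep out of root
    group.insertLayer(0, layer)  # index 0: each layer lands above the previous
```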
diff --git a/gtfs_go_dialog_base.ui b/gtfs_go_dialog_base.ui
index d1cdec4..ee5448b 100755
--- a/gtfs_go_dialog_base.ui
+++ b/gtfs_go_dialog_base.ui
@@ -6,8 +6,8 @@
    <x>0</x>
    <y>0</y>
-   <width>543</width>
-   <height>808</height>
+   <width>592</width>
+   <height>1074</height>
@@ -123,10 +123,10 @@
[markup stripped: two layout property tags changed; their values of 20 are unchanged]
@@ -229,7 +229,7 @@
[markup stripped: the "simple routes and stops" control changed from simpleRadioButton to simpleCheckbox]
simple routes and stops
@@ -290,13 +290,10 @@
[markup stripped: the "aggregate route frequency" control changed from freqRadioButton to aggregateCheckbox]
aggregate route frequency
[markup stripped: a boolean property with value false removed from the control]
diff --git a/gtfs_parser/__main__.py b/gtfs_parser/__main__.py
index c0bd8af..4dfaa96 100644
--- a/gtfs_parser/__main__.py
+++ b/gtfs_parser/__main__.py
@@ -16,82 +16,67 @@
def latlon_to_str(latlon):
- return ''.join(list(map(lambda coord: str(round(coord, 4)), latlon)))
+ return "".join(list(map(lambda coord: str(round(coord, 4)), latlon)))
class GTFSParser:
- def __init__(self,
- src_dir: str,
- as_frequency=False,
- as_unify_stops=False,
- delimiter='',
- max_distance_degree=0.01):
-
- txts = glob.glob(os.path.join(
- src_dir, '**', '*.txt'), recursive=True)
- self.dataframes = {}
- for txt in txts:
- datatype = os.path.basename(txt).split('.')[0]
- if os.path.basename(datatype) not in GTFS_DATATYPES:
- print(f'{datatype} is not specified in GTFS, skipping...')
+ def __init__(
+ self,
+ src_dir: str,
+ as_frequency=False,
+ as_unify_stops=False,
+ delimiter="",
+ max_distance_degree=0.01,
+ yyyymmdd="",
+ begin_time="",
+ end_time="",
+ ):
+ txts = glob.glob(os.path.join(src_dir, "**", "*.txt"), recursive=True)
+ self.dataframes = self.__load_tables(txts)
+
+ self.similar_stops_df = None
+ if as_frequency:
+ self.__aggregate_similar_stops(
+ delimiter,
+ max_distance_degree,
+ as_unify_stops,
+ yyyymmdd=yyyymmdd,
+ begin_time=begin_time,
+ end_time=end_time,
+ )
+
+ @staticmethod
+ def __load_tables(text_files: list) -> dict:
+ tables = {}
+ for txt in text_files:
+ datatype = os.path.basename(txt).split(".")[0]
+ if datatype not in GTFS_DATATYPES:
+ print(f"{datatype} is not specified in GTFS, skipping...")
continue
- with open(txt, encoding='utf-8_sig') as t:
- df = pd.read_csv(t, dtype=str)
+ with open(txt, encoding="utf-8_sig") as f:
+ df = pd.read_csv(f, dtype=str)
if len(df) == 0:
- print(f'{datatype}.txt is empty, skipping...')
+ print(f"{datatype}.txt is empty, skipping...")
continue
- self.dataframes[os.path.basename(txt).split('.')[0]] = df
+ tables[os.path.basename(txt).split(".")[0]] = df
for datatype in GTFS_DATATYPES:
- if GTFS_DATATYPES[datatype]['required'] and \
- datatype not in self.dataframes:
- raise FileNotFoundError(f'{datatype} is not exists.')
-
- # cast some numeric value columns to int or float
- self.dataframes['stops'] = self.dataframes['stops'].astype(
- {'stop_lon': float, 'stop_lat': float})
- self.dataframes['stop_times'] = self.dataframes['stop_times'].astype({
- 'stop_sequence': int})
- if self.dataframes.get('shapes') is not None:
- self.dataframes['shapes'] = self.dataframes['shapes'].astype(
- {'shape_pt_lon': float, 'shape_pt_lat': float, 'shape_pt_sequence': int})
-
- if 'parent_station' not in self.dataframes.get('stops').columns:
- # parent_station is optional column on GTFS but use in this module
- # when parent_station is not in stops, fill by 'nan' (not NaN)
- self.dataframes['stops']['parent_station'] = 'nan'
+ if GTFS_DATATYPES[datatype]["required"] and datatype not in tables:
+                raise FileNotFoundError(f"{datatype} does not exist.")
- if as_frequency:
- self.similar_stops_df = None
- if as_unify_stops:
- self.aggregate_similar_stops(delimiter, max_distance_degree)
- else:
- # no unifying stops
- self.dataframes['stops']['similar_stop_id'] = self.dataframes['stops']['stop_id']
- self.dataframes['stops']['similar_stop_name'] = self.dataframes['stops']['stop_name']
- self.dataframes['stops']['similar_stops_centroid'] = self.dataframes['stops'][[
- 'stop_lon', 'stop_lat']].values.tolist()
- self.similar_stops_df = self.dataframes['stops'][[
- 'similar_stop_id', 'similar_stop_name', 'similar_stops_centroid']].copy()
-
- def aggregate_similar_stops(self, delimiter, max_distance_degree):
- parent_ids = self.dataframes['stops']['parent_station'].unique()
- self.dataframes['stops']['is_parent'] = self.dataframes['stops']['stop_id'].map(
- lambda stop_id: 1 if stop_id in parent_ids else 0)
-
- self.dataframes['stops'][['similar_stop_id', 'similar_stop_name', 'similar_stops_centroid']] = self.dataframes['stops']['stop_id'].map(
- lambda stop_id: self.get_similar_stop_tuple(stop_id, delimiter, max_distance_degree)).apply(pd.Series)
- self.dataframes['stops']['position_id'] = self.dataframes['stops']['similar_stops_centroid'].map(
- latlon_to_str)
- self.dataframes['stops']['unique_id'] = self.dataframes['stops']['similar_stop_id'] + \
- self.dataframes['stops']['position_id']
-
- # sometimes stop_name accidently becomes pd.Series instead of str.
- self.dataframes['stops']['similar_stop_name'] = self.dataframes['stops']['similar_stop_name'].map(
- lambda val: val if type(val) == str else val.stop_name)
-
- self.similar_stops_df = self.dataframes['stops'].drop_duplicates(
- subset='unique_id')[[
- 'position_id', 'similar_stop_id', 'similar_stop_name', 'similar_stops_centroid']].copy()
+ # cast some numeric columns from str to numeric
+ tables["stops"] = tables["stops"].astype({"stop_lon": float, "stop_lat": float})
+ tables["stop_times"] = tables["stop_times"].astype({"stop_sequence": int})
+ if tables.get("shapes") is not None:
+ tables["shapes"] = tables["shapes"].astype(
+ {"shape_pt_lon": float, "shape_pt_lat": float, "shape_pt_sequence": int}
+ )
+
+        # parent_station is an optional column in GTFS but is used in this module;
+        # when parent_station is missing from stops, fill it with the string 'nan' (not NaN)
+ if "parent_station" not in tables.get("stops").columns:
+ tables["stops"]["parent_station"] = "nan"
+
+ return tables
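__load_tables reads every *.txt with dtype=str and casts the few numeric columns afterwards; loading as strings keeps IDs like "0001" from being mangled into integers. A standalone sketch with an in-memory stops.txt:

```
import io

import pandas as pd

stops_txt = io.StringIO(
    "stop_id,stop_name,stop_lon,stop_lat\n0001,demo,139.76,35.68\n"
)
stops = pd.read_csv(stops_txt, dtype=str)
assert stops["stop_id"].iloc[0] == "0001"  # leading zeros preserved

# cast only the known-numeric columns, as __load_tables does
stops = stops.astype({"stop_lon": float, "stop_lat": float})
```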
def read_stops(self, ignore_no_route=False) -> list:
"""
@@ -104,167 +89,291 @@ def read_stops(self, ignore_no_route=False) -> list:
list: [description]
"""
- stops_df = self.dataframes['stops'][[
- 'stop_id', 'stop_lat', 'stop_lon', 'stop_name']]
- route_id_on_stops = self.get_route_ids_on_stops()
+ # get unique list of route_id related to each stop
+ stop_times_trip_df = pd.merge(
+ self.dataframes["stop_times"],
+ self.dataframes["trips"],
+ on="trip_id",
+ )
+ route_ids_on_stops = stop_times_trip_df.groupby("stop_id")["route_id"].unique()
+ route_ids_on_stops.apply(lambda x: x.sort())
+ # parse stops to GeoJSON-Features
features = []
- for stop in stops_df.itertuples():
- if stop.stop_id in route_id_on_stops:
- route_ids = route_id_on_stops.at[stop.stop_id].tolist()
- else:
- if ignore_no_route:
- continue
- route_ids = []
+ for stop in self.dataframes["stops"][
+ ["stop_id", "stop_lat", "stop_lon", "stop_name"]
+ ].itertuples():
+ # get all route_id related to the stop
+ route_ids = []
+ if stop.stop_id in route_ids_on_stops:
+ route_ids = route_ids_on_stops.at[stop.stop_id].tolist()
+
+ if len(route_ids) == 0 and ignore_no_route:
+                # skip outputting this stop
+ continue
- features.append({
- 'type': 'Feature',
- 'geometry': {
- 'type': 'Point',
- 'coordinates': [stop.stop_lon, stop.stop_lat]
- },
- 'properties': {
- 'stop_id': stop.stop_id,
- 'stop_name': stop.stop_name,
- 'route_ids': route_ids
+ features.append(
+ {
+ "type": "Feature",
+ "geometry": {
+ "type": "Point",
+ "coordinates": (stop.stop_lon, stop.stop_lat),
+ },
+ "properties": {
+ "stop_id": stop.stop_id,
+ "stop_name": stop.stop_name,
+ "route_ids": route_ids,
+ },
}
- })
+ )
return features
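read_stops derives the route_ids property by merging stop_times with trips and collecting the unique route_id values per stop_id; the .apply(lambda x: x.sort()) call sorts each resulting ndarray in place. A toy reproduction:

```
import pandas as pd

stop_times = pd.DataFrame({"trip_id": ["t1", "t2", "t2"],
                           "stop_id": ["A", "A", "B"]})
trips = pd.DataFrame({"trip_id": ["t1", "t2"], "route_id": ["r2", "r1"]})

merged = pd.merge(stop_times, trips, on="trip_id")
route_ids_on_stops = merged.groupby("stop_id")["route_id"].unique()
route_ids_on_stops.apply(lambda x: x.sort())  # sorts each ndarray in place

assert route_ids_on_stops.at["A"].tolist() == ["r1", "r2"]
assert route_ids_on_stops.at["B"].tolist() == ["r1"]
```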
- def get_route_ids_on_stops(self):
- stop_times_trip_df = pd.merge(
- self.dataframes['stop_times'],
- self.dataframes['trips'],
- on='trip_id',
- )
- group = stop_times_trip_df.groupby('stop_id')['route_id'].unique()
- group.apply(lambda x: x.sort())
- return group
-
- def read_interpolated_stops(self):
+ def read_routes(self, no_shapes=False) -> list:
"""
- Read stops "interpolated" by parent station or stop_id or stop_name and distance.
- There are many similar stops that are near to each, has same name, or has same prefix in stop_id.
- In traffic analyzing, it is good for that similar stops to be grouped as same stop.
- This method group them by some elements, parent, id, name and distance.
+        read routes from shapes or stop_times
+        First, this method tries to load shapes and parse them into routes,
+        but shapes is an optional table in GTFS. If shapes does not exist or
+        no_shapes is True, routes are parsed from stop_times, stops, trips, and routes.
Args:
- delimiter (str, optional): stop_id delimiter, sample_A, sample_B, then delimiter is '_'. Defaults to ''.
- max_distance_degree (float, optional): distance limit in grouping by stop_name. Defaults to 0.01.
+ no_shapes (bool, optional): ignore shapes table. Defaults to False.
Returns:
- [type]: [description]
+ [list]: list of GeoJSON-Feature-dict
"""
+ features = []
- stop_dicts = self.similar_stops_df[[
- 'similar_stop_id', 'similar_stop_name', 'similar_stops_centroid']].to_dict(orient='records')
- return [{
- 'type': 'Feature',
- 'geometry': {
- 'type': 'Point',
- 'coordinates': stop['similar_stops_centroid']
- },
- 'properties': {
- 'similar_stop_name': stop['similar_stop_name'],
- 'similar_stop_id': stop['similar_stop_id'],
- }
- } for stop in stop_dicts]
+ if self.dataframes.get("shapes") is None or no_shapes:
+ # trip-route-merge:A
+ trips_routes = pd.merge(
+ self.dataframes["trips"][["trip_id", "route_id"]],
+ self.dataframes["routes"][
+ ["route_id", "route_long_name", "route_short_name"]
+ ],
+ on="route_id",
+ )
- def read_route_frequency(self, yyyymmdd='', begin_time='', end_time=''):
- """
- By grouped stops, aggregate route frequency.
- Filtering trips by a date, you can aggregate frequency only route serviced on the date.
+ # stop_times-stops-merge:B
+ stop_times_stop = pd.merge(
+ self.dataframes["stop_times"][["stop_id", "trip_id", "stop_sequence"]],
+ self.dataframes.get("stops")[["stop_id", "stop_lon", "stop_lat"]],
+ on="stop_id",
+ )
- Args:
- yyyymmdd (str, optional): date, like 20210401. Defaults to ''.
- begin_time (str, optional): 'hhmmss' <= departure time, like 030000. Defaults to ''.
- end_time (str, optional): 'hhmmss' > departure time, like 280000. Defaults to ''.
+ # A-B-merge
+ merged = pd.merge(stop_times_stop, trips_routes, on="trip_id")
+ merged["route_concat_name"] = merged["route_long_name"].fillna("") + merged[
+ "route_short_name"
+ ].fillna("")
- Returns:
- [type]: [description]
- """
- stop_times_df = self.dataframes.get(
- 'stop_times')[['stop_id', 'trip_id', 'stop_sequence', 'departure_time']].sort_values(
- ['trip_id', 'stop_sequence']).copy()
+ # parse routes
+ for route_id in merged["route_id"].unique():
+ route = merged[merged["route_id"] == route_id]
+ trip_id = route["trip_id"].unique()[0]
+ route = route[route["trip_id"] == trip_id].sort_values("stop_sequence")
+ features.append(
+ {
+ "type": "Feature",
+ "geometry": {
+ "type": "LineString",
+ "coordinates": route[
+ ["stop_lon", "stop_lat"]
+ ].values.tolist(),
+ },
+ "properties": {
+ "route_id": str(route_id),
+ "route_name": route.route_concat_name.values.tolist()[0],
+ },
+ }
+ )
+ else:
+ # parse shape.txt to GeoJSON-Features
+ shape_coords = self.__get_shapes_coordinates()
+ shape_ids_on_routes = self.__get_shape_ids_on_routes()
+ # list-up already loaded shape_ids
+ loaded_shape_ids = set()
+ for route in self.dataframes.get("routes").itertuples():
+ if shape_ids_on_routes.get(route.route_id) is None:
+ continue
- # filter stop_times by whether serviced or not
- if yyyymmdd:
- trips_filtered_by_day = self.get_trips_on_a_date(yyyymmdd)
- stop_times_df = pd.merge(
- stop_times_df, trips_filtered_by_day, on='trip_id', how='left')
- stop_times_df = stop_times_df[stop_times_df['service_flag'] == 1]
+ # get coords by route_id
+ coordinates = []
+ for shape_id in shape_ids_on_routes[route.route_id]:
+ coordinates.append(shape_coords.at[shape_id])
+ loaded_shape_ids.add(shape_id) # update loaded shape_ids
+
+                route_name = self.__get_route_name_from_tuple(route)
+ features.append(
+ {
+ "type": "Feature",
+ "geometry": {
+ "type": "MultiLineString",
+ "coordinates": coordinates,
+ },
+ "properties": {
+ "route_id": str(route.route_id),
+ "route_name": route_name,
+ },
+ }
+ )
+
+ # load shapes unloaded yet
+ for shape_id in list(
+ filter(lambda id: id not in loaded_shape_ids, shape_coords.index)
+ ):
+ features.append(
+ {
+ "type": "Feature",
+ "geometry": {
+ "type": "MultiLineString",
+ "coordinates": [shape_coords.at[shape_id]],
+ },
+ "properties": {
+ "route_id": None,
+ "route_name": str(shape_id),
+ },
+ }
+ )
- # join agency info)
- stop_times_df = pd.merge(stop_times_df, self.dataframes['trips'][[
- 'trip_id', 'route_id']], on='trip_id', how='left')
- stop_times_df = pd.merge(stop_times_df, self.dataframes['routes'][[
- 'route_id', 'agency_id']], on='route_id', how='left')
- stop_times_df = pd.merge(stop_times_df, self.dataframes['agency'][[
- 'agency_id', 'agency_name']], on='agency_id', how='left')
+ return features
- # get prev and next stops_id, stop_name, trip_id
- stop_times_df = pd.merge(stop_times_df, self.dataframes['stops'][[
- 'stop_id', 'similar_stop_id', 'similar_stop_name', 'similar_stops_centroid']], on='stop_id', how='left')
- stop_times_df['prev_stop_id'] = stop_times_df['similar_stop_id']
- stop_times_df['prev_trip_id'] = stop_times_df['trip_id']
- stop_times_df['prev_stop_name'] = stop_times_df['similar_stop_name']
- stop_times_df['prev_similar_stops_centroid'] = stop_times_df['similar_stops_centroid']
- stop_times_df['next_stop_id'] = stop_times_df['similar_stop_id'].shift(
- -1)
- stop_times_df['next_trip_id'] = stop_times_df['trip_id'].shift(-1)
- stop_times_df['next_stop_name'] = stop_times_df['similar_stop_name'].shift(
- -1)
- stop_times_df['next_similar_stops_centroid'] = stop_times_df['similar_stops_centroid'].shift(
- -1)
+ @staticmethod
+    def __get_route_name_from_tuple(route):
+ if not pd.isna(route.route_short_name):
+ return route.route_short_name
+ elif not pd.isna(route.route_long_name):
+ return route.route_long_name
+ else:
+            raise ValueError(f'{route} has neither "route_long_name" nor "route_short_name".')
- # drop last stops (-> stops has no next stop)
- stop_times_df = stop_times_df.drop(
- index=stop_times_df.query('prev_trip_id != next_trip_id').index)
+ def __get_shape_ids_on_routes(self):
+ trips_with_shape_df = self.dataframes["trips"][["route_id", "shape_id"]].dropna(
+ subset=["shape_id"]
+ )
+ group = trips_with_shape_df.groupby("route_id")["shape_id"].unique()
+ group.apply(lambda x: x.sort())
+ return group
+
+ def __get_shapes_coordinates(self):
+ shapes_df = self.dataframes["shapes"].copy()
+        shapes_df = shapes_df.sort_values("shape_pt_sequence")
+ shapes_df["pt"] = shapes_df[["shape_pt_lon", "shape_pt_lat"]].values.tolist()
+ return shapes_df.groupby("shape_id")["pt"].apply(tuple)
+
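__get_shapes_coordinates turns shapes.txt into one ordered coordinate sequence per shape_id, which read_routes then looks up via .at[shape_id]. A toy version of the groupby:

```
import pandas as pd

shapes = pd.DataFrame({"shape_id": ["s1", "s1"],
                       "shape_pt_lon": [139.0, 139.1],
                       "shape_pt_lat": [35.0, 35.1],
                       "shape_pt_sequence": [1, 2]})
shapes = shapes.sort_values("shape_pt_sequence")
shapes["pt"] = shapes[["shape_pt_lon", "shape_pt_lat"]].values.tolist()
coords = shapes.groupby("shape_id")["pt"].apply(tuple)

assert coords.at["s1"] == ([139.0, 35.0], [139.1, 35.1])
```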
+ def __aggregate_similar_stops(
+ self,
+ delimiter: str,
+ max_distance_degree: float,
+ as_unify_stops: bool,
+ yyyymmdd="",
+ begin_time="",
+ end_time="",
+ ):
+ # filter stop_times by whether serviced or not
+ if yyyymmdd:
+ trips_filtered_by_day = self.__get_trips_on_a_date(yyyymmdd)
+ self.dataframes["stop_times"] = pd.merge(
+ self.dataframes["stop_times"],
+ trips_filtered_by_day,
+ on="trip_id",
+ how="left",
+ )
+ self.dataframes["stop_times"] = self.dataframes["stop_times"][
+ self.dataframes["stop_times"]["service_flag"] == 1
+ ]
# time filter
if begin_time and end_time:
- stop_times_df = self.stop_time_filter(stop_times_df, begin_time, end_time)
-
- # define path_id by prev-stops-centroid and next-stops-centroid
- stop_times_df['path_id'] = stop_times_df['prev_stop_id'] + stop_times_df['next_stop_id'] + stop_times_df['prev_similar_stops_centroid'].map(
- latlon_to_str) + stop_times_df['next_similar_stops_centroid'].map(latlon_to_str)
-
- # aggregate path-frequency
- path_frequency = stop_times_df[['similar_stop_id', 'path_id']].groupby(
- 'path_id').count().reset_index()
- path_frequency.columns = ['path_id', 'path_count']
- path_data = pd.merge(path_frequency, stop_times_df.drop_duplicates(
- subset='path_id'), on='path_id')
- path_data_dict = path_data.to_dict(orient='records')
-
- return [{
- 'type': 'Feature',
- 'geometry': {
- 'type': 'LineString',
- 'coordinates': (path['prev_similar_stops_centroid'],
- path['next_similar_stops_centroid'])
- },
- 'properties': {
- 'frequency': path['path_count'],
- 'prev_stop_id': path['prev_stop_id'],
- 'prev_stop_name': path['prev_stop_name'],
- 'next_stop_id': path['next_stop_id'],
- 'next_stop_name': path['next_stop_name'],
- 'agency_id':path['agency_id'],
- 'agency_name': path['agency_name']
- }
- } for path in path_data_dict]
-
- def stop_time_filter(self, stop_time_df, begin_time, end_time):
- # departure_time is nullable and expressed in "hh:mm:ss" or "h:mm:ss" format.
- # Hour can be mor than 24.
- # Therefore, drop null records and convert times to integers.
- df = stop_time_df[stop_time_df.departure_time != '']
- int_dep_times = stop_time_df.departure_time.str.replace(':', '').astype(int)
- return df[(int_dep_times >= int(begin_time)) & (int_dep_times < int(end_time))]
-
- @ lru_cache(maxsize=None)
- def get_similar_stop_tuple(self, stop_id: str, delimiter='', max_distance_degree=0.01):
+            # departure_time is nullable and expressed in "hh:mm:ss" or "h:mm:ss" format.
+            # Hour can be more than 24.
+            # Therefore, drop null records first, then convert times to integers.
+            self.dataframes["stop_times"] = self.dataframes["stop_times"].dropna(
+                subset=["departure_time"]
+            )
+            int_dep_times = (
+                self.dataframes["stop_times"]
+                .departure_time.str.replace(":", "")
+                .astype(int)
+            )
+            self.dataframes["stop_times"] = self.dataframes["stop_times"][
+                (int_dep_times >= int(begin_time)) & (int_dep_times < int(end_time))
+            ]
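Stripping the colons turns GTFS times into integers that order correctly even past 24:00 ("25:30:00" becomes 253000, valid for after-midnight service), so the begin/end window reduces to a plain integer comparison. A small demonstration:

```
import pandas as pd

dep = pd.Series(["07:15:00", "25:30:00", "8:05:00"])
as_int = dep.str.replace(":", "").astype(int)  # e.g. 253000

mask = (as_int >= int("080000")) & (as_int < int("260000"))
assert mask.tolist() == [False, True, True]
```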
+
+ if as_unify_stops:
+ parent_ids = self.dataframes["stops"]["parent_station"].unique()
+ self.dataframes["stops"]["is_parent"] = self.dataframes["stops"][
+ "stop_id"
+ ].map(lambda stop_id: 1 if stop_id in parent_ids else 0)
+
+ self.dataframes["stops"][
+ ["similar_stop_id", "similar_stop_name", "similar_stops_centroid"]
+ ] = (
+ self.dataframes["stops"]["stop_id"]
+ .map(
+ lambda stop_id: self.__get_similar_stop_tuple(
+ stop_id, delimiter, max_distance_degree
+ )
+ )
+ .apply(pd.Series)
+ )
+ self.dataframes["stops"]["position_id"] = self.dataframes["stops"][
+ "similar_stops_centroid"
+ ].map(latlon_to_str)
+ self.dataframes["stops"]["unique_id"] = (
+ self.dataframes["stops"]["similar_stop_id"]
+ + self.dataframes["stops"]["position_id"]
+ )
+
+ # sometimes stop_name accidently becomes pd.Series instead of str.
+ self.dataframes["stops"]["similar_stop_name"] = self.dataframes["stops"][
+ "similar_stop_name"
+ ].map(lambda val: val if type(val) == str else val.stop_name)
+
+ position_count = (
+ self.dataframes["stop_times"]
+ .merge(self.dataframes["stops"], on="stop_id", how="left")
+ .groupby("position_id")
+ .size()
+ .to_frame()
+ .reset_index()
+ )
+ position_count.columns = ["position_id", "position_count"]
+
+ self.similar_stops_df = pd.merge(
+ self.dataframes["stops"].drop_duplicates(subset="position_id")[
+ [
+ "position_id",
+ "similar_stop_id",
+ "similar_stop_name",
+ "similar_stops_centroid",
+ ]
+ ],
+ position_count,
+ on="position_id",
+ how="left",
+ )
+ else:
+ # no unifying stops
+ self.dataframes["stops"]["similar_stop_id"] = self.dataframes["stops"][
+ "stop_id"
+ ]
+ self.dataframes["stops"]["similar_stop_name"] = self.dataframes["stops"][
+ "stop_name"
+ ]
+ self.dataframes["stops"]["similar_stops_centroid"] = self.dataframes[
+ "stops"
+ ][["stop_lon", "stop_lat"]].values.tolist()
+ self.dataframes["stops"]["position_count"] = 1
+ self.similar_stops_df = self.dataframes["stops"][
+ [
+ "similar_stop_id",
+ "similar_stop_name",
+ "similar_stops_centroid",
+ "position_count",
+ ]
+ ].copy()
+
+ @lru_cache(maxsize=None)
+ def __get_similar_stop_tuple(
+ self, stop_id: str, delimiter="", max_distance_degree=0.01
+ ):
"""
With one stop_id, group stops by parent, stop_id, or stop_name and each distance.
- parent: if stop has parent_station, the 'centroid' is parent_station lat-lon
@@ -277,290 +386,348 @@ def get_similar_stop_tuple(self, stop_id: str, delimiter='', max_distance_degree
Returns:
str, str, [float, float]: similar_stop_id, similar_stop_name, similar_stops_centroid
"""
- stops_df = self.dataframes['stops'].sort_values('stop_id')
- stop = stops_df[stops_df['stop_id'] == stop_id].iloc[0]
-
- if stop['is_parent'] == 1:
- return stop['stop_id'], stop['stop_name'], [stop['stop_lon'], stop['stop_lat']]
-
- if str(stop['parent_station']) != 'nan':
- similar_stop_id = stop['parent_station']
- similar_stop = stops_df[stops_df['stop_id'] == similar_stop_id]
- similar_stop_name = similar_stop[['stop_name']].iloc[0]
- similar_stop_centroid = similar_stop[[
- 'stop_lon', 'stop_lat']].iloc[0].values.tolist()
+ stops_df = self.dataframes["stops"].sort_values("stop_id")
+ stop = stops_df[stops_df["stop_id"] == stop_id].iloc[0]
+
+ if stop["is_parent"] == 1:
+ return (
+ stop["stop_id"],
+ stop["stop_name"],
+ [stop["stop_lon"], stop["stop_lat"]],
+ )
+
+ if str(stop["parent_station"]) != "nan":
+ similar_stop_id = stop["parent_station"]
+ similar_stop = stops_df[stops_df["stop_id"] == similar_stop_id]
+ similar_stop_name = similar_stop[["stop_name"]].iloc[0]
+ similar_stop_centroid = (
+ similar_stop[["stop_lon", "stop_lat"]].iloc[0].values.tolist()
+ )
return similar_stop_id, similar_stop_name, similar_stop_centroid
if delimiter:
- stops_df_id_delimited = self.get_stops_id_delimited(delimiter)
+ stops_df_id_delimited = self.__get_stops_id_delimited(delimiter)
stop_id_prefix = stop_id.rsplit(delimiter, 1)[0]
if stop_id_prefix != stop_id:
similar_stop_id = stop_id_prefix
- seperated_only_stops = stops_df_id_delimited[stops_df_id_delimited['delimited']]
- similar_stops = seperated_only_stops[seperated_only_stops['stop_id_prefix'] == stop_id_prefix][[
- 'stop_name', 'similar_stops_centroid_lon', 'similar_stops_centroid_lat']]
- similar_stop_name = similar_stops[['stop_name']].iloc[0]
- similar_stop_centroid = similar_stops[[
- 'similar_stops_centroid_lon', 'similar_stops_centroid_lat']].values.tolist()[0]
+ seperated_only_stops = stops_df_id_delimited[
+ stops_df_id_delimited["delimited"]
+ ]
+ similar_stops = seperated_only_stops[
+ seperated_only_stops["stop_id_prefix"] == stop_id_prefix
+ ][
+ [
+ "stop_name",
+ "similar_stops_centroid_lon",
+ "similar_stops_centroid_lat",
+ ]
+ ]
+ similar_stop_name = similar_stops[["stop_name"]].iloc[0]
+ similar_stop_centroid = similar_stops[
+ ["similar_stops_centroid_lon", "similar_stops_centroid_lat"]
+ ].values.tolist()[0]
return similar_stop_id, similar_stop_name, similar_stop_centroid
else:
# when cannot seperate stop_id, grouping by name and distance
- stops_df = stops_df_id_delimited[~stops_df_id_delimited['delimited']]
+ stops_df = stops_df_id_delimited[~stops_df_id_delimited["delimited"]]
# grouping by name and distance
- similar_stops = stops_df[stops_df['stop_name'] == stop['stop_name']][[
- 'stop_id', 'stop_name', 'stop_lon', 'stop_lat']]
+ similar_stops = stops_df[stops_df["stop_name"] == stop["stop_name"]][
+ ["stop_id", "stop_name", "stop_lon", "stop_lat"]
+ ]
similar_stops = similar_stops.query(
- f'(stop_lon - {stop["stop_lon"]}) ** 2 + (stop_lat - {stop["stop_lat"]}) ** 2 < {max_distance_degree ** 2}')
- similar_stop_centroid = similar_stops[[
- 'stop_lon', 'stop_lat']].mean().values.tolist()
- similar_stop_id = similar_stops['stop_id'].iloc[0]
- similar_stop_name = stop['stop_name']
+ f'(stop_lon - {stop["stop_lon"]}) ** 2 + (stop_lat - {stop["stop_lat"]}) ** 2 < {max_distance_degree ** 2}'
+ )
+ similar_stop_centroid = (
+ similar_stops[["stop_lon", "stop_lat"]].mean().values.tolist()
+ )
+ similar_stop_id = similar_stops["stop_id"].iloc[0]
+ similar_stop_name = stop["stop_name"]
return similar_stop_id, similar_stop_name, similar_stop_centroid
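When neither a parent station nor a delimited stop_id applies, stops sharing a name are grouped by comparing squared coordinate deltas against max_distance_degree²; the default 0.01° is roughly 1 km at Japanese latitudes. A toy run of the same df.query filter:

```
import pandas as pd

stops = pd.DataFrame({"stop_id": ["X1", "X2", "X3"],
                      "stop_name": ["demo", "demo", "demo"],
                      "stop_lon": [139.000, 139.005, 139.100],
                      "stop_lat": [35.000, 35.002, 35.000]})
stop = stops.iloc[0]
max_distance_degree = 0.01

near = stops.query(
    f'(stop_lon - {stop["stop_lon"]}) ** 2 + (stop_lat - {stop["stop_lat"]}) ** 2'
    f" < {max_distance_degree ** 2}"
)
assert near["stop_id"].tolist() == ["X1", "X2"]  # X3 is ~0.1 deg away
```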
- def get_similar_stops_by_name_and_distance(self, stop_name, distance):
- similar_stops = self.stops_df[self.stops_df['stop_name'] == stop['stop_name']][[
- 'stop_lon', 'stop_lat']].copy()
- similar_stops = similar_stops.query(
- f'(stop_lon - {stop["stop_lon"]}) ** 2 + (stop_lat - {stop["stop_lat"]}) ** 2 < {max_distance_degree ** 2}')
- return similar_stops
-
- @ lru_cache(maxsize=None)
- def get_stops_id_delimited(self, delimiter):
- stops_df = self.dataframes.get(
- 'stops')[['stop_id', 'stop_name', 'stop_lon', 'stop_lat', 'parent_station']].copy()
- stops_df['stop_id_prefix'] = stops_df['stop_id'].map(
- lambda stop_id: stop_id.rsplit(delimiter, 1)[0])
- stops_df['delimited'] = stops_df['stop_id'] != stops_df['stop_id_prefix']
- grouped_by_prefix = stops_df[[
- 'stop_id_prefix', 'stop_lon', 'stop_lat']].groupby('stop_id_prefix').mean().reset_index()
+ @lru_cache(maxsize=None)
+ def __get_stops_id_delimited(self, delimiter: str):
+ stops_df = self.dataframes.get("stops")[
+ ["stop_id", "stop_name", "stop_lon", "stop_lat", "parent_station"]
+ ].copy()
+ stops_df["stop_id_prefix"] = stops_df["stop_id"].map(
+ lambda stop_id: stop_id.rsplit(delimiter, 1)[0]
+ )
+ stops_df["delimited"] = stops_df["stop_id"] != stops_df["stop_id_prefix"]
+ grouped_by_prefix = (
+ stops_df[["stop_id_prefix", "stop_lon", "stop_lat"]]
+ .groupby("stop_id_prefix")
+ .mean()
+ .reset_index()
+ )
grouped_by_prefix.columns = [
- 'stop_id_prefix', 'similar_stops_centroid_lon', 'similar_stops_centroid_lat']
+ "stop_id_prefix",
+ "similar_stops_centroid_lon",
+ "similar_stops_centroid_lat",
+ ]
stops_df_with_centroid = pd.merge(
- stops_df, grouped_by_prefix, on='stop_id_prefix', how='left')
+ stops_df, grouped_by_prefix, on="stop_id_prefix", how="left"
+ )
return stops_df_with_centroid
- @ classmethod
- def get_route_name_from_tupple(cls, route):
- if not pd.isna(route.route_short_name):
- return route.route_short_name
- elif not pd.isna(route.route_long_name):
- return route.route_long_name
- else:
- ValueError(
- f'{route} have neither "route_long_name" or "route_short_time".')
+ def read_interpolated_stops(self):
+ """
+ Read stops "interpolated" by parent station or stop_id or stop_name and distance.
+        Many similar stops are near each other, have the same name, or share a stop_id prefix.
+        In traffic analysis it is useful to group such similar stops as one stop.
+        This method groups them by parent station, stop_id, stop_name, and distance.
- def routes_count(self, no_shapes=False):
- if self.dataframes.get('shapes') is None or no_shapes:
- route_ids = self.dataframes.get('trips')['route_id'].unique()
- return len(route_ids)
- else:
- shape_ids = self.dataframes.get('shapes')['shape_id'].unique()
- return len(shape_ids)
-
- @ lru_cache(maxsize=None)
- def get_shape_ids_on_routes(self):
- trips_with_shape_df = self.dataframes['trips'][[
- 'route_id', 'shape_id']].dropna(subset=['shape_id'])
- group = trips_with_shape_df.groupby('route_id')['shape_id'].unique()
- group.apply(lambda x: x.sort())
- return group
+        Args:
+            None: delimiter and max_distance_degree are GTFSParser constructor arguments.
- @ lru_cache(maxsize=None)
- def get_shapes_coordinates(self):
- shapes_df = self.dataframes['shapes'].copy()
- shapes_df.sort_values('shape_pt_sequence')
- shapes_df['pt'] = shapes_df[[
- 'shape_pt_lon', 'shape_pt_lat']].values.tolist()
- return shapes_df.groupby('shape_id')['pt'].apply(list)
+ Returns:
+            list: list of GeoJSON-Feature-dicts.
+ """
- def get_trips_on_a_date(self, yyyymmdd: str):
+ stop_dicts = self.similar_stops_df[
+ [
+ "similar_stop_id",
+ "similar_stop_name",
+ "similar_stops_centroid",
+ "position_count",
+ ]
+ ].to_dict(orient="records")
+ return [
+ {
+ "type": "Feature",
+ "geometry": {
+ "type": "Point",
+ "coordinates": stop["similar_stops_centroid"],
+ },
+ "properties": {
+ "similar_stop_name": stop["similar_stop_name"],
+ "similar_stop_id": stop["similar_stop_id"],
+ "count": stop["position_count"],
+ },
+ }
+ for stop in stop_dicts
+ ]
+
+ def read_route_frequency(self):
"""
- get trips are on service on a date.
+        Aggregate route frequency between grouped (similar) stops.
+        Trips are pre-filtered by date and time in the constructor, so only
+        routes serviced in that window are counted.
Args:
- yyyymmdd (str): [description]
+            None: the date/time filters (yyyymmdd, begin_time, end_time) are
+            now GTFSParser constructor arguments.
Returns:
[type]: [description]
"""
- # sunday, monday, tuesday...
- day_of_week = datetime.date(int(yyyymmdd[0:4]), int(
- yyyymmdd[4:6]), int(yyyymmdd[6:8])).strftime('%A').lower()
-
- # filter services by day
- calendar_df = self.dataframes['calendar'].copy()
- calendar_df = calendar_df.astype({'start_date': int, 'end_date': int})
- calendar_df = calendar_df[calendar_df[day_of_week] == '1']
- calendar_df = calendar_df.query(
- f'start_date <= {int(yyyymmdd)} and {int(yyyymmdd)} <= end_date', engine='python')
-
- services_on_a_day = calendar_df[['service_id']]
+ stop_times_df = (
+ self.dataframes.get("stop_times")[
+ ["stop_id", "trip_id", "stop_sequence", "departure_time"]
+ ]
+ .sort_values(["trip_id", "stop_sequence"])
+ .copy()
+ )
- calendar_dates_df = self.dataframes.get('calendar_dates')
- if calendar_dates_df is not None:
- filtered = calendar_dates_df[calendar_dates_df['date'] == yyyymmdd][[
- 'service_id', 'exception_type']]
- to_be_removed_services = filtered[filtered['exception_type'] == '2']
- to_be_appended_services = filtered[filtered['exception_type'] == '1'][[
- 'service_id']]
+        # join agency info
+ stop_times_df = pd.merge(
+ stop_times_df,
+ self.dataframes["trips"][["trip_id", "route_id"]],
+ on="trip_id",
+ how="left",
+ )
+ stop_times_df = pd.merge(
+ stop_times_df,
+ self.dataframes["routes"][["route_id", "agency_id"]],
+ on="route_id",
+ how="left",
+ )
+ stop_times_df = pd.merge(
+ stop_times_df,
+ self.dataframes["agency"][["agency_id", "agency_name"]],
+ on="agency_id",
+ how="left",
+ )
- services_on_a_day = pd.merge(
- services_on_a_day, to_be_removed_services, on='service_id', how='left')
- services_on_a_day = services_on_a_day[services_on_a_day['exception_type'] != '2']
- services_on_a_day = pd.concat(
- [services_on_a_day, to_be_appended_services])
+ # get prev and next stops_id, stop_name, trip_id
+ stop_times_df = pd.merge(
+ stop_times_df,
+ self.dataframes["stops"][
+ [
+ "stop_id",
+ "similar_stop_id",
+ "similar_stop_name",
+ "similar_stops_centroid",
+ ]
+ ],
+ on="stop_id",
+ how="left",
+ )
+ stop_times_df["prev_stop_id"] = stop_times_df["similar_stop_id"]
+ stop_times_df["prev_trip_id"] = stop_times_df["trip_id"]
+ stop_times_df["prev_stop_name"] = stop_times_df["similar_stop_name"]
+ stop_times_df["prev_similar_stops_centroid"] = stop_times_df[
+ "similar_stops_centroid"
+ ]
+ stop_times_df["next_stop_id"] = stop_times_df["similar_stop_id"].shift(-1)
+ stop_times_df["next_trip_id"] = stop_times_df["trip_id"].shift(-1)
+ stop_times_df["next_stop_name"] = stop_times_df["similar_stop_name"].shift(-1)
+ stop_times_df["next_similar_stops_centroid"] = stop_times_df[
+ "similar_stops_centroid"
+ ].shift(-1)
- services_on_a_day['service_flag'] = 1
+ # drop last stops (-> stops has no next stop)
+ stop_times_df = stop_times_df.drop(
+ index=stop_times_df.query("prev_trip_id != next_trip_id").index
+ )
- # filter trips
- trips_df = self.dataframes['trips'].copy()
- trip_service = pd.merge(trips_df, services_on_a_day, on='service_id')
- trip_service = trip_service[trip_service['service_flag'] == 1]
+ # define path_id by prev-stops-centroid and next-stops-centroid
+ stop_times_df["path_id"] = (
+ stop_times_df["prev_stop_id"]
+ + stop_times_df["next_stop_id"]
+ + stop_times_df["prev_similar_stops_centroid"].map(latlon_to_str)
+ + stop_times_df["next_similar_stops_centroid"].map(latlon_to_str)
+ )
- return trip_service[['trip_id', 'service_flag']]
+ # aggregate path-frequency
+ path_frequency = (
+ stop_times_df[["similar_stop_id", "path_id"]]
+ .groupby("path_id")
+ .count()
+ .reset_index()
+ )
+ path_frequency.columns = ["path_id", "path_count"]
+ path_data = pd.merge(
+ path_frequency,
+ stop_times_df.drop_duplicates(subset="path_id"),
+ on="path_id",
+ )
+ path_data_dict = path_data.to_dict(orient="records")
+
+ return [
+ {
+ "type": "Feature",
+ "geometry": {
+ "type": "LineString",
+ "coordinates": (
+ path["prev_similar_stops_centroid"],
+ path["next_similar_stops_centroid"],
+ ),
+ },
+ "properties": {
+ "frequency": path["path_count"],
+ "prev_stop_id": path["prev_stop_id"],
+ "prev_stop_name": path["prev_stop_name"],
+ "next_stop_id": path["next_stop_id"],
+ "next_stop_name": path["next_stop_name"],
+ "agency_id": path["agency_id"],
+ "agency_name": path["agency_name"],
+ },
+ }
+ for path in path_data_dict
+ ]
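Each path_id identifies one directed segment between two similar stops, so counting rows per path_id gives the number of trips traversing that segment. A minimal reproduction (real path_ids concatenate stop IDs and centroid strings; "A>B" is illustrative):

```
import pandas as pd

# one row per traversal of a segment (prev stop -> next stop)
seg = pd.DataFrame({"path_id": ["A>B", "A>B", "B>C"],
                    "similar_stop_id": ["A", "A", "B"]})
freq = seg.groupby("path_id").count().reset_index()
freq.columns = ["path_id", "path_count"]

assert freq.set_index("path_id")["path_count"].to_dict() == {"A>B": 2, "B>C": 1}
```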
- def read_routes(self, no_shapes=False) -> list:
+ def __get_trips_on_a_date(self, yyyymmdd: str):
"""
- read routes by shapes or stop_times
- First, this method try to load shapes and parse it into routes,
- but shapes is optional table in GTFS. Then is shapes does not exist or no_shapes is True,
- this parse routes by stop_time, stops, trips, and routes.
+        get trips that are in service on a given date.
Args:
- no_shapes (bool, optional): ignore shapes table. Defaults to False.
+            yyyymmdd (str): date, like 20210401.
Returns:
- [list]: list of GeoJSON-Feature-dict
+            pd.DataFrame: trip_id rows with service_flag == 1.
"""
- if self.dataframes.get('shapes') is None or no_shapes:
- # no-shape routes
+ # sunday, monday, tuesday...
+ day_of_week = (
+ datetime.date(int(yyyymmdd[0:4]), int(yyyymmdd[4:6]), int(yyyymmdd[6:8]))
+ .strftime("%A")
+ .lower()
+ )
- # trip-route-merge:A
- trips_df = self.dataframes['trips'][['trip_id', 'route_id']]
- routes_df = self.dataframes['routes'][[
- 'route_id', 'route_long_name', 'route_short_name']]
- trips_routes = pd.merge(trips_df, routes_df, on='route_id')
+ # filter services by day
+ calendar_df = self.dataframes["calendar"].copy()
+ calendar_df = calendar_df.astype({"start_date": int, "end_date": int})
+ calendar_df = calendar_df[calendar_df[day_of_week] == "1"]
+ calendar_df = calendar_df.query(
+ f"start_date <= {int(yyyymmdd)} and {int(yyyymmdd)} <= end_date",
+ engine="python",
+ )
- # stop_times-stops-merge:B
- stop_times_df = self.dataframes['stop_times'][[
- 'stop_id', 'trip_id', 'stop_sequence']]
- stops_df = self.dataframes.get(
- 'stops')[['stop_id', 'stop_lon', 'stop_lat']]
- merged = pd.merge(
- stop_times_df, stops_df[['stop_id', 'stop_lon', 'stop_lat']], on='stop_id')
+ services_on_a_day = calendar_df[["service_id"]]
- # A-B-merge
- merged = pd.merge(merged, trips_routes, on='trip_id')
- merged['route_concat_name'] = merged['route_long_name'].fillna('') + \
- merged['route_short_name'].fillna('')
+ calendar_dates_df = self.dataframes.get("calendar_dates")
+ if calendar_dates_df is not None:
+ filtered = calendar_dates_df[calendar_dates_df["date"] == yyyymmdd][
+ ["service_id", "exception_type"]
+ ]
+ to_be_removed_services = filtered[filtered["exception_type"] == "2"]
+ to_be_appended_services = filtered[filtered["exception_type"] == "1"][
+ ["service_id"]
+ ]
- # parse routes
- route_ids = merged['route_id'].unique()
- features = []
- for route_id in route_ids:
- route = merged[merged['route_id'] == route_id]
- trip_id = route['trip_id'].unique()[0]
- route = route[route['trip_id'] ==
- trip_id].sort_values('stop_sequence')
- features.append({
- 'type': 'Feature',
- 'geometry': {
- 'type': 'LineString',
- 'coordinates': route[['stop_lon', 'stop_lat']].values.tolist()
- },
- 'properties': {
- 'route_id': str(route_id),
- 'route_name': route.route_concat_name.values.tolist()[0],
- }
- })
- return features
- else:
- shape_coords = self.get_shapes_coordinates()
- shape_ids_on_routes = self.get_shape_ids_on_routes()
- features = []
- for route in self.dataframes.get('routes').itertuples():
- if shape_ids_on_routes.get(route.route_id) is None:
- continue
- coordinates = [shape_coords.at[shape_id]
- for shape_id in shape_ids_on_routes[route.route_id]]
- route_name = self.get_route_name_from_tupple(route)
- features.append({
- 'type': 'Feature',
- 'geometry': {
- 'type': 'MultiLineString',
- 'coordinates': coordinates
- },
- 'properties': {
- 'route_id': str(route.route_id),
- 'route_name': route_name,
- }
- })
+ services_on_a_day = pd.merge(
+ services_on_a_day, to_be_removed_services, on="service_id", how="left"
+ )
+ services_on_a_day = services_on_a_day[
+ services_on_a_day["exception_type"] != "2"
+ ]
+ services_on_a_day = pd.concat([services_on_a_day, to_be_appended_services])
- # list-up already loaded shape_ids, dropping dupulicates
- loaded_shape_ids = list(set(sum([list(val)
- for val in shape_ids_on_routes], [])))
+ services_on_a_day["service_flag"] = 1
- # load shape_ids unloaded yet
- for shape_id in shape_coords.index:
- if shape_id in loaded_shape_ids:
- continue
- features.append({
- 'type': 'Feature',
- 'geometry': {
- 'type': 'MultiLineString',
- 'coordinates': [shape_coords.at[shape_id]]
- },
- 'properties': {
- 'route_id': None,
- 'route_name': str(shape_id),
- }
- })
- return features
+ # filter trips
+ trips_df = self.dataframes["trips"].copy()
+ trip_service = pd.merge(trips_df, services_on_a_day, on="service_id")
+ trip_service = trip_service[trip_service["service_flag"] == 1]
+
+ return trip_service[["trip_id", "service_flag"]]
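
__get_trips_on_a_date above combines calendar's weekday flags and date range with calendar_dates exceptions. A condensed sketch of that filtering on toy tables (GTFS stores dates and flags as strings, hence the "1"/"2" comparisons and int casts; the data is illustrative):

import datetime
import pandas as pd

calendar = pd.DataFrame({
    "service_id": ["s1", "s2"],
    "friday": ["1", "0"],
    "start_date": ["20210401", "20211001"],
    "end_date": ["20210930", "20220331"],
})
calendar_dates = pd.DataFrame({
    "service_id": ["s2"],
    "date": ["20210730"],
    "exception_type": ["1"],  # "1" adds the service on that date, "2" removes it
})

yyyymmdd = "20210730"
day = datetime.date(int(yyyymmdd[:4]), int(yyyymmdd[4:6]), int(yyyymmdd[6:8]))
weekday = day.strftime("%A").lower()  # -> "friday"

# base services: the weekday flag is set and the date falls in the range
active = calendar[
    (calendar[weekday] == "1")
    & (calendar["start_date"].astype(int) <= int(yyyymmdd))
    & (int(yyyymmdd) <= calendar["end_date"].astype(int))
][["service_id"]]

# apply calendar_dates exceptions: type "2" removes, type "1" appends
exc = calendar_dates[calendar_dates["date"] == yyyymmdd]
active = active[~active["service_id"].isin(exc.loc[exc["exception_type"] == "2", "service_id"])]
active = pd.concat([active, exc.loc[exc["exception_type"] == "1", ["service_id"]]])
print(active["service_id"].tolist())  # -> ['s1', 's2']
# the real method then merges these service_ids with trips on service_id
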
if __name__ == "__main__":
import argparse
import shutil
+
parser = argparse.ArgumentParser()
- parser.add_argument('--zip')
- parser.add_argument('--src_dir')
- parser.add_argument('--output_dir')
- parser.add_argument('--no_shapes', action='store_true')
- parser.add_argument('--ignore_no_route', action='store_true')
- parser.add_argument('--frequency', action='store_true')
- parser.add_argument('--yyyymmdd')
- parser.add_argument('--as_unify_stops', action='store_true')
- parser.add_argument('--delimiter')
- parser.add_argument('--begin_time')
- parser.add_argument('--end_time')
+ parser.add_argument("--zip")
+ parser.add_argument("--src_dir")
+ parser.add_argument("--output_dir")
+ parser.add_argument("--no_shapes", action="store_true")
+ parser.add_argument("--ignore_no_route", action="store_true")
+ parser.add_argument("--frequency", action="store_true")
+ parser.add_argument("--yyyymmdd")
+ parser.add_argument("--as_unify_stops", action="store_true")
+ parser.add_argument("--delimiter")
+ parser.add_argument("--begin_time")
+ parser.add_argument("--end_time")
args = parser.parse_args()
if args.zip is None and args.src_dir is None:
- raise RuntimeError('gtfs-jp-parser needs zipfile or src_dir.')
+ raise RuntimeError("gtfs-jp-parser needs zipfile or src_dir.")
if args.yyyymmdd:
if len(args.yyyymmdd) != 8:
raise RuntimeError(
- f'yyyymmdd must be 8 characters string, for example 20210401, your is {args.yyyymmdd} ({len(args.yyyymmdd)} characters)')
+ f"yyyymmdd must be 8 characters string, for example 20210401, your is {args.yyyymmdd} ({len(args.yyyymmdd)} characters)"
+ )
if args.begin_time:
if len(args.begin_time) != 6:
raise RuntimeError(
- f'begin_time must be "hhmmss", your is {args.begin_time}')
+ f'begin_time must be "hhmmss"; yours is {args.begin_time}'
+ )
if not args.end_time:
- raise RuntimeError('end_time is not set.')
+ raise RuntimeError("end_time is not set.")
if args.end_time:
if len(args.end_time) != 6:
- raise RuntimeError(
- f'end_time must be "hhmmss", your is {args.end_time}')
+ raise RuntimeError(f'end_time must be "hhmmss"; yours is {args.end_time}')
if not args.begin_time:
- raise RuntimeError('begin_time is not set.')
+ raise RuntimeError("begin_time is not set.")
if args.zip:
- print('extracting zipfile...')
- temp_dir = os.path.join(tempfile.gettempdir(), 'gtfs-jp-parser')
+ print("extracting zipfile...")
+ temp_dir = os.path.join(tempfile.gettempdir(), "gtfs-jp-parser")
if os.path.exists(temp_dir):
shutil.rmtree(temp_dir)
os.mkdir(temp_dir)
@@ -570,42 +737,39 @@ def read_routes(self, no_shapes=False) -> list:
else:
output_dir = args.src_dir
gtfs_parser = GTFSParser(
- output_dir, as_frequency=args.frequency, as_unify_stops=args.as_unify_stops, delimiter=args.delimiter)
+ output_dir,
+ as_frequency=args.frequency,
+ as_unify_stops=args.as_unify_stops,
+ delimiter=args.delimiter,
+ )
- print('GTFS loaded.')
+ print("GTFS loaded.")
if args.output_dir:
output_dir = args.output_dir
if args.frequency:
stops_features = gtfs_parser.read_interpolated_stops()
- stops_geojson = {
- 'type': 'FeatureCollection',
- 'features': stops_features
- }
+ stops_geojson = {"type": "FeatureCollection", "features": stops_features}
routes_features = gtfs_parser.read_route_frequency(
- yyyymmdd=args.yyyymmdd, begin_time=args.begin_time, end_time=args.end_time)
- routes_geojson = {
- 'type': 'FeatureCollection',
- 'features': routes_features
- }
- gtfs_parser.dataframes['stops'][['stop_id', 'stop_name', 'similar_stop_id', 'similar_stop_name']].to_csv(os.path.join(
- output_dir, 'result.csv'), index=False, encoding='cp932')
+ yyyymmdd=args.yyyymmdd, begin_time=args.begin_time, end_time=args.end_time
+ )
+ routes_geojson = {"type": "FeatureCollection", "features": routes_features}
+ gtfs_parser.dataframes["stops"][
+ ["stop_id", "stop_name", "similar_stop_id", "similar_stop_name"]
+ ].to_csv(os.path.join(output_dir, "result.csv"), index=False, encoding="cp932")
else:
routes_features = gtfs_parser.read_routes(no_shapes=args.no_shapes)
- routes_geojson = {
- 'type': 'FeatureCollection',
- 'features': routes_features
- }
- stops_features = gtfs_parser.read_stops(
- ignore_no_route=args.ignore_no_route)
- stops_geojson = {
- 'type': 'FeatureCollection',
- 'features': stops_features
- }
-
- print('writing geojsons...')
- with open(os.path.join(output_dir, 'routes.geojson'), mode='w', encoding='utf-8') as f:
+ routes_geojson = {"type": "FeatureCollection", "features": routes_features}
+ stops_features = gtfs_parser.read_stops(ignore_no_route=args.ignore_no_route)
+ stops_geojson = {"type": "FeatureCollection", "features": stops_features}
+
+ print("writing geojsons...")
+ with open(
+ os.path.join(output_dir, "routes.geojson"), mode="w", encoding="utf-8"
+ ) as f:
json.dump(routes_geojson, f, ensure_ascii=False)
- with open(os.path.join(output_dir, 'stops.geojson'), mode='w', encoding='utf-8') as f:
+ with open(
+ os.path.join(output_dir, "stops.geojson"), mode="w", encoding="utf-8"
+ ) as f:
json.dump(stops_geojson, f, ensure_ascii=False)
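
For the CLI flow above, a hypothetical programmatic equivalent (GTFSParser and the read_* methods are taken from this module; the source path and delimiter value are placeholders):

import json
import os
from gtfs_parser.__main__ import GTFSParser

src_dir = "/path/to/extracted_gtfs"  # placeholder: a directory of GTFS .txt files
gtfs_parser = GTFSParser(src_dir, as_frequency=True, as_unify_stops=True, delimiter="_")

stops_geojson = {
    "type": "FeatureCollection",
    "features": gtfs_parser.read_interpolated_stops(),
}
routes_geojson = {
    "type": "FeatureCollection",
    "features": gtfs_parser.read_route_frequency(yyyymmdd="20210730"),
}
with open(os.path.join(src_dir, "routes.geojson"), mode="w", encoding="utf-8") as f:
    json.dump(routes_geojson, f, ensure_ascii=False)
with open(os.path.join(src_dir, "stops.geojson"), mode="w", encoding="utf-8") as f:
    json.dump(stops_geojson, f, ensure_ascii=False)
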
diff --git a/gtfs_parser/tests/test_gtfs_parser.py b/gtfs_parser/tests/test_gtfs_parser.py
index bb78f0a..fe6e4c5 100644
--- a/gtfs_parser/tests/test_gtfs_parser.py
+++ b/gtfs_parser/tests/test_gtfs_parser.py
@@ -4,39 +4,43 @@
from gtfs_parser.__main__ import GTFSParser # nopep8
-FIXTURE_DIR = os.path.join(os.path.dirname(
- __file__), 'fixture')
+FIXTURE_DIR = os.path.join(os.path.dirname(__file__), "fixture")
class TestGtfsParser(unittest.TestCase):
gtfs_parser = GTFSParser(FIXTURE_DIR)
gtfs_parser_frequency = GTFSParser(FIXTURE_DIR, as_frequency=True)
- gtfs_parser_frequency_unify = GTFSParser(FIXTURE_DIR,
- as_frequency=True,
- as_unify_stops=True)
+ gtfs_parser_frequency_unify = GTFSParser(
+ FIXTURE_DIR, as_frequency=True, as_unify_stops=True
+ )
def test_init(self):
# 13 txt files are in ./fixture
- self.assertEqual(
- 13, len(glob.glob(os.path.join(FIXTURE_DIR, '*.txt'))))
+ self.assertEqual(13, len(glob.glob(os.path.join(FIXTURE_DIR, "*.txt"))))
# read tables in constants.py
self.assertEqual(12, len(self.gtfs_parser.dataframes.keys()))
# as_frequency: some columns regarding frequency aggregation added
self.assertFalse(
- "similar_stop_id" in self.gtfs_parser.dataframes["stops"].columns)
+ "similar_stop_id" in self.gtfs_parser.dataframes["stops"].columns
+ )
self.assertTrue(
- "similar_stop_id" in self.gtfs_parser_frequency.dataframes["stops"].columns)
+ "similar_stop_id" in self.gtfs_parser_frequency.dataframes["stops"].columns
+ )
self.assertTrue(
- "similar_stop_id" in self.gtfs_parser_frequency_unify.dataframes["stops"].columns)
+ "similar_stop_id"
+ in self.gtfs_parser_frequency_unify.dataframes["stops"].columns
+ )
# as_unify: some columns regarding stop-grouping added
+ self.assertFalse("position_id" in self.gtfs_parser.dataframes["stops"].columns)
self.assertFalse(
- "position_id" in self.gtfs_parser.dataframes["stops"].columns)
- self.assertFalse(
- "position_id" in self.gtfs_parser_frequency.dataframes["stops"].columns)
+ "position_id" in self.gtfs_parser_frequency.dataframes["stops"].columns
+ )
self.assertTrue(
- "position_id" in self.gtfs_parser_frequency_unify.dataframes["stops"].columns)
+ "position_id"
+ in self.gtfs_parser_frequency_unify.dataframes["stops"].columns
+ )
def test_read_stops(self):
# list of geojson-feature
@@ -44,12 +48,10 @@ def test_read_stops(self):
# num of stops is not changed by aggregation
self.assertEqual(899, len(self.gtfs_parser_frequency.read_stops()))
# num of stops is not changed by as_unify_stops
- self.assertEqual(
- 899, len(self.gtfs_parser_frequency_unify.read_stops()))
+ self.assertEqual(899, len(self.gtfs_parser_frequency_unify.read_stops()))
# remove no-route stops
- self.assertEqual(
- 896, len(self.gtfs_parser.read_stops(ignore_no_route=True)))
+ self.assertEqual(896, len(self.gtfs_parser.read_stops(ignore_no_route=True)))
def test_read_routes(self):
# num of features in routes.geojson depends not on shapes.txt but on routes.txt
@@ -57,47 +59,33 @@ def test_read_routes(self):
self.assertEqual(32, len(self.gtfs_parser.read_routes(no_shapes=True)))
# as_frequency and as_unify have no effect on read_routes()
self.assertEqual(
- 32, len(self.gtfs_parser_frequency.read_routes(no_shapes=True)))
+ 32, len(self.gtfs_parser_frequency.read_routes(no_shapes=True))
+ )
self.assertEqual(
- 32, len(self.gtfs_parser_frequency_unify.read_routes(no_shapes=True)))
+ 32, len(self.gtfs_parser_frequency_unify.read_routes(no_shapes=True))
+ )
def test_read_interpolated_stops(self):
- with self.assertRaises(AttributeError):
+ with self.assertRaises(TypeError):
# read_interpolated_stops() needs as_frequency=True
self.gtfs_parser.read_interpolated_stops()
# read_interpolated_stops() unifies stops sharing the same lat-lon into one feature.
# there are no stops sharing a lat-lon in the fixture
- self.assertEqual(
- 899, len(self.gtfs_parser_frequency.read_interpolated_stops()))
+ self.assertEqual(899, len(self.gtfs_parser_frequency.read_interpolated_stops()))
# as_unify moves nearby, similarly-named stops onto one lat-lon (their centroid)
self.assertEqual(
- 518, len(self.gtfs_parser_frequency_unify.read_interpolated_stops()))
+ 518, len(self.gtfs_parser_frequency_unify.read_interpolated_stops())
+ )
def test_read_route_frequency(self):
with self.assertRaises(KeyError):
self.gtfs_parser.read_route_frequency()
# each route_frequency feature is drawn between 2 stops
- self.assertEqual(
- 956, len(self.gtfs_parser_frequency.read_route_frequency()))
+ self.assertEqual(956, len(self.gtfs_parser_frequency.read_route_frequency()))
# unifying 'similar' stops into one position decreases the number of route_frequency features
self.assertEqual(
- 918, len(self.gtfs_parser_frequency_unify.read_route_frequency()))
-
- # out of service of GTFS -> 0
- self.assertEqual(0, len(
- self.gtfs_parser_frequency_unify.read_route_frequency(yyyymmdd="20210530")))
-
- # some routes are not in service on 20210730, Friday
- freq20210730 = self.gtfs_parser_frequency_unify.read_route_frequency(
- yyyymmdd="20210730")
- self.assertEqual(916, len(freq20210730))
- self.assertEqual(114, freq20210730[0]["properties"]["frequency"])
-
- # 20210801 - Sunday
- freq20210801 = self.gtfs_parser_frequency_unify.read_route_frequency(
- yyyymmdd="20210801")
- self.assertEqual(736, len(freq20210801))
- self.assertEqual(62, freq20210801[0]["properties"]["frequency"])
+ 918, len(self.gtfs_parser_frequency_unify.read_route_frequency())
+ )
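
The date-dependent frequency assertions are dropped above, and the expected error type changes from AttributeError to TypeError. To run just this test case outside of discovery, one option (assuming execution from the repository root with pandas installed):

import unittest
from gtfs_parser.tests.test_gtfs_parser import TestGtfsParser

# load and run only this TestCase, mirroring what discovery would collect
suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestGtfsParser)
unittest.TextTestRunner(verbosity=2).run(suite)
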
diff --git a/metadata.txt b/metadata.txt
index 2a85144..b1ac160 100644
--- a/metadata.txt
+++ b/metadata.txt
@@ -6,7 +6,7 @@
name=GTFS-GO
qgisMinimumVersion=3.0
description=The plugin to extract GTFS data and to show routes and stops.
-version=3.0.1
+version=3.1.1
author=MIERUNE Inc.
email=info@mierune.co.jp