diff --git a/.github/workflows/test_gtfs_parser.yml b/.github/workflows/test_gtfs_parser.yml index fba9f17..b1cb0ac 100644 --- a/.github/workflows/test_gtfs_parser.yml +++ b/.github/workflows/test_gtfs_parser.yml @@ -27,5 +27,4 @@ jobs: run: python -m pip install pandas - name: unittest - run: python -m unittest discover gtfs_parser - continue-on-error: true \ No newline at end of file + run: python -m unittest discover gtfs_parser \ No newline at end of file diff --git a/README.md b/README.md index 7f8f759..2546921 100644 --- a/README.md +++ b/README.md @@ -101,5 +101,5 @@ pip install pandas ``` cd GTFS-GO -python -m unittest discover +python -m unittest discover gtfs_parser/tests ``` diff --git a/frequency.qml b/aggregated_routes.qml similarity index 100% rename from frequency.qml rename to aggregated_routes.qml diff --git a/aggregated_stops.qml b/aggregated_stops.qml new file mode 100644 index 0000000..07ed7ac --- /dev/null +++ b/aggregated_stops.qml @@ -0,0 +1,181 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 0 + 0 + diff --git a/frequency_stops.qml b/frequency_stops.qml deleted file mode 100644 index df3165b..0000000 --- a/frequency_stops.qml +++ /dev/null @@ -1,122 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 0 - 0 - 0 - diff --git a/gtfs_go_dialog.py b/gtfs_go_dialog.py index 2125674..376be45 100755 --- a/gtfs_go_dialog.py +++ b/gtfs_go_dialog.py @@ -50,35 +50,32 @@ FILENAME_RESULT_CSV, STOPS_MINIMUM_VISIBLE_SCALE, ) -DATALIST_JSON_PATH = os.path.join( - os.path.dirname(__file__), 'gtfs_go_datalist.json') -TEMP_DIR = os.path.join(tempfile.gettempdir(), 'GTFSGo') -REPOSITORY_ENUM = { - "preset": 0, - "japanDpf": 1 -} +DATALIST_JSON_PATH = os.path.join(os.path.dirname(__file__), "gtfs_go_datalist.json") +TEMP_DIR = os.path.join(tempfile.gettempdir(), "GTFSGo") +REPOSITORY_ENUM = {"preset": 0, "japanDpf": 1} -class GTFSGoDialog(QDialog): +class GTFSGoDialog(QDialog): def __init__(self, iface): """Constructor.""" super().__init__() - self.ui = uic.loadUi(os.path.join(os.path.dirname( - __file__), 'gtfs_go_dialog_base.ui'), self) - with open(DATALIST_JSON_PATH, encoding='utf-8') as f: + self.ui = uic.loadUi( + os.path.join(os.path.dirname(__file__), "gtfs_go_dialog_base.ui"), self + ) + with open(DATALIST_JSON_PATH, encoding="utf-8") as f: self.datalist = json.load(f) self.iface = iface - self.combobox_zip_text = self.tr('---Load local ZipFile---') + self.combobox_zip_text = self.tr("---Load local ZipFile---") self.init_gui() def init_gui(self): # repository combobox + self.repositoryCombobox.addItem(self.tr("Preset"), REPOSITORY_ENUM["preset"]) self.repositoryCombobox.addItem( - self.tr('Preset'), REPOSITORY_ENUM['preset']) - self.repositoryCombobox.addItem( - self.tr('[Japan]GTFS data repository'), REPOSITORY_ENUM['japanDpf']) + self.tr("[Japan]GTFS data repository"), REPOSITORY_ENUM["japanDpf"] + ) # local repository data select combobox self.ui.comboBox.addItem(self.combobox_zip_text, None) @@ -93,19 +90,20 @@ def init_gui(self): self.ui.outputDirFileWidget.fileChanged.connect(self.refresh) self.ui.unifyCheckBox.stateChanged.connect(self.refresh) self.ui.timeFilterCheckBox.stateChanged.connect(self.refresh) - self.ui.simpleRadioButton.clicked.connect(self.refresh) - self.ui.freqRadioButton.clicked.connect(self.refresh) + 
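# --- editor's note: illustrative sketch, not part of the upstream diff -----
# The dialog switches from mutually exclusive radio buttons to two independent
# checkboxes, so a single run can emit both the "simple" and the "aggregated"
# outputs. A minimal, framework-free sketch of the gating rule that refresh()
# applies to the run button (function and argument names are hypothetical):
def run_button_enabled(feed_count: int, output_dir: str, simple: bool, aggregate: bool) -> bool:
    # enabled only with input feeds, an output dir, and at least one mode
    return feed_count > 0 and output_dir != "" and (simple or aggregate)

assert run_button_enabled(1, "/tmp/out", True, False)
assert not run_button_enabled(1, "/tmp/out", False, False)  # no mode chosen
# ---------------------------------------------------------------------------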
self.ui.simpleCheckbox.clicked.connect(self.refresh) + self.ui.aggregateCheckbox.clicked.connect(self.refresh) # time filter - validate user input self.ui.beginTimeLineEdit.editingFinished.connect( - lambda: self.validate_time_lineedit(self.ui.beginTimeLineEdit)) + lambda: self.validate_time_lineedit(self.ui.beginTimeLineEdit) + ) self.ui.endTimeLineEdit.editingFinished.connect( - lambda: self.validate_time_lineedit(self.ui.endTimeLineEdit)) + lambda: self.validate_time_lineedit(self.ui.endTimeLineEdit) + ) # set today DateEdit now = datetime.datetime.now() - self.ui.filterByDateDateEdit.setDate( - QDate(now.year, now.month, now.day)) + self.ui.filterByDateDateEdit.setDate(QDate(now.year, now.month, now.day)) self.refresh() @@ -118,8 +116,7 @@ def init_local_repository_gui(self): def init_japan_dpf_gui(self): self.japanDpfResultTableView.clicked.connect(self.refresh) - self.japanDpfResultTableView.setSelectionBehavior( - QAbstractItemView.SelectRows) + self.japanDpfResultTableView.setSelectionBehavior(QAbstractItemView.SelectRows) self.japan_dpf_set_table([]) for idx, header in enumerate(repository.japan_dpf.table.HEADERS): if header in repository.japan_dpf.table.HEADERS_TO_HIDE: @@ -127,16 +124,15 @@ def init_japan_dpf_gui(self): self.japanDpfPrefectureCombobox.addItem(self.tr("any"), None) for prefname in constants.JAPAN_PREFS: - self.japanDpfPrefectureCombobox.addItem( - prefname, prefname) + self.japanDpfPrefectureCombobox.addItem(prefname, prefname) now = datetime.datetime.now() - self.ui.japanDpfTargetDateEdit.setDate( - QDate(now.year, now.month, now.day)) + self.ui.japanDpfTargetDateEdit.setDate(QDate(now.year, now.month, now.day)) self.japanDpfExtentGroupBox.setMapCanvas(iface.mapCanvas()) self.japanDpfExtentGroupBox.setOutputCrs( - QgsCoordinateReferenceSystem("EPSG:4326")) + QgsCoordinateReferenceSystem("EPSG:4326") + ) # TODO: APIでextentパラメータが未実装なので一時的にUIを非表示 self.japanDpfExtentGroupBox.setVisible(False) @@ -158,18 +154,18 @@ def make_combobox_text(self, data): Returns: str: combobox-text """ - return '[' + data["country"] + ']' + '[' + data["region"] + ']' + data["name"] + return "[" + data["country"] + "]" + "[" + data["region"] + "]" + data["name"] def download_zip(self, url: str) -> str: data = urllib.request.urlopen(url).read() - download_path = os.path.join(TEMP_DIR, str(uuid.uuid4()) + '.zip') - with open(download_path, mode='wb') as f: + download_path = os.path.join(TEMP_DIR, str(uuid.uuid4()) + ".zip") + with open(download_path, mode="wb") as f: f.write(data) return download_path def extract_zip(self, zip_path: str) -> str: - extracted_dir = os.path.join(TEMP_DIR, 'extract', str(uuid.uuid4())) + extracted_dir = os.path.join(TEMP_DIR, "extract", str(uuid.uuid4())) os.makedirs(extracted_dir, exist_ok=True) with zipfile.ZipFile(zip_path) as z: z.extractall(extracted_dir) @@ -177,29 +173,41 @@ def extract_zip(self, zip_path: str) -> str: def get_target_feed_infos(self): feed_infos = [] - if self.repositoryCombobox.currentData() == REPOSITORY_ENUM['preset']: + if self.repositoryCombobox.currentData() == REPOSITORY_ENUM["preset"]: if self.ui.comboBox.currentData(): - feed_infos.append({ - "path": self.ui.comboBox.currentData().get("url"), - "group": self.ui.comboBox.currentData().get("name"), - "dir": self.ui.comboBox.currentData().get("name") - }) - elif self.ui.comboBox.currentData() is None and self.ui.zipFileWidget.filePath(): - feed_infos.append({ - "path": self.ui.zipFileWidget.filePath(), - "group": os.path.basename(self.ui.zipFileWidget.filePath()).split(".")[0], - 
"dir": os.path.basename(self.ui.zipFileWidget.filePath()).split(".")[0] - }) - elif self.repositoryCombobox.currentData() == REPOSITORY_ENUM['japanDpf']: + feed_infos.append( + { + "path": self.ui.comboBox.currentData().get("url"), + "group": self.ui.comboBox.currentData().get("name"), + "dir": self.ui.comboBox.currentData().get("name"), + } + ) + elif ( + self.ui.comboBox.currentData() is None + and self.ui.zipFileWidget.filePath() + ): + feed_infos.append( + { + "path": self.ui.zipFileWidget.filePath(), + "group": os.path.basename( + self.ui.zipFileWidget.filePath() + ).split(".")[0], + "dir": os.path.basename(self.ui.zipFileWidget.filePath()).split( + "." + )[0], + } + ) + elif self.repositoryCombobox.currentData() == REPOSITORY_ENUM["japanDpf"]: selected_rows = self.japanDpfResultTableView.selectionModel().selectedRows() for row in selected_rows: - row_data = self.get_selected_row_data_in_japan_dpf_table( - row.row()) - feed_infos.append({ - "path": row_data["gtfs_url"], - "group": row_data["agency_name"] + "-" + row_data["gtfs_name"], - "dir": row_data["agency_id"] + "-" + row_data["gtfs_id"], - }) + row_data = self.get_selected_row_data_in_japan_dpf_table(row.row()) + feed_infos.append( + { + "path": row_data["gtfs_url"], + "group": row_data["agency_name"] + "-" + row_data["gtfs_name"], + "dir": row_data["agency_id"] + "-" + row_data["gtfs_id"], + } + ) return feed_infos def execution(self): @@ -208,67 +216,115 @@ def execution(self): os.makedirs(TEMP_DIR, exist_ok=True) for feed_info in self.get_target_feed_infos(): - if feed_info["path"].startswith('http'): + if feed_info["path"].startswith("http"): feed_info["path"] = self.download_zip(feed_info["path"]) extracted_dir = self.extract_zip(feed_info["path"]) - output_dir = os.path.join(self.outputDirFileWidget.filePath(), - feed_info["dir"]) + output_dir = os.path.join( + self.outputDirFileWidget.filePath(), feed_info["dir"] + ) os.makedirs(output_dir, exist_ok=True) - if self.ui.simpleRadioButton.isChecked(): + written_files = { + "routes": "", + "stops": "", + "aggregated_routes": "", + "aggregated_stops": "", + "aggregated_csv": "", + } + + if self.ui.simpleCheckbox.isChecked(): gtfs_parser = GTFSParser(extracted_dir) routes_geojson = { - 'type': 'FeatureCollection', - 'features': gtfs_parser.read_routes(no_shapes=self.ui.ignoreShapesCheckbox.isChecked()) + "type": "FeatureCollection", + "features": gtfs_parser.read_routes( + no_shapes=self.ui.ignoreShapesCheckbox.isChecked() + ), } stops_geojson = { - 'type': 'FeatureCollection', - 'features': gtfs_parser.read_stops(ignore_no_route=self.ui.ignoreNoRouteStopsCheckbox.isChecked()) + "type": "FeatureCollection", + "features": gtfs_parser.read_stops( + ignore_no_route=self.ui.ignoreNoRouteStopsCheckbox.isChecked() + ), } - route_filename = 'route.geojson' - stops_filename = 'stops.geojson' - else: + # write + written_files["routes"] = os.path.join(output_dir, "routes.geojson") + written_files["stops"] = os.path.join(output_dir, "stops.geojson") + with open( + written_files["routes"], + mode="w", + encoding="utf-8", + ) as f: + json.dump(routes_geojson, f, ensure_ascii=False) + + with open( + written_files["stops"], + mode="w", + encoding="utf-8", + ) as f: + json.dump(stops_geojson, f, ensure_ascii=False) + + if self.ui.aggregateCheckbox.isChecked(): gtfs_parser = GTFSParser( extracted_dir, - as_frequency=True, + as_frequency=self.ui.aggregateCheckbox.isChecked(), as_unify_stops=self.ui.unifyCheckBox.isChecked(), - delimiter=self.get_delimiter() + delimiter=self.get_delimiter(), + 
yyyymmdd=self.get_yyyymmdd(), + begin_time=self.get_time_filter(self.ui.beginTimeLineEdit), + end_time=self.get_time_filter(self.ui.endTimeLineEdit), ) - - routes_geojson = { - 'type': 'FeatureCollection', - 'features': gtfs_parser.read_route_frequency(yyyymmdd=self.get_yyyymmdd(), - begin_time=self.get_time_filter( - self.ui.beginTimeLineEdit), - end_time=self.get_time_filter(self.ui.endTimeLineEdit)) + aggregated_routes_geojson = { + "type": "FeatureCollection", + "features": gtfs_parser.read_route_frequency(), } - stops_geojson = { - 'type': 'FeatureCollection', - 'features': gtfs_parser.read_interpolated_stops() + aggregated_stops_geojson = { + "type": "FeatureCollection", + "features": gtfs_parser.read_interpolated_stops(), } - route_filename = 'frequency.geojson' - stops_filename = 'frequency_stops.geojson' - - # write stop_id conversion result csv - with open(os.path.join(output_dir, FILENAME_RESULT_CSV), mode="w", encoding="cp932", errors="ignore")as f: - gtfs_parser.dataframes['stops'][[ - 'stop_id', 'stop_name', 'similar_stop_id', 'similar_stop_name']].to_csv(f, index=False) - - with open(os.path.join(output_dir, route_filename), mode='w', encoding='utf-8') as f: - json.dump(routes_geojson, f, ensure_ascii=False) - with open(os.path.join(output_dir, stops_filename), mode='w', encoding='utf-8') as f: - json.dump(stops_geojson, f, ensure_ascii=False) - - self.show_geojson(output_dir, - stops_filename, - route_filename, - feed_info["group"]) + # write + written_files["aggregated_routes"] = os.path.join( + output_dir, "aggregated_routes.geojson" + ) + written_files["aggregated_stops"] = os.path.join( + output_dir, "aggregated_stops.geojson" + ) + written_files["aggregated_csv"] = os.path.join(output_dir, "result.csv") + with open( + written_files["aggregated_stops"], + mode="w", + encoding="utf-8", + ) as f: + json.dump(aggregated_stops_geojson, f, ensure_ascii=False) + with open( + written_files["aggregated_routes"], + mode="w", + encoding="utf-8", + ) as f: + json.dump(aggregated_routes_geojson, f, ensure_ascii=False) + with open( + written_files["aggregated_csv"], + mode="w", + encoding="cp932", + errors="ignore", + ) as f: + gtfs_parser.dataframes["stops"][ + ["stop_id", "stop_name", "similar_stop_id", "similar_stop_name"] + ].to_csv(f, index=False) + + self.show_geojson( + feed_info["group"], + written_files["stops"], + written_files["routes"], + written_files["aggregated_stops"], + written_files["aggregated_routes"], + written_files["aggregated_csv"], + ) def get_yyyymmdd(self): if not self.ui.filterByDateCheckBox.isChecked(): - return '' + return "" date = self.ui.filterByDateDateEdit.date() yyyy = str(date.year()).zfill(4) mm = str(date.month()).zfill(2) @@ -277,75 +333,124 @@ def get_yyyymmdd(self): def get_delimiter(self): if not self.ui.unifyCheckBox.isChecked(): - return '' + return "" if not self.ui.delimiterCheckBox.isChecked(): - return '' + return "" return self.ui.delimiterLineEdit.text() def get_time_filter(self, lineEdit): if not self.ui.timeFilterCheckBox.isChecked(): - return '' - return lineEdit.text().replace(':', '') - - def show_geojson(self, geojson_dir: str, stops_filename: str, route_filename: str, group_name: str): - # these geojsons will already have been generated - stops_geojson = os.path.join(geojson_dir, stops_filename) - routes_geojson = os.path.join(geojson_dir, route_filename) - - stops_vlayer = QgsVectorLayer( - stops_geojson, stops_filename.split('.')[0], 'ogr') - routes_vlayer = QgsVectorLayer( - routes_geojson, route_filename.split('.')[0], 
'ogr') - - # make and set labeling for stops - stops_labeling = get_labeling_for_stops( - target_field_name="stop_name" if self.ui.simpleRadioButton.isChecked() else "similar_stop_name") - stops_vlayer.setLabelsEnabled(True) - stops_vlayer.setLabeling(stops_labeling) - - # adjust layer visibility - stops_vlayer.setMinimumScale(STOPS_MINIMUM_VISIBLE_SCALE) - stops_vlayer.setScaleBasedVisibility(True) - - # there are two type route renderer, normal, frequency - if self.ui.simpleRadioButton.isChecked(): - routes_renderer = Renderer(routes_vlayer, 'route_name') + return "" + return lineEdit.text().replace(":", "") + + def show_geojson( + self, + group_name: str, + stops_geojson: str, + routes_geojson: str, + aggregated_stops_geojson: str, + aggregated_routes_geojson: str, + aggregated_csv: str, + ): + root = QgsProject().instance().layerTreeRoot() + group = root.insertGroup(0, group_name) + group.setExpanded(True) + + if routes_geojson != "": + routes_vlayer = QgsVectorLayer( + routes_geojson, os.path.basename(routes_geojson).split(".")[0], "ogr" + ) + routes_renderer = Renderer(routes_vlayer, "route_name") routes_vlayer.setRenderer(routes_renderer.make_renderer()) - added_layers = [routes_vlayer, stops_vlayer] - stops_renderer = Renderer(stops_vlayer, 'stop_name') + + QgsProject.instance().addMapLayer(routes_vlayer, False) + group.insertLayer(0, routes_vlayer) + + if stops_geojson != "": + stops_vlayer = QgsVectorLayer( + stops_geojson, os.path.basename(stops_geojson).split(".")[0], "ogr" + ) + # make and set labeling for stops + stops_labeling = get_labeling_for_stops("stop_names") + stops_vlayer.setLabelsEnabled(True) + stops_vlayer.setLabeling(stops_labeling) + + # adjust layer visibility + stops_vlayer.setMinimumScale(STOPS_MINIMUM_VISIBLE_SCALE) + stops_vlayer.setScaleBasedVisibility(True) + + stops_renderer = Renderer(stops_vlayer, "stop_name") stops_vlayer.setRenderer(stops_renderer.make_renderer()) - else: - # frequency mode - routes_vlayer.loadNamedStyle(os.path.join( - os.path.dirname(__file__), 'frequency.qml')) - stops_vlayer.loadNamedStyle(os.path.join( - os.path.dirname(__file__), 'frequency_stops.qml')) - csv_vlayer = QgsVectorLayer(os.path.join( - geojson_dir, FILENAME_RESULT_CSV), FILENAME_RESULT_CSV, 'ogr') - added_layers = [routes_vlayer, stops_vlayer, csv_vlayer] - - # add two layers as a group - self.add_layers_as_group(group_name, added_layers) + + QgsProject.instance().addMapLayer(stops_vlayer, False) + group.insertLayer(0, stops_vlayer) + + if aggregated_routes_geojson != "": + aggregated_routes_vlayer = QgsVectorLayer( + aggregated_routes_geojson, + os.path.basename(aggregated_routes_geojson).split(".")[0], + "ogr", + ) + aggregated_routes_vlayer.loadNamedStyle( + os.path.join(os.path.dirname(__file__), "aggregated_routes.qml") + ) + + QgsProject.instance().addMapLayer(aggregated_routes_vlayer, False) + group.insertLayer(0, aggregated_routes_vlayer) + + if aggregated_stops_geojson != "": + aggregated_stops_vlayer = QgsVectorLayer( + aggregated_stops_geojson, + os.path.basename(aggregated_stops_geojson).split(".")[0], + "ogr", + ) + aggregated_stops_vlayer.loadNamedStyle( + os.path.join(os.path.dirname(__file__), "aggregated_stops.qml") + ) + + QgsProject.instance().addMapLayer(aggregated_stops_vlayer, False) + group.insertLayer(0, aggregated_stops_vlayer) + + if aggregated_csv != "": + aggregated_csv_vlayer = QgsVectorLayer( + aggregated_csv, + os.path.basename(aggregated_csv).split(".")[0], + "ogr", + ) + + QgsProject.instance().addMapLayer(aggregated_csv_vlayer, 
False) + group.insertLayer(0, aggregated_csv_vlayer) self.iface.messageBar().pushInfo( - self.tr('finish'), - self.tr('generated geojson files: ') + geojson_dir) + self.tr("finish"), self.tr("generated geojson files: ") + ) self.ui.close() def refresh(self): self.localDataSelectAreaWidget.setVisible( - self.repositoryCombobox.currentData() == REPOSITORY_ENUM['preset']) + self.repositoryCombobox.currentData() == REPOSITORY_ENUM["preset"] + ) self.japanDpfDataSelectAreaWidget.setVisible( - self.repositoryCombobox.currentData() == REPOSITORY_ENUM['japanDpf']) + self.repositoryCombobox.currentData() == REPOSITORY_ENUM["japanDpf"] + ) # idiom to shrink window to fit its content self.resize(0, 0) self.adjustSize() self.ui.zipFileWidget.setEnabled( - self.ui.comboBox.currentText() == self.combobox_zip_text) - self.ui.pushButton.setEnabled((len(self.get_target_feed_infos()) > 0) and - (self.ui.outputDirFileWidget.filePath() != '')) + self.ui.comboBox.currentText() == self.combobox_zip_text + ) + + # set executable + self.ui.pushButton.setEnabled( + (len(self.get_target_feed_infos()) > 0) + and (self.ui.outputDirFileWidget.filePath() != "") + and ( + self.ui.simpleCheckbox.isChecked() + or self.ui.aggregateCheckbox.isChecked() + ) + ) # stops unify mode is_unify = self.ui.unifyCheckBox.isChecked() @@ -357,35 +462,15 @@ def refresh(self): self.ui.beginTimeLineEdit.setEnabled(has_time_filter) self.ui.endTimeLineEdit.setEnabled(has_time_filter) - # radio button - mode toggle - self.ui.simpleFrame.setEnabled(self.ui.simpleRadioButton.isChecked()) - self.ui.freqFrame.setEnabled(self.ui.freqRadioButton.isChecked()) - - def add_layers_as_group(self, group_name: str, layers: [QgsMapLayer]): - """ - add layers into project as a group. - the order of layers is reverse to layers list order. 
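# --- editor's note: illustrative sketch, not part of the upstream diff -----
# The removed add_layers_as_group() docstring observed that inserting each
# layer at index 0 reverses the input order; the new show_geojson() inlines
# the same idiom. A dependency-free illustration:
layers = ["layer_A", "layer_B", "layer_C"]
tree = []
for layer in layers:
    tree.insert(0, layer)  # mirrors group.insertLayer(0, layer)
assert tree == ["layer_C", "layer_B", "layer_A"]
# ---------------------------------------------------------------------------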
- if layers: [layer_A, layer_B, layer_C] - then in tree: - - layer_C - - layer_B - - layer_A - - Args: - group_name (str): [description] - layers ([type]): [description] - """ - root = QgsProject().instance().layerTreeRoot() - group = root.insertGroup(0, group_name) - group.setExpanded(True) - for layer in layers: - QgsProject.instance().addMapLayer(layer, False) - group.insertLayer(0, layer) + # mode toggle + self.ui.simpleFrame.setEnabled(self.ui.simpleCheckbox.isChecked()) + self.ui.freqFrame.setEnabled(self.ui.aggregateCheckbox.isChecked()) @staticmethod def validate_time_lineedit(lineedit): - digits = ''.join( - list(filter(lambda char: char.isdigit(), list(lineedit.text())))).ljust(6, "0")[-6:] + digits = "".join( + list(filter(lambda char: char.isdigit(), list(lineedit.text()))) + ).ljust(6, "0")[-6:] # limit to 29:59:59 hh = str(min(29, int(digits[0:2]))).zfill(2) @@ -405,20 +490,35 @@ def japan_dpf_search(self): mm = str(target_date.month()).zfill(2) dd = str(target_date.day()).zfill(2) - extent = None if self.japanDpfExtentGroupBox.outputExtent().isEmpty( - ) else self.japanDpfExtentGroupBox.outputExtent().toString().replace(" : ", ",") + extent = ( + None + if self.japanDpfExtentGroupBox.outputExtent().isEmpty() + else self.japanDpfExtentGroupBox.outputExtent() + .toString() + .replace(" : ", ",") + ) - pref = None if self.japanDpfPrefectureCombobox.currentData( - ) is None else urllib.parse.quote(self.japanDpfPrefectureCombobox.currentData()) + pref = ( + None + if self.japanDpfPrefectureCombobox.currentData() is None + else urllib.parse.quote(self.japanDpfPrefectureCombobox.currentData()) + ) try: - results = repository.japan_dpf.api.get_feeds(yyyy+mm+dd, - extent=extent, - pref=pref) + results = repository.japan_dpf.api.get_feeds( + yyyy + mm + dd, extent=extent, pref=pref + ) self.japan_dpf_set_table(results) except Exception as e: QMessageBox.information( - self, self.tr('Error'), self.tr('Error occured, please check:\n- Internet connection.\n- Repository-server') + "\n\n" + e) + self, + self.tr("Error"), + self.tr( + "Error occurred, please check:\n- Internet connection.\n- Repository-server" + ) + + "\n\n" + + str(e), + ) finally: self.japanDpfSearchButton.setEnabled(True) self.japanDpfSearchButton.setText(self.tr("Search")) @@ -438,6 +538,7 @@ def japan_dpf_set_table(self, results: list): def get_selected_row_data_in_japan_dpf_table(self, row: int): data = {} for col_idx, col_name in enumerate(repository.japan_dpf.table.HEADERS): - data[col_name] = self.japanDpfResultTableView.model().index(row, - col_idx).data() + data[col_name] = ( + self.japanDpfResultTableView.model().index(row, col_idx).data() + ) return data diff --git a/gtfs_go_dialog_base.ui b/gtfs_go_dialog_base.ui index d1cdec4..ee5448b 100755 --- a/gtfs_go_dialog_base.ui +++ b/gtfs_go_dialog_base.ui @@ -6,8 +6,8 @@ 0 0 - 543 - 808 + 592 + 1074 @@ -123,10 +123,10 @@ - + 20 - + 20 @@ -229,7 +229,7 @@ - + simple routes and stops @@ -290,13 +290,10 @@ - + aggregate route frequency - - false - diff --git a/gtfs_parser/__main__.py b/gtfs_parser/__main__.py index c0bd8af..4dfaa96 100644 --- a/gtfs_parser/__main__.py +++ b/gtfs_parser/__main__.py @@ -16,82 +16,67 @@ def latlon_to_str(latlon): - return ''.join(list(map(lambda coord: str(round(coord, 4)), latlon))) + return "".join(list(map(lambda coord: str(round(coord, 4)), latlon))) class GTFSParser: - def __init__(self, - src_dir: str, - as_frequency=False, - as_unify_stops=False, - delimiter='', - max_distance_degree=0.01): - - txts = glob.glob(os.path.join( - src_dir,
'**', '*.txt'), recursive=True) - self.dataframes = {} - for txt in txts: - datatype = os.path.basename(txt).split('.')[0] - if os.path.basename(datatype) not in GTFS_DATATYPES: - print(f'{datatype} is not specified in GTFS, skipping...') + def __init__( + self, + src_dir: str, + as_frequency=False, + as_unify_stops=False, + delimiter="", + max_distance_degree=0.01, + yyyymmdd="", + begin_time="", + end_time="", + ): + txts = glob.glob(os.path.join(src_dir, "**", "*.txt"), recursive=True) + self.dataframes = self.__load_tables(txts) + + self.similar_stops_df = None + if as_frequency: + self.__aggregate_similar_stops( + delimiter, + max_distance_degree, + as_unify_stops, + yyyymmdd=yyyymmdd, + begin_time=begin_time, + end_time=end_time, + ) + + @staticmethod + def __load_tables(text_files: list) -> dict: + tables = {} + for txt in text_files: + datatype = os.path.basename(txt).split(".")[0] + if datatype not in GTFS_DATATYPES: + print(f"{datatype} is not specified in GTFS, skipping...") continue - with open(txt, encoding='utf-8_sig') as t: - df = pd.read_csv(t, dtype=str) + with open(txt, encoding="utf-8_sig") as f: + df = pd.read_csv(f, dtype=str) if len(df) == 0: - print(f'{datatype}.txt is empty, skipping...') + print(f"{datatype}.txt is empty, skipping...") continue - self.dataframes[os.path.basename(txt).split('.')[0]] = df + tables[os.path.basename(txt).split(".")[0]] = df for datatype in GTFS_DATATYPES: - if GTFS_DATATYPES[datatype]['required'] and \ - datatype not in self.dataframes: - raise FileNotFoundError(f'{datatype} is not exists.') - - # cast some numeric value columns to int or float - self.dataframes['stops'] = self.dataframes['stops'].astype( - {'stop_lon': float, 'stop_lat': float}) - self.dataframes['stop_times'] = self.dataframes['stop_times'].astype({ - 'stop_sequence': int}) - if self.dataframes.get('shapes') is not None: - self.dataframes['shapes'] = self.dataframes['shapes'].astype( - {'shape_pt_lon': float, 'shape_pt_lat': float, 'shape_pt_sequence': int}) - - if 'parent_station' not in self.dataframes.get('stops').columns: - # parent_station is optional column on GTFS but use in this module - # when parent_station is not in stops, fill by 'nan' (not NaN) - self.dataframes['stops']['parent_station'] = 'nan' + if GTFS_DATATYPES[datatype]["required"] and datatype not in tables: + raise FileNotFoundError(f"{datatype} is not exists.") - if as_frequency: - self.similar_stops_df = None - if as_unify_stops: - self.aggregate_similar_stops(delimiter, max_distance_degree) - else: - # no unifying stops - self.dataframes['stops']['similar_stop_id'] = self.dataframes['stops']['stop_id'] - self.dataframes['stops']['similar_stop_name'] = self.dataframes['stops']['stop_name'] - self.dataframes['stops']['similar_stops_centroid'] = self.dataframes['stops'][[ - 'stop_lon', 'stop_lat']].values.tolist() - self.similar_stops_df = self.dataframes['stops'][[ - 'similar_stop_id', 'similar_stop_name', 'similar_stops_centroid']].copy() - - def aggregate_similar_stops(self, delimiter, max_distance_degree): - parent_ids = self.dataframes['stops']['parent_station'].unique() - self.dataframes['stops']['is_parent'] = self.dataframes['stops']['stop_id'].map( - lambda stop_id: 1 if stop_id in parent_ids else 0) - - self.dataframes['stops'][['similar_stop_id', 'similar_stop_name', 'similar_stops_centroid']] = self.dataframes['stops']['stop_id'].map( - lambda stop_id: self.get_similar_stop_tuple(stop_id, delimiter, max_distance_degree)).apply(pd.Series) - self.dataframes['stops']['position_id'] = 
self.dataframes['stops']['similar_stops_centroid'].map( - latlon_to_str) - self.dataframes['stops']['unique_id'] = self.dataframes['stops']['similar_stop_id'] + \ - self.dataframes['stops']['position_id'] - - # sometimes stop_name accidently becomes pd.Series instead of str. - self.dataframes['stops']['similar_stop_name'] = self.dataframes['stops']['similar_stop_name'].map( - lambda val: val if type(val) == str else val.stop_name) - - self.similar_stops_df = self.dataframes['stops'].drop_duplicates( - subset='unique_id')[[ - 'position_id', 'similar_stop_id', 'similar_stop_name', 'similar_stops_centroid']].copy() + # cast some numeric columns from str to numeric + tables["stops"] = tables["stops"].astype({"stop_lon": float, "stop_lat": float}) + tables["stop_times"] = tables["stop_times"].astype({"stop_sequence": int}) + if tables.get("shapes") is not None: + tables["shapes"] = tables["shapes"].astype( + {"shape_pt_lon": float, "shape_pt_lat": float, "shape_pt_sequence": int} + ) + + # parent_station is optional column on GTFS but use in this module + # when parent_station is not in stops, fill by 'nan' (not NaN) + if "parent_station" not in tables.get("stops").columns: + tables["stops"]["parent_station"] = "nan" + + return tables def read_stops(self, ignore_no_route=False) -> list: """ @@ -104,167 +89,291 @@ def read_stops(self, ignore_no_route=False) -> list: list: [description] """ - stops_df = self.dataframes['stops'][[ - 'stop_id', 'stop_lat', 'stop_lon', 'stop_name']] - route_id_on_stops = self.get_route_ids_on_stops() + # get unique list of route_id related to each stop + stop_times_trip_df = pd.merge( + self.dataframes["stop_times"], + self.dataframes["trips"], + on="trip_id", + ) + route_ids_on_stops = stop_times_trip_df.groupby("stop_id")["route_id"].unique() + route_ids_on_stops.apply(lambda x: x.sort()) + # parse stops to GeoJSON-Features features = [] - for stop in stops_df.itertuples(): - if stop.stop_id in route_id_on_stops: - route_ids = route_id_on_stops.at[stop.stop_id].tolist() - else: - if ignore_no_route: - continue - route_ids = [] + for stop in self.dataframes["stops"][ + ["stop_id", "stop_lat", "stop_lon", "stop_name"] + ].itertuples(): + # get all route_id related to the stop + route_ids = [] + if stop.stop_id in route_ids_on_stops: + route_ids = route_ids_on_stops.at[stop.stop_id].tolist() + + if len(route_ids) == 0 and ignore_no_route: + # skip to output the stop + continue - features.append({ - 'type': 'Feature', - 'geometry': { - 'type': 'Point', - 'coordinates': [stop.stop_lon, stop.stop_lat] - }, - 'properties': { - 'stop_id': stop.stop_id, - 'stop_name': stop.stop_name, - 'route_ids': route_ids + features.append( + { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": (stop.stop_lon, stop.stop_lat), + }, + "properties": { + "stop_id": stop.stop_id, + "stop_name": stop.stop_name, + "route_ids": route_ids, + }, } - }) + ) return features - def get_route_ids_on_stops(self): - stop_times_trip_df = pd.merge( - self.dataframes['stop_times'], - self.dataframes['trips'], - on='trip_id', - ) - group = stop_times_trip_df.groupby('stop_id')['route_id'].unique() - group.apply(lambda x: x.sort()) - return group - - def read_interpolated_stops(self): + def read_routes(self, no_shapes=False) -> list: """ - Read stops "interpolated" by parent station or stop_id or stop_name and distance. - There are many similar stops that are near to each, has same name, or has same prefix in stop_id. 
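# --- editor's note: illustrative sketch, not part of the upstream diff -----
# How the "same prefix in stop_id" grouping works: with delimiter "_",
# platform-level ids such as "ekimae_A" and "ekimae_B" collapse to the prefix
# "ekimae", which becomes the similar_stop_id (example ids are hypothetical):
def stop_id_prefix(stop_id: str, delimiter: str) -> str:
    # same rsplit idiom as __get_stops_id_delimited()
    return stop_id.rsplit(delimiter, 1)[0]

assert stop_id_prefix("ekimae_A", "_") == "ekimae"
assert stop_id_prefix("ekimae_B", "_") == "ekimae"
assert stop_id_prefix("terminal", "_") == "terminal"  # nothing to split
# ---------------------------------------------------------------------------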
- In traffic analyzing, it is good for that similar stops to be grouped as same stop. - This method group them by some elements, parent, id, name and distance. + Read routes from shapes or stop_times. + First, this method tries to load shapes and parse them into routes, + but shapes is an optional table in GTFS. If shapes does not exist or no_shapes is True, + routes are parsed from stop_times, stops, trips, and routes. Args: - delimiter (str, optional): stop_id delimiter, sample_A, sample_B, then delimiter is '_'. Defaults to ''. - max_distance_degree (float, optional): distance limit in grouping by stop_name. Defaults to 0.01. + no_shapes (bool, optional): ignore shapes table. Defaults to False. Returns: - [type]: [description] + [list]: list of GeoJSON-Feature-dict """ + features = [] - stop_dicts = self.similar_stops_df[[ - 'similar_stop_id', 'similar_stop_name', 'similar_stops_centroid']].to_dict(orient='records') - return [{ - 'type': 'Feature', - 'geometry': { - 'type': 'Point', - 'coordinates': stop['similar_stops_centroid'] - }, - 'properties': { - 'similar_stop_name': stop['similar_stop_name'], - 'similar_stop_id': stop['similar_stop_id'], - } - } for stop in stop_dicts] + if self.dataframes.get("shapes") is None or no_shapes: + # trip-route-merge:A + trips_routes = pd.merge( + self.dataframes["trips"][["trip_id", "route_id"]], + self.dataframes["routes"][ + ["route_id", "route_long_name", "route_short_name"] + ], + on="route_id", + ) - def read_route_frequency(self, yyyymmdd='', begin_time='', end_time=''): - """ - By grouped stops, aggregate route frequency. - Filtering trips by a date, you can aggregate frequency only route serviced on the date. + # stop_times-stops-merge:B + stop_times_stop = pd.merge( + self.dataframes["stop_times"][["stop_id", "trip_id", "stop_sequence"]], + self.dataframes.get("stops")[["stop_id", "stop_lon", "stop_lat"]], + on="stop_id", + ) - Args: - yyyymmdd (str, optional): date, like 20210401. Defaults to ''. - begin_time (str, optional): 'hhmmss' <= departure time, like 030000. Defaults to ''. - end_time (str, optional): 'hhmmss' > departure time, like 280000. Defaults to ''.
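# --- editor's note: illustrative sketch, not part of the upstream diff -----
# GTFS departure_time may run past 24:00:00 ("28:00:00" is 4 a.m. on the next
# calendar day but the same service day), hence end_time values like 280000.
# The filter compares zero-padded "hhmmss" integers, like so:
def in_time_window(departure_time: str, begin: str, end: str) -> bool:
    value = int(departure_time.replace(":", ""))  # "07:30:00" -> 73000
    return int(begin) <= value < int(end)

assert in_time_window("07:30:00", "030000", "280000")
assert not in_time_window("28:30:00", "030000", "280000")  # past the window
# ---------------------------------------------------------------------------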
+ # A-B-merge + merged = pd.merge(stop_times_stop, trips_routes, on="trip_id") + merged["route_concat_name"] = merged["route_long_name"].fillna("") + merged[ + "route_short_name" + ].fillna("") - Returns: - [type]: [description] - """ - stop_times_df = self.dataframes.get( - 'stop_times')[['stop_id', 'trip_id', 'stop_sequence', 'departure_time']].sort_values( - ['trip_id', 'stop_sequence']).copy() + # parse routes + for route_id in merged["route_id"].unique(): + route = merged[merged["route_id"] == route_id] + trip_id = route["trip_id"].unique()[0] + route = route[route["trip_id"] == trip_id].sort_values("stop_sequence") + features.append( + { + "type": "Feature", + "geometry": { + "type": "LineString", + "coordinates": route[ + ["stop_lon", "stop_lat"] + ].values.tolist(), + }, + "properties": { + "route_id": str(route_id), + "route_name": route.route_concat_name.values.tolist()[0], + }, + } + ) + else: + # parse shape.txt to GeoJSON-Features + shape_coords = self.__get_shapes_coordinates() + shape_ids_on_routes = self.__get_shape_ids_on_routes() + # list-up already loaded shape_ids + loaded_shape_ids = set() + for route in self.dataframes.get("routes").itertuples(): + if shape_ids_on_routes.get(route.route_id) is None: + continue - # filter stop_times by whether serviced or not - if yyyymmdd: - trips_filtered_by_day = self.get_trips_on_a_date(yyyymmdd) - stop_times_df = pd.merge( - stop_times_df, trips_filtered_by_day, on='trip_id', how='left') - stop_times_df = stop_times_df[stop_times_df['service_flag'] == 1] + # get coords by route_id + coordinates = [] + for shape_id in shape_ids_on_routes[route.route_id]: + coordinates.append(shape_coords.at[shape_id]) + loaded_shape_ids.add(shape_id) # update loaded shape_ids + + route_name = self.__get_route_name_from_tupple(route) + features.append( + { + "type": "Feature", + "geometry": { + "type": "MultiLineString", + "coordinates": coordinates, + }, + "properties": { + "route_id": str(route.route_id), + "route_name": route_name, + }, + } + ) + + # load shapes unloaded yet + for shape_id in list( + filter(lambda id: id not in loaded_shape_ids, shape_coords.index) + ): + features.append( + { + "type": "Feature", + "geometry": { + "type": "MultiLineString", + "coordinates": [shape_coords.at[shape_id]], + }, + "properties": { + "route_id": None, + "route_name": str(shape_id), + }, + } + ) - # join agency info) - stop_times_df = pd.merge(stop_times_df, self.dataframes['trips'][[ - 'trip_id', 'route_id']], on='trip_id', how='left') - stop_times_df = pd.merge(stop_times_df, self.dataframes['routes'][[ - 'route_id', 'agency_id']], on='route_id', how='left') - stop_times_df = pd.merge(stop_times_df, self.dataframes['agency'][[ - 'agency_id', 'agency_name']], on='agency_id', how='left') + return features - # get prev and next stops_id, stop_name, trip_id - stop_times_df = pd.merge(stop_times_df, self.dataframes['stops'][[ - 'stop_id', 'similar_stop_id', 'similar_stop_name', 'similar_stops_centroid']], on='stop_id', how='left') - stop_times_df['prev_stop_id'] = stop_times_df['similar_stop_id'] - stop_times_df['prev_trip_id'] = stop_times_df['trip_id'] - stop_times_df['prev_stop_name'] = stop_times_df['similar_stop_name'] - stop_times_df['prev_similar_stops_centroid'] = stop_times_df['similar_stops_centroid'] - stop_times_df['next_stop_id'] = stop_times_df['similar_stop_id'].shift( - -1) - stop_times_df['next_trip_id'] = stop_times_df['trip_id'].shift(-1) - stop_times_df['next_stop_name'] = stop_times_df['similar_stop_name'].shift( - -1) - 
stop_times_df['next_similar_stops_centroid'] = stop_times_df['similar_stops_centroid'].shift( - -1) + @staticmethod + def __get_route_name_from_tupple(route): + if not pd.isna(route.route_short_name): + return route.route_short_name + elif not pd.isna(route.route_long_name): + return route.route_long_name + else: + ValueError(f'{route} have neither "route_long_name" or "route_short_time".') - # drop last stops (-> stops has no next stop) - stop_times_df = stop_times_df.drop( - index=stop_times_df.query('prev_trip_id != next_trip_id').index) + def __get_shape_ids_on_routes(self): + trips_with_shape_df = self.dataframes["trips"][["route_id", "shape_id"]].dropna( + subset=["shape_id"] + ) + group = trips_with_shape_df.groupby("route_id")["shape_id"].unique() + group.apply(lambda x: x.sort()) + return group + + def __get_shapes_coordinates(self): + shapes_df = self.dataframes["shapes"].copy() + shapes_df.sort_values("shape_pt_sequence") + shapes_df["pt"] = shapes_df[["shape_pt_lon", "shape_pt_lat"]].values.tolist() + return shapes_df.groupby("shape_id")["pt"].apply(tuple) + + def __aggregate_similar_stops( + self, + delimiter: str, + max_distance_degree: float, + as_unify_stops: bool, + yyyymmdd="", + begin_time="", + end_time="", + ): + # filter stop_times by whether serviced or not + if yyyymmdd: + trips_filtered_by_day = self.__get_trips_on_a_date(yyyymmdd) + self.dataframes["stop_times"] = pd.merge( + self.dataframes["stop_times"], + trips_filtered_by_day, + on="trip_id", + how="left", + ) + self.dataframes["stop_times"] = self.dataframes["stop_times"][ + self.dataframes["stop_times"]["service_flag"] == 1 + ] # time filter if begin_time and end_time: - stop_times_df = self.stop_time_filter(stop_times_df, begin_time, end_time) - - # define path_id by prev-stops-centroid and next-stops-centroid - stop_times_df['path_id'] = stop_times_df['prev_stop_id'] + stop_times_df['next_stop_id'] + stop_times_df['prev_similar_stops_centroid'].map( - latlon_to_str) + stop_times_df['next_similar_stops_centroid'].map(latlon_to_str) - - # aggregate path-frequency - path_frequency = stop_times_df[['similar_stop_id', 'path_id']].groupby( - 'path_id').count().reset_index() - path_frequency.columns = ['path_id', 'path_count'] - path_data = pd.merge(path_frequency, stop_times_df.drop_duplicates( - subset='path_id'), on='path_id') - path_data_dict = path_data.to_dict(orient='records') - - return [{ - 'type': 'Feature', - 'geometry': { - 'type': 'LineString', - 'coordinates': (path['prev_similar_stops_centroid'], - path['next_similar_stops_centroid']) - }, - 'properties': { - 'frequency': path['path_count'], - 'prev_stop_id': path['prev_stop_id'], - 'prev_stop_name': path['prev_stop_name'], - 'next_stop_id': path['next_stop_id'], - 'next_stop_name': path['next_stop_name'], - 'agency_id':path['agency_id'], - 'agency_name': path['agency_name'] - } - } for path in path_data_dict] - - def stop_time_filter(self, stop_time_df, begin_time, end_time): - # departure_time is nullable and expressed in "hh:mm:ss" or "h:mm:ss" format. - # Hour can be mor than 24. - # Therefore, drop null records and convert times to integers. 
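# --- editor's note: illustrative sketch, not part of the upstream diff -----
# Frequency aggregation in brief: each consecutive stop pair of a trip gets a
# path_id (the two similar_stop_ids plus both centroid strings), and grouping
# by path_id then counts how often that segment is served. Toy pandas version:
import pandas as pd

segments = pd.DataFrame(
    {"prev_stop_id": ["A", "A", "B"], "next_stop_id": ["B", "B", "C"]}
)
segments["path_id"] = segments["prev_stop_id"] + segments["next_stop_id"]
frequency = segments.groupby("path_id").size()
assert frequency["AB"] == 2 and frequency["BC"] == 1
# ---------------------------------------------------------------------------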
- df = stop_time_df[stop_time_df.departure_time != ''] - int_dep_times = stop_time_df.departure_time.str.replace(':', '').astype(int) - return df[(int_dep_times >= int(begin_time)) & (int_dep_times < int(end_time))] - - @ lru_cache(maxsize=None) - def get_similar_stop_tuple(self, stop_id: str, delimiter='', max_distance_degree=0.01): + # departure_time is nullable and expressed in "hh:mm:ss" or "h:mm:ss" format. + # Hour can be mor than 24. + # Therefore, drop null records and convert times to integers. + int_dep_times = ( + self.dataframes["stop_times"] + .departure_time.str.replace(":", "") + .astype(int) + ) + self.dataframes["stop_times"] = self.dataframes["stop_times"][ + self.dataframes["stop_times"].departure_time != "" + ][(int_dep_times >= int(begin_time)) & (int_dep_times < int(end_time))] + + if as_unify_stops: + parent_ids = self.dataframes["stops"]["parent_station"].unique() + self.dataframes["stops"]["is_parent"] = self.dataframes["stops"][ + "stop_id" + ].map(lambda stop_id: 1 if stop_id in parent_ids else 0) + + self.dataframes["stops"][ + ["similar_stop_id", "similar_stop_name", "similar_stops_centroid"] + ] = ( + self.dataframes["stops"]["stop_id"] + .map( + lambda stop_id: self.__get_similar_stop_tuple( + stop_id, delimiter, max_distance_degree + ) + ) + .apply(pd.Series) + ) + self.dataframes["stops"]["position_id"] = self.dataframes["stops"][ + "similar_stops_centroid" + ].map(latlon_to_str) + self.dataframes["stops"]["unique_id"] = ( + self.dataframes["stops"]["similar_stop_id"] + + self.dataframes["stops"]["position_id"] + ) + + # sometimes stop_name accidently becomes pd.Series instead of str. + self.dataframes["stops"]["similar_stop_name"] = self.dataframes["stops"][ + "similar_stop_name" + ].map(lambda val: val if type(val) == str else val.stop_name) + + position_count = ( + self.dataframes["stop_times"] + .merge(self.dataframes["stops"], on="stop_id", how="left") + .groupby("position_id") + .size() + .to_frame() + .reset_index() + ) + position_count.columns = ["position_id", "position_count"] + + self.similar_stops_df = pd.merge( + self.dataframes["stops"].drop_duplicates(subset="position_id")[ + [ + "position_id", + "similar_stop_id", + "similar_stop_name", + "similar_stops_centroid", + ] + ], + position_count, + on="position_id", + how="left", + ) + else: + # no unifying stops + self.dataframes["stops"]["similar_stop_id"] = self.dataframes["stops"][ + "stop_id" + ] + self.dataframes["stops"]["similar_stop_name"] = self.dataframes["stops"][ + "stop_name" + ] + self.dataframes["stops"]["similar_stops_centroid"] = self.dataframes[ + "stops" + ][["stop_lon", "stop_lat"]].values.tolist() + self.dataframes["stops"]["position_count"] = 1 + self.similar_stops_df = self.dataframes["stops"][ + [ + "similar_stop_id", + "similar_stop_name", + "similar_stops_centroid", + "position_count", + ] + ].copy() + + @lru_cache(maxsize=None) + def __get_similar_stop_tuple( + self, stop_id: str, delimiter="", max_distance_degree=0.01 + ): """ With one stop_id, group stops by parent, stop_id, or stop_name and each distance. 
- parent: if stop has parent_station, the 'centroid' is parent_station lat-lon @@ -277,290 +386,348 @@ def get_similar_stop_tuple(self, stop_id: str, delimiter='', max_distance_degree Returns: str, str, [float, float]: similar_stop_id, similar_stop_name, similar_stops_centroid """ - stops_df = self.dataframes['stops'].sort_values('stop_id') - stop = stops_df[stops_df['stop_id'] == stop_id].iloc[0] - - if stop['is_parent'] == 1: - return stop['stop_id'], stop['stop_name'], [stop['stop_lon'], stop['stop_lat']] - - if str(stop['parent_station']) != 'nan': - similar_stop_id = stop['parent_station'] - similar_stop = stops_df[stops_df['stop_id'] == similar_stop_id] - similar_stop_name = similar_stop[['stop_name']].iloc[0] - similar_stop_centroid = similar_stop[[ - 'stop_lon', 'stop_lat']].iloc[0].values.tolist() + stops_df = self.dataframes["stops"].sort_values("stop_id") + stop = stops_df[stops_df["stop_id"] == stop_id].iloc[0] + + if stop["is_parent"] == 1: + return ( + stop["stop_id"], + stop["stop_name"], + [stop["stop_lon"], stop["stop_lat"]], + ) + + if str(stop["parent_station"]) != "nan": + similar_stop_id = stop["parent_station"] + similar_stop = stops_df[stops_df["stop_id"] == similar_stop_id] + similar_stop_name = similar_stop[["stop_name"]].iloc[0] + similar_stop_centroid = ( + similar_stop[["stop_lon", "stop_lat"]].iloc[0].values.tolist() + ) return similar_stop_id, similar_stop_name, similar_stop_centroid if delimiter: - stops_df_id_delimited = self.get_stops_id_delimited(delimiter) + stops_df_id_delimited = self.__get_stops_id_delimited(delimiter) stop_id_prefix = stop_id.rsplit(delimiter, 1)[0] if stop_id_prefix != stop_id: similar_stop_id = stop_id_prefix - seperated_only_stops = stops_df_id_delimited[stops_df_id_delimited['delimited']] - similar_stops = seperated_only_stops[seperated_only_stops['stop_id_prefix'] == stop_id_prefix][[ - 'stop_name', 'similar_stops_centroid_lon', 'similar_stops_centroid_lat']] - similar_stop_name = similar_stops[['stop_name']].iloc[0] - similar_stop_centroid = similar_stops[[ - 'similar_stops_centroid_lon', 'similar_stops_centroid_lat']].values.tolist()[0] + seperated_only_stops = stops_df_id_delimited[ + stops_df_id_delimited["delimited"] + ] + similar_stops = seperated_only_stops[ + seperated_only_stops["stop_id_prefix"] == stop_id_prefix + ][ + [ + "stop_name", + "similar_stops_centroid_lon", + "similar_stops_centroid_lat", + ] + ] + similar_stop_name = similar_stops[["stop_name"]].iloc[0] + similar_stop_centroid = similar_stops[ + ["similar_stops_centroid_lon", "similar_stops_centroid_lat"] + ].values.tolist()[0] return similar_stop_id, similar_stop_name, similar_stop_centroid else: # when cannot seperate stop_id, grouping by name and distance - stops_df = stops_df_id_delimited[~stops_df_id_delimited['delimited']] + stops_df = stops_df_id_delimited[~stops_df_id_delimited["delimited"]] # grouping by name and distance - similar_stops = stops_df[stops_df['stop_name'] == stop['stop_name']][[ - 'stop_id', 'stop_name', 'stop_lon', 'stop_lat']] + similar_stops = stops_df[stops_df["stop_name"] == stop["stop_name"]][ + ["stop_id", "stop_name", "stop_lon", "stop_lat"] + ] similar_stops = similar_stops.query( - f'(stop_lon - {stop["stop_lon"]}) ** 2 + (stop_lat - {stop["stop_lat"]}) ** 2 < {max_distance_degree ** 2}') - similar_stop_centroid = similar_stops[[ - 'stop_lon', 'stop_lat']].mean().values.tolist() - similar_stop_id = similar_stops['stop_id'].iloc[0] - similar_stop_name = stop['stop_name'] + f'(stop_lon - {stop["stop_lon"]}) ** 2 + (stop_lat - 
{stop["stop_lat"]}) ** 2 < {max_distance_degree ** 2}' + ) + similar_stop_centroid = ( + similar_stops[["stop_lon", "stop_lat"]].mean().values.tolist() + ) + similar_stop_id = similar_stops["stop_id"].iloc[0] + similar_stop_name = stop["stop_name"] return similar_stop_id, similar_stop_name, similar_stop_centroid - def get_similar_stops_by_name_and_distance(self, stop_name, distance): - similar_stops = self.stops_df[self.stops_df['stop_name'] == stop['stop_name']][[ - 'stop_lon', 'stop_lat']].copy() - similar_stops = similar_stops.query( - f'(stop_lon - {stop["stop_lon"]}) ** 2 + (stop_lat - {stop["stop_lat"]}) ** 2 < {max_distance_degree ** 2}') - return similar_stops - - @ lru_cache(maxsize=None) - def get_stops_id_delimited(self, delimiter): - stops_df = self.dataframes.get( - 'stops')[['stop_id', 'stop_name', 'stop_lon', 'stop_lat', 'parent_station']].copy() - stops_df['stop_id_prefix'] = stops_df['stop_id'].map( - lambda stop_id: stop_id.rsplit(delimiter, 1)[0]) - stops_df['delimited'] = stops_df['stop_id'] != stops_df['stop_id_prefix'] - grouped_by_prefix = stops_df[[ - 'stop_id_prefix', 'stop_lon', 'stop_lat']].groupby('stop_id_prefix').mean().reset_index() + @lru_cache(maxsize=None) + def __get_stops_id_delimited(self, delimiter: str): + stops_df = self.dataframes.get("stops")[ + ["stop_id", "stop_name", "stop_lon", "stop_lat", "parent_station"] + ].copy() + stops_df["stop_id_prefix"] = stops_df["stop_id"].map( + lambda stop_id: stop_id.rsplit(delimiter, 1)[0] + ) + stops_df["delimited"] = stops_df["stop_id"] != stops_df["stop_id_prefix"] + grouped_by_prefix = ( + stops_df[["stop_id_prefix", "stop_lon", "stop_lat"]] + .groupby("stop_id_prefix") + .mean() + .reset_index() + ) grouped_by_prefix.columns = [ - 'stop_id_prefix', 'similar_stops_centroid_lon', 'similar_stops_centroid_lat'] + "stop_id_prefix", + "similar_stops_centroid_lon", + "similar_stops_centroid_lat", + ] stops_df_with_centroid = pd.merge( - stops_df, grouped_by_prefix, on='stop_id_prefix', how='left') + stops_df, grouped_by_prefix, on="stop_id_prefix", how="left" + ) return stops_df_with_centroid - @ classmethod - def get_route_name_from_tupple(cls, route): - if not pd.isna(route.route_short_name): - return route.route_short_name - elif not pd.isna(route.route_long_name): - return route.route_long_name - else: - ValueError( - f'{route} have neither "route_long_name" or "route_short_time".') + def read_interpolated_stops(self): + """ + Read stops "interpolated" by parent station or stop_id or stop_name and distance. + There are many similar stops that are near to each, has same name, or has same prefix in stop_id. + In traffic analyzing, it is good for that similar stops to be grouped as same stop. + This method group them by some elements, parent, id, name and distance. - def routes_count(self, no_shapes=False): - if self.dataframes.get('shapes') is None or no_shapes: - route_ids = self.dataframes.get('trips')['route_id'].unique() - return len(route_ids) - else: - shape_ids = self.dataframes.get('shapes')['shape_id'].unique() - return len(shape_ids) - - @ lru_cache(maxsize=None) - def get_shape_ids_on_routes(self): - trips_with_shape_df = self.dataframes['trips'][[ - 'route_id', 'shape_id']].dropna(subset=['shape_id']) - group = trips_with_shape_df.groupby('route_id')['shape_id'].unique() - group.apply(lambda x: x.sort()) - return group + Args: + delimiter (str, optional): stop_id delimiter, sample_A, sample_B, then delimiter is '_'. Defaults to ''. 
+ max_distance_degree (float, optional): distance limit in grouping by stop_name. Defaults to 0.01. - @ lru_cache(maxsize=None) - def get_shapes_coordinates(self): - shapes_df = self.dataframes['shapes'].copy() - shapes_df.sort_values('shape_pt_sequence') - shapes_df['pt'] = shapes_df[[ - 'shape_pt_lon', 'shape_pt_lat']].values.tolist() - return shapes_df.groupby('shape_id')['pt'].apply(list) + Returns: + [type]: [description] + """ - def get_trips_on_a_date(self, yyyymmdd: str): + stop_dicts = self.similar_stops_df[ + [ + "similar_stop_id", + "similar_stop_name", + "similar_stops_centroid", + "position_count", + ] + ].to_dict(orient="records") + return [ + { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": stop["similar_stops_centroid"], + }, + "properties": { + "similar_stop_name": stop["similar_stop_name"], + "similar_stop_id": stop["similar_stop_id"], + "count": stop["position_count"], + }, + } + for stop in stop_dicts + ] + + def read_route_frequency(self): """ - get trips are on service on a date. + By grouped stops, aggregate route frequency. + Filtering trips by a date, you can aggregate frequency only route serviced on the date. Args: - yyyymmdd (str): [description] + yyyymmdd (str, optional): date, like 20210401. Defaults to ''. + begin_time (str, optional): 'hhmmss' <= departure time, like 030000. Defaults to ''. + end_time (str, optional): 'hhmmss' > departure time, like 280000. Defaults to ''. Returns: [type]: [description] """ - # sunday, monday, tuesday... - day_of_week = datetime.date(int(yyyymmdd[0:4]), int( - yyyymmdd[4:6]), int(yyyymmdd[6:8])).strftime('%A').lower() - - # filter services by day - calendar_df = self.dataframes['calendar'].copy() - calendar_df = calendar_df.astype({'start_date': int, 'end_date': int}) - calendar_df = calendar_df[calendar_df[day_of_week] == '1'] - calendar_df = calendar_df.query( - f'start_date <= {int(yyyymmdd)} and {int(yyyymmdd)} <= end_date', engine='python') - - services_on_a_day = calendar_df[['service_id']] + stop_times_df = ( + self.dataframes.get("stop_times")[ + ["stop_id", "trip_id", "stop_sequence", "departure_time"] + ] + .sort_values(["trip_id", "stop_sequence"]) + .copy() + ) - calendar_dates_df = self.dataframes.get('calendar_dates') - if calendar_dates_df is not None: - filtered = calendar_dates_df[calendar_dates_df['date'] == yyyymmdd][[ - 'service_id', 'exception_type']] - to_be_removed_services = filtered[filtered['exception_type'] == '2'] - to_be_appended_services = filtered[filtered['exception_type'] == '1'][[ - 'service_id']] + # join agency info) + stop_times_df = pd.merge( + stop_times_df, + self.dataframes["trips"][["trip_id", "route_id"]], + on="trip_id", + how="left", + ) + stop_times_df = pd.merge( + stop_times_df, + self.dataframes["routes"][["route_id", "agency_id"]], + on="route_id", + how="left", + ) + stop_times_df = pd.merge( + stop_times_df, + self.dataframes["agency"][["agency_id", "agency_name"]], + on="agency_id", + how="left", + ) - services_on_a_day = pd.merge( - services_on_a_day, to_be_removed_services, on='service_id', how='left') - services_on_a_day = services_on_a_day[services_on_a_day['exception_type'] != '2'] - services_on_a_day = pd.concat( - [services_on_a_day, to_be_appended_services]) + # get prev and next stops_id, stop_name, trip_id + stop_times_df = pd.merge( + stop_times_df, + self.dataframes["stops"][ + [ + "stop_id", + "similar_stop_id", + "similar_stop_name", + "similar_stops_centroid", + ] + ], + on="stop_id", + how="left", + ) + 
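# --- editor's note: illustrative sketch, not part of the upstream diff -----
# The prev_/next_ columns below pair each stop_times row with the following
# row via shift(-1); pairs that straddle two trips are dropped afterwards by
# the "prev_trip_id != next_trip_id" query. Toy pandas version:
import pandas as pd

df = pd.DataFrame({"trip_id": ["t1", "t1", "t2"], "stop_id": ["A", "B", "C"]})
df["next_stop_id"] = df["stop_id"].shift(-1)
df["next_trip_id"] = df["trip_id"].shift(-1)
df = df[df["trip_id"] == df["next_trip_id"]]  # drop cross-trip pairs
assert df["next_stop_id"].tolist() == ["B"]
# ---------------------------------------------------------------------------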
stop_times_df["prev_stop_id"] = stop_times_df["similar_stop_id"] + stop_times_df["prev_trip_id"] = stop_times_df["trip_id"] + stop_times_df["prev_stop_name"] = stop_times_df["similar_stop_name"] + stop_times_df["prev_similar_stops_centroid"] = stop_times_df[ + "similar_stops_centroid" + ] + stop_times_df["next_stop_id"] = stop_times_df["similar_stop_id"].shift(-1) + stop_times_df["next_trip_id"] = stop_times_df["trip_id"].shift(-1) + stop_times_df["next_stop_name"] = stop_times_df["similar_stop_name"].shift(-1) + stop_times_df["next_similar_stops_centroid"] = stop_times_df[ + "similar_stops_centroid" + ].shift(-1) - services_on_a_day['service_flag'] = 1 + # drop last stops (-> stops has no next stop) + stop_times_df = stop_times_df.drop( + index=stop_times_df.query("prev_trip_id != next_trip_id").index + ) - # filter trips - trips_df = self.dataframes['trips'].copy() - trip_service = pd.merge(trips_df, services_on_a_day, on='service_id') - trip_service = trip_service[trip_service['service_flag'] == 1] + # define path_id by prev-stops-centroid and next-stops-centroid + stop_times_df["path_id"] = ( + stop_times_df["prev_stop_id"] + + stop_times_df["next_stop_id"] + + stop_times_df["prev_similar_stops_centroid"].map(latlon_to_str) + + stop_times_df["next_similar_stops_centroid"].map(latlon_to_str) + ) - return trip_service[['trip_id', 'service_flag']] + # aggregate path-frequency + path_frequency = ( + stop_times_df[["similar_stop_id", "path_id"]] + .groupby("path_id") + .count() + .reset_index() + ) + path_frequency.columns = ["path_id", "path_count"] + path_data = pd.merge( + path_frequency, + stop_times_df.drop_duplicates(subset="path_id"), + on="path_id", + ) + path_data_dict = path_data.to_dict(orient="records") + + return [ + { + "type": "Feature", + "geometry": { + "type": "LineString", + "coordinates": ( + path["prev_similar_stops_centroid"], + path["next_similar_stops_centroid"], + ), + }, + "properties": { + "frequency": path["path_count"], + "prev_stop_id": path["prev_stop_id"], + "prev_stop_name": path["prev_stop_name"], + "next_stop_id": path["next_stop_id"], + "next_stop_name": path["next_stop_name"], + "agency_id": path["agency_id"], + "agency_name": path["agency_name"], + }, + } + for path in path_data_dict + ] - def read_routes(self, no_shapes=False) -> list: + def __get_trips_on_a_date(self, yyyymmdd: str): """ - read routes by shapes or stop_times - First, this method try to load shapes and parse it into routes, - but shapes is optional table in GTFS. Then is shapes does not exist or no_shapes is True, - this parse routes by stop_time, stops, trips, and routes. + get trips are on service on a date. Args: - no_shapes (bool, optional): ignore shapes table. Defaults to False. + yyyymmdd (str): [description] Returns: - [list]: list of GeoJSON-Feature-dict + [type]: [description] """ - if self.dataframes.get('shapes') is None or no_shapes: - # no-shape routes + # sunday, monday, tuesday... 
+ day_of_week = ( + datetime.date(int(yyyymmdd[0:4]), int(yyyymmdd[4:6]), int(yyyymmdd[6:8])) + .strftime("%A") + .lower() + ) - # trip-route-merge:A - trips_df = self.dataframes['trips'][['trip_id', 'route_id']] - routes_df = self.dataframes['routes'][[ - 'route_id', 'route_long_name', 'route_short_name']] - trips_routes = pd.merge(trips_df, routes_df, on='route_id') + # filter services by day + calendar_df = self.dataframes["calendar"].copy() + calendar_df = calendar_df.astype({"start_date": int, "end_date": int}) + calendar_df = calendar_df[calendar_df[day_of_week] == "1"] + calendar_df = calendar_df.query( + f"start_date <= {int(yyyymmdd)} and {int(yyyymmdd)} <= end_date", + engine="python", + ) - # stop_times-stops-merge:B - stop_times_df = self.dataframes['stop_times'][[ - 'stop_id', 'trip_id', 'stop_sequence']] - stops_df = self.dataframes.get( - 'stops')[['stop_id', 'stop_lon', 'stop_lat']] - merged = pd.merge( - stop_times_df, stops_df[['stop_id', 'stop_lon', 'stop_lat']], on='stop_id') + services_on_a_day = calendar_df[["service_id"]] - # A-B-merge - merged = pd.merge(merged, trips_routes, on='trip_id') - merged['route_concat_name'] = merged['route_long_name'].fillna('') + \ - merged['route_short_name'].fillna('') + calendar_dates_df = self.dataframes.get("calendar_dates") + if calendar_dates_df is not None: + filtered = calendar_dates_df[calendar_dates_df["date"] == yyyymmdd][ + ["service_id", "exception_type"] + ] + to_be_removed_services = filtered[filtered["exception_type"] == "2"] + to_be_appended_services = filtered[filtered["exception_type"] == "1"][ + ["service_id"] + ] - # parse routes - route_ids = merged['route_id'].unique() - features = [] - for route_id in route_ids: - route = merged[merged['route_id'] == route_id] - trip_id = route['trip_id'].unique()[0] - route = route[route['trip_id'] == - trip_id].sort_values('stop_sequence') - features.append({ - 'type': 'Feature', - 'geometry': { - 'type': 'LineString', - 'coordinates': route[['stop_lon', 'stop_lat']].values.tolist() - }, - 'properties': { - 'route_id': str(route_id), - 'route_name': route.route_concat_name.values.tolist()[0], - } - }) - return features - else: - shape_coords = self.get_shapes_coordinates() - shape_ids_on_routes = self.get_shape_ids_on_routes() - features = [] - for route in self.dataframes.get('routes').itertuples(): - if shape_ids_on_routes.get(route.route_id) is None: - continue - coordinates = [shape_coords.at[shape_id] - for shape_id in shape_ids_on_routes[route.route_id]] - route_name = self.get_route_name_from_tupple(route) - features.append({ - 'type': 'Feature', - 'geometry': { - 'type': 'MultiLineString', - 'coordinates': coordinates - }, - 'properties': { - 'route_id': str(route.route_id), - 'route_name': route_name, - } - }) + services_on_a_day = pd.merge( + services_on_a_day, to_be_removed_services, on="service_id", how="left" + ) + services_on_a_day = services_on_a_day[ + services_on_a_day["exception_type"] != "2" + ] + services_on_a_day = pd.concat([services_on_a_day, to_be_appended_services]) - # list-up already loaded shape_ids, dropping dupulicates - loaded_shape_ids = list(set(sum([list(val) - for val in shape_ids_on_routes], []))) + services_on_a_day["service_flag"] = 1 - # load shape_ids unloaded yet - for shape_id in shape_coords.index: - if shape_id in loaded_shape_ids: - continue - features.append({ - 'type': 'Feature', - 'geometry': { - 'type': 'MultiLineString', - 'coordinates': [shape_coords.at[shape_id]] - }, - 'properties': { - 'route_id': None, - 'route_name': 

 if __name__ == "__main__":
     import argparse
     import shutil
+
     parser = argparse.ArgumentParser()
-    parser.add_argument('--zip')
-    parser.add_argument('--src_dir')
-    parser.add_argument('--output_dir')
-    parser.add_argument('--no_shapes', action='store_true')
-    parser.add_argument('--ignore_no_route', action='store_true')
-    parser.add_argument('--frequency', action='store_true')
-    parser.add_argument('--yyyymmdd')
-    parser.add_argument('--as_unify_stops', action='store_true')
-    parser.add_argument('--delimiter')
-    parser.add_argument('--begin_time')
-    parser.add_argument('--end_time')
+    parser.add_argument("--zip")
+    parser.add_argument("--src_dir")
+    parser.add_argument("--output_dir")
+    parser.add_argument("--no_shapes", action="store_true")
+    parser.add_argument("--ignore_no_route", action="store_true")
+    parser.add_argument("--frequency", action="store_true")
+    parser.add_argument("--yyyymmdd")
+    parser.add_argument("--as_unify_stops", action="store_true")
+    parser.add_argument("--delimiter")
+    parser.add_argument("--begin_time")
+    parser.add_argument("--end_time")
     args = parser.parse_args()

     if args.zip is None and args.src_dir is None:
-        raise RuntimeError('gtfs-jp-parser needs zipfile or src_dir.')
+        raise RuntimeError("gtfs-jp-parser needs zipfile or src_dir.")

     if args.yyyymmdd:
         if len(args.yyyymmdd) != 8:
             raise RuntimeError(
-                f'yyyymmdd must be 8 characters string, for example 20210401, your is {args.yyyymmdd} ({len(args.yyyymmdd)} characters)')
+                f"yyyymmdd must be an 8-character string, for example 20210401; yours is {args.yyyymmdd} ({len(args.yyyymmdd)} characters)"
+            )

     if args.begin_time:
         if len(args.begin_time) != 6:
             raise RuntimeError(
-                f'begin_time must be "hhmmss", your is {args.begin_time}')
+                f'begin_time must be "hhmmss"; yours is {args.begin_time}'
+            )
         if not args.end_time:
-            raise RuntimeError('end_time is not set.')
+            raise RuntimeError("end_time is not set.")

     if args.end_time:
         if len(args.end_time) != 6:
-            raise RuntimeError(
-                f'end_time must be "hhmmss", your is {args.end_time}')
+            raise RuntimeError(f'end_time must be "hhmmss"; yours is {args.end_time}')
         if not args.begin_time:
-            raise RuntimeError('begin_time is not set.')
+            raise RuntimeError("begin_time is not set.")

     if args.zip:
-        print('extracting zipfile...')
-        temp_dir = os.path.join(tempfile.gettempdir(), 'gtfs-jp-parser')
+        print("extracting zipfile...")
+        temp_dir = os.path.join(tempfile.gettempdir(), "gtfs-jp-parser")
         if os.path.exists(temp_dir):
             shutil.rmtree(temp_dir)
         os.mkdir(temp_dir)
@@ -570,42 +737,39 @@ def read_routes(self, no_shapes=False) -> list:
     else:
         output_dir = args.src_dir
     gtfs_parser = GTFSParser(
-        output_dir, as_frequency=args.frequency, as_unify_stops=args.as_unify_stops, delimiter=args.delimiter)
+        output_dir,
+        as_frequency=args.frequency,
+        as_unify_stops=args.as_unify_stops,
+        delimiter=args.delimiter,
+    )

-    print('GTFS loaded.')
+    print("GTFS loaded.")

     if args.output_dir:
         output_dir = args.output_dir

     if args.frequency:
         stops_features = gtfs_parser.read_interpolated_stops()
-        stops_geojson = {
-            'type': 'FeatureCollection',
-            'features': stops_features
-        }
+        stops_geojson = {"type": "FeatureCollection", "features": stops_features}
         routes_features = gtfs_parser.read_route_frequency(
-            yyyymmdd=args.yyyymmdd, begin_time=args.begin_time, end_time=args.end_time)
-        routes_geojson = {
-            'type': 'FeatureCollection',
-            'features': routes_features
-        }
-        gtfs_parser.dataframes['stops'][['stop_id', 'stop_name', 'similar_stop_id', 'similar_stop_name']].to_csv(os.path.join(
-            output_dir, 'result.csv'), index=False, encoding='cp932')
+            yyyymmdd=args.yyyymmdd, begin_time=args.begin_time, end_time=args.end_time
+        )
+        routes_geojson = {"type": "FeatureCollection", "features": routes_features}
+        gtfs_parser.dataframes["stops"][
+            ["stop_id", "stop_name", "similar_stop_id", "similar_stop_name"]
+        ].to_csv(os.path.join(output_dir, "result.csv"), index=False, encoding="cp932")
     else:
         routes_features = gtfs_parser.read_routes(no_shapes=args.no_shapes)
-        routes_geojson = {
-            'type': 'FeatureCollection',
-            'features': routes_features
-        }
-        stops_features = gtfs_parser.read_stops(
-            ignore_no_route=args.ignore_no_route)
-        stops_geojson = {
-            'type': 'FeatureCollection',
-            'features': stops_features
-        }
-
-    print('writing geojsons...')
-    with open(os.path.join(output_dir, 'routes.geojson'), mode='w', encoding='utf-8') as f:
+        routes_geojson = {"type": "FeatureCollection", "features": routes_features}
+        stops_features = gtfs_parser.read_stops(ignore_no_route=args.ignore_no_route)
+        stops_geojson = {"type": "FeatureCollection", "features": stops_features}
+
+    print("writing geojsons...")
+    with open(
+        os.path.join(output_dir, "routes.geojson"), mode="w", encoding="utf-8"
+    ) as f:
         json.dump(routes_geojson, f, ensure_ascii=False)
-    with open(os.path.join(output_dir, 'stops.geojson'), mode='w', encoding='utf-8') as f:
+    with open(
+        os.path.join(output_dir, "stops.geojson"), mode="w", encoding="utf-8"
+    ) as f:
         json.dump(stops_geojson, f, ensure_ascii=False)
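
With the argument handling above, a frequency-mode run of the CLI might look like the following; invoking the package as a module is an assumption based on the test suite importing gtfs_parser.__main__, and the feed path is invented:

    python -m gtfs_parser --zip feed.zip --output_dir ./out --frequency --yyyymmdd 20210801 --as_unify_stops --delimiter _

In frequency mode this writes routes.geojson, stops.geojson, and result.csv into the output directory; without --frequency it falls back to the plain read_routes/read_stops branch.
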
diff --git a/gtfs_parser/tests/test_gtfs_parser.py b/gtfs_parser/tests/test_gtfs_parser.py
index bb78f0a..fe6e4c5 100644
--- a/gtfs_parser/tests/test_gtfs_parser.py
+++ b/gtfs_parser/tests/test_gtfs_parser.py
@@ -4,39 +4,43 @@
 from gtfs_parser.__main__ import GTFSParser  # nopep8

-FIXTURE_DIR = os.path.join(os.path.dirname(
-    __file__), 'fixture')
+FIXTURE_DIR = os.path.join(os.path.dirname(__file__), "fixture")


 class TestGtfsParser(unittest.TestCase):
     gtfs_parser = GTFSParser(FIXTURE_DIR)
     gtfs_parser_frequency = GTFSParser(FIXTURE_DIR, as_frequency=True)
-    gtfs_parser_frequency_unify = GTFSParser(FIXTURE_DIR,
-                                             as_frequency=True,
-                                             as_unify_stops=True)
+    gtfs_parser_frequency_unify = GTFSParser(
+        FIXTURE_DIR, as_frequency=True, as_unify_stops=True
+    )

     def test_init(self):
         # 13 txt files are in ./fixture
-        self.assertEqual(
-            13, len(glob.glob(os.path.join(FIXTURE_DIR, '*.txt'))))
+        self.assertEqual(13, len(glob.glob(os.path.join(FIXTURE_DIR, "*.txt"))))
         # read tables in constants.py
         self.assertEqual(12, len(self.gtfs_parser.dataframes.keys()))
         # as_frequency: some columns regarding frequency aggregation
         self.assertFalse(
-            "similar_stop_id" in self.gtfs_parser.dataframes["stops"].columns)
+            "similar_stop_id" in self.gtfs_parser.dataframes["stops"].columns
+        )
         self.assertTrue(
-            "similar_stop_id" in self.gtfs_parser_frequency.dataframes["stops"].columns)
+            "similar_stop_id" in self.gtfs_parser_frequency.dataframes["stops"].columns
+        )
         self.assertTrue(
-            "similar_stop_id" in self.gtfs_parser_frequency_unify.dataframes["stops"].columns)
+            "similar_stop_id"
+            in self.gtfs_parser_frequency_unify.dataframes["stops"].columns
+        )
         # as_unify: some columns regarding stop-grouping added
+        self.assertFalse("position_id" in self.gtfs_parser.dataframes["stops"].columns)
         self.assertFalse(
-            "position_id" in self.gtfs_parser.dataframes["stops"].columns)
-        self.assertFalse(
-            "position_id" in self.gtfs_parser_frequency.dataframes["stops"].columns)
+            "position_id" in self.gtfs_parser_frequency.dataframes["stops"].columns
+        )
         self.assertTrue(
-            "position_id" in self.gtfs_parser_frequency_unify.dataframes["stops"].columns)
+            "position_id"
+            in self.gtfs_parser_frequency_unify.dataframes["stops"].columns
+        )

     def test_read_stops(self):
         # list of geojson-feature
@@ -44,12 +48,10 @@ def test_read_stops(self):
         # num of stops is not changed by aggregation
         self.assertEqual(899, len(self.gtfs_parser_frequency.read_stops()))
         # num of stops is not changed by as_unify_stops
-        self.assertEqual(
-            899, len(self.gtfs_parser_frequency_unify.read_stops()))
+        self.assertEqual(899, len(self.gtfs_parser_frequency_unify.read_stops()))
         # remove no-route stops
-        self.assertEqual(
-            896, len(self.gtfs_parser.read_stops(ignore_no_route=True)))
+        self.assertEqual(896, len(self.gtfs_parser.read_stops(ignore_no_route=True)))

     def test_read_routes(self):
         # num of features in routes.geojson depends on not shapes.txt but routes.txt
@@ -57,47 +59,33 @@ def test_read_routes(self):
         self.assertEqual(32, len(self.gtfs_parser.read_routes(no_shapes=True)))
         # as_frequency and as_unify make no effect to read_routes()
         self.assertEqual(
-            32, len(self.gtfs_parser_frequency.read_routes(no_shapes=True)))
+            32, len(self.gtfs_parser_frequency.read_routes(no_shapes=True))
+        )
         self.assertEqual(
-            32, len(self.gtfs_parser_frequency_unify.read_routes(no_shapes=True)))
+            32, len(self.gtfs_parser_frequency_unify.read_routes(no_shapes=True))
+        )

     def test_read_interpolated_stops(self):
-        with self.assertRaises(AttributeError):
+        with self.assertRaises(TypeError):
             # read_interpolated_stops() needs as_frequency=True
             self.gtfs_parser.read_interpolated_stops()

         # read_interpolated_stops unifies stops having the same lat-lon into one feature.
         # there are no stops having same lat-lon in fixture
-        self.assertEqual(
-            899, len(self.gtfs_parser_frequency.read_interpolated_stops()))
+        self.assertEqual(899, len(self.gtfs_parser_frequency.read_interpolated_stops()))
         # as_unify means near and similarly named stops move to the same lat-lon (their centroid)
         self.assertEqual(
-            518, len(self.gtfs_parser_frequency_unify.read_interpolated_stops()))
+            518, len(self.gtfs_parser_frequency_unify.read_interpolated_stops())
+        )
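
The 899 -> 518 drop is the visible effect of as_unify_stops: stops the parser judges "similar" (by name and proximity; the grouping heuristics live elsewhere in gtfs_parser) are collapsed onto the centroid of their coordinates. A toy illustration of that collapse, with an invented grouping column:

    import pandas as pd

    # two platforms of one station plus an unrelated stop; in the real parser
    # the similar_stop_id grouping comes from name/distance heuristics
    stops = pd.DataFrame({
        "stop_id": ["s1", "s2", "s3"],
        "similar_stop_id": ["st_a", "st_a", "st_b"],
        "stop_lon": [139.700, 139.702, 140.100],
        "stop_lat": [35.680, 35.682, 36.000],
    })
    centroids = stops.groupby("similar_stop_id")[["stop_lon", "stop_lat"]].mean()
    print(len(stops), "->", len(centroids))  # 3 -> 2, mirroring 899 -> 518 above
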

     def test_read_route_frequency(self):
         with self.assertRaises(KeyError):
             self.gtfs_parser.read_route_frequency()

         # each route_frequency feature is drawn between 2 stops
-        self.assertEqual(
-            956, len(self.gtfs_parser_frequency.read_route_frequency()))
+        self.assertEqual(956, len(self.gtfs_parser_frequency.read_route_frequency()))
         # unifying some 'similar' stops into the same position decreases the num of route_frequency features
         self.assertEqual(
-            918, len(self.gtfs_parser_frequency_unify.read_route_frequency()))
-
-        # out of service of GTFS -> 0
-        self.assertEqual(0, len(
-            self.gtfs_parser_frequency_unify.read_route_frequency(yyyymmdd="20210530")))
-
-        # some routes are not in service on 20210730, Friday
-        freq20210730 = self.gtfs_parser_frequency_unify.read_route_frequency(
-            yyyymmdd="20210730")
-        self.assertEqual(916, len(freq20210730))
-        self.assertEqual(114, freq20210730[0]["properties"]["frequency"])
-
-        # 20210801 - Sunday
-        freq20210801 = self.gtfs_parser_frequency_unify.read_route_frequency(
-            yyyymmdd="20210801")
-        self.assertEqual(736, len(freq20210801))
-        self.assertEqual(62, freq20210801[0]["properties"]["frequency"])
+            918, len(self.gtfs_parser_frequency_unify.read_route_frequency())
+        )
diff --git a/metadata.txt b/metadata.txt
index 2a85144..b1ac160 100644
--- a/metadata.txt
+++ b/metadata.txt
@@ -6,7 +6,7 @@ name=GTFS-GO
 qgisMinimumVersion=3.0

 description=The plugin to extract GTFS data and to show routes and stops.
-version=3.0.1
+version=3.1.1
 author=MIERUNE Inc.
 email=info@mierune.co.jp
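
Taken together, the frequency pipeline is also usable as a library, mirroring what the CLI's --frequency branch does. A sketch under stated assumptions (the import path follows the tests; the feed path and delimiter value are invented and feed-specific):

    import json
    from gtfs_parser.__main__ import GTFSParser

    parser = GTFSParser(
        "path/to/extracted_gtfs", as_frequency=True, as_unify_stops=True, delimiter="_"
    )
    routes_geojson = {
        "type": "FeatureCollection",
        "features": parser.read_route_frequency(yyyymmdd="20210801"),
    }
    stops_geojson = {
        "type": "FeatureCollection",
        "features": parser.read_interpolated_stops(),
    }
    with open("routes.geojson", mode="w", encoding="utf-8") as f:
        json.dump(routes_geojson, f, ensure_ascii=False)
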