From 10f3f131b68388018b6dd5126f67d1cbaf797cd1 Mon Sep 17 00:00:00 2001 From: Laurie Merrell Date: Fri, 14 Apr 2023 22:17:17 +0000 Subject: [PATCH] start notebook for EOD checks --- .../wait_time_end_of_day.ipynb | 370 ++++++++++++++++++ 1 file changed, 370 insertions(+) create mode 100644 rt_prediction_accuracy_metrics/wait_time_end_of_day.ipynb diff --git a/rt_prediction_accuracy_metrics/wait_time_end_of_day.ipynb b/rt_prediction_accuracy_metrics/wait_time_end_of_day.ipynb new file mode 100644 index 000000000..8a0007f24 --- /dev/null +++ b/rt_prediction_accuracy_metrics/wait_time_end_of_day.ipynb @@ -0,0 +1,370 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "d13b7563-1804-4f19-932d-86024e80f755", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import pandas_gbq" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "5ca6614b-3cda-4675-ac2f-01a48ab3b566", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.9/site-packages/geopandas/_compat.py:123: UserWarning: The Shapely GEOS version (3.11.1-CAPI-1.17.1) is incompatible with the GEOS version PyGEOS was compiled with (3.10.1-CAPI-1.16.0). Conversions between both will be slow.\n", + " warnings.warn(\n", + "/opt/conda/lib/python3.9/site-packages/google/cloud/bigquery/table.py:43: UserWarning: Shapely 2.0 is installed, but because PyGEOS is also installed, GeoPandas will still use PyGEOS by default for now. To force to use and test Shapely 2.0, you have to set the environment variable USE_PYGEOS=0. You can do this before starting the Python process, or in your code before importing geopandas:\n", + "\n", + "import os\n", + "os.environ['USE_PYGEOS'] = '0'\n", + "import geopandas\n", + "\n", + "In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).\n", + " import geopandas # type: ignore\n" + ] + } + ], + "source": [ + "# get all activity for one route\n", + "# using Dumbarton Express as an example because it seems to have good data\n", + "\n", + "dumbarton_route_db1_20230315_tu_raw = pd.read_gbq(\n", + " \"\"\"\n", + " SELECT *\n", + " FROM `cal-itp-data-infra.mart_ad_hoc.fct_stop_time_updates_20230315_to_20230321`\n", + " WHERE base64_url = 'aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cGRhdGVzP2FnZW5jeT1ERQ==' \n", + " AND service_date = '2023-03-15'\n", + " AND trip_route_id = 'DB1'\n", + " \"\"\", project_id = 'cal-itp-data-infra')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bc335a97-a3ba-4f5c-907e-6a2a6c97ef75", + "metadata": {}, + "outputs": [], + "source": [ + "dumbarton_route_db1_20230315_tu_raw = pd.read_gbq(\n", + " \"\"\"\n", + " SELECT *\n", + " FROM `cal-itp-data-infra.mart_ad_hoc.fct_stop_time_updates_20230315_to_20230321`\n", + " WHERE base64_url = 'aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cGRhdGVzP2FnZW5jeT1ERQ==' \n", + " AND service_date = '2023-03-15'\n", + " AND trip_route_id = 'DB1'\n", + " \"\"\", project_id = 'cal-itp-data-infra')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc4c0001-c9ad-456c-966e-0d4fab3aaa4e", + "metadata": {}, + "outputs": [], + "source": [ + "dumbarton_route_db1_20230315_tu_raw" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "6f5a3664-97e8-4c56-a6cf-fadbd6938e71", + "metadata": {}, + "outputs": [], + "source": [ + "db1_tu = dumbarton_route_db1_20230315_tu_raw.copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5714e4db-c1c6-48e1-af98-96a2cf2ff745", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
keygtfs_dataset_keydthourbase64_url_extract_ts_config_extract_ts_gtfs_dataset_name_header_message_age_trip_update_message_age...arrival_delayarrival_timearrival_uncertaintydeparture_delaydeparture_timedeparture_uncertaintyschedule_relationshipservice_datearrival_time_pacificdeparture_time_pacific
08c170e832d6f62ed3b4f6982e9cf8df35c3e65766dda65958cf4da845286c0d52023-03-162023-03-16 00:00:00+00:00aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG...2023-03-16 00:57:00+00:002023-03-15 19:04:01.925806+00:00Bay Area 511 Dumbarton Express TripUpdates194...31571678931180<NA><NA><NA><NA>None2023-03-152023-03-15 18:46:20NaT
1fd44b37c3cacc92dd89e399977e626f35c3e65766dda65958cf4da845286c0d52023-03-162023-03-16 00:00:00+00:00aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG...2023-03-16 00:57:00+00:002023-03-15 19:04:01.925806+00:00Bay Area 511 Dumbarton Express TripUpdates194...31761678931430<NA><NA><NA><NA>None2023-03-152023-03-15 18:50:30NaT
267190d2a9e8527d4ab8cb9cda9f957825c3e65766dda65958cf4da845286c0d52023-03-162023-03-16 00:00:00+00:00aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG...2023-03-16 00:57:00+00:002023-03-15 19:04:01.925806+00:00Bay Area 511 Dumbarton Express TripUpdates194...2621678933543<NA><NA><NA><NA>None2023-03-152023-03-15 19:25:43NaT
3dfb4054c4db6d6a54d36263a8114b1075c3e65766dda65958cf4da845286c0d52023-03-162023-03-16 00:00:00+00:00aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG...2023-03-16 00:57:00+00:002023-03-15 19:04:01.925806+00:00Bay Area 511 Dumbarton Express TripUpdates194...14011678929648<NA><NA><NA><NA>None2023-03-152023-03-15 18:20:48NaT
4056c2ce83d4fa8919134a510a23b2fd25c3e65766dda65958cf4da845286c0d52023-03-162023-03-16 00:00:00+00:00aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG...2023-03-16 00:57:00+00:002023-03-15 19:04:01.925806+00:00Bay Area 511 Dumbarton Express TripUpdates194...-221678935309<NA><NA><NA><NA>None2023-03-152023-03-15 19:55:09NaT
\n", + "

5 rows × 39 columns

\n", + "
" + ], + "text/plain": [ + " key gtfs_dataset_key \\\n", + "0 8c170e832d6f62ed3b4f6982e9cf8df3 5c3e65766dda65958cf4da845286c0d5 \n", + "1 fd44b37c3cacc92dd89e399977e626f3 5c3e65766dda65958cf4da845286c0d5 \n", + "2 67190d2a9e8527d4ab8cb9cda9f95782 5c3e65766dda65958cf4da845286c0d5 \n", + "3 dfb4054c4db6d6a54d36263a8114b107 5c3e65766dda65958cf4da845286c0d5 \n", + "4 056c2ce83d4fa8919134a510a23b2fd2 5c3e65766dda65958cf4da845286c0d5 \n", + "\n", + " dt hour \\\n", + "0 2023-03-16 2023-03-16 00:00:00+00:00 \n", + "1 2023-03-16 2023-03-16 00:00:00+00:00 \n", + "2 2023-03-16 2023-03-16 00:00:00+00:00 \n", + "3 2023-03-16 2023-03-16 00:00:00+00:00 \n", + "4 2023-03-16 2023-03-16 00:00:00+00:00 \n", + "\n", + " base64_url \\\n", + "0 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG... \n", + "1 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG... \n", + "2 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG... \n", + "3 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG... \n", + "4 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG... \n", + "\n", + " _extract_ts _config_extract_ts \\\n", + "0 2023-03-16 00:57:00+00:00 2023-03-15 19:04:01.925806+00:00 \n", + "1 2023-03-16 00:57:00+00:00 2023-03-15 19:04:01.925806+00:00 \n", + "2 2023-03-16 00:57:00+00:00 2023-03-15 19:04:01.925806+00:00 \n", + "3 2023-03-16 00:57:00+00:00 2023-03-15 19:04:01.925806+00:00 \n", + "4 2023-03-16 00:57:00+00:00 2023-03-15 19:04:01.925806+00:00 \n", + "\n", + " _gtfs_dataset_name _header_message_age \\\n", + "0 Bay Area 511 Dumbarton Express TripUpdates 19 \n", + "1 Bay Area 511 Dumbarton Express TripUpdates 19 \n", + "2 Bay Area 511 Dumbarton Express TripUpdates 19 \n", + "3 Bay Area 511 Dumbarton Express TripUpdates 19 \n", + "4 Bay Area 511 Dumbarton Express TripUpdates 19 \n", + "\n", + " _trip_update_message_age ... arrival_delay arrival_time \\\n", + "0 4 ... 3157 1678931180 \n", + "1 4 ... 3176 1678931430 \n", + "2 4 ... 262 1678933543 \n", + "3 4 ... 1401 1678929648 \n", + "4 4 ... -22 1678935309 \n", + "\n", + " arrival_uncertainty departure_delay departure_time departure_uncertainty \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "\n", + " schedule_relationship service_date arrival_time_pacific \\\n", + "0 None 2023-03-15 2023-03-15 18:46:20 \n", + "1 None 2023-03-15 2023-03-15 18:50:30 \n", + "2 None 2023-03-15 2023-03-15 19:25:43 \n", + "3 None 2023-03-15 2023-03-15 18:20:48 \n", + "4 None 2023-03-15 2023-03-15 19:55:09 \n", + "\n", + " departure_time_pacific \n", + "0 NaT \n", + "1 NaT \n", + "2 NaT \n", + "3 NaT \n", + "4 NaT \n", + "\n", + "[5 rows x 39 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db1_tu.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef7a8e15-6e11-41c1-a009-2d983e0b2772", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}