Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RT prediction accuracy metrics exploration #724

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
370 changes: 370 additions & 0 deletions rt_prediction_accuracy_metrics/wait_time_end_of_day.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,370 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "d13b7563-1804-4f19-932d-86024e80f755",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import pandas_gbq"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5ca6614b-3cda-4675-ac2f-01a48ab3b566",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.9/site-packages/geopandas/_compat.py:123: UserWarning: The Shapely GEOS version (3.11.1-CAPI-1.17.1) is incompatible with the GEOS version PyGEOS was compiled with (3.10.1-CAPI-1.16.0). Conversions between both will be slow.\n",
" warnings.warn(\n",
"/opt/conda/lib/python3.9/site-packages/google/cloud/bigquery/table.py:43: UserWarning: Shapely 2.0 is installed, but because PyGEOS is also installed, GeoPandas will still use PyGEOS by default for now. To force to use and test Shapely 2.0, you have to set the environment variable USE_PYGEOS=0. You can do this before starting the Python process, or in your code before importing geopandas:\n",
"\n",
"import os\n",
"os.environ['USE_PYGEOS'] = '0'\n",
"import geopandas\n",
"\n",
"In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).\n",
" import geopandas # type: ignore\n"
]
}
],
"source": [
"# Get all trip-update activity for one route on one service date.\n",
"# Dumbarton Express is the example because it seems to have good data.\n",
"#\n",
"# pandas_gbq.read_gbq is called directly: pd.read_gbq only forwards to it and is\n",
"# deprecated in pandas >= 2.2, while pandas_gbq is already imported above.\n",
"dumbarton_route_db1_20230315_tu_raw = pandas_gbq.read_gbq(\n",
"    \"\"\"\n",
"    SELECT *\n",
"    FROM `cal-itp-data-infra.mart_ad_hoc.fct_stop_time_updates_20230315_to_20230321`\n",
"    WHERE base64_url = 'aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cGRhdGVzP2FnZW5jeT1ERQ=='\n",
"      AND service_date = '2023-03-15'\n",
"      AND trip_route_id = 'DB1'\n",
"    \"\"\",\n",
"    project_id=\"cal-itp-data-infra\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bc335a97-a3ba-4f5c-907e-6a2a6c97ef75",
"metadata": {},
"outputs": [],
"source": [
"# The raw extract is already loaded by the query cell above; this cell used to\n",
"# repeat the identical BigQuery query, which only re-ran the download.\n",
"# Report the size of the loaded extract (rows, columns) instead.\n",
"dumbarton_route_db1_20230315_tu_raw.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cc4c0001-c9ad-456c-966e-0d4fab3aaa4e",
"metadata": {},
"outputs": [],
"source": [
"# Summarise the raw extract (column dtypes and non-null counts) rather than\n",
"# rendering the wide frame itself; a row preview is shown further down.\n",
"dumbarton_route_db1_20230315_tu_raw.info()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "6f5a3664-97e8-4c56-a6cf-fadbd6938e71",
"metadata": {},
"outputs": [],
"source": [
"# Take a deep copy of the raw extract under a shorter working name;\n",
"# assignments made to db1_tu will not write through to the raw frame.\n",
"db1_tu = dumbarton_route_db1_20230315_tu_raw.copy(deep=True)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "5714e4db-c1c6-48e1-af98-96a2cf2ff745",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>key</th>\n",
" <th>gtfs_dataset_key</th>\n",
" <th>dt</th>\n",
" <th>hour</th>\n",
" <th>base64_url</th>\n",
" <th>_extract_ts</th>\n",
" <th>_config_extract_ts</th>\n",
" <th>_gtfs_dataset_name</th>\n",
" <th>_header_message_age</th>\n",
" <th>_trip_update_message_age</th>\n",
" <th>...</th>\n",
" <th>arrival_delay</th>\n",
" <th>arrival_time</th>\n",
" <th>arrival_uncertainty</th>\n",
" <th>departure_delay</th>\n",
" <th>departure_time</th>\n",
" <th>departure_uncertainty</th>\n",
" <th>schedule_relationship</th>\n",
" <th>service_date</th>\n",
" <th>arrival_time_pacific</th>\n",
" <th>departure_time_pacific</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>8c170e832d6f62ed3b4f6982e9cf8df3</td>\n",
" <td>5c3e65766dda65958cf4da845286c0d5</td>\n",
" <td>2023-03-16</td>\n",
" <td>2023-03-16 00:00:00+00:00</td>\n",
" <td>aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG...</td>\n",
" <td>2023-03-16 00:57:00+00:00</td>\n",
" <td>2023-03-15 19:04:01.925806+00:00</td>\n",
" <td>Bay Area 511 Dumbarton Express TripUpdates</td>\n",
" <td>19</td>\n",
" <td>4</td>\n",
" <td>...</td>\n",
" <td>3157</td>\n",
" <td>1678931180</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>None</td>\n",
" <td>2023-03-15</td>\n",
" <td>2023-03-15 18:46:20</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>fd44b37c3cacc92dd89e399977e626f3</td>\n",
" <td>5c3e65766dda65958cf4da845286c0d5</td>\n",
" <td>2023-03-16</td>\n",
" <td>2023-03-16 00:00:00+00:00</td>\n",
" <td>aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG...</td>\n",
" <td>2023-03-16 00:57:00+00:00</td>\n",
" <td>2023-03-15 19:04:01.925806+00:00</td>\n",
" <td>Bay Area 511 Dumbarton Express TripUpdates</td>\n",
" <td>19</td>\n",
" <td>4</td>\n",
" <td>...</td>\n",
" <td>3176</td>\n",
" <td>1678931430</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>None</td>\n",
" <td>2023-03-15</td>\n",
" <td>2023-03-15 18:50:30</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>67190d2a9e8527d4ab8cb9cda9f95782</td>\n",
" <td>5c3e65766dda65958cf4da845286c0d5</td>\n",
" <td>2023-03-16</td>\n",
" <td>2023-03-16 00:00:00+00:00</td>\n",
" <td>aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG...</td>\n",
" <td>2023-03-16 00:57:00+00:00</td>\n",
" <td>2023-03-15 19:04:01.925806+00:00</td>\n",
" <td>Bay Area 511 Dumbarton Express TripUpdates</td>\n",
" <td>19</td>\n",
" <td>4</td>\n",
" <td>...</td>\n",
" <td>262</td>\n",
" <td>1678933543</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>None</td>\n",
" <td>2023-03-15</td>\n",
" <td>2023-03-15 19:25:43</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>dfb4054c4db6d6a54d36263a8114b107</td>\n",
" <td>5c3e65766dda65958cf4da845286c0d5</td>\n",
" <td>2023-03-16</td>\n",
" <td>2023-03-16 00:00:00+00:00</td>\n",
" <td>aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG...</td>\n",
" <td>2023-03-16 00:57:00+00:00</td>\n",
" <td>2023-03-15 19:04:01.925806+00:00</td>\n",
" <td>Bay Area 511 Dumbarton Express TripUpdates</td>\n",
" <td>19</td>\n",
" <td>4</td>\n",
" <td>...</td>\n",
" <td>1401</td>\n",
" <td>1678929648</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>None</td>\n",
" <td>2023-03-15</td>\n",
" <td>2023-03-15 18:20:48</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>056c2ce83d4fa8919134a510a23b2fd2</td>\n",
" <td>5c3e65766dda65958cf4da845286c0d5</td>\n",
" <td>2023-03-16</td>\n",
" <td>2023-03-16 00:00:00+00:00</td>\n",
" <td>aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG...</td>\n",
" <td>2023-03-16 00:57:00+00:00</td>\n",
" <td>2023-03-15 19:04:01.925806+00:00</td>\n",
" <td>Bay Area 511 Dumbarton Express TripUpdates</td>\n",
" <td>19</td>\n",
" <td>4</td>\n",
" <td>...</td>\n",
" <td>-22</td>\n",
" <td>1678935309</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>None</td>\n",
" <td>2023-03-15</td>\n",
" <td>2023-03-15 19:55:09</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 39 columns</p>\n",
"</div>"
],
"text/plain": [
" key gtfs_dataset_key \\\n",
"0 8c170e832d6f62ed3b4f6982e9cf8df3 5c3e65766dda65958cf4da845286c0d5 \n",
"1 fd44b37c3cacc92dd89e399977e626f3 5c3e65766dda65958cf4da845286c0d5 \n",
"2 67190d2a9e8527d4ab8cb9cda9f95782 5c3e65766dda65958cf4da845286c0d5 \n",
"3 dfb4054c4db6d6a54d36263a8114b107 5c3e65766dda65958cf4da845286c0d5 \n",
"4 056c2ce83d4fa8919134a510a23b2fd2 5c3e65766dda65958cf4da845286c0d5 \n",
"\n",
" dt hour \\\n",
"0 2023-03-16 2023-03-16 00:00:00+00:00 \n",
"1 2023-03-16 2023-03-16 00:00:00+00:00 \n",
"2 2023-03-16 2023-03-16 00:00:00+00:00 \n",
"3 2023-03-16 2023-03-16 00:00:00+00:00 \n",
"4 2023-03-16 2023-03-16 00:00:00+00:00 \n",
"\n",
" base64_url \\\n",
"0 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG... \n",
"1 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG... \n",
"2 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG... \n",
"3 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG... \n",
"4 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG... \n",
"\n",
" _extract_ts _config_extract_ts \\\n",
"0 2023-03-16 00:57:00+00:00 2023-03-15 19:04:01.925806+00:00 \n",
"1 2023-03-16 00:57:00+00:00 2023-03-15 19:04:01.925806+00:00 \n",
"2 2023-03-16 00:57:00+00:00 2023-03-15 19:04:01.925806+00:00 \n",
"3 2023-03-16 00:57:00+00:00 2023-03-15 19:04:01.925806+00:00 \n",
"4 2023-03-16 00:57:00+00:00 2023-03-15 19:04:01.925806+00:00 \n",
"\n",
" _gtfs_dataset_name _header_message_age \\\n",
"0 Bay Area 511 Dumbarton Express TripUpdates 19 \n",
"1 Bay Area 511 Dumbarton Express TripUpdates 19 \n",
"2 Bay Area 511 Dumbarton Express TripUpdates 19 \n",
"3 Bay Area 511 Dumbarton Express TripUpdates 19 \n",
"4 Bay Area 511 Dumbarton Express TripUpdates 19 \n",
"\n",
" _trip_update_message_age ... arrival_delay arrival_time \\\n",
"0 4 ... 3157 1678931180 \n",
"1 4 ... 3176 1678931430 \n",
"2 4 ... 262 1678933543 \n",
"3 4 ... 1401 1678929648 \n",
"4 4 ... -22 1678935309 \n",
"\n",
" arrival_uncertainty departure_delay departure_time departure_uncertainty \\\n",
"0 <NA> <NA> <NA> <NA> \n",
"1 <NA> <NA> <NA> <NA> \n",
"2 <NA> <NA> <NA> <NA> \n",
"3 <NA> <NA> <NA> <NA> \n",
"4 <NA> <NA> <NA> <NA> \n",
"\n",
" schedule_relationship service_date arrival_time_pacific \\\n",
"0 None 2023-03-15 2023-03-15 18:46:20 \n",
"1 None 2023-03-15 2023-03-15 18:50:30 \n",
"2 None 2023-03-15 2023-03-15 19:25:43 \n",
"3 None 2023-03-15 2023-03-15 18:20:48 \n",
"4 None 2023-03-15 2023-03-15 19:55:09 \n",
"\n",
" departure_time_pacific \n",
"0 NaT \n",
"1 NaT \n",
"2 NaT \n",
"3 NaT \n",
"4 NaT \n",
"\n",
"[5 rows x 39 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the working copy.\n",
"db1_tu.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ef7a8e15-6e11-41c1-a009-2d983e0b2772",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}