Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add RT VP vs Sched Table #1708

Merged
merged 8 commits into from
Aug 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions warehouse/models/rt_views/_rt_views.yml
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,35 @@ models:
tests:
- unique
- not_null
# Sample comparison of scheduled trips vs. trips seen in GTFS-RT vehicle positions.
# One row per service_date / calitp_itp_id / calitp_url_number / route_id (enforced below).
- name: gtfs_rt_vs_schedule_trips_sample
  description: |
    Each row represents a comparison of the scheduled versus realtime vehicle positions data
    for each route by day over the course of two months. This data is available for two agencies,
    SamTrans and Big Blue Bus.
  tests:
    - dbt_utils.unique_combination_of_columns:
        combination_of_columns:
          - service_date
          - calitp_itp_id
          - calitp_url_number
          - route_id
  columns:
    - name: service_date
      description: Date for which data was present in our time frame
    - *calitp_itp_id
    - *calitp_url_number
    - name: route_id
      description: Route ID
    - name: agency_name
      description: Agency Name
    - name: route_short_name
      description: Common route name
    - name: calitp_extracted_at
      description: Start of the validity window of the matched gtfs_schedule_dim_routes row.
    - name: calitp_deleted_at
      description: End of the validity window of the matched gtfs_schedule_dim_routes row.
    - name: num_sched
      description: Number of trips where GTFS scheduled data is present on given day.
    - name: num_vp
      description: Number of trips where GTFS RT vehicle position data is present on given day.
    - name: pct_w_vp
      description: Percent of scheduled trips with vehicle positions realtime data for that specific day and route.

exposures:
- name: rt_speed_maps
Expand Down
75 changes: 75 additions & 0 deletions warehouse/models/rt_views/gtfs_rt_vs_schedule_trips_sample.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
WITH
-- Distinct trips seen in GTFS-RT Vehicle Positions for two operators
-- (SamTrans 290 and Big Blue Bus 300) over two months (2022-05-01 through 2022-06-30).
-- Note: revisit when more operators are included. trip_route_id and trip_id are
-- optional in the VehiclePosition message:
-- https://gtfs.org/realtime/reference/#message-vehicleposition
vp_trips AS (
    SELECT DISTINCT
        calitp_itp_id,
        calitp_url_number,
        date AS service_date,
        trip_id AS vp_trip_id
    FROM {{ ref('stg_rt__vehicle_positions') }}
    -- Filter on the source column `date`: `service_date` is only a SELECT alias in this
    -- CTE, and SELECT aliases are not visible in the WHERE clause.
    -- NOTE(review): assumes the staging model has no separate `service_date` column - confirm.
    WHERE date BETWEEN '2022-05-01' AND '2022-06-30'
        AND calitp_itp_id IN (300, 290)
),

-- GTFS schedule trips that are in service, limited to the same two operators
-- (calitp_itp_id 290 and 300) and the same two-month window as vp_trips.
sched_trips AS (
    SELECT
        calitp_itp_id,
        calitp_url_number,
        service_date,
        route_id,
        trip_id
    FROM {{ ref('gtfs_schedule_fact_daily_trips') }}
    WHERE is_in_service = True
        AND calitp_itp_id IN (300, 290)
        AND service_date BETWEEN '2022-05-01' AND '2022-06-30'
),

-- For each feed (itp id + url number), route and service date, count the scheduled
-- trips and how many of them also appear in the vehicle positions data. Trips are
-- matched on trip id, itp id, url number and service date.
rt_sched_joined AS (
    SELECT
        sched.calitp_itp_id,
        sched.calitp_url_number,
        sched.route_id,
        sched.service_date,
        COUNT(sched.trip_id) AS num_sched,
        -- scheduled trips without a vehicle positions match have a NULL vp_trip_id,
        -- which COUNT(column) does not count
        COUNT(vp.vp_trip_id) AS num_vp
    FROM sched_trips AS sched
    LEFT JOIN vp_trips AS vp
        ON sched.service_date = vp.service_date
            AND sched.calitp_itp_id = vp.calitp_itp_id
            AND sched.calitp_url_number = vp.calitp_url_number
            AND sched.trip_id = vp.vp_trip_id
    GROUP BY
        sched.calitp_itp_id,
        sched.calitp_url_number,
        sched.route_id,
        sched.service_date
),

-- Share of scheduled trips that have vehicle position data, enriched with the agency
-- name and the common (short) route name from the routes dimension.
gtfs_rt_vs_schedule_trips_sample AS (
    SELECT
        counts.calitp_itp_id,
        routes.agency_name,
        counts.calitp_url_number,
        counts.route_id,
        routes.route_short_name,
        counts.service_date,
        routes.calitp_extracted_at,
        routes.calitp_deleted_at,
        counts.num_sched,
        counts.num_vp,
        -- NULLIF turns a zero denominator into NULL instead of a division-by-zero error
        counts.num_vp / NULLIF(counts.num_sched, 0) AS pct_w_vp
    FROM rt_sched_joined AS counts
    LEFT JOIN {{ ref('gtfs_schedule_dim_routes') }} AS routes
        ON counts.route_id = routes.route_id
            AND counts.calitp_itp_id = routes.calitp_itp_id
            AND counts.calitp_url_number = routes.calitp_url_number
            -- Half-open validity window: a closed BETWEEN would match two adjacent
            -- versions of a route when service_date falls on a version boundary,
            -- producing duplicate rows per service_date/feed/route.
            -- NOTE(review): assumes calitp_deleted_at is exclusive (the first date the
            -- row is no longer valid) - confirm against gtfs_schedule_dim_routes.
            AND counts.service_date >= routes.calitp_extracted_at
            AND counts.service_date < routes.calitp_deleted_at
)

SELECT *
FROM gtfs_rt_vs_schedule_trips_sample