Skip to content

Commit

Permalink
Merge pull request #1677 from cal-itp/dim-organizations-add-columns
Browse files Browse the repository at this point in the history
feat(airtable): add new columns per request #1674
  • Loading branch information
lauriemerrell authored Aug 8, 2022
2 parents f80b5fe + 41d3e18 commit 6dcea8e
Show file tree
Hide file tree
Showing 8 changed files with 154 additions and 0 deletions.
65 changes: 65 additions & 0 deletions warehouse/models/mart/transit_database/_mart_transit_database.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,27 @@ models:
metabase.semantic_type: type/FK
- name: details
description: Text description related to the organization
- name: reporting_category
description: |
Categories we want to hold ourselves accountable to:
Core: funded by Caltrans/can control or influence somewhat
Other Public Transit: (publicly available or managed) and publicly funded transit
Other Transit: transit that isn't the above
- name: ntp_id
description: National Transit Database (NTD) ID
- name: alias
- name: gtfs_static_status
description: |
Computed value from source data: "Static OK" indicates that the number
of associated (managed) `service` records is equal to the number of associated (managed) `service`
records with static GTFS. Otherwise, status is "Static Incomplete".
- name: gtfs_realtime_status
description: |
Computed value from source data: "RT OK" indicates that the number
of associated (managed) `service` records is equal to the number of associated (managed) `service`
records with realtime GTFS and at least one managed service has complete RT coverage (i.e.,
all three RT feed types: vehicle positions, trip updates, and service alerts).
Otherwise, status is "RT Incomplete".
- name: dim_services
description: '{{ doc("services_table") }}'
columns:
Expand All @@ -68,6 +89,7 @@ models:
- name: currently_operating
description: |
Boolean for whether service is currently active
- name: gtfs_schedule_status
- name: dim_products
description: '{{ doc("products_table") }}'
columns:
Expand Down Expand Up @@ -192,6 +214,7 @@ models:
description: |
For realtime dataset / service relationships,
the associated schedule dataset / service relationship.
- name: fares_v2_status
# self relationship test breaks metabase connector: https://github.com/JarvusInnovations/dbt-metabase/blob/master/dbtmetabase/parsers/dbt_manifest.py#L202
# TODO: make an aliased version of the relationship test
# tests:
Expand Down Expand Up @@ -388,3 +411,45 @@ models:
- relationships:
to: ref('dim_services')
field: key
- name: bridge_organizations_x_gtfs_datasets_produced
description: |
Mapping table between organizations and GTFS datasets
produced by that organization.
This is a many-to-many relationship.
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- organization_key
- gtfs_dataset_key
columns:
- name: organization_key
tests:
- relationships:
to: ref('dim_organizations')
field: key
- name: gtfs_dataset_key
tests:
- relationships:
to: ref('dim_gtfs_datasets')
field: key
- name: bridge_organizations_x_funding_programs
description: |
Mapping table between organizations and associated
funding programs.
This is a many-to-many relationship.
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- organization_key
- funding_program_key
columns:
- name: organization_key
tests:
- relationships:
to: ref('dim_organizations')
field: key
- name: funding_program_key
tests:
- relationships:
to: ref('dim_funding_programs')
field: key
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{{ config(materialized='table') }}

-- Bridge model pairing organizations with funding programs.
-- Both inputs are first passed through get_latest_dense_rank ordered by
-- calitp_extracted_at DESC (presumably keeping only the most recent
-- extract -- confirm against the macro definition).

WITH latest_funding_programs AS (
    {{ get_latest_dense_rank(
        order_by = 'calitp_extracted_at DESC',
        external_table = ref('stg_transit_database__funding_programs')
    ) }}
),

latest_organizations AS (
    {{ get_latest_dense_rank(
        order_by = 'calitp_extracted_at DESC',
        external_table = ref('stg_transit_database__organizations')
    ) }}
),

-- Side A is the organization (joined through its `funding_programs` column);
-- side B is the funding program (joined through its `organization` column).
bridge_organizations_x_funding_programs AS (
    {{ transit_database_many_to_many(
        shared_date_name = 'calitp_extracted_at',
        table_a = 'latest_organizations',
        table_a_join_col = 'funding_programs',
        table_a_key_col = 'key',
        table_a_key_col_name = 'organization_key',
        table_a_name_col = 'name',
        table_a_name_col_name = 'organization_name',
        table_a_date_col = 'calitp_extracted_at',
        table_b = 'latest_funding_programs',
        table_b_join_col = 'organization',
        table_b_key_col = 'key',
        table_b_key_col_name = 'funding_program_key',
        table_b_name_col = 'program',
        table_b_name_col_name = 'funding_program',
        table_b_date_col = 'calitp_extracted_at'
    ) }}
)

SELECT *
FROM bridge_organizations_x_funding_programs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{{ config(materialized='table') }}

-- Bridge model pairing organizations with the GTFS datasets they produce.
-- Both inputs are first passed through get_latest_dense_rank ordered by
-- calitp_extracted_at DESC (presumably keeping only the most recent
-- extract -- confirm against the macro definition).

WITH latest_organizations AS (
    {{ get_latest_dense_rank(
        external_table = ref('stg_transit_database__organizations'),
        order_by = 'calitp_extracted_at DESC'
    ) }}
),

latest_gtfs_datasets AS (
    {{ get_latest_dense_rank(
        external_table = ref('stg_transit_database__gtfs_datasets'),
        order_by = 'calitp_extracted_at DESC'
    ) }}
),

-- Side A is the organization (joined through `gtfs_datasets_produced`);
-- side B is the GTFS dataset (joined through `dataset_producers`).
-- The CTE is named "_produced" to match the join columns; it was
-- previously labelled "_managed", which described a different relationship.
bridge_organizations_x_gtfs_datasets_produced AS (
    {{ transit_database_many_to_many(
        table_a = 'latest_organizations',
        table_a_key_col = 'key',
        table_a_key_col_name = 'organization_key',
        table_a_name_col = 'name',
        table_a_name_col_name = 'organization_name',
        table_a_join_col = 'gtfs_datasets_produced',
        table_a_date_col = 'calitp_extracted_at',
        table_b = 'latest_gtfs_datasets',
        table_b_key_col = 'key',
        table_b_key_col_name = 'gtfs_dataset_key',
        table_b_name_col = 'name',
        table_b_name_col_name = 'gtfs_dataset_name',
        table_b_join_col = 'dataset_producers',
        table_b_date_col = 'calitp_extracted_at',
        shared_date_name = 'calitp_extracted_at'
    ) }}
)

SELECT * FROM bridge_organizations_x_gtfs_datasets_produced
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ dim_gtfs_service_data AS (
network_id,
route_id,
reference_static_gtfs_service_data_key,
fares_v2_status,
calitp_extracted_at
FROM latest_gtfs_service_data
)
Expand Down
5 changes: 5 additions & 0 deletions warehouse/models/mart/transit_database/dim_organizations.sql
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ dim_organizations AS (
details,
caltrans_district,
website,
reporting_category,
ntp_id,
gtfs_static_status,
gtfs_realtime_status,
alias,
calitp_extracted_at
FROM latest_organizations
)
Expand Down
1 change: 1 addition & 0 deletions warehouse/models/mart/transit_database/dim_services.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dim_services AS (
mode,
currently_operating,
operating_counties,
gtfs_schedule_status,
calitp_extracted_at
FROM latest_services
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,18 @@ stg_transit_database__organizations AS (
organization_type,
roles,
itp_id,
ntp_id,
alias_ as alias,
details,
caltrans_district,
mobility_services_managed,
parent_organization,
website,
reporting_category,
funding_programs,
gtfs_datasets_produced,
gtfs_static_status,
gtfs_realtime_status,
dt AS calitp_extracted_at
FROM once_daily_organizations
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ stg_transit_database__services AS (
provider,
operator,
funding_sources,
gtfs_schedule_status,
operating_counties,
dt AS calitp_extracted_at
FROM once_daily_services
Expand Down

0 comments on commit 6dcea8e

Please sign in to comment.