Skip to content

Commit

Permalink
Add sources staging model (#5)
Browse files Browse the repository at this point in the history
* Added sources model and extracted additional columns from manifest json.

* Dedupe artifacts, add docs and add dim_dbt__sources
  • Loading branch information
kgpayne authored Mar 5, 2021
1 parent 413068e commit 63550bd
Show file tree
Hide file tree
Showing 7 changed files with 181 additions and 33 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,7 @@
target/
dbt_modules/
logs/

.vscode/
Pipfile
Pipfile.lock
3 changes: 2 additions & 1 deletion models/incremental/dim_dbt__models.sql
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ fields as (
command_invocation_id,
artifact_generated_at,
node_id,
name,
model_database,
model_schema,
name,
depends_on_nodes,
package_name,
model_path,
Expand Down
38 changes: 38 additions & 0 deletions models/incremental/dim_dbt__sources.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{{ config( materialized='incremental', unique_key='manifest_source_id' ) }}

-- Incremental model exposing the source rows staged in stg_dbt__sources.
-- Rows are merged on manifest_source_id (see unique_key above).

with dbt_sources as (

    select * from {{ ref('stg_dbt__sources') }}

),

dbt_sources_incremental as (

    select *
    from dbt_sources

    {% if is_incremental() %}
    -- this filter will only be applied on an incremental run
    -- coalesce guards against an existing-but-empty target table: max() over
    -- zero rows is null, and `> null` would filter out every row forever.
    where artifact_generated_at > (
        select coalesce(max(artifact_generated_at), '1900-01-01')
        from {{ this }}
    )
    {% endif %}

),

-- Explicit column list so the published shape of the model does not change
-- silently if the staging model gains columns.
fields as (

    select
        manifest_source_id,
        command_invocation_id,
        artifact_generated_at,
        node_id,
        name,
        source_name,
        source_schema,
        package_name,
        relation_name,
        source_path
    from dbt_sources_incremental

)

select * from fields
76 changes: 51 additions & 25 deletions models/schemas.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,28 +104,54 @@ models:
- name: env_*
description: Columns for the environment variables set when the command was executed.

- name: dim_dbt__models
description: All dbt model metadata from every manifest.json.
columns:
- name: manifest_model_id
description: Primary key generated from the command_invocation_id and checksum.
tests:
- unique
- not_null
- name: command_invocation_id
description: The id of the command which resulted in the source artifact's generation.
- name: artifact_generated_at
description: Timestamp of when the source artifact was generated.
- name: node_id
description: Unique id for the node, in the form of model.[package_name].[model_name]
- name: name
description: The model name.
- name: model_schema
- name: depends_on_nodes
description: List of node ids the model depends on.
- name: package_name
- name: model_path
description: Filepath of the model.
- name: checksum
description: Unique identifier for the model. If a model is unchanged between separate executions this will remain the same.
- name: model_materialization
- name: dim_dbt__models
description: All dbt model metadata from every manifest.json.
columns:
- name: manifest_model_id
description: Primary key generated from the command_invocation_id and node_id.
tests:
- unique
- not_null
- name: command_invocation_id
description: The id of the command which resulted in the source artifact's generation.
- name: artifact_generated_at
description: Timestamp of when the source artifact was generated.
- name: node_id
description: Unique id for the node, in the form of model.[package_name].[model_name]
- name: name
description: The model name.
- name: model_schema
- name: depends_on_nodes
description: List of node ids the model depends on.
- name: package_name
- name: model_path
description: Filepath of the model.
- name: checksum
description: Unique identifier for the model. If a model is unchanged between separate executions this will remain the same.
- name: model_materialization

- name: dim_dbt__sources
description: All dbt source metadata from every manifest.json.
columns:
- name: manifest_source_id
description: Primary key generated from the command_invocation_id and node_id.
tests:
- unique
- not_null
- name: command_invocation_id
description: The id of the command which resulted in the source artifact's generation.
- name: artifact_generated_at
description: Timestamp of when the source artifact was generated.
- name: node_id
description: Unique id for the node, in the form of source.[package_name].[source_name].[table_name]
- name: name
description: The source node name.
- name: source_name
description: The name of the source.
- name: source_schema
- name: package_name
description: Package source is defined in.
- name: relation_name
description: Name of the database entity this source resolved to.
- name: source_path
description: Filepath of the source.
32 changes: 29 additions & 3 deletions models/staging/stg_dbt__artifacts.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,38 @@ with base as (
fields as (

select
data,
data:metadata:invocation_id::string as command_invocation_id,
generated_at,
path,
artifact_type
artifact_type,
data
from base

),

duduped as (

select
*,
row_number() over (
partition by command_invocation_id, artifact_type
order by generated_at desc
) as index
from fields
qualify index = 1

),

artifacts as (

select
command_invocation_id,
generated_at,
path,
artifact_type,
data
from duduped

)

select * from fields
select * from artifacts
10 changes: 6 additions & 4 deletions models/staging/stg_dbt__models.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ manifests as (
flatten as (

select
data:metadata:invocation_id::string as command_invocation_id,
command_invocation_id,
generated_at as artifact_generated_at,
node.key as node_id,
node.value:name::string as name,
node.value:database::string as model_database,
node.value:schema::string as model_schema,
node.value:name::string as name,
to_array(node.value:depends_on:nodes) as depends_on_nodes,
node.value:package_name::string as package_name,
node.value:path::string as model_path,
Expand All @@ -35,12 +36,13 @@ flatten as (
surrogate_key as (

select
{{ dbt_utils.surrogate_key(['command_invocation_id', 'checksum']) }} as manifest_model_id,
{{ dbt_utils.surrogate_key(['command_invocation_id', 'node_id']) }} as manifest_model_id,
command_invocation_id,
artifact_generated_at,
node_id,
name,
model_database,
model_schema,
name,
depends_on_nodes,
package_name,
model_path,
Expand Down
51 changes: 51 additions & 0 deletions models/staging/stg_dbt__sources.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
-- Staging model: one row per entry under `sources` in each manifest.json
-- artifact, keyed by a surrogate of (command_invocation_id, node_id).

with manifest_artifacts as (

    -- Only manifest.json artifacts carry the `sources` object flattened below.
    select *
    from {{ ref('stg_dbt__artifacts') }}
    where artifact_type = 'manifest.json'

),

flattened_sources as (

    -- Explode data:sources into one row per key/value pair and pull the
    -- attributes of interest out of each value.
    select
        manifest_artifacts.command_invocation_id,
        manifest_artifacts.generated_at as artifact_generated_at,
        src.key as node_id,
        src.value:name::string as name,
        src.value:source_name::string as source_name,
        src.value:schema::string as source_schema,
        src.value:package_name::string as package_name,
        src.value:relation_name::string as relation_name,
        src.value:path::string as source_path
    from manifest_artifacts,
        lateral flatten(input => manifest_artifacts.data:sources) as src
    where src.value:resource_type = 'source'

),

keyed as (

    -- Prepend the surrogate primary key; remaining columns pass through in order.
    select
        {{ dbt_utils.surrogate_key(['command_invocation_id', 'node_id']) }} as manifest_source_id,
        command_invocation_id,
        artifact_generated_at,
        node_id,
        name,
        source_name,
        source_schema,
        package_name,
        relation_name,
        source_path
    from flattened_sources

)

select * from keyed

0 comments on commit 63550bd

Please sign in to comment.