Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sources staging model #5

Merged
merged 3 commits into from
Mar 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,7 @@
target/
dbt_modules/
logs/

.vscode/
Pipfile
Pipfile.lock
3 changes: 2 additions & 1 deletion models/incremental/dim_dbt__models.sql
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ fields as (
command_invocation_id,
artifact_generated_at,
node_id,
name,
model_database,
model_schema,
name,
depends_on_nodes,
package_name,
model_path,
Expand Down
38 changes: 38 additions & 0 deletions models/incremental/dim_dbt__sources.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{{ config( materialized='incremental', unique_key='manifest_source_id' ) }}

-- Incremental dimension of dbt source metadata read from stg_dbt__sources.
-- On incremental runs only rows from artifacts newer than the latest one
-- already loaded are selected; rows are keyed on manifest_source_id.

with dbt_sources as (

    select * from {{ ref('stg_dbt__sources') }}

),

dbt_sources_incremental as (

    select *
    from dbt_sources

    {% if is_incremental() %}
    -- this filter will only be applied on an incremental run
    -- coalesce guards the empty-target case: max() over zero rows is null,
    -- and `> null` would silently filter out every incoming row
    where artifact_generated_at > (
        select coalesce(max(artifact_generated_at), '1900-01-01')
        from {{ this }}
    )
    {% endif %}

),

-- Explicit column list so the dimension's shape does not drift if the
-- staging model gains columns.
fields as (

    select
        manifest_source_id,
        command_invocation_id,
        artifact_generated_at,
        node_id,
        name,
        source_name,
        source_schema,
        package_name,
        relation_name,
        source_path
    from dbt_sources_incremental

)

select * from fields
76 changes: 51 additions & 25 deletions models/schemas.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,28 +104,54 @@ models:
- name: env_*
description: Columns for the environment variables set when the command was executed.

- name: dim_dbt__models
description: All dbt model metadata from every manifest.json.
columns:
- name: manifest_model_id
description: Primary key generated from the command_invocation_id and checksum.
tests:
- unique
- not_null
- name: command_invocation_id
description: The id of the command which resulted in the source artifact's generation.
- name: artifact_generated_at
description: Timestamp of when the source artifact was generated.
- name: node_id
description: Unique id for the node, in the form of model.[package_name].[model_name]
- name: name
description: The model name.
- name: model_schema
- name: depends_on_nodes
description: List of node ids the model depends on.
- name: package_name
- name: model_path
description: Filepath of the model.
- name: checksum
description: Unique identifier for the model. If a model is unchanged between separate executions this will remain the same.
- name: model_materialization
- name: dim_dbt__models
description: All dbt model metadata from every manifest.json.
columns:
- name: manifest_model_id
description: Primary key generated from the command_invocation_id and node_id.
tests:
- unique
- not_null
- name: command_invocation_id
description: The id of the command which resulted in the source artifact's generation.
- name: artifact_generated_at
description: Timestamp of when the source artifact was generated.
- name: node_id
description: Unique id for the node, in the form of model.[package_name].[model_name]
- name: name
description: The model name.
- name: model_schema
- name: depends_on_nodes
description: List of node ids the model depends on.
- name: package_name
- name: model_path
description: Filepath of the model.
- name: checksum
description: Unique identifier for the model. If a model is unchanged between separate executions this will remain the same.
- name: model_materialization

- name: dim_dbt__sources
description: All dbt source metadata from every manifest.json.
columns:
- name: manifest_source_id
description: Primary key generated from the command_invocation_id and node_id.
tests:
- unique
- not_null
- name: command_invocation_id
description: The id of the command which resulted in the source artifact's generation.
- name: artifact_generated_at
description: Timestamp of when the source artifact was generated.
- name: node_id
description: Unique id for the node, in the form of source.[package_name].[source_name].[table_name]
- name: name
description: The source node name.
- name: source_name
description: The name of the source.
- name: source_schema
- name: package_name
description: Package source is defined in.
- name: relation_name
description: Name of the database entity this source resolved to.
- name: source_path
description: Filepath of the source.
32 changes: 29 additions & 3 deletions models/staging/stg_dbt__artifacts.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,38 @@ with base as (
fields as (

select
data,
data:metadata:invocation_id::string as command_invocation_id,
generated_at,
path,
artifact_type
artifact_type,
data
from base

),

duduped as (

select
*,
row_number() over (
partition by command_invocation_id, artifact_type
order by generated_at desc
) as index
from fields
qualify index = 1

),

artifacts as (

select
command_invocation_id,
generated_at,
path,
artifact_type,
data
from duduped

)

select * from fields
select * from artifacts
10 changes: 6 additions & 4 deletions models/staging/stg_dbt__models.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ manifests as (
flatten as (

select
data:metadata:invocation_id::string as command_invocation_id,
command_invocation_id,
generated_at as artifact_generated_at,
node.key as node_id,
node.value:name::string as name,
node.value:database::string as model_database,
node.value:schema::string as model_schema,
node.value:name::string as name,
to_array(node.value:depends_on:nodes) as depends_on_nodes,
node.value:package_name::string as package_name,
node.value:path::string as model_path,
Expand All @@ -35,12 +36,13 @@ flatten as (
surrogate_key as (

select
{{ dbt_utils.surrogate_key(['command_invocation_id', 'checksum']) }} as manifest_model_id,
{{ dbt_utils.surrogate_key(['command_invocation_id', 'node_id']) }} as manifest_model_id,
command_invocation_id,
artifact_generated_at,
node_id,
name,
model_database,
model_schema,
name,
depends_on_nodes,
package_name,
model_path,
Expand Down
51 changes: 51 additions & 0 deletions models/staging/stg_dbt__sources.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
-- Staging model: one row per source entry in each manifest.json artifact,
-- keyed by a surrogate of the invocation id and the source's node id.

with artifacts as (

    select *
    from {{ ref('stg_dbt__artifacts') }}

),

-- Only manifest artifacts are read for source definitions.
manifest_artifacts as (

    select *
    from artifacts
    where artifact_type = 'manifest.json'

),

-- Explode the manifest's `sources` object: each key/value pair becomes a row.
flattened_sources as (

    select
        manifest_artifacts.command_invocation_id,
        manifest_artifacts.generated_at as artifact_generated_at,
        src.key as node_id,
        src.value:name::string as name,
        src.value:source_name::string as source_name,
        src.value:schema::string as source_schema,
        src.value:package_name::string as package_name,
        src.value:relation_name::string as relation_name,
        src.value:path::string as source_path
    from
        manifest_artifacts,
        lateral flatten(input => manifest_artifacts.data:sources) as src
    where src.value:resource_type = 'source'

),

-- Attach the primary key used by downstream incremental models.
keyed_sources as (

    select
        {{ dbt_utils.surrogate_key(['command_invocation_id', 'node_id']) }} as manifest_source_id,
        command_invocation_id,
        artifact_generated_at,
        node_id,
        name,
        source_name,
        source_schema,
        package_name,
        relation_name,
        source_path
    from flattened_sources

)

select * from keyed_sources