From f792d0bb7cc490ddad9aab2b3b8e4cd712a7f100 Mon Sep 17 00:00:00 2001 From: Danny Schwalen Date: Mon, 26 Sep 2022 10:34:33 -0500 Subject: [PATCH 1/9] add macro to save DQ test results --- macros/save_test_results.sql | 56 ++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 macros/save_test_results.sql diff --git a/macros/save_test_results.sql b/macros/save_test_results.sql new file mode 100644 index 0000000..f1f0bc3 --- /dev/null +++ b/macros/save_test_results.sql @@ -0,0 +1,56 @@ +{% macro save_test_results(results) %} + +{%- set test_results = [] -%} + +{%- for result in results -%} + {%- if result.node.resource_type == 'test' -%} + {%- do test_results.append(result) -%} + {%- endif -%} +{%- endfor -%} + +{%- set results_tbl -%} + `{{ target.database }}.{{ generate_schema_name('processed') }}.dbt_test_results` +{%- endset -%} + +{{ log('Centralizing test data in ' + results_tbl, info = true) if execute }} + +create table if not exists {{ results_tbl }} ( + test_id string, + test_name string, + project_name string, + target_db string, + dbt_run_env string, + test_severity string, + test_result string, + test_models string, + execution_time_seconds string, + dbt_cloud_run_id string, + create_update_ts timestamp +) +cluster by dbt_cloud_run_id +; + +insert into {{ results_tbl }} ( + + {% for result in test_results %} + select + '{{ result.node.unique_id }}' as test_id, + '{{ result.node.name }}' as test_name, + '{{ project_name }}' as project_name, + '{{ target.database }}' as target_db, + '{{ env_var("DBT_RUN_ENV") }}' as dbt_run_env, + '{{ result.node.config.severity }}' as test_severity, + '{{ result.status }}' as test_result, + '{% for node_id in result.node.depends_on.nodes -%} + {{ get_full_model_name(node_id) }} + {%- if not loop.last -%},{%- endif -%} + {%- endfor %}' as test_models, + '{{ result.execution_time }}' as execution_time_seconds, + '{{ env_var("DBT_CLOUD_RUN_ID", invocation_id) }}' as dbt_cloud_run_id, + 
current_timestamp() as create_update_ts + + {{ 'union all' if not loop.last }} + {% endfor %} +); + +{% endmacro %} From 0ec8105d786e88a41faa64a556e3a2f0df37f8a0 Mon Sep 17 00:00:00 2001 From: Danny Schwalen Date: Thu, 29 Sep 2022 08:13:34 -0500 Subject: [PATCH 2/9] add helper macro for getting full model name --- macros/helpers/_get_full_model_name.sql | 14 ++++++++++++++ macros/save_test_results.sql | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 macros/helpers/_get_full_model_name.sql diff --git a/macros/helpers/_get_full_model_name.sql b/macros/helpers/_get_full_model_name.sql new file mode 100644 index 0000000..86d662a --- /dev/null +++ b/macros/helpers/_get_full_model_name.sql @@ -0,0 +1,14 @@ +{%- macro _get_full_model_name(node_id) -%} + {%- set node_list = [] -%} + {%- if node_id.split('.')[0] == 'model' -%} + {%- set node_list = graph.nodes.values() | selectattr("resource_type", "equalto", "model") -%} + {%- elif node_id.split('.')[0] == 'source' -%} + {% set node_list = graph.sources.values() -%} + {%- endif -%} + + {%- for node in node_list -%} + {%- if node.unique_id == node_id -%} + `{{ node.database }}.{{ node.schema }}.{{ node.name }}` + {%- endif -%} + {%- endfor -%} +{%- endmacro -%} \ No newline at end of file diff --git a/macros/save_test_results.sql b/macros/save_test_results.sql index f1f0bc3..5ea1a7d 100644 --- a/macros/save_test_results.sql +++ b/macros/save_test_results.sql @@ -42,7 +42,7 @@ insert into {{ results_tbl }} ( '{{ result.node.config.severity }}' as test_severity, '{{ result.status }}' as test_result, '{% for node_id in result.node.depends_on.nodes -%} - {{ get_full_model_name(node_id) }} + {{ _get_full_model_name(node_id) }} {%- if not loop.last -%},{%- endif -%} {%- endfor %}' as test_models, '{{ result.execution_time }}' as execution_time_seconds, From 22b2802fe79caf6d845e747a78ef7ab94e10cc34 Mon Sep 17 00:00:00 2001 From: Danny Schwalen Date: Thu, 29 Sep 2022 08:46:45 -0500 Subject: [PATCH 3/9] 
handle scenario with no test results --- macros/save_test_results.sql | 44 ++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/macros/save_test_results.sql b/macros/save_test_results.sql index 5ea1a7d..307bc99 100644 --- a/macros/save_test_results.sql +++ b/macros/save_test_results.sql @@ -30,27 +30,27 @@ create table if not exists {{ results_tbl }} ( cluster by dbt_cloud_run_id ; -insert into {{ results_tbl }} ( - - {% for result in test_results %} - select - '{{ result.node.unique_id }}' as test_id, - '{{ result.node.name }}' as test_name, - '{{ project_name }}' as project_name, - '{{ target.database }}' as target_db, - '{{ env_var("DBT_RUN_ENV") }}' as dbt_run_env, - '{{ result.node.config.severity }}' as test_severity, - '{{ result.status }}' as test_result, - '{% for node_id in result.node.depends_on.nodes -%} - {{ _get_full_model_name(node_id) }} - {%- if not loop.last -%},{%- endif -%} - {%- endfor %}' as test_models, - '{{ result.execution_time }}' as execution_time_seconds, - '{{ env_var("DBT_CLOUD_RUN_ID", invocation_id) }}' as dbt_cloud_run_id, - current_timestamp() as create_update_ts - - {{ 'union all' if not loop.last }} - {% endfor %} -); +{% if test_results|length > 0 %} + insert into {{ results_tbl }} ( + {% for result in test_results %} + select + '{{ result.node.unique_id }}' as test_id, + '{{ result.node.name }}' as test_name, + '{{ project_name }}' as project_name, + '{{ target.database }}' as target_db, + '{{ env_var("DBT_RUN_ENV") }}' as dbt_run_env, + '{{ result.node.config.severity }}' as test_severity, + '{{ result.status }}' as test_result, + '{% for node_id in result.node.depends_on.nodes -%} + {{ get_full_model_name(node_id) }} + {%- if not loop.last -%},{%- endif -%} + {%- endfor %}' as test_models, + '{{ result.execution_time }}' as execution_time_seconds, + '{{ env_var("DBT_CLOUD_RUN_ID", invocation_id) }}' as dbt_cloud_run_id, + current_timestamp() as create_update_ts + {{ 'union all' 
if not loop.last }} + {% endfor %} + ); +{% endif %} {% endmacro %} From 06595f300e7f1c3369fb81e0833a1aa4835c689d Mon Sep 17 00:00:00 2001 From: Danny Schwalen Date: Thu, 29 Sep 2022 09:05:19 -0500 Subject: [PATCH 4/9] fix helper macro reference --- macros/save_test_results.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/save_test_results.sql b/macros/save_test_results.sql index 307bc99..1b1ebef 100644 --- a/macros/save_test_results.sql +++ b/macros/save_test_results.sql @@ -42,7 +42,7 @@ cluster by dbt_cloud_run_id '{{ result.node.config.severity }}' as test_severity, '{{ result.status }}' as test_result, '{% for node_id in result.node.depends_on.nodes -%} - {{ get_full_model_name(node_id) }} + {{ _get_full_model_name(node_id) }} {%- if not loop.last -%},{%- endif -%} {%- endfor %}' as test_models, '{{ result.execution_time }}' as execution_time_seconds, From f6d67cfb8142f5efb0e1ce358e21b0e8159f3660 Mon Sep 17 00:00:00 2001 From: Danny Schwalen Date: Thu, 29 Sep 2022 10:07:36 -0500 Subject: [PATCH 5/9] update README --- README.md | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index eb9ea36..beaa878 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,8 @@ To update this repo, you will need write access to the General Mills public repo - [generate_schema_name](#generate_schema_name) [(source)](./macros/generate_schema_name.sql) - [smart_source](#smart_source) [(source)](./macros/smart_source.sql) - [materialized_views](#materialized_views) [(source)](./macros/bigquery) +- [save_test_results](#save_test_results) [(source)](./macros/save_test_results.sql) + - uses helper macro [generate_schema_name](#generate_schema_name) [(source)](./macros/helpers/generate_schema_name.sql) ### Usage @@ -79,4 +81,18 @@ Materialized views are powerful but they can be costly, so please consult with t +schema: output materialized_views: +materialized: materialized_view - +schema: output \ No newline 
at end of file + +schema: output + + +#### save_test_results +This macro saves dbt data quality test results to a table in the target project's processed dataset: `processed.dbt_test_results`. This is an append-only table that associates each data quality check/result to a particular dbt run. For development runs (zdev), a separate results table will be created in the corresponding zdev processed dataset. + +Runs that are associated with a dbt Cloud job will be associated with their corresponding `DBT_CLOUD_RUN_ID`, while runs that were kicked off from the CLI are associated with their `invocation_id` (since they are not given a cloud run id). + +To use this macro within a project, include the following in the `dbt_project.yml`: +```yml +# SQL statements to be executed after the completion of a run, build, test, etc. +# Full documentation: https://docs.getdbt.com/reference/project-configs/on-run-start-on-run-end +on-run-end: + - '{{ save_test_results(results) }}' +``` \ No newline at end of file From 3333cc14b8fb0693f0380dae737a722f31f28a03 Mon Sep 17 00:00:00 2001 From: Danny Schwalen Date: Thu, 29 Sep 2022 10:33:49 -0500 Subject: [PATCH 6/9] reference package in macro calls --- macros/save_test_results.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/macros/save_test_results.sql b/macros/save_test_results.sql index 1b1ebef..d8495ba 100644 --- a/macros/save_test_results.sql +++ b/macros/save_test_results.sql @@ -9,7 +9,7 @@ {%- endfor -%} {%- set results_tbl -%} - `{{ target.database }}.{{ generate_schema_name('processed') }}.dbt_test_results` + `{{ target.database }}.{{ gmi_common_dbt_utils.generate_schema_name('processed') }}.dbt_test_results` {%- endset -%} {{ log('Centralizing test data in ' + results_tbl, info = true) if execute }} @@ -42,7 +42,7 @@ cluster by dbt_cloud_run_id '{{ result.node.config.severity }}' as test_severity, '{{ result.status }}' as test_result, '{% for node_id in result.node.depends_on.nodes -%} - {{ 
_get_full_model_name(node_id) }} + {{ gmi_common_dbt_utils._get_full_model_name(node_id) }} {%- if not loop.last -%},{%- endif -%} {%- endfor %}' as test_models, '{{ result.execution_time }}' as execution_time_seconds, From cb6a6dc052088952769e4e5c4aaa585fffb89b49 Mon Sep 17 00:00:00 2001 From: Danny Schwalen Date: Thu, 29 Sep 2022 10:35:14 -0500 Subject: [PATCH 7/9] rename helper function --- .../{_get_full_model_name.sql => get_full_model_name.sql} | 2 +- macros/save_test_results.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename macros/helpers/{_get_full_model_name.sql => get_full_model_name.sql} (92%) diff --git a/macros/helpers/_get_full_model_name.sql b/macros/helpers/get_full_model_name.sql similarity index 92% rename from macros/helpers/_get_full_model_name.sql rename to macros/helpers/get_full_model_name.sql index 86d662a..f4c129f 100644 --- a/macros/helpers/_get_full_model_name.sql +++ b/macros/helpers/get_full_model_name.sql @@ -1,4 +1,4 @@ -{%- macro _get_full_model_name(node_id) -%} +{%- macro get_full_model_name(node_id) -%} {%- set node_list = [] -%} {%- if node_id.split('.')[0] == 'model' -%} {%- set node_list = graph.nodes.values() | selectattr("resource_type", "equalto", "model") -%} diff --git a/macros/save_test_results.sql b/macros/save_test_results.sql index d8495ba..e414948 100644 --- a/macros/save_test_results.sql +++ b/macros/save_test_results.sql @@ -42,7 +42,7 @@ cluster by dbt_cloud_run_id '{{ result.node.config.severity }}' as test_severity, '{{ result.status }}' as test_result, '{% for node_id in result.node.depends_on.nodes -%} - {{ gmi_common_dbt_utils._get_full_model_name(node_id) }} + {{ gmi_common_dbt_utils.get_full_model_name(node_id) }} {%- if not loop.last -%},{%- endif -%} {%- endfor %}' as test_models, '{{ result.execution_time }}' as execution_time_seconds, From 689eaa29f6cb25681658f2136cc3a416f8dc0996 Mon Sep 17 00:00:00 2001 From: Danny Schwalen Date: Thu, 29 Sep 2022 10:36:46 -0500 Subject: [PATCH 8/9] 
update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index beaa878..9ba75ab 100644 --- a/README.md +++ b/README.md @@ -94,5 +94,5 @@ To use this macro within a project, include the following in the `dbt_project.ym # SQL statements to be executed after the completion of a run, build, test, etc. # Full documentation: https://docs.getdbt.com/reference/project-configs/on-run-start-on-run-end on-run-end: - - '{{ save_test_results(results) }}' + - '{{ gmi_common_dbt_utils.save_test_results(results) }}' ``` \ No newline at end of file From 062cee6e4aa1df71adc6ed13c3ff9b6bde1bdbb8 Mon Sep 17 00:00:00 2001 From: Danny Schwalen Date: Thu, 13 Oct 2022 08:27:16 -0500 Subject: [PATCH 9/9] fix README links --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5a338a1..c489964 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ To update this repo, you will need write access to the General Mills public repo - [smart_source](#smart_source) [(source)](./macros/smart_source.sql) - [materialized_views](#materialized_views) [(source)](./macros/bigquery) - [save_test_results](#save_test_results) [(source)](./macros/save_test_results.sql) - - uses helper macro [generate_schema_name](#generate_schema_name) [(source)](./macros/helpers/generate_schema_name.sql) + - uses macros [generate_schema_name](#generate_schema_name) [(source)](./macros/generate_schema_name.sql) and `get_full_model_name` [(source)](./macros/helpers/get_full_model_name.sql) ### Usage