Merge pull request #6 from GeneralMills/feature/save-test-results

Feature/save test results
GeneralMills · Oct 13, 2022 · 06186cb · 06186cb
2 parents d74ccb4 + 062cee6
commit 06186cb
Show file tree

Hide file tree

Showing 3 changed files with 88 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -22,6 +22,8 @@ To update this repo, you will need write access to the General Mills public repo
 - [generate_schema_name](#generate_schema_name) [(source)](./macros/generate_schema_name.sql)
 - [smart_source](#smart_source) [(source)](./macros/smart_source.sql)
 - [materialized_views](#materialized_views) [(source)](./macros/bigquery)
+- [save_test_results](#save_test_results) [(source)](./macros/save_test_results.sql)
+   - uses macros [generate_schema_name](#generate_schema_name) [(source)](./macros/generate_schema_name.sql) and `get_full_model_name` [(source)](./macros/helpers/get_full_model_name.sql)
 
 
 ### Usage 
@@ -81,6 +83,21 @@ Materialized views are powerful but they can be costly, so please consult with t
         +materialized: materialized_view
         +schema: output
 
+
+#### save_test_results
+This macro saves dbt data quality test results to a table in the target project's processed dataset: `processed.dbt_test_results`. This is an append-only table that associates each data quality check/result with a particular dbt run. For development runs (zdev), a separate results table is created in the corresponding zdev processed dataset.
+
+Runs triggered by a dbt Cloud job are recorded under their corresponding `DBT_CLOUD_RUN_ID`, while runs kicked off from the CLI are recorded under their `invocation_id` (since they are not given a Cloud run id).
+
+To use this macro within a project, include the following in the `dbt_project.yml`:
+```yml
+# SQL statements to be executed after the completion of a run, build, test, etc.
+# Full documentation: https://docs.getdbt.com/reference/project-configs/on-run-start-on-run-end
+on-run-end:
+  - '{{ gmi_common_dbt_utils.save_test_results(results) }}'
+```
+
+
 #### Big Query Catalog Macro
 The `bq_catalog.sql` Macro overrides the default macro that gathers the metadata necessary for generating dbt docs.
 The default macro queries the `project.dataset.__TABLES__` metadata table,
@@ -93,4 +110,4 @@ In order to access the table_schema in the process of generating the documenatio
 you must specify the Big Query region where your data is housed. Add the following two lines to your
 project variables, substituting the Big Query region of your project.
 vars:
-bq_region: 'region-US'
+bq_region: 'region-US'
diff --git a/macros/helpers/get_full_model_name.sql b/macros/helpers/get_full_model_name.sql
@@ -0,0 +1,14 @@
+{#-
+    Renders the backtick-quoted `database.schema.name` of the graph node whose
+    unique_id is `node_id`.
+
+    Args:
+        node_id: a dbt unique_id such as `model.<project>.<name>` or
+                 `source.<project>.<source>.<table>`; its first dot-separated
+                 segment selects where the node is looked up.
+
+    Output: the quoted name for models, seeds, snapshots and sources; nothing
+    at all when the id has another resource type or is not present in the graph.
+
+    NOTE(review): the last segment is `node.name`. A relation configured with a
+    custom alias (or a source with a custom identifier) may live under a
+    different name in the warehouse -- confirm before relying on this for lookups.
+-#}
+{%- macro get_full_model_name(node_id) -%}
+    {%- set resource_type = node_id.split('.')[0] -%}
+    {%- set node = none -%}
+
+    {#- graph.nodes and graph.sources are keyed by unique_id, so fetch the node directly instead of scanning every entry. -#}
+    {%- if resource_type in ['model', 'seed', 'snapshot'] -%}
+        {%- set node = graph.nodes.get(node_id) -%}
+    {%- elif resource_type == 'source' -%}
+        {%- set node = graph.sources.get(node_id) -%}
+    {%- endif -%}
+
+    {%- if node -%}
+        `{{ node.database }}.{{ node.schema }}.{{ node.name }}`
+    {%- endif -%}
+{%- endmacro -%}
diff --git a/macros/save_test_results.sql b/macros/save_test_results.sql
@@ -0,0 +1,56 @@
+{#-
+    on-run-end hook. Renders the SQL that records the outcome of every dbt test
+    found in `results`:
+      1. creates `<target.database>.<processed schema>.dbt_test_results` if it
+         does not exist yet;
+      2. appends one row per test result (no insert is rendered when the
+         invocation produced no test results).
+
+    Args:
+        results: the `results` list dbt passes to on-run-end hooks; only entries
+                 whose node has resource_type 'test' are saved.
+
+    Each row is tagged with the DBT_CLOUD_RUN_ID environment variable, falling
+    back to `invocation_id` when that variable is unset. DBT_RUN_ENV is read
+    without a fallback, so it must be set whenever there are test results to save.
+
+    NOTE(review): execution_time_seconds is stored as a string; switching to a
+    numeric type would need a migration of tables that already exist.
+-#}
+{% macro save_test_results(results) %}
+
+{%- set test_results = [] -%}
+
+{%- for result in results -%}
+    {%- if result.node.resource_type == 'test' -%}
+        {%- do test_results.append(result) -%}
+    {%- endif -%}
+{%- endfor -%}
+
+{%- set results_tbl -%}
+    `{{ target.database }}.{{ gmi_common_dbt_utils.generate_schema_name('processed') }}.dbt_test_results`
+{%- endset -%}
+
+{{ log('Centralizing test data in ' + results_tbl, info = true) if execute }}
+
+create table if not exists {{ results_tbl }} (
+    test_id string,
+    test_name string,
+    project_name string,
+    target_db string,
+    dbt_run_env string,
+    test_severity string,
+    test_result string,
+    test_models string,
+    execution_time_seconds string,
+    dbt_cloud_run_id string,
+    create_update_ts timestamp
+)
+cluster by dbt_cloud_run_id
+;
+
+{% if test_results|length > 0 %}
+    {#- Name the target columns so the insert does not depend on the physical column order of a table that may already exist. -#}
+    insert into {{ results_tbl }} (
+        test_id,
+        test_name,
+        project_name,
+        target_db,
+        dbt_run_env,
+        test_severity,
+        test_result,
+        test_models,
+        execution_time_seconds,
+        dbt_cloud_run_id,
+        create_update_ts
+    )
+    {% for result in test_results %}
+        {#- Collect the relations this test depends on, skipping ids that resolve to an empty name so the joined list never contains empty entries or dangling commas. -#}
+        {%- set model_names = [] -%}
+        {%- for node_id in result.node.depends_on.nodes -%}
+            {%- set full_name = gmi_common_dbt_utils.get_full_model_name(node_id) | trim -%}
+            {%- if full_name -%}
+                {%- do model_names.append(full_name) -%}
+            {%- endif -%}
+        {%- endfor %}
+        select
+            '{{ result.node.unique_id }}' as test_id,
+            '{{ result.node.name }}' as test_name,
+            '{{ project_name }}' as project_name,
+            '{{ target.database }}' as target_db,
+            '{{ env_var("DBT_RUN_ENV") }}' as dbt_run_env,
+            '{{ result.node.config.severity }}' as test_severity,
+            '{{ result.status }}' as test_result,
+            '{{ model_names | join(",") }}' as test_models,
+            '{{ result.execution_time }}' as execution_time_seconds,
+            '{{ env_var("DBT_CLOUD_RUN_ID", invocation_id) }}' as dbt_cloud_run_id,
+            current_timestamp() as create_update_ts
+        {{ 'union all' if not loop.last }}
+    {% endfor %}
+    ;
+{% endif %}
+
+{% endmacro %}