diff --git a/docs/customization/querying-columns.md b/docs/customization/querying-columns.md new file mode 100644 index 00000000..31289d40 --- /dev/null +++ b/docs/customization/querying-columns.md @@ -0,0 +1,16 @@ +# Querying columns with SQL + +The model `stg_columns` ([source](https://github.com/dbt-labs/dbt-project-evaluator/tree/main/models/staging/graph/stg_columns.sql)), created with the package, lists all the columns from all the dbt nodes (models, sources, tests, snapshots). + +You can use this model to help with questions such as: + +- Are there columns with the same name in different nodes? +- Do any columns in the YAML configuration lack descriptions? +- Do any columns share the same name but have different descriptions? +- Are there columns with names that match a specific pattern (regex)? +- Have any prohibited names been used for columns? + + +## Defining additional tests that match your exact requirements + +You can create a custom test against `{{ ref('stg_columns') }}` to enforce your specific check. When running the package, make sure to also include the children of the package's models by using the `package:dbt_project_evaluator+` selector. diff --git a/integration_tests/models/dbt_project_evaluator_schema_tests/core.yml b/integration_tests/models/dbt_project_evaluator_schema_tests/core.yml index 9d852c06..6865799b 100644 --- a/integration_tests/models/dbt_project_evaluator_schema_tests/core.yml +++ b/integration_tests/models/dbt_project_evaluator_schema_tests/core.yml @@ -9,7 +9,7 @@ models: tests: - unique - not_null - + - name: int_all_graph_resources description: "This table shows one record for each enabled resource in the graph and information about that resource."
columns: @@ -34,4 +34,4 @@ models: - name: unique_id tests: - unique - - not_null \ No newline at end of file + - not_null diff --git a/integration_tests/models/dbt_project_evaluator_schema_tests/graph.yml b/integration_tests/models/dbt_project_evaluator_schema_tests/graph.yml index 739d321c..afcab7f9 100644 --- a/integration_tests/models/dbt_project_evaluator_schema_tests/graph.yml +++ b/integration_tests/models/dbt_project_evaluator_schema_tests/graph.yml @@ -49,6 +49,14 @@ models: - unique - not_null + - name: stg_columns + description: "Staging model from the graph variable, one record per column resource." + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - node_unique_id + - name + - name: stg_sources description: "Staging model from the graph variable, one record per source resource." columns: diff --git a/macros/insert_resources_from_graph.sql b/macros/insert_resources_from_graph.sql index aeea9734..f20e9326 100644 --- a/macros/insert_resources_from_graph.sql +++ b/macros/insert_resources_from_graph.sql @@ -1,5 +1,5 @@ -{% macro insert_resources_from_graph(relation, resource_type='nodes', relationships=False, batch_size=var('insert_batch_size') | int) %} - {%- set values = get_resource_values(resource_type, relationships) -%} +{% macro insert_resources_from_graph(relation, resource_type='nodes', relationships=False, columns=False, batch_size=var('insert_batch_size') | int) %} + {%- set values = get_resource_values(resource_type, relationships, columns) -%} {%- set values_length = values | length -%} {%- set loop_count = (values_length / batch_size) | round(0, 'ceil') | int -%} diff --git a/macros/unpack/get_column_values.sql b/macros/unpack/get_column_values.sql new file mode 100644 index 00000000..d1607b57 --- /dev/null +++ b/macros/unpack/get_column_values.sql @@ -0,0 +1,39 @@ +{%- macro get_column_values(node_type) -%} + {{ return(adapter.dispatch('get_column_values', 'dbt_project_evaluator')(node_type)) }} +{%- endmacro 
-%} + +{%- macro default__get_column_values(node_type) -%} {#- Default implementation behind the get_column_values dispatch. When execute is true it walks graph.nodes (node_type 'nodes') or graph.sources (node_type 'sources'); any other node_type raises a compiler error. It returns a list with one inner list per entry of node.columns, holding in order: node unique_id, column name, description, data_type and quote, each passed through wrap_string_with_quotes (all but unique_id also through dbt.escape_single_quotes). Nothing is returned explicitly when execute is false. NOTE(review): data_type and quote are presumably optional on a column; confirm how the two helpers treat an unset value. -#} + + {%- if execute -%} + {%- if node_type == 'nodes' %} + {% set nodes_list = graph.nodes.values() %} + {%- elif node_type == 'sources' -%} + {% set nodes_list = graph.sources.values() %} + {%- else -%} + {{ exceptions.raise_compiler_error("node_type needs to be either nodes or sources, got " ~ node_type) }} + {% endif -%} + + {%- set values = [] -%} {#- accumulator: one inner list is appended per column below -#} + + {%- for node in nodes_list -%} + {%- for column in node.columns.values() -%} + + {%- set values_line = + [ + wrap_string_with_quotes(node.unique_id), + wrap_string_with_quotes(dbt.escape_single_quotes(column.name)), + wrap_string_with_quotes(dbt.escape_single_quotes(column.description)), + wrap_string_with_quotes(dbt.escape_single_quotes(column.data_type)), + wrap_string_with_quotes(dbt.escape_single_quotes(column.quote)) + ] + %} + + {%- do values.append(values_line) -%} + + {%- endfor -%} + {%- endfor -%} + {{ return(values) }} + + {%- endif -%} + +{%- endmacro -%} diff --git a/macros/unpack/get_resource_values.sql b/macros/unpack/get_resource_values.sql index 47c301f8..4fe76d59 100644 --- a/macros/unpack/get_resource_values.sql +++ b/macros/unpack/get_resource_values.sql @@ -1,6 +1,8 @@ -{% macro get_resource_values(resource=None, relationships=None) %} +{% macro get_resource_values(resource=None, relationships=None, columns=None) %} {% if relationships %} {{ return(adapter.dispatch('get_relationship_values', 'dbt_project_evaluator')(node_type=resource)) }} + {% elif columns %} + {{ return(adapter.dispatch('get_column_values', 'dbt_project_evaluator')(node_type=resource)) }} {% elif resource == 'exposures' %} {{ return(adapter.dispatch('get_exposure_values', 'dbt_project_evaluator')()) }} {% elif resource == 'sources' %} diff --git a/mkdocs.yml b/mkdocs.yml index ed205fa5..4679af57 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -81,6 +81,7 @@ nav: - Configuring exceptions to the rules: customization/exceptions.md - Excluding packages and models/sources
based on path: customization/excluding-packages-and-paths.md - Display issues in the logs: customization/issues-in-log.md + - Querying columns: customization/querying-columns.md - Run in CI Check: ci-check.md - Querying the DAG: querying-the-dag.md - Contributing: contributing.md \ No newline at end of file diff --git a/models/staging/graph/base/base_node_columns.sql b/models/staging/graph/base/base_node_columns.sql new file mode 100644 index 00000000..c6297cf9 --- /dev/null +++ b/models/staging/graph/base/base_node_columns.sql @@ -0,0 +1,24 @@ +{{ + config( + materialized='table', + post_hook="{{ insert_resources_from_graph(this, resource_type='nodes', columns=True) }}" + ) +}} + +{% if execute %} + {{ check_model_is_table(model) }} +{% endif %} +/* BigQuery won't let us `where` without `from`, so we select from a one-row dummy CTE. `where false` keeps the result empty: the query only defines an empty table of string-typed columns; rows are presumably added afterwards by the post_hook call to insert_resources_from_graph (confirm in that macro). */ +with dummy_cte as ( + select 1 as foo +) + +select + cast(null as {{ dbt.type_string() }}) as node_unique_id, + cast(null as {{ dbt.type_string()}}) as name, + cast(null as {{ dbt.type_string()}}) as description, + cast(null as {{ dbt.type_string()}}) as data_type, + cast(null as {{ dbt.type_string()}}) as quote + +from dummy_cte +where false \ No newline at end of file diff --git a/models/staging/graph/base/base_source_columns.sql b/models/staging/graph/base/base_source_columns.sql new file mode 100644 index 00000000..4592ecf5 --- /dev/null +++ b/models/staging/graph/base/base_source_columns.sql @@ -0,0 +1,24 @@ +{{ + config( + materialized='table', + post_hook="{{ insert_resources_from_graph(this, resource_type='sources', columns=True) }}" + ) +}} + +{% if execute %} + {{ check_model_is_table(model) }} +{% endif %} +/* Bigquery won't let us `where` without `from` so we use this workaround */ +with dummy_cte as ( + select 1 as foo +) + +select + cast(null as {{ dbt.type_string() }}) as node_unique_id, + cast(null as {{ dbt.type_string()}}) as name, + cast(null as {{ dbt.type_string()}}) as description, + cast(null as {{ dbt.type_string()}}) as
data_type, + cast(null as {{ dbt.type_string()}}) as quote + +from dummy_cte +where false \ No newline at end of file diff --git a/models/staging/graph/stg_columns.sql b/models/staging/graph/stg_columns.sql new file mode 100644 index 00000000..8fd8df3a --- /dev/null +++ b/models/staging/graph/stg_columns.sql @@ -0,0 +1,11 @@ +with /* Stacks the rows of base_node_columns and base_source_columns into a single relation via dbt_utils.union_relations. NOTE(review): union_relations presumably also adds a column naming the source relation by default; confirm against the dbt_utils docs. */ + +final as ( + + {{ dbt_utils.union_relations([ + ref('base_node_columns'), + ref('base_source_columns') + ])}} +) + +select * from final