From 9b88eb67a110f80476c3a94c3a06095cac9bc28f Mon Sep 17 00:00:00 2001 From: Ben Edwards Date: Fri, 14 Jun 2019 13:49:22 +1000 Subject: [PATCH 01/37] Change postgres `get_catalog` to not use `information_schema` - `information_schema` in Postgres is not very performant due to the complex views used to create it - use underlying `pg_catalog` tables/views instead - returns the same rows/columns as the `information_schema` version - order of rows is different, this is because there was only a partial sort on the `information_schema` version - `column_type` will return different values to before - some arrays were `ARRAY`, will now be `type[]` - user-defined types were previously `USER_DEFINED`, now will be the name of the user-defined type <-- main point of this PR - performance is 2-5x faster, depending on query caching --- .../dbt/include/postgres/macros/catalog.sql | 88 ++++++------------- 1 file changed, 29 insertions(+), 59 deletions(-) diff --git a/plugins/postgres/dbt/include/postgres/macros/catalog.sql b/plugins/postgres/dbt/include/postgres/macros/catalog.sql index 3558f3ff649..55a02fa6eff 100644 --- a/plugins/postgres/dbt/include/postgres/macros/catalog.sql +++ b/plugins/postgres/dbt/include/postgres/macros/catalog.sql @@ -8,65 +8,35 @@ {% set database = information_schemas[0].database %} {{ adapter.verify_database(database) }} - with table_owners as ( - - select - '{{ database }}' as table_database, - schemaname as table_schema, - tablename as table_name, - tableowner as table_owner - - from pg_tables - - union all - - select - '{{ database }}' as table_database, - schemaname as table_schema, - viewname as table_name, - viewowner as table_owner - - from pg_views - - ), - - tables as ( - - select - table_catalog as table_database, - table_schema, - table_name, - table_type - - from information_schema.tables - - ), - - columns as ( - - select - table_catalog as table_database, - table_schema, - table_name, - null as table_comment, - column_name, - ordinal_position as column_index, - data_type as column_type, - null as column_comment - - from information_schema.columns - - ) - - select * - from tables - join columns using (table_database, table_schema, table_name) - join table_owners using (table_database, table_schema, table_name) - - where table_schema != 'information_schema' - and table_schema not like 'pg_%' - - order by column_index + select + '{{ database }}' as table_database, + sch.nspname as table_schema, + tbl.relname as table_name, + case tbl.relkind + when 'r' then 'BASE TABLE' + else 'VIEW' + end as table_kind, + null::text as table_comment, + col.attname as column_name, + col.attnum as column_index, + pg_catalog.format_type(col.atttypid, col.atttypmod) as column_type, + null::text as column_comment, + pg_get_userbyid(tbl.relowner) as table_owner + + from pg_catalog.pg_namespace sch + join pg_catalog.pg_class tbl on tbl.relnamespace = sch.oid + join pg_catalog.pg_attribute col on col.attrelid = tbl.oid + + where sch.nspname != 'information_schema' + and sch.nspname not like 'pg_%' -- avoid postgres system schemas + and tbl.relpersistence = 'p' -- [p]ermanent table. Other values are [u]nlogged table, [t]emporary table + and tbl.relkind in ('r', 'v', 'm') -- o[r]dinary table, [v]iew, [m]aterialized view. Other values are [i]ndex, [S]equence, [c]omposite type, [t]OAST table, [f]oreign table + and col.attnum >= 1 -- negative numbers are used for system columns such as oid + + order by + sch.nspname, + tbl.relname, + col.attnum {%- endcall -%} From 03bc58116cf1f6a0e34256cf33c71667cea71574 Mon Sep 17 00:00:00 2001 From: Ben Edwards Date: Fri, 14 Jun 2019 17:21:08 +1000 Subject: [PATCH 02/37] Fix incorrectly named column `table_type` --- plugins/postgres/dbt/include/postgres/macros/catalog.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/postgres/dbt/include/postgres/macros/catalog.sql b/plugins/postgres/dbt/include/postgres/macros/catalog.sql index 55a02fa6eff..0a393b15b6f 100644 --- a/plugins/postgres/dbt/include/postgres/macros/catalog.sql +++ b/plugins/postgres/dbt/include/postgres/macros/catalog.sql @@ -15,7 +15,7 @@ case tbl.relkind when 'r' then 'BASE TABLE' else 'VIEW' - end as table_kind, + end as table_type, null::text as table_comment, col.attname as column_name, col.attnum as column_index, From 38c2d82c881f4062ca183e3e22dc86dff51b699d Mon Sep 17 00:00:00 2001 From: Ben Edwards Date: Tue, 2 Jul 2019 09:09:33 +1000 Subject: [PATCH 03/37] Cleaned up filtering to be in line with information_schema logic --- plugins/postgres/dbt/include/postgres/macros/catalog.sql | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/plugins/postgres/dbt/include/postgres/macros/catalog.sql b/plugins/postgres/dbt/include/postgres/macros/catalog.sql index 0a393b15b6f..a002914344e 100644 --- a/plugins/postgres/dbt/include/postgres/macros/catalog.sql +++ b/plugins/postgres/dbt/include/postgres/macros/catalog.sql @@ -29,9 +29,11 @@ where sch.nspname != 'information_schema' and sch.nspname not like 'pg_%' -- avoid postgres system schemas + and not pg_is_other_temp_schema(sch.oid) -- not a temporary schema belonging to another session and tbl.relpersistence = 'p' -- [p]ermanent table. Other values are [u]nlogged table, [t]emporary table - and tbl.relkind in ('r', 'v', 'm') -- o[r]dinary table, [v]iew, [m]aterialized view. Other values are [i]ndex, [S]equence, [c]omposite type, [t]OAST table, [f]oreign table - and col.attnum >= 1 -- negative numbers are used for system columns such as oid + and tbl.relkind in ('r', 'v', 'f', 'p') -- o[r]dinary table, [v]iew, [f]oreign table, [p]artitioned table. Other values are [i]ndex, [S]equence, [c]omposite type, [t]OAST table, [m]aterialized view + and col.attnum > 0 -- negative numbers are used for system columns such as oid + and not col.attisdropped -- column as not been dropped order by sch.nspname, From 2645667257619bc0046f3222e1d227760d4441a6 Mon Sep 17 00:00:00 2001 From: Ben Edwards Date: Wed, 3 Jul 2019 07:32:54 +1000 Subject: [PATCH 04/37] Fix table type v = view r, f, p = all are different forms of table --- plugins/postgres/dbt/include/postgres/macros/catalog.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/postgres/dbt/include/postgres/macros/catalog.sql b/plugins/postgres/dbt/include/postgres/macros/catalog.sql index a002914344e..78f5d09e9af 100644 --- a/plugins/postgres/dbt/include/postgres/macros/catalog.sql +++ b/plugins/postgres/dbt/include/postgres/macros/catalog.sql @@ -13,8 +13,8 @@ sch.nspname as table_schema, tbl.relname as table_name, case tbl.relkind - when 'r' then 'BASE TABLE' - else 'VIEW' + when 'v' then 'VIEW' + else 'BASE TABLE' end as table_type, null::text as table_comment, col.attname as column_name, From fa6fb1b53d9036b4294db59b449aae4fad1881fa Mon Sep 17 00:00:00 2001 From: aminamos <26092352+aminamos@users.noreply.github.com> Date: Mon, 15 Jul 2019 19:14:22 -0700 Subject: [PATCH 05/37] updated dbt.exceptions reference to exceptions in .sql files --- core/dbt/include/global_project/macros/adapters/common.sql | 6 +++--- core/dbt/include/global_project/macros/etc/datetime.sql | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/dbt/include/global_project/macros/adapters/common.sql b/core/dbt/include/global_project/macros/adapters/common.sql index c02239d6f49..1a6c3ecd59b 100644 --- a/core/dbt/include/global_project/macros/adapters/common.sql +++ b/core/dbt/include/global_project/macros/adapters/common.sql @@ -115,7 +115,7 @@ {% endmacro %} {% macro default__get_columns_in_relation(relation) -%} - {{ dbt.exceptions.raise_not_implemented( + {{ exceptions.raise_not_implemented( 'get_columns_in_relation macro not implemented for adapter '+adapter.type()) }} {% endmacro %} @@ -224,7 +224,7 @@ {% macro default__list_relations_without_caching(information_schema, schema) %} - {{ dbt.exceptions.raise_not_implemented( + {{ exceptions.raise_not_implemented( 'list_relations_without_caching macro not implemented for adapter '+adapter.type()) }} {% endmacro %} @@ -235,7 +235,7 @@ {% macro default__current_timestamp() -%} - {{ dbt.exceptions.raise_not_implemented( + {{ exceptions.raise_not_implemented( 'current_timestamp macro not implemented for adapter '+adapter.type()) }} {%- endmacro %} diff --git a/core/dbt/include/global_project/macros/etc/datetime.sql b/core/dbt/include/global_project/macros/etc/datetime.sql index f94e8251b2f..43fd731d1da 100644 --- a/core/dbt/include/global_project/macros/etc/datetime.sql +++ b/core/dbt/include/global_project/macros/etc/datetime.sql @@ -48,7 +48,7 @@ {% set start_date = partition_range[0] %} {% set end_date = partition_range[1] %} {% else %} - {{ dbt.exceptions.raise_compiler_error("Invalid partition time. Expected format: {Start Date}[,{End Date}]. Got: " ~ raw_partition_date) }} + {{ exceptions.raise_compiler_error("Invalid partition time. Expected format: {Start Date}[,{End Date}]. Got: " ~ raw_partition_date) }} {% endif %} {{ return(dates_in_range(start_date, end_date, in_fmt=date_fmt)) }} From de56e88a00e14fffc4051ef347b2cfbcd7d4f3ed Mon Sep 17 00:00:00 2001 From: aminamos <26092352+aminamos@users.noreply.github.com> Date: Tue, 16 Jul 2019 14:18:52 -0700 Subject: [PATCH 06/37] updated raise_not_implemented, commented on profile.py --- core/dbt/config/profile.py | 2 +- core/dbt/exceptions.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/core/dbt/config/profile.py b/core/dbt/config/profile.py index f5d7c05d6e6..46ac5bdc4e8 100644 --- a/core/dbt/config/profile.py +++ b/core/dbt/config/profile.py @@ -148,7 +148,7 @@ def __eq__(self, other): if not (isinstance(other, self.__class__) and isinstance(self, other.__class__)): return False - return False + return False # 'unreachable code', remove line? return self.to_profile_info() == other.to_profile_info() def validate(self): diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py index f742b59f5f5..6162035efbd 100644 --- a/core/dbt/exceptions.py +++ b/core/dbt/exceptions.py @@ -236,7 +236,8 @@ class VersionsNotCompatibleException(SemverException): class NotImplementedException(Exception): - pass + CODE = 10010 + MESSAGE = "Error: this is not implemented" class FailedToConnectException(DatabaseException): @@ -687,6 +688,7 @@ def warn_or_raise(exc, log_fmt=None): raise_duplicate_patch_name, raise_duplicate_resource_name, raise_invalid_schema_yml_version, + raise_not_implemented, relation_wrong_type, ] } From 8046992e08172d68ac157fe73da0407f6f926627 Mon Sep 17 00:00:00 2001 From: aminamos <26092352+aminamos@users.noreply.github.com> Date: Tue, 16 Jul 2019 14:28:31 -0700 Subject: [PATCH 07/37] added string interpolation to raise_not_implemented --- core/dbt/exceptions.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py index 6162035efbd..294fc784378 100644 --- a/core/dbt/exceptions.py +++ b/core/dbt/exceptions.py @@ -237,7 +237,7 @@ class VersionsNotCompatibleException(SemverException): class NotImplementedException(Exception): CODE = 10010 - MESSAGE = "Error: this is not implemented" + MESSAGE = "ERROR: this is not implemented" class FailedToConnectException(DatabaseException): @@ -647,7 +647,9 @@ def raise_unrecognized_credentials_type(typename, supported_types): def raise_not_implemented(msg): - raise NotImplementedException(msg) + raise NotImplementedException( + "ERROR: {}" + .format(msg)) def warn_or_error(msg, node=None, log_fmt=None): From a55a27acf62841b830c77397d17f288ac4d31b4b Mon Sep 17 00:00:00 2001 From: aminamos <26092352+aminamos@users.noreply.github.com> Date: Tue, 16 Jul 2019 16:13:58 -0700 Subject: [PATCH 08/37] removed extra line --- core/dbt/config/profile.py | 1 - 1 file changed, 1 deletion(-) diff --git a/core/dbt/config/profile.py b/core/dbt/config/profile.py index 46ac5bdc4e8..bfeb3a112a3 100644 --- a/core/dbt/config/profile.py +++ b/core/dbt/config/profile.py @@ -148,7 +148,6 @@ def __eq__(self, other): if not (isinstance(other, self.__class__) and isinstance(self, other.__class__)): return False - return False # 'unreachable code', remove line? return self.to_profile_info() == other.to_profile_info() def validate(self): From 5e6e7469512b47af0e61eb317646acb51c465d42 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Tue, 16 Jul 2019 23:24:19 -0400 Subject: [PATCH 09/37] possible fix for re-used check cols on BQ --- .../materializations/snapshot/snapshot.sql | 38 +++++++++---------- .../materializations/snapshot/strategies.sql | 14 +++++-- .../dbt/adapters/bigquery/connections.py | 19 ++++++++-- 3 files changed, 46 insertions(+), 25 deletions(-) diff --git a/core/dbt/include/global_project/macros/materializations/snapshot/snapshot.sql b/core/dbt/include/global_project/macros/materializations/snapshot/snapshot.sql index 82ea029b323..a52e302d814 100644 --- a/core/dbt/include/global_project/macros/materializations/snapshot/snapshot.sql +++ b/core/dbt/include/global_project/macros/materializations/snapshot/snapshot.sql @@ -31,6 +31,15 @@ ), + snapshotted_data as ( + + select *, + {{ strategy.unique_key }} as dbt_unique_key + + from {{ target_relation }} + + ), + source_data as ( select *, @@ -43,15 +52,6 @@ from snapshot_query ), - snapshotted_data as ( - - select *, - {{ strategy.unique_key }} as dbt_unique_key - - from {{ target_relation }} - - ), - insertions as ( select @@ -84,6 +84,15 @@ ), + snapshotted_data as ( + + select *, + {{ strategy.unique_key }} as dbt_unique_key + + from {{ target_relation }} + + ), + source_data as ( select @@ -96,15 +105,6 @@ from snapshot_query ), - snapshotted_data as ( - - select *, - {{ strategy.unique_key }} as dbt_unique_key - - from {{ target_relation }} - - ), - updates as ( select @@ -202,7 +202,7 @@ {%- endif -%} {% set strategy_macro = strategy_dispatch(strategy_name) %} - {% set strategy = strategy_macro(model, "snapshotted_data", "source_data", config) %} + {% set strategy = strategy_macro(model, "snapshotted_data", "source_data", config, target_relation_exists) %} {% if not target_relation_exists %} diff --git a/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql b/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql index 452cfbbd66c..f23fc91f9e3 100644 --- a/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql +++ b/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql @@ -62,7 +62,7 @@ {# Core strategy definitions #} -{% macro snapshot_timestamp_strategy(node, snapshotted_rel, current_rel, config) %} +{% macro snapshot_timestamp_strategy(node, snapshotted_rel, current_rel, config, target_exists) %} {% set primary_key = config['unique_key'] %} {% set updated_at = config['updated_at'] %} @@ -81,7 +81,7 @@ {% endmacro %} -{% macro snapshot_check_strategy(node, snapshotted_rel, current_rel, config) %} +{% macro snapshot_check_strategy(node, snapshotted_rel, current_rel, config, target_exists) %} {% set check_cols_config = config['check_cols'] %} {% set primary_key = config['unique_key'] %} {% set updated_at = snapshot_get_time() %} @@ -106,7 +106,15 @@ ) {%- endset %} - {% set scd_id_cols = [primary_key] + (check_cols | list) %} + {% if target_exists %} + {% set tbl_version -%} + cast((select count(*) from {{ snapshotted_rel }} where {{ snapshotted_rel }}.dbt_unique_key = {{ primary_key }}) as string) + {%- endset %} + {% set scd_id_cols = [primary_key, tbl_version] + (check_cols | list) %} + {% else %} + {% set scd_id_cols = [primary_key] + (check_cols | list) %} + {% endif %} + {% set scd_id_expr = snapshot_hash_arguments(scd_id_cols) %} {% do return({ diff --git a/plugins/bigquery/dbt/adapters/bigquery/connections.py b/plugins/bigquery/dbt/adapters/bigquery/connections.py index 24731c1a747..678b272dc9c 100644 --- a/plugins/bigquery/dbt/adapters/bigquery/connections.py +++ b/plugins/bigquery/dbt/adapters/bigquery/connections.py @@ -202,15 +202,28 @@ def raw_execute(self, sql, fetch=False): def execute(self, sql, auto_begin=False, fetch=None): # auto_begin is ignored on bigquery, and only included for consistency - _, iterator = self.raw_execute(sql, fetch=fetch) + query_job, iterator = self.raw_execute(sql, fetch=fetch) if fetch: res = self.get_table_from_response(iterator) else: res = dbt.clients.agate_helper.empty_table() - # If we get here, the query succeeded - status = 'OK' + if query_job.statement_type == 'CREATE_VIEW': + status = 'CREATE VIEW' + + elif query_job.statement_type == 'CREATE_TABLE_AS_SELECT': + conn = self.get_thread_connection() + client = conn.handle + table = client.get_table(query_job.destination) + status = 'CREATE TABLE ({})'.format(table.num_rows) + + elif query_job.statement_type in ['INSERT', 'DELETE', 'MERGE']: + status = '{} ({})'.format(query_job.statement_type, query_job.num_dml_affected_rows) + + else: + status = 'OK' + return status, res def create_bigquery_table(self, database, schema, table_name, callback, From 4df0bbd8147f055d623cf48ceef5d82bae10c6d7 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Tue, 16 Jul 2019 23:43:03 -0400 Subject: [PATCH 10/37] touchup var name and sql formatting --- .../materializations/snapshot/strategies.sql | 20 +++++++++++-------- .../macros/materializations/snapshot.sql | 9 ++++++--- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql b/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql index f23fc91f9e3..0fa1abbe68e 100644 --- a/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql +++ b/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql @@ -35,16 +35,17 @@ {# Create SCD Hash SQL fields cross-db #} -{% macro snapshot_hash_arguments(args) %} +{% macro snapshot_hash_arguments(args) -%} {{ adapter_macro('snapshot_hash_arguments', args) }} -{% endmacro %} +{%- endmacro %} -{% macro default__snapshot_hash_arguments(args) %} +{% macro default__snapshot_hash_arguments(args) -%} md5({% for arg in args %} - coalesce(cast({{ arg }} as varchar ), '') {% if not loop.last %} || '|' || {% endif %} + coalesce(cast({{ arg }} as varchar ), '') + {% if not loop.last %} || '|' || {% endif %} {% endfor %}) -{% endmacro %} +{%- endmacro %} {# @@ -107,10 +108,13 @@ {%- endset %} {% if target_exists %} - {% set tbl_version -%} - cast((select count(*) from {{ snapshotted_rel }} where {{ snapshotted_rel }}.dbt_unique_key = {{ primary_key }}) as string) + {% set row_version -%} + ( + select count(*) from {{ snapshotted_rel }} + where {{ snapshotted_rel }}.dbt_unique_key = {{ primary_key }} + ) {%- endset %} - {% set scd_id_cols = [primary_key, tbl_version] + (check_cols | list) %} + {% set scd_id_cols = [primary_key, row_version] + (check_cols | list) %} {% else %} {% set scd_id_cols = [primary_key] + (check_cols | list) %} {% endif %} diff --git a/plugins/bigquery/dbt/include/bigquery/macros/materializations/snapshot.sql b/plugins/bigquery/dbt/include/bigquery/macros/materializations/snapshot.sql index 4cd5a04467c..2cfbdb2956e 100644 --- a/plugins/bigquery/dbt/include/bigquery/macros/materializations/snapshot.sql +++ b/plugins/bigquery/dbt/include/bigquery/macros/materializations/snapshot.sql @@ -1,6 +1,9 @@ -{% macro bigquery__snapshot_hash_arguments(args) %} - to_hex(md5(concat({% for arg in args %}coalesce(cast({{ arg }} as string), ''){% if not loop.last %}, '|',{% endif %}{% endfor %}))) -{% endmacro %} +{% macro bigquery__snapshot_hash_arguments(args) -%} + to_hex(md5(concat({% for arg in args %} + coalesce(cast({{ arg }} as string), ''){% if not loop.last %}, '|',{% endif -%} + {% endfor %} + ))) +{%- endmacro %} {% macro bigquery__create_columns(relation, columns) %} {{ adapter.alter_table_add_columns(relation, columns) }} From e86c11e5de943f0fc1e247a25ef898cb7fa49524 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Sat, 20 Jul 2019 15:59:36 -0400 Subject: [PATCH 11/37] set application name in snowflake connections --- .../dbt/adapters/snowflake/connections.py | 1 + test/unit/test_snowflake_adapter.py | 15 +++++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/plugins/snowflake/dbt/adapters/snowflake/connections.py b/plugins/snowflake/dbt/adapters/snowflake/connections.py index a2116e9c734..f78690797a7 100644 --- a/plugins/snowflake/dbt/adapters/snowflake/connections.py +++ b/plugins/snowflake/dbt/adapters/snowflake/connections.py @@ -131,6 +131,7 @@ def open(cls, connection): autocommit=False, client_session_keep_alive=credentials.get( 'client_session_keep_alive', False), + application='dbt', **auth_args ) diff --git a/test/unit/test_snowflake_adapter.py b/test/unit/test_snowflake_adapter.py index caba79ea2e2..4453ab8c55f 100644 --- a/test/unit/test_snowflake_adapter.py +++ b/test/unit/test_snowflake_adapter.py @@ -164,7 +164,7 @@ def test_client_session_keep_alive_false_by_default(self): account='test_account', autocommit=False, client_session_keep_alive=False, database='test_database', role=None, schema='public', user='test_user', - warehouse='test_warehouse', private_key=None) + warehouse='test_warehouse', private_key=None, application='dbt') ]) def test_client_session_keep_alive_true(self): @@ -178,7 +178,7 @@ def test_client_session_keep_alive_true(self): account='test_account', autocommit=False, client_session_keep_alive=True, database='test_database', role=None, schema='public', user='test_user', - warehouse='test_warehouse', private_key=None) + warehouse='test_warehouse', private_key=None, application='dbt') ]) def test_user_pass_authentication(self): @@ -192,7 +192,8 @@ def test_user_pass_authentication(self): account='test_account', autocommit=False, client_session_keep_alive=False, database='test_database', password='test_password', role=None, schema='public', - user='test_user', warehouse='test_warehouse', private_key=None) + user='test_user', warehouse='test_warehouse', private_key=None, + application='dbt') ]) def test_authenticator_user_pass_authentication(self): @@ -207,7 +208,8 @@ def test_authenticator_user_pass_authentication(self): client_session_keep_alive=False, database='test_database', password='test_password', role=None, schema='public', user='test_user', warehouse='test_warehouse', - authenticator='test_sso_url', private_key=None) + authenticator='test_sso_url', private_key=None, + application='dbt') ]) def test_authenticator_externalbrowser_authentication(self): @@ -222,7 +224,7 @@ def test_authenticator_externalbrowser_authentication(self): client_session_keep_alive=False, database='test_database', role=None, schema='public', user='test_user', warehouse='test_warehouse', authenticator='externalbrowser', - private_key=None) + private_key=None, application='dbt') ]) @patch('dbt.adapters.snowflake.SnowflakeConnectionManager._get_private_key', return_value='test_key') @@ -239,5 +241,6 @@ def test_authenticator_private_key_authentication(self, mock_get_private_key): account='test_account', autocommit=False, client_session_keep_alive=False, database='test_database', role=None, schema='public', user='test_user', - warehouse='test_warehouse', private_key='test_key') + warehouse='test_warehouse', private_key='test_key', + application='dbt') ]) From a2e801c2de127e514d67e4518343732742cee140 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Sun, 21 Jul 2019 13:40:43 -0400 Subject: [PATCH 12/37] pep8 --- plugins/bigquery/dbt/adapters/bigquery/connections.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/plugins/bigquery/dbt/adapters/bigquery/connections.py b/plugins/bigquery/dbt/adapters/bigquery/connections.py index 678b272dc9c..5743b0aa52b 100644 --- a/plugins/bigquery/dbt/adapters/bigquery/connections.py +++ b/plugins/bigquery/dbt/adapters/bigquery/connections.py @@ -219,7 +219,10 @@ def execute(self, sql, auto_begin=False, fetch=None): status = 'CREATE TABLE ({})'.format(table.num_rows) elif query_job.statement_type in ['INSERT', 'DELETE', 'MERGE']: - status = '{} ({})'.format(query_job.statement_type, query_job.num_dml_affected_rows) + status = '{} ({})'.format( + query_job.statement_type, + query_job.num_dml_affected_rows + ) else: status = 'OK' From 35d1a7a1b587a83baf5e13bb1c85014b82a366fc Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Sun, 21 Jul 2019 15:26:40 -0400 Subject: [PATCH 13/37] add tests --- .../check_snapshots_test_current.sql | 51 +++++++++++++++++ .../check-snapshots/check_cols_cycle.sql | 33 +++++++++++ .../test_snapshot_check_cols.py | 55 +++++++++++++++++++ 3 files changed, 139 insertions(+) create mode 100644 test/integration/004_simple_snapshot_test/check-snapshots-expected/check_snapshots_test_current.sql create mode 100644 test/integration/004_simple_snapshot_test/check-snapshots/check_cols_cycle.sql create mode 100644 test/integration/004_simple_snapshot_test/test_snapshot_check_cols.py diff --git a/test/integration/004_simple_snapshot_test/check-snapshots-expected/check_snapshots_test_current.sql b/test/integration/004_simple_snapshot_test/check-snapshots-expected/check_snapshots_test_current.sql new file mode 100644 index 00000000000..414afb4727c --- /dev/null +++ b/test/integration/004_simple_snapshot_test/check-snapshots-expected/check_snapshots_test_current.sql @@ -0,0 +1,51 @@ + + +with query as ( + + -- check that the current value for id=1 is red + select case when ( + select count(*) + from {{ ref('check_cols_cycle') }} + where id = 1 and color = 'red' and dbt_valid_to is null + ) = 1 then 0 else 1 end as failures + + union all + + -- check that the previous 'red' value for id=1 is invalidated + select case when ( + select count(*) + from {{ ref('check_cols_cycle') }} + where id = 1 and color = 'red' and dbt_valid_to is not null + ) = 1 then 0 else 1 end as failures + + union all + + -- check that there's only one current record for id=2 + select case when ( + select count(*) + from {{ ref('check_cols_cycle') }} + where id = 2 and color = 'pink' and dbt_valid_to is null + ) = 1 then 0 else 1 end as failures + + union all + + -- check that the previous value for id=2 is represented + select case when ( + select count(*) + from {{ ref('check_cols_cycle') }} + where id = 2 and color = 'green' and dbt_valid_to is not null + ) = 1 then 0 else 1 end as failures + + union all + + -- check that there are 5 records total in the table + select case when ( + select count(*) + from {{ ref('check_cols_cycle') }} + ) = 5 then 0 else 1 end as failures + +) + +select * +from query +where failures = 1 diff --git a/test/integration/004_simple_snapshot_test/check-snapshots/check_cols_cycle.sql b/test/integration/004_simple_snapshot_test/check-snapshots/check_cols_cycle.sql new file mode 100644 index 00000000000..8b36f35a1bc --- /dev/null +++ b/test/integration/004_simple_snapshot_test/check-snapshots/check_cols_cycle.sql @@ -0,0 +1,33 @@ + +{% snapshot check_cols_cycle %} + + {{ + config( + target_database=database, + target_schema=schema, + unique_key='id', + strategy='check', + check_cols=['color'] + ) + }} + + {% if var('version') == 1 %} + + select 1 as id, 'red' as color union all + select 2 as id, 'green' as color + + {% elif var('version') == 2 %} + + select 1 as id, 'blue' as color union all + select 2 as id, 'green' as color + + {% elif var('version') == 3 %} + + select 1 as id, 'red' as color union all + select 2 as id, 'pink' as color + + {% else %} + {% do exceptions.raise_compiler_error("Got bad version: " ~ var('version')) %} + {% endif %} + +{% endsnapshot %} diff --git a/test/integration/004_simple_snapshot_test/test_snapshot_check_cols.py b/test/integration/004_simple_snapshot_test/test_snapshot_check_cols.py new file mode 100644 index 00000000000..0416ed97eff --- /dev/null +++ b/test/integration/004_simple_snapshot_test/test_snapshot_check_cols.py @@ -0,0 +1,55 @@ +from test.integration.base import DBTIntegrationTest, use_profile +import dbt.exceptions + + +class TestSimpleSnapshotFiles(DBTIntegrationTest): + NUM_SNAPSHOT_MODELS = 1 + + @property + def schema(self): + return "simple_snapshot_004" + + @property + def models(self): + return "models" + + @property + def project_config(self): + return { + "snapshot-paths": ['check-snapshots'], + "test-paths": ['check-snapshots-expected'], + "source-paths": [], + } + + def test_snapshot_check_cols_cycle(self): + results = self.run_dbt(["snapshot", '--vars', 'version: 1']) + self.assertEqual(len(results), 1) + + results = self.run_dbt(["snapshot", '--vars', 'version: 2']) + self.assertEqual(len(results), 1) + + results = self.run_dbt(["snapshot", '--vars', 'version: 3']) + self.assertEqual(len(results), 1) + + def assert_expected(self): + self.run_dbt(['test', '--data', '--vars', 'version: 3']) + + @use_profile('snowflake') + def test__snowflake__simple_snapshot(self): + self.test_snapshot_check_cols_cycle() + self.assert_expected() + + @use_profile('postgres') + def test__postgres__simple_snapshot(self): + self.test_snapshot_check_cols_cycle() + self.assert_expected() + + @use_profile('bigquery') + def test__bigquery__simple_snapshot(self): + self.test_snapshot_check_cols_cycle() + self.assert_expected() + + @use_profile('redshift') + def test__redshift__simple_snapshot(self): + self.test_snapshot_check_cols_cycle() + self.assert_expected() From b6e7351431c2666e63d2acb7f79ac387ab7df658 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Mon, 22 Jul 2019 11:14:03 -0400 Subject: [PATCH 14/37] snapshot surrogate key whitespace control --- .../macros/materializations/snapshot/strategies.sql | 4 ++-- .../dbt/include/bigquery/macros/materializations/snapshot.sql | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql b/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql index 0fa1abbe68e..6de9151ddff 100644 --- a/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql +++ b/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql @@ -41,10 +41,10 @@ {% macro default__snapshot_hash_arguments(args) -%} - md5({% for arg in args %} + md5({%- for arg in args -%} coalesce(cast({{ arg }} as varchar ), '') {% if not loop.last %} || '|' || {% endif %} - {% endfor %}) + {%- endfor -%}) {%- endmacro %} diff --git a/plugins/bigquery/dbt/include/bigquery/macros/materializations/snapshot.sql b/plugins/bigquery/dbt/include/bigquery/macros/materializations/snapshot.sql index 2cfbdb2956e..836a44c8d72 100644 --- a/plugins/bigquery/dbt/include/bigquery/macros/materializations/snapshot.sql +++ b/plugins/bigquery/dbt/include/bigquery/macros/materializations/snapshot.sql @@ -1,7 +1,7 @@ {% macro bigquery__snapshot_hash_arguments(args) -%} - to_hex(md5(concat({% for arg in args %} + to_hex(md5(concat({%- for arg in args -%} coalesce(cast({{ arg }} as string), ''){% if not loop.last %}, '|',{% endif -%} - {% endfor %} + {%- endfor -%} ))) {%- endmacro %} From 709ee2a0e84006bdc4a6e9d5c73a152c1dcdf1be Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Tue, 23 Jul 2019 17:07:55 -0600 Subject: [PATCH 15/37] Add environment variables for macro debugging flags --- core/dbt/clients/jinja.py | 53 +++++++++++++++++++++++++++----------- core/dbt/context/common.py | 9 +++++++ 2 files changed, 47 insertions(+), 15 deletions(-) diff --git a/core/dbt/clients/jinja.py b/core/dbt/clients/jinja.py index c193f0542f3..9cadb2aa082 100644 --- a/core/dbt/clients/jinja.py +++ b/core/dbt/clients/jinja.py @@ -1,6 +1,7 @@ import codecs import linecache import os +import tempfile import jinja2 import jinja2._compat @@ -18,6 +19,34 @@ from dbt.logger import GLOBAL_LOGGER as logger # noqa +def _linecache_inject(source, write): + if write: + # this is the only reliable way to accomplish this. Obviously, it's + # really darn noisy and will fill your temporary directory + tmp_file = tempfile.NamedTemporaryFile( + prefix='dbt-macro-compiled-', + suffix='.py', + delete=False, + mode='w+', + encoding='utf-8', + ) + tmp_file.write(source) + filename = tmp_file.name + else: + filename = codecs.encode(os.urandom(12), 'hex').decode('ascii') + + # encode, though I don't think this matters + filename = jinja2._compat.encode_filename(filename) + # put ourselves in the cache + linecache.cache[filename] = ( + len(source), + None, + [line + '\n' for line in source.splitlines()], + filename + ) + return filename + + class MacroFuzzParser(jinja2.parser.Parser): def parse_macro(self): node = jinja2.nodes.Macro(lineno=next(self.stream).lineno) @@ -43,22 +72,16 @@ def _parse(self, source, name, filename): def _compile(self, source, filename): """Override jinja's compilation to stash the rendered source inside - the python linecache for debugging. + the python linecache for debugging when the appropriate environment + variable is set. + + If the value is 'write', also write the files to disk. + WARNING: This can write a ton of data if you aren't careful. """ - if filename == '