diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 00000000..32b2b41e --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,35 @@ +version: 2.1 + +orbs: + python: circleci/python@1.1.0 + +jobs: + build-and-test: + docker: + - image: dataders/pyodbc:1.4 + - image: mcr.microsoft.com/mssql/server:2019-latest + environment: + ACCEPT_EULA: 'yes' + MSSQL_SA_PASSWORD: 5atyaNadella + MSSQL_IP_ADDRESS: 0.0.0.0 + executor: python/default + steps: + - checkout + - run: + name: wait for SQL Server container to set up + command: sleep 30 + - run: + name: test connection via SQL CMD + command: sqlcmd -S 'localhost,1433' -U sa -P 5atyaNadella -Q 'create database blog' + - python/install-packages: + pkg-manager: pip + - run: + name: Test adapter against dbt-adapter-tests + command: tox -e integration-synapse + +workflows: + main: + jobs: + - build-and-test: + context: + - DBT_SYNAPSE_PROFILE diff --git a/README.md b/README.md index 5230f609..293eea87 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,16 @@ port: 1433 schema: schemaname ``` +### Security +Encryption is not enabled by default, unless you specify it. + +To enable encryption, add the following to your target definition. This is the default encryption strategy recommended by MSFT. For more information see [this docs page](https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/connection-string-syntax#using-trustservercertificate?WT.mc_id=DP-MVP-5003930) +```yaml +encrypt: true # adds "Encrypt=Yes" to connection string +trust_cert: false +``` +For a fully-secure, encrypted connection, you must enable `trust_cert: false` because `"TrustServerCertificate=Yes"` is default for `dbt-sqlserver` in order to not break already defined targets. 
+ ### standard SQL Server authentication SQL Server credentials are supported for on-prem as well as cloud, and it is the default authentication method for `dbt-sqlsever` ``` diff --git a/dbt/adapters/sqlserver/connections.py b/dbt/adapters/sqlserver/connections.py index 4e0b6041..5b12a58a 100644 --- a/dbt/adapters/sqlserver/connections.py +++ b/dbt/adapters/sqlserver/connections.py @@ -49,7 +49,8 @@ class SQLServerCredentials(Credentials): # "sql", "ActiveDirectoryPassword" or "ActiveDirectoryInteractive", or # "ServicePrincipal" authentication: Optional[str] = "sql" - encrypt: Optional[str] = "yes" + encrypt: Optional[bool] = False + trust_cert: Optional[bool] = False _ALIASES = { "user": "UID", @@ -61,6 +62,7 @@ class SQLServerCredentials(Credentials): "auth": "authentication", "app_id": "client_id", "app_secret": "client_secret", + "TrustServerCertificate": "trust_cert", } @property @@ -82,6 +84,7 @@ def _connection_keys(self): "client_id", "authentication", "encrypt", + "trust_cert" ) @@ -163,12 +166,17 @@ def open(cls, connection): elif getattr(credentials, "windows_login", False): con_str.append(f"trusted_connection=yes") elif type_auth == "sql": - con_str.append("Authentication=SqlPassword") + #con_str.append("Authentication=SqlPassword") con_str.append(f"UID={{{credentials.UID}}}") con_str.append(f"PWD={{{credentials.PWD}}}") - if not getattr(credentials, "encrypt", False): - con_str.append(f"Encrypt={credentials.encrypt}") + # still confused whether to use "Yes", "yes", "True", or "true" + # to learn more visit + # https://docs.microsoft.com/en-us/sql/relational-databases/native-client/features/using-encryption-without-validation?view=sql-server-ver15 + if getattr(credentials, "encrypt", False) is True: + con_str.append(f"Encrypt=Yes") + if getattr(credentials, "trust_cert", False) is True: + con_str.append(f"TrustServerCertificate=Yes") con_str_concat = ';'.join(con_str) diff --git a/dbt/adapters/sqlserver/impl.py b/dbt/adapters/sqlserver/impl.py index 
ddd0cbae..8cab4554 100644 --- a/dbt/adapters/sqlserver/impl.py +++ b/dbt/adapters/sqlserver/impl.py @@ -1,6 +1,11 @@ from dbt.adapters.sql import SQLAdapter from dbt.adapters.sqlserver import SQLServerConnectionManager +from dbt.adapters.base.relation import BaseRelation import agate +from typing import ( + Optional, Tuple, Callable, Iterable, Type, Dict, Any, List, Mapping, + Iterator, Union, Set +) class SQLServerAdapter(SQLAdapter): @@ -34,3 +39,90 @@ def convert_number_type(cls, agate_table, col_idx): @classmethod def convert_time_type(cls, agate_table, col_idx): return "datetime" + + # Methods used in adapter tests + def timestamp_add_sql( + self, add_to: str, number: int = 1, interval: str = "hour" + ) -> str: + # note: 'interval' is not supported for T-SQL + # for backwards compatibility, we're compelled to set some sort of + # default. A lot of searching has led me to believe that the + # '+ interval' syntax used in postgres/redshift is relatively common + # and might even be the SQL standard's intention. + return f"DATEADD({interval},{number},{add_to})" + + def string_add_sql( + self, add_to: str, value: str, location='append', + ) -> str: + """ + `+` is T-SQL's string concatenation operator + """ + if location == 'append': + return f"{add_to} + '{value}'" + elif location == 'prepend': + return f"'{value}' + {add_to}" + else: + raise RuntimeException( + f'Got an unexpected location value of "{location}"' + ) + + def get_rows_different_sql( + self, + relation_a: BaseRelation, + relation_b: BaseRelation, + column_names: Optional[List[str]] = None, + except_operator: str = "EXCEPT", + ) -> str: + + """ + note: using is not supported on Synapse so COLUMNS_EQUAL_SQL is adjusted + Generate SQL for a query that returns a single row with two + columns: the number of rows that are different between the two + relations and the number of mismatched rows. + """ + # This method only really exists for test reasons. 
+ names: List[str] + if column_names is None: + columns = self.get_columns_in_relation(relation_a) + names = sorted((self.quote(c.name) for c in columns)) + else: + names = sorted((self.quote(n) for n in column_names)) + columns_csv = ", ".join(names) + + sql = COLUMNS_EQUAL_SQL.format( + columns=columns_csv, + relation_a=str(relation_a), + relation_b=str(relation_b), + except_op=except_operator, + ) + + return sql + + +COLUMNS_EQUAL_SQL = """ +with diff_count as ( + SELECT + 1 as id, + COUNT(*) as num_missing FROM ( + (SELECT {columns} FROM {relation_a} {except_op} + SELECT {columns} FROM {relation_b}) + UNION ALL + (SELECT {columns} FROM {relation_b} {except_op} + SELECT {columns} FROM {relation_a}) + ) as a +), table_a as ( + SELECT COUNT(*) as num_rows FROM {relation_a} +), table_b as ( + SELECT COUNT(*) as num_rows FROM {relation_b} +), row_count_diff as ( + select + 1 as id, + table_a.num_rows - table_b.num_rows as difference + from table_a, table_b +) +select + row_count_diff.difference as row_count_difference, + diff_count.num_missing as num_mismatched +from row_count_diff +join diff_count on row_count_diff.id = diff_count.id +""".strip() diff --git a/dbt/include/sqlserver/macros/adapters.sql b/dbt/include/sqlserver/macros/adapters.sql index 9a974ec1..41382809 100644 --- a/dbt/include/sqlserver/macros/adapters.sql +++ b/dbt/include/sqlserver/macros/adapters.sql @@ -51,10 +51,24 @@ {% endcall %} {% endmacro %} -{% macro sqlserver__drop_schema(database_name, schema_name) -%} +{% macro sqlserver__drop_schema(relation) -%} + {%- set tables_in_schema_query %} + SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES + WHERE TABLE_SCHEMA = '{{ relation.schema }}' + {% endset %} + {% set tables_to_drop = run_query(tables_in_schema_query).columns[0].values() %} + {% for table in tables_to_drop %} + {%- set schema_relation = adapter.get_relation(database=relation.database, + schema=relation.schema, + identifier=table) -%} + {% do drop_relation(schema_relation) %} + {%- 
endfor %} + {% call statement('drop_schema') -%} - drop schema if exists {{ relation.without_identifier().schema }} - {% endcall %} + IF EXISTS (SELECT * FROM sys.schemas WHERE name = '{{ relation.schema }}') + BEGIN + EXEC('DROP SCHEMA {{ relation.schema }}') + END {% endcall %} {% endmacro %} {% macro sqlserver__drop_relation(relation) -%} @@ -85,7 +99,7 @@ end {% endmacro %} -{% macro sqlserver__check_schema_exists(database, schema) -%} +{% macro sqlserver__check_schema_exists(information_schema, schema) -%} {% call statement('check_schema_exists', fetch_result=True, auto_begin=False) -%} --USE {{ database_name }} SELECT count(*) as schema_exist FROM sys.schemas WHERE name = '{{ schema }}' diff --git a/pyodbc.Dockerfile b/pyodbc.Dockerfile new file mode 100644 index 00000000..81144b02 --- /dev/null +++ b/pyodbc.Dockerfile @@ -0,0 +1,47 @@ + +FROM python:3.7-slim AS base + +ADD requirements.txt ./ + +# Setup dependencies for pyodbc +RUN \ + apt-get update && \ + apt-get install -y curl build-essential unixodbc-dev g++ apt-transport-https && \ + gpg --keyserver hkp://keys.gnupg.net --recv-keys 5072E1F5 + +# install netcat (i.e. 
`nc` command) +RUN apt install -y netcat + +RUN \ + export ACCEPT_EULA='Y' && \ + # Install pyodbc db drivers for MSSQL + curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \ + curl https://packages.microsoft.com/config/debian/9/prod.list > /etc/apt/sources.list.d/mssql-release.list && \ + apt-get update && \ + apt-get install -y msodbcsql17 odbc-postgresql mssql-tools + +# add sqlcmd to the path +ENV PATH="$PATH:/opt/mssql-tools/bin" + +# Update odbcinst.ini to make sure full path to driver is listed +RUN \ + sed 's/Driver=psql/Driver=\/usr\/lib\/x86_64-linux-gnu\/odbc\/psql/' /etc/odbcinst.ini > /tmp/temp.ini && \ + mv -f /tmp/temp.ini /etc/odbcinst.ini +# Install pip +RUN \ + pip install --upgrade pip && \ + pip install -r requirements.txt && \ + rm requirements.txt +# permission management +RUN \ + chmod +rwx /etc/ssl/openssl.cnf && \ + # change TLS back to version 1 + sed -i 's/TLSv1.2/TLSv1/g' /etc/ssl/openssl.cnf && \ + # allow weak certificates (certificate signed with SHA1) + # by downgrading OpenSSL security level from 2 to 1 + sed -i 's/SECLEVEL=2/SECLEVEL=1/g' /etc/ssl/openssl.cnf + +RUN \ + # Cleanup build dependencies + apt-get remove -y curl apt-transport-https debconf-utils g++ gcc rsync build-essential gnupg2 && \ + apt-get autoremove -y && apt-get autoclean -y \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..80015832 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +dbt-core~=0.18.0 +pyodbc>=4.0.27 +azure-identity>=1.4.0 +black~=20.8b1 +pytest-dbt-adapter~=0.3.0 +tox==3.2.0 +flake8>=3.5.0 +certifi==2020.6.20 \ No newline at end of file diff --git a/test/integration/sqlserver.dbtspec b/test/integration/sqlserver.dbtspec new file mode 100644 index 00000000..f4df03e4 --- /dev/null +++ b/test/integration/sqlserver.dbtspec @@ -0,0 +1,21 @@ + +target: + type: sqlserver + driver: "ODBC Driver 17 for SQL Server" + schema: "dbt_test_{{ var('_dbt_random_suffix') }}" 
+ host: localhost + database: msdb + username: SA + password: 5atyaNadella + port: 1433 + threads: 8 +sequences: + test_dbt_empty: empty + test_dbt_base: base + test_dbt_ephemeral: ephemeral + test_dbt_incremental: incremental + test_dbt_snapshot_strategy_timestamp: snapshot_strategy_timestamp + # test_dbt_snapshot_strategy_check_cols: snapshot_strategy_check_cols + test_dbt_data_test: data_test + test_dbt_schema_test: schema_test + # test_dbt_ephemeral_data_tests: data_test_ephemeral_models diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..3cd05a59 --- /dev/null +++ b/tox.ini @@ -0,0 +1,11 @@ +[tox] +skipsdist = True +envlist = unit, flake8, integration-synapse + +[testenv:integration-synapse] +basepython = python3 +commands = /bin/bash -c '{envpython} -m pytest -v test/integration/sqlserver.dbtspec' +passenv = DBT_SYNAPSE_DB DBT_SYNAPSE_PORT DBT_SYNAPSE_PWD DBT_SYNAPSE_SERVER DBT_SYNAPSE_UID +deps = + -r{toxinidir}/requirements.txt + -e. \ No newline at end of file