Skip to content

Commit

Permalink
Merge pull request #101 from fivetran/feature/sql-server-compatibility
Browse files Browse the repository at this point in the history
dbt-sqlserver compatibility 📯
  • Loading branch information
fivetran-jamie authored Dec 13, 2023
2 parents 617aaeb + 4b41f96 commit 4f6b212
Show file tree
Hide file tree
Showing 23 changed files with 205 additions and 91 deletions.
6 changes: 5 additions & 1 deletion .buildkite/hooks/pre-command
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,8 @@ export CI_SNOWFLAKE_DBT_WAREHOUSE=$(gcloud secrets versions access latest --secr
export CI_DATABRICKS_DBT_HOST=$(gcloud secrets versions access latest --secret="CI_DATABRICKS_DBT_HOST" --project="dbt-package-testing-363917")
export CI_DATABRICKS_DBT_HTTP_PATH=$(gcloud secrets versions access latest --secret="CI_DATABRICKS_DBT_HTTP_PATH" --project="dbt-package-testing-363917")
export CI_DATABRICKS_DBT_TOKEN=$(gcloud secrets versions access latest --secret="CI_DATABRICKS_DBT_TOKEN" --project="dbt-package-testing-363917")
export CI_DATABRICKS_DBT_CATALOG=$(gcloud secrets versions access latest --secret="CI_DATABRICKS_DBT_CATALOG" --project="dbt-package-testing-363917")
export CI_DATABRICKS_DBT_CATALOG=$(gcloud secrets versions access latest --secret="CI_DATABRICKS_DBT_CATALOG" --project="dbt-package-testing-363917")
# SQL Server connection credentials for the dbt-sqlserver integration tests,
# fetched from Google Secret Manager with the same pattern as the other
# warehouse secrets above. These are consumed by integration_tests/ci/sample.profiles.yml.
export CI_SQLSERVER_DBT_SERVER=$(gcloud secrets versions access latest --secret="CI_SQLSERVER_DBT_SERVER" --project="dbt-package-testing-363917")
export CI_SQLSERVER_DBT_DATABASE=$(gcloud secrets versions access latest --secret="CI_SQLSERVER_DBT_DATABASE" --project="dbt-package-testing-363917")
export CI_SQLSERVER_DBT_USER=$(gcloud secrets versions access latest --secret="CI_SQLSERVER_DBT_USER" --project="dbt-package-testing-363917")
export CI_SQLSERVER_DBT_PASS=$(gcloud secrets versions access latest --secret="CI_SQLSERVER_DBT_PASS" --project="dbt-package-testing-363917")
17 changes: 16 additions & 1 deletion .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,19 @@ steps:
- "CI_DATABRICKS_DBT_TOKEN"
- "CI_DATABRICKS_DBT_CATALOG"
commands: |
bash .buildkite/scripts/run_models.sh databricks
bash .buildkite/scripts/run_models.sh databricks
- label: ":azure: Run Tests - SQLServer"
key: "run_dbt_sqlserver"
plugins:
- docker#v3.13.0:
image: "python:3.8"
shell: [ "/bin/bash", "-e", "-c" ]
environment:
- "BASH_ENV=/tmp/.bashrc"
- "CI_SQLSERVER_DBT_SERVER"
- "CI_SQLSERVER_DBT_DATABASE"
- "CI_SQLSERVER_DBT_USER"
- "CI_SQLSERVER_DBT_PASS"
commands: |
bash .buildkite/scripts/run_models.sh sqlserver
25 changes: 24 additions & 1 deletion .buildkite/scripts/run_models.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,28 @@ apt-get install libsasl2-dev
# Create an isolated virtualenv for the integration-test run and install the
# adapter requirements for the target warehouse passed as $1.
python3 -m venv venv
. venv/bin/activate
pip install --upgrade pip setuptools

if [[ "$1" == "sqlserver" ]]; then
  pip install -r integration_tests/requirements_sqlserver.txt

  # Register the Microsoft apt repository so the SQL Server ODBC driver and
  # command-line tools can be installed (Debian 12 image).
  curl -sSL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > /usr/share/keyrings/microsoft-prod.gpg
  curl -sSL https://packages.microsoft.com/config/debian/12/prod.list > /etc/apt/sources.list.d/mssql-release.list

  apt-get update
  # ACCEPT_EULA is required for the Microsoft packages; install the driver,
  # tools, and unixODBC headers in one pass.
  ACCEPT_EULA=Y apt-get install -y msodbcsql18 mssql-tools18 unixodbc-dev

  # Make sqlcmd & friends available both in later steps (via ~/.bashrc) and in
  # this shell. NOTE(review): sourcing ~/.bashrc in a non-interactive shell is
  # a no-op on Debian images, so we export PATH directly instead.
  echo 'export PATH="$PATH:/opt/mssql-tools18/bin"' >> ~/.bashrc
  export PATH="$PATH:/opt/mssql-tools18/bin"

  # Rebuild pyodbc from source so it links against the unixODBC just installed
  # (presumably the prebuilt wheel links a different unixODBC — TODO confirm).
  pip uninstall -y pyodbc
  pip install --no-cache-dir --no-binary :all: pyodbc==4.0.39

  # odbcinst -j   # uncomment to debug ODBC driver registration

else
  pip install -r integration_tests/requirements.txt
fi

# Install the CI dbt profiles so `dbt` can resolve the target passed as $1.
mkdir -p ~/.dbt
cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml

Expand All @@ -26,4 +47,6 @@ dbt test --target "$db"
# Re-run the package with the optional sources disabled to exercise the
# disabled-source code paths: first a full refresh, then an incremental run,
# then the test suite.
dbt run --vars '{fivetran_platform__usage_pricing: false, fivetran_platform_using_destination_membership: false, fivetran_platform_using_user: false}' --target "$db" --full-refresh
dbt run --vars '{fivetran_platform__usage_pricing: false, fivetran_platform_using_destination_membership: false, fivetran_platform_using_user: false}' --target "$db"
dbt test --target "$db"
# Schema cleanup is skipped for SQL Server — NOTE(review): presumably
# drop_schemas_automation is not supported by the sqlserver adapter; confirm.
if [ "$1" != "sqlserver" ]; then
dbt run-operation fivetran_utils.drop_schemas_automation --target "$db"
fi
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ logs/
.DS_Store
dbt_packages/
integration_tests/.DS_Store
integration_tests/seeds/.DS_Store
integration_tests/seeds/.DS_Store
integration_tests/package-lock.yml
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
# dbt_fivetran_log v1.4.0

## Feature Updates
- This release introduces compatibility with **SQL Server** 🥳 🎆 🍾 ([PR #101](https://github.com/fivetran/dbt_fivetran_log/pull/101))

## Bug Fixes
- Adjusts the uniqueness test on the recently introduced `fivetran_platform__audit_user_activity` model to test on `log_id` and `occurred_at` ([PR #102](https://github.com/fivetran/dbt_fivetran_log/pull/102)).
- Previously, the `log_id` was erroneously considered the primary key of this model.

## Under the Hood
- Removed `order by` from the final `select` statement in each model. This was done to reduce compute costs from the models ([PR #101](https://github.com/fivetran/dbt_fivetran_log/pull/101)).
- Converted all `group by`'s to explicitly reference the names of columns we are grouping by, instead of grouping by column number. This was necessary for SQL Server compatibility, as implicit groupings are not supported ([PR #101](https://github.com/fivetran/dbt_fivetran_log/pull/101)).

# dbt_fivetran_log v1.3.0

## 🚨 Breaking Changes 🚨
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ Refer to the table below for a detailed view of all models materialized by defau
# 🎯 How do I use the dbt package?
## Step 1: Pre-Requisites
- **Connector**: Have the Fivetran Platform connector syncing data into your warehouse.
- **Database support**: This package has been tested on **BigQuery**, **Snowflake**, **Redshift**, **Postgres**, and **Databricks**. Ensure you are using one of these supported databases.
- **Database support**: This package has been tested on **BigQuery**, **Snowflake**, **Redshift**, **Postgres**, **Databricks**, and **SQL Server**. Ensure you are using one of these supported databases.

### Databricks Dispatch Configuration
If you are using a Databricks destination with this package you will need to add the below (or a variation of the below) dispatch configuration within your `dbt_project.yml`. This is required in order for the package to accurately search for macros within the `dbt-labs/spark_utils` then the `dbt-labs/dbt_utils` packages respectively.
Expand Down
4 changes: 2 additions & 2 deletions dbt_project.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
config-version: 2
name: 'fivetran_log'
version: '1.3.0'
version: '1.4.0'
require-dbt-version: [">=1.3.0", "<2.0.0"]

models:
Expand All @@ -22,4 +22,4 @@ vars:
destination_membership: "{{ source('fivetran_platform', 'destination_membership') }}"
log: "{{ source('fivetran_platform', 'log') }}"
user: "{{ source('fivetran_platform', 'user') }}"
usage_cost: "{{ source('fivetran_platform', 'usage_cost') }}"
usage_cost: "{{ source('fivetran_platform', 'usage_cost') }}"
2 changes: 1 addition & 1 deletion docs/catalog.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/manifest.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/run_results.json

Large diffs are not rendered by default.

12 changes: 11 additions & 1 deletion integration_tests/ci/sample.profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,14 @@ integration_tests:
schema: fivetran_platform_integration_tests
threads: 2
token: "{{ env_var('CI_DATABRICKS_DBT_TOKEN') }}"
type: databricks
type: databricks
sqlserver:
type: sqlserver
driver: 'ODBC Driver 18 for SQL Server'
server: "{{ env_var('CI_SQLSERVER_DBT_SERVER') }}"
port: 1433
database: "{{ env_var('CI_SQLSERVER_DBT_DATABASE') }}"
schema: fivetran_platform_integration_tests
user: "{{ env_var('CI_SQLSERVER_DBT_USER') }}"
password: "{{ env_var('CI_SQLSERVER_DBT_PASS') }}"
threads: 8
24 changes: 12 additions & 12 deletions integration_tests/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: 'fivetran_log_integration_tests'
version: '1.3.0'
version: '1.4.0'

config-version: 2
profile: 'integration_tests'
Expand Down Expand Up @@ -29,35 +29,35 @@ models:
seeds:
fivetran_log_integration_tests:
+column_types:
_fivetran_synced: timestamp
_fivetran_synced: "{{ 'datetime2' if target.type == 'sqlserver' else 'timestamp' }}"
account:
+column_types:
created_at: timestamp
created_at: "{{ 'datetime2' if target.type == 'sqlserver' else 'timestamp' }}"
incremental_mar:
+column_types:
measured_date: timestamp
measured_date: "{{ 'datetime2' if target.type == 'sqlserver' else 'timestamp' }}"
incremental_rows: "{{ 'int64' if target.type == 'bigquery' else 'bigint' }}"
connector:
+column_types:
signed_up: timestamp
signed_up: "{{ 'datetime2' if target.type == 'sqlserver' else 'timestamp' }}"
credits_used:
+column_types:
credits_consumed: "{{ 'int64' if target.type == 'bigquery' else 'bigint' }}"
destination:
+column_types:
created_at: timestamp
id: "{{ 'string' if target.type in ('bigquery', 'spark', 'databricks') else 'varchar' }}"
created_at: "{{ 'datetime2' if target.type == 'sqlserver' else 'timestamp' }}"
id: "{{ 'string' if target.type in ('bigquery', 'spark', 'databricks') else 'varchar' if target.type != 'sqlserver' else 'varchar(256)' }}"
destination_membership:
+column_types:
activated_at: timestamp
joined_at: timestamp
activated_at: "{{ 'datetime2' if target.type == 'sqlserver' else 'timestamp' }}"
joined_at: "{{ 'datetime2' if target.type == 'sqlserver' else 'timestamp' }}"
log:
+column_types:
time_stamp: timestamp
transformation_id: "{{ 'string' if target.type in ('bigquery', 'spark', 'databricks') else 'varchar' }}"
time_stamp: "{{ 'datetime2' if target.type == 'sqlserver' else 'timestamp' }}"
transformation_id: "{{ 'string' if target.type in ('bigquery', 'spark', 'databricks') else 'varchar' if target.type != 'sqlserver' else 'varchar(256)' }}"
user:
+column_types:
created_at: timestamp
created_at: "{{ 'datetime2' if target.type == 'sqlserver' else 'timestamp' }}"

clean-targets: # directories to be removed by `dbt clean`
- "target"
Expand Down
2 changes: 2 additions & 0 deletions integration_tests/requirements_sqlserver.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
dbt-sqlserver>=1.4.0,<2.0.0
pyodbc!=4.0.36,!=4.0.37,~=4.0.35
4 changes: 2 additions & 2 deletions macros/get_brand_columns.sql
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
{% macro get_connector_columns() %}

{% set columns = [
{"name": "_fivetran_deleted", "datatype": "boolean"},
{"name": "_fivetran_deleted", "datatype": dbt.type_boolean()},
{"name": "_fivetran_synced", "datatype": dbt.type_timestamp()},
{"name": "connecting_user_id", "datatype": dbt.type_string()},
{"name": "connector_id", "datatype": dbt.type_string()},
{"name": "connector_name", "datatype": dbt.type_string()},
{"name": "connector_type", "datatype": dbt.type_string()},
{"name": "connector_type_id", "datatype": dbt.type_string()},
{"name": "destination_id", "datatype": dbt.type_string()},
{"name": "paused", "datatype": "boolean"},
{"name": "paused", "datatype": dbt.type_boolean()},
{"name": "service_version", "datatype": dbt.type_int()},
{"name": "signed_up", "datatype": dbt.type_timestamp()}
] %}
Expand Down
16 changes: 10 additions & 6 deletions models/fivetran_platform.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,14 @@ models:
- name: errors_since_last_completed_sync
description: >
Aggregated line-separated list of error messages (in JSON format) raised since the last
sync completion.
sync completion. Included by default for non-SQL Server targets, but can be disabled by setting
the `fivetran_platform_using_sync_alert_messages` var to False.
- name: warnings_since_last_completed_sync
description: >
Aggregated line-separated list of warning messages (in JSON format) raised since the last
sync completion.
sync completion. Included by default for non-SQL Server targets, but can be disabled by setting
the `fivetran_platform_using_sync_alert_messages` var to False.
- name: fivetran_platform__mar_table_history
Expand Down Expand Up @@ -287,6 +289,11 @@ models:
Each record represents a user-triggered action in your Fivetran instance. This model is intended for audit-trail purposes, as it can be very helpful
when trying to trace a user action to a [log event](https://fivetran.com/docs/logs#logeventlist) such as a schema change, sync frequency update,
manual update, broken connection, etc.
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- log_id
- occurred_at
columns:
- name: date_day
description: Date on which the user action occurred.
Expand Down Expand Up @@ -323,7 +330,4 @@ models:
- name: message_data
description: The details of the event in JSON format.
- name: log_id
description: Unique ID of the log record.
tests:
- unique
- not_null
description: Unique ID of the log record.
17 changes: 14 additions & 3 deletions models/fivetran_platform__audit_table.sql
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ with sync_log as (

-- Capture the latest timestamp in a call statement instead of a subquery for optimizing BQ costs on incremental runs
{%- call statement('max_sync_start', fetch_result=True) -%}
select date(max(sync_start)) from {{ this }}
select cast(max(sync_start) as date) from {{ this }}
{%- endcall -%}

-- load the result from the above query into a new variable
Expand All @@ -32,7 +32,7 @@ with sync_log as (
{%- set max_sync_start = query_result['data'][0][0] -%}

-- compare the new batch of data to the latest sync already stored in this model
and date(created_at) > '{{ max_sync_start }}'
and cast(created_at as date) > '{{ max_sync_start }}'

{% endif %}
),
Expand Down Expand Up @@ -127,7 +127,18 @@ sum_records_modified as (
and records_modified_log.created_at > limit_to_table_starts.sync_start
and records_modified_log.created_at < coalesce(limit_to_table_starts.sync_end, limit_to_table_starts.next_sync_start)

{{ dbt_utils.group_by(n=10) }}
-- explicit group by needed for SQL Server
group by
limit_to_table_starts.connector_id,
limit_to_table_starts.connector_name,
coalesce(records_modified_log.schema_name, limit_to_table_starts.connector_name),
limit_to_table_starts.table_name,
limit_to_table_starts.destination_id,
limit_to_table_starts.destination_name,
limit_to_table_starts.write_to_table_start,
limit_to_table_starts.write_to_table_end,
limit_to_table_starts.sync_start,
case when limit_to_table_starts.sync_end > limit_to_table_starts.next_sync_start then null else limit_to_table_starts.sync_end end
),

final as (
Expand Down
11 changes: 9 additions & 2 deletions models/fivetran_platform__audit_user_activity.sql
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,15 @@ final as (

select
{{ dbt.date_trunc('day', 'user_logs.created_at') }} as date_day,
{{ dbt_date.day_name('user_logs.created_at') }} as day_name,
{{ dbt_date.day_of_month('user_logs.created_at') }} as day_of_month,

{% if target.type != 'sqlserver' -%}
{{ dbt_date.day_name('user_logs.created_at') }} as day_name,
{{ dbt_date.day_of_month('user_logs.created_at') }} as day_of_month,
{% else -%}
format(cast(user_logs.created_at as date), 'ddd') as day_name,
day(user_logs.created_at) as day_of_month,
{% endif -%}

user_logs.created_at as occurred_at,
destination.destination_name,
destination.destination_id,
Expand Down
Loading

0 comments on commit 4f6b212

Please sign in to comment.