# Add Trino as a supported adapter.
#
# Summary of the changes in this patch:
#   * .circleci/config.yml: new `integration-tests-trino` job (Python 3.11 +
#     trinodb/trino:432) and its entry in the `test-all` workflow, gated on `hold`.
#   * README.md: list Trino among the supported adapters.
#   * integration_tests/ci/profiles.yml: new `trino` output (localhost:8080,
#     `memory` catalog, `default` schema).
#   * integration_tests/models/schema_tests/schema.yml: enable the `flags: i`
#     regex test for Trino, disable the `flags: c` regex tests for Trino, and
#     make date comparisons explicit with cast(... as date).
#   * macros/regex/regexp_instr.sql: new `trino__regexp_instr`, built on
#     regexp_position(); a -1 ("not found") result is mapped to 0.
#   * expect_column_proportion_of_unique_values_to_be_between: replace `*1.0`
#     with a cast to dbt.type_float().
#   * expect_select_column_values_to_be_unique_within_record: order row_number()
#     by the selected columns instead of the constant 1.
#   * macros/utils/datatypes.sql: `trino__type_timestamp` and
#     `trino__type_datetime`, both `timestamp(3)`.
#
# NOTE(review): this patch was recovered from a copy whose newlines had been
# collapsed. Tokens are unchanged and every hunk's line counts match its
# header, but leading whitespace and blank context lines were reconstructed
# from convention -- confirm against the target files before applying.
diff --git a/.circleci/config.yml b/.circleci/config.yml
index 73bc97e..926cb24 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -133,6 +133,42 @@ jobs:
       - store_artifacts:
           path: ./logs
 
+
+  integration-tests-trino:
+
+    docker:
+      - image: cimg/python:3.11
+      - image: trinodb/trino:432
+
+    resource_class: small
+
+    environment:
+      DBT_PROFILES_DIR: ./integration_tests/ci
+      DBT_PROJECT_DIR: ./integration_tests
+      DBT_VERSION: 1.7.*
+
+    steps:
+      - checkout
+      - run:
+          name: Install dbt adapter packages
+          command: |
+            python3 -m venv venv
+            . venv/bin/activate
+            pip install dbt-spark "dbt-trino==$DBT_VERSION"
+      - run: *dbt-deps
+      - setup_remote_docker
+      - run:
+          name: Run Trino server
+          command: |
+            docker run --name trino -p 8080:8080 -d -v `pwd`/integration_tests/docker/trino/catalog:/etc/trino/catalog trinodb/trino:432
+            timeout 5m bash -c -- 'while ! docker logs trino 2>&1 | tail -n 1 | grep "SERVER STARTED"; do sleep 2; done'
+      - run:
+          name: "Run Tests - Trino"
+          command: |
+            . venv/bin/activate
+            dbt build -t trino --project-dir $DBT_PROJECT_DIR
+
+
 workflows:
   version: 2
   test-all:
@@ -145,3 +181,6 @@ workflows:
       - integration-tests-spark-thrift:
           requires:
             - hold
+      - integration-tests-trino:
+          requires:
+            - hold
diff --git a/README.md b/README.md
index 178780a..732a612 100644
--- a/README.md
+++ b/README.md
@@ -62,6 +62,7 @@ This package supports:
 * BigQuery
 * DuckDB
 * Spark (experimental)
+* Trino
 
 For latest release, see [https://github.com/calogica/dbt-expectations/releases](https://github.com/calogica/dbt-expectations/releases)
 
diff --git a/integration_tests/ci/profiles.yml b/integration_tests/ci/profiles.yml
index ec5f964..43e729b 100644
--- a/integration_tests/ci/profiles.yml
+++ b/integration_tests/ci/profiles.yml
@@ -50,4 +50,15 @@ integration_tests:
       server_side_parameters:
         "spark.sql.parser.escapedStringLiterals": true
 
+    trino:
+      type: trino
+      method: none
+      host: localhost
+      port: 8080
+      user: admin
+      catalog: memory
+      schema: default
+      timezone: UTC
+      threads: 4
+
   target: postgres
diff --git a/integration_tests/models/schema_tests/schema.yml b/integration_tests/models/schema_tests/schema.yml
index 9424cd8..812937d 100644
--- a/integration_tests/models/schema_tests/schema.yml
+++ b/integration_tests/models/schema_tests/schema.yml
@@ -25,13 +25,13 @@ models:
               regex: "(?i)[A-Z]"
               flags: i
               config:
-                enabled: "{{ target.type == 'bigquery' }}"
+                enabled: "{{ target.type in ['bigquery', 'trino'] }}"
           # match all uppercase, case-sensitive (where implemented), should fail
           - dbt_expectations.expect_column_values_to_match_regex:
               regex: "[A-Z]"
               flags: c
               config:
-                enabled: "{{ target.type not in ['bigquery', 'spark' ] }}"
+                enabled: "{{ target.type not in ['bigquery', 'spark', 'trino'] }}"
                 error_if: "=0"
                 warn_if: "<4"
           # do not match other non-email string, should pass
@@ -72,7 +72,7 @@ models:
               regex_list: ["[A-G]", "[H-Z]"]
               flags: c
               config:
-                enabled: "{{ target.type not in ['bigquery', 'spark' ] }}"
+                enabled: "{{ target.type not in ['bigquery', 'spark', 'trino'] }}"
                 error_if: "=0"
                 warn_if: "<4"
           # match email address or other string
@@ -242,7 +242,7 @@ models:
       - dbt_expectations.expect_row_values_to_have_data_for_every_n_datepart:
           date_col: date_day
           date_part: day
-          exclusion_condition: not(date_day = '2021-10-19')
+          exclusion_condition: not(date_day = cast('2021-10-19' as date))
       - dbt_expectations.expect_row_values_to_have_data_for_every_n_datepart:
           date_col: date_day
           date_part: day
@@ -329,14 +329,14 @@ models:
       - name: row_value
         tests:
           - dbt_expectations.expect_column_distinct_count_to_equal:
-              row_condition: date_day = {{ dbt_date.yesterday() }}
+              row_condition: cast(date_day as date) = {{ dbt_date.yesterday() }}
               value: 10
           - dbt_expectations.expect_column_distinct_count_to_be_greater_than:
-              row_condition: date_day = {{ dbt_date.yesterday() }}
+              row_condition: cast(date_day as date) = {{ dbt_date.yesterday() }}
               value: 1
           - dbt_expectations.expect_column_distinct_count_to_be_less_than:
               value: 11
-              row_condition: date_day = {{ dbt_date.yesterday() }}
+              row_condition: cast(date_day as date) = {{ dbt_date.yesterday() }}
 
 
 
diff --git a/macros/regex/regexp_instr.sql b/macros/regex/regexp_instr.sql
index d8fe12c..a0c4c40 100644
--- a/macros/regex/regexp_instr.sql
+++ b/macros/regex/regexp_instr.sql
@@ -62,6 +62,23 @@ regexp_matches({{ source_value }}, '{{ regexp }}', '{{ flags }}')
 length(regexp_extract({{ source_value }}, '{{ regexp }}', 0))
 {% endmacro %}
 
+{% macro trino__regexp_instr(source_value, regexp, position, occurrence, is_raw, flags) %}
+    {% if flags %}
+        {{ dbt_expectations._validate_re2_flags(flags) }}
+        {# Trino prepends "(?flags)" to set flags for current group #}
+        {%- set regexp = "(?" ~ flags ~ ")" ~ regexp -%}
+    {% endif %}
+    {% if is_raw %}
+        {{ exceptions.warn(
+            "is_raw option is not supported for this adapter "
+            ~ "and is being ignored."
+        ) }}
+    {% endif %}
+    {%- set regexp_query = "regexp_position(" ~ source_value ~ ", '" ~ regexp ~ "', " ~ position ~ ", " ~ occurrence ~ ")" -%}
+    {# Trino regexp_position returns -1 if not found. Change it to 0, to be consistent with other adapters #}
+    if({{ regexp_query}} = -1, 0, {{ regexp_query}})
+{% endmacro %}
+
 {% macro _validate_flags(flags, alphabet) %}
 {% for flag in flags %}
     {% if flag not in alphabet %}
diff --git a/macros/schema_tests/aggregate_functions/expect_column_proportion_of_unique_values_to_be_between.sql b/macros/schema_tests/aggregate_functions/expect_column_proportion_of_unique_values_to_be_between.sql
index f0436a1..eb39d0f 100644
--- a/macros/schema_tests/aggregate_functions/expect_column_proportion_of_unique_values_to_be_between.sql
+++ b/macros/schema_tests/aggregate_functions/expect_column_proportion_of_unique_values_to_be_between.sql
@@ -6,7 +6,7 @@
                                                             strictly=False
                                                             ) %}
 {% set expression %}
-count(distinct {{ column_name }})*1.0/count({{ column_name }})
+cast(count(distinct {{ column_name }}) as {{ dbt.type_float() }})/count({{ column_name }})
 {% endset %}
 {{ dbt_expectations.expression_between(model,
                                         expression=expression,
diff --git a/macros/schema_tests/multi-column/expect_select_column_values_to_be_unique_within_record.sql b/macros/schema_tests/multi-column/expect_select_column_values_to_be_unique_within_record.sql
index e75f730..47ae5bb 100644
--- a/macros/schema_tests/multi-column/expect_select_column_values_to_be_unique_within_record.sql
+++ b/macros/schema_tests/multi-column/expect_select_column_values_to_be_unique_within_record.sql
@@ -40,7 +40,7 @@
 with column_values as (
 
     select
-        row_number() over(order by 1) as row_index,
+        row_number() over(order by {{ columns|join(', ') }}) as row_index,
         {% for column in columns -%}
         {{ column }}{% if not loop.last %},{% endif %}
         {%- endfor %}
diff --git a/macros/utils/datatypes.sql b/macros/utils/datatypes.sql
index f0e4571..e49004d 100644
--- a/macros/utils/datatypes.sql
+++ b/macros/utils/datatypes.sql
@@ -15,6 +15,10 @@
     timestamp without time zone
 {%- endmacro %}
 
+{% macro trino__type_timestamp() -%}
+    timestamp(3)
+{%- endmacro %}
+
 {# datetime ------------------------------------------------- #}
 
 {% macro type_datetime() -%}
@@ -41,3 +45,7 @@
 {% macro spark__type_datetime() -%}
     timestamp
 {%- endmacro %}
+
+{% macro trino__type_datetime() -%}
+    timestamp(3)
+{%- endmacro %}