From fd6f6f084f94271aa12649f7f19864f49ad867b7 Mon Sep 17 00:00:00 2001
From: Cor Zuurmond
Date: Fri, 29 Sep 2023 10:46:42 +0200
Subject: [PATCH 001/102] Add GitHub action for integration test
---
.github/workflows/integration.yml | 86 +++++++++++++++++++++++++++++++
1 file changed, 86 insertions(+)
create mode 100644 .github/workflows/integration.yml
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
new file mode 100644
index 000000000..75203a1cb
--- /dev/null
+++ b/.github/workflows/integration.yml
@@ -0,0 +1,86 @@
+# **what?**
+# Runs integration tests.
+
+# **why?**
+# Ensure code for dbt meets a certain quality standard.
+
+# **when?**
+# This will run for all PRs, when code is pushed to a release
+# branch, and when manually triggered.
+
+name: Integration tests
+
+on:
+ push:
+ branches:
+ - "main"
+ - "*.latest"
+ - "releases/*"
+ pull_request:
+ workflow_dispatch:
+
+# explicitly turn off permissions for `GITHUB_TOKEN`
+permissions: read-all
+
+# will cancel previous workflows triggered by the same event and for the same ref for PRs or same SHA otherwise
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event_name }}-${{ contains(github.event_name, 'pull_request') && github.event.pull_request.head.ref || github.sha }}
+ cancel-in-progress: true
+
+defaults:
+ run:
+ shell: bash
+
+jobs:
+ tests:
+ name: test with python ${{ matrix.python-version }}
+
+ runs-on: ubuntu-latest
+ timeout-minutes: 10
+
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version:
+ - "3.8"
+ - "3.9"
+ - "3.10"
+ - "3.11"
+
+ env:
+ TOXENV: "unit"
+ PYTEST_ADDOPTS: "-v --color=yes --csv test_results.csv"
+
+ steps:
+ - name: Check out the repository
+ uses: actions/checkout@v3
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - uses: isbang/compose-action@v1.5.1
+ with:
+ compose-file: "./docker/docker-compose.yml"
+
+ - name: Install tox
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install tox
+
+ - name: Run tox
+ run: |
+ tox -e integration-spark-session
+ tox -e integration-spark-thrift
+
+ - name: Get current date
+ if: always()
+ id: date
+ run: echo "date=$(date +'%Y-%m-%dT%H_%M_%S')" >> $GITHUB_OUTPUT # Colons are not allowed in artifacts name
+
+ - uses: actions/upload-artifact@v3
+ if: always()
+ with:
+ name: tests_results_${{ matrix.python-version }}-${{ steps.date.outputs.date }}.csv
+ path: tests_results.csv
From 795e40a01cfb1de47168eb0c8d49c231989d2e08 Mon Sep 17 00:00:00 2001
From: Cor Zuurmond
Date: Fri, 29 Sep 2023 11:54:41 +0200
Subject: [PATCH 002/102] Update tox
---
tox.ini | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)
diff --git a/tox.ini b/tox.ini
index 97017a926..e456d55d0 100644
--- a/tox.ini
+++ b/tox.ini
@@ -56,10 +56,7 @@ deps =
[testenv:integration-spark-thrift]
-allowlist_externals =
- /bin/bash
-basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark {posargs} -n4 tests/functional/adapter/*'
+description = run integration tests against a Spark thrift server
passenv =
DBT_*
PYTEST_ADDOPTS
@@ -67,12 +64,10 @@ deps =
-r{toxinidir}/requirements.txt
-r{toxinidir}/dev-requirements.txt
-e.
+commands = pytest -v --profile apache_spark {posargs} -n4 tests/functional/adapter/*
[testenv:integration-spark-session]
-allowlist_externals =
- /bin/bash
-basepython = python3.10
-commands = /bin/bash -c '{envpython} -m pytest -v --profile spark_session {posargs} -n4 tests/functional/adapter/*'
+description = run integration tests against a Spark session
passenv =
DBT_*
PYTEST_*
@@ -81,3 +76,4 @@ deps =
-r{toxinidir}/requirements.txt
-r{toxinidir}/dev-requirements.txt
-e.[session]
+commands = pytest -v --profile spark_session {posargs} -n4 tests/functional/adapter/*
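With both integration environments above now reduced to plain pytest commands, the same suites can be exercised locally. A minimal sketch, assuming the services in docker/docker-compose.yml are brought up first for the Thrift suite:

    # bring up the Spark Thrift server used by the apache_spark profile (assumed local setup)
    docker compose -f docker/docker-compose.yml up -d

    # run the two integration suites exactly as the workflow does
    tox -e integration-spark-session
    tox -e integration-spark-thrift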
From ff39c5d065e8b8ec065e5531e29107e35ccfcd6e Mon Sep 17 00:00:00 2001
From: Cor Zuurmond
Date: Fri, 29 Sep 2023 14:42:11 +0200
Subject: [PATCH 003/102] Fetch spark from https link
---
docker/Dockerfile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docker/Dockerfile b/docker/Dockerfile
index bb4d378ed..b310fde4d 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -14,7 +14,7 @@ ENV PATH="/usr/spark/bin:/usr/spark/sbin:${PATH}"
RUN apt-get update && \
apt-get install -y wget netcat procps libpostgresql-jdbc-java && \
- wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+ wget -q "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /usr/spark && \
From 1505fc6fb4d26245e18e65485e73407c867a3ef3 Mon Sep 17 00:00:00 2001
From: Cor Zuurmond
Date: Fri, 29 Sep 2023 14:42:40 +0200
Subject: [PATCH 004/102] Use Spark version 3.1.2
---
docker/Dockerfile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docker/Dockerfile b/docker/Dockerfile
index b310fde4d..d1fd5357f 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -2,7 +2,7 @@ ARG OPENJDK_VERSION=8
FROM eclipse-temurin:${OPENJDK_VERSION}-jre
ARG BUILD_DATE
-ARG SPARK_VERSION=3.3.2
+ARG SPARK_VERSION=3.1.2
ARG HADOOP_VERSION=3
LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \
From 44fe33f4bd233f508c59c527a69590de1ec5f463 Mon Sep 17 00:00:00 2001
From: Cor Zuurmond
Date: Fri, 29 Sep 2023 14:50:13 +0200
Subject: [PATCH 005/102] Separate running Spark session and thrift
---
.github/workflows/integration.yml | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 75203a1cb..d455e804b 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -69,10 +69,11 @@ jobs:
python -m pip install --upgrade pip
python -m pip install tox
- - name: Run tox
- run: |
- tox -e integration-spark-session
- tox -e integration-spark-thrift
+ - name: Run tox for Spark session
+ run: tox -e integration-spark-session
+
+ - name: Run tox for Spark thrift
+ run: tox -e integration-spark-thrift
- name: Get current date
if: always()
From 2655631fa3b6db8a7515f11495710675bca0ba4e Mon Sep 17 00:00:00 2001
From: Cor Zuurmond
Date: Fri, 29 Sep 2023 14:51:40 +0200
Subject: [PATCH 006/102] Use Spark 3.1.2 and Hadoop 3.2
---
docker/Dockerfile | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docker/Dockerfile b/docker/Dockerfile
index d1fd5357f..85d01ba8a 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -2,8 +2,8 @@ ARG OPENJDK_VERSION=8
FROM eclipse-temurin:${OPENJDK_VERSION}-jre
ARG BUILD_DATE
-ARG SPARK_VERSION=3.1.2
-ARG HADOOP_VERSION=3
+ARG SPARK_VERSION=3.1.3
+ARG HADOOP_VERSION=3.2
LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \
org.label-schema.build-date=$BUILD_DATE \
From 915f67e9203dfb891ad4a22f3db7f9251b19ab84 Mon Sep 17 00:00:00 2001
From: Cor Zuurmond
Date: Fri, 29 Sep 2023 14:57:20 +0200
Subject: [PATCH 007/102] Reset tox.ini
---
tox.ini | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/tox.ini b/tox.ini
index e456d55d0..33055a211 100644
--- a/tox.ini
+++ b/tox.ini
@@ -57,6 +57,9 @@ deps =
[testenv:integration-spark-thrift]
description = run integration tests against a Spark thrift server
+allowlist_externals =
+ /bin/bash
+basepython = python3.8
passenv =
DBT_*
PYTEST_ADDOPTS
@@ -64,10 +67,13 @@ deps =
-r{toxinidir}/requirements.txt
-r{toxinidir}/dev-requirements.txt
-e.
-commands = pytest -v --profile apache_spark {posargs} -n4 tests/functional/adapter/*
+commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark {posargs} -n4 tests/functional/adapter/*'
[testenv:integration-spark-session]
description = run integration tests against a Spark session
+allowlist_externals =
+ /bin/bash
+basepython = python3.10
passenv =
DBT_*
PYTEST_*
@@ -76,4 +82,4 @@ deps =
-r{toxinidir}/requirements.txt
-r{toxinidir}/dev-requirements.txt
-e.[session]
-commands = pytest -v --profile spark_session {posargs} -n4 tests/functional/adapter/*
+commands = /bin/bash -c '{envpython} -m pytest -v --profile spark_session {posargs} -n4 tests/functional/adapter/*'
From f0ef215e1c8186cf4270e695ec8663a5d745d127 Mon Sep 17 00:00:00 2001
From: Cor Zuurmond
Date: Fri, 29 Sep 2023 15:08:48 +0200
Subject: [PATCH 008/102] Remove base pythons in tox.ini
---
tox.ini | 2 --
1 file changed, 2 deletions(-)
diff --git a/tox.ini b/tox.ini
index 33055a211..31396b5ef 100644
--- a/tox.ini
+++ b/tox.ini
@@ -59,7 +59,6 @@ deps =
description = run integration tests against a Spark thrift server
allowlist_externals =
/bin/bash
-basepython = python3.8
passenv =
DBT_*
PYTEST_ADDOPTS
@@ -73,7 +72,6 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark {posarg
description = run integration tests against a Spark session
allowlist_externals =
/bin/bash
-basepython = python3.10
passenv =
DBT_*
PYTEST_*
From e8457df87d636324aae416c4a8eea363779f0156 Mon Sep 17 00:00:00 2001
From: Cor Zuurmond
Date: Fri, 29 Sep 2023 15:19:19 +0200
Subject: [PATCH 009/102] Fix reference to Docker compose file
---
.github/workflows/integration.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index d455e804b..517815e27 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -62,7 +62,7 @@ jobs:
- uses: isbang/compose-action@v1.5.1
with:
- compose-file: "./docker/docker-compose.yml"
+ compose-file: "./docker-compose.yml"
- name: Install tox
run: |
From 842466a2883efd3a13826410f1477a0ff84c5e8f Mon Sep 17 00:00:00 2001
From: Cor Zuurmond
Date: Fri, 29 Sep 2023 15:42:11 +0200
Subject: [PATCH 010/102] Remove timeout
---
.github/workflows/integration.yml | 1 -
1 file changed, 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 517815e27..8eafa5c72 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -36,7 +36,6 @@ jobs:
name: test with python ${{ matrix.python-version }}
runs-on: ubuntu-latest
- timeout-minutes: 10
strategy:
fail-fast: false
From 0738f2d0bcc5f30eab1cc92b4c82720ce99e3265 Mon Sep 17 00:00:00 2001
From: Cor Zuurmond
Date: Fri, 29 Sep 2023 15:55:55 +0200
Subject: [PATCH 011/102] Remove artifact steps
---
.github/workflows/integration.yml | 11 -----------
1 file changed, 11 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 8eafa5c72..9f26bd2be 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -73,14 +73,3 @@ jobs:
- name: Run tox for Spark thrift
run: tox -e integration-spark-thrift
-
- - name: Get current date
- if: always()
- id: date
- run: echo "date=$(date +'%Y-%m-%dT%H_%M_%S')" >> $GITHUB_OUTPUT # Colons are not allowed in artifacts name
-
- - uses: actions/upload-artifact@v3
- if: always()
- with:
- name: tests_results_${{ matrix.python-version }}-${{ steps.date.outputs.date }}.csv
- path: tests_results.csv
From 277bef1a2a4368d54b2b1ce41b7894c51d4f7ef1 Mon Sep 17 00:00:00 2001
From: Cor Zuurmond
Date: Fri, 29 Sep 2023 15:56:33 +0200
Subject: [PATCH 012/102] Bump Spark and Hadoop versions
---
docker/Dockerfile | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 85d01ba8a..a9b9e0a2c 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -2,8 +2,8 @@ ARG OPENJDK_VERSION=8
FROM eclipse-temurin:${OPENJDK_VERSION}-jre
ARG BUILD_DATE
-ARG SPARK_VERSION=3.1.3
-ARG HADOOP_VERSION=3.2
+ARG SPARK_VERSION=3.4.1
+ARG HADOOP_VERSION=3
LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \
org.label-schema.build-date=$BUILD_DATE \
From 8d5853d3049c5e299ab7d824ab33fc374a9894ff Mon Sep 17 00:00:00 2001
From: Cor Zuurmond
Date: Fri, 29 Sep 2023 16:08:16 +0200
Subject: [PATCH 013/102] Reset Spark and Hadoop version
---
docker/Dockerfile | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docker/Dockerfile b/docker/Dockerfile
index a9b9e0a2c..85d01ba8a 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -2,8 +2,8 @@ ARG OPENJDK_VERSION=8
FROM eclipse-temurin:${OPENJDK_VERSION}-jre
ARG BUILD_DATE
-ARG SPARK_VERSION=3.4.1
-ARG HADOOP_VERSION=3
+ARG SPARK_VERSION=3.1.3
+ARG HADOOP_VERSION=3.2
LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \
org.label-schema.build-date=$BUILD_DATE \
From 919528ab14dd731f9efa913d37b051bda8922e44 Mon Sep 17 00:00:00 2001
From: Cor Zuurmond
Date: Fri, 29 Sep 2023 16:09:09 +0200
Subject: [PATCH 014/102] Update comment
---
.github/workflows/integration.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 9f26bd2be..f4c34c5fb 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -2,7 +2,7 @@
# Runs integration tests.
# **why?**
-# Ensure code for dbt meets a certain quality standard.
+# Ensure code runs as expected.
# **when?**
# This will run for all PRs, when code is pushed to a release
From 15e48fd3f1f8d421f7f079a20ca8ba5fd5995d69 Mon Sep 17 00:00:00 2001
From: Cor Zuurmond
Date: Fri, 29 Sep 2023 16:12:25 +0200
Subject: [PATCH 015/102] Add changie
---
.changes/unreleased/Under the Hood-20230929-161218.yaml | 6 ++++++
1 file changed, 6 insertions(+)
create mode 100644 .changes/unreleased/Under the Hood-20230929-161218.yaml
diff --git a/.changes/unreleased/Under the Hood-20230929-161218.yaml b/.changes/unreleased/Under the Hood-20230929-161218.yaml
new file mode 100644
index 000000000..c82e8252e
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20230929-161218.yaml
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Add GitHub action for integration testing
+time: 2023-09-29T16:12:18.968755+02:00
+custom:
+ Author: JCZuurmond
+ Issue: "719"
From 31cb05e7d7dc6e5e63b3027a66428f22d40f86ce Mon Sep 17 00:00:00 2001
From: Colin
Date: Wed, 18 Oct 2023 16:54:42 -0700
Subject: [PATCH 016/102] add databricks and PR execution protections
---
.github/scripts/update_dbt_core_branch.sh | 20 +++
.github/scripts/update_release_branch.sh | 11 ++
.github/workflows/integration.yml | 193 +++++++++++++++++++++-
3 files changed, 215 insertions(+), 9 deletions(-)
create mode 100755 .github/scripts/update_dbt_core_branch.sh
create mode 100644 .github/scripts/update_release_branch.sh
diff --git a/.github/scripts/update_dbt_core_branch.sh b/.github/scripts/update_dbt_core_branch.sh
new file mode 100755
index 000000000..d28a40c35
--- /dev/null
+++ b/.github/scripts/update_dbt_core_branch.sh
@@ -0,0 +1,20 @@
+#!/bin/bash -e
+set -e
+
+git_branch=$1
+target_req_file="dev-requirements.txt"
+core_req_sed_pattern="s|dbt-core.git.*#egg=dbt-core|dbt-core.git@${git_branch}#egg=dbt-core|g"
+postgres_req_sed_pattern="s|dbt-core.git.*#egg=dbt-postgres|dbt-core.git@${git_branch}#egg=dbt-postgres|g"
+tests_req_sed_pattern="s|dbt-core.git.*#egg=dbt-tests|dbt-core.git@${git_branch}#egg=dbt-tests|g"
+if [[ "$OSTYPE" == darwin* ]]; then
+ # mac ships with a different version of sed that requires a delimiter arg
+ sed -i "" "$core_req_sed_pattern" $target_req_file
+ sed -i "" "$postgres_req_sed_pattern" $target_req_file
+ sed -i "" "$tests_req_sed_pattern" $target_req_file
+else
+ sed -i "$core_req_sed_pattern" $target_req_file
+ sed -i "$postgres_req_sed_pattern" $target_req_file
+ sed -i "$tests_req_sed_pattern" $target_req_file
+fi
+core_version=$(curl "https://raw.githubusercontent.com/dbt-labs/dbt-core/${git_branch}/core/dbt/version.py" | grep "__version__ = *"|cut -d'=' -f2)
+bumpversion --allow-dirty --new-version "$core_version" major
diff --git a/.github/scripts/update_release_branch.sh b/.github/scripts/update_release_branch.sh
new file mode 100644
index 000000000..75b9ccef6
--- /dev/null
+++ b/.github/scripts/update_release_branch.sh
@@ -0,0 +1,11 @@
+#!/bin/bash -e
+set -e
+
+release_branch=$1
+target_req_file=".github/workflows/nightly-release.yml"
+if [[ "$OSTYPE" == darwin* ]]; then
+ # mac ships with a different version of sed that requires a delimiter arg
+ sed -i "" "s|[0-9].[0-9].latest|$release_branch|" $target_req_file
+else
+ sed -i "s|[0-9].[0-9].latest|$release_branch|" $target_req_file
+fi
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index f4c34c5fb..684bcfab5 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -18,6 +18,11 @@ on:
- "releases/*"
pull_request:
workflow_dispatch:
+ inputs:
+ dbt-core-branch:
+ description: "branch of dbt-core to use in dev-requirements.txt"
+ required: false
+ type: string
# explicitly turn off permissions for `GITHUB_TOKEN`
permissions: read-all
@@ -32,8 +37,60 @@ defaults:
shell: bash
jobs:
- tests:
- name: test with python ${{ matrix.python-version }}
+ # generate test metadata about what files changed and the testing matrix to use
+ test-metadata:
+ # run if not a PR from a forked repository or has a label to mark as safe to test
+ if: >-
+ github.event_name != 'pull_request_target' ||
+ github.event.pull_request.head.repo.full_name == github.repository ||
+ contains(github.event.pull_request.labels.*.name, 'ok to test')
+ runs-on: ubuntu-latest
+
+ outputs:
+ matrix: ${{ steps.generate-matrix.outputs.result }}
+ run-python-tests: ${{ steps.filter.outputs.bigquery-python }}
+
+ steps:
+ - name: Check out the repository (non-PR)
+ if: github.event_name != 'pull_request_target'
+ uses: actions/checkout@v3
+ with:
+ persist-credentials: false
+
+ - name: Check out the repository (PR)
+ if: github.event_name == 'pull_request_target'
+ uses: actions/checkout@v3
+ with:
+ persist-credentials: false
+ ref: ${{ github.event.pull_request.head.sha }}
+ - name: Check if relevant files changed
+ if: github.event_name == 'pull_request_target'
+        # https://github.com/marketplace/actions/paths-changes-filter
+ # For each filter, it sets output variable named by the filter to the text:
+ # 'true' - if any of changed files matches any of filter rules
+ # 'false' - if none of changed files matches any of filter rules
+ # also, returns:
+ # `changes` - JSON array with names of all filters matching any of the changed files
+ uses: dorny/paths-filter@v2
+ id: get-changes
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+ filters: |
+ spark:
+ - 'dbt/**'
+ - 'tests/**'
+ - 'dev-requirements.txt'
+ local-tests:
+ name: test spark local against python ${{ matrix.python-version }}
+
+ # run if not a PR from a forked repository or has a label to mark as safe to test
+ # also checks that the matrix generated is not empty
+ if: >-
+ (
+ github.event_name != 'pull_request_target' ||
+ github.event.pull_request.head.repo.full_name == github.repository ||
+ contains(github.event.pull_request.labels.*.name, 'ok to test')
+ )
runs-on: ubuntu-latest
@@ -45,31 +102,149 @@ jobs:
- "3.9"
- "3.10"
- "3.11"
+ test:
+ - "spark-thrift"
+ - "spark-session"
env:
- TOXENV: "unit"
PYTEST_ADDOPTS: "-v --color=yes --csv test_results.csv"
+ DBT_INVOCATION_ENV: github-actions
+ DD_CIVISIBILITY_AGENTLESS_ENABLED: true
+ DD_API_KEY: ${{ secrets.DATADOG_API_KEY }}
+ DD_SITE: datadoghq.com
+ DD_ENV: ci
+ DD_SERVICE: ${{ github.event.repository.name }}
steps:
- name: Check out the repository
+ if: github.event_name != 'pull_request_target'
uses: actions/checkout@v3
+ with:
+ persist-credentials: false
+
+      # explicitly checkout the branch for the PR,
+ # this is necessary for the `pull_request_target` event
+ - name: Check out the repository (PR)
+ if: github.event_name == 'pull_request_target'
+ uses: actions/checkout@v3
+ with:
+ persist-credentials: false
+ ref: ${{ github.event.pull_request.head.sha }}
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
+ - name: Install python dependencies
+ run: |
+ python -m pip install --user --upgrade pip
+ python -m pip install tox
+ python -m pip --version
+ tox --version
+
+ - name: Update dev_requirements.txt
+ if: inputs.dbt-core-branch != ''
+ run: |
+ pip install bumpversion
+ ./.github/scripts/update_dbt_core_branch.sh ${{ inputs.dbt-core-branch }}
+
- uses: isbang/compose-action@v1.5.1
+ if: ${{ matrix.test == 'spark-thrift'}}
with:
compose-file: "./docker-compose.yml"
- - name: Install tox
+ - name: Run tox for Spark ${{ matrix.test }}
+ run: tox -e integration-${{ matrix.test }}
+
+ databricks-tests:
+ name: test spark databricks against python ${{ matrix.python-version }}
+ # run if not a PR from a forked repository or has a label to mark as safe to test
+ # also checks that the matrix generated is not empty
+ if: >-
+ (
+ github.event_name != 'pull_request_target' ||
+ github.event.pull_request.head.repo.full_name == github.repository ||
+ contains(github.event.pull_request.labels.*.name, 'ok to test')
+ )
+
+ runs-on: ubuntu-latest
+ container:
+ image: "fishtownanalytics/test-container:10"
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version:
+ - "3.8"
+ - "3.9"
+ - "3.10"
+ - "3.11"
+ test:
+ - "databricks-odbc-sql-endpoint"
+ - "databricks-odbc-cluster"
+ - "spark-databricks-http"
+
+ env:
+ PYTEST_ADDOPTS: "-v --color=yes --csv test_results.csv"
+ DBT_INVOCATION_ENV: github-actions
+ DD_CIVISIBILITY_AGENTLESS_ENABLED: true
+ DD_API_KEY: ${{ secrets.DATADOG_API_KEY }}
+ DD_SITE: datadoghq.com
+ DD_ENV: ci
+ DD_SERVICE: ${{ github.event.repository.name }}
+ DBT_DATABRICKS_CLUSTER_NAME: ${{ secrets.DBT_DATABRICKS_CLUSTER_NAME }}
+ DBT_DATABRICKS_HOSTNAME: ${{ secrets.DBT_DATABRICKS_HOST }}
+ DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }}
+ DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }}
+ DBT_DATABRICS_USER: ${{ secrets.DBT_DATABRICKS_USER }}
+
+ steps:
+ - name: Check out the repository
+ if: github.event_name != 'pull_request_target'
+ uses: actions/checkout@v3
+ with:
+ persist-credentials: false
+
+      # explicitly checkout the branch for the PR,
+ # this is necessary for the `pull_request_target` event
+ - name: Check out the repository (PR)
+ if: github.event_name == 'pull_request_target'
+ uses: actions/checkout@v3
+ with:
+ persist-credentials: false
+ ref: ${{ github.event.pull_request.head.sha }}
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install python dependencies
run: |
- python -m pip install --upgrade pip
+ python -m pip install --user --upgrade pip
python -m pip install tox
+ python -m pip --version
+ tox --version
- - name: Run tox for Spark session
- run: tox -e integration-spark-session
+ - name: Update dev_requirements.txt
+ if: inputs.dbt-core-branch != ''
+ run: |
+ pip install bumpversion
+ ./.github/scripts/update_dbt_core_branch.sh ${{ inputs.dbt-core-branch }}
- - name: Run tox for Spark thrift
- run: tox -e integration-spark-thrift
+ - name: Configure ODBC
+ if: ${{ matrix.test != "spark-databricks-http" }}
+ run: |
+ apt-get update && apt-get install -y --no-install-recommends \
+ g++ \
+ unixodbc-dev \
+ unzip
+
+ unzip /tmp/simba_odbc.zip -d /tmp/ \
+ && dpkg -i /tmp/SimbaSparkODBC-*/*.deb \
+ && echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \
+ && rm /tmp/simba_odbc.zip \
+ && rm -rf /tmp/SimbaSparkODBC*
+
+ - name: Run tox for Spark ${{ matrix.test }}
+ run: tox -e integration-${{ matrix.test }}
\ No newline at end of file
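The update_dbt_core_branch.sh script added above rewrites the git-pinned dbt-core requirements to a caller-supplied branch, then reads that branch's __version__ and bumps the local package version to match. A minimal sketch of the intended effect; the requirement line shown is an assumption for illustration (the script only relies on dev-requirements.txt pinning dbt-core via a dbt-core.git...#egg=dbt-core URL):

    # assumed starting pin in dev-requirements.txt:
    #   git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
    ./.github/scripts/update_dbt_core_branch.sh my-core-branch
    # the sed pattern inserts '@my-core-branch' before '#egg=dbt-core', giving:
    #   git+https://github.com/dbt-labs/dbt-core.git@my-core-branch#egg=dbt-core&subdirectory=core

    # the workflow_dispatch input added above can drive the same path from CI (assumes the gh CLI)
    gh workflow run integration.yml -f dbt-core-branch=my-core-branch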
From fd54d7f78ccc3b42ac12d7b3f95b99992996e606 Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 09:47:53 -0700
Subject: [PATCH 017/102] use single quotes
---
.github/workflows/integration.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 684bcfab5..a37744ca2 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -233,7 +233,7 @@ jobs:
./.github/scripts/update_dbt_core_branch.sh ${{ inputs.dbt-core-branch }}
- name: Configure ODBC
- if: ${{ matrix.test != "spark-databricks-http" }}
+ if: ${{ matrix.test != 'spark-databricks-http' }}
run: |
apt-get update && apt-get install -y --no-install-recommends \
g++ \
From 8de83390c8b7c4a169df33982cc61b59337e1dc2 Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 09:53:06 -0700
Subject: [PATCH 018/102] remove `_target` suffix
---
.github/workflows/integration.yml | 30 ++++++++++++------------------
1 file changed, 12 insertions(+), 18 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index a37744ca2..f33ade986 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -41,7 +41,7 @@ jobs:
test-metadata:
# run if not a PR from a forked repository or has a label to mark as safe to test
if: >-
- github.event_name != 'pull_request_target' ||
+ github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.full_name == github.repository ||
contains(github.event.pull_request.labels.*.name, 'ok to test')
runs-on: ubuntu-latest
@@ -52,19 +52,19 @@ jobs:
steps:
- name: Check out the repository (non-PR)
- if: github.event_name != 'pull_request_target'
+ if: github.event_name != 'pull_request'
uses: actions/checkout@v3
with:
persist-credentials: false
- name: Check out the repository (PR)
- if: github.event_name == 'pull_request_target'
+ if: github.event_name == 'pull_request'
uses: actions/checkout@v3
with:
persist-credentials: false
ref: ${{ github.event.pull_request.head.sha }}
- name: Check if relevant files changed
- if: github.event_name == 'pull_request_target'
+ if: github.event_name == 'pull_request'
        # https://github.com/marketplace/actions/paths-changes-filter
# For each filter, it sets output variable named by the filter to the text:
# 'true' - if any of changed files matches any of filter rules
@@ -87,7 +87,7 @@ jobs:
# also checks that the matrix generated is not empty
if: >-
(
- github.event_name != 'pull_request_target' ||
+ github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.full_name == github.repository ||
contains(github.event.pull_request.labels.*.name, 'ok to test')
)
@@ -99,9 +99,6 @@ jobs:
matrix:
python-version:
- "3.8"
- - "3.9"
- - "3.10"
- - "3.11"
test:
- "spark-thrift"
- "spark-session"
@@ -117,15 +114,15 @@ jobs:
steps:
- name: Check out the repository
- if: github.event_name != 'pull_request_target'
+ if: github.event_name != 'pull_request'
uses: actions/checkout@v3
with:
persist-credentials: false
      # explicitly checkout the branch for the PR,
- # this is necessary for the `pull_request_target` event
+ # this is necessary for the `pull_request` event
- name: Check out the repository (PR)
- if: github.event_name == 'pull_request_target'
+ if: github.event_name == 'pull_request'
uses: actions/checkout@v3
with:
persist-credentials: false
@@ -163,7 +160,7 @@ jobs:
# also checks that the matrix generated is not empty
if: >-
(
- github.event_name != 'pull_request_target' ||
+ github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.full_name == github.repository ||
contains(github.event.pull_request.labels.*.name, 'ok to test')
)
@@ -176,9 +173,6 @@ jobs:
matrix:
python-version:
- "3.8"
- - "3.9"
- - "3.10"
- - "3.11"
test:
- "databricks-odbc-sql-endpoint"
- "databricks-odbc-cluster"
@@ -200,15 +194,15 @@ jobs:
steps:
- name: Check out the repository
- if: github.event_name != 'pull_request_target'
+ if: github.event_name != 'pull_request'
uses: actions/checkout@v3
with:
persist-credentials: false
      # explicitly checkout the branch for the PR,
- # this is necessary for the `pull_request_target` event
+ # this is necessary for the `pull_request` event
- name: Check out the repository (PR)
- if: github.event_name == 'pull_request_target'
+ if: github.event_name == 'pull_request'
uses: actions/checkout@v3
with:
persist-credentials: false
From e85232f3e476f4f80dfe188f3395612589245f7b Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 10:33:28 -0700
Subject: [PATCH 019/102] add comment to test
---
.github/workflows/integration.yml | 2 ++
1 file changed, 2 insertions(+)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index f33ade986..b4f78a1c9 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -63,6 +63,7 @@ jobs:
with:
persist-credentials: false
ref: ${{ github.event.pull_request.head.sha }}
+
- name: Check if relevant files changed
if: github.event_name == 'pull_request'
        # https://github.com/marketplace/actions/paths-changes-filter
@@ -80,6 +81,7 @@ jobs:
- 'dbt/**'
- 'tests/**'
- 'dev-requirements.txt'
+
local-tests:
name: test spark local against python ${{ matrix.python-version }}
From fe3300e22b830b4f78c6e9877ff8521ccc838019 Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 10:58:39 -0700
Subject: [PATCH 020/102] specify container user as root
---
.github/workflows/integration.yml | 1 +
1 file changed, 1 insertion(+)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index b4f78a1c9..4f45fc6ae 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -170,6 +170,7 @@ jobs:
runs-on: ubuntu-latest
container:
image: "fishtownanalytics/test-container:10"
+ options: --user root
strategy:
fail-fast: false
matrix:
From b37e14b9dc2c0279d669c2a8fcb8b098834cd27b Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 10:59:08 -0700
Subject: [PATCH 021/102] formatting
---
.github/workflows/integration.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 4f45fc6ae..72a86c92e 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -236,7 +236,7 @@ jobs:
g++ \
unixodbc-dev \
unzip
-
+
unzip /tmp/simba_odbc.zip -d /tmp/ \
&& dpkg -i /tmp/SimbaSparkODBC-*/*.deb \
&& echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \
@@ -244,4 +244,4 @@ jobs:
&& rm -rf /tmp/SimbaSparkODBC*
- name: Run tox for Spark ${{ matrix.test }}
- run: tox -e integration-${{ matrix.test }}
\ No newline at end of file
+ run: tox -e integration-${{ matrix.test }}
From 51511ecfee08958080dbb0a9c8dbe881bec7c9b3 Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 11:01:03 -0700
Subject: [PATCH 022/102] remove python setup for pre-existing container
---
.github/workflows/integration.yml | 5 -----
1 file changed, 5 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 72a86c92e..288c7ea18 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -211,11 +211,6 @@ jobs:
persist-credentials: false
ref: ${{ github.event.pull_request.head.sha }}
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
-
- name: Install python dependencies
run: |
python -m pip install --user --upgrade pip
From 98607b61458199d006ce8526e763bcc89f5426a6 Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 11:21:38 -0700
Subject: [PATCH 023/102] download simba
---
.github/workflows/integration.yml | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 288c7ea18..5f6e4b45b 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -230,8 +230,13 @@ jobs:
apt-get update && apt-get install -y --no-install-recommends \
g++ \
unixodbc-dev \
+ curl \
unzip
+ curl -OL \
+ https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip \
+ /tmp/simba_odbc.zip
+
unzip /tmp/simba_odbc.zip -d /tmp/ \
&& dpkg -i /tmp/SimbaSparkODBC-*/*.deb \
&& echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \
From e6ec41460d986cc552fa46024be471147f152920 Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 11:31:45 -0700
Subject: [PATCH 024/102] fix curl call
---
.github/workflows/integration.yml | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 5f6e4b45b..764038394 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -233,9 +233,9 @@ jobs:
curl \
unzip
- curl -OL \
- https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip \
- /tmp/simba_odbc.zip
+ curl --create-dirs -OL \
+ --output-dir "/tmp/simba_odbc.zip" \
+ "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip"
unzip /tmp/simba_odbc.zip -d /tmp/ \
&& dpkg -i /tmp/SimbaSparkODBC-*/*.deb \
From 05a2c0858434686ecc5f64ac4dd3d0bc3344c325 Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 11:33:31 -0700
Subject: [PATCH 025/102] fix curl call
---
.github/workflows/integration.yml | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 764038394..9fcd701fe 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -230,8 +230,9 @@ jobs:
apt-get update && apt-get install -y --no-install-recommends \
g++ \
unixodbc-dev \
- curl \
unzip
+
+ apt-get install curl
curl --create-dirs -OL \
--output-dir "/tmp/simba_odbc.zip" \
From a89ec581eff88b1c24a1da3cebd19c8981b6cd88 Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 11:35:13 -0700
Subject: [PATCH 026/102] fix curl call
---
.github/workflows/integration.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 9fcd701fe..f8fa81ceb 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -232,7 +232,7 @@ jobs:
unixodbc-dev \
unzip
- apt-get install curl
+ apt-get install -y curl
curl --create-dirs -OL \
--output-dir "/tmp/simba_odbc.zip" \
From 2a18fad185a748cb9ac82198653d97b7f3a5b597 Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 11:39:53 -0700
Subject: [PATCH 027/102] fix curl call
---
.github/workflows/integration.yml | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index f8fa81ceb..be6443a13 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -226,6 +226,7 @@ jobs:
- name: Configure ODBC
if: ${{ matrix.test != 'spark-databricks-http' }}
+ shell: bash
run: |
apt-get update && apt-get install -y --no-install-recommends \
g++ \
@@ -235,7 +236,7 @@ jobs:
apt-get install -y curl
curl --create-dirs -OL \
- --output-dir "/tmp/simba_odbc.zip" \
+ --output "/tmp/simba_odbc.zip" \
"https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip"
unzip /tmp/simba_odbc.zip -d /tmp/ \
From 1481396d6307b93f0b21aed722a6299bb50d29ba Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 11:44:48 -0700
Subject: [PATCH 028/102] fix curl call
---
.github/workflows/integration.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index be6443a13..a47d5271f 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -235,7 +235,7 @@ jobs:
apt-get install -y curl
- curl --create-dirs -OL \
+ curl --create-dirs \
--output "/tmp/simba_odbc.zip" \
"https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip"
From 31b427c47b6c064ba284b91818d964b3b03eff3a Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 11:50:51 -0700
Subject: [PATCH 029/102] fix curl call
---
.github/workflows/integration.yml | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index a47d5271f..d9e71d5e6 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -234,16 +234,14 @@ jobs:
unzip
apt-get install -y curl
+ rm -rf /tmp && mkdir /tmp
+
+ curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" \
- curl --create-dirs \
- --output "/tmp/simba_odbc.zip" \
- "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip"
-
- unzip /tmp/simba_odbc.zip -d /tmp/ \
+ unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ \
&& dpkg -i /tmp/SimbaSparkODBC-*/*.deb \
&& echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \
- && rm /tmp/simba_odbc.zip \
- && rm -rf /tmp/SimbaSparkODBC*
+ && rm -rf /tmp
- name: Run tox for Spark ${{ matrix.test }}
run: tox -e integration-${{ matrix.test }}
From 15ba1da4adcb33dedec541dcdda6e0e1de1728a2 Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 12:00:02 -0700
Subject: [PATCH 030/102] fix db test naming
---
.github/workflows/integration.yml | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index d9e71d5e6..ff48a9b30 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -177,8 +177,8 @@ jobs:
python-version:
- "3.8"
test:
- - "databricks-odbc-sql-endpoint"
- - "databricks-odbc-cluster"
+ - "spark-databricks-odbc-sql-endpoint"
+ - "spark-databricks-odbc-cluster"
- "spark-databricks-http"
env:
@@ -190,7 +190,7 @@ jobs:
DD_ENV: ci
DD_SERVICE: ${{ github.event.repository.name }}
DBT_DATABRICKS_CLUSTER_NAME: ${{ secrets.DBT_DATABRICKS_CLUSTER_NAME }}
- DBT_DATABRICKS_HOSTNAME: ${{ secrets.DBT_DATABRICKS_HOST }}
+ DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST }}
DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }}
DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }}
DBT_DATABRICS_USER: ${{ secrets.DBT_DATABRICKS_USER }}
From ca33a236ebbdd9fa9cef5b1a703b0002b03257fe Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 13:52:37 -0700
Subject: [PATCH 031/102] confirm ODBC driver installed
---
.github/workflows/integration.yml | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index ff48a9b30..a8a131a61 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -241,7 +241,9 @@ jobs:
unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ \
&& dpkg -i /tmp/SimbaSparkODBC-*/*.deb \
&& echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \
- && rm -rf /tmp
+ && rm -rf /tmp \
+ && dpkg -l | grep Simba # confirm that the driver is installed
+
- name: Run tox for Spark ${{ matrix.test }}
run: tox -e integration-${{ matrix.test }}
From 6274d77151ba32cb4b45abddb300603d88d860c6 Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 14:17:52 -0700
Subject: [PATCH 032/102] add odbc driver env var
---
.github/workflows/integration.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index a8a131a61..27f5d6bda 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -194,7 +194,7 @@ jobs:
DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }}
DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }}
DBT_DATABRICS_USER: ${{ secrets.DBT_DATABRICKS_USER }}
-
+ ODBC_DRIVER: "Simba"
steps:
- name: Check out the repository
if: github.event_name != 'pull_request'
@@ -240,7 +240,7 @@ jobs:
unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ \
&& dpkg -i /tmp/SimbaSparkODBC-*/*.deb \
- && echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \
+ && echo "[Simba]\nDriver = $HOME/opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \
&& rm -rf /tmp \
&& dpkg -l | grep Simba # confirm that the driver is installed
From 0ba91a2ebc553e322fd20ff3ebb49c9aa810e656 Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 14:40:02 -0700
Subject: [PATCH 033/102] add odbc driver env var
---
.github/workflows/integration.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 27f5d6bda..1dd657085 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -240,7 +240,7 @@ jobs:
unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ \
&& dpkg -i /tmp/SimbaSparkODBC-*/*.deb \
- && echo "[Simba]\nDriver = $HOME/opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \
+ && echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \
&& rm -rf /tmp \
&& dpkg -l | grep Simba # confirm that the driver is installed
From f09202681f49ac144508d4bc4c0f72460767455c Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 15:11:32 -0700
Subject: [PATCH 034/102] specify platform
---
.github/workflows/integration.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 1dd657085..38b8faa35 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -170,7 +170,7 @@ jobs:
runs-on: ubuntu-latest
container:
image: "fishtownanalytics/test-container:10"
- options: --user root
+ options: --user root --platform linux/amd64
strategy:
fail-fast: false
matrix:
From b968985be43080580252b9ac38e410248103e4e6 Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 15:47:48 -0700
Subject: [PATCH 035/102] check odbc driver integrity
---
.github/workflows/integration.yml | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 38b8faa35..61cf5a634 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -157,7 +157,7 @@ jobs:
run: tox -e integration-${{ matrix.test }}
databricks-tests:
- name: test spark databricks against python ${{ matrix.python-version }}
+ name: run ${{ matrix.test }} against python ${{ matrix.python-version }}
# run if not a PR from a forked repository or has a label to mark as safe to test
# also checks that the matrix generated is not empty
if: >-
@@ -193,7 +193,6 @@ jobs:
DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST }}
DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }}
DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }}
- DBT_DATABRICS_USER: ${{ secrets.DBT_DATABRICKS_USER }}
ODBC_DRIVER: "Simba"
steps:
- name: Check out the repository
@@ -244,6 +243,8 @@ jobs:
&& rm -rf /tmp \
&& dpkg -l | grep Simba # confirm that the driver is installed
+ ldd /opt/simba/spark/lib/64/libsparkodbc_sb64.so
+
- name: Run tox for Spark ${{ matrix.test }}
run: tox -e integration-${{ matrix.test }}
From 8a49567fcf3c9748dd75e6ff9c629759b92a4bbd Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 15:53:42 -0700
Subject: [PATCH 036/102] add dbt user env var
---
.github/workflows/integration.yml | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 61cf5a634..41177f054 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -193,6 +193,10 @@ jobs:
DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST }}
DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }}
DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }}
+ DBT_DATABRICKS_USERNAME: ${{ secrets.DBT_DATABRICKS_USERNAME }}
+ DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com"
+ DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com"
+ DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com"
ODBC_DRIVER: "Simba"
steps:
- name: Check out the repository
From 7723e8d90e7af6c2513b8e435ca40805591fcedc Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 15:58:29 -0700
Subject: [PATCH 037/102] add dbt user env var
---
.github/workflows/integration.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 41177f054..c91dc9bbb 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -193,7 +193,7 @@ jobs:
DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST }}
DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }}
DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }}
- DBT_DATABRICKS_USERNAME: ${{ secrets.DBT_DATABRICKS_USERNAME }}
+ DBT_DATABRICKS_USER: ${{ secrets.DBT_DATABRICKS_USERNAME }}
DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com"
DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com"
DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com"
From ea5ebfa32a90c909cbbc87e79bd094eb16030a1d Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 23 Oct 2023 16:42:13 -0700
Subject: [PATCH 038/102] fix host_name env var
---
.github/workflows/integration.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index c91dc9bbb..5ee981c45 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -190,7 +190,7 @@ jobs:
DD_ENV: ci
DD_SERVICE: ${{ github.event.repository.name }}
DBT_DATABRICKS_CLUSTER_NAME: ${{ secrets.DBT_DATABRICKS_CLUSTER_NAME }}
- DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST }}
+ DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST_NAME }}
DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }}
DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }}
DBT_DATABRICKS_USER: ${{ secrets.DBT_DATABRICKS_USERNAME }}
From 610e5e912bebdcf105fcd64f777a035983fbffcb Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 24 Oct 2023 09:55:55 -0700
Subject: [PATCH 039/102] try removing architecture arg
---
.github/workflows/integration.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 5ee981c45..631e8a6de 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -170,7 +170,7 @@ jobs:
runs-on: ubuntu-latest
container:
image: "fishtownanalytics/test-container:10"
- options: --user root --platform linux/amd64
+ options: --user root
strategy:
fail-fast: false
matrix:
From b4411ab011bb285cf2d07bf0be2ff90ee185f682 Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 24 Oct 2023 10:01:01 -0700
Subject: [PATCH 040/102] swap back to pull_request_target
---
.github/workflows/integration.yml | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 631e8a6de..62e276cc1 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -16,7 +16,7 @@ on:
- "main"
- "*.latest"
- "releases/*"
- pull_request:
+ pull_request_target:
workflow_dispatch:
inputs:
dbt-core-branch:
@@ -29,7 +29,7 @@ permissions: read-all
# will cancel previous workflows triggered by the same event and for the same ref for PRs or same SHA otherwise
concurrency:
- group: ${{ github.workflow }}-${{ github.event_name }}-${{ contains(github.event_name, 'pull_request') && github.event.pull_request.head.ref || github.sha }}
+ group: ${{ github.workflow }}-${{ github.event_name }}-${{ contains(github.event_name, 'pull_request_target') && github.event.pull_request.head.ref || github.sha }}
cancel-in-progress: true
defaults:
@@ -41,7 +41,7 @@ jobs:
test-metadata:
# run if not a PR from a forked repository or has a label to mark as safe to test
if: >-
- github.event_name != 'pull_request' ||
+ github.event_name != 'pull_request_target' ||
github.event.pull_request.head.repo.full_name == github.repository ||
contains(github.event.pull_request.labels.*.name, 'ok to test')
runs-on: ubuntu-latest
@@ -52,20 +52,20 @@ jobs:
steps:
- name: Check out the repository (non-PR)
- if: github.event_name != 'pull_request'
+ if: github.event_name != 'pull_request_target'
uses: actions/checkout@v3
with:
persist-credentials: false
- name: Check out the repository (PR)
- if: github.event_name == 'pull_request'
+ if: github.event_name == 'pull_request_target'
uses: actions/checkout@v3
with:
persist-credentials: false
ref: ${{ github.event.pull_request.head.sha }}
- name: Check if relevant files changed
- if: github.event_name == 'pull_request'
+ if: github.event_name == 'pull_request_target'
        # https://github.com/marketplace/actions/paths-changes-filter
# For each filter, it sets output variable named by the filter to the text:
# 'true' - if any of changed files matches any of filter rules
@@ -89,7 +89,7 @@ jobs:
# also checks that the matrix generated is not empty
if: >-
(
- github.event_name != 'pull_request' ||
+ github.event_name != 'pull_request_target' ||
github.event.pull_request.head.repo.full_name == github.repository ||
contains(github.event.pull_request.labels.*.name, 'ok to test')
)
@@ -116,7 +116,7 @@ jobs:
steps:
- name: Check out the repository
- if: github.event_name != 'pull_request'
+ if: github.event_name != 'pull_request_target'
uses: actions/checkout@v3
with:
persist-credentials: false
@@ -124,7 +124,7 @@ jobs:
      # explicitly checkout the branch for the PR,
# this is necessary for the `pull_request` event
- name: Check out the repository (PR)
- if: github.event_name == 'pull_request'
+ if: github.event_name == 'pull_request_target'
uses: actions/checkout@v3
with:
persist-credentials: false
@@ -162,7 +162,7 @@ jobs:
# also checks that the matrix generated is not empty
if: >-
(
- github.event_name != 'pull_request' ||
+ github.event_name != 'pull_request_target' ||
github.event.pull_request.head.repo.full_name == github.repository ||
contains(github.event.pull_request.labels.*.name, 'ok to test')
)
@@ -200,7 +200,7 @@ jobs:
ODBC_DRIVER: "Simba"
steps:
- name: Check out the repository
- if: github.event_name != 'pull_request'
+ if: github.event_name != 'pull_request_target'
uses: actions/checkout@v3
with:
persist-credentials: false
@@ -208,7 +208,7 @@ jobs:
      # explicitly checkout the branch for the PR,
# this is necessary for the `pull_request` event
- name: Check out the repository (PR)
- if: github.event_name == 'pull_request'
+ if: github.event_name == 'pull_request_target'
uses: actions/checkout@v3
with:
persist-credentials: false
From cae6c8abc0abfc57d9a17dba3c0abb0495841249 Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 24 Oct 2023 12:13:18 -0700
Subject: [PATCH 041/102] try running on host instead of container
---
.github/workflows/integration.yml | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 62e276cc1..10f9ce6f0 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -168,9 +168,6 @@ jobs:
)
runs-on: ubuntu-latest
- container:
- image: "fishtownanalytics/test-container:10"
- options: --user root
strategy:
fail-fast: false
matrix:
@@ -214,6 +211,11 @@ jobs:
persist-credentials: false
ref: ${{ github.event.pull_request.head.sha }}
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+
- name: Install python dependencies
run: |
python -m pip install --user --upgrade pip
From 0c689720b96d592ff2f8e8267bb5ef0e1e0a9736 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Tue, 24 Oct 2023 12:13:43 -0700
Subject: [PATCH 042/102] Update .github/workflows/integration.yml
Co-authored-by: Emily Rockman
---
.github/workflows/integration.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 10f9ce6f0..d1829197b 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -203,7 +203,7 @@ jobs:
persist-credentials: false
      # explicitly checkout the branch for the PR,
- # this is necessary for the `pull_request` event
+ # this is necessary for the `pull_request_target` event
- name: Check out the repository (PR)
if: github.event_name == 'pull_request_target'
uses: actions/checkout@v3
From b2f63bd09fb59ba9f751bc425f81242afeef8bd6 Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 24 Oct 2023 14:37:41 -0700
Subject: [PATCH 043/102] try running odbcinst -j
---
.github/workflows/integration.yml | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index d1829197b..f3368d11a 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -250,7 +250,8 @@ jobs:
&& dpkg -l | grep Simba # confirm that the driver is installed
ldd /opt/simba/spark/lib/64/libsparkodbc_sb64.so
-
+ echo "--------------------------------------------"
+ odbcinst -j
- name: Run tox for Spark ${{ matrix.test }}
run: tox -e integration-${{ matrix.test }}
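The odbcinst -j call added above prints unixODBC's configuration summary, including which odbcinst.ini and odbc.ini files are in effect, which helps confirm that the Simba entry written earlier landed where the driver manager will read it. A related check, assuming the same unixODBC tooling is available:

    # list the ODBC drivers unixODBC knows about; the [Simba] entry added above should appear
    odbcinst -q -d
    # show the properties registered for that driver, including the Driver= library path
    odbcinst -q -d -n Simba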
From 80eb7e45e25316dfa539786975c34b6655d77e88 Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 24 Oct 2023 14:51:32 -0700
Subject: [PATCH 044/102] remove bash
---
.github/workflows/integration.yml | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index f3368d11a..45e313482 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -17,6 +17,10 @@ on:
- "*.latest"
- "releases/*"
pull_request_target:
+ types:
+ - opened
+ - synchronize
+ - labeled
workflow_dispatch:
inputs:
dbt-core-branch:
@@ -231,7 +235,6 @@ jobs:
- name: Configure ODBC
if: ${{ matrix.test != 'spark-databricks-http' }}
- shell: bash
run: |
apt-get update && apt-get install -y --no-install-recommends \
g++ \
From 4bbfa71b2c80f056a1e67c1587dbe06ac8fa3613 Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 24 Oct 2023 14:54:33 -0700
Subject: [PATCH 045/102] add sudo
---
.github/workflows/integration.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 45e313482..90e2782a8 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -236,12 +236,12 @@ jobs:
- name: Configure ODBC
if: ${{ matrix.test != 'spark-databricks-http' }}
run: |
- apt-get update && apt-get install -y --no-install-recommends \
+ sudo apt-get update && sudo apt-get install -y --no-install-recommends \
g++ \
unixodbc-dev \
unzip
- apt-get install -y curl
+ sudo apt-get install -y curl
rm -rf /tmp && mkdir /tmp
curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" \
From b1d202023f10aaeb5b7742996ddcdf7ca4bc7abf Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 24 Oct 2023 14:55:46 -0700
Subject: [PATCH 046/102] add sudo
---
.github/workflows/integration.yml | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 90e2782a8..142752b66 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -247,12 +247,12 @@ jobs:
curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" \
unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ \
- && dpkg -i /tmp/SimbaSparkODBC-*/*.deb \
+ && sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb \
&& echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \
&& rm -rf /tmp \
- && dpkg -l | grep Simba # confirm that the driver is installed
+ && sudo dpkg -l | grep Simba # confirm that the driver is installed
- ldd /opt/simba/spark/lib/64/libsparkodbc_sb64.so
+ sudo ldd /opt/simba/spark/lib/64/libsparkodbc_sb64.so
echo "--------------------------------------------"
odbcinst -j
From 38fda3d22f8103c07ce0091a1b3b530c5d36d26f Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 24 Oct 2023 14:59:11 -0700
Subject: [PATCH 047/102] update odbc.ini
---
.github/workflows/integration.yml | 1 +
1 file changed, 1 insertion(+)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 142752b66..08f55f848 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -248,6 +248,7 @@ jobs:
unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ \
&& sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb \
+ && echo "[ODBC Data Sources]\nSimba=Databricks ODBC Connector" >> /etc/odbc.ini \
&& echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \
&& rm -rf /tmp \
&& sudo dpkg -l | grep Simba # confirm that the driver is installed
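The step now touches two different unixODBC files: /etc/odbcinst.ini registers the driver binary under a name, while /etc/odbc.ini lists data sources that point at a registered driver. A rough sketch of how to inspect the result after the step runs (the expected contents mirror the echo lines above; this is not part of the workflow):

# Show where unixODBC looks for its configuration files.
odbcinst -j

# /etc/odbcinst.ini registers the driver shared object under the name "Simba":
cat /etc/odbcinst.ini
#   [Simba]
#   Driver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so

# /etc/odbc.ini maps data-source names to a registered driver:
cat /etc/odbc.ini
#   [ODBC Data Sources]
#   Simba=Databricks ODBC Connector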
From 6b599a1eceb755a5ef5b91d95760b01a364f648c Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 24 Oct 2023 15:02:54 -0700
Subject: [PATCH 048/102] install libsasl2-modules-gssapi-mit
---
.github/workflows/integration.yml | 1 +
1 file changed, 1 insertion(+)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 08f55f848..6dfd716b2 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -239,6 +239,7 @@ jobs:
sudo apt-get update && sudo apt-get install -y --no-install-recommends \
g++ \
unixodbc-dev \
+ libsasl2-modules-gssapi-mit \
unzip
sudo apt-get install -y curl
From 0976c4f70fe8e36169dfb34b922c4e5cdc1f2238 Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 24 Oct 2023 15:08:07 -0700
Subject: [PATCH 049/102] install libsasl2-modules-gssapi-mit
---
.github/workflows/integration.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 6dfd716b2..6807507df 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -249,8 +249,8 @@ jobs:
unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ \
&& sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb \
- && echo "[ODBC Data Sources]\nSimba=Databricks ODBC Connector" >> /etc/odbc.ini \
- && echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \
+ && sudo echo "[ODBC Data Sources]\nSimba=Databricks ODBC Connector" >> /etc/odbc.ini \
+ && sudo echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \
&& rm -rf /tmp \
&& sudo dpkg -l | grep Simba # confirm that the driver is installed
From 42f2784210514349c14b54dcba673139f0226470 Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 24 Oct 2023 15:09:23 -0700
Subject: [PATCH 050/102] set -e on odbc install
---
.github/workflows/integration.yml | 1 +
1 file changed, 1 insertion(+)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 6807507df..235fb49e2 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -236,6 +236,7 @@ jobs:
- name: Configure ODBC
if: ${{ matrix.test != 'spark-databricks-http' }}
run: |
+ set -e
sudo apt-get update && sudo apt-get install -y --no-install-recommends \
g++ \
unixodbc-dev \
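The point of `set -e` is to make the multi-line run block abort on the first failing command, rather than carrying on and only discovering a broken driver install once the tests run. A minimal sketch of the behaviour (throwaway commands, not part of the workflow); note that with `shell: bash` set in the workflow defaults, Actions already invokes the script with `-e -o pipefail`, so this line mainly makes the intent explicit:

#!/bin/bash
set -e                 # exit as soon as any command returns a non-zero status
false                  # this command fails ...
echo "never reached"   # ... so this line is never executed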
From 4f11291045081be0c2975772475b917ee24e4173 Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 24 Oct 2023 15:13:07 -0700
Subject: [PATCH 051/102] set -e on odbc install
---
.github/workflows/integration.yml | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 235fb49e2..92794b427 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -248,12 +248,12 @@ jobs:
curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" \
- unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ \
- && sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb \
- && sudo echo "[ODBC Data Sources]\nSimba=Databricks ODBC Connector" >> /etc/odbc.ini \
- && sudo echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \
- && rm -rf /tmp \
- && sudo dpkg -l | grep Simba # confirm that the driver is installed
+ unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/
+ sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb \
+ echo "--------------------------------------------"
+ sudo echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \
+ rm -rf /tmp \
+ sudo dpkg -l | grep Simba # confirm that the driver is installed
sudo ldd /opt/simba/spark/lib/64/libsparkodbc_sb64.so
echo "--------------------------------------------"
From 1384084e4d08c3b3c9b449229192685eb90c96e0 Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 24 Oct 2023 15:14:20 -0700
Subject: [PATCH 052/102] set -e on odbc install
---
.github/workflows/integration.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 92794b427..e76a5d9ac 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -249,7 +249,7 @@ jobs:
curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" \
unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/
- sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb \
+ sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb
echo "--------------------------------------------"
sudo echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \
rm -rf /tmp \
From 543e321077ed193d05e60a3c3acaba7aca2c0e37 Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 24 Oct 2023 15:21:08 -0700
Subject: [PATCH 053/102] sudo echo odbc.inst
---
.github/workflows/integration.yml | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index e76a5d9ac..da40dde86 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -251,8 +251,9 @@ jobs:
unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/
sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb
echo "--------------------------------------------"
- sudo echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \
- rm -rf /tmp \
+ sudo sh -c echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \
+
+ rm -rf /tmp
sudo dpkg -l | grep Simba # confirm that the driver is installed
sudo ldd /opt/simba/spark/lib/64/libsparkodbc_sb64.so
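The repeated failures around this line come from how the shell handles redirection: in `sudo echo "..." >> /etc/odbcinst.ini`, only `echo` runs as root, while the `>>` append is performed by the unprivileged calling shell. The unquoted `sudo sh -c echo "..."` variant has the same problem and additionally passes only `echo` to `sh -c`; the driver string becomes the inline script's $0 and is never printed. Two common patterns that avoid the pitfall, shown as a sketch rather than the exact line the workflow settles on:

# Option 1: run the whole command, including the redirection, inside a root shell
sudo sh -c 'printf "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so\n" >> /etc/odbcinst.ini'

# Option 2: stay in the current shell and let tee (running as root) perform the append
printf "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so\n" | sudo tee -a /etc/odbcinst.ini > /dev/null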
From f380d46a99205051d1bac84d4741009fb5f1de77 Mon Sep 17 00:00:00 2001
From: Mike Alfare
Date: Wed, 1 Nov 2023 20:19:17 -0400
Subject: [PATCH 054/102] remove postgres components
---
.github/scripts/update_dbt_core_branch.sh | 3 ---
1 file changed, 3 deletions(-)
diff --git a/.github/scripts/update_dbt_core_branch.sh b/.github/scripts/update_dbt_core_branch.sh
index d28a40c35..1a5a5c2d7 100755
--- a/.github/scripts/update_dbt_core_branch.sh
+++ b/.github/scripts/update_dbt_core_branch.sh
@@ -4,16 +4,13 @@ set -e
git_branch=$1
target_req_file="dev-requirements.txt"
core_req_sed_pattern="s|dbt-core.git.*#egg=dbt-core|dbt-core.git@${git_branch}#egg=dbt-core|g"
-postgres_req_sed_pattern="s|dbt-core.git.*#egg=dbt-postgres|dbt-core.git@${git_branch}#egg=dbt-postgres|g"
tests_req_sed_pattern="s|dbt-core.git.*#egg=dbt-tests|dbt-core.git@${git_branch}#egg=dbt-tests|g"
if [[ "$OSTYPE" == darwin* ]]; then
# mac ships with a different version of sed that requires a delimiter arg
sed -i "" "$core_req_sed_pattern" $target_req_file
- sed -i "" "$postgres_req_sed_pattern" $target_req_file
sed -i "" "$tests_req_sed_pattern" $target_req_file
else
sed -i "$core_req_sed_pattern" $target_req_file
- sed -i "$postgres_req_sed_pattern" $target_req_file
sed -i "$tests_req_sed_pattern" $target_req_file
fi
core_version=$(curl "https://raw.githubusercontent.com/dbt-labs/dbt-core/${git_branch}/core/dbt/version.py" | grep "__version__ = *"|cut -d'=' -f2)
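The script keeps two sed branches because BSD sed on macOS requires an explicit (possibly empty) backup suffix after -i, whereas GNU sed would treat that extra argument as the script to run. A small sketch of the same substitution against a throwaway file, with a hypothetical branch name:

# Portable in-place edit across GNU sed (Linux) and BSD sed (macOS).
echo "git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core" > /tmp/reqs.txt
pattern="s|dbt-core.git.*#egg=dbt-core|dbt-core.git@my-feature-branch#egg=dbt-core|g"

if [[ "$OSTYPE" == darwin* ]]; then
    sed -i "" "$pattern" /tmp/reqs.txt   # BSD sed: -i takes a (here empty) backup suffix
else
    sed -i "$pattern" /tmp/reqs.txt      # GNU sed: -i takes no separate argument
fi

cat /tmp/reqs.txt   # -> git+https://github.com/dbt-labs/dbt-core.git@my-feature-branch#egg=dbt-core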
From c334f3273bd7dda434d9bb4dac0f57579c2117d7 Mon Sep 17 00:00:00 2001
From: Mike Alfare
Date: Wed, 1 Nov 2023 20:23:18 -0400
Subject: [PATCH 055/102] remove release related items
---
.github/scripts/update_release_branch.sh | 11 -----------
1 file changed, 11 deletions(-)
delete mode 100644 .github/scripts/update_release_branch.sh
diff --git a/.github/scripts/update_release_branch.sh b/.github/scripts/update_release_branch.sh
deleted file mode 100644
index 75b9ccef6..000000000
--- a/.github/scripts/update_release_branch.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash -e
-set -e
-
-release_branch=$1
-target_req_file=".github/workflows/nightly-release.yml"
-if [[ "$OSTYPE" == darwin* ]]; then
- # mac ships with a different version of sed that requires a delimiter arg
- sed -i "" "s|[0-9].[0-9].latest|$release_branch|" $target_req_file
-else
- sed -i "s|[0-9].[0-9].latest|$release_branch|" $target_req_file
-fi
From 19dcff3f4f44c99ab4c4e3ad8872597a5185cefa Mon Sep 17 00:00:00 2001
From: Mike Alfare
Date: Wed, 1 Nov 2023 20:33:12 -0400
Subject: [PATCH 056/102] remove irrelevant output
---
.github/workflows/integration.yml | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index da40dde86..b85e058e2 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -52,7 +52,6 @@ jobs:
outputs:
matrix: ${{ steps.generate-matrix.outputs.result }}
- run-python-tests: ${{ steps.filter.outputs.bigquery-python }}
steps:
- name: Check out the repository (non-PR)
@@ -242,20 +241,20 @@ jobs:
unixodbc-dev \
libsasl2-modules-gssapi-mit \
unzip
-
+
sudo apt-get install -y curl
rm -rf /tmp && mkdir /tmp
-
+
curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" \
unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/
sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb
echo "--------------------------------------------"
sudo sh -c echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \
-
+
rm -rf /tmp
sudo dpkg -l | grep Simba # confirm that the driver is installed
-
+
sudo ldd /opt/simba/spark/lib/64/libsparkodbc_sb64.so
echo "--------------------------------------------"
odbcinst -j
From 01b0c0cdd74b88e92c7f44d58e092e356ed01b00 Mon Sep 17 00:00:00 2001
From: Mike Alfare
Date: Wed, 1 Nov 2023 20:37:19 -0400
Subject: [PATCH 057/102] move long bash script into its own file
---
.github/scripts/configure_odbc.sh | 23 +++++++++++++++++++++++
.github/workflows/integration.yml | 24 +-----------------------
2 files changed, 24 insertions(+), 23 deletions(-)
create mode 100644 .github/scripts/configure_odbc.sh
diff --git a/.github/scripts/configure_odbc.sh b/.github/scripts/configure_odbc.sh
new file mode 100644
index 000000000..e2bad8886
--- /dev/null
+++ b/.github/scripts/configure_odbc.sh
@@ -0,0 +1,23 @@
+set -e
+sudo apt-get update && sudo apt-get install -y --no-install-recommends \
+ g++ \
+ unixodbc-dev \
+ libsasl2-modules-gssapi-mit \
+ unzip
+
+sudo apt-get install -y curl
+rm -rf /tmp && mkdir /tmp
+
+curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip"
+
+unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/
+sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb
+echo "--------------------------------------------"
+sudo sh -c echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini
+
+rm -rf /tmp
+sudo dpkg -l | grep Simba # confirm that the driver is installed
+
+sudo ldd /opt/simba/spark/lib/64/libsparkodbc_sb64.so
+echo "--------------------------------------------"
+odbcinst -j
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index b85e058e2..b9d6ddcbe 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -235,29 +235,7 @@ jobs:
- name: Configure ODBC
if: ${{ matrix.test != 'spark-databricks-http' }}
run: |
- set -e
- sudo apt-get update && sudo apt-get install -y --no-install-recommends \
- g++ \
- unixodbc-dev \
- libsasl2-modules-gssapi-mit \
- unzip
-
- sudo apt-get install -y curl
- rm -rf /tmp && mkdir /tmp
-
- curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" \
-
- unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/
- sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb
- echo "--------------------------------------------"
- sudo sh -c echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \
-
- rm -rf /tmp
- sudo dpkg -l | grep Simba # confirm that the driver is installed
-
- sudo ldd /opt/simba/spark/lib/64/libsparkodbc_sb64.so
- echo "--------------------------------------------"
- odbcinst -j
+ ./.github/scripts/configure_odbc.sh
- name: Run tox for Spark ${{ matrix.test }}
run: tox -e integration-${{ matrix.test }}
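With the commands extracted into a tracked script, the workflow step is a single call and the same setup can be reproduced on a local Debian/Ubuntu box. One assumption worth checking when running it by hand is the executable bit, since the step invokes the file directly rather than through bash:

# Reproduce the CI ODBC setup locally (requires sudo and a Debian-based system).
chmod +x .github/scripts/configure_odbc.sh   # only needed if the executable bit was not committed
./.github/scripts/configure_odbc.sh
odbcinst -j                                  # confirm unixODBC picked up the Simba driver entry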
From d3d28446b87595580380136c4cc42a369e38a069 Mon Sep 17 00:00:00 2001
From: Mike Alfare
Date: Wed, 1 Nov 2023 20:58:25 -0400
Subject: [PATCH 058/102] update integration.yml to align with other adapters
---
.github/workflows/integration.yml | 25 +++++++++++++------------
1 file changed, 13 insertions(+), 12 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index b9d6ddcbe..6bdee8c32 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -8,7 +8,7 @@
# This will run for all PRs, when code is pushed to a release
# branch, and when manually triggered.
-name: Integration tests
+name: Adapter Integration Tests
on:
push:
@@ -49,7 +49,6 @@ jobs:
github.event.pull_request.head.repo.full_name == github.repository ||
contains(github.event.pull_request.labels.*.name, 'ok to test')
runs-on: ubuntu-latest
-
outputs:
matrix: ${{ steps.generate-matrix.outputs.result }}
@@ -86,7 +85,7 @@ jobs:
- 'dev-requirements.txt'
local-tests:
- name: test spark local against python ${{ matrix.python-version }}
+ name: ${{ matrix.test }} / python ${{ matrix.python-version }} / ubuntu-latest
# run if not a PR from a forked repository or has a label to mark as safe to test
# also checks that the matrix generated is not empty
@@ -96,7 +95,6 @@ jobs:
github.event.pull_request.head.repo.full_name == github.repository ||
contains(github.event.pull_request.labels.*.name, 'ok to test')
)
-
runs-on: ubuntu-latest
strategy:
@@ -109,7 +107,8 @@ jobs:
- "spark-session"
env:
- PYTEST_ADDOPTS: "-v --color=yes --csv test_results.csv"
+ TOXENV: integration-${{ matrix.test }}
+ PYTEST_ADDOPTS: "-v --color=yes --csv integration_results.csv"
DBT_INVOCATION_ENV: github-actions
DD_CIVISIBILITY_AGENTLESS_ENABLED: true
DD_API_KEY: ${{ secrets.DATADOG_API_KEY }}
@@ -124,7 +123,7 @@ jobs:
with:
persist-credentials: false
- # explicity checkout the branch for the PR,
+ # explicitly checkout the branch for the PR,
# this is necessary for the `pull_request` event
- name: Check out the repository (PR)
if: github.event_name == 'pull_request_target'
@@ -156,11 +155,12 @@ jobs:
with:
compose-file: "./docker-compose.yml"
- - name: Run tox for Spark ${{ matrix.test }}
- run: tox -e integration-${{ matrix.test }}
+ - name: Run tox for ${{ matrix.test }}
+ run: tox -- --ddtrace
databricks-tests:
- name: run ${{ matrix.test }} against python ${{ matrix.python-version }}
+ name: ${{ matrix.test }} / python ${{ matrix.python-version }} / ubuntu-latest
+
# run if not a PR from a forked repository or has a label to mark as safe to test
# also checks that the matrix generated is not empty
if: >-
@@ -182,6 +182,7 @@ jobs:
- "spark-databricks-http"
env:
+ TOXENV: integration-${{ matrix.test }}
PYTEST_ADDOPTS: "-v --color=yes --csv test_results.csv"
DBT_INVOCATION_ENV: github-actions
DD_CIVISIBILITY_AGENTLESS_ENABLED: true
@@ -205,7 +206,7 @@ jobs:
with:
persist-credentials: false
- # explicity checkout the branch for the PR,
+ # explicitly checkout the branch for the PR,
# this is necessary for the `pull_request_target` event
- name: Check out the repository (PR)
if: github.event_name == 'pull_request_target'
@@ -237,5 +238,5 @@ jobs:
run: |
./.github/scripts/configure_odbc.sh
- - name: Run tox for Spark ${{ matrix.test }}
- run: tox -e integration-${{ matrix.test }}
+ - name: Run tox for ${{ matrix.test }}
+ run: tox -- --ddtrace
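Moving the environment selection into the TOXENV variable lets both jobs share an identical run step, and everything after `--` on the tox command line is forwarded to pytest as {posargs}, which is how the `--ddtrace` flag reaches the test run. The local equivalent, assuming the integration environments from tox.ini are available:

# Select the tox environment via TOXENV instead of -e, and pass extra
# pytest flags through as {posargs} (everything after --).
export TOXENV=integration-spark-session
tox -- --ddtrace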
From 72daf90d0a5a20534e2b9c5b97f79cb50ca7742c Mon Sep 17 00:00:00 2001
From: Mike Alfare
Date: Wed, 1 Nov 2023 21:08:36 -0400
Subject: [PATCH 059/102] revert name change
---
.github/workflows/integration.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 6bdee8c32..37449d892 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -8,7 +8,7 @@
# This will run for all PRs, when code is pushed to a release
# branch, and when manually triggered.
-name: Adapter Integration Tests
+name: Integration tests
on:
push:
From b43c9d1a2e7a97ed1c59e28a74e36769de69616c Mon Sep 17 00:00:00 2001
From: Mike Alfare
Date: Wed, 1 Nov 2023 21:11:00 -0400
Subject: [PATCH 060/102] revert name change
---
.github/workflows/integration.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 37449d892..6bdee8c32 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -8,7 +8,7 @@
# This will run for all PRs, when code is pushed to a release
# branch, and when manually triggered.
-name: Integration tests
+name: Adapter Integration Tests
on:
push:
From 91715d23a01f0a1039d961b2c24790c8f1ded30e Mon Sep 17 00:00:00 2001
From: Mike Alfare
Date: Wed, 1 Nov 2023 21:23:31 -0400
Subject: [PATCH 061/102] combine databricks and spark tests
---
.github/workflows/integration.yml | 86 ++++---------------------------
1 file changed, 10 insertions(+), 76 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 6bdee8c32..1e60aee1b 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -84,7 +84,7 @@ jobs:
- 'tests/**'
- 'dev-requirements.txt'
- local-tests:
+ test:
name: ${{ matrix.test }} / python ${{ matrix.python-version }} / ubuntu-latest
# run if not a PR from a forked repository or has a label to mark as safe to test
@@ -105,85 +105,13 @@ jobs:
test:
- "spark-thrift"
- "spark-session"
-
- env:
- TOXENV: integration-${{ matrix.test }}
- PYTEST_ADDOPTS: "-v --color=yes --csv integration_results.csv"
- DBT_INVOCATION_ENV: github-actions
- DD_CIVISIBILITY_AGENTLESS_ENABLED: true
- DD_API_KEY: ${{ secrets.DATADOG_API_KEY }}
- DD_SITE: datadoghq.com
- DD_ENV: ci
- DD_SERVICE: ${{ github.event.repository.name }}
-
- steps:
- - name: Check out the repository
- if: github.event_name != 'pull_request_target'
- uses: actions/checkout@v3
- with:
- persist-credentials: false
-
- # explicitly checkout the branch for the PR,
- # this is necessary for the `pull_request` event
- - name: Check out the repository (PR)
- if: github.event_name == 'pull_request_target'
- uses: actions/checkout@v3
- with:
- persist-credentials: false
- ref: ${{ github.event.pull_request.head.sha }}
-
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
-
- - name: Install python dependencies
- run: |
- python -m pip install --user --upgrade pip
- python -m pip install tox
- python -m pip --version
- tox --version
-
- - name: Update dev_requirements.txt
- if: inputs.dbt-core-branch != ''
- run: |
- pip install bumpversion
- ./.github/scripts/update_dbt_core_branch.sh ${{ inputs.dbt-core-branch }}
-
- - uses: isbang/compose-action@v1.5.1
- if: ${{ matrix.test == 'spark-thrift'}}
- with:
- compose-file: "./docker-compose.yml"
-
- - name: Run tox for ${{ matrix.test }}
- run: tox -- --ddtrace
-
- databricks-tests:
- name: ${{ matrix.test }} / python ${{ matrix.python-version }} / ubuntu-latest
-
- # run if not a PR from a forked repository or has a label to mark as safe to test
- # also checks that the matrix generated is not empty
- if: >-
- (
- github.event_name != 'pull_request_target' ||
- github.event.pull_request.head.repo.full_name == github.repository ||
- contains(github.event.pull_request.labels.*.name, 'ok to test')
- )
-
- runs-on: ubuntu-latest
- strategy:
- fail-fast: false
- matrix:
- python-version:
- - "3.8"
- test:
- "spark-databricks-odbc-sql-endpoint"
- "spark-databricks-odbc-cluster"
- "spark-databricks-http"
env:
TOXENV: integration-${{ matrix.test }}
- PYTEST_ADDOPTS: "-v --color=yes --csv test_results.csv"
+ PYTEST_ADDOPTS: "-v --color=yes --csv integration_results.csv"
DBT_INVOCATION_ENV: github-actions
DD_CIVISIBILITY_AGENTLESS_ENABLED: true
DD_API_KEY: ${{ secrets.DATADOG_API_KEY }}
@@ -199,6 +127,7 @@ jobs:
DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com"
DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com"
ODBC_DRIVER: "Simba"
+
steps:
- name: Check out the repository
if: github.event_name != 'pull_request_target'
@@ -207,7 +136,7 @@ jobs:
persist-credentials: false
# explicitly checkout the branch for the PR,
- # this is necessary for the `pull_request_target` event
+ # this is necessary for the `pull_request` event
- name: Check out the repository (PR)
if: github.event_name == 'pull_request_target'
uses: actions/checkout@v3
@@ -233,8 +162,13 @@ jobs:
pip install bumpversion
./.github/scripts/update_dbt_core_branch.sh ${{ inputs.dbt-core-branch }}
+ - uses: isbang/compose-action@v1.5.1
+ if: ${{ matrix.test == 'spark-thrift'}}
+ with:
+ compose-file: "./docker-compose.yml"
+
- name: Configure ODBC
- if: ${{ matrix.test != 'spark-databricks-http' }}
+ if: ${{ matrix.test == 'spark-databricks-odbc-sql-endpoint' || matrix.test == 'spark-databricks-odbc-cluster' }}
run: |
./.github/scripts/configure_odbc.sh
From 943a8dc3030a4fbff9a1f401133a1ef382bb538a Mon Sep 17 00:00:00 2001
From: Mike Alfare
Date: Wed, 1 Nov 2023 21:26:27 -0400
Subject: [PATCH 062/102] combine databricks and spark tests
---
.github/workflows/integration.yml | 4 ----
1 file changed, 4 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 1e60aee1b..1389550a2 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -17,10 +17,6 @@ on:
- "*.latest"
- "releases/*"
pull_request_target:
- types:
- - opened
- - synchronize
- - labeled
workflow_dispatch:
inputs:
dbt-core-branch:
From 3d0decefb6a2a453c6a806cc467a2763f02a9ade Mon Sep 17 00:00:00 2001
From: Colin
Date: Thu, 30 Nov 2023 15:14:17 -0800
Subject: [PATCH 063/102] Add dagger
---
.github/scripts/configure_odbc.sh | 23 -------------------
dagger/configure_odbc.sh | 20 ++++++++++++++++
dagger/run_dbt_spark_tests.py | 38 +++++++++++++++++++++++++++++++
3 files changed, 58 insertions(+), 23 deletions(-)
delete mode 100644 .github/scripts/configure_odbc.sh
create mode 100755 dagger/configure_odbc.sh
create mode 100644 dagger/run_dbt_spark_tests.py
diff --git a/.github/scripts/configure_odbc.sh b/.github/scripts/configure_odbc.sh
deleted file mode 100644
index e2bad8886..000000000
--- a/.github/scripts/configure_odbc.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-set -e
-sudo apt-get update && sudo apt-get install -y --no-install-recommends \
- g++ \
- unixodbc-dev \
- libsasl2-modules-gssapi-mit \
- unzip
-
-sudo apt-get install -y curl
-rm -rf /tmp && mkdir /tmp
-
-curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip"
-
-unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/
-sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb
-echo "--------------------------------------------"
-sudo sh -c echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini
-
-rm -rf /tmp
-sudo dpkg -l | grep Simba # confirm that the driver is installed
-
-sudo ldd /opt/simba/spark/lib/64/libsparkodbc_sb64.so
-echo "--------------------------------------------"
-odbcinst -j
diff --git a/dagger/configure_odbc.sh b/dagger/configure_odbc.sh
new file mode 100755
index 000000000..7126298c0
--- /dev/null
+++ b/dagger/configure_odbc.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+set -e
+apt update && apt install -y --no-install-recommends \
+ g++ \
+ git \
+ curl \
+ unixodbc-dev \
+ libsasl2-modules-gssapi-mit \
+ unzip
+
+rm -rf /tmp && mkdir /tmp
+
+curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip"
+
+unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/
+dpkg -i /tmp/*/simbaspark_2.6.16.1019-2_amd64.deb
+echo "--------------------------------------------"
+echo sh -c echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini
+dpkg -l | grep Simba # confirm that the driver is installed
+rm -rf /tmp
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
new file mode 100644
index 000000000..85a3b8246
--- /dev/null
+++ b/dagger/run_dbt_spark_tests.py
@@ -0,0 +1,38 @@
+import argparse
+import sys
+
+import anyio as anyio
+import dagger as dagger
+
+
+async def test_spark(test_args):
+ async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as client:
+ install_dir = client.host().directory("./", exclude=["\\.pytest_cache/*", ".idea/*"])
+ platform = dagger.Platform("linux/amd64")
+ tst_container = (
+ client.container()
+ .from_("python:3.8-slim")
+ .with_directory("/dbt_spark", install_dir)
+ .with_workdir("/dbt_spark")
+ .with_exec("./dagger/configure_odbc.sh")
+ .with_exec(["pip", "install", "-r", "requirements.txt"])
+ .with_exec(["pip", "install", "-r", "dev-requirements.txt"])
+ )
+
+ result = await (tst_container
+ .with_workdir("/dbt_spark")
+ .with_exec(["python", '-m', 'pytest', '-v',
+ '--profile', test_args.profile,
+ '-n', 'auto',
+ 'tests/functional/']
+ )
+ ).stdout()
+
+ return result
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--profile", required=True, type=str)
+args = parser.parse_args()
+
+anyio.run(test_spark, args)
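From here on the test matrix value is handed to a Python entry point as --profile, and dagger builds and wires the containers instead of docker-compose. A local invocation sketch, assuming a running Docker daemon for dagger's engine (the version pin matches what the workflow installs in a later patch):

# Run the dagger-based integration tests locally.
python -m pip install "dagger-io~=0.8.0"
python dagger/run_dbt_spark_tests.py --profile apache_spark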
From 080b816731708bc2bdae8f648588799b358b939c Mon Sep 17 00:00:00 2001
From: Colin
Date: Thu, 30 Nov 2023 15:15:07 -0800
Subject: [PATCH 064/102] remove platform
---
dagger/run_dbt_spark_tests.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index 85a3b8246..a2125a310 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -8,7 +8,7 @@
async def test_spark(test_args):
async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as client:
install_dir = client.host().directory("./", exclude=["\\.pytest_cache/*", ".idea/*"])
- platform = dagger.Platform("linux/amd64")
+
tst_container = (
client.container()
.from_("python:3.8-slim")
From c8477ced3779879a40db2beca2135de38d9c3a87 Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 8 Jan 2024 11:14:35 -0800
Subject: [PATCH 065/102] add dagger setup
---
.github/workflows/integration.yml | 30 +++----
.gitignore | 2 +
dagger/run_dbt_spark_tests.py | 105 +++++++++++++++++++++----
dagger/{ => scripts}/configure_odbc.sh | 16 +---
dev-requirements.txt | 4 +-
tests/conftest.py | 2 +-
6 files changed, 109 insertions(+), 50 deletions(-)
rename dagger/{ => scripts}/configure_odbc.sh (51%)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 1389550a2..88a73884f 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -96,14 +96,12 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version:
- - "3.8"
test:
- - "spark-thrift"
- - "spark-session"
- - "spark-databricks-odbc-sql-endpoint"
- - "spark-databricks-odbc-cluster"
- - "spark-databricks-http"
+ - "apache_spark"
+ - "spark_session"
+ - "databricks_sql_endpoint"
+ - "databricks_cluster"
+ - "databricks_http_cluster"
env:
TOXENV: integration-${{ matrix.test }}
@@ -143,14 +141,13 @@ jobs:
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
- python-version: ${{ matrix.python-version }}
+ python-version: "3.11"
- name: Install python dependencies
run: |
python -m pip install --user --upgrade pip
- python -m pip install tox
python -m pip --version
- tox --version
+ python -m pip install dagger-io~=0.8.0
- name: Update dev_requirements.txt
if: inputs.dbt-core-branch != ''
@@ -158,15 +155,6 @@ jobs:
pip install bumpversion
./.github/scripts/update_dbt_core_branch.sh ${{ inputs.dbt-core-branch }}
- - uses: isbang/compose-action@v1.5.1
- if: ${{ matrix.test == 'spark-thrift'}}
- with:
- compose-file: "./docker-compose.yml"
-
- - name: Configure ODBC
- if: ${{ matrix.test == 'spark-databricks-odbc-sql-endpoint' || matrix.test == 'spark-databricks-odbc-cluster' }}
- run: |
- ./.github/scripts/configure_odbc.sh
- - name: Run tox for ${{ matrix.test }}
- run: tox -- --ddtrace
+ - name: Run tests for ${{ matrix.test }}
+ run: python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }}
diff --git a/.gitignore b/.gitignore
index 33a83848c..1e8ff7411 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,3 +44,5 @@ test.env
.hive-metastore/
.spark-warehouse/
dbt-integration-tests
+/.tool-versions
+/.hypothesis/*
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index a2125a310..a5be95dd4 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -4,29 +4,106 @@
import anyio as anyio
import dagger as dagger
+PG_PORT = 5432
+
+
+async def get_postgres_container(client: dagger.Client) -> (dagger.Container, str):
+ ctr = await (
+ client.container()
+ .from_("postgres:13")
+ .with_env_variable("POSTGRES_PASSWORD", "postgres")
+ .with_exposed_port(PG_PORT)
+ )
+
+ return ctr, "postgres_db"
+
+
+async def get_spark_container(client: dagger.Client) -> (dagger.Container, str):
+ spark_dir = client.host().directory("./dagger/spark-container")
+ spark_ctr = (
+ client.container()
+ .from_("eclipse-temurin:8-jre")
+ .with_directory("/spark_setup", spark_dir)
+ .with_env_variable("SPARK_HOME", "/usr/spark")
+ .with_env_variable("PATH", "/usr/spark/bin:/usr/spark/sbin:$PATH", expand=True)
+ .with_file(
+ "/scripts/entrypoint.sh",
+ client.host().file("./dagger/spark-container/entrypoint.sh"),
+ permissions=755,
+ )
+ .with_file(
+ "/scripts/install_spark.sh",
+ client.host().file("./dagger/spark-container/install_spark.sh"),
+ permissions=755,
+ )
+ .with_exec(["./spark_setup/install_spark.sh"])
+ .with_file("/usr/spark/conf/hive-site.xml", spark_dir.file("/hive-site.xml"))
+ .with_file("/usr/spark/conf/spark-defaults.conf", spark_dir.file("spark-defaults.conf"))
+ )
+
+ # postgres is the metastore here
+ pg_ctr, pg_host = await get_postgres_container(client)
+
+ spark_ctr = (
+ spark_ctr.with_service_binding(alias=pg_host, service=pg_ctr)
+ .with_exec(
+ [
+ "/scripts/entrypoint.sh",
+ "--class",
+ "org.apache.spark.sql.hive.thriftserver.HiveThriftServer2",
+ "--name",
+ "Thrift JDBC/ODBC Server",
+ ]
+ )
+ .with_exposed_port(10000)
+ )
+
+ return spark_ctr, "spark_db"
+
async def test_spark(test_args):
async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as client:
- install_dir = client.host().directory("./", exclude=["\\.pytest_cache/*", ".idea/*"])
-
+ req_files = client.host().directory("./", include=["*.txt", "*.env", "*.ini"])
+ dbt_spark_dir = client.host().directory("./dbt")
+ test_dir = client.host().directory("./tests")
+ scripts = client.host().directory("./dagger/scripts")
+ platform = dagger.Platform("linux/amd64")
tst_container = (
- client.container()
+ client.container(platform=platform)
.from_("python:3.8-slim")
- .with_directory("/dbt_spark", install_dir)
- .with_workdir("/dbt_spark")
- .with_exec("./dagger/configure_odbc.sh")
+ .with_directory("/.", req_files)
+ .with_directory("/dbt", dbt_spark_dir)
+ .with_directory("/tests", test_dir)
+ .with_directory("/scripts", scripts)
+ .with_exec("./scripts/install_os_reqs.sh")
.with_exec(["pip", "install", "-r", "requirements.txt"])
.with_exec(["pip", "install", "-r", "dev-requirements.txt"])
)
- result = await (tst_container
- .with_workdir("/dbt_spark")
- .with_exec(["python", '-m', 'pytest', '-v',
- '--profile', test_args.profile,
- '-n', 'auto',
- 'tests/functional/']
- )
- ).stdout()
+ if test_args.profile == "apache_spark":
+ spark_ctr, spark_host = await get_spark_container(client)
+ tst_container = tst_container.with_service_binding(alias=spark_host, service=spark_ctr)
+
+ elif test_args.profile in ["databricks_cluster", "databricks_sql_endpoint"]:
+ tst_container = tst_container.with_exec("./scripts/configure_odbc.sh")
+
+ elif test_args.profile == "spark_session":
+ tst_container = tst_container.with_exec(["pip", "install", "pyspark"])
+ tst_container = tst_container.with_exec(["apt-get", "install", "openjdk-17-jre", "-y"])
+
+ result = await tst_container.with_exec(
+ [
+ "python",
+ "-m",
+ "pytest",
+ "-v",
+ "--profile",
+ test_args.profile,
+ "-n",
+ "auto",
+ "tests/functional/",
+ ]
+ ).stdout()
return result
diff --git a/dagger/configure_odbc.sh b/dagger/scripts/configure_odbc.sh
similarity index 51%
rename from dagger/configure_odbc.sh
rename to dagger/scripts/configure_odbc.sh
index 7126298c0..50e80914d 100755
--- a/dagger/configure_odbc.sh
+++ b/dagger/scripts/configure_odbc.sh
@@ -1,20 +1,12 @@
#!/bin/bash
-set -e
-apt update && apt install -y --no-install-recommends \
- g++ \
- git \
- curl \
- unixodbc-dev \
- libsasl2-modules-gssapi-mit \
- unzip
-
+set -eo
rm -rf /tmp && mkdir /tmp
curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip"
-
unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/
-dpkg -i /tmp/*/simbaspark_2.6.16.1019-2_amd64.deb
+dpkg -i /tmp/SimbaSparkODBC-2.6.16.1019-Debian-64bit/simbaspark_2.6.16.1019-2_amd64.deb
echo "--------------------------------------------"
-echo sh -c echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini
+echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini
dpkg -l | grep Simba # confirm that the driver is installed
+export ODBC_DRIVER="/opt/simba/spark/lib/64/libsparkodbc_sb64.so"
rm -rf /tmp
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 8f94d509d..89c55d3f9 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,7 +1,7 @@
# install latest changes in dbt-core
# TODO: how to automate switching from develop to version branches?
-git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
-git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
+git+https://github.com/dbt-labs/dbt-core.git@c2bc2f009bbeeb46b3c69d082ab4d485597898af#egg=dbt-core&subdirectory=core
+git+https://github.com/dbt-labs/dbt-core.git@c2bc2f009bbeeb46b3c69d082ab4d485597898af#egg=dbt-tests-adapter&subdirectory=tests/adapter
# if version 1.x or greater -> pin to major version
# if version 0.x -> pin to minor
diff --git a/tests/conftest.py b/tests/conftest.py
index 94969e406..700ade4d3 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -38,7 +38,7 @@ def dbt_profile_target(request):
def apache_spark_target():
return {
"type": "spark",
- "host": "localhost",
+ "host": "spark_db",
"user": "dbt",
"method": "thrift",
"port": 10000,
From c0a37aeff43c549131299ea4b5a487baf06634ae Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 8 Jan 2024 11:15:00 -0800
Subject: [PATCH 066/102] add dagger setup
---
dagger/scripts/install_os_reqs.sh | 10 +++++
dagger/spark-container/entrypoint.sh | 15 +++++++
dagger/spark-container/hive-site.xml | 46 ++++++++++++++++++++++
dagger/spark-container/install_spark.sh | 15 +++++++
dagger/spark-container/spark-defaults.conf | 9 +++++
5 files changed, 95 insertions(+)
create mode 100755 dagger/scripts/install_os_reqs.sh
create mode 100644 dagger/spark-container/entrypoint.sh
create mode 100644 dagger/spark-container/hive-site.xml
create mode 100755 dagger/spark-container/install_spark.sh
create mode 100644 dagger/spark-container/spark-defaults.conf
diff --git a/dagger/scripts/install_os_reqs.sh b/dagger/scripts/install_os_reqs.sh
new file mode 100755
index 000000000..47457b8d6
--- /dev/null
+++ b/dagger/scripts/install_os_reqs.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+set -eo
+apt-get update && apt-get install -y --no-install-recommends \
+ g++ \
+ git \
+ curl \
+ unixodbc \
+ unixodbc-dev \
+ libsasl2-modules-gssapi-mit \
+ unzip
\ No newline at end of file
diff --git a/dagger/spark-container/entrypoint.sh b/dagger/spark-container/entrypoint.sh
new file mode 100644
index 000000000..4b15cab61
--- /dev/null
+++ b/dagger/spark-container/entrypoint.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+if [ -n "$WAIT_FOR" ]; then
+ IFS=';' read -a HOSTPORT_ARRAY <<< "$WAIT_FOR"
+ for HOSTPORT in "${HOSTPORT_ARRAY[@]}"
+ do
+ WAIT_FOR_HOST=${HOSTPORT%:*}
+ WAIT_FOR_PORT=${HOSTPORT#*:}
+
+ echo Waiting for $WAIT_FOR_HOST to listen on $WAIT_FOR_PORT...
+ while ! nc -z $WAIT_FOR_HOST $WAIT_FOR_PORT; do echo sleeping; sleep 2; done
+ done
+fi
+echo "$PATH"
+exec spark-submit "$@"
diff --git a/dagger/spark-container/hive-site.xml b/dagger/spark-container/hive-site.xml
new file mode 100644
index 000000000..93e966fb7
--- /dev/null
+++ b/dagger/spark-container/hive-site.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0"?>
+
+<configuration>
+
+  <property>
+    <name>javax.jdo.option.ConnectionURL</name>
+    <value>jdbc:postgresql://postgres_db/postgres</value>
+  </property>
+
+  <property>
+    <name>javax.jdo.option.ConnectionDriverName</name>
+    <value>org.postgresql.Driver</value>
+  </property>
+
+  <property>
+    <name>javax.jdo.option.ConnectionUserName</name>
+    <value>postgres</value>
+  </property>
+
+  <property>
+    <name>javax.jdo.option.ConnectionPassword</name>
+    <value>postgres</value>
+  </property>
+
+  <property>
+    <name>hive.metastore.schema.verification</name>
+    <value>false</value>
+  </property>
+
+</configuration>
diff --git a/dagger/spark-container/install_spark.sh b/dagger/spark-container/install_spark.sh
new file mode 100755
index 000000000..476f362a9
--- /dev/null
+++ b/dagger/spark-container/install_spark.sh
@@ -0,0 +1,15 @@
+set -e
+
+SPARK_VERSION=3.1.3
+HADOOP_VERSION=3.2
+
+apt-get update && \
+apt-get install -y wget netcat procps libpostgresql-jdbc-java && \
+wget -q "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /usr/spark && \
+ln -s /usr/share/java/postgresql-jdbc4.jar /usr/spark/jars/postgresql-jdbc4.jar && \
+apt-get remove -y wget && \
+apt-get autoremove -y && \
+apt-get clean
diff --git a/dagger/spark-container/spark-defaults.conf b/dagger/spark-container/spark-defaults.conf
new file mode 100644
index 000000000..30ec59591
--- /dev/null
+++ b/dagger/spark-container/spark-defaults.conf
@@ -0,0 +1,9 @@
+spark.driver.memory 2g
+spark.executor.memory 2g
+spark.hadoop.datanucleus.autoCreateTables true
+spark.hadoop.datanucleus.schema.autoCreateTables true
+spark.hadoop.datanucleus.fixedDatastore false
+spark.serializer org.apache.spark.serializer.KryoSerializer
+spark.jars.packages org.apache.hudi:hudi-spark3-bundle_2.12:0.10.0
+spark.sql.extensions org.apache.spark.sql.hudi.HoodieSparkSessionExtension
+spark.driver.userClassPathFirst true
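The entrypoint's WAIT_FOR variable accepts a semicolon-separated list of host:port pairs and blocks with `nc -z` until each one accepts connections, so spark-submit only starts once its dependencies (here, the Postgres metastore) are reachable. A usage sketch with the values used by this setup; outside the spark container the script path and hostname are assumptions:

# Block until the metastore answers, then launch the Thrift server.
export WAIT_FOR="postgres_db:5432"
/scripts/entrypoint.sh \
  --class org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 \
  --name "Thrift JDBC/ODBC Server"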
From 8c6a7455a411d8573005ff555491ef438c0aea3d Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 8 Jan 2024 11:39:10 -0800
Subject: [PATCH 067/102] set env vars
---
dagger/run_dbt_spark_tests.py | 38 ++++++++++++++++++++++++++++++++---
1 file changed, 35 insertions(+), 3 deletions(-)
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index a5be95dd4..ca7cffd3b 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -1,10 +1,38 @@
+import os
+
import argparse
import sys
import anyio as anyio
import dagger as dagger
+from dotenv import find_dotenv, load_dotenv
PG_PORT = 5432
+load_dotenv(find_dotenv("test.env"))
+DEFAULT_ENV_VARS = {
+"DBT_TEST_USER_1": "buildbot+dbt_test_user_1@dbtlabs.com",
+"DBT_TEST_USER_2":"buildbot+dbt_test_user_2@dbtlabs.com",
+"DBT_TEST_USER_3": "buildbot+dbt_test_user_3@dbtlabs.com",
+}
+
+def env_variables(envs: dict[str, str]):
+ def env_variables_inner(ctr: dagger.Container):
+ for key, value in envs.items():
+ ctr = ctr.with_env_variable(key, value)
+ return ctr
+
+ return env_variables_inner
+
+
+def get_databricks_env_vars():
+
+ return {
+ "DBT_DATABRICKS_TOKEN": os.environ["DBT_DATABRICKS_TOKEN"],
+ "DBT_DATABRICKS_HOST_NAME": os.environ["DBT_DATABRICKS_HOST_NAME"],
+ "DBT_DATABRICKS_ENDPOINT": os.environ["DBT_DATABRICKS_ENDPOINT"],
+ "DBT_DATABRICKS_CLUSTER_NAME": os.environ["DBT_DATABRICKS_CLUSTER_NAME"],
+ "ODBC_DRIVER": "/opt/simba/spark/lib/64/libsparkodbc_sb64.so",
+ }
async def get_postgres_container(client: dagger.Client) -> (dagger.Container, str):
@@ -63,6 +91,7 @@ async def get_spark_container(client: dagger.Client) -> (dagger.Container, str):
async def test_spark(test_args):
async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as client:
+ test_profile = test_args.profile
req_files = client.host().directory("./", include=["*.txt", "*.env", "*.ini"])
dbt_spark_dir = client.host().directory("./dbt")
test_dir = client.host().directory("./tests")
@@ -80,17 +109,20 @@ async def test_spark(test_args):
.with_exec(["pip", "install", "-r", "dev-requirements.txt"])
)
- if test_args.profile == "apache_spark":
+ if test_profile == "apache_spark":
spark_ctr, spark_host = await get_spark_container(client)
tst_container = tst_container.with_service_binding(alias=spark_host, service=spark_ctr)
- elif test_args.profile in ["databricks_cluster", "databricks_sql_endpoint"]:
+ elif test_profile in ["databricks_cluster", "databricks_sql_endpoint"]:
tst_container = tst_container.with_exec("./scripts/configure_odbc.sh")
- elif test_args.profile == "spark_session":
+ elif test_profile == "spark_session":
tst_container = tst_container.with_exec(["pip", "install", "pyspark"])
tst_container = tst_container.with_exec(["apt-get", "install", "openjdk-17-jre", "-y"])
+ if "databricks" in test_profile:
+ tst_container = tst_container.with_(env_variables(get_databricks_env_vars()))
+ tst_container = tst_container.with_(env_variables(DEFAULT_ENV_VARS))
result = await tst_container.with_exec(
[
"python",
From 1ae321a264a1ebefa76ce1cb777ed2c9732bedc6 Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 8 Jan 2024 11:41:10 -0800
Subject: [PATCH 068/102] install requirements
---
.github/workflows/integration.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 88a73884f..67b6ed8e3 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -147,7 +147,7 @@ jobs:
run: |
python -m pip install --user --upgrade pip
python -m pip --version
- python -m pip install dagger-io~=0.8.0
+ python -m pip install -r dagger/requirements.txt
- name: Update dev_requirements.txt
if: inputs.dbt-core-branch != ''
From 6361429e44b7e8bb0182a629850ca2db922e0ab6 Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 8 Jan 2024 11:41:18 -0800
Subject: [PATCH 069/102] install requirements
---
dagger/requirements.txt | 2 ++
1 file changed, 2 insertions(+)
create mode 100644 dagger/requirements.txt
diff --git a/dagger/requirements.txt b/dagger/requirements.txt
new file mode 100644
index 000000000..3634ceeb7
--- /dev/null
+++ b/dagger/requirements.txt
@@ -0,0 +1,2 @@
+dagger-io~=0.8.0
+python-dotenv
\ No newline at end of file
From 6bca5dc715f7b142bc35c6e64c8bef7a89edbdee Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 8 Jan 2024 14:51:42 -0800
Subject: [PATCH 070/102] add DEFAULT_ENV_VARS and test_path arg
---
dagger/run_dbt_spark_tests.py | 27 ++++++++++++---------------
1 file changed, 12 insertions(+), 15 deletions(-)
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index ca7cffd3b..864d9cad6 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -10,11 +10,12 @@
PG_PORT = 5432
load_dotenv(find_dotenv("test.env"))
DEFAULT_ENV_VARS = {
-"DBT_TEST_USER_1": "buildbot+dbt_test_user_1@dbtlabs.com",
-"DBT_TEST_USER_2":"buildbot+dbt_test_user_2@dbtlabs.com",
-"DBT_TEST_USER_3": "buildbot+dbt_test_user_3@dbtlabs.com",
+ "DBT_TEST_USER_1": os.getenv("DBT_TEST_USER_1", "buildbot+dbt_test_user_1@dbtlabs.com"),
+ "DBT_TEST_USER_2": os.getenv("DBT_TEST_USER_2","buildbot+dbt_test_user_2@dbtlabs.com"),
+ "DBT_TEST_USER_3": os.getenv("DBT_TEST_USER_3", "buildbot+dbt_test_user_3@dbtlabs.com"),
}
+
def env_variables(envs: dict[str, str]):
def env_variables_inner(ctr: dagger.Container):
for key, value in envs.items():
@@ -25,7 +26,6 @@ def env_variables_inner(ctr: dagger.Container):
def get_databricks_env_vars():
-
return {
"DBT_DATABRICKS_TOKEN": os.environ["DBT_DATABRICKS_TOKEN"],
"DBT_DATABRICKS_HOST_NAME": os.environ["DBT_DATABRICKS_HOST_NAME"],
@@ -123,18 +123,14 @@ async def test_spark(test_args):
if "databricks" in test_profile:
tst_container = tst_container.with_(env_variables(get_databricks_env_vars()))
tst_container = tst_container.with_(env_variables(DEFAULT_ENV_VARS))
+ test_path = test_args.test_path if test_args.test_path else "tests/functional/adapter"
result = await tst_container.with_exec(
- [
- "python",
- "-m",
- "pytest",
- "-v",
- "--profile",
- test_args.profile,
- "-n",
- "auto",
- "tests/functional/",
- ]
+ ["python", "-m", "pytest",
+ "-v",
+ "--profile", test_args.profile,
+ "-n", "auto",
+ test_path,
+ ]
).stdout()
return result
@@ -142,6 +138,7 @@ async def test_spark(test_args):
parser = argparse.ArgumentParser()
parser.add_argument("--profile", required=True, type=str)
+parser.add_argument("--test-path", required=False, type=str)
args = parser.parse_args()
anyio.run(test_spark, args)
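The optional --test-path flag narrows the pytest invocation to one file or directory instead of the default tests/functional/adapter, which keeps the feedback loop short while iterating on a single module. A usage sketch (the module name is only an example):

# Run one functional test module against the local spark_session profile.
python dagger/run_dbt_spark_tests.py \
  --profile spark_session \
  --test-path tests/functional/adapter/test_basic.py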
From f4293e0999276393d7ce4e288dbd87c58d3adc32 Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 8 Jan 2024 15:00:54 -0800
Subject: [PATCH 071/102] remove circle ci
---
.circleci/config.yml | 136 -------------------------------------------
README.md | 3 -
2 files changed, 139 deletions(-)
delete mode 100644 .circleci/config.yml
diff --git a/.circleci/config.yml b/.circleci/config.yml
deleted file mode 100644
index f2a3b6357..000000000
--- a/.circleci/config.yml
+++ /dev/null
@@ -1,136 +0,0 @@
-version: 2.1
-
-jobs:
- unit:
- environment:
- DBT_INVOCATION_ENV: circle
- docker:
- - image: fishtownanalytics/test-container:10
- steps:
- - checkout
- - run: tox -e flake8,unit
-
-# Turning off for now due to flaky runs of tests will turn back on at later date.
- integration-spark-session:
- environment:
- DBT_INVOCATION_ENV: circle
- docker:
- - image: godatadriven/pyspark:3.1
- steps:
- - checkout
- - run: apt-get update
- - run: conda install python=3.10
- - run: python3 -m pip install --upgrade pip
- - run: apt-get install -y git gcc g++ unixodbc-dev libsasl2-dev libxml2-dev libxslt-dev
- - run: python3 -m pip install tox
- - run:
- name: Run integration tests
- command: tox -e integration-spark-session
- no_output_timeout: 1h
- - store_artifacts:
- path: ./logs
-
- integration-spark-thrift:
- environment:
- DBT_INVOCATION_ENV: circle
- docker:
- - image: fishtownanalytics/test-container:10
- - image: godatadriven/spark:3.1.1
- environment:
- WAIT_FOR: localhost:5432
- command: >
- --class org.apache.spark.sql.hive.thriftserver.HiveThriftServer2
- --name Thrift JDBC/ODBC Server
- - image: postgres:9.6.17-alpine
- environment:
- POSTGRES_USER: dbt
- POSTGRES_PASSWORD: dbt
- POSTGRES_DB: metastore
-
- steps:
- - checkout
-
- - run:
- name: Wait for Spark-Thrift
- command: dockerize -wait tcp://localhost:10000 -timeout 15m -wait-retry-interval 5s
-
- - run:
- name: Run integration tests
- command: tox -e integration-spark-thrift
- no_output_timeout: 1h
- - store_artifacts:
- path: ./logs
-
- integration-spark-databricks-http:
- environment:
- DBT_INVOCATION_ENV: circle
- DBT_DATABRICKS_RETRY_ALL: True
- DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com"
- DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com"
- DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com"
- docker:
- - image: fishtownanalytics/test-container:10
- steps:
- - checkout
- - run:
- name: Run integration tests
- command: tox -e integration-spark-databricks-http
- no_output_timeout: 1h
- - store_artifacts:
- path: ./logs
-
- integration-spark-databricks-odbc-cluster: &databricks-odbc
- environment:
- DBT_INVOCATION_ENV: circle
- ODBC_DRIVER: Simba # TODO: move env var to Docker image
- DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com"
- DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com"
- DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com"
- docker:
- # image based on `fishtownanalytics/test-container` w/ Simba ODBC Spark driver installed
- - image: 828731156495.dkr.ecr.us-east-1.amazonaws.com/dbt-spark-odbc-test-container:latest
- aws_auth:
- aws_access_key_id: $AWS_ACCESS_KEY_ID_STAGING
- aws_secret_access_key: $AWS_SECRET_ACCESS_KEY_STAGING
- steps:
- - checkout
- - run:
- name: Run integration tests
- command: tox -e integration-spark-databricks-odbc-cluster
- no_output_timeout: 1h
- - store_artifacts:
- path: ./logs
-
- integration-spark-databricks-odbc-endpoint:
- <<: *databricks-odbc
- steps:
- - checkout
- - run:
- name: Run integration tests
- command: tox -e integration-spark-databricks-odbc-sql-endpoint
- no_output_timeout: 1h
- - store_artifacts:
- path: ./logs
-
-workflows:
- version: 2
- test-everything:
- jobs:
- - unit
- - integration-spark-session:
- requires:
- - unit
- - integration-spark-thrift:
- requires:
- - unit
- - integration-spark-databricks-http:
- requires:
- - integration-spark-thrift
- - integration-spark-databricks-odbc-cluster:
- context: aws-credentials
- requires:
- - integration-spark-thrift
- - integration-spark-databricks-odbc-endpoint:
- context: aws-credentials
- requires:
- - integration-spark-thrift
diff --git a/README.md b/README.md
index 2d2586795..7e95b1fc3 100644
--- a/README.md
+++ b/README.md
@@ -5,9 +5,6 @@
-
-
-
**[dbt](https://www.getdbt.com/)** enables data analysts and engineers to transform their data using the same practices that software engineers use to build applications.
From d39806558844a5babd6c1c0ad8e4712be7b89a4f Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 8 Jan 2024 19:45:29 -0800
Subject: [PATCH 072/102] formatting
---
dagger/requirements.txt | 2 +-
dagger/run_dbt_spark_tests.py | 9 ++-------
dagger/scripts/install_os_reqs.sh | 2 +-
3 files changed, 4 insertions(+), 9 deletions(-)
diff --git a/dagger/requirements.txt b/dagger/requirements.txt
index 3634ceeb7..df36543c2 100644
--- a/dagger/requirements.txt
+++ b/dagger/requirements.txt
@@ -1,2 +1,2 @@
dagger-io~=0.8.0
-python-dotenv
\ No newline at end of file
+python-dotenv
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index 864d9cad6..c9455bdde 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -11,7 +11,7 @@
load_dotenv(find_dotenv("test.env"))
DEFAULT_ENV_VARS = {
"DBT_TEST_USER_1": os.getenv("DBT_TEST_USER_1", "buildbot+dbt_test_user_1@dbtlabs.com"),
- "DBT_TEST_USER_2": os.getenv("DBT_TEST_USER_2","buildbot+dbt_test_user_2@dbtlabs.com"),
+ "DBT_TEST_USER_2": os.getenv("DBT_TEST_USER_2", "buildbot+dbt_test_user_2@dbtlabs.com"),
"DBT_TEST_USER_3": os.getenv("DBT_TEST_USER_3", "buildbot+dbt_test_user_3@dbtlabs.com"),
}
@@ -125,12 +125,7 @@ async def test_spark(test_args):
tst_container = tst_container.with_(env_variables(DEFAULT_ENV_VARS))
test_path = test_args.test_path if test_args.test_path else "tests/functional/adapter"
result = await tst_container.with_exec(
- ["python", "-m", "pytest",
- "-v",
- "--profile", test_args.profile,
- "-n", "auto",
- test_path,
- ]
+ ["pytest", "-v", "--profile", test_profile, "-n", "auto", test_path]
).stdout()
return result
diff --git a/dagger/scripts/install_os_reqs.sh b/dagger/scripts/install_os_reqs.sh
index 47457b8d6..b50027f52 100755
--- a/dagger/scripts/install_os_reqs.sh
+++ b/dagger/scripts/install_os_reqs.sh
@@ -7,4 +7,4 @@ apt-get update && apt-get install -y --no-install-recommends \
unixodbc \
unixodbc-dev \
libsasl2-modules-gssapi-mit \
- unzip
\ No newline at end of file
+ unzip
From 6108d4405630639022346d48f5e8a9e39286757e Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 8 Jan 2024 19:52:59 -0800
Subject: [PATCH 073/102] update changie
---
.changes/unreleased/Under the Hood-20230929-161218.yaml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.changes/unreleased/Under the Hood-20230929-161218.yaml b/.changes/unreleased/Under the Hood-20230929-161218.yaml
index c82e8252e..4dc54ae5c 100644
--- a/.changes/unreleased/Under the Hood-20230929-161218.yaml
+++ b/.changes/unreleased/Under the Hood-20230929-161218.yaml
@@ -1,6 +1,6 @@
kind: Under the Hood
-body: Add Github action for integration testing
+body: Add Github action for integration testing, use dagger-io to run tests. Remove circle ci workflow.
time: 2023-09-29T16:12:18.968755+02:00
custom:
- Author: JCZuurmond
+ Author: JCZuurmond, colin-rogers-dbt
Issue: "719"
From d472f3b61a4d84bc93323431638869a8ed1687b5 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Tue, 9 Jan 2024 09:33:57 -0800
Subject: [PATCH 074/102] Update .changes/unreleased/Under the
Hood-20230929-161218.yaml
Co-authored-by: Emily Rockman
---
.changes/unreleased/Under the Hood-20230929-161218.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.changes/unreleased/Under the Hood-20230929-161218.yaml b/.changes/unreleased/Under the Hood-20230929-161218.yaml
index 4dc54ae5c..9b5c6818b 100644
--- a/.changes/unreleased/Under the Hood-20230929-161218.yaml
+++ b/.changes/unreleased/Under the Hood-20230929-161218.yaml
@@ -1,5 +1,5 @@
kind: Under the Hood
-body: Add Github action for integration testing, use dagger-io to run tests. Remove circle ci workflow.
+body: Add GitHub action for integration testing and use dagger-io to run tests. Remove CircleCI workflow.
time: 2023-09-29T16:12:18.968755+02:00
custom:
Author: JCZuurmond, colin-rogers-dbt
From ce92bcf4a9063d75beed734d9009a3e8f4be1dd0 Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 9 Jan 2024 09:50:03 -0800
Subject: [PATCH 075/102] formatting fixes and simplify env_var handling
---
dagger/run_dbt_spark_tests.py | 26 ++++++++------------------
dagger/scripts/configure_odbc.sh | 1 -
docker/Dockerfile | 6 +++---
3 files changed, 11 insertions(+), 22 deletions(-)
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index c9455bdde..3e4c8347f 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -9,11 +9,13 @@
PG_PORT = 5432
load_dotenv(find_dotenv("test.env"))
-DEFAULT_ENV_VARS = {
- "DBT_TEST_USER_1": os.getenv("DBT_TEST_USER_1", "buildbot+dbt_test_user_1@dbtlabs.com"),
- "DBT_TEST_USER_2": os.getenv("DBT_TEST_USER_2", "buildbot+dbt_test_user_2@dbtlabs.com"),
- "DBT_TEST_USER_3": os.getenv("DBT_TEST_USER_3", "buildbot+dbt_test_user_3@dbtlabs.com"),
-}
+# if env vars aren't specified in test.env (i.e. in github actions worker), use the ones from the host
+TESTING_ENV_VARS = {env_name: os.environ[env_name] for env_name in os.environ
+ if env_name.startswith(("DD_", "DBT_"))}
+
+TESTING_ENV_VARS.update({
+ "ODBC_DRIVER": "/opt/simba/spark/lib/64/libsparkodbc_sb64.so",
+})
def env_variables(envs: dict[str, str]):
@@ -25,16 +27,6 @@ def env_variables_inner(ctr: dagger.Container):
return env_variables_inner
-def get_databricks_env_vars():
- return {
- "DBT_DATABRICKS_TOKEN": os.environ["DBT_DATABRICKS_TOKEN"],
- "DBT_DATABRICKS_HOST_NAME": os.environ["DBT_DATABRICKS_HOST_NAME"],
- "DBT_DATABRICKS_ENDPOINT": os.environ["DBT_DATABRICKS_ENDPOINT"],
- "DBT_DATABRICKS_CLUSTER_NAME": os.environ["DBT_DATABRICKS_CLUSTER_NAME"],
- "ODBC_DRIVER": "/opt/simba/spark/lib/64/libsparkodbc_sb64.so",
- }
-
-
async def get_postgres_container(client: dagger.Client) -> (dagger.Container, str):
ctr = await (
client.container()
@@ -120,9 +112,7 @@ async def test_spark(test_args):
tst_container = tst_container.with_exec(["pip", "install", "pyspark"])
tst_container = tst_container.with_exec(["apt-get", "install", "openjdk-17-jre", "-y"])
- if "databricks" in test_profile:
- tst_container = tst_container.with_(env_variables(get_databricks_env_vars()))
- tst_container = tst_container.with_(env_variables(DEFAULT_ENV_VARS))
+ tst_container = tst_container.with_(env_variables(TESTING_ENV_VARS))
test_path = test_args.test_path if test_args.test_path else "tests/functional/adapter"
result = await tst_container.with_exec(
["pytest", "-v", "--profile", test_profile, "-n", "auto", test_path]
diff --git a/dagger/scripts/configure_odbc.sh b/dagger/scripts/configure_odbc.sh
index 50e80914d..ddf020ad2 100755
--- a/dagger/scripts/configure_odbc.sh
+++ b/dagger/scripts/configure_odbc.sh
@@ -8,5 +8,4 @@ dpkg -i /tmp/SimbaSparkODBC-2.6.16.1019-Debian-64bit/simbaspark_2.6.16.1019-2_am
echo "--------------------------------------------"
echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini
dpkg -l | grep Simba # confirm that the driver is installed
-export ODBC_DRIVER="/opt/simba/spark/lib/64/libsparkodbc_sb64.so"
rm -rf /tmp
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 85d01ba8a..bb4d378ed 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -2,8 +2,8 @@ ARG OPENJDK_VERSION=8
FROM eclipse-temurin:${OPENJDK_VERSION}-jre
ARG BUILD_DATE
-ARG SPARK_VERSION=3.1.3
-ARG HADOOP_VERSION=3.2
+ARG SPARK_VERSION=3.3.2
+ARG HADOOP_VERSION=3
LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \
org.label-schema.build-date=$BUILD_DATE \
@@ -14,7 +14,7 @@ ENV PATH="/usr/spark/bin:/usr/spark/sbin:${PATH}"
RUN apt-get update && \
apt-get install -y wget netcat procps libpostgresql-jdbc-java && \
- wget -q "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+ wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /usr/spark && \
From 56b14bcd3702cfe85de73d3c8bdf6b794aeb1664 Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 9 Jan 2024 13:30:42 -0800
Subject: [PATCH 076/102] remove tox, update CONTRIBUTING.md and cleanup GHA
workflows
---
.github/workflows/integration.yml | 66 ++++--------------------
.github/workflows/main.yml | 12 ++---
CONTRIBUTING.md | 24 +++++++--
dagger/run_dbt_spark_tests.py | 2 +-
tox.ini | 83 -------------------------------
5 files changed, 33 insertions(+), 154 deletions(-)
delete mode 100644 tox.ini
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 67b6ed8e3..53fb9c2ac 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -15,8 +15,14 @@ on:
branches:
- "main"
- "*.latest"
- - "releases/*"
+
pull_request_target:
+ paths-ignore:
+ - ".changes/**"
+ - ".flake8"
+ - ".gitignore"
+ - "**.md"
+
workflow_dispatch:
inputs:
dbt-core-branch:
@@ -37,60 +43,9 @@ defaults:
shell: bash
jobs:
- # generate test metadata about what files changed and the testing matrix to use
- test-metadata:
- # run if not a PR from a forked repository or has a label to mark as safe to test
- if: >-
- github.event_name != 'pull_request_target' ||
- github.event.pull_request.head.repo.full_name == github.repository ||
- contains(github.event.pull_request.labels.*.name, 'ok to test')
- runs-on: ubuntu-latest
- outputs:
- matrix: ${{ steps.generate-matrix.outputs.result }}
-
- steps:
- - name: Check out the repository (non-PR)
- if: github.event_name != 'pull_request_target'
- uses: actions/checkout@v3
- with:
- persist-credentials: false
-
- - name: Check out the repository (PR)
- if: github.event_name == 'pull_request_target'
- uses: actions/checkout@v3
- with:
- persist-credentials: false
- ref: ${{ github.event.pull_request.head.sha }}
-
- - name: Check if relevant files changed
- if: github.event_name == 'pull_request_target'
- # https://github.com/marketplace/actions/paths-changes-filter
- # For each filter, it sets output variable named by the filter to the text:
- # 'true' - if any of changed files matches any of filter rules
- # 'false' - if none of changed files matches any of filter rules
- # also, returns:
- # `changes` - JSON array with names of all filters matching any of the changed files
- uses: dorny/paths-filter@v2
- id: get-changes
- with:
- token: ${{ secrets.GITHUB_TOKEN }}
- filters: |
- spark:
- - 'dbt/**'
- - 'tests/**'
- - 'dev-requirements.txt'
test:
- name: ${{ matrix.test }} / python ${{ matrix.python-version }} / ubuntu-latest
-
- # run if not a PR from a forked repository or has a label to mark as safe to test
- # also checks that the matrix generated is not empty
- if: >-
- (
- github.event_name != 'pull_request_target' ||
- github.event.pull_request.head.repo.full_name == github.repository ||
- contains(github.event.pull_request.labels.*.name, 'ok to test')
- )
+ name: ${{ matrix.test }}
runs-on: ubuntu-latest
strategy:
@@ -104,8 +59,6 @@ jobs:
- "databricks_http_cluster"
env:
- TOXENV: integration-${{ matrix.test }}
- PYTEST_ADDOPTS: "-v --color=yes --csv integration_results.csv"
DBT_INVOCATION_ENV: github-actions
DD_CIVISIBILITY_AGENTLESS_ENABLED: true
DD_API_KEY: ${{ secrets.DATADOG_API_KEY }}
@@ -138,7 +91,7 @@ jobs:
persist-credentials: false
ref: ${{ github.event.pull_request.head.sha }}
- - name: Set up Python ${{ matrix.python-version }}
+ - name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.11"
@@ -155,6 +108,5 @@ jobs:
pip install bumpversion
./.github/scripts/update_dbt_core_branch.sh ${{ inputs.dbt-core-branch }}
-
- name: Run tests for ${{ matrix.test }}
run: python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }}
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 30126325e..338413116 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -19,7 +19,6 @@ on:
branches:
- "main"
- "*.latest"
- - "releases/*"
pull_request:
workflow_dispatch:
@@ -81,10 +80,6 @@ jobs:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
- env:
- TOXENV: "unit"
- PYTEST_ADDOPTS: "-v --color=yes --csv unit_results.csv"
-
steps:
- name: Check out the repository
uses: actions/checkout@v3
@@ -100,10 +95,9 @@ jobs:
sudo apt-get install libsasl2-dev
python -m pip install --user --upgrade pip
python -m pip --version
- python -m pip install tox
- tox --version
- - name: Run tox
- run: tox
+
+ - name: Run unit tests
+ run: python -m pytest --color=yes --csv unit_results.csv -v tests/unit
- name: Get current date
if: always()
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a61306ea5..9145436b6 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -65,11 +65,27 @@ $EDITOR test.env
### Test commands
There are a few methods for running tests locally.
-#### `tox`
-`tox` takes care of managing Python virtualenvs and installing dependencies in order to run tests. You can also run tests in parallel, for example you can run unit tests for Python 3.8, Python 3.9, and `flake8` checks in parallel with `tox -p`. Also, you can run unit tests for specific python versions with `tox -e py38`. The configuration of these tests are located in `tox.ini`.
+#### dagger
+To run functional tests we rely on [dagger](https://dagger.io/). This launches a virtual container or containers to test against.
-#### `pytest`
-Finally, you can also run a specific test or group of tests using `pytest` directly. With a Python virtualenv active and dev dependencies installed you can do things like:
+```sh
+pip install -r dagger/requirements.txt
+python dagger/run_dbt_spark_tests.py --profile databricks_sql_endpoint --test-path tests/functional/adapter/test_basic.py::TestSimpleMaterializationsSpark::test_base
+```
+
+`--profile`: required, this is the kind of spark connection to test against
+
+_options_:
+ - "apache_spark"
+ - "spark_session"
+ - "databricks_sql_endpoint"
+ - "databricks_cluster"
+ - "databricks_http_cluster"
+
+`--test-path`: optional, this is the path to the test file you want to run. If not specified, all tests will be run.
+
+#### pytest
+Finally, you can also run a specific test or group of tests using `pytest` directly (if you have all the dependencies set up on your machine). With a Python virtualenv active and dev dependencies installed you can do things like:
```sh
# run all functional tests
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index 3e4c8347f..4cb16f7a0 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -14,7 +14,7 @@
if env_name.startswith(("DD_", "DBT_"))}
TESTING_ENV_VARS.update({
- "ODBC_DRIVER": "/opt/simba/spark/lib/64/libsparkodbc_sb64.so",
+ "ODBC_DRIVER": "Simba",
})
diff --git a/tox.ini b/tox.ini
deleted file mode 100644
index 31396b5ef..000000000
--- a/tox.ini
+++ /dev/null
@@ -1,83 +0,0 @@
-[tox]
-skipsdist = True
-envlist = unit, flake8, integration-spark-thrift
-
-[testenv:{unit,py38,py39,py310,py}]
-allowlist_externals =
- /bin/bash
-commands = /bin/bash -c '{envpython} -m pytest -v {posargs} tests/unit'
-passenv =
- DBT_*
- PYTEST_ADDOPTS
-deps =
- -r{toxinidir}/requirements.txt
- -r{toxinidir}/dev-requirements.txt
-
-[testenv:integration-spark-databricks-http]
-allowlist_externals =
- /bin/bash
-basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_http_cluster {posargs} -n4 tests/functional/adapter/*'
-passenv =
- DBT_*
- PYTEST_ADDOPTS
-deps =
- -r{toxinidir}/requirements.txt
- -r{toxinidir}/dev-requirements.txt
- -e.
-
-[testenv:integration-spark-databricks-odbc-cluster]
-allowlist_externals =
- /bin/bash
-basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_cluster {posargs} -n4 tests/functional/adapter/*'
-passenv =
- DBT_*
- PYTEST_ADDOPTS
- ODBC_DRIVER
-deps =
- -r{toxinidir}/requirements.txt
- -r{toxinidir}/dev-requirements.txt
- -e.
-
-[testenv:integration-spark-databricks-odbc-sql-endpoint]
-allowlist_externals =
- /bin/bash
-basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpoint {posargs} -n4 tests/functional/adapter/*'
-passenv =
- DBT_*
- PYTEST_ADDOPTS
- ODBC_DRIVER
-deps =
- -r{toxinidir}/requirements.txt
- -r{toxinidir}/dev-requirements.txt
- -e.
-
-
-[testenv:integration-spark-thrift]
-description = run integration tests against a Spark thrift server
-allowlist_externals =
- /bin/bash
-passenv =
- DBT_*
- PYTEST_ADDOPTS
-deps =
- -r{toxinidir}/requirements.txt
- -r{toxinidir}/dev-requirements.txt
- -e.
-commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark {posargs} -n4 tests/functional/adapter/*'
-
-[testenv:integration-spark-session]
-description = run integration tests against a Spark session
-allowlist_externals =
- /bin/bash
-passenv =
- DBT_*
- PYTEST_*
- PIP_CACHE_DIR
-deps =
- -r{toxinidir}/requirements.txt
- -r{toxinidir}/dev-requirements.txt
- -e.[session]
-commands = /bin/bash -c '{envpython} -m pytest -v --profile spark_session {posargs} -n4 tests/functional/adapter/*'
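With tox.ini gone, the dagger runner described in the CONTRIBUTING.md hunk above is the single entry point for functional tests. A hedged Python equivalent of the documented shell invocation (the profile and test path are taken from the options documented above; this assumes a working Docker/dagger setup):

```python
import subprocess
import sys

# Mirrors the CONTRIBUTING.md example: install the dagger requirements, then run one
# test module against the spark_session profile.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "-r", "dagger/requirements.txt"],
    check=True,
)
subprocess.run(
    [
        sys.executable,
        "dagger/run_dbt_spark_tests.py",
        "--profile", "spark_session",
        "--test-path", "tests/functional/adapter/test_basic.py",
    ],
    check=True,
)
```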
From 9849c1c2b4e3c14a772ef59b5f331e0b5785d673 Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 9 Jan 2024 13:34:17 -0800
Subject: [PATCH 077/102] remove tox, update CONTRIBUTING.md and cleanup GHA
workflows
---
.github/workflows/integration.yml | 1 -
1 file changed, 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 53fb9c2ac..e2f0dcfdc 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -73,7 +73,6 @@ jobs:
DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com"
DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com"
DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com"
- ODBC_DRIVER: "Simba"
steps:
- name: Check out the repository
From f9a4c585a263d0a76c009ba1c9c7acc30f3bf462 Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 9 Jan 2024 13:42:48 -0800
Subject: [PATCH 078/102] install test reqs in main.yml
---
.github/workflows/main.yml | 2 ++
1 file changed, 2 insertions(+)
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 338413116..c16a16206 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -95,6 +95,8 @@ jobs:
sudo apt-get install libsasl2-dev
python -m pip install --user --upgrade pip
python -m pip --version
+ python -m pip install -e .
+ python -m pip install -r dev-requirements.txt
- name: Run unit tests
run: python -m pytest --color=yes --csv unit_results.csv -v tests/unit
From bbe17a8fa8a2c181d5d98aafdf12eba9c371d96e Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 9 Jan 2024 13:45:37 -0800
Subject: [PATCH 079/102] install test reqs in main.yml
---
.github/workflows/main.yml | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index c16a16206..20f3f88f4 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -95,8 +95,9 @@ jobs:
sudo apt-get install libsasl2-dev
python -m pip install --user --upgrade pip
python -m pip --version
- python -m pip install -e .
+ python -m pip install -r requirements.txt
python -m pip install -r dev-requirements.txt
+ python -m pip install -e .
- name: Run unit tests
run: python -m pytest --color=yes --csv unit_results.csv -v tests/unit
From 3f44e9663db6606a9fe0c5d5208ab2c2d31a791b Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 9 Jan 2024 13:51:23 -0800
Subject: [PATCH 080/102] formatting
---
CONTRIBUTING.md | 4 ++--
dagger/run_dbt_spark_tests.py | 11 ++++++-----
2 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 9145436b6..6fcaacea8 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -66,14 +66,14 @@ $EDITOR test.env
There are a few methods for running tests locally.
#### dagger
-To run functional tests we rely on [dagger](https://dagger.io/). This launches a virtual container or containers to test against.
+To run functional tests we rely on [dagger](https://dagger.io/). This launches a virtual container or containers to test against.
```sh
pip install -r dagger/requirements.txt
python dagger/run_dbt_spark_tests.py --profile databricks_sql_endpoint --test-path tests/functional/adapter/test_basic.py::TestSimpleMaterializationsSpark::test_base
```
-`--profile`: required, this is the kind of spark connection to test against
+`--profile`: required, this is the kind of spark connection to test against
_options_:
- "apache_spark"
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index 4cb16f7a0..dd1a4395d 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -10,12 +10,13 @@
PG_PORT = 5432
load_dotenv(find_dotenv("test.env"))
# if env vars aren't specified in test.env (e.g. in the GitHub Actions worker), use the ones from the host
-TESTING_ENV_VARS = {env_name: os.environ[env_name] for env_name in os.environ
- if env_name.startswith(("DD_", "DBT_"))}
+TESTING_ENV_VARS = {
+ env_name: os.environ[env_name]
+ for env_name in os.environ
+ if env_name.startswith(("DD_", "DBT_"))
+}
-TESTING_ENV_VARS.update({
- "ODBC_DRIVER": "Simba",
-})
+TESTING_ENV_VARS.update({"ODBC_DRIVER": "Simba"})
def env_variables(envs: dict[str, str]):
From afd3866a4b39c0df0999bbcbc333d78eff9927eb Mon Sep 17 00:00:00 2001
From: Colin
Date: Wed, 10 Jan 2024 09:59:30 -0800
Subject: [PATCH 081/102] remove tox from dev-requirements.txt and Makefile
---
Makefile | 7 ++++---
dev-requirements.txt | 1 -
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/Makefile b/Makefile
index cc1d9f75d..2bd1055fa 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@
.PHONY: dev
dev: ## Installs adapter in develop mode along with development dependencies
@\
- pip install -e . -r requirements.txt -r dev-requirements.txt && pre-commit install
+ pip install -e . -r requirements.txt -r dev-requirements.txt -r dagger/requirements.txt && pre-commit install
.PHONY: dev-uninstall
dev-uninstall: ## Uninstalls all packages while maintaining the virtual environment
@@ -40,12 +40,13 @@ linecheck: ## Checks for all Python lines 100 characters or more
.PHONY: unit
unit: ## Runs unit tests with py38.
@\
- tox -e py38
+ python -m pytest tests/unit
.PHONY: test
test: ## Runs unit tests with py38 and code checks against staged changes.
@\
- tox -p -e py38; \
+ python -m pytest tests/unit; \
+ python dagger/run_dbt_spark_tests.py --profile spark_session \
pre-commit run black-check --hook-stage manual | grep -v "INFO"; \
pre-commit run flake8-check --hook-stage manual | grep -v "INFO"; \
pre-commit run mypy-check --hook-stage manual | grep -v "INFO"
diff --git a/dev-requirements.txt b/dev-requirements.txt
index bb3282b44..765482e25 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -22,7 +22,6 @@ pytest-dotenv~=0.5.2
pytest-logbook~=1.2
pytest-xdist~=3.5
pytz~=2023.3
-tox~=4.11
types-pytz~=2023.3
types-requests~=2.31
twine~=4.0
From 259ebc7cbe75a7f22bff8075e7c7bba0581cd585 Mon Sep 17 00:00:00 2001
From: Colin
Date: Wed, 10 Jan 2024 10:33:50 -0800
Subject: [PATCH 082/102] clarify spark crt instantiation
---
dagger/run_dbt_spark_tests.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index dd1a4395d..718519909 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -41,7 +41,7 @@ async def get_postgres_container(client: dagger.Client) -> (dagger.Container, st
async def get_spark_container(client: dagger.Client) -> (dagger.Container, str):
spark_dir = client.host().directory("./dagger/spark-container")
- spark_ctr = (
+ spark_ctr_base = (
client.container()
.from_("eclipse-temurin:8-jre")
.with_directory("/spark_setup", spark_dir)
@@ -66,7 +66,7 @@ async def get_spark_container(client: dagger.Client) -> (dagger.Container, str):
pg_ctr, pg_host = await get_postgres_container(client)
spark_ctr = (
- spark_ctr.with_service_binding(alias=pg_host, service=pg_ctr)
+ spark_ctr_base.with_service_binding(alias=pg_host, service=pg_ctr)
.with_exec(
[
"/scripts/entrypoint.sh",
From a8a7010d934c951512cd66f8b8cbf13d71c45176 Mon Sep 17 00:00:00 2001
From: Colin
Date: Wed, 10 Jan 2024 11:52:57 -0800
Subject: [PATCH 083/102] add comments on python-version
---
.github/workflows/integration.yml | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index e2f0dcfdc..94dece350 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -90,7 +90,8 @@ jobs:
persist-credentials: false
ref: ${{ github.event.pull_request.head.sha }}
- - name: Set up Python
+ # the python version used here is not what is used in the tests themselves
+ - name: Set up Python for dagger
uses: actions/setup-python@v4
with:
python-version: "3.11"
From fcf074b0510b163523b8ac998c605a39e6ead7bd Mon Sep 17 00:00:00 2001
From: Colin
Date: Wed, 10 Jan 2024 13:41:29 -0800
Subject: [PATCH 084/102] initial migration changes
---
dbt/adapters/spark/column.py | 2 +-
dbt/adapters/spark/connections.py | 9 +++++----
dbt/adapters/spark/impl.py | 13 +++++++------
dbt/adapters/spark/relation.py | 2 +-
dbt/adapters/spark/session.py | 2 +-
dev-requirements.txt | 4 ++--
6 files changed, 17 insertions(+), 15 deletions(-)
diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py
index a57fa0565..dbc872051 100644
--- a/dbt/adapters/spark/column.py
+++ b/dbt/adapters/spark/column.py
@@ -2,7 +2,7 @@
from typing import Any, Dict, Optional, TypeVar, Union
from dbt.adapters.base.column import Column
-from dbt.dataclass_schema import dbtClassMixin
+from dbt.common.dataclass_schema import dbtClassMixin
Self = TypeVar("Self", bound="SparkColumn")
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 966f5584e..76390a2bc 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -2,11 +2,13 @@
import dbt.exceptions
from dbt.adapters.base import Credentials
+from dbt.adapters.contracts.connection import AdapterResponse, ConnectionState
+from dbt.adapters.events.logging import AdapterLogger
from dbt.adapters.sql import SQLConnectionManager
-from dbt.contracts.connection import ConnectionState, AdapterResponse
-from dbt.events import AdapterLogger
+
from dbt.utils import DECIMALS
from dbt.adapters.spark import __version__
+from dbt.adapters.spark.session import Connection
try:
from TCLIService.ttypes import TOperationState as ThriftState
@@ -22,8 +24,7 @@
pyodbc = None
from datetime import datetime
import sqlparams
-from dbt.contracts.connection import Connection
-from dbt.dataclass_schema import StrEnum
+from dbt.common.dataclass_schema import StrEnum
from dataclasses import dataclass, field
from typing import Any, Dict, Optional, Union, Tuple, List, Generator, Iterable, Sequence
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 16c3a3cb7..325139911 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -4,6 +4,9 @@
from typing import Any, Dict, Iterable, List, Optional, Union, Type, Tuple, Callable, Set
from dbt.adapters.base.relation import InformationSchema
+from dbt.adapters.contracts.connection import AdapterResponse
+from dbt.adapters.events.logging import AdapterLogger
+from dbt.common.utils import AttrDict, executor
from dbt.contracts.graph.manifest import Manifest
from typing_extensions import TypeAlias
@@ -13,6 +16,7 @@
import dbt
import dbt.exceptions
+
from dbt.adapters.base import AdapterConfig, PythonJobHelper
from dbt.adapters.base.impl import catch_as_completed, ConstraintSupport
from dbt.adapters.sql import SQLAdapter
@@ -24,12 +28,9 @@
AllPurposeClusterPythonJobHelper,
)
from dbt.adapters.base import BaseRelation
-from dbt.clients.agate_helper import DEFAULT_TYPE_TESTER
-from dbt.contracts.connection import AdapterResponse
-from dbt.contracts.graph.nodes import ConstraintType
-from dbt.contracts.relation import RelationType
-from dbt.events import AdapterLogger
-from dbt.utils import executor, AttrDict
+from dbt.adapters.contracts.relation import RelationType
+from dbt.common.clients.agate_helper import DEFAULT_TYPE_TESTER
+from dbt.common.contracts.constraints import ConstraintType
logger = AdapterLogger("Spark")
diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py
index e80f2623f..1fa1272f4 100644
--- a/dbt/adapters/spark/relation.py
+++ b/dbt/adapters/spark/relation.py
@@ -2,9 +2,9 @@
from dataclasses import dataclass, field
from dbt.adapters.base.relation import BaseRelation, Policy
+from dbt.adapters.events.logging import AdapterLogger
from dbt.exceptions import DbtRuntimeError
-from dbt.events import AdapterLogger
logger = AdapterLogger("Spark")
diff --git a/dbt/adapters/spark/session.py b/dbt/adapters/spark/session.py
index b5b2bebdb..1def33be1 100644
--- a/dbt/adapters/spark/session.py
+++ b/dbt/adapters/spark/session.py
@@ -7,7 +7,7 @@
from typing import Any, Dict, List, Optional, Tuple, Union, Sequence
from dbt.adapters.spark.connections import SparkConnectionWrapper
-from dbt.events import AdapterLogger
+from dbt.adapters.events.logging import AdapterLogger
from dbt.utils import DECIMALS
from dbt.exceptions import DbtRuntimeError
from pyspark.sql import DataFrame, Row, SparkSession
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 765482e25..e56b221c7 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,7 +1,7 @@
# install latest changes in dbt-core
# TODO: how to automate switching from develop to version branches?
-git+https://github.com/dbt-labs/dbt-core.git@c2bc2f009bbeeb46b3c69d082ab4d485597898af#egg=dbt-core&subdirectory=core
-git+https://github.com/dbt-labs/dbt-core.git@c2bc2f009bbeeb46b3c69d082ab4d485597898af#egg=dbt-tests-adapter&subdirectory=tests/adapter
+git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
+git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
# if version 1.x or greater -> pin to major version
# if version 0.x -> pin to minor
From 1b1fcec674317324527c8f2900549157adf6d8b2 Mon Sep 17 00:00:00 2001
From: Colin
Date: Wed, 10 Jan 2024 13:42:16 -0800
Subject: [PATCH 085/102] unpin
---
dev-requirements.txt | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 765482e25..e56b221c7 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,7 +1,7 @@
# install latest changes in dbt-core
# TODO: how to automate switching from develop to version branches?
-git+https://github.com/dbt-labs/dbt-core.git@c2bc2f009bbeeb46b3c69d082ab4d485597898af#egg=dbt-core&subdirectory=core
-git+https://github.com/dbt-labs/dbt-core.git@c2bc2f009bbeeb46b3c69d082ab4d485597898af#egg=dbt-tests-adapter&subdirectory=tests/adapter
+git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
+git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
# if version 1.x or greater -> pin to major version
# if version 0.x -> pin to minor
From 0a2b73db07bd2128519b90952985a837a38d9a01 Mon Sep 17 00:00:00 2001
From: Colin
Date: Wed, 10 Jan 2024 17:19:03 -0800
Subject: [PATCH 086/102] implement core / adapters decoupling
---
dagger/run_dbt_spark_tests.py | 3 ++-
dbt/adapters/spark/connections.py | 12 ++++++------
dbt/adapters/spark/impl.py | 11 +++++------
dbt/adapters/spark/relation.py | 2 +-
dbt/adapters/spark/session.py | 4 ++--
tests/unit/test_adapter.py | 27 ++++++++++++++-------------
tests/unit/utils.py | 2 +-
7 files changed, 31 insertions(+), 30 deletions(-)
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index 718519909..2fde4a25d 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -2,6 +2,7 @@
import argparse
import sys
+from typing import Dict
import anyio as anyio
import dagger as dagger
@@ -19,7 +20,7 @@
TESTING_ENV_VARS.update({"ODBC_DRIVER": "Simba"})
-def env_variables(envs: dict[str, str]):
+def env_variables(envs: Dict[str, str]):
def env_variables_inner(ctr: dagger.Container):
for key, value in envs.items():
ctr = ctr.with_env_variable(key, value)
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 76390a2bc..fa6f48f52 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -2,13 +2,13 @@
import dbt.exceptions
from dbt.adapters.base import Credentials
-from dbt.adapters.contracts.connection import AdapterResponse, ConnectionState
+from dbt.adapters.contracts.connection import AdapterResponse, ConnectionState, Connection
from dbt.adapters.events.logging import AdapterLogger
from dbt.adapters.sql import SQLConnectionManager
+from dbt.common.exceptions import DbtConfigError
-from dbt.utils import DECIMALS
+from dbt.common.utils.encoding import DECIMALS
from dbt.adapters.spark import __version__
-from dbt.adapters.spark.session import Connection
try:
from TCLIService.ttypes import TOperationState as ThriftState
@@ -391,7 +391,7 @@ def validate_creds(cls, creds: Any, required: Iterable[str]) -> None:
for key in required:
if not hasattr(creds, key):
- raise dbt.exceptions.DbtProfileError(
+ raise DbtConfigError(
"The config '{}' is required when using the {} method"
" to connect to Spark".format(key, method)
)
@@ -482,7 +482,7 @@ def open(cls, connection: Connection) -> Connection:
endpoint=creds.endpoint
)
else:
- raise dbt.exceptions.DbtProfileError(
+ raise DbtConfigError(
"Either `cluster` or `endpoint` must set when"
" using the odbc method to connect to Spark"
)
@@ -526,7 +526,7 @@ def open(cls, connection: Connection) -> Connection:
Connection(server_side_parameters=creds.server_side_parameters)
)
else:
- raise dbt.exceptions.DbtProfileError(
+ raise DbtConfigError(
f"invalid credential method: {creds.method}"
)
break
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 325139911..8cc7d848b 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -7,7 +7,6 @@
from dbt.adapters.contracts.connection import AdapterResponse
from dbt.adapters.events.logging import AdapterLogger
from dbt.common.utils import AttrDict, executor
-from dbt.contracts.graph.manifest import Manifest
from typing_extensions import TypeAlias
@@ -28,7 +27,7 @@
AllPurposeClusterPythonJobHelper,
)
from dbt.adapters.base import BaseRelation
-from dbt.adapters.contracts.relation import RelationType
+from dbt.adapters.contracts.relation import RelationType, RelationConfig
from dbt.common.clients.agate_helper import DEFAULT_TYPE_TESTER
from dbt.common.contracts.constraints import ConstraintType
@@ -353,9 +352,9 @@ def _get_columns_for_catalog(self, relation: BaseRelation) -> Iterable[Dict[str,
yield as_dict
def get_catalog(
- self, manifest: Manifest, selected_nodes: Optional[Set] = None
+ self, relation_configs: Iterable[RelationConfig], selected_nodes: Optional[Set] = None
) -> Tuple[agate.Table, List[Exception]]:
- schema_map = self._get_catalog_schemas(manifest)
+ schema_map = self._get_catalog_schemas(relation_configs)
if len(schema_map) > 1:
raise dbt.exceptions.CompilationError(
f"Expected only one database in get_catalog, found " f"{list(schema_map)}"
@@ -372,7 +371,7 @@ def get_catalog(
self._get_one_catalog,
info,
[schema],
- manifest,
+ relation_configs,
)
)
catalogs, exceptions = catch_as_completed(futures)
@@ -382,7 +381,7 @@ def _get_one_catalog(
self,
information_schema: InformationSchema,
schemas: Set[str],
- manifest: Manifest,
+ relation_configs: Iterable[RelationConfig],
) -> agate.Table:
if len(schemas) != 1:
raise dbt.exceptions.CompilationError(
diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py
index 1fa1272f4..a6d679d56 100644
--- a/dbt/adapters/spark/relation.py
+++ b/dbt/adapters/spark/relation.py
@@ -4,7 +4,7 @@
from dbt.adapters.base.relation import BaseRelation, Policy
from dbt.adapters.events.logging import AdapterLogger
-from dbt.exceptions import DbtRuntimeError
+from dbt.common.exceptions import DbtRuntimeError
logger = AdapterLogger("Spark")
diff --git a/dbt/adapters/spark/session.py b/dbt/adapters/spark/session.py
index 1def33be1..d5d3ff050 100644
--- a/dbt/adapters/spark/session.py
+++ b/dbt/adapters/spark/session.py
@@ -8,8 +8,8 @@
from dbt.adapters.spark.connections import SparkConnectionWrapper
from dbt.adapters.events.logging import AdapterLogger
-from dbt.utils import DECIMALS
-from dbt.exceptions import DbtRuntimeError
+from dbt.common.utils.encoding import DECIMALS
+from dbt.common.exceptions import DbtRuntimeError
from pyspark.sql import DataFrame, Row, SparkSession
from pyspark.sql.utils import AnalysisException
diff --git a/tests/unit/test_adapter.py b/tests/unit/test_adapter.py
index a7da63301..b46f7eef6 100644
--- a/tests/unit/test_adapter.py
+++ b/tests/unit/test_adapter.py
@@ -1,4 +1,5 @@
import unittest
+from multiprocessing import get_context
from unittest import mock
import dbt.flags as flags
@@ -146,7 +147,7 @@ def _get_target_odbc_sql_endpoint(self, project):
def test_http_connection(self):
config = self._get_target_http(self.project_cfg)
- adapter = SparkAdapter(config)
+ adapter = SparkAdapter(config, get_context("spawn"))
def hive_http_connect(thrift_transport, configuration):
self.assertEqual(thrift_transport.scheme, "https")
@@ -171,7 +172,7 @@ def hive_http_connect(thrift_transport, configuration):
def test_thrift_connection(self):
config = self._get_target_thrift(self.project_cfg)
- adapter = SparkAdapter(config)
+ adapter = SparkAdapter(config, get_context("spawn"))
def hive_thrift_connect(
host, port, username, auth, kerberos_service_name, password, configuration
@@ -195,7 +196,7 @@ def hive_thrift_connect(
def test_thrift_ssl_connection(self):
config = self._get_target_use_ssl_thrift(self.project_cfg)
- adapter = SparkAdapter(config)
+ adapter = SparkAdapter(config, get_context("spawn"))
def hive_thrift_connect(thrift_transport, configuration):
self.assertIsNotNone(thrift_transport)
@@ -215,7 +216,7 @@ def hive_thrift_connect(thrift_transport, configuration):
def test_thrift_connection_kerberos(self):
config = self._get_target_thrift_kerberos(self.project_cfg)
- adapter = SparkAdapter(config)
+ adapter = SparkAdapter(config, get_context("spawn"))
def hive_thrift_connect(
host, port, username, auth, kerberos_service_name, password, configuration
@@ -239,7 +240,7 @@ def hive_thrift_connect(
def test_odbc_cluster_connection(self):
config = self._get_target_odbc_cluster(self.project_cfg)
- adapter = SparkAdapter(config)
+ adapter = SparkAdapter(config, get_context("spawn"))
def pyodbc_connect(connection_str, autocommit):
self.assertTrue(autocommit)
@@ -266,7 +267,7 @@ def pyodbc_connect(connection_str, autocommit):
def test_odbc_endpoint_connection(self):
config = self._get_target_odbc_sql_endpoint(self.project_cfg)
- adapter = SparkAdapter(config)
+ adapter = SparkAdapter(config, get_context("spawn"))
def pyodbc_connect(connection_str, autocommit):
self.assertTrue(autocommit)
@@ -329,7 +330,7 @@ def test_parse_relation(self):
input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows]
config = self._get_target_http(self.project_cfg)
- rows = SparkAdapter(config).parse_describe_extended(relation, input_cols)
+ rows = SparkAdapter(config, get_context("spawn")).parse_describe_extended(relation, input_cols)
self.assertEqual(len(rows), 4)
self.assertEqual(
rows[0].to_column_dict(omit_none=False),
@@ -418,7 +419,7 @@ def test_parse_relation_with_integer_owner(self):
input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows]
config = self._get_target_http(self.project_cfg)
- rows = SparkAdapter(config).parse_describe_extended(relation, input_cols)
+ rows = SparkAdapter(config, get_context("spawn")).parse_describe_extended(relation, input_cols)
self.assertEqual(rows[0].to_column_dict().get("table_owner"), "1234")
@@ -454,7 +455,7 @@ def test_parse_relation_with_statistics(self):
input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows]
config = self._get_target_http(self.project_cfg)
- rows = SparkAdapter(config).parse_describe_extended(relation, input_cols)
+ rows = SparkAdapter(config, get_context("spawn")).parse_describe_extended(relation, input_cols)
self.assertEqual(len(rows), 1)
self.assertEqual(
rows[0].to_column_dict(omit_none=False),
@@ -483,7 +484,7 @@ def test_parse_relation_with_statistics(self):
def test_relation_with_database(self):
config = self._get_target_http(self.project_cfg)
- adapter = SparkAdapter(config)
+ adapter = SparkAdapter(config, get_context("spawn"))
# fine
adapter.Relation.create(schema="different", identifier="table")
with self.assertRaises(DbtRuntimeError):
@@ -564,7 +565,7 @@ def test_parse_columns_from_information_with_table_type_and_delta_provider(self)
)
config = self._get_target_http(self.project_cfg)
- columns = SparkAdapter(config).parse_columns_from_information(relation)
+ columns = SparkAdapter(config, get_context("spawn")).parse_columns_from_information(relation)
self.assertEqual(len(columns), 4)
self.assertEqual(
columns[0].to_column_dict(omit_none=False),
@@ -649,7 +650,7 @@ def test_parse_columns_from_information_with_view_type(self):
)
config = self._get_target_http(self.project_cfg)
- columns = SparkAdapter(config).parse_columns_from_information(relation)
+ columns = SparkAdapter(config, get_context("spawn")).parse_columns_from_information(relation)
self.assertEqual(len(columns), 4)
self.assertEqual(
columns[1].to_column_dict(omit_none=False),
@@ -715,7 +716,7 @@ def test_parse_columns_from_information_with_table_type_and_parquet_provider(sel
)
config = self._get_target_http(self.project_cfg)
- columns = SparkAdapter(config).parse_columns_from_information(relation)
+ columns = SparkAdapter(config, get_context("spawn")).parse_columns_from_information(relation)
self.assertEqual(len(columns), 4)
self.assertEqual(
diff --git a/tests/unit/utils.py b/tests/unit/utils.py
index ac8c62244..a32d6608d 100644
--- a/tests/unit/utils.py
+++ b/tests/unit/utils.py
@@ -9,7 +9,7 @@
import agate
import pytest
-from dbt.dataclass_schema import ValidationError
+from dbt.common.dataclass_schema import ValidationError
from dbt.config.project import PartialProject
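Most of the unit-test churn above is one repeated change: the adapter constructor now takes a multiprocessing context alongside the config, so the tests supply it explicitly instead of relying on core. A hedged sketch of the pattern (assumes `SparkAdapter` is exported from `dbt.adapters.spark`, as elsewhere in this series; constructing a runtime config is out of scope here):

```python
from multiprocessing import get_context

from dbt.adapters.spark import SparkAdapter


def make_test_adapter(config):
    """Construct the adapter the way the updated unit tests do."""
    # "spawn" matches the context used throughout tests/unit/test_adapter.py above.
    return SparkAdapter(config, get_context("spawn"))
```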
From bd86ee1a3b7f9eebccf6410b78e51681244b05e0 Mon Sep 17 00:00:00 2001
From: Colin
Date: Thu, 11 Jan 2024 11:28:01 -0800
Subject: [PATCH 087/102] fix list_relations
---
dbt/adapters/spark/impl.py | 6 ++++++
dbt/include/spark/macros/adapters.sql | 4 ++--
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 8cc7d848b..abc6a6ff6 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -1,3 +1,4 @@
+import os
import re
from concurrent.futures import Future
from dataclasses import dataclass
@@ -32,6 +33,11 @@
from dbt.common.contracts.constraints import ConstraintType
logger = AdapterLogger("Spark")
+packages = ["pyhive.hive", "thrift.transport", "thrift.protocol"]
+log_level = os.getenv("DBT_SPARK_LOG_LEVEL", "ERROR")
+for package in packages:
+ logger.debug(f"Setting {package} logging to {log_level}")
+ logger.set_adapter_dependency_log_level(package, log_level)
GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME = "get_columns_in_relation_raw"
LIST_SCHEMAS_MACRO_NAME = "list_schemas"
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index bfc1f198d..bf9f63cf9 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -294,7 +294,7 @@
{% macro spark__list_relations_without_caching(relation) %}
{% call statement('list_relations_without_caching', fetch_result=True) -%}
- show table extended in {{ relation }} like '*'
+ show table extended in {{ relation.schema }} like '*'
{% endcall %}
{% do return(load_result('list_relations_without_caching').table) %}
@@ -305,7 +305,7 @@
{#-- V2 iceberg tables #}
{#-- https://issues.apache.org/jira/browse/SPARK-33393 #}
{% call statement('list_relations_without_caching_show_tables', fetch_result=True) -%}
- show tables in {{ schema_relation }} like '*'
+ show tables in {{ schema_relation.schema }} like '*'
{% endcall %}
{% do return(load_result('list_relations_without_caching_show_tables').table) %}
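Besides the macro fix, this patch adds a module-level loop in impl.py that sets the PyHive/Thrift dependency loggers to `DBT_SPARK_LOG_LEVEL` (default ERROR). A hedged usage sketch, assuming the adapter is installed in the active environment (e.g. via `make dev`): the override has to be in the environment before the module is imported, since the loop runs at import time.

```python
import os

# Turn dependency logging up for a debugging session; variable name taken from the diff.
os.environ["DBT_SPARK_LOG_LEVEL"] = "DEBUG"

# Importing after setting the variable lets the module-level loop in impl.py pick it up.
import dbt.adapters.spark.impl  # noqa: E402,F401

print("pyhive/thrift loggers set to", os.environ["DBT_SPARK_LOG_LEVEL"])
```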
From cb5e05c783c4c1bad1d1d400bc97131e22f866e2 Mon Sep 17 00:00:00 2001
From: Colin
Date: Thu, 11 Jan 2024 11:38:06 -0800
Subject: [PATCH 088/102] fix typing and exception imports
---
dbt/adapters/spark/connections.py | 13 ++++++-------
dbt/adapters/spark/impl.py | 20 +++++++++++++++++---
tests/unit/test_adapter.py | 24 ++++++++++++++++++------
3 files changed, 41 insertions(+), 16 deletions(-)
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index fa6f48f52..c9c69294f 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -4,6 +4,7 @@
from dbt.adapters.base import Credentials
from dbt.adapters.contracts.connection import AdapterResponse, ConnectionState, Connection
from dbt.adapters.events.logging import AdapterLogger
+from dbt.adapters.exceptions import FailedToConnectError
from dbt.adapters.sql import SQLConnectionManager
from dbt.common.exceptions import DbtConfigError
@@ -292,11 +293,11 @@ def execute(self, sql: str, bindings: Optional[List[Any]] = None) -> None:
if poll_state.errorMessage:
logger.debug("Poll response: {}".format(poll_state))
logger.debug("Poll status: {}".format(state))
- raise dbt.exceptions.DbtDatabaseError(poll_state.errorMessage)
+ raise dbt.common.exceptions.DbtDatabaseError(poll_state.errorMessage)
elif state not in STATE_SUCCESS:
status_type = ThriftState._VALUES_TO_NAMES.get(state, "Unknown<{!r}>".format(state))
- raise dbt.exceptions.DbtDatabaseError(
+ raise dbt.common.exceptions.DbtDatabaseError(
"Query failed with status: {}".format(status_type)
)
@@ -526,9 +527,7 @@ def open(cls, connection: Connection) -> Connection:
Connection(server_side_parameters=creds.server_side_parameters)
)
else:
- raise DbtConfigError(
- f"invalid credential method: {creds.method}"
- )
+ raise DbtConfigError(f"invalid credential method: {creds.method}")
break
except Exception as e:
exc = e
@@ -538,7 +537,7 @@ def open(cls, connection: Connection) -> Connection:
msg = "Failed to connect"
if creds.token is not None:
msg += ", is your token valid?"
- raise dbt.exceptions.FailedToConnectError(msg) from e
+ raise FailedToConnectError(msg) from e
retryable_message = _is_retryable_error(e)
if retryable_message and creds.connect_retries > 0:
msg = (
@@ -559,7 +558,7 @@ def open(cls, connection: Connection) -> Connection:
logger.warning(msg)
time.sleep(creds.connect_timeout)
else:
- raise dbt.exceptions.FailedToConnectError("failed to connect") from e
+ raise FailedToConnectError("failed to connect") from e
else:
raise exc # type: ignore
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index abc6a6ff6..e206dac92 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -2,7 +2,19 @@
import re
from concurrent.futures import Future
from dataclasses import dataclass
-from typing import Any, Dict, Iterable, List, Optional, Union, Type, Tuple, Callable, Set
+from typing import (
+ Any,
+ Dict,
+ Iterable,
+ List,
+ Optional,
+ Union,
+ Type,
+ Tuple,
+ Callable,
+ Set,
+ FrozenSet,
+)
from dbt.adapters.base.relation import InformationSchema
from dbt.adapters.contracts.connection import AdapterResponse
@@ -358,7 +370,9 @@ def _get_columns_for_catalog(self, relation: BaseRelation) -> Iterable[Dict[str,
yield as_dict
def get_catalog(
- self, relation_configs: Iterable[RelationConfig], selected_nodes: Optional[Set] = None
+ self,
+ relation_configs: Iterable[RelationConfig],
+ used_schemas: FrozenSet[Tuple[str, str]],
) -> Tuple[agate.Table, List[Exception]]:
schema_map = self._get_catalog_schemas(relation_configs)
if len(schema_map) > 1:
@@ -387,7 +401,7 @@ def _get_one_catalog(
self,
information_schema: InformationSchema,
schemas: Set[str],
- relation_configs: Iterable[RelationConfig],
+ used_schemas: FrozenSet[Tuple[str, str]],
) -> agate.Table:
if len(schemas) != 1:
raise dbt.exceptions.CompilationError(
diff --git a/tests/unit/test_adapter.py b/tests/unit/test_adapter.py
index b46f7eef6..54e9f0158 100644
--- a/tests/unit/test_adapter.py
+++ b/tests/unit/test_adapter.py
@@ -330,7 +330,9 @@ def test_parse_relation(self):
input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows]
config = self._get_target_http(self.project_cfg)
- rows = SparkAdapter(config, get_context("spawn")).parse_describe_extended(relation, input_cols)
+ rows = SparkAdapter(config, get_context("spawn")).parse_describe_extended(
+ relation, input_cols
+ )
self.assertEqual(len(rows), 4)
self.assertEqual(
rows[0].to_column_dict(omit_none=False),
@@ -419,7 +421,9 @@ def test_parse_relation_with_integer_owner(self):
input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows]
config = self._get_target_http(self.project_cfg)
- rows = SparkAdapter(config, get_context("spawn")).parse_describe_extended(relation, input_cols)
+ rows = SparkAdapter(config, get_context("spawn")).parse_describe_extended(
+ relation, input_cols
+ )
self.assertEqual(rows[0].to_column_dict().get("table_owner"), "1234")
@@ -455,7 +459,9 @@ def test_parse_relation_with_statistics(self):
input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows]
config = self._get_target_http(self.project_cfg)
- rows = SparkAdapter(config, get_context("spawn")).parse_describe_extended(relation, input_cols)
+ rows = SparkAdapter(config, get_context("spawn")).parse_describe_extended(
+ relation, input_cols
+ )
self.assertEqual(len(rows), 1)
self.assertEqual(
rows[0].to_column_dict(omit_none=False),
@@ -565,7 +571,9 @@ def test_parse_columns_from_information_with_table_type_and_delta_provider(self)
)
config = self._get_target_http(self.project_cfg)
- columns = SparkAdapter(config, get_context("spawn")).parse_columns_from_information(relation)
+ columns = SparkAdapter(config, get_context("spawn")).parse_columns_from_information(
+ relation
+ )
self.assertEqual(len(columns), 4)
self.assertEqual(
columns[0].to_column_dict(omit_none=False),
@@ -650,7 +658,9 @@ def test_parse_columns_from_information_with_view_type(self):
)
config = self._get_target_http(self.project_cfg)
- columns = SparkAdapter(config, get_context("spawn")).parse_columns_from_information(relation)
+ columns = SparkAdapter(config, get_context("spawn")).parse_columns_from_information(
+ relation
+ )
self.assertEqual(len(columns), 4)
self.assertEqual(
columns[1].to_column_dict(omit_none=False),
@@ -716,7 +726,9 @@ def test_parse_columns_from_information_with_table_type_and_parquet_provider(sel
)
config = self._get_target_http(self.project_cfg)
- columns = SparkAdapter(config, get_context("spawn")).parse_columns_from_information(relation)
+ columns = SparkAdapter(config, get_context("spawn")).parse_columns_from_information(
+ relation
+ )
self.assertEqual(len(columns), 4)
self.assertEqual(
From fd7a22fef6c598e06d703af52c3fff7b3b2f60ea Mon Sep 17 00:00:00 2001
From: Colin
Date: Thu, 11 Jan 2024 11:42:19 -0800
Subject: [PATCH 089/102] fix typing and exception imports
---
dbt/adapters/spark/connections.py | 29 +++++++++----------
dbt/adapters/spark/impl.py | 21 ++++++--------
dbt/adapters/spark/python_submissions.py | 37 +++++++++---------------
3 files changed, 35 insertions(+), 52 deletions(-)
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index c9c69294f..1f2bc944a 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -1,12 +1,11 @@
from contextlib import contextmanager
-import dbt.exceptions
from dbt.adapters.base import Credentials
from dbt.adapters.contracts.connection import AdapterResponse, ConnectionState, Connection
from dbt.adapters.events.logging import AdapterLogger
from dbt.adapters.exceptions import FailedToConnectError
from dbt.adapters.sql import SQLConnectionManager
-from dbt.common.exceptions import DbtConfigError
+from dbt.common.exceptions import DbtConfigError, DbtRuntimeError, DbtDatabaseError
from dbt.common.utils.encoding import DECIMALS
from dbt.adapters.spark import __version__
@@ -94,15 +93,15 @@ def cluster_id(self) -> Optional[str]:
def __post_init__(self) -> None:
if self.method is None:
- raise dbt.exceptions.DbtRuntimeError("Must specify `method` in profile")
+ raise DbtRuntimeError("Must specify `method` in profile")
if self.host is None:
- raise dbt.exceptions.DbtRuntimeError("Must specify `host` in profile")
+ raise DbtRuntimeError("Must specify `host` in profile")
if self.schema is None:
- raise dbt.exceptions.DbtRuntimeError("Must specify `schema` in profile")
+ raise DbtRuntimeError("Must specify `schema` in profile")
# spark classifies database and schema as the same thing
if self.database is not None and self.database != self.schema:
- raise dbt.exceptions.DbtRuntimeError(
+ raise DbtRuntimeError(
f" schema: {self.schema} \n"
f" database: {self.database} \n"
f"On Spark, database must be omitted or have the same value as"
@@ -114,7 +113,7 @@ def __post_init__(self) -> None:
try:
import pyodbc # noqa: F401
except ImportError as e:
- raise dbt.exceptions.DbtRuntimeError(
+ raise DbtRuntimeError(
f"{self.method} connection method requires "
"additional dependencies. \n"
"Install the additional required dependencies with "
@@ -123,7 +122,7 @@ def __post_init__(self) -> None:
) from e
if self.method == SparkConnectionMethod.ODBC and self.cluster and self.endpoint:
- raise dbt.exceptions.DbtRuntimeError(
+ raise DbtRuntimeError(
"`cluster` and `endpoint` cannot both be set when"
f" using {self.method} method to connect to Spark"
)
@@ -132,7 +131,7 @@ def __post_init__(self) -> None:
self.method == SparkConnectionMethod.HTTP
or self.method == SparkConnectionMethod.THRIFT
) and not (ThriftState and THttpClient and hive):
- raise dbt.exceptions.DbtRuntimeError(
+ raise DbtRuntimeError(
f"{self.method} connection method requires "
"additional dependencies. \n"
"Install the additional required dependencies with "
@@ -143,7 +142,7 @@ def __post_init__(self) -> None:
try:
import pyspark # noqa: F401
except ImportError as e:
- raise dbt.exceptions.DbtRuntimeError(
+ raise DbtRuntimeError(
f"{self.method} connection method requires "
"additional dependencies. \n"
"Install the additional required dependencies with "
@@ -293,13 +292,11 @@ def execute(self, sql: str, bindings: Optional[List[Any]] = None) -> None:
if poll_state.errorMessage:
logger.debug("Poll response: {}".format(poll_state))
logger.debug("Poll status: {}".format(state))
- raise dbt.common.exceptions.DbtDatabaseError(poll_state.errorMessage)
+ raise DbtDatabaseError(poll_state.errorMessage)
elif state not in STATE_SUCCESS:
status_type = ThriftState._VALUES_TO_NAMES.get(state, "Unknown<{!r}>".format(state))
- raise dbt.common.exceptions.DbtDatabaseError(
- "Query failed with status: {}".format(status_type)
- )
+ raise DbtDatabaseError("Query failed with status: {}".format(status_type))
logger.debug("Poll status: {}, query complete".format(state))
@@ -360,9 +357,9 @@ def exception_handler(self, sql: str) -> Generator[None, None, None]:
thrift_resp = exc.args[0]
if hasattr(thrift_resp, "status"):
msg = thrift_resp.status.errorMessage
- raise dbt.exceptions.DbtRuntimeError(msg)
+ raise DbtRuntimeError(msg)
else:
- raise dbt.exceptions.DbtRuntimeError(str(exc))
+ raise DbtRuntimeError(str(exc))
def cancel(self, connection: Connection) -> None:
connection.handle.cancel()
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index e206dac92..7e6c70e04 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -19,16 +19,13 @@
from dbt.adapters.base.relation import InformationSchema
from dbt.adapters.contracts.connection import AdapterResponse
from dbt.adapters.events.logging import AdapterLogger
+from dbt.common.exceptions import DbtRuntimeError, CompilationError
from dbt.common.utils import AttrDict, executor
from typing_extensions import TypeAlias
import agate
-import dbt
-import dbt.exceptions
-
-
from dbt.adapters.base import AdapterConfig, PythonJobHelper
from dbt.adapters.base.impl import catch_as_completed, ConstraintSupport
from dbt.adapters.sql import SQLAdapter
@@ -162,7 +159,7 @@ def _get_relation_information(self, row: agate.Row) -> RelationInfo:
try:
_schema, name, _, information = row
except ValueError:
- raise dbt.exceptions.DbtRuntimeError(
+ raise DbtRuntimeError(
f'Invalid value from "show tables extended ...", got {len(row)} values, expected 4'
)
@@ -173,7 +170,7 @@ def _get_relation_information_using_describe(self, row: agate.Row) -> RelationIn
try:
_schema, name, _ = row
except ValueError:
- raise dbt.exceptions.DbtRuntimeError(
+ raise DbtRuntimeError(
f'Invalid value from "show tables ...", got {len(row)} values, expected 3'
)
@@ -182,7 +179,7 @@ def _get_relation_information_using_describe(self, row: agate.Row) -> RelationIn
table_results = self.execute_macro(
DESCRIBE_TABLE_EXTENDED_MACRO_NAME, kwargs={"table_name": table_name}
)
- except dbt.exceptions.DbtRuntimeError as e:
+ except DbtRuntimeError as e:
logger.debug(f"Error while retrieving information about {table_name}: {e.msg}")
table_results = AttrDict()
@@ -237,7 +234,7 @@ def list_relations_without_caching(self, schema_relation: BaseRelation) -> List[
row_list=show_table_extended_rows,
relation_info_func=self._get_relation_information,
)
- except dbt.exceptions.DbtRuntimeError as e:
+ except DbtRuntimeError as e:
errmsg = getattr(e, "msg", "")
if f"Database '{schema_relation}' not found" in errmsg:
return []
@@ -254,7 +251,7 @@ def list_relations_without_caching(self, schema_relation: BaseRelation) -> List[
row_list=show_table_rows,
relation_info_func=self._get_relation_information_using_describe,
)
- except dbt.exceptions.DbtRuntimeError as e:
+ except DbtRuntimeError as e:
description = "Error while retrieving information about"
logger.debug(f"{description} {schema_relation}: {e.msg}")
return []
@@ -316,7 +313,7 @@ def get_columns_in_relation(self, relation: BaseRelation) -> List[SparkColumn]:
GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME, kwargs={"relation": relation}
)
columns = self.parse_describe_extended(relation, rows)
- except dbt.exceptions.DbtRuntimeError as e:
+ except DbtRuntimeError as e:
# spark would throw an error when the table doesn't exist, where other
# CDW would just return an empty list, normalizing the behavior here
errmsg = getattr(e, "msg", "")
@@ -376,7 +373,7 @@ def get_catalog(
) -> Tuple[agate.Table, List[Exception]]:
schema_map = self._get_catalog_schemas(relation_configs)
if len(schema_map) > 1:
- raise dbt.exceptions.CompilationError(
+ raise CompilationError(
f"Expected only one database in get_catalog, found " f"{list(schema_map)}"
)
@@ -404,7 +401,7 @@ def _get_one_catalog(
used_schemas: FrozenSet[Tuple[str, str]],
) -> agate.Table:
if len(schemas) != 1:
- raise dbt.exceptions.CompilationError(
+ raise CompilationError(
f"Expected only one schema in spark _get_one_catalog, found " f"{schemas}"
)
diff --git a/dbt/adapters/spark/python_submissions.py b/dbt/adapters/spark/python_submissions.py
index 89831ca7f..0443b1d00 100644
--- a/dbt/adapters/spark/python_submissions.py
+++ b/dbt/adapters/spark/python_submissions.py
@@ -4,8 +4,9 @@
from typing import Any, Dict, Callable, Iterable
import uuid
-import dbt.exceptions
from dbt.adapters.base import PythonJobHelper
+from dbt.common.exceptions import DbtRuntimeError
+
from dbt.adapters.spark import SparkCredentials
from dbt.adapters.spark import __version__
@@ -53,7 +54,7 @@ def _create_work_dir(self, path: str) -> None:
},
)
if response.status_code != 200:
- raise dbt.exceptions.DbtRuntimeError(
+ raise DbtRuntimeError(
f"Error creating work_dir for python notebooks\n {response.content!r}"
)
@@ -71,9 +72,7 @@ def _upload_notebook(self, path: str, compiled_code: str) -> None:
},
)
if response.status_code != 200:
- raise dbt.exceptions.DbtRuntimeError(
- f"Error creating python notebook.\n {response.content!r}"
- )
+ raise DbtRuntimeError(f"Error creating python notebook.\n {response.content!r}")
def _submit_job(self, path: str, cluster_spec: dict) -> str:
job_spec = {
@@ -99,9 +98,7 @@ def _submit_job(self, path: str, cluster_spec: dict) -> str:
json=job_spec,
)
if submit_response.status_code != 200:
- raise dbt.exceptions.DbtRuntimeError(
- f"Error creating python run.\n {submit_response.content!r}"
- )
+ raise DbtRuntimeError(f"Error creating python run.\n {submit_response.content!r}")
return submit_response.json()["run_id"]
def _submit_through_notebook(self, compiled_code: str, cluster_spec: dict) -> None:
@@ -135,7 +132,7 @@ def _submit_through_notebook(self, compiled_code: str, cluster_spec: dict) -> No
json_run_output = run_output.json()
result_state = json_run_output["metadata"]["state"]["result_state"]
if result_state != "SUCCESS":
- raise dbt.exceptions.DbtRuntimeError(
+ raise DbtRuntimeError(
"Python model failed with traceback as:\n"
"(Note that the line number here does not "
"match the line number in your code due to dbt templating)\n"
@@ -169,9 +166,9 @@ def polling(
response = status_func(**status_func_kwargs)
state = get_state_func(response)
if exceeded_timeout:
- raise dbt.exceptions.DbtRuntimeError("python model run timed out")
+ raise DbtRuntimeError("python model run timed out")
if state != expected_end_state:
- raise dbt.exceptions.DbtRuntimeError(
+ raise DbtRuntimeError(
"python model run ended in state"
f"{state} with state_message\n{get_state_msg_func(response)}"
)
@@ -205,9 +202,7 @@ def create(self) -> str:
},
)
if response.status_code != 200:
- raise dbt.exceptions.DbtRuntimeError(
- f"Error creating an execution context.\n {response.content!r}"
- )
+ raise DbtRuntimeError(f"Error creating an execution context.\n {response.content!r}")
return response.json()["id"]
def destroy(self, context_id: str) -> str:
@@ -221,9 +216,7 @@ def destroy(self, context_id: str) -> str:
},
)
if response.status_code != 200:
- raise dbt.exceptions.DbtRuntimeError(
- f"Error deleting an execution context.\n {response.content!r}"
- )
+ raise DbtRuntimeError(f"Error deleting an execution context.\n {response.content!r}")
return response.json()["id"]
@@ -246,9 +239,7 @@ def execute(self, context_id: str, command: str) -> str:
},
)
if response.status_code != 200:
- raise dbt.exceptions.DbtRuntimeError(
- f"Error creating a command.\n {response.content!r}"
- )
+ raise DbtRuntimeError(f"Error creating a command.\n {response.content!r}")
return response.json()["id"]
def status(self, context_id: str, command_id: str) -> Dict[str, Any]:
@@ -263,9 +254,7 @@ def status(self, context_id: str, command_id: str) -> Dict[str, Any]:
},
)
if response.status_code != 200:
- raise dbt.exceptions.DbtRuntimeError(
- f"Error getting status of command.\n {response.content!r}"
- )
+ raise DbtRuntimeError(f"Error getting status of command.\n {response.content!r}")
return response.json()
@@ -298,7 +287,7 @@ def submit(self, compiled_code: str) -> None:
get_state_msg_func=lambda response: response.json()["results"]["data"],
)
if response["results"]["resultType"] == "error":
- raise dbt.exceptions.DbtRuntimeError(
+ raise DbtRuntimeError(
f"Python model failed with traceback as:\n"
f"{response['results']['cause']}"
)
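The running theme of patches 084-089 is that adapter code no longer imports from dbt.exceptions: connection and configuration failures now come from dbt.adapters.exceptions and dbt.common.exceptions (renamed to dbt_common two patches later). A minimal hedged sketch of the resulting style, mirroring the validation and connection handling in the hunks above:

```python
# Import paths as of this patch; patch 091 below renames dbt.common to dbt_common.
from typing import Any, Iterable

from dbt.adapters.exceptions import FailedToConnectError
from dbt.common.exceptions import DbtConfigError


def validate_creds(creds: Any, required: Iterable[str], method: str) -> None:
    # Same shape as SparkConnectionManager.validate_creds in the diff above.
    for key in required:
        if not hasattr(creds, key):
            raise DbtConfigError(
                f"The config '{key}' is required when using the {method} method to connect to Spark"
            )


def open_with_wrapped_errors(connect):
    try:
        return connect()
    except Exception as e:
        # FailedToConnectError replaces dbt.exceptions.FailedToConnectError.
        raise FailedToConnectError("failed to connect") from e
```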
From 77df8b743a8b0078de6bd8ec8f43b6ad8283e309 Mon Sep 17 00:00:00 2001
From: Colin
Date: Thu, 11 Jan 2024 11:48:16 -0800
Subject: [PATCH 090/102] add changie
---
.changes/unreleased/Under the Hood-20240111-114806.yaml | 6 ++++++
1 file changed, 6 insertions(+)
create mode 100644 .changes/unreleased/Under the Hood-20240111-114806.yaml
diff --git a/.changes/unreleased/Under the Hood-20240111-114806.yaml b/.changes/unreleased/Under the Hood-20240111-114806.yaml
new file mode 100644
index 000000000..31705f468
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20240111-114806.yaml
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Update import paths and list_relations to support decoupling adapters/core
+time: 2024-01-11T11:48:06.120111-08:00
+custom:
+ Author: colin-rogers-dbt
+ Issue: "972"
From dfd58858d58ad0e14c4c912a10b748b11554e712 Mon Sep 17 00:00:00 2001
From: Colin
Date: Fri, 12 Jan 2024 14:42:55 -0800
Subject: [PATCH 091/102] replace dbt.common with dbt_common
---
dbt/adapters/spark/column.py | 2 +-
dbt/adapters/spark/connections.py | 6 +++---
dbt/adapters/spark/impl.py | 8 ++++----
dbt/adapters/spark/python_submissions.py | 2 +-
dbt/adapters/spark/relation.py | 2 +-
dbt/adapters/spark/session.py | 4 ++--
dev-requirements.txt | 4 ++--
tests/unit/utils.py | 2 +-
8 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py
index dbc872051..39f6f529e 100644
--- a/dbt/adapters/spark/column.py
+++ b/dbt/adapters/spark/column.py
@@ -2,7 +2,7 @@
from typing import Any, Dict, Optional, TypeVar, Union
from dbt.adapters.base.column import Column
-from dbt.common.dataclass_schema import dbtClassMixin
+from dbt_common.dataclass_schema import dbtClassMixin
Self = TypeVar("Self", bound="SparkColumn")
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 1f2bc944a..6e9e631b7 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -5,9 +5,9 @@
from dbt.adapters.events.logging import AdapterLogger
from dbt.adapters.exceptions import FailedToConnectError
from dbt.adapters.sql import SQLConnectionManager
-from dbt.common.exceptions import DbtConfigError, DbtRuntimeError, DbtDatabaseError
+from dbt_common.exceptions import DbtConfigError, DbtRuntimeError, DbtDatabaseError
-from dbt.common.utils.encoding import DECIMALS
+from dbt_common.utils.encoding import DECIMALS
from dbt.adapters.spark import __version__
try:
@@ -24,7 +24,7 @@
pyodbc = None
from datetime import datetime
import sqlparams
-from dbt.common.dataclass_schema import StrEnum
+from dbt_common.dataclass_schema import StrEnum
from dataclasses import dataclass, field
from typing import Any, Dict, Optional, Union, Tuple, List, Generator, Iterable, Sequence
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 7e6c70e04..9a1a7ec06 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -19,8 +19,8 @@
from dbt.adapters.base.relation import InformationSchema
from dbt.adapters.contracts.connection import AdapterResponse
from dbt.adapters.events.logging import AdapterLogger
-from dbt.common.exceptions import DbtRuntimeError, CompilationError
-from dbt.common.utils import AttrDict, executor
+from dbt_common.exceptions import DbtRuntimeError, CompilationError
+from dbt_common.utils import AttrDict, executor
from typing_extensions import TypeAlias
@@ -38,8 +38,8 @@
)
from dbt.adapters.base import BaseRelation
from dbt.adapters.contracts.relation import RelationType, RelationConfig
-from dbt.common.clients.agate_helper import DEFAULT_TYPE_TESTER
-from dbt.common.contracts.constraints import ConstraintType
+from dbt_common.clients.agate_helper import DEFAULT_TYPE_TESTER
+from dbt_common.contracts.constraints import ConstraintType
logger = AdapterLogger("Spark")
packages = ["pyhive.hive", "thrift.transport", "thrift.protocol"]
diff --git a/dbt/adapters/spark/python_submissions.py b/dbt/adapters/spark/python_submissions.py
index 0443b1d00..e3e7cb370 100644
--- a/dbt/adapters/spark/python_submissions.py
+++ b/dbt/adapters/spark/python_submissions.py
@@ -5,7 +5,7 @@
import uuid
from dbt.adapters.base import PythonJobHelper
-from dbt.common.exceptions import DbtRuntimeError
+from dbt_common.exceptions import DbtRuntimeError
from dbt.adapters.spark import SparkCredentials
from dbt.adapters.spark import __version__
diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py
index a6d679d56..860935cbd 100644
--- a/dbt/adapters/spark/relation.py
+++ b/dbt/adapters/spark/relation.py
@@ -4,7 +4,7 @@
from dbt.adapters.base.relation import BaseRelation, Policy
from dbt.adapters.events.logging import AdapterLogger
-from dbt.common.exceptions import DbtRuntimeError
+from dbt_common.exceptions import DbtRuntimeError
logger = AdapterLogger("Spark")
diff --git a/dbt/adapters/spark/session.py b/dbt/adapters/spark/session.py
index d5d3ff050..7a6982e50 100644
--- a/dbt/adapters/spark/session.py
+++ b/dbt/adapters/spark/session.py
@@ -8,8 +8,8 @@
from dbt.adapters.spark.connections import SparkConnectionWrapper
from dbt.adapters.events.logging import AdapterLogger
-from dbt.common.utils.encoding import DECIMALS
-from dbt.common.exceptions import DbtRuntimeError
+from dbt_common.utils.encoding import DECIMALS
+from dbt_common.exceptions import DbtRuntimeError
from pyspark.sql import DataFrame, Row, SparkSession
from pyspark.sql.utils import AnalysisException
diff --git a/dev-requirements.txt b/dev-requirements.txt
index e56b221c7..3dd8eb727 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,7 +1,7 @@
# install latest changes in dbt-core
# TODO: how to automate switching from develop to version branches?
-git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
-git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
+git+https://github.com/dbt-labs/dbt-core.git@er/remove-common-req-keep-legacy-logger#egg=dbt-core&subdirectory=core
+git+https://github.com/dbt-labs/dbt-core.git@er/remove-common-req-keep-legacy-logger#egg=dbt-tests-adapter&subdirectory=tests/adapter
# if version 1.x or greater -> pin to major version
# if version 0.x -> pin to minor
diff --git a/tests/unit/utils.py b/tests/unit/utils.py
index a32d6608d..17cd3ee78 100644
--- a/tests/unit/utils.py
+++ b/tests/unit/utils.py
@@ -9,7 +9,7 @@
import agate
import pytest
-from dbt.common.dataclass_schema import ValidationError
+from dbt_common.dataclass_schema import ValidationError
from dbt.config.project import PartialProject
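Patch 091 is a mechanical rename of dbt.common imports to the standalone dbt_common package. A throwaway helper of the sort that could perform that rename is sketched below; it is not part of the repository and is deliberately naive (plain string replacement, no handling of unusual import forms).

# One-off migration sketch: rewrite dbt.common imports to dbt_common in-place.
import pathlib


def migrate(root="."):
    for path in pathlib.Path(root).rglob("*.py"):
        text = path.read_text()
        new = text.replace("from dbt.common", "from dbt_common").replace(
            "import dbt.common", "import dbt_common"
        )
        if new != text:
            path.write_text(new)


if __name__ == "__main__":
    migrate("dbt/adapters/spark")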
From 3fc6d07fa731b68bbcd904839b7ac11e5941ea9b Mon Sep 17 00:00:00 2001
From: Colin
Date: Fri, 12 Jan 2024 14:51:57 -0800
Subject: [PATCH 092/102] update setup.py
---
setup.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/setup.py b/setup.py
index 301b4a41f..8e839c842 100644
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,6 @@
print('Please upgrade setuptools with "pip install --upgrade setuptools" ' "and try again")
sys.exit(1)
-
# pull long description from README
this_directory = os.path.abspath(os.path.dirname(__file__))
with open(os.path.join(this_directory, "README.md"), "r", encoding="utf8") as f:
@@ -73,8 +72,8 @@ def _get_dbt_core_version():
packages=find_namespace_packages(include=["dbt", "dbt.*"]),
include_package_data=True,
install_requires=[
- "dbt-core~={}".format(dbt_core_version),
"sqlparams>=3.0.0",
+ "dbt-common @ git+https://github.com/dbt-labs/dbt-common.git#egg=dbt",
],
extras_require={
"ODBC": odbc_extras,
From 17607c11482bdbee0ead33791eb59e43b5c99efe Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 16 Jan 2024 15:46:01 -0800
Subject: [PATCH 093/102] add dbt-adapters
---
dagger/run_dbt_spark_tests.py | 3 ++-
dev-requirements.txt | 4 ++--
setup.py | 1 +
3 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index 7adb352a2..512bb755b 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -86,7 +86,7 @@ async def get_spark_container(client: dagger.Client) -> (dagger.Container, str):
async def test_spark(test_args):
async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as client:
test_profile = test_args.profile
- req_files = client.host().directory("./", include=["*.txt", "*.env", "*.ini"])
+ req_files = client.host().directory("./", include=["*.txt", "*.env", "*.ini", "*.md", "setup.py"])
dbt_spark_dir = client.host().directory("./dbt")
test_dir = client.host().directory("./tests")
scripts = client.host().directory("./dagger/scripts")
@@ -99,6 +99,7 @@ async def test_spark(test_args):
.with_directory("/tests", test_dir)
.with_directory("/scripts", scripts)
.with_exec("./scripts/install_os_reqs.sh")
+ .with_exec(["pip", "install", "-e", "."])
.with_exec(["pip", "install", "-r", "requirements.txt"])
.with_exec(["pip", "install", "-r", "dev-requirements.txt"])
)
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 3dd8eb727..e56b221c7 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,7 +1,7 @@
# install latest changes in dbt-core
# TODO: how to automate switching from develop to version branches?
-git+https://github.com/dbt-labs/dbt-core.git@er/remove-common-req-keep-legacy-logger#egg=dbt-core&subdirectory=core
-git+https://github.com/dbt-labs/dbt-core.git@er/remove-common-req-keep-legacy-logger#egg=dbt-tests-adapter&subdirectory=tests/adapter
+git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
+git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
# if version 1.x or greater -> pin to major version
# if version 0.x -> pin to minor
diff --git a/setup.py b/setup.py
index 8e839c842..6d2a14686 100644
--- a/setup.py
+++ b/setup.py
@@ -74,6 +74,7 @@ def _get_dbt_core_version():
install_requires=[
"sqlparams>=3.0.0",
"dbt-common @ git+https://github.com/dbt-labs/dbt-common.git#egg=dbt",
+ "dbt-adapters @ git+https://github.com/dbt-labs/dbt-adapters.git#egg=dbt",
],
extras_require={
"ODBC": odbc_extras,
From 79d74aa422a9178f8432346c8c982ad9af3f3844 Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 22 Jan 2024 11:43:32 -0800
Subject: [PATCH 094/102] update setup.py
---
setup.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/setup.py b/setup.py
index 6d2a14686..d45b787f2 100644
--- a/setup.py
+++ b/setup.py
@@ -73,8 +73,8 @@ def _get_dbt_core_version():
include_package_data=True,
install_requires=[
"sqlparams>=3.0.0",
- "dbt-common @ git+https://github.com/dbt-labs/dbt-common.git#egg=dbt",
- "dbt-adapters @ git+https://github.com/dbt-labs/dbt-adapters.git#egg=dbt",
+ "dbt-common<1.0",
+ "dbt-adapters~=0.1.0a1",
],
extras_require={
"ODBC": odbc_extras,
From 011c9b59b5089dbadd516e7feea43dcc5bc970c2 Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 22 Jan 2024 11:57:37 -0800
Subject: [PATCH 095/102] fix credentials import
---
dbt/adapters/spark/connections.py | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 6e9e631b7..83048f921 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -1,7 +1,11 @@
from contextlib import contextmanager
-from dbt.adapters.base import Credentials
-from dbt.adapters.contracts.connection import AdapterResponse, ConnectionState, Connection
+from dbt.adapters.contracts.connection import (
+ AdapterResponse,
+ ConnectionState,
+ Connection,
+ Credentials,
+)
from dbt.adapters.events.logging import AdapterLogger
from dbt.adapters.exceptions import FailedToConnectError
from dbt.adapters.sql import SQLConnectionManager
From a40b07c241010d1a899a8f17e8b99f3421e565da Mon Sep 17 00:00:00 2001
From: Colin
Date: Mon, 22 Jan 2024 13:55:37 -0800
Subject: [PATCH 096/102] fix dev-requirements.txt
---
dagger/run_dbt_spark_tests.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index 512bb755b..80e4e5fa9 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -86,7 +86,9 @@ async def get_spark_container(client: dagger.Client) -> (dagger.Container, str):
async def test_spark(test_args):
async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as client:
test_profile = test_args.profile
- req_files = client.host().directory("./", include=["*.txt", "*.env", "*.ini", "*.md", "setup.py"])
+ req_files = client.host().directory(
+ "./", include=["*.txt", "*.env", "*.ini", "*.md", "setup.py"]
+ )
dbt_spark_dir = client.host().directory("./dbt")
test_dir = client.host().directory("./tests")
scripts = client.host().directory("./dagger/scripts")
From 8aac398821d7aa9d20970bfe96a79c2c3d7029d2 Mon Sep 17 00:00:00 2001
From: Colin
Date: Wed, 24 Jan 2024 15:16:10 -0800
Subject: [PATCH 097/102] dagger improvements to caching and installing the package

under test
---
dagger/run_dbt_spark_tests.py | 40 ++++++++++++++++++++++++-----------
setup.py | 1 +
tests/conftest.py | 8 +++----
3 files changed, 33 insertions(+), 16 deletions(-)
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index 80e4e5fa9..61eaed30c 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -29,18 +29,19 @@ def env_variables_inner(ctr: dagger.Container):
return env_variables_inner
-async def get_postgres_container(client: dagger.Client) -> (dagger.Container, str):
- ctr = await (
+def get_postgres_container(client: dagger.Client) -> (dagger.Container, str):
+ ctr = (
client.container()
.from_("postgres:13")
.with_env_variable("POSTGRES_PASSWORD", "postgres")
.with_exposed_port(PG_PORT)
+ .as_service()
)
return ctr, "postgres_db"
-async def get_spark_container(client: dagger.Client) -> (dagger.Container, str):
+def get_spark_container(client: dagger.Client) -> (dagger.Service, str):
spark_dir = client.host().directory("./dagger/spark-container")
spark_ctr_base = (
client.container()
@@ -64,7 +65,7 @@ async def get_spark_container(client: dagger.Client) -> (dagger.Container, str):
)
# postgres is the metastore here
- pg_ctr, pg_host = await get_postgres_container(client)
+ pg_ctr, pg_host = get_postgres_container(client)
spark_ctr = (
spark_ctr_base.with_service_binding(alias=pg_host, service=pg_ctr)
@@ -78,6 +79,7 @@ async def get_spark_container(client: dagger.Client) -> (dagger.Container, str):
]
)
.with_exposed_port(10000)
+ .as_service()
)
return spark_ctr, "spark_db"
@@ -86,32 +88,46 @@ async def get_spark_container(client: dagger.Client) -> (dagger.Container, str):
async def test_spark(test_args):
async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as client:
test_profile = test_args.profile
+
+ # create cache volumes; these persist between runs, saving time when developing locally
+ os_reqs_cache = client.cache_volume("os_reqs")
+ pip_cache = client.cache_volume("pip")
+
+ # set up directories so we don't copy the whole repo into the container
req_files = client.host().directory(
"./", include=["*.txt", "*.env", "*.ini", "*.md", "setup.py"]
)
dbt_spark_dir = client.host().directory("./dbt")
test_dir = client.host().directory("./tests")
scripts = client.host().directory("./dagger/scripts")
+
platform = dagger.Platform("linux/amd64")
tst_container = (
client.container(platform=platform)
.from_("python:3.8-slim")
- .with_directory("/.", req_files)
- .with_directory("/dbt", dbt_spark_dir)
- .with_directory("/tests", test_dir)
- .with_directory("/scripts", scripts)
- .with_exec("./scripts/install_os_reqs.sh")
- .with_exec(["pip", "install", "-e", "."])
+ .with_directory("/src", req_files)
+ .with_directory("/src/dbt", dbt_spark_dir)
+ .with_directory("/src/tests", test_dir)
+ .with_directory("/src/scripts", scripts)
+ .with_workdir("/src")
+ .with_mounted_cache("/var/cache/apt/archives", os_reqs_cache)
+ .with_exec(["./scripts/install_os_reqs.sh"])
+ )
+
+ tst_container = (
+ tst_container.with_mounted_cache("/root/.cache/pip", pip_cache)
+ .with_exec(["pip", "install", "-U", "pip"])
.with_exec(["pip", "install", "-r", "requirements.txt"])
.with_exec(["pip", "install", "-r", "dev-requirements.txt"])
+ .with_exec(["pip", "install", "-e", "."])
)
if test_profile == "apache_spark":
- spark_ctr, spark_host = await get_spark_container(client)
+ spark_ctr, spark_host = get_spark_container(client)
tst_container = tst_container.with_service_binding(alias=spark_host, service=spark_ctr)
elif test_profile in ["databricks_cluster", "databricks_sql_endpoint"]:
- tst_container = tst_container.with_exec("./scripts/configure_odbc.sh")
+ tst_container = tst_container.with_exec(["./scripts/configure_odbc.sh"])
elif test_profile == "spark_session":
tst_container = tst_container.with_exec(["pip", "install", "pyspark"])
diff --git a/setup.py b/setup.py
index d45b787f2..c1b439190 100644
--- a/setup.py
+++ b/setup.py
@@ -73,6 +73,7 @@ def _get_dbt_core_version():
include_package_data=True,
install_requires=[
"sqlparams>=3.0.0",
+ "dbt-core~={}".format(dbt_core_version),
"dbt-common<1.0",
"dbt-adapters~=0.1.0a1",
],
diff --git a/tests/conftest.py b/tests/conftest.py
index 700ade4d3..efd309c6a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -42,9 +42,9 @@ def apache_spark_target():
"user": "dbt",
"method": "thrift",
"port": 10000,
- "connect_retries": 3,
- "connect_timeout": 5,
- "retry_all": True,
+ "connect_retries": 2,
+ "connect_timeout": 3,
+ "retry_all": False,
}
@@ -59,7 +59,7 @@ def databricks_cluster_target():
"port": 443,
"connect_retries": 3,
"connect_timeout": 5,
- "retry_all": True,
+ "retry_all": False,
"user": os.getenv("DBT_DATABRICKS_USER"),
}
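Patch 097 leans on two dagger features: cache volumes, which persist apt and pip downloads between local runs, and as_service, which turns the Postgres and Spark containers into long-running services the test container can bind to by alias. The sketch below is not the repository's script; it is a stripped-down illustration of the same pattern, assuming anyio is available as in dagger's own examples.

# Minimal dagger sketch: a cache volume plus a service binding.
import sys

import anyio
import dagger


async def main():
    async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as client:
        pip_cache = client.cache_volume("pip")

        # a throwaway Postgres container exposed as a service
        db = (
            client.container()
            .from_("postgres:13")
            .with_env_variable("POSTGRES_PASSWORD", "postgres")
            .with_exposed_port(5432)
            .as_service()
        )

        out = await (
            client.container()
            .from_("python:3.8-slim")
            .with_mounted_cache("/root/.cache/pip", pip_cache)  # reused across runs
            .with_service_binding("postgres_db", db)  # reachable by alias
            .with_exec(["python", "-c", "print('ready')"])
            .stdout()
        )
        print(out)


anyio.run(main)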
From 6edcdcfdbf097a8f8315d5f8eec2a49e99b5e6d7 Mon Sep 17 00:00:00 2001
From: Colin
Date: Wed, 24 Jan 2024 15:16:31 -0800
Subject: [PATCH 098/102] update requirements
---
dagger/requirements.txt | 2 +-
dev-requirements.txt | 3 +--
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/dagger/requirements.txt b/dagger/requirements.txt
index df36543c2..b50c448d3 100644
--- a/dagger/requirements.txt
+++ b/dagger/requirements.txt
@@ -1,2 +1,2 @@
-dagger-io~=0.8.0
+dagger-io~=0.9.7
python-dotenv
diff --git a/dev-requirements.txt b/dev-requirements.txt
index e56b221c7..28a626fc3 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,7 +1,6 @@
# install latest changes in dbt-core
# TODO: how to automate switching from develop to version branches?
-git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
-git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
+git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter
# if version 1.x or greater -> pin to major version
# if version 0.x -> pin to minor
From eeba17f132b0ff24502436a72284d7a2d3cc036f Mon Sep 17 00:00:00 2001
From: Colin
Date: Wed, 24 Jan 2024 15:17:13 -0800
Subject: [PATCH 099/102] add cluster start fixture
---
tests/functional/conftest.py | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
create mode 100644 tests/functional/conftest.py
diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py
new file mode 100644
index 000000000..07419aa40
--- /dev/null
+++ b/tests/functional/conftest.py
@@ -0,0 +1,18 @@
+from multiprocessing import Lock
+
+import pytest
+
+_db_start_lock = Lock()
+_DB_CLUSTER_STARTED = False
+
+
+@pytest.fixture(scope="class", autouse=True)
+def start_databricks_cluster(project, request):
+ global _DB_CLUSTER_STARTED
+ profile_type = request.config.getoption("--profile")
+ with _db_start_lock:
+ if "databricks" in profile_type and not _DB_CLUSTER_STARTED:
+ print("Starting Databricks cluster")
+ project.run_sql("SELECT 1")
+
+ _DB_CLUSTER_STARTED = True
From f3a4c2d5e9ea921656306933a64018db6fbfbe3e Mon Sep 17 00:00:00 2001
From: Colin
Date: Wed, 24 Jan 2024 16:09:00 -0800
Subject: [PATCH 100/102] update conftest.py
---
tests/conftest.py | 8 +++-----
tests/functional/conftest.py | 1 +
2 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/tests/conftest.py b/tests/conftest.py
index efd309c6a..efba41a5f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -87,11 +87,9 @@ def databricks_http_cluster_target():
"token": os.getenv("DBT_DATABRICKS_TOKEN"),
"method": "http",
"port": 443,
- # more retries + longer timout to handle unavailability while cluster is restarting
- # return failures quickly in dev, retry all failures in CI (up to 5 min)
- "connect_retries": 5,
- "connect_timeout": 60,
- "retry_all": bool(os.getenv("DBT_DATABRICKS_RETRY_ALL", False)),
+ "connect_retries": 3,
+ "connect_timeout": 5,
+ "retry_all": False,
"user": os.getenv("DBT_DATABRICKS_USER"),
}
diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py
index 07419aa40..c1a0397bd 100644
--- a/tests/functional/conftest.py
+++ b/tests/functional/conftest.py
@@ -6,6 +6,7 @@
_DB_CLUSTER_STARTED = False
+# Running this should prevent retries caused by the Databricks cluster not yet being available
@pytest.fixture(scope="class", autouse=True)
def start_databricks_cluster(project, request):
global _DB_CLUSTER_STARTED
From 32c05bbd08e90f1245d9c77d00e063498484cbd7 Mon Sep 17 00:00:00 2001
From: Colin
Date: Wed, 24 Jan 2024 16:43:17 -0800
Subject: [PATCH 101/102] re-order dagger setup to reduce cache invalidation
---
dagger/run_dbt_spark_tests.py | 23 +++++++++++++----------
1 file changed, 13 insertions(+), 10 deletions(-)
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index 61eaed30c..436cb1e92 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -105,17 +105,16 @@ async def test_spark(test_args):
tst_container = (
client.container(platform=platform)
.from_("python:3.8-slim")
- .with_directory("/src", req_files)
- .with_directory("/src/dbt", dbt_spark_dir)
- .with_directory("/src/tests", test_dir)
- .with_directory("/src/scripts", scripts)
- .with_workdir("/src")
.with_mounted_cache("/var/cache/apt/archives", os_reqs_cache)
+ .with_mounted_cache("/root/.cache/pip", pip_cache)
+ # install OS deps first so any local changes don't invalidate the cache
+ .with_directory("/scripts", scripts)
.with_exec(["./scripts/install_os_reqs.sh"])
- )
-
- tst_container = (
- tst_container.with_mounted_cache("/root/.cache/pip", pip_cache)
+ # install dbt-spark + python deps
+ .with_directory("/src", req_files)
+ .with_directory("src/dbt", dbt_spark_dir)
+ .with_directory("src/tests", test_dir)
+ .with_workdir("/src")
.with_exec(["pip", "install", "-U", "pip"])
.with_exec(["pip", "install", "-r", "requirements.txt"])
.with_exec(["pip", "install", "-r", "dev-requirements.txt"])
@@ -127,7 +126,11 @@ async def test_spark(test_args):
tst_container = tst_container.with_service_binding(alias=spark_host, service=spark_ctr)
elif test_profile in ["databricks_cluster", "databricks_sql_endpoint"]:
- tst_container = tst_container.with_exec(["./scripts/configure_odbc.sh"])
+ tst_container = (
+ tst_container.with_workdir("/")
+ .with_exec(["./scripts/configure_odbc.sh"])
+ .with_workdir("/src")
+ )
elif test_profile == "spark_session":
tst_container = tst_container.with_exec(["pip", "install", "pyspark"])
From e8e4543a0e2c74a43179bcb916c719591f372968 Mon Sep 17 00:00:00 2001
From: Colin
Date: Wed, 24 Jan 2024 16:43:50 -0800
Subject: [PATCH 102/102] remove dbt-core dependency version check
---
setup.py | 10 ----------
1 file changed, 10 deletions(-)
diff --git a/setup.py b/setup.py
index c1b439190..2d6e00e53 100644
--- a/setup.py
+++ b/setup.py
@@ -39,17 +39,8 @@ def _get_plugin_version_dict():
return match.groupdict()
-# require a compatible minor version (~=), prerelease if this is a prerelease
-def _get_dbt_core_version():
- parts = _get_plugin_version_dict()
- minor = "{major}.{minor}.0".format(**parts)
- pre = parts["prekind"] + "1" if parts["prekind"] else ""
- return f"{minor}{pre}"
-
-
package_name = "dbt-spark"
package_version = "1.8.0a1"
-dbt_core_version = _get_dbt_core_version()
description = """The Apache Spark adapter plugin for dbt"""
odbc_extras = ["pyodbc~=4.0.39"]
@@ -73,7 +64,6 @@ def _get_dbt_core_version():
include_package_data=True,
install_requires=[
"sqlparams>=3.0.0",
- "dbt-core~={}".format(dbt_core_version),
"dbt-common<1.0",
"dbt-adapters~=0.1.0a1",
],
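For context on what was deleted: with package_version = "1.8.0a1", the old helper derived a dbt-core~=1.8.0a1 pin from the plugin version, and the explicit dbt-common and dbt-adapters pins above replace that coupling. Below is a small reconstruction of that computation; the regex is illustrative and stands in for the repository's _get_plugin_version_dict, which reads the version from the package instead.

# Reconstruction of the removed pin logic (illustrative regex, not the repo's).
import re

package_version = "1.8.0a1"
_version_pattern = re.compile(
    r"(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?P<prekind>a|b|rc)?(?P<pre>\d+)?"
)

parts = _version_pattern.match(package_version).groupdict()
minor = "{major}.{minor}.0".format(**parts)
pre = parts["prekind"] + "1" if parts["prekind"] else ""
print(f"dbt-core~={minor}{pre}")  # prints: dbt-core~=1.8.0a1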