-
Notifications
You must be signed in to change notification settings - Fork 22
232 lines (221 loc) · 14.5 KB
/
build.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
# Build workflow: runs the sbt test suites, the formatting check and a
# Docker image smoke test (see the jobs below).
name: Build

on:
  # Allow the workflow to be started manually.
  workflow_dispatch:
  push:
    # Run on every branch except master. GitHub requires at least one
    # positive pattern whenever a negative ('!') pattern is used, and later
    # patterns override earlier ones, so '**' must come first.
    branches:
      - '**'       # matches every branch
      - '!master'  # excludes master
    # Other available patterns, for reference:
    #   '*'    matches every branch that doesn't contain a '/'
    #   '*/*'  matches every branch containing a single '/'
  pull_request:
    types: [opened, synchronize, reopened, ready_for_review]
    branches:
      - '**'
jobs:
  # Runs the sbt test suites one package at a time with scoverage enabled.
  # Most steps are skipped on pull_request events unless the action is
  # `ready_for_review`; the steps guarded only by `!= 'pull_request'` never
  # run for pull requests.
  test:
    runs-on: ubuntu-latest
    steps:
      # Export the repository secrets as environment variables for all
      # following steps of this job.
      - name: Env
        run: |
          echo "SONATYPE_USERNAME=${{ secrets.SONATYPE_USERNAME }}" >> "$GITHUB_ENV"
          echo "SONATYPE_PASSWORD=${{ secrets.SONATYPE_PASSWORD }}" >> "$GITHUB_ENV"
          echo "GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }}" >> "$GITHUB_ENV"
          echo "REDSHIFT_DATABASE=${{ secrets.REDSHIFT_DATABASE }}" >> "$GITHUB_ENV"
          echo "REDSHIFT_HOST=${{ secrets.REDSHIFT_HOST }}" >> "$GITHUB_ENV"
          echo "REDSHIFT_PASSWORD=${{ secrets.REDSHIFT_PASSWORD }}" >> "$GITHUB_ENV"
          echo "REDSHIFT_USER=${{ secrets.REDSHIFT_USER }}" >> "$GITHUB_ENV"
          echo "REDSHIFT_ROLE=${{ secrets.REDSHIFT_ROLE }}" >> "$GITHUB_ENV"
          echo "SNOWFLAKE_ACCOUNT=${{ secrets.SNOWFLAKE_ACCOUNT }}" >> "$GITHUB_ENV"
          echo "SNOWFLAKE_DB=${{ secrets.SNOWFLAKE_DB }}" >> "$GITHUB_ENV"
          echo "SNOWFLAKE_PASSWORD=${{ secrets.SNOWFLAKE_PASSWORD }}" >> "$GITHUB_ENV"
          echo "SNOWFLAKE_USER=${{ secrets.SNOWFLAKE_USER }}" >> "$GITHUB_ENV"
          echo "SNOWFLAKE_WAREHOUSE=${{ secrets.SNOWFLAKE_WAREHOUSE }}" >> "$GITHUB_ENV"
          echo "AWS_ACCESS_KEY_ID=${{ secrets.AWS_ACCESS_KEY_ID }}" >> "$GITHUB_ENV"
          echo "AWS_SECRET_ACCESS_KEY=${{ secrets.AWS_SECRET_ACCESS_KEY }}" >> "$GITHUB_ENV"
          echo "AWS_ACCOUNT_ID=${{ secrets.AWS_ACCOUNT_ID }}" >> "$GITHUB_ENV"
          echo "GCP_PROJECT=${{ secrets.GCP_PROJECT }}" >> "$GITHUB_ENV"
          echo "TEMPORARY_GCS_BUCKET=${{ secrets.TEMPORARY_GCS_BUCKET }}" >> "$GITHUB_ENV"
      # - uses: actions/setup-python@v2
      #   with:
      #     python-version: '3.10'
      # Same checkout major version as the docker job below.
      - uses: actions/checkout@v4
      - name: Install graphviz
        # Refresh the package index first so the install does not fail on a
        # stale index.
        run: |
          sudo apt-get update
          sudo apt-get install -y graphviz
      - name: Set up Zulu 11
        uses: actions/setup-java@v2
        with:
          distribution: 'zulu'  # See 'Supported distributions' for available options
          java-version: '11'
      # - uses: vemonet/setup-spark@v1
      #   with:
      #     spark-version: '3.3.2'
      #     hadoop-version: '3'
      # - run: spark-submit --version
      - id: 'auth'
        uses: 'google-github-actions/auth@v1'
        with:
          credentials_json: '${{ secrets.GCP_SERVICE_ACCOUNT }}'
      - name: Use gcloud CLI
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: gcloud info
      - name: Run tests(config) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g" sbt ++2.13.14! coverage "testOnly ai.starlake.config.*" coverageReport
      - name: Run tests(extract) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g" sbt ++2.13.14! coverage "testOnly ai.starlake.extract.*" coverageReport
      # The integration suites below run with SL_REMOTE_TEST=true. Their
      # commands are folded (`>-`) into a single shell line, so the variable
      # assignments still prefix the sbt invocation.
      - name: Run tests(integration.extract) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' }}
        run: >-
          SBT_OPTS="-Xss4M -Xms1g -Xmx4g"
          SL_SPARK_BIGQUERY_MATERIALIZATION_DATASET=SL_BQ_TEST_DS
          SL_ACCESS_POLICIES_PROJECT_ID=${{ env.GCP_PROJECT }}
          TEMPORARY_GCS_BUCKET=${{ env.TEMPORARY_GCS_BUCKET }}
          SL_REMOTE_TEST=true
          RELEASE_SONATYPE=false
          GITHUB_TOKEN=${{ env.GITHUB_TOKEN }}
          SONATYPE_USERNAME=${{ env.SONATYPE_USERNAME }}
          SONATYPE_PASSWORD=${{ env.SONATYPE_PASSWORD }}
          sbt ++2.13.14! coverage "testOnly ai.starlake.integration.extract.*" coverageReport
      - name: Run tests(integration.load) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' }}
        run: >-
          SBT_OPTS="-Xss4M -Xms1g -Xmx4g"
          SL_SPARK_BIGQUERY_MATERIALIZATION_DATASET=SL_BQ_TEST_DS
          SL_ACCESS_POLICIES_PROJECT_ID=${{ env.GCP_PROJECT }}
          TEMPORARY_GCS_BUCKET=${{ env.TEMPORARY_GCS_BUCKET }}
          SL_REMOTE_TEST=true
          RELEASE_SONATYPE=false
          GITHUB_TOKEN=${{ env.GITHUB_TOKEN }}
          SONATYPE_USERNAME=${{ env.SONATYPE_USERNAME }}
          SONATYPE_PASSWORD=${{ env.SONATYPE_PASSWORD }}
          sbt ++2.13.14! coverage "testOnly ai.starlake.integration.load.*" coverageReport
      - name: Run tests(integration.starbake) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: >-
          SBT_OPTS="-Xss4M -Xms1g -Xmx4g"
          SL_SPARK_BIGQUERY_MATERIALIZATION_DATASET=SL_BQ_TEST_DS
          SL_ACCESS_POLICIES_PROJECT_ID=${{ env.GCP_PROJECT }}
          TEMPORARY_GCS_BUCKET=${{ env.TEMPORARY_GCS_BUCKET }}
          SL_REMOTE_TEST=true
          RELEASE_SONATYPE=false
          GITHUB_TOKEN=${{ env.GITHUB_TOKEN }}
          SONATYPE_USERNAME=${{ env.SONATYPE_USERNAME }}
          SONATYPE_PASSWORD=${{ env.SONATYPE_PASSWORD }}
          sbt ++2.13.14! coverage "testOnly ai.starlake.integration.starbake.*" coverageReport
      - name: Run tests(integration.transform) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' }}
        run: >-
          SBT_OPTS="-Xss4M -Xms1g -Xmx4g"
          SL_SPARK_BIGQUERY_MATERIALIZATION_DATASET=SL_BQ_TEST_DS
          SL_ACCESS_POLICIES_PROJECT_ID=${{ env.GCP_PROJECT }}
          TEMPORARY_GCS_BUCKET=${{ env.TEMPORARY_GCS_BUCKET }}
          SL_REMOTE_TEST=true
          RELEASE_SONATYPE=false
          GITHUB_TOKEN=${{ env.GITHUB_TOKEN }}
          SONATYPE_USERNAME=${{ env.SONATYPE_USERNAME }}
          SONATYPE_PASSWORD=${{ env.SONATYPE_PASSWORD }}
          sbt ++2.13.14! coverage "testOnly ai.starlake.integration.transform.*" coverageReport
      - name: Run tests(integration.utils) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' }}
        run: >-
          SBT_OPTS="-Xss4M -Xms1g -Xmx4g"
          SL_SPARK_BIGQUERY_MATERIALIZATION_DATASET=SL_BQ_TEST_DS
          SL_ACCESS_POLICIES_PROJECT_ID=${{ env.GCP_PROJECT }}
          TEMPORARY_GCS_BUCKET=${{ env.TEMPORARY_GCS_BUCKET }}
          SL_REMOTE_TEST=true
          RELEASE_SONATYPE=false
          GITHUB_TOKEN=${{ env.GITHUB_TOKEN }}
          SONATYPE_USERNAME=${{ env.SONATYPE_USERNAME }}
          SONATYPE_PASSWORD=${{ env.SONATYPE_PASSWORD }}
          sbt ++2.13.14! coverage "testOnly ai.starlake.integration.utils.*" coverageReport
      - name: Run tests(job.bootstrap) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g" sbt ++2.13.14! coverage "testOnly ai.starlake.job.bootstrap.*" coverageReport
      - name: Run tests(job.connections) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g" sbt ++2.13.14! coverage "testOnly ai.starlake.job.connections.*" coverageReport
      - name: Run tests(job.convert) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g" sbt ++2.13.14! coverage "testOnly ai.starlake.job.convert.*" coverageReport
      - name: Run tests(job.infer) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g" sbt ++2.13.14! coverage "testOnly ai.starlake.job.infer.*" coverageReport
      - name: Run tests(job.ingest) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g" sbt ++2.13.14! coverage "testOnly ai.starlake.job.ingest.*" coverageReport
      - name: Run tests(job.kafka) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g" sbt ++2.13.14! coverage "testOnly ai.starlake.job.kafka.*" coverageReport
      - name: Run tests(job.load) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g" sbt ++2.13.14! coverage "testOnly ai.starlake.job.load.*" coverageReport
      - name: Run tests(job.metrics) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' }}
        run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g" sbt ++2.13.14! coverage "testOnly ai.starlake.job.metrics.*" coverageReport
      - name: Run tests(job.sink) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g" sbt ++2.13.14! coverage "testOnly ai.starlake.job.sink.*" coverageReport
      - name: Run tests(job.strategies) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g" sbt ++2.13.14! coverage "testOnly ai.starlake.job.strategies.*" coverageReport
      # NOTE(review): this filter is `transform*`, not `transform.*` like the
      # other steps — kept as is; confirm the wider match is intentional.
      - name: Run tests(job.transform) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g" sbt ++2.13.14! coverage "testOnly ai.starlake.job.transform*" coverageReport
      - name: Run tests(privacy) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g" sbt ++2.13.14! coverage "testOnly ai.starlake.privacy.*" coverageReport
      - name: Run tests(schema) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g" sbt ++2.13.14! coverage "testOnly ai.starlake.schema.*" coverageReport
      - name: Run tests(serve) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g" sbt ++2.13.14! coverage "testOnly ai.starlake.serve.*" coverageReport
      - name: Run tests(sql) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g" sbt ++2.13.14! coverage "testOnly ai.starlake.sql.*" coverageReport
      - name: Run tests(udf) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g" sbt ++2.13.14! coverage "testOnly ai.starlake.udf.*" coverageReport
      - name: Run tests(utils) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g" sbt ++2.13.14! coverage "testOnly ai.starlake.utils.*" coverageReport
      - name: Run tests(workflow) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g" sbt ++2.13.14! coverage "testOnly ai.starlake.workflow.*" coverageReport
      # Publish the scoverage XML report; never runs for pull requests.
      # NOTE(review): v1 of this action is several major versions old;
      # upgrading needs its inputs re-checked, so it is left unchanged here.
      - name: Upload coverage to Codecov
        if: ${{ github.event_name != 'pull_request' }}
        uses: codecov/codecov-action@v1
        with:
          file: target/scala-2.13/scoverage-report/scoverage.xml
          flags: unittests
          fail_ci_if_error: true
          verbose: true

  # Checks scalafmt formatting of the build definition, main and test sources.
  lint:
    runs-on: ubuntu-latest
    steps:
      # Same checkout major version as the docker job below.
      - uses: actions/checkout@v4
      - name: Set up Zulu 11
        uses: actions/setup-java@v2
        with:
          distribution: 'zulu'  # See 'Supported distributions' for available options
          java-version: '11'
      - name: Formatting
        run: sbt scalafmtSbtCheck scalafmtCheck test:scalafmtCheck

  # Builds the image from ./distrib/docker, loads it into the local Docker
  # daemon and smoke-tests it by running `help`. Only starts once `test`
  # has succeeded.
  docker:
    needs: test
    name: Test Docker Image
    runs-on: ubuntu-latest
    # NOTE(review): no matrix is defined for this job, so `fail-fast`
    # presumably has no effect here — confirm before removing.
    strategy:
      fail-fast: false
    steps:
      - name: Checkout
        id: vcs
        uses: actions/checkout@v4
      # Export the entries of `.versions`, the project version read from the
      # first line of version.sbt, and the image names derived from it.
      - name: Env
        run: |
          cat .versions >> "$GITHUB_ENV"
          source .versions
          FIRST_LINE=$(head -n 1 version.sbt)
          SL_VERSION=$(echo "$FIRST_LINE" | sed -E 's/.*version := "([0-9]+\.[0-9]+\.?.*)"/\1/')
          echo "SL_VERSION=${SL_VERSION}" >> "$GITHUB_ENV"
          SL_MAJOR_MINOR_VERSION=$(echo "${SL_VERSION}" | cut -d'.' -f1-2)
          echo "SL_MAJOR_MINOR_VERSION=${SL_MAJOR_MINOR_VERSION}" >> "$GITHUB_ENV"
          REGISTRY_IMAGE=starlakeai/starlake
          echo "REGISTRY_IMAGE=${REGISTRY_IMAGE}" >> "$GITHUB_ENV"
          REGISTRY_IMAGE_LATEST=${REGISTRY_IMAGE}:${SL_VERSION:-latest}
          echo "REGISTRY_IMAGE_LATEST=${REGISTRY_IMAGE_LATEST}" >> "$GITHUB_ENV"
      - name: Prepare
        run: |
          ./scripts/docker-prepare.sh -b
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          labels: |
            org.opencontainers.image.vendor=starlakeai
            org.opencontainers.image.licenses=Apache-2.0
            org.opencontainers.image.title=starlakeai/starlake
            org.opencontainers.image.description="A declarative text based tool that enables analysts and engineers to extract, load, transform and orchestrate their data pipelines."
            org.opencontainers.image.url=https://starlake.ai
          # generate Docker tags based on the following events/attributes
          tags: |
            type=schedule
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}
          # type=sha
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Build and export to Docker
        uses: docker/build-push-action@v6
        with:
          context: "./distrib/docker"
          pull: true
          # Load the built image into the local daemon so the next steps can
          # inspect and run it.
          load: true
          tags: ${{ steps.meta.outputs.tags }},${{ env.REGISTRY_IMAGE_LATEST }}
          labels: ${{ steps.meta.outputs.labels }}
          # NOTE(review): BUILD_DATE is not set by any step visible in this
          # workflow; presumably it comes from `.versions` — confirm.
          build-args: |
            BUILD_DATE=${{ env.BUILD_DATE }}
            VCS_REF=${{ steps.vcs.outputs.commit }}
            SL_VERSION=${{ env.SL_VERSION }}
      - name: Inspect image
        run: |
          docker image inspect --format='{{json .Config.Labels}}' ${{ env.REGISTRY_IMAGE_LATEST }}
      - name: Test
        run: |
          docker run --rm ${{ env.REGISTRY_IMAGE_LATEST }} help