# add generic python orchestration classes to generate dags #1773
# Workflow file for this run.
# Viewer notice: this file may contain bidirectional Unicode text that can be
# interpreted or compiled differently than it appears. To review, open the file
# in an editor that reveals hidden Unicode characters.
# Workflow name and the events that start it.
name: Build

on:
  # Allow the workflow to be started manually.
  workflow_dispatch:
  push:
    # Run for every branch except master. A `branches` filter must contain at
    # least one positive pattern next to a `!` pattern, so an exclusion-only
    # filter is expressed with `branches-ignore` instead.
    # Glob reminders for future edits:
    #   '*'   matches every branch that doesn't contain a '/'
    #   '*/*' matches every branch containing a single '/'
    #   '**'  matches every branch
    branches-ignore:
      - master
  pull_request:
    types: [opened, synchronize, reopened, ready_for_review]
    branches:
      - '**'
jobs:
  # Runs the sbt test suites package by package with coverage enabled, then
  # uploads the coverage report.
  test:
    runs-on: ubuntu-latest
    # Settings and credentials read by the steps below. They are declared as
    # job-level env (rather than echoed into $GITHUB_ENV from a shell step) so
    # that values containing quotes, `$` or newlines reach the tools unmodified.
    env:
      # JVM options shared by every sbt invocation in this job.
      SBT_OPTS: -Xss4M -Xms1g -Xmx4g
      SONATYPE_USERNAME: ${{ secrets.SONATYPE_USERNAME }}
      SONATYPE_PASSWORD: ${{ secrets.SONATYPE_PASSWORD }}
      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      REDSHIFT_DATABASE: ${{ secrets.REDSHIFT_DATABASE }}
      REDSHIFT_HOST: ${{ secrets.REDSHIFT_HOST }}
      REDSHIFT_PASSWORD: ${{ secrets.REDSHIFT_PASSWORD }}
      REDSHIFT_USER: ${{ secrets.REDSHIFT_USER }}
      REDSHIFT_ROLE: ${{ secrets.REDSHIFT_ROLE }}
      SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }}
      SNOWFLAKE_DB: ${{ secrets.SNOWFLAKE_DB }}
      SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }}
      SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }}
      SNOWFLAKE_WAREHOUSE: ${{ secrets.SNOWFLAKE_WAREHOUSE }}
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      AWS_ACCOUNT_ID: ${{ secrets.AWS_ACCOUNT_ID }}
      GCP_PROJECT: ${{ secrets.GCP_PROJECT }}
      TEMPORARY_GCS_BUCKET: ${{ secrets.TEMPORARY_GCS_BUCKET }}
    steps:
      # - uses: actions/setup-python@v2
      #   with:
      #     python-version: '3.10'
      - uses: actions/checkout@v4
      - name: Install graphviz
        # Refresh the package index first so the install does not fail on a
        # stale index.
        run: |
          sudo apt-get update
          sudo apt-get install -y graphviz
      - name: Set up Zulu 11
        uses: actions/setup-java@v4
        with:
          distribution: 'zulu'  # See 'Supported distributions' for available options
          java-version: '11'
      # - uses: vemonet/setup-spark@v1
      #   with:
      #     spark-version: '3.3.2'
      #     hadoop-version: '3'
      # - run: spark-submit --version
      - id: auth
        uses: google-github-actions/auth@v2
        with:
          credentials_json: '${{ secrets.GCP_SERVICE_ACCOUNT }}'
      # Steps guarded with "!= 'pull_request' || action == 'ready_for_review'"
      # run on push, on manual dispatch and when a pull request is marked
      # ready for review. Steps guarded with "!= 'pull_request'" alone never
      # run for pull requests.
      - name: Use gcloud CLI
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: gcloud info
      - name: Run tests(config) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.config.*" coverageReport
      - name: Run tests(extract) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.extract.*" coverageReport
      # The integration suites get extra SL_* settings through step-level env.
      # TEMPORARY_GCS_BUCKET, GITHUB_TOKEN and the SONATYPE_* credentials come
      # from the job-level env above.
      - name: Run tests(integration.extract) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' }}
        env:
          SL_SPARK_BIGQUERY_MATERIALIZATION_DATASET: SL_BQ_TEST_DS
          SL_ACCESS_POLICIES_PROJECT_ID: ${{ secrets.GCP_PROJECT }}
          SL_REMOTE_TEST: 'true'
          RELEASE_SONATYPE: 'false'
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.integration.extract.*" coverageReport
      - name: Run tests(integration.load) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' }}
        env:
          SL_SPARK_BIGQUERY_MATERIALIZATION_DATASET: SL_BQ_TEST_DS
          SL_ACCESS_POLICIES_PROJECT_ID: ${{ secrets.GCP_PROJECT }}
          SL_REMOTE_TEST: 'true'
          RELEASE_SONATYPE: 'false'
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.integration.load.*" coverageReport
      - name: Run tests(integration.starbake) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        env:
          SL_SPARK_BIGQUERY_MATERIALIZATION_DATASET: SL_BQ_TEST_DS
          SL_ACCESS_POLICIES_PROJECT_ID: ${{ secrets.GCP_PROJECT }}
          SL_REMOTE_TEST: 'true'
          RELEASE_SONATYPE: 'false'
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.integration.starbake.*" coverageReport
      - name: Run tests(integration.transform) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' }}
        env:
          SL_SPARK_BIGQUERY_MATERIALIZATION_DATASET: SL_BQ_TEST_DS
          SL_ACCESS_POLICIES_PROJECT_ID: ${{ secrets.GCP_PROJECT }}
          SL_REMOTE_TEST: 'true'
          RELEASE_SONATYPE: 'false'
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.integration.transform.*" coverageReport
      - name: Run tests(integration.utils) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' }}
        env:
          SL_SPARK_BIGQUERY_MATERIALIZATION_DATASET: SL_BQ_TEST_DS
          SL_ACCESS_POLICIES_PROJECT_ID: ${{ secrets.GCP_PROJECT }}
          SL_REMOTE_TEST: 'true'
          RELEASE_SONATYPE: 'false'
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.integration.utils.*" coverageReport
      - name: Run tests(job.bootstrap) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.job.bootstrap.*" coverageReport
      - name: Run tests(job.connections) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.job.connections.*" coverageReport
      - name: Run tests(job.convert) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.job.convert.*" coverageReport
      - name: Run tests(job.infer) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.job.infer.*" coverageReport
      - name: Run tests(job.ingest) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.job.ingest.*" coverageReport
      - name: Run tests(job.kafka) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.job.kafka.*" coverageReport
      - name: Run tests(job.load) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.job.load.*" coverageReport
      - name: Run tests(job.metrics) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' }}
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.job.metrics.*" coverageReport
      - name: Run tests(job.sink) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.job.sink.*" coverageReport
      - name: Run tests(job.strategies) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.job.strategies.*" coverageReport
      - name: Run tests(job.transform) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        # NOTE(review): unlike the other suites this pattern has no '.' before
        # '*' - confirm whether that is intentional before tightening it.
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.job.transform*" coverageReport
      - name: Run tests(privacy) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.privacy.*" coverageReport
      - name: Run tests(schema) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.schema.*" coverageReport
      - name: Run tests(serve) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.serve.*" coverageReport
      - name: Run tests(sql) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.sql.*" coverageReport
      - name: Run tests(udf) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.udf.*" coverageReport
      - name: Run tests(utils) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.utils.*" coverageReport
      - name: Run tests(workflow) & Coverage Report On Push
        if: ${{ github.event_name != 'pull_request' || github.event.action == 'ready_for_review' }}
        run: sbt ++2.13.14! coverage "testOnly ai.starlake.workflow.*" coverageReport
      - name: Upload coverage to Codecov
        if: ${{ github.event_name != 'pull_request' }}
        # NOTE(review): v1 of this action is an old major version. Newer majors
        # rename `file` to `files` and may need an upload token - confirm
        # before upgrading.
        uses: codecov/codecov-action@v1
        with:
          file: target/scala-2.13/scoverage-report/scoverage.xml
          flags: unittests
          fail_ci_if_error: true
          verbose: true

  # Checks that build definitions, main sources and test sources are formatted
  # according to scalafmt.
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Zulu 11
        uses: actions/setup-java@v4
        with:
          distribution: 'zulu'  # See 'Supported distributions' for available options
          java-version: '11'
      - name: Formatting
        run: sbt scalafmtSbtCheck scalafmtCheck test:scalafmtCheck

  # Builds the Docker image locally (no push) once the test job has succeeded,
  # then inspects its labels and runs its `help` command as a smoke test.
  docker:
    needs: test
    name: Test Docker Image
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
    steps:
      - name: Checkout
        id: vcs
        uses: actions/checkout@v4
      - name: Env
        # Exports the entries of .versions, the project version parsed from the
        # first line of version.sbt, and the image names derived from it.
        run: |
          cat .versions >> "$GITHUB_ENV"
          source .versions
          FIRST_LINE=$(head -n 1 version.sbt)
          SL_VERSION=$(echo "$FIRST_LINE" | sed -E 's/.*version := "([0-9]+\.[0-9]+\.?.*)"/\1/')
          echo "SL_VERSION=${SL_VERSION}" >> "$GITHUB_ENV"
          SL_MAJOR_MINOR_VERSION=$(echo "${SL_VERSION}" | cut -d'.' -f1-2)
          echo "SL_MAJOR_MINOR_VERSION=${SL_MAJOR_MINOR_VERSION}" >> "$GITHUB_ENV"
          REGISTRY_IMAGE=starlakeai/starlake
          echo "REGISTRY_IMAGE=${REGISTRY_IMAGE}" >> "$GITHUB_ENV"
          REGISTRY_IMAGE_LATEST=${REGISTRY_IMAGE}:${SL_VERSION:-latest}
          echo "REGISTRY_IMAGE_LATEST=${REGISTRY_IMAGE_LATEST}" >> "$GITHUB_ENV"
          # The build step passes BUILD_DATE as a build argument. Keep a value
          # supplied by .versions if there is one, otherwise use the current
          # UTC time so the argument is never empty.
          echo "BUILD_DATE=${BUILD_DATE:-$(date -u +'%Y-%m-%dT%H:%M:%SZ')}" >> "$GITHUB_ENV"
      - name: Prepare
        run: |
          ./scripts/docker-prepare.sh -b
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          labels: |
            org.opencontainers.image.vendor=starlakeai
            org.opencontainers.image.licenses=Apache-2.0
            org.opencontainers.image.title=starlakeai/starlake
            org.opencontainers.image.description="A declarative text based tool that enables analysts and engineers to extract, load, transform and orchestrate their data pipelines."
            org.opencontainers.image.url=https://starlake.ai
          # generate Docker tags based on the following events/attributes
          tags: |
            type=schedule
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}
          # type=sha
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Build and export to Docker
        uses: docker/build-push-action@v6
        with:
          context: ./distrib/docker
          pull: true
          load: true
          # One tag per line: the tags computed by the meta step (possibly
          # none) followed by the version tag computed in the Env step.
          tags: |
            ${{ steps.meta.outputs.tags }}
            ${{ env.REGISTRY_IMAGE_LATEST }}
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            BUILD_DATE=${{ env.BUILD_DATE }}
            VCS_REF=${{ steps.vcs.outputs.commit }}
            SL_VERSION=${{ env.SL_VERSION }}
      - name: Inspect image
        run: |
          docker image inspect --format='{{json .Config.Labels}}' "$REGISTRY_IMAGE_LATEST"
      - name: Test
        run: |
          docker run --rm "$REGISTRY_IMAGE_LATEST" help