Skip to content

ETL Workflow

ETL Workflow #2

Workflow file for this run

# Workflow that submits the ETL pipeline (see the `submit-etl-pipeline` job).
name: ETL Workflow

# NOTE: generic YAML 1.1 linters may flag `on` as a truthy key; GitHub's
# workflow loader reads it as the trigger map.
on:
  push:
    branches:
      - 'main'
    # Only pushes that touch paths matching this glob start a run.
    paths:
      - 'src/neuripsadc/etl**'
  # Also allow the workflow to be started manually.
  workflow_dispatch:

permissions:
  # The job authenticates to Google Cloud through a workload identity
  # provider, which relies on a GitHub-issued OIDC token.
  id-token: write
  contents: read
jobs:
  # Single job: prepares Python, triggers the Beam image build, authenticates
  # to Google Cloud, installs the project with Poetry and finally calls
  # `run_pipeline` with the bucket/project/region settings.
  submit-etl-pipeline:
    runs-on: ubuntu-latest
    steps:
      - name: Setup pyenv
        uses: 'gabrielfalcao/pyenv-action@v18'
        with:
          # Quoted so the version is always handled as a string.
          default: '3.10.13'

      - name: Checkout
        uses: actions/checkout@v4

      # NOTE(review): this step runs before "Authenticate to Google Cloud".
      # If scripts/trigger.sh needs gcloud credentials it has to move below
      # the auth and Cloud SDK steps -- confirm against the script.
      - name: Build beam runner image
        run: |
          chmod +x scripts/trigger.sh
          ./scripts/trigger.sh -p ${{ secrets.PROJECT_ID }} \
            -r ${{ secrets.REGION }} \
            -t build-beam-image

      - id: gcp_auth
        name: Authenticate to Google Cloud
        # The action ref had been mangled into an e-mail-protection
        # placeholder and could not be resolved. Restored to the v2 major
        # tag -- TODO confirm the exact version that was pinned originally.
        uses: google-github-actions/auth@v2
        with:
          project_id: ${{ secrets.PROJECT_ID }}
          workload_identity_provider: ${{ secrets.WIP }}
          service_account: ${{ secrets.SERVICE_ACCOUNT }}

      - name: 'Set up Cloud SDK'
        uses: 'google-github-actions/setup-gcloud@v2'
        with:
          version: '>= 363.0.0'

      - name: Install Poetry
        run: |
          curl -sSL https://install.python-poetry.org | python3 - --version 1.8.3

      # Shell state does not carry over between steps, so the venv created
      # here is activated again in the "Submit Pipeline" step.
      - name: Create venv and install requirements
        run: |
          python3 -m venv env && source env/bin/activate
          poetry install --without dev

      - name: Submit Pipeline
        run: |
          source env/bin/activate
          run_pipeline --source=${{ secrets.BUCKET }}/raw \
            --output=${{ secrets.BUCKET }}/primary \
            --runner=${{ vars.RUNNER }} \
            --project=${{ secrets.PROJECT_ID }} \
            --region=${{ secrets.REGION }} \
            --temp_location=${{ secrets.BUCKET }}/pipeline_root \
            --experiments=use_runner_v2 \
            --sdk_container_image=${{ secrets.REGION }}-docker.pkg.dev/${{ secrets.PROJECT_ID }}/${{ vars.IMAGE }}:beam-cpu