From a211d144b36467474c6b958b39eca22118e2ae03 Mon Sep 17 00:00:00 2001 From: szalai1 Date: Thu, 22 Dec 2022 15:33:19 +0100 Subject: [PATCH 1/4] feat(CI): add venv caching --- .github/workflows/docker-unified.yml | 14 ++++++++++++++ .github/workflows/metadata-ingestion.yml | 16 ++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index b574df8b42da6..279e3155f4f1b 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -477,6 +477,20 @@ jobs: if: ${{ needs.setup.outputs.publish != 'true' }} with: image: ${{ env.DATAHUB_MAE_CONSUMER_IMAGE }}:${{ needs.setup.outputs.unique_tag }} + - name: Calculate pip install plan + id: "packages_checksum" + run: | + pip install pip -U # only 22.2 and above contains the --dry-run flag + + # only the last line of the output is the packages that will be installed + pip install --dry-run -r ./smoke/requirements.txt | tail -n 1 > /tmp/would_be_installed.txt + CHECKSUM=$(shasum /tmp/would_be_installed.txt | awk '{print $1}') + echo "packages_checksum=$CHECKSUM" >> $GITHUB_OUTPUT + - uses: actions/cache@v3 + id: cache-venv + with: + path: ./smoke-test/venv/ + key: smoke-venv-${{ runner.os }}-${{ steps.packages_checksum.outputs.packages_checksum }} - name: Smoke test env: DATAHUB_VERSION: ${{ needs.setup.outputs.unique_tag }} diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index 043a32a4ce06f..01d9f4cab9705 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -55,6 +55,22 @@ jobs: hadoop-version: "3.2" - name: Install dependencies run: ./metadata-ingestion/scripts/install_deps.sh + - name: Calculate pip install plan + id: "packages_checksum" + run: | + cd metadata-ingestion + pip install pip -U # only 22.2 and above contains the --dry-run flag + + # only the last line of the output is the packages that will be 
installed + pip install --dry-run -e . | tail -n 1 > /tmp/would_be_installed.txt + echo ${{ matrix.extraPythonRequirement }} --dry-run >> /tmp/would_be_installed.txt + CHECKSUM=$(shasum /tmp/would_be_installed.txt | awk '{print $1}') + echo "packages_checksum=$CHECKSUM" >> $GITHUB_OUTPUT + - uses: actions/cache@v3 + id: cache-venv + with: + path: ./metadata-ingestion/venv/ + key: ${{ runner.os }}-venv-${{ steps.packages_checksum.outputs.packages_checksum }}-${{ matrix.python-version }}-${{ matrix.command }} - name: Run metadata-ingestion tests (extras ${{ matrix.extraPythonRequirement }}) run: ./gradlew -Pextra_pip_requirements='${{ matrix.extraPythonRequirement }}' :metadata-ingestion:${{ matrix.command }} - name: pip freeze show list installed From c47c44f3a1d2cb79cab02123562a64dd5c7aeaaa Mon Sep 17 00:00:00 2001 From: szalai1 Date: Tue, 27 Dec 2022 15:19:39 +0100 Subject: [PATCH 2/4] Requested changes --- .github/workflows/docker-unified.yml | 1 + .github/workflows/metadata-ingestion.yml | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 1b37924ccb1bd..ed6a0bbd67eb5 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -466,6 +466,7 @@ jobs: - uses: actions/setup-python@v4 with: python-version: "3.7" + cache: 'pip' - name: Pull images in background run: | docker pull acryldata/datahub-actions:head & diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index 01d9f4cab9705..87c53c4ccc884 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -49,6 +49,7 @@ jobs: - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + cache: 'pip' - uses: vemonet/setup-spark@v1 # spark is required for pyspark+pydeequ data lake profiling with: spark-version: "3.0.3" @@ -62,8 +63,7 @@ jobs: pip install pip -U # only 22.2
and above contains the --dry-run flag # only the last line of the output is the packages that will be installed - pip install --dry-run -e . | tail -n 1 > /tmp/would_be_installed.txt - echo ${{ matrix.extraPythonRequirement }} --dry-run >> /tmp/would_be_installed.txt + pip install --dry-run -e . ${{ matrix.extraPythonRequirement }} | tail -n 1 > /tmp/would_be_installed.txt CHECKSUM=$(shasum /tmp/would_be_installed.txt | awk '{print $1}') echo "packages_checksum=$CHECKSUM" >> $GITHUB_OUTPUT - uses: actions/cache@v3 From 7cf105b424bb3f3dfb9efee4086a977f492965ab Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 27 Dec 2022 14:33:15 -0500 Subject: [PATCH 3/4] Update .github/workflows/metadata-ingestion.yml --- .github/workflows/metadata-ingestion.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index 87c53c4ccc884..6c2266e41fddc 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -63,7 +63,7 @@ jobs: pip install pip -U # only 22.2 and above contains the --dry-run flag # only the last line of the output is the packages that will be installed - pip install --dry-run -e . 
${{ matrix.extraPythonRequirement }} | tail -n 1 > /tmp/would_be_installed.txt + pip install --dry-run -e .[dev] ${{ matrix.extraPythonRequirement }} | tail -n 1 > /tmp/would_be_installed.txt CHECKSUM=$(shasum /tmp/would_be_installed.txt | awk '{print $1}') echo "packages_checksum=$CHECKSUM" >> $GITHUB_OUTPUT - uses: actions/cache@v3 From 63531a0b42bb9b8147fe50f163726ddadde9850a Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 28 Dec 2022 00:47:12 -0500 Subject: [PATCH 4/4] update ingestion job trigger paths --- .github/workflows/metadata-ingestion.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index 6c2266e41fddc..79b714791ff49 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -4,12 +4,14 @@ on: branches: - master paths: + - ".github/**" - "metadata-ingestion/**" - "metadata-models/**" pull_request: branches: - master paths: + - ".github/**" - "metadata-ingestion/**" - "metadata-models/**" release: