From c22b8c17751c2bdb379b7245a971f8b1868b280f Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Wed, 5 Jul 2023 16:05:03 -0400 Subject: [PATCH 01/31] Update 2.50 release notes to include new Kafka topicPattern feature --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index b6df49a48cad..dac9654f2331 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -61,6 +61,7 @@ * Support for X source added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). * Python GCSIO is now implemented with GCP GCS Client instead of apitools ([#25676](https://github.com/apache/beam/issues/25676)) +* Java KafkaIO now supports picking up topics via topicPattern ([#26948](https://github.com/apache/beam/pull/26948)) ## New Features / Improvements From 6c9c28dd2f825ac6b96362b7435f2094bf107b59 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Thu, 31 Aug 2023 13:32:01 -0400 Subject: [PATCH 02/31] Create groovy class for io performance tests Create gradle task and github actions config for GCS using this. --- ...stCommit_Java_IO_GCS_Performance_Tests.yml | 117 ++++++++++++++++++ .../gradle/IoPerformanceTestUtilities.groovy | 45 +++++++ .../org/apache/beam/examples/WriteBQ.java | 73 +++++++++++ it/google-cloud-platform/build.gradle | 5 +- 4 files changed, 239 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml create mode 100644 buildSrc/src/main/groovy/org/apache/beam/gradle/IoPerformanceTestUtilities.groovy create mode 100644 examples/java/src/main/java/org/apache/beam/examples/WriteBQ.java diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml new file mode 100644 index 000000000000..c94e884effa2 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml @@ -0,0 +1,117 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Java IO GCS Performance Tests + +on: + push: + tags: ['v*'] + branches: ['master', 'release-*'] + paths: ['it/google-cloud-platform/**','.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml'] + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.sender.login }}-${{ github.event.schedule }}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_IO_GCS_Performance_Tests: + if: | + github.event_name == 'push' || + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java PostCommit IO GCS Performance Tests' + runs-on: [self-hosted, ubuntu-20.04, main] + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Java_IO_GCS_Performance_Tests"] + job_phrase: ["Run Java PostCommit IO GCS Performance Tests"] + steps: + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Checkout release branch + if: github.event_name == 'schedule' #This has scheduled runs run against the latest release + uses: actions/checkout@v3 + with: + ref: release-2.50.0 #TODO automate updating this + repository: apache/beam + - name: Checkout non-release branch + if: github.event_name != 'schedule' #This has triggered runs checkout the triggering branch + uses: actions/checkout@v3 + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: Setup Gradle + uses: gradle/gradle-build-action@v2 + with: + cache-read-only: false + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + - name: Authenticate on GCP + uses: google-github-actions/setup-gcloud@v0 + with: + service_account_email: ${{ secrets.GCP_SA_EMAIL }} + service_account_key: ${{ secrets.GCP_SA_KEY }} + project_id: ${{ secrets.GCP_PROJECT_ID }} + export_default_credentials: true + - name: GCloud Docker credential helper + run: | + gcloud auth configure-docker us.gcr.io + - name: run scheduled javaPostcommitIOGCSPerformanceTests script + if: github.event_name == 'schedule' #This ensures only scheduled runs publish metrics publicly + with: + exportDataset: performance_tests + exportTable: io_performance_metrics + run: ./gradlew :it:google-cloud-platform:GCSPerformanceTest + env: + USER: github-actions + - name: run triggered javaPostcommitIOGCSPerformanceTests script + if: github.event_name != 'schedule' + run: ./gradlew :it:google-cloud-platform:GCSPerformanceTest + env: + USER: github-actions diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/IoPerformanceTestUtilities.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/IoPerformanceTestUtilities.groovy new file mode 100644 index 000000000000..c15243ebe338 --- /dev/null +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/IoPerformanceTestUtilities.groovy @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.gradle + +import org.gradle.api.Project +import org.gradle.api.tasks.testing.Test + +import javax.inject.Inject + +class IoPerformanceTestUtilities { + abstract static class IoPerformanceTest extends Test { + @Inject + IoPerformanceTest(Project runningProject, String module, String testClass, Map systemProperties){ + group = "Verification" + description = "Runs IO Performance Test for $testClass" + outputs.upToDateWhen { false } + testClassesDirs = runningProject.findProject(":it:${module}").sourceSets.test.output.classesDirs + classpath = runningProject.sourceSets.test.runtimeClasspath + runningProject.findProject(":it:${module}").sourceSets.test.runtimeClasspath + + include "**/${testClass}.class" + + systemProperty 'exportDataset', System.getenv 'exportDataset' + systemProperty 'exportTable', System.getenv 'exportTable' + + for (entry in systemProperties){ + systemProperty entry.key, entry.value + } + } + } +} diff --git a/examples/java/src/main/java/org/apache/beam/examples/WriteBQ.java b/examples/java/src/main/java/org/apache/beam/examples/WriteBQ.java new file mode 100644 index 000000000000..6043118f558e --- /dev/null +++ b/examples/java/src/main/java/org/apache/beam/examples/WriteBQ.java @@ -0,0 +1,73 @@ +package org.apache.beam.examples; + +import com.google.api.services.bigquery.model.TableFieldSchema; +import com.google.api.services.bigquery.model.TableRow; +import com.google.api.services.bigquery.model.TableSchema; +import com.google.common.base.Splitter; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.io.GenerateSequence; +import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; +import org.apache.beam.sdk.io.gcp.bigquery.BigQueryOptions; +import org.apache.beam.sdk.options.Description; +import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.transforms.Create; +import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.ParDo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class WriteBQ { + + public interface WriteBQOptions extends PipelineOptions, BigQueryOptions { + @Description("Table to write to") + String getTable(); + + void setTable(String value); + } + + public static void main(String[] args) { + + WriteBQOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(WriteBQOptions.class); + options.setTable("google.com:clouddfe:jjc_test.writebq2"); + options.setUseStorageWriteApi(true); + options.setStorageApiAppendThresholdRecordCount(100); + + Pipeline p = Pipeline.create(options); + + List fields = new ArrayList<>(); + fields.add(new TableFieldSchema().setName("name").setType("STRING")); + fields.add(new TableFieldSchema().setName("year").setType("INTEGER")); + fields.add(new TableFieldSchema().setName("country").setType("STRING")); + TableSchema schema = new TableSchema().setFields(fields); + + p + .apply(GenerateSequence.from(0).to(100_000)) + //Convert to TableRow + .apply("to TableRow", ParDo.of(new DoFn() { + @ProcessElement + public void processElement(ProcessContext c) { + TableRow row = new TableRow(); + + row.set("name", "name"); + row.set("year", c.element()); + row.set("country", "country"); + + c.output(row); + } + })) + // to BigQuery + // Using `writeTableRows` is slightly less performant than using write with `WithFormatFunction` + // due to the TableRow encoding. See `WriteWithFormatBQ` for an example. + .apply(BigQueryIO.writeTableRows() // Input type from prev stage is Row + .withSchema(schema) + .to(options.getTable()) + .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) + .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)); + p.run(); + } +} diff --git a/it/google-cloud-platform/build.gradle b/it/google-cloud-platform/build.gradle index f43b3f25720b..48c7175187da 100644 --- a/it/google-cloud-platform/build.gradle +++ b/it/google-cloud-platform/build.gradle @@ -15,6 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +import org.apache.beam.gradle.IoPerformanceTestUtilities plugins { id 'org.apache.beam.module' } applyJavaNature( @@ -74,4 +75,6 @@ dependencies { testImplementation library.java.mockito_inline testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadowTest") testRuntimeOnly library.java.slf4j_simple -} \ No newline at end of file +} + +tasks.register("GCSPerformanceTest", IoPerformanceTestUtilities.IoPerformanceTest, project, 'google-cloud-platform', 'FileBasedIOLT', ['configuration':'large','project':'apache-beam-testing', 'artifactBucket':'io-performance-temp']) \ No newline at end of file From 520c9d1637f09687fe8e6c9002892a879d75902e Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Thu, 31 Aug 2023 13:36:55 -0400 Subject: [PATCH 03/31] delete unnecessary class --- .../org/apache/beam/examples/WriteBQ.java | 73 ------------------- 1 file changed, 73 deletions(-) delete mode 100644 examples/java/src/main/java/org/apache/beam/examples/WriteBQ.java diff --git a/examples/java/src/main/java/org/apache/beam/examples/WriteBQ.java b/examples/java/src/main/java/org/apache/beam/examples/WriteBQ.java deleted file mode 100644 index 6043118f558e..000000000000 --- a/examples/java/src/main/java/org/apache/beam/examples/WriteBQ.java +++ /dev/null @@ -1,73 +0,0 @@ -package org.apache.beam.examples; - -import com.google.api.services.bigquery.model.TableFieldSchema; -import com.google.api.services.bigquery.model.TableRow; -import com.google.api.services.bigquery.model.TableSchema; -import com.google.common.base.Splitter; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.GenerateSequence; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryOptions; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -public class WriteBQ { - - public interface WriteBQOptions extends PipelineOptions, BigQueryOptions { - @Description("Table to write to") - String getTable(); - - void setTable(String value); - } - - public static void main(String[] args) { - - WriteBQOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(WriteBQOptions.class); - options.setTable("google.com:clouddfe:jjc_test.writebq2"); - options.setUseStorageWriteApi(true); - options.setStorageApiAppendThresholdRecordCount(100); - - Pipeline p = Pipeline.create(options); - - List fields = new ArrayList<>(); - fields.add(new TableFieldSchema().setName("name").setType("STRING")); - fields.add(new TableFieldSchema().setName("year").setType("INTEGER")); - fields.add(new TableFieldSchema().setName("country").setType("STRING")); - TableSchema schema = new TableSchema().setFields(fields); - - p - .apply(GenerateSequence.from(0).to(100_000)) - //Convert to TableRow - .apply("to TableRow", ParDo.of(new DoFn() { - @ProcessElement - public void processElement(ProcessContext c) { - TableRow row = new TableRow(); - - row.set("name", "name"); - row.set("year", c.element()); - row.set("country", "country"); - - c.output(row); - } - })) - // to BigQuery - // Using `writeTableRows` is slightly less performant than using write with `WithFormatFunction` - // due to the TableRow encoding. See `WriteWithFormatBQ` for an example. - .apply(BigQueryIO.writeTableRows() // Input type from prev stage is Row - .withSchema(schema) - .to(options.getTable()) - .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) - .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)); - p.run(); - } -} From 062de236bc65fc7553c8ddb021a2f1174357d88a Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Thu, 31 Aug 2023 13:45:40 -0400 Subject: [PATCH 04/31] fix env call --- .../org/apache/beam/gradle/IoPerformanceTestUtilities.groovy | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/IoPerformanceTestUtilities.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/IoPerformanceTestUtilities.groovy index c15243ebe338..844afd75f008 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/IoPerformanceTestUtilities.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/IoPerformanceTestUtilities.groovy @@ -34,8 +34,8 @@ class IoPerformanceTestUtilities { include "**/${testClass}.class" - systemProperty 'exportDataset', System.getenv 'exportDataset' - systemProperty 'exportTable', System.getenv 'exportTable' + systemProperty 'exportDataset', System.getenv('exportDataset') + systemProperty 'exportTable', System.getenv('exportTable') for (entry in systemProperties){ systemProperty entry.key, entry.value From 9c9f86bd4218f1effb20cd9db5f44fca70277466 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Thu, 31 Aug 2023 14:14:16 -0400 Subject: [PATCH 05/31] fix call to gradle --- .../beam_PostCommit_Java_IO_GCS_Performance_Tests.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml index c94e884effa2..3a0800b4bcd3 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml @@ -104,14 +104,17 @@ jobs: gcloud auth configure-docker us.gcr.io - name: run scheduled javaPostcommitIOGCSPerformanceTests script if: github.event_name == 'schedule' #This ensures only scheduled runs publish metrics publicly + uses: ./.github/actions/gradle-command-self-hosted-action with: + gradle-command: :it:google-cloud-platform:GCSPerformanceTest exportDataset: performance_tests exportTable: io_performance_metrics - run: ./gradlew :it:google-cloud-platform:GCSPerformanceTest env: USER: github-actions - name: run triggered javaPostcommitIOGCSPerformanceTests script if: github.event_name != 'schedule' - run: ./gradlew :it:google-cloud-platform:GCSPerformanceTest + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :it:google-cloud-platform:GCSPerformanceTest env: USER: github-actions From 925ce55318a90468e6493afd889699e64a9651e8 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Thu, 31 Aug 2023 14:32:23 -0400 Subject: [PATCH 06/31] run on hosted runner for testing --- .../beam_PostCommit_Java_IO_GCS_Performance_Tests.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml index 3a0800b4bcd3..a7e24ba1d445 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml @@ -59,7 +59,8 @@ jobs: github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || github.event.comment.body == 'Run Java PostCommit IO GCS Performance Tests' - runs-on: [self-hosted, ubuntu-20.04, main] +# runs-on: [self-hosted, ubuntu-20.04, main] + runs-on: ubuntu-20.04 name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) strategy: matrix: From 117ef8b1c196472b03a7bd17292eaaa0bbaa483d Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Thu, 31 Aug 2023 14:35:58 -0400 Subject: [PATCH 07/31] add additional checkout --- .../workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml index a7e24ba1d445..75ca8a51a731 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml @@ -67,6 +67,7 @@ jobs: job_name: ["beam_PostCommit_Java_IO_GCS_Performance_Tests"] job_phrase: ["Run Java PostCommit IO GCS Performance Tests"] steps: + - uses: actions/checkout@v3 - name: Setup repository uses: ./.github/actions/setup-action with: From cb6e01b50aa66b554f1c8dcce76eef50c73b381b Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Thu, 31 Aug 2023 16:07:20 -0400 Subject: [PATCH 08/31] add destination for triggered tests --- .../workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml index 75ca8a51a731..fa38c5eeed79 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml @@ -118,5 +118,7 @@ jobs: uses: ./.github/actions/gradle-command-self-hosted-action with: gradle-command: :it:google-cloud-platform:GCSPerformanceTest + exportDataset: performance_tests + exportTable: io_performance_metrics env: USER: github-actions From 8ea6c51374b5d542c6abf8b6c3a81ba89b3b7a2e Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Fri, 1 Sep 2023 09:47:24 -0400 Subject: [PATCH 09/31] move env variables to correct location --- .../beam_PostCommit_Java_IO_GCS_Performance_Tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml index fa38c5eeed79..ebfb51de105e 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml @@ -109,16 +109,16 @@ jobs: uses: ./.github/actions/gradle-command-self-hosted-action with: gradle-command: :it:google-cloud-platform:GCSPerformanceTest + env: exportDataset: performance_tests exportTable: io_performance_metrics - env: USER: github-actions - name: run triggered javaPostcommitIOGCSPerformanceTests script if: github.event_name != 'schedule' uses: ./.github/actions/gradle-command-self-hosted-action with: gradle-command: :it:google-cloud-platform:GCSPerformanceTest + env: exportDataset: performance_tests exportTable: io_performance_metrics - env: USER: github-actions From 320a4cc2a3f23a2aa351179a34c47225580991f8 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Fri, 1 Sep 2023 11:23:43 -0400 Subject: [PATCH 10/31] try uploading against separate dataset --- .../beam_PostCommit_Java_IO_GCS_Performance_Tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml index ebfb51de105e..fd1d112406de 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml @@ -110,7 +110,7 @@ jobs: with: gradle-command: :it:google-cloud-platform:GCSPerformanceTest env: - exportDataset: performance_tests + exportDataset: jjc_test exportTable: io_performance_metrics USER: github-actions - name: run triggered javaPostcommitIOGCSPerformanceTests script @@ -119,6 +119,6 @@ jobs: with: gradle-command: :it:google-cloud-platform:GCSPerformanceTest env: - exportDataset: performance_tests + exportDataset: jjc_test exportTable: io_performance_metrics USER: github-actions From 1cd4e55026654670d45eb12fb57adf99a9fb73e1 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Fri, 1 Sep 2023 14:47:07 -0400 Subject: [PATCH 11/31] try without a user --- .../beam_PostCommit_Java_IO_GCS_Performance_Tests.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml index fd1d112406de..6f10a7d136a9 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml @@ -112,7 +112,6 @@ jobs: env: exportDataset: jjc_test exportTable: io_performance_metrics - USER: github-actions - name: run triggered javaPostcommitIOGCSPerformanceTests script if: github.event_name != 'schedule' uses: ./.github/actions/gradle-command-self-hosted-action @@ -120,5 +119,4 @@ jobs: gradle-command: :it:google-cloud-platform:GCSPerformanceTest env: exportDataset: jjc_test - exportTable: io_performance_metrics - USER: github-actions + exportTable: io_performance_metrics \ No newline at end of file From 4fc5b8e419c0444184c1d418d40e60f6149360c6 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 5 Sep 2023 10:39:02 -0400 Subject: [PATCH 12/31] update branch checkout, try to view the failure log --- .../beam_PostCommit_Java_IO_GCS_Performance_Tests.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml index 6f10a7d136a9..541a8da59ae6 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml @@ -78,7 +78,7 @@ jobs: if: github.event_name == 'schedule' #This has scheduled runs run against the latest release uses: actions/checkout@v3 with: - ref: release-2.50.0 #TODO automate updating this + ref: v2.50.0 #TODO automate updating this repository: apache/beam - name: Checkout non-release branch if: github.event_name != 'schedule' #This has triggered runs checkout the triggering branch @@ -119,4 +119,6 @@ jobs: gradle-command: :it:google-cloud-platform:GCSPerformanceTest env: exportDataset: jjc_test - exportTable: io_performance_metrics \ No newline at end of file + exportTable: io_performance_metrics + - name: view failed test + run: cat file:///home/runner/work/beam/beam/it/google-cloud-platform/build/reports/tests/GCSPerformanceTest/index.html \ No newline at end of file From 59069f2b9d9ac3bb42837703db195f99b66a8351 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 5 Sep 2023 11:05:05 -0400 Subject: [PATCH 13/31] run on failure --- .../beam_PostCommit_Java_IO_GCS_Performance_Tests.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml index 541a8da59ae6..2a6d671c710f 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml @@ -121,4 +121,5 @@ jobs: exportDataset: jjc_test exportTable: io_performance_metrics - name: view failed test - run: cat file:///home/runner/work/beam/beam/it/google-cloud-platform/build/reports/tests/GCSPerformanceTest/index.html \ No newline at end of file + if: ${{ failure() }} + run: cat /home/runner/work/beam/beam/it/google-cloud-platform/build/reports/tests/GCSPerformanceTest/classes/org.apache.beam.it.gcp.storage.FileBasedIOLT.html \ No newline at end of file From 6f51976e942f31c3d2bc4cb7d173b615a26913e3 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 5 Sep 2023 12:45:57 -0400 Subject: [PATCH 14/31] update to use correct BigQuery instance --- .../beam_PostCommit_Java_IO_GCS_Performance_Tests.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml index 2a6d671c710f..06f7d64a3aae 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml @@ -110,7 +110,7 @@ jobs: with: gradle-command: :it:google-cloud-platform:GCSPerformanceTest env: - exportDataset: jjc_test + exportDataset: performance_tests exportTable: io_performance_metrics - name: run triggered javaPostcommitIOGCSPerformanceTests script if: github.event_name != 'schedule' @@ -118,8 +118,5 @@ jobs: with: gradle-command: :it:google-cloud-platform:GCSPerformanceTest env: - exportDataset: jjc_test - exportTable: io_performance_metrics - - name: view failed test - if: ${{ failure() }} - run: cat /home/runner/work/beam/beam/it/google-cloud-platform/build/reports/tests/GCSPerformanceTest/classes/org.apache.beam.it.gcp.storage.FileBasedIOLT.html \ No newline at end of file + exportDataset: performance_tests + exportTable: io_performance_metrics_test \ No newline at end of file From df716cb4aead47b0e3478ed44918501eb4f73ef9 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 5 Sep 2023 13:15:24 -0400 Subject: [PATCH 15/31] convert to matrix --- ..._PostCommit_Java_IO_Performance_Tests.yml} | 31 ++++++++--------- it/build.gradle | 33 +++++++++++++++++++ it/google-cloud-platform/build.gradle | 3 +- 3 files changed, 48 insertions(+), 19 deletions(-) rename .github/workflows/{beam_PostCommit_Java_IO_GCS_Performance_Tests.yml => beam_PostCommit_Java_IO_Performance_Tests.yml} (79%) create mode 100644 it/build.gradle diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml similarity index 79% rename from .github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml rename to .github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml index 06f7d64a3aae..57a02979bf6c 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml @@ -13,13 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -name: PostCommit Java IO GCS Performance Tests +name: PostCommit Java IO Performance Tests on: push: tags: ['v*'] branches: ['master', 'release-*'] - paths: ['it/google-cloud-platform/**','.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml'] + paths: ['it/google-cloud-platform/**','.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml'] issue_comment: types: [created] schedule: @@ -53,36 +53,34 @@ env: GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} jobs: - beam_PostCommit_Java_IO_GCS_Performance_Tests: + beam_PostCommit_Java_IO_Performance_Tests: if: | github.event_name == 'push' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || - github.event.comment.body == 'Run Java PostCommit IO GCS Performance Tests' + github.event.comment.body == 'Run Java PostCommit IO Performance Tests' # runs-on: [self-hosted, ubuntu-20.04, main] runs-on: ubuntu-20.04 name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) strategy: matrix: - job_name: ["beam_PostCommit_Java_IO_GCS_Performance_Tests"] - job_phrase: ["Run Java PostCommit IO GCS Performance Tests"] + job_name: ["beam_PostCommit_Java_IO_Performance_Tests"] + job_phrase: ["Run Java PostCommit IO Performance Tests"] + test_case: ["GCSPerformanceTest", "BigTablePerformanceTest"] steps: - uses: actions/checkout@v3 - name: Setup repository uses: ./.github/actions/setup-action with: - comment_phrase: ${{ matrix.job_phrase }} + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.test_case }} github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.test_case }}) - name: Checkout release branch if: github.event_name == 'schedule' #This has scheduled runs run against the latest release uses: actions/checkout@v3 with: ref: v2.50.0 #TODO automate updating this repository: apache/beam - - name: Checkout non-release branch - if: github.event_name != 'schedule' #This has triggered runs checkout the triggering branch - uses: actions/checkout@v3 - name: Install Java uses: actions/setup-java@v3.8.0 with: @@ -101,22 +99,19 @@ jobs: service_account_key: ${{ secrets.GCP_SA_KEY }} project_id: ${{ secrets.GCP_PROJECT_ID }} export_default_credentials: true - - name: GCloud Docker credential helper - run: | - gcloud auth configure-docker us.gcr.io - - name: run scheduled javaPostcommitIOGCSPerformanceTests script + - name: run scheduled javaPostcommitIOPerformanceTests script if: github.event_name == 'schedule' #This ensures only scheduled runs publish metrics publicly uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: :it:google-cloud-platform:GCSPerformanceTest + gradle-command: :it:${{ matrix.test_case }} env: exportDataset: performance_tests exportTable: io_performance_metrics - - name: run triggered javaPostcommitIOGCSPerformanceTests script + - name: run triggered javaPostcommitIOPerformanceTests script if: github.event_name != 'schedule' uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: :it:google-cloud-platform:GCSPerformanceTest + gradle-command: :it:${{ matrix.test_case }} env: exportDataset: performance_tests exportTable: io_performance_metrics_test \ No newline at end of file diff --git a/it/build.gradle b/it/build.gradle new file mode 100644 index 000000000000..35ccbba4c360 --- /dev/null +++ b/it/build.gradle @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +plugins { id 'org.apache.beam.module' } +applyJavaNature( + automaticModuleName: 'org.apache.beam.it', +) + +description = "Apache Beam :: IT" +ext.summary = "Integration test utilities suites." + +//These registrations exist to make our matrix Github Action simple to configure +tasks.register('GCSPerformanceTest') { + dependsOn(":it:google-cloud-platform:GCSPerformanceTest") +} + +tasks.register('BigTablePerformanceTest') { + dependsOn(":it:google-cloud-platform:BigTablePerformanceTest") +} \ No newline at end of file diff --git a/it/google-cloud-platform/build.gradle b/it/google-cloud-platform/build.gradle index 48c7175187da..0917ddd3e21a 100644 --- a/it/google-cloud-platform/build.gradle +++ b/it/google-cloud-platform/build.gradle @@ -77,4 +77,5 @@ dependencies { testRuntimeOnly library.java.slf4j_simple } -tasks.register("GCSPerformanceTest", IoPerformanceTestUtilities.IoPerformanceTest, project, 'google-cloud-platform', 'FileBasedIOLT', ['configuration':'large','project':'apache-beam-testing', 'artifactBucket':'io-performance-temp']) \ No newline at end of file +tasks.register("GCSPerformanceTest", IoPerformanceTestUtilities.IoPerformanceTest, project, 'google-cloud-platform', 'FileBasedIOLT', ['configuration':'large','project':'apache-beam-testing', 'artifactBucket':'io-performance-temp']) +tasks.register("BigTablePerformanceTest", IoPerformanceTestUtilities.IoPerformanceTest, project, 'google-cloud-platform', 'BigTableIOLT', ['configuration':'large','project':'apache-beam-testing', 'artifactBucket':'io-performance-temp']) \ No newline at end of file From 4bf0826a7846f671ed07389b874fb7ad5b833440 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 5 Sep 2023 13:47:39 -0400 Subject: [PATCH 16/31] add result reporting --- .../workflows/beam_PostCommit_Java_IO_Performance_Tests.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml index 57a02979bf6c..f2cd3fd2731b 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml @@ -61,7 +61,7 @@ jobs: github.event.comment.body == 'Run Java PostCommit IO Performance Tests' # runs-on: [self-hosted, ubuntu-20.04, main] runs-on: ubuntu-20.04 - name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.test_case }}) strategy: matrix: job_name: ["beam_PostCommit_Java_IO_Performance_Tests"] @@ -114,4 +114,6 @@ jobs: gradle-command: :it:${{ matrix.test_case }} env: exportDataset: performance_tests - exportTable: io_performance_metrics_test \ No newline at end of file + exportTable: io_performance_metrics_test + - name: read failure results + run: cat /home/runner/work/beam/beam/it/google-cloud-platform/build/reports/tests/BigTablePerformanceTest/classes/org.apache.beam.it.gcp.storage.BigTableIOLT.html From d40d04b0e2480ede63965cae645a2b43129c169d Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 5 Sep 2023 13:52:55 -0400 Subject: [PATCH 17/31] add failure clause --- .github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml index f2cd3fd2731b..ae8e63f44f20 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml @@ -116,4 +116,5 @@ jobs: exportDataset: performance_tests exportTable: io_performance_metrics_test - name: read failure results + if: ${{ failure() }} run: cat /home/runner/work/beam/beam/it/google-cloud-platform/build/reports/tests/BigTablePerformanceTest/classes/org.apache.beam.it.gcp.storage.BigTableIOLT.html From 2739e927476271ee92e0563c9dd5343c19ea82f6 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 5 Sep 2023 15:26:26 -0400 Subject: [PATCH 18/31] remove failure clause, update to run on self-hosted --- .../beam_PostCommit_Java_IO_Performance_Tests.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml index ae8e63f44f20..2f232af20e0e 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml @@ -59,8 +59,7 @@ jobs: github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || github.event.comment.body == 'Run Java PostCommit IO Performance Tests' -# runs-on: [self-hosted, ubuntu-20.04, main] - runs-on: ubuntu-20.04 + runs-on: [self-hosted, ubuntu-20.04, main] name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.test_case }}) strategy: matrix: @@ -114,7 +113,4 @@ jobs: gradle-command: :it:${{ matrix.test_case }} env: exportDataset: performance_tests - exportTable: io_performance_metrics_test - - name: read failure results - if: ${{ failure() }} - run: cat /home/runner/work/beam/beam/it/google-cloud-platform/build/reports/tests/BigTablePerformanceTest/classes/org.apache.beam.it.gcp.storage.BigTableIOLT.html + exportTable: io_performance_metrics_test \ No newline at end of file From bd6efeb15d7dc2b4126d950697b06e4b4964df3b Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Wed, 6 Sep 2023 11:47:14 -0400 Subject: [PATCH 19/31] address comments, clean up build --- .../workflows/beam_PostCommit_Java_IO_Performance_Tests.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml index 2f232af20e0e..48f43357a200 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml @@ -78,7 +78,7 @@ jobs: if: github.event_name == 'schedule' #This has scheduled runs run against the latest release uses: actions/checkout@v3 with: - ref: v2.50.0 #TODO automate updating this + ref: v2.50.0 #TODO(https://github.com/apache/beam/issues/28330) automate updating this repository: apache/beam - name: Install Java uses: actions/setup-java@v3.8.0 @@ -89,8 +89,6 @@ jobs: uses: gradle/gradle-build-action@v2 with: cache-read-only: false - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 - name: Authenticate on GCP uses: google-github-actions/setup-gcloud@v0 with: From 226a655b387db51685ea838f44be850e1790a309 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Wed, 6 Sep 2023 11:51:57 -0400 Subject: [PATCH 20/31] clarify branching --- .github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml index 48f43357a200..ffd5751fd8b3 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml @@ -97,7 +97,7 @@ jobs: project_id: ${{ secrets.GCP_PROJECT_ID }} export_default_credentials: true - name: run scheduled javaPostcommitIOPerformanceTests script - if: github.event_name == 'schedule' #This ensures only scheduled runs publish metrics publicly + if: github.event_name == 'schedule' #This ensures only scheduled runs publish metrics publicly by changing which exportTable is configured uses: ./.github/actions/gradle-command-self-hosted-action with: gradle-command: :it:${{ matrix.test_case }} From c63f112bd528c5846bdd0a921d1a81c6ba3f8799 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Thu, 7 Sep 2023 10:58:24 -0400 Subject: [PATCH 21/31] Update auth to retry getting credentials from GCE --- sdks/python/apache_beam/internal/gcp/auth.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sdks/python/apache_beam/internal/gcp/auth.py b/sdks/python/apache_beam/internal/gcp/auth.py index 47c3416babd4..c82ac6e5669d 100644 --- a/sdks/python/apache_beam/internal/gcp/auth.py +++ b/sdks/python/apache_beam/internal/gcp/auth.py @@ -28,6 +28,8 @@ from apache_beam.options.pipeline_options import PipelineOptions # google.auth is only available when Beam is installed with the gcp extra. +from apache_beam.utils import retry + try: from google.auth import impersonated_credentials import google.auth @@ -149,8 +151,7 @@ def _get_service_credentials(pipeline_options): try: # pylint: disable=c-extension-no-member - credentials, _ = google.auth.default( - scopes=pipeline_options.view_as(GoogleCloudOptions).gcp_oauth_scopes) + credentials = _Credentials._get_credentials_with_retrys(pipeline_options) credentials = _Credentials._add_impersonation_credentials( credentials, pipeline_options) credentials = _ApitoolsCredentialsAdapter(credentials) @@ -159,12 +160,19 @@ def _get_service_credentials(pipeline_options): 'Credentials.') return credentials except Exception as e: - _LOGGER.warning( + _LOGGER.error( 'Unable to find default credentials to use: %s\n' 'Connecting anonymously.', e) return None + @staticmethod + @retry.with_exponential_backoff + def _get_credentials_with_retrys(pipeline_options): + credentials, _ = google.auth.default( + scopes=pipeline_options.view_as(GoogleCloudOptions).gcp_oauth_scopes) + return credentials + @staticmethod def _add_impersonation_credentials(credentials, pipeline_options): gcs_options = pipeline_options.view_as(GoogleCloudOptions) From 1c513950cbbaa6e1f1839139079828601e952756 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Fri, 15 Sep 2023 11:09:40 -0400 Subject: [PATCH 22/31] Re-order imports --- sdks/python/apache_beam/internal/gcp/auth.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/internal/gcp/auth.py b/sdks/python/apache_beam/internal/gcp/auth.py index c82ac6e5669d..0304e183d14e 100644 --- a/sdks/python/apache_beam/internal/gcp/auth.py +++ b/sdks/python/apache_beam/internal/gcp/auth.py @@ -26,10 +26,9 @@ from apache_beam.options.pipeline_options import GoogleCloudOptions from apache_beam.options.pipeline_options import PipelineOptions - -# google.auth is only available when Beam is installed with the gcp extra. from apache_beam.utils import retry +# google.auth is only available when Beam is installed with the gcp extra. try: from google.auth import impersonated_credentials import google.auth From 1bacada565c95b359fd014cda31ee4758e692880 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Fri, 15 Sep 2023 12:39:43 -0400 Subject: [PATCH 23/31] Add test case --- sdks/python/apache_beam/internal/gcp/auth.py | 2 +- .../apache_beam/internal/gcp/auth_test.py | 60 +++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 sdks/python/apache_beam/internal/gcp/auth_test.py diff --git a/sdks/python/apache_beam/internal/gcp/auth.py b/sdks/python/apache_beam/internal/gcp/auth.py index 0304e183d14e..dc1939844f81 100644 --- a/sdks/python/apache_beam/internal/gcp/auth.py +++ b/sdks/python/apache_beam/internal/gcp/auth.py @@ -166,7 +166,7 @@ def _get_service_credentials(pipeline_options): return None @staticmethod - @retry.with_exponential_backoff + @retry.with_exponential_backoff() def _get_credentials_with_retrys(pipeline_options): credentials, _ = google.auth.default( scopes=pipeline_options.view_as(GoogleCloudOptions).gcp_oauth_scopes) diff --git a/sdks/python/apache_beam/internal/gcp/auth_test.py b/sdks/python/apache_beam/internal/gcp/auth_test.py new file mode 100644 index 000000000000..38179838ae48 --- /dev/null +++ b/sdks/python/apache_beam/internal/gcp/auth_test.py @@ -0,0 +1,60 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import unittest + +import mock + +from apache_beam.internal.gcp import auth +from apache_beam.options.pipeline_options import GoogleCloudOptions +from apache_beam.options.pipeline_options import PipelineOptions + +try: + import google.auth as gauth +except ImportError: + gauth = None + + +@unittest.skipIf(gauth is None, 'Google Auth dependencies are not installed') +class MyTestCase(unittest.TestCase): + @mock.patch('google.auth.default') + def test_auth_with_retrys(self, unused_mock_arg): + pipeline_options = PipelineOptions() + pipeline_options.view_as( + GoogleCloudOptions).impersonate_service_account = False + + credentials = ('creds', 1) + + self.is_called = False + + def side_effect(scopes=None): + if self.is_called: + return credentials + else: + self.is_called = True + raise IOError('Failed') + + google_auth_mock = mock.MagicMock() + gauth.default = google_auth_mock + google_auth_mock.side_effect = side_effect + + returned_credentials = auth.get_service_credentials(pipeline_options) + + self.assertEqual('creds', returned_credentials._google_auth_credentials) + + +if __name__ == '__main__': + unittest.main() From 8eca7d0e17025418255518202c610df752e1d402 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 19 Sep 2023 10:56:07 -0400 Subject: [PATCH 24/31] Update exception log --- sdks/python/apache_beam/internal/gcp/auth.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/internal/gcp/auth.py b/sdks/python/apache_beam/internal/gcp/auth.py index dc1939844f81..49ca20baab2f 100644 --- a/sdks/python/apache_beam/internal/gcp/auth.py +++ b/sdks/python/apache_beam/internal/gcp/auth.py @@ -159,9 +159,10 @@ def _get_service_credentials(pipeline_options): 'Credentials.') return credentials except Exception as e: - _LOGGER.error( + _LOGGER.warning( 'Unable to find default credentials to use: %s\n' - 'Connecting anonymously.', + 'Connecting anonymously. This is expected if no ' + 'credentials are needed to access GCP resources.', e) return None From 59310a259aef57f676a7ef043fc1c7896c2e17d1 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 19 Sep 2023 11:28:09 -0400 Subject: [PATCH 25/31] Add failure test --- .../apache_beam/internal/gcp/auth_test.py | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/sdks/python/apache_beam/internal/gcp/auth_test.py b/sdks/python/apache_beam/internal/gcp/auth_test.py index 38179838ae48..ad61802e488e 100644 --- a/sdks/python/apache_beam/internal/gcp/auth_test.py +++ b/sdks/python/apache_beam/internal/gcp/auth_test.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import logging import unittest import mock @@ -28,6 +29,25 @@ gauth = None +class MockLoggingHandler(logging.Handler): + """Mock logging handler to check for expected logs.""" + def __init__(self, *args, **kwargs): + self.reset() + logging.Handler.__init__(self, *args, **kwargs) + + def emit(self, record): + self.messages[record.levelname.lower()].append(record.getMessage()) + + def reset(self): + self.messages = { + 'debug': [], + 'info': [], + 'warning': [], + 'error': [], + 'critical': [], + } + + @unittest.skipIf(gauth is None, 'Google Auth dependencies are not installed') class MyTestCase(unittest.TestCase): @mock.patch('google.auth.default') @@ -55,6 +75,41 @@ def side_effect(scopes=None): self.assertEqual('creds', returned_credentials._google_auth_credentials) + @mock.patch('google.auth.default') + def test_auth_with_retrys_always_fail(self, unused_mock_arg): + pipeline_options = PipelineOptions() + pipeline_options.view_as( + GoogleCloudOptions).impersonate_service_account = False + + self.is_called = False + + def side_effect(scopes=None): + raise IOError('Failed') + + google_auth_mock = mock.MagicMock() + gauth.default = google_auth_mock + google_auth_mock.side_effect = side_effect + + loggerHandler = MockLoggingHandler() + + auth._LOGGER.addHandler(loggerHandler) + + #Remove the retry decorator to speed up testing. + #Otherwise, test takes ~10 minutes + auth._Credentials._get_credentials_with_retrys = \ + auth._Credentials._get_credentials_with_retrys.__closure__[2 + ].cell_contents + + returned_credentials = auth.get_service_credentials(pipeline_options) + + self.assertEqual(None, returned_credentials) + self.assertEqual([ + 'Unable to find default credentials to use: Failed\n' + 'Connecting anonymously. This is expected if no credentials are ' + 'needed to access GCP resources.' + ], + loggerHandler.messages.get('warning')) + if __name__ == '__main__': unittest.main() From 0da1faa8bde71d37641fb6a425d0db216e255e36 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Wed, 20 Sep 2023 14:03:10 -0400 Subject: [PATCH 26/31] Update removal of retrying method --- .../apache_beam/internal/gcp/auth_test.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/sdks/python/apache_beam/internal/gcp/auth_test.py b/sdks/python/apache_beam/internal/gcp/auth_test.py index ad61802e488e..a7b5d0ea01f3 100644 --- a/sdks/python/apache_beam/internal/gcp/auth_test.py +++ b/sdks/python/apache_beam/internal/gcp/auth_test.py @@ -81,24 +81,15 @@ def test_auth_with_retrys_always_fail(self, unused_mock_arg): pipeline_options.view_as( GoogleCloudOptions).impersonate_service_account = False - self.is_called = False - - def side_effect(scopes=None): - raise IOError('Failed') - - google_auth_mock = mock.MagicMock() - gauth.default = google_auth_mock - google_auth_mock.side_effect = side_effect - loggerHandler = MockLoggingHandler() auth._LOGGER.addHandler(loggerHandler) - #Remove the retry decorator to speed up testing. - #Otherwise, test takes ~10 minutes - auth._Credentials._get_credentials_with_retrys = \ - auth._Credentials._get_credentials_with_retrys.__closure__[2 - ].cell_contents + #Remove call to retrying method, as otherwise test takes ~10 minutes to run + def raise_(): + raise IOError('Failed') + + auth._Credentials._get_credentials_with_retrys = lambda options: raise_() returned_credentials = auth.get_service_credentials(pipeline_options) From e7e66f28c3453dd38b86bfa878236727032b725e Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Wed, 20 Sep 2023 15:27:52 -0400 Subject: [PATCH 27/31] rework via mock --- sdks/python/apache_beam/internal/gcp/auth_test.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sdks/python/apache_beam/internal/gcp/auth_test.py b/sdks/python/apache_beam/internal/gcp/auth_test.py index a7b5d0ea01f3..0793d0a15672 100644 --- a/sdks/python/apache_beam/internal/gcp/auth_test.py +++ b/sdks/python/apache_beam/internal/gcp/auth_test.py @@ -75,7 +75,8 @@ def side_effect(scopes=None): self.assertEqual('creds', returned_credentials._google_auth_credentials) - @mock.patch('google.auth.default') + @mock.patch( + 'apache_beam.internal.gcp.auth._Credentials._get_credentials_with_retrys') def test_auth_with_retrys_always_fail(self, unused_mock_arg): pipeline_options = PipelineOptions() pipeline_options.view_as( @@ -86,10 +87,12 @@ def test_auth_with_retrys_always_fail(self, unused_mock_arg): auth._LOGGER.addHandler(loggerHandler) #Remove call to retrying method, as otherwise test takes ~10 minutes to run - def raise_(): + def raise_(scopes=None): raise IOError('Failed') - auth._Credentials._get_credentials_with_retrys = lambda options: raise_() + retry_auth_mock = mock.MagicMock() + auth._Credentials._get_credentials_with_retrys = retry_auth_mock + retry_auth_mock.side_effect = raise_ returned_credentials = auth.get_service_credentials(pipeline_options) From a54cd149c8a6475510e583c935b0da0bab92955f Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Mon, 25 Sep 2023 13:57:00 -0400 Subject: [PATCH 28/31] Clear credentials cache for idempotent tests --- sdks/python/apache_beam/internal/gcp/auth_test.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/internal/gcp/auth_test.py b/sdks/python/apache_beam/internal/gcp/auth_test.py index 0793d0a15672..2397591c72de 100644 --- a/sdks/python/apache_beam/internal/gcp/auth_test.py +++ b/sdks/python/apache_beam/internal/gcp/auth_test.py @@ -49,7 +49,7 @@ def reset(self): @unittest.skipIf(gauth is None, 'Google Auth dependencies are not installed') -class MyTestCase(unittest.TestCase): +class AuthTest(unittest.TestCase): @mock.patch('google.auth.default') def test_auth_with_retrys(self, unused_mock_arg): pipeline_options = PipelineOptions() @@ -71,6 +71,12 @@ def side_effect(scopes=None): gauth.default = google_auth_mock google_auth_mock.side_effect = side_effect + # _Credentials caches the actual credentials. + # This resets it for idempotent tests. + if auth._Credentials._credentials_init: + auth._Credentials._credentials_init = False + auth._Credentials._credentials = None + returned_credentials = auth.get_service_credentials(pipeline_options) self.assertEqual('creds', returned_credentials._google_auth_credentials) @@ -94,6 +100,12 @@ def raise_(scopes=None): auth._Credentials._get_credentials_with_retrys = retry_auth_mock retry_auth_mock.side_effect = raise_ + # _Credentials caches the actual credentials. + # This resets it for idempotent tests. + if auth._Credentials._credentials_init: + auth._Credentials._credentials_init = False + auth._Credentials._credentials = None + returned_credentials = auth.get_service_credentials(pipeline_options) self.assertEqual(None, returned_credentials) From aceae65d68eaff9c88c87323505bcb99da83e0e2 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 26 Sep 2023 11:12:21 -0400 Subject: [PATCH 29/31] Remove handler after test Change retry timeout to facilitate shorter retrys for anonymous access cases --- sdks/python/apache_beam/internal/gcp/auth.py | 2 +- sdks/python/apache_beam/internal/gcp/auth_test.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/internal/gcp/auth.py b/sdks/python/apache_beam/internal/gcp/auth.py index 49ca20baab2f..9ebc755e2236 100644 --- a/sdks/python/apache_beam/internal/gcp/auth.py +++ b/sdks/python/apache_beam/internal/gcp/auth.py @@ -167,7 +167,7 @@ def _get_service_credentials(pipeline_options): return None @staticmethod - @retry.with_exponential_backoff() + @retry.with_exponential_backoff(num_retries=5, initial_delay_secs=2) def _get_credentials_with_retrys(pipeline_options): credentials, _ = google.auth.default( scopes=pipeline_options.view_as(GoogleCloudOptions).gcp_oauth_scopes) diff --git a/sdks/python/apache_beam/internal/gcp/auth_test.py b/sdks/python/apache_beam/internal/gcp/auth_test.py index 2397591c72de..5f511b0e06d2 100644 --- a/sdks/python/apache_beam/internal/gcp/auth_test.py +++ b/sdks/python/apache_beam/internal/gcp/auth_test.py @@ -116,6 +116,8 @@ def raise_(scopes=None): ], loggerHandler.messages.get('warning')) + auth._LOGGER.removeHandler(loggerHandler) + if __name__ == '__main__': unittest.main() From 9bd54a7047427ae7227d5a4c9ebecba21e5c520f Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 26 Sep 2023 14:56:02 -0400 Subject: [PATCH 30/31] Change retry timeout to facilitate shorter retrys for anonymous access cases --- sdks/python/apache_beam/internal/gcp/auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/internal/gcp/auth.py b/sdks/python/apache_beam/internal/gcp/auth.py index 9ebc755e2236..bab3ace4144e 100644 --- a/sdks/python/apache_beam/internal/gcp/auth.py +++ b/sdks/python/apache_beam/internal/gcp/auth.py @@ -167,7 +167,7 @@ def _get_service_credentials(pipeline_options): return None @staticmethod - @retry.with_exponential_backoff(num_retries=5, initial_delay_secs=2) + @retry.with_exponential_backoff(num_retries=4, initial_delay_secs=2) def _get_credentials_with_retrys(pipeline_options): credentials, _ = google.auth.default( scopes=pipeline_options.view_as(GoogleCloudOptions).gcp_oauth_scopes) From e4a97467e75d2360ef711460237d2d270b933e7d Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 26 Sep 2023 16:41:17 -0400 Subject: [PATCH 31/31] reset credentials before and after test --- sdks/python/apache_beam/internal/gcp/auth_test.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sdks/python/apache_beam/internal/gcp/auth_test.py b/sdks/python/apache_beam/internal/gcp/auth_test.py index 5f511b0e06d2..98fb828875b9 100644 --- a/sdks/python/apache_beam/internal/gcp/auth_test.py +++ b/sdks/python/apache_beam/internal/gcp/auth_test.py @@ -79,6 +79,12 @@ def side_effect(scopes=None): returned_credentials = auth.get_service_credentials(pipeline_options) + # _Credentials caches the actual credentials. + # This resets it for idempotent tests. + if auth._Credentials._credentials_init: + auth._Credentials._credentials_init = False + auth._Credentials._credentials = None + self.assertEqual('creds', returned_credentials._google_auth_credentials) @mock.patch( @@ -116,6 +122,12 @@ def raise_(scopes=None): ], loggerHandler.messages.get('warning')) + # _Credentials caches the actual credentials. + # This resets it for idempotent tests. + if auth._Credentials._credentials_init: + auth._Credentials._credentials_init = False + auth._Credentials._credentials = None + auth._LOGGER.removeHandler(loggerHandler)