From 30b92ad58f29862a9acfd1beb4a983fb4eac2e56 Mon Sep 17 00:00:00 2001 From: Jack McCluskey Date: Tue, 17 Dec 2024 14:55:51 -0500 Subject: [PATCH 1/6] Fix env variable loading in Cost Benchmark workflow --- .github/workflows/beam_Python_CostBenchmarks_Dataflow.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml b/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml index 18fe37e142ac..a9bacf0687c3 100644 --- a/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml +++ b/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml @@ -92,7 +92,7 @@ jobs: -PloadTest.mainClass=apache_beam.testing.benchmarks.wordcount.wordcount \ -Prunner=DataflowRunner \ -PpythonVersion=3.10 \ - '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_1 }} --job_name=benchmark-tests-wordcount-python-${{env.NOW_UTC}} --output_file=gs://temp-storage-for-end-to-end-tests/wordcount/result_wordcount-${{env.NOW_UTC}}.txt' \ + '-PloadTest.args=${{ env.beam_Python_Cost_Benchmarks_Dataflow_test_arguments_1 }} --job_name=benchmark-tests-wordcount-python-${{env.NOW_UTC}} --output_file=gs://temp-storage-for-end-to-end-tests/wordcount/result_wordcount-${{env.NOW_UTC}}.txt' \ - name: Run Tensorflow MNIST Image Classification on Dataflow uses: ./.github/actions/gradle-command-self-hosted-action timeout-minutes: 30 @@ -102,4 +102,4 @@ jobs: -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.tensorflow_mnist_classification_cost_benchmark \ -Prunner=DataflowRunner \ -PpythonVersion=3.10 \ - '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-tf-mnist-classification-python-${{env.NOW_UTC}} --input_file=gs://apache-beam-ml/testing/inputs/it_mnist_data.csv --output_file=gs://temp-storage-for-end-to-end-tests/wordcount/result_tf_mnist-${{env.NOW_UTC}}.txt --model=gs://apache-beam-ml/models/tensorflow/mnist/' \ \ No newline at end of file + '-PloadTest.args=${{ env.beam_Python_Cost_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-tf-mnist-classification-python-${{env.NOW_UTC}} --input_file=gs://apache-beam-ml/testing/inputs/it_mnist_data.csv --output_file=gs://temp-storage-for-end-to-end-tests/wordcount/result_tf_mnist-${{env.NOW_UTC}}.txt --model=gs://apache-beam-ml/models/tensorflow/mnist/' \ \ No newline at end of file From de6013ccf84f760446edac0fca5a3ce4583e410d Mon Sep 17 00:00:00 2001 From: Jack McCluskey Date: Tue, 17 Dec 2024 15:17:14 -0500 Subject: [PATCH 2/6] fix output file for tf mnist --- .github/workflows/beam_Python_CostBenchmarks_Dataflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml b/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml index a9bacf0687c3..01943e47e031 100644 --- a/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml +++ b/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml @@ -102,4 +102,4 @@ jobs: -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.tensorflow_mnist_classification_cost_benchmark \ -Prunner=DataflowRunner \ -PpythonVersion=3.10 \ - '-PloadTest.args=${{ env.beam_Python_Cost_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-tf-mnist-classification-python-${{env.NOW_UTC}} --input_file=gs://apache-beam-ml/testing/inputs/it_mnist_data.csv --output_file=gs://temp-storage-for-end-to-end-tests/wordcount/result_tf_mnist-${{env.NOW_UTC}}.txt --model=gs://apache-beam-ml/models/tensorflow/mnist/' \ \ No newline at end of file + '-PloadTest.args=${{ env.beam_Python_Cost_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-tf-mnist-classification-python-${{env.NOW_UTC}} --input_file=gs://apache-beam-ml/testing/inputs/it_mnist_data.csv --output_file=gs://temp-storage-for-end-to-end-tests/inference/result_tf_mnist-${{env.NOW_UTC}}.txt --model=gs://apache-beam-ml/models/tensorflow/mnist/' \ \ No newline at end of file From 2dd647a4b67285ccae14eb55c7b7b9a6c78c9b3a Mon Sep 17 00:00:00 2001 From: Jack McCluskey Date: Tue, 17 Dec 2024 15:32:35 -0500 Subject: [PATCH 3/6] add load test requirements file arg --- .github/workflows/beam_Python_CostBenchmarks_Dataflow.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml b/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml index 01943e47e031..209325c429a1 100644 --- a/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml +++ b/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml @@ -102,4 +102,5 @@ jobs: -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.tensorflow_mnist_classification_cost_benchmark \ -Prunner=DataflowRunner \ -PpythonVersion=3.10 \ + -PloadTest.requirementsTxtFile=apache_beam/ml/inference/tensorflow_tests_requirements.txt \ '-PloadTest.args=${{ env.beam_Python_Cost_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-tf-mnist-classification-python-${{env.NOW_UTC}} --input_file=gs://apache-beam-ml/testing/inputs/it_mnist_data.csv --output_file=gs://temp-storage-for-end-to-end-tests/inference/result_tf_mnist-${{env.NOW_UTC}}.txt --model=gs://apache-beam-ml/models/tensorflow/mnist/' \ \ No newline at end of file From 30507cd77df0178b8cfe61296e18972b1ba44daf Mon Sep 17 00:00:00 2001 From: Jack McCluskey Date: Tue, 17 Dec 2024 15:41:04 -0500 Subject: [PATCH 4/6] update mnist args --- .github/workflows/beam_Python_CostBenchmarks_Dataflow.yml | 2 +- .../tensorflow_mnist_classification_cost_benchmark.py | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml b/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml index 209325c429a1..97c2eb6fc7e0 100644 --- a/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml +++ b/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml @@ -103,4 +103,4 @@ jobs: -Prunner=DataflowRunner \ -PpythonVersion=3.10 \ -PloadTest.requirementsTxtFile=apache_beam/ml/inference/tensorflow_tests_requirements.txt \ - '-PloadTest.args=${{ env.beam_Python_Cost_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-tf-mnist-classification-python-${{env.NOW_UTC}} --input_file=gs://apache-beam-ml/testing/inputs/it_mnist_data.csv --output_file=gs://temp-storage-for-end-to-end-tests/inference/result_tf_mnist-${{env.NOW_UTC}}.txt --model=gs://apache-beam-ml/models/tensorflow/mnist/' \ \ No newline at end of file + '-PloadTest.args=${{ env.beam_Python_Cost_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-tf-mnist-classification-python-${{env.NOW_UTC}} --input=gs://apache-beam-ml/testing/inputs/it_mnist_data.csv --output=gs://temp-storage-for-end-to-end-tests/inference/result_tf_mnist-${{env.NOW_UTC}}.txt --model_path=gs://apache-beam-ml/models/tensorflow/mnist/' \ \ No newline at end of file diff --git a/sdks/python/apache_beam/testing/benchmarks/inference/tensorflow_mnist_classification_cost_benchmark.py b/sdks/python/apache_beam/testing/benchmarks/inference/tensorflow_mnist_classification_cost_benchmark.py index f7e12dcead03..6c9a7a9b6da0 100644 --- a/sdks/python/apache_beam/testing/benchmarks/inference/tensorflow_mnist_classification_cost_benchmark.py +++ b/sdks/python/apache_beam/testing/benchmarks/inference/tensorflow_mnist_classification_cost_benchmark.py @@ -28,9 +28,6 @@ def __init__(self): def test(self): extra_opts = {} - extra_opts['input'] = self.pipeline.get_option('input_file') - extra_opts['output'] = self.pipeline.get_option('output_file') - extra_opts['model_path'] = self.pipeline.get_option('model') tensorflow_mnist_classification.run( self.pipeline.get_full_options_as_args(**extra_opts), save_main_session=False) From f75c1e3053f8d8062f8bac315e7f1f881b9c9169 Mon Sep 17 00:00:00 2001 From: Jack McCluskey Date: Tue, 17 Dec 2024 16:12:09 -0500 Subject: [PATCH 5/6] revert how args are passed --- .github/workflows/beam_Python_CostBenchmarks_Dataflow.yml | 2 +- .../tensorflow_mnist_classification_cost_benchmark.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml b/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml index 97c2eb6fc7e0..209325c429a1 100644 --- a/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml +++ b/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml @@ -103,4 +103,4 @@ jobs: -Prunner=DataflowRunner \ -PpythonVersion=3.10 \ -PloadTest.requirementsTxtFile=apache_beam/ml/inference/tensorflow_tests_requirements.txt \ - '-PloadTest.args=${{ env.beam_Python_Cost_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-tf-mnist-classification-python-${{env.NOW_UTC}} --input=gs://apache-beam-ml/testing/inputs/it_mnist_data.csv --output=gs://temp-storage-for-end-to-end-tests/inference/result_tf_mnist-${{env.NOW_UTC}}.txt --model_path=gs://apache-beam-ml/models/tensorflow/mnist/' \ \ No newline at end of file + '-PloadTest.args=${{ env.beam_Python_Cost_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-tf-mnist-classification-python-${{env.NOW_UTC}} --input_file=gs://apache-beam-ml/testing/inputs/it_mnist_data.csv --output_file=gs://temp-storage-for-end-to-end-tests/inference/result_tf_mnist-${{env.NOW_UTC}}.txt --model=gs://apache-beam-ml/models/tensorflow/mnist/' \ \ No newline at end of file diff --git a/sdks/python/apache_beam/testing/benchmarks/inference/tensorflow_mnist_classification_cost_benchmark.py b/sdks/python/apache_beam/testing/benchmarks/inference/tensorflow_mnist_classification_cost_benchmark.py index 6c9a7a9b6da0..f7e12dcead03 100644 --- a/sdks/python/apache_beam/testing/benchmarks/inference/tensorflow_mnist_classification_cost_benchmark.py +++ b/sdks/python/apache_beam/testing/benchmarks/inference/tensorflow_mnist_classification_cost_benchmark.py @@ -28,6 +28,9 @@ def __init__(self): def test(self): extra_opts = {} + extra_opts['input'] = self.pipeline.get_option('input_file') + extra_opts['output'] = self.pipeline.get_option('output_file') + extra_opts['model_path'] = self.pipeline.get_option('model') tensorflow_mnist_classification.run( self.pipeline.get_full_options_as_args(**extra_opts), save_main_session=False) From 922bb92d613f017ad482683c7db431df116aeea5 Mon Sep 17 00:00:00 2001 From: Jack McCluskey Date: Tue, 17 Dec 2024 16:20:37 -0500 Subject: [PATCH 6/6] assign result correctly --- .../inference/tensorflow_mnist_classification_cost_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/testing/benchmarks/inference/tensorflow_mnist_classification_cost_benchmark.py b/sdks/python/apache_beam/testing/benchmarks/inference/tensorflow_mnist_classification_cost_benchmark.py index f7e12dcead03..223b973e5fbe 100644 --- a/sdks/python/apache_beam/testing/benchmarks/inference/tensorflow_mnist_classification_cost_benchmark.py +++ b/sdks/python/apache_beam/testing/benchmarks/inference/tensorflow_mnist_classification_cost_benchmark.py @@ -31,7 +31,7 @@ def test(self): extra_opts['input'] = self.pipeline.get_option('input_file') extra_opts['output'] = self.pipeline.get_option('output_file') extra_opts['model_path'] = self.pipeline.get_option('model') - tensorflow_mnist_classification.run( + self.result = tensorflow_mnist_classification.run( self.pipeline.get_full_options_as_args(**extra_opts), save_main_session=False)