From 758de3323b56e28615255dcbd7ab4c53c89e16f6 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 7 Dec 2022 18:40:23 +0530 Subject: [PATCH 01/45] go lints --- sdks/go/pkg/beam/core/graph/xlang.go | 2 +- sdks/go/pkg/beam/core/typex/special.go | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/sdks/go/pkg/beam/core/graph/xlang.go b/sdks/go/pkg/beam/core/graph/xlang.go index 8d2d0b324eff..eeb86f6464d6 100644 --- a/sdks/go/pkg/beam/core/graph/xlang.go +++ b/sdks/go/pkg/beam/core/graph/xlang.go @@ -52,7 +52,7 @@ func init() { // unnecesary proto related imports into graph. type ExpandedTransform struct { Components any // *pipepb.Components - Transform any //*pipepb.PTransform + Transform any // *pipepb.PTransform Requirements []string } diff --git a/sdks/go/pkg/beam/core/typex/special.go b/sdks/go/pkg/beam/core/typex/special.go index 067cef1fb4fd..935371225848 100644 --- a/sdks/go/pkg/beam/core/typex/special.go +++ b/sdks/go/pkg/beam/core/typex/special.go @@ -74,15 +74,21 @@ type BundleFinalization interface { RegisterCallback(time.Duration, func() error) } +// PaneTiming defines the pane timing in byte. type PaneTiming byte const ( - PaneEarly PaneTiming = 0 - PaneOnTime PaneTiming = 1 - PaneLate PaneTiming = 2 + // PaneEarly defines early pane timing. + PaneEarly PaneTiming = 0 + // PaneOnTime defines on-time pane timing. + PaneOnTime PaneTiming = 1 + // PaneLate defines late pane timing. + PaneLate PaneTiming = 2 + // PaneUnknown defines unknown pane timing. PaneUnknown PaneTiming = 3 ) +// PaneInfo represents the output pane. type PaneInfo struct { Timing PaneTiming IsFirst, IsLast bool From cb284ecac304714ca5246f83aefad07200bc987b Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Tue, 7 Feb 2023 13:59:53 -0500 Subject: [PATCH 02/45] added tf model handler and tests --- .../tensorflow_mnist_classification.py | 113 +++++++++++ .../ml/inference/tensorflow_inference.py | 189 ++++++++++++++++++ .../inference/tensorflow_inference_it_test.py | 85 ++++++++ .../ml/inference/tensorflow_inference_test.py | 111 ++++++++++ 4 files changed, 498 insertions(+) create mode 100644 sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py create mode 100644 sdks/python/apache_beam/ml/inference/tensorflow_inference.py create mode 100644 sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py create mode 100644 sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py diff --git a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py new file mode 100644 index 000000000000..a39b3fc6733e --- /dev/null +++ b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py @@ -0,0 +1,113 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + + +import argparse +import logging +from typing import Iterable, List, Tuple + +import numpy + +import apache_beam as beam +from apache_beam.ml.inference.base import KeyedModelHandler, PredictionResult, RunInference +from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy +from apache_beam.options.pipeline_options import PipelineOptions, SetupOptions + +from apache_beam.runners.runner import PipelineResult + + +def process_input(row: str) -> Tuple[int, numpy.ndarray]: + data = row.split(',') + label, pixels = int(data[0]), data[1:] + pixels = [int(pixel) for pixel in pixels] + # the trained model accepts the input of shape 28x28x1 + pixels = numpy.array(pixels).reshape(28, 28, 1) + return label, pixels + + +class PostProcessor(beam.DoFn): + """Process the PredictionResult to get the predicted label. + Returns a comma separated string with true label and predicted label. + """ + def process(self, element: Tuple[int, PredictionResult]) -> Iterable[str]: + label, prediction_result = element + prediction = numpy.argmax(prediction_result.inference, axis=0) + yield '{},{}'.format(label, prediction) + +def parse_known_args(argv): + """Parses args for the workflow.""" + parser = argparse.ArgumentParser() + parser.add_argument( + '--input', + dest='input', + required=True, + help='text file with comma separated int values.') + parser.add_argument( + '--output', + dest='output', + required=True, + help='Path to save output predictions.') + parser.add_argument( + '--model_path', + dest='model_path', + required=True, + help='Path to load the Tensorflow model for Inference.') + return parser.parse_known_args(argv) + + +def run( + argv=None, save_main_session=True, test_pipeline=None) -> PipelineResult: + """ + Args: + argv: Command line arguments defined for this example. + save_main_session: Used for internal testing. + test_pipeline: Used for internal testing. + """ + known_args, pipeline_args = parse_known_args(argv) + pipeline_options = PipelineOptions(pipeline_args) + pipeline_options.view_as(SetupOptions).save_main_session = save_main_session + + # In this example we pass keyed inputs to RunInference transform. + # Therefore, we use KeyedModelHandler wrapper over TFModelHandlerNumpy. 
+ model_loader = KeyedModelHandler( + TFModelHandlerNumpy(model_uri=known_args.model_path)) + + pipeline = test_pipeline + if not test_pipeline: + pipeline = beam.Pipeline(options=pipeline_options) + + label_pixel_tuple = ( + pipeline + | "ReadFromInput" >> beam.io.ReadFromText(known_args.input) + | "PreProcessInputs" >> beam.Map(process_input)) + + predictions = ( + label_pixel_tuple + | "RunInference" >> RunInference(model_loader) + | "PostProcessOutputs" >> beam.ParDo(PostProcessor())) + + _ = predictions | "WriteOutput" >> beam.io.WriteToText( + known_args.output, shard_name_template='', append_trailing_newlines=True) + + result = pipeline.run() + result.wait_until_finish() + return result + + +if __name__ == '__main__': + logging.getLogger().setLevel(logging.INFO) + run() \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py new file mode 100644 index 000000000000..c933423843b9 --- /dev/null +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -0,0 +1,189 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# pytype: skip-file + +import logging +from collections import defaultdict +from typing import Any +from typing import Callable +from typing import Dict +from typing import Iterable +from typing import Optional +from typing import Sequence +from typing import Union + +import sys +from apache_beam.ml.inference import utils +import tensorflow as tf +import numpy +from apache_beam.io.filesystems import FileSystems +from apache_beam.ml.inference.base import ModelHandler +from apache_beam.ml.inference.base import PredictionResult +from apache_beam.utils.annotations import experimental + +__all__ = [ + 'TFModelHandlerNumpy', + 'TFModelHandlerTensor', +] + +TensorInferenceFn = Callable[ + [tf.Module, Sequence[numpy.ndarray], Optional[Dict[str, Any]], Optional[str]], + Iterable[PredictionResult]] + +def _load_model(model_uri): + return tf.keras.models.load_model(model_uri) + + +def default_numpy_inference_fn( + model: tf.Module, + batch: Sequence[numpy.ndarray], + inference_args: Optional[Dict[str,Any]] = None, + model_id: Optional[str] = None) -> Iterable[PredictionResult]: + vectorized_batch = numpy.stack(batch, axis=0) + return utils._convert_to_result(batch, model.predict(vectorized_batch), model_id) + + +def default_tensor_inference_fn( + model: tf.Module, + batch: Sequence[tf.Tensor], + inference_args: Optional[Dict[str,Any]] = None, + model_id: Optional[str] = None) -> Iterable[PredictionResult]: + vectorized_batch = tf.stack(batch, axis=0) + return utils._convert_to_result(batch, model.predict(vectorized_batch), model_id) + +class TFModelHandlerNumpy(ModelHandler[numpy.ndarray, + PredictionResult, + tf.Module]): + def __init__( + self, + model_uri: str, + *, + inference_fn: TensorInferenceFn = default_numpy_inference_fn): + self._model_uri = model_uri + self._inference_fn = inference_fn + + def load_model(self) -> tf.Module: + """Loads and initializes a Tensorflow model for processing.""" + return _load_model(self._model_uri) + + def update_model_path(self, model_path: Optional[str] = None): + self._model_uri = model_path if model_path else self._model_uri + + def run_inference( + self, + batch: Sequence[numpy.ndarray], + model: tf.Module, + inference_args: Optional[Dict[str, Any]] = None + ) -> Iterable[PredictionResult]: + """ + Runs inferences on a batch of numpy array and returns an Iterable of + numpy array Predictions. + + This method stacks the n-dimensional np-array in a vectorized format to optimize + the inference call. + + Args: + batch: A sequence of numpy nd-array. These should be batchable, as this + method will call `numpy.stack()` and pass in batched numpy nd-array with + dimensions (batch_size, n_features, etc.) into the model's forward() + function. + model: A TF model. + inference_args: any additional arguments for an inference. + + Returns: + An Iterable of type PredictionResult. + """ + return self._inference_fn(model, batch, inference_args, self._model_uri) + + def get_num_bytes(self, batch: Sequence[numpy.ndarray]) -> int: + """ + Returns: + The number of bytes of data for a batch of numpy arrays. + """ + return sum(sys.getsizeof(element) for element in batch) + + def get_metrics_namespace(self) -> str: + """ + Returns: + A namespace for metrics collected by the RunInference transform. 
+ """ + return 'BeamML_TF_Numpy' + + def validate_inference_args(self, inference_args: Optional[Dict[str, Any]]): + pass + + +class TFModelHandlerTensor(ModelHandler[tf.Tensor, + PredictionResult, + tf.Module]): + def __init__( + self, + model_uri: str, + *, + inference_fn: TensorInferenceFn = default_tensor_inference_fn): + self._model_uri = model_uri + self._inference_fn = inference_fn + + def load_model(self) -> tf.Module: + """Loads and initializes a tensorflow model for processing.""" + return _load_model(self._model_uri) + + def update_model_path(self, model_path: Optional[str] = None): + self._model_uri = model_path if model_path else self._model_uri + + def run_inference( + self, + batch: Sequence[tf.Tensor], + model: tf.Module, + inference_args: Optional[Dict[str, Any]] = None, + ) -> Iterable[PredictionResult]: + """ + Runs inferences on a batch of tf.Tensor and returns an Iterable of + Tensor Predictions. + This method stacks the list of Tensors in a vectorized format to optimize + the inference call. + Args: + batch: A sequence of Tensors. These Tensors should be batchable, as this + method will call `tf.stack()` and pass in batched Tensors with + dimensions (batch_size, n_features, etc.) into the model's forward() + function. + model: A Tensorflow model. + inference_args: Non-batchable arguments required as inputs to the model's + forward() function. Unlike Tensors in `batch`, these parameters will + not be dynamically batched + Returns: + An Iterable of type PredictionResult. + """ + return self._inference_fn(model, batch, inference_args, self._model_uri) + + def get_num_bytes(self, batch: Sequence[tf.Tensor]) -> int: + """ + Returns: + The number of bytes of data for a batch of Tensors. + """ + return sum(sys.getsizeof(element) for element in batch) + + def get_metrics_namespace(self) -> str: + """ + Returns: + A namespace for metrics collected by the RunInference transform. + """ + return 'BeamML_TF_Tensors' + + def validate_inference_args(self, inference_args: Optional[Dict[str, Any]]): + pass \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py new file mode 100644 index 000000000000..fcd8dda0875c --- /dev/null +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -0,0 +1,85 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +"""End-to-End test for Tensorflow Inference""" + +from cgi import test +from typing import Tuple +import logging +from typing import List +import unittest +import uuid + + +import pytest + +import apache_beam as beam +from apache_beam.examples.inference import tensorflow_mnist_classification +from apache_beam.io.filesystems import FileSystems + +from apache_beam.testing.test_pipeline import TestPipeline + + +def process_outputs(filepath): + with FileSystems().open(filepath) as f: + lines = f.readlines() + lines = [l.decode('utf-8').strip('\n') for l in lines] + return lines + + +class TensorflowInference(unittest.TestCase): + def process_input(self, row: str) -> Tuple[int, List[int]]: + data = row.split(',') + label, pixels = int(data[0]), data[1:] + pixels = [int(pixel) for pixel in pixels] + return label, pixels + + + def test_tf_mnist_classification(self): + test_pipeline = TestPipeline(is_integration_test=True) + input_file = 'gs://clouddfe-riteshghorse/tf/mnist/dataset/testing_inputs_it_mnist_data.csv' + output_file_dir = 'gs://clouddfe-riteshghorse/tf/mnist/output/' + output_file = '/'.join([output_file_dir, str(uuid.uuid4()), 'result.txt']) + model_path = 'gs://clouddfe-riteshghorse/tf/mnist/model/' + extra_opts = { + 'input': input_file, + 'output': output_file, + 'model_path': model_path, + } + tensorflow_mnist_classification.run( + test_pipeline.get_full_options_as_args(**extra_opts), + save_main_session=False) + self.assertEqual(FileSystems().exists(output_file), True) + + expected_output_filepath = 'gs://clouddfe-riteshghorse/tf/mnist/output/testing_expected_outputs_test_sklearn_mnist_classification_actuals.txt' # pylint: disable=line-too-long + expected_outputs = process_outputs(expected_output_filepath) + + predicted_outputs = process_outputs(output_file) + self.assertEqual(len(expected_outputs), len(predicted_outputs)) + + predictions_dict = {} + for i in range(len(predicted_outputs)): + true_label, prediction = predicted_outputs[i].split(',') + predictions_dict[true_label] = prediction + + for i in range(len(expected_outputs)): + true_label, expected_prediction = expected_outputs[i].split(',') + self.assertEqual(predictions_dict[true_label], expected_prediction) + +if __name__ == '__main__': + logging.getLogger().setLevel(logging.DEBUG) + unittest.main() \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py new file mode 100644 index 000000000000..b6fce47aee6c --- /dev/null +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -0,0 +1,111 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# pytype: skip-file + +import unittest +from apache_beam.examples import inference +from apache_beam.ml.inference.sklearn_inference_test import compare_prediction_result + + +import numpy +import tensorflow as tf + + +from apache_beam.ml.inference.base import KeyedModelHandler, PredictionResult +from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy, TFModelHandlerTensor + +class FakeTFNumpyModel: + def predict(self, input: numpy.ndarray): + return numpy.multiply(input, 10) + + +class FakeTFTensorModel: + def predict(self, input: tf.Tensor): + return tf.math.multiply(input, 10) + + +def _compare_tensor_prediction_result(x, y): + return tf.math.equal(x.inference, y.inference) + + +class TFRunInferenceTest(unittest.TestCase): + def test_predict_numpy(self): + fake_model = FakeTFNumpyModel() + inference_runner = TFModelHandlerNumpy(model_uri='unused') + batched_examples = [ + numpy.array([1]), numpy.array([10]), numpy.array([100]) + ] + expected_predictions = [ + PredictionResult(numpy.array([1]), 10), + PredictionResult(numpy.array([10]), 100), + PredictionResult(numpy.array([100]), 1000) + ] + inferences = inference_runner.run_inference(batched_examples, fake_model) + for actual, expected in zip(inferences, expected_predictions): + self.assertTrue(compare_prediction_result(actual, expected)) + + + def test_predict_tensor(self): + fake_model = FakeTFTensorModel() + inference_runner = TFModelHandlerTensor(model_uri='unused') + batched_examples = [ + tf.convert_to_tensor(numpy.array([1])), + tf.convert_to_tensor(numpy.array([10])), + tf.convert_to_tensor(numpy.array([100])), + ] + expected_predictions = [ + PredictionResult(ex, pred) for ex, pred in zip(batched_examples, [tf.math.multiply(n, 10) for n in batched_examples]) + ] + + inferences = inference_runner.run_inference(batched_examples, fake_model) + for actual, expected in zip(inferences, expected_predictions): + self.assertTrue(_compare_tensor_prediction_result(actual, expected)) + + + def test_predict_keyed_numpy(self): + fake_model = FakeTFNumpyModel() + inference_runner = KeyedModelHandler(TFModelHandlerNumpy(model_uri='unused')) + batched_examples = [ + ('k1', numpy.array([1], dtype=numpy.int64)), + ('k2', numpy.array([10], dtype=numpy.int64)), + ('k3', numpy.array([100], dtype=numpy.int64)), + ] + expected_predictions = [ + (ex[0],PredictionResult(ex[1], pred)) for ex, pred in zip(batched_examples, [numpy.multiply(n[1], 10) for n in batched_examples]) + ] + inferences = inference_runner.run_inference(batched_examples, fake_model) + for actual, expected in zip(inferences, expected_predictions): + self.assertTrue(compare_prediction_result(actual[1], expected[1])) + + def test_predict_keyed_tensor(self): + fake_model = FakeTFTensorModel() + inference_runner = KeyedModelHandler(TFModelHandlerTensor(model_uri='unused')) + batched_examples = [ + ('k1', tf.convert_to_tensor(numpy.array([1]))), + ('k2', tf.convert_to_tensor(numpy.array([10]))), + ('k3', tf.convert_to_tensor(numpy.array([100]))), + ] + expected_predictions = [ + (ex[0],PredictionResult(ex[1], pred)) for ex, pred in zip(batched_examples, [tf.math.multiply(n[1], 10) for n in batched_examples]) + ] + inferences = inference_runner.run_inference(batched_examples, fake_model) + for actual, expected in zip(inferences, expected_predictions): + self.assertTrue(_compare_tensor_prediction_result(actual[1], expected[1])) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From 81932831579fa2dec66893edb70f7b66da8fd21d Mon Sep 17 
00:00:00 2001 From: riteshghorse Date: Tue, 7 Feb 2023 15:23:37 -0500 Subject: [PATCH 03/45] lint and formatting changes --- .../tensorflow_mnist_classification.py | 6 +-- .../ml/inference/tensorflow_inference.py | 39 ++++++++-------- .../inference/tensorflow_inference_it_test.py | 31 +++++++------ .../ml/inference/tensorflow_inference_test.py | 46 +++++++++++-------- 4 files changed, 68 insertions(+), 54 deletions(-) diff --git a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py index a39b3fc6733e..4ec0de7d7de9 100644 --- a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py +++ b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py @@ -18,7 +18,7 @@ import argparse import logging -from typing import Iterable, List, Tuple +from typing import Iterable, Tuple import numpy @@ -26,7 +26,6 @@ from apache_beam.ml.inference.base import KeyedModelHandler, PredictionResult, RunInference from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy from apache_beam.options.pipeline_options import PipelineOptions, SetupOptions - from apache_beam.runners.runner import PipelineResult @@ -110,4 +109,5 @@ def run( if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) - run() \ No newline at end of file + run() + \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index c933423843b9..0f02f5a8ffc2 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -17,24 +17,21 @@ # pytype: skip-file -import logging -from collections import defaultdict from typing import Any from typing import Callable from typing import Dict from typing import Iterable from typing import Optional from typing import Sequence -from typing import Union import sys -from apache_beam.ml.inference import utils -import tensorflow as tf import numpy -from apache_beam.io.filesystems import FileSystems +import tensorflow as tf + +from apache_beam.ml.inference import utils from apache_beam.ml.inference.base import ModelHandler from apache_beam.ml.inference.base import PredictionResult -from apache_beam.utils.annotations import experimental + __all__ = [ 'TFModelHandlerNumpy', @@ -42,7 +39,9 @@ ] TensorInferenceFn = Callable[ - [tf.Module, Sequence[numpy.ndarray], Optional[Dict[str, Any]], Optional[str]], + [tf.Module, Sequence[numpy.ndarray], + Optional[Dict[str, Any]], + Optional[str]], Iterable[PredictionResult]] def _load_model(model_uri): @@ -55,7 +54,8 @@ def default_numpy_inference_fn( inference_args: Optional[Dict[str,Any]] = None, model_id: Optional[str] = None) -> Iterable[PredictionResult]: vectorized_batch = numpy.stack(batch, axis=0) - return utils._convert_to_result(batch, model.predict(vectorized_batch), model_id) + return utils._convert_to_result(batch, model.predict(vectorized_batch), + model_id) def default_tensor_inference_fn( @@ -64,7 +64,9 @@ def default_tensor_inference_fn( inference_args: Optional[Dict[str,Any]] = None, model_id: Optional[str] = None) -> Iterable[PredictionResult]: vectorized_batch = tf.stack(batch, axis=0) - return utils._convert_to_result(batch, model.predict(vectorized_batch), model_id) + return utils._convert_to_result(batch, model.predict(vectorized_batch), + model_id) + class 
TFModelHandlerNumpy(ModelHandler[numpy.ndarray, PredictionResult, @@ -83,7 +85,7 @@ def load_model(self) -> tf.Module: def update_model_path(self, model_path: Optional[str] = None): self._model_uri = model_path if model_path else self._model_uri - + def run_inference( self, batch: Sequence[numpy.ndarray], @@ -94,14 +96,14 @@ def run_inference( Runs inferences on a batch of numpy array and returns an Iterable of numpy array Predictions. - This method stacks the n-dimensional np-array in a vectorized format to optimize - the inference call. + This method stacks the n-dimensional np-array in a vectorized format to + optimize the inference call. Args: batch: A sequence of numpy nd-array. These should be batchable, as this - method will call `numpy.stack()` and pass in batched numpy nd-array with - dimensions (batch_size, n_features, etc.) into the model's forward() - function. + method will call `numpy.stack()` and pass in batched numpy nd-array + with dimensions (batch_size, n_features, etc.) into the model's + forward() function. model: A TF model. inference_args: any additional arguments for an inference. @@ -145,7 +147,7 @@ def load_model(self) -> tf.Module: def update_model_path(self, model_path: Optional[str] = None): self._model_uri = model_path if model_path else self._model_uri - + def run_inference( self, batch: Sequence[tf.Tensor], @@ -186,4 +188,5 @@ def get_metrics_namespace(self) -> str: return 'BeamML_TF_Tensors' def validate_inference_args(self, inference_args: Optional[Dict[str, Any]]): - pass \ No newline at end of file + pass + \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index fcd8dda0875c..ef41bc5b50b9 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -17,22 +17,22 @@ """End-to-End test for Tensorflow Inference""" -from cgi import test -from typing import Tuple import logging + +from typing import Tuple from typing import List + import unittest import uuid - -import pytest - -import apache_beam as beam -from apache_beam.examples.inference import tensorflow_mnist_classification from apache_beam.io.filesystems import FileSystems - from apache_beam.testing.test_pipeline import TestPipeline +try: + import tensorflow as tf + from apache_beam.examples.inference import tensorflow_mnist_classification +except ImportError as e: + tf = None def process_outputs(filepath): with FileSystems().open(filepath) as f: @@ -41,17 +41,21 @@ def process_outputs(filepath): return lines +@unittest.skipIf( + tf is None, + 'Missing dependencies. 
' + 'Test depends on tensorflow') class TensorflowInference(unittest.TestCase): def process_input(self, row: str) -> Tuple[int, List[int]]: data = row.split(',') label, pixels = int(data[0]), data[1:] pixels = [int(pixel) for pixel in pixels] return label, pixels - - + + def test_tf_mnist_classification(self): test_pipeline = TestPipeline(is_integration_test=True) - input_file = 'gs://clouddfe-riteshghorse/tf/mnist/dataset/testing_inputs_it_mnist_data.csv' + input_file = 'gs://clouddfe-riteshghorse/tf/mnist/dataset/testing_inputs_it_mnist_data.csv' # pylint: disable=line-too-long output_file_dir = 'gs://clouddfe-riteshghorse/tf/mnist/output/' output_file = '/'.join([output_file_dir, str(uuid.uuid4()), 'result.txt']) model_path = 'gs://clouddfe-riteshghorse/tf/mnist/model/' @@ -64,7 +68,7 @@ def test_tf_mnist_classification(self): test_pipeline.get_full_options_as_args(**extra_opts), save_main_session=False) self.assertEqual(FileSystems().exists(output_file), True) - + expected_output_filepath = 'gs://clouddfe-riteshghorse/tf/mnist/output/testing_expected_outputs_test_sklearn_mnist_classification_actuals.txt' # pylint: disable=line-too-long expected_outputs = process_outputs(expected_output_filepath) @@ -82,4 +86,5 @@ def test_tf_mnist_classification(self): if __name__ == '__main__': logging.getLogger().setLevel(logging.DEBUG) - unittest.main() \ No newline at end of file + unittest.main() + \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index b6fce47aee6c..1a5d4f06885f 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -18,26 +18,24 @@ # pytype: skip-file import unittest -from apache_beam.examples import inference -from apache_beam.ml.inference.sklearn_inference_test import compare_prediction_result import numpy import tensorflow as tf - +from apache_beam.ml.inference.sklearn_inference_test import compare_prediction_result from apache_beam.ml.inference.base import KeyedModelHandler, PredictionResult from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy, TFModelHandlerTensor class FakeTFNumpyModel: - def predict(self, input: numpy.ndarray): - return numpy.multiply(input, 10) + def predict(self, input: numpy.ndarray): + return numpy.multiply(input, 10) class FakeTFTensorModel: def predict(self, input: tf.Tensor): return tf.math.multiply(input, 10) - + def _compare_tensor_prediction_result(x, y): return tf.math.equal(x.inference, y.inference) @@ -58,54 +56,62 @@ def test_predict_numpy(self): inferences = inference_runner.run_inference(batched_examples, fake_model) for actual, expected in zip(inferences, expected_predictions): self.assertTrue(compare_prediction_result(actual, expected)) - - + + def test_predict_tensor(self): fake_model = FakeTFTensorModel() inference_runner = TFModelHandlerTensor(model_uri='unused') batched_examples = [ tf.convert_to_tensor(numpy.array([1])), tf.convert_to_tensor(numpy.array([10])), - tf.convert_to_tensor(numpy.array([100])), + tf.convert_to_tensor(numpy.array([100])), ] expected_predictions = [ - PredictionResult(ex, pred) for ex, pred in zip(batched_examples, [tf.math.multiply(n, 10) for n in batched_examples]) + PredictionResult(ex, pred) + for ex, pred in zip(batched_examples, + [tf.math.multiply(n, 10) for n in batched_examples]) ] - + inferences = inference_runner.run_inference(batched_examples, 
fake_model) for actual, expected in zip(inferences, expected_predictions): self.assertTrue(_compare_tensor_prediction_result(actual, expected)) - - + + def test_predict_keyed_numpy(self): fake_model = FakeTFNumpyModel() - inference_runner = KeyedModelHandler(TFModelHandlerNumpy(model_uri='unused')) + inference_runner = KeyedModelHandler( + TFModelHandlerNumpy(model_uri='unused')) batched_examples = [ ('k1', numpy.array([1], dtype=numpy.int64)), ('k2', numpy.array([10], dtype=numpy.int64)), ('k3', numpy.array([100], dtype=numpy.int64)), ] expected_predictions = [ - (ex[0],PredictionResult(ex[1], pred)) for ex, pred in zip(batched_examples, [numpy.multiply(n[1], 10) for n in batched_examples]) + (ex[0],PredictionResult(ex[1], pred)) + for ex, pred in zip(batched_examples, + [numpy.multiply(n[1], 10) for n in batched_examples]) ] inferences = inference_runner.run_inference(batched_examples, fake_model) for actual, expected in zip(inferences, expected_predictions): self.assertTrue(compare_prediction_result(actual[1], expected[1])) - + def test_predict_keyed_tensor(self): fake_model = FakeTFTensorModel() - inference_runner = KeyedModelHandler(TFModelHandlerTensor(model_uri='unused')) + inference_runner = KeyedModelHandler( + TFModelHandlerTensor(model_uri='unused')) batched_examples = [ ('k1', tf.convert_to_tensor(numpy.array([1]))), ('k2', tf.convert_to_tensor(numpy.array([10]))), ('k3', tf.convert_to_tensor(numpy.array([100]))), ] expected_predictions = [ - (ex[0],PredictionResult(ex[1], pred)) for ex, pred in zip(batched_examples, [tf.math.multiply(n[1], 10) for n in batched_examples]) + (ex[0],PredictionResult(ex[1], pred)) + for ex, pred in zip(batched_examples, + [tf.math.multiply(n[1], 10) for n in batched_examples]) ] inferences = inference_runner.run_inference(batched_examples, fake_model) for actual, expected in zip(inferences, expected_predictions): self.assertTrue(_compare_tensor_prediction_result(actual[1], expected[1])) - + if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() From d1eb67c38b86367ed6cc3ccd154fbcf106612750 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Tue, 7 Feb 2023 16:56:52 -0500 Subject: [PATCH 04/45] correct lints --- .../ml/inference/tensorflow_inference.py | 34 ++++++++----------- .../inference/tensorflow_inference_it_test.py | 9 +++-- .../ml/inference/tensorflow_inference_test.py | 12 ++++--- 3 files changed, 27 insertions(+), 28 deletions(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index 0f02f5a8ffc2..457a582b643d 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -32,17 +32,15 @@ from apache_beam.ml.inference.base import ModelHandler from apache_beam.ml.inference.base import PredictionResult - __all__ = [ 'TFModelHandlerNumpy', 'TFModelHandlerTensor', ] -TensorInferenceFn = Callable[ - [tf.Module, Sequence[numpy.ndarray], - Optional[Dict[str, Any]], - Optional[str]], - Iterable[PredictionResult]] +TensorInferenceFn = Callable[[ + tf.Module, Sequence[numpy.ndarray], Optional[Dict[str, Any]], Optional[str] +], + Iterable[PredictionResult]] def _load_model(model_uri): return tf.keras.models.load_model(model_uri) @@ -51,26 +49,26 @@ def _load_model(model_uri): def default_numpy_inference_fn( model: tf.Module, batch: Sequence[numpy.ndarray], - inference_args: Optional[Dict[str,Any]] = None, + inference_args: Optional[Dict[str, Any]] = 
None, model_id: Optional[str] = None) -> Iterable[PredictionResult]: vectorized_batch = numpy.stack(batch, axis=0) - return utils._convert_to_result(batch, model.predict(vectorized_batch), - model_id) + return utils._convert_to_result( + batch, model.predict(vectorized_batch), model_id) def default_tensor_inference_fn( model: tf.Module, batch: Sequence[tf.Tensor], - inference_args: Optional[Dict[str,Any]] = None, + inference_args: Optional[Dict[str, Any]] = None, model_id: Optional[str] = None) -> Iterable[PredictionResult]: vectorized_batch = tf.stack(batch, axis=0) - return utils._convert_to_result(batch, model.predict(vectorized_batch), - model_id) + return utils._convert_to_result( + batch, model.predict(vectorized_batch), model_id) class TFModelHandlerNumpy(ModelHandler[numpy.ndarray, - PredictionResult, - tf.Module]): + PredictionResult, + tf.Module]): def __init__( self, model_uri: str, @@ -130,9 +128,8 @@ def validate_inference_args(self, inference_args: Optional[Dict[str, Any]]): pass -class TFModelHandlerTensor(ModelHandler[tf.Tensor, - PredictionResult, - tf.Module]): +class TFModelHandlerTensor(ModelHandler[tf.Tensor, PredictionResult, + tf.Module]): def __init__( self, model_uri: str, @@ -188,5 +185,4 @@ def get_metrics_namespace(self) -> str: return 'BeamML_TF_Tensors' def validate_inference_args(self, inference_args: Optional[Dict[str, Any]]): - pass - \ No newline at end of file + pass \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index ef41bc5b50b9..d5dc3bea3167 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -34,6 +34,7 @@ except ImportError as e: tf = None + def process_outputs(filepath): with FileSystems().open(filepath) as f: lines = f.readlines() @@ -42,8 +43,7 @@ def process_outputs(filepath): @unittest.skipIf( - tf is None, - 'Missing dependencies. ' + tf is None, 'Missing dependencies. 
' 'Test depends on tensorflow') class TensorflowInference(unittest.TestCase): def process_input(self, row: str) -> Tuple[int, List[int]]: @@ -52,7 +52,6 @@ def process_input(self, row: str) -> Tuple[int, List[int]]: pixels = [int(pixel) for pixel in pixels] return label, pixels - def test_tf_mnist_classification(self): test_pipeline = TestPipeline(is_integration_test=True) input_file = 'gs://clouddfe-riteshghorse/tf/mnist/dataset/testing_inputs_it_mnist_data.csv' # pylint: disable=line-too-long @@ -84,7 +83,7 @@ def test_tf_mnist_classification(self): true_label, expected_prediction = expected_outputs[i].split(',') self.assertEqual(predictions_dict[true_label], expected_prediction) + if __name__ == '__main__': logging.getLogger().setLevel(logging.DEBUG) - unittest.main() - \ No newline at end of file + unittest.main() \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index 1a5d4f06885f..0e53ee058c34 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -21,11 +21,15 @@ import numpy -import tensorflow as tf -from apache_beam.ml.inference.sklearn_inference_test import compare_prediction_result -from apache_beam.ml.inference.base import KeyedModelHandler, PredictionResult -from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy, TFModelHandlerTensor +try: + import tensorflow as tf + from apache_beam.ml.inference.sklearn_inference_test import compare_prediction_result + from apache_beam.ml.inference.base import KeyedModelHandler, PredictionResult + from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy, TFModelHandlerTensor +except ImportError: + raise unittest.SkipTest('PyTorch dependencies are not installed') + class FakeTFNumpyModel: def predict(self, input: numpy.ndarray): From 5fb5cbbc4105097ec7197824099cafe6089c9270 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Tue, 7 Feb 2023 17:25:06 -0500 Subject: [PATCH 05/45] more lints and formats --- .../ml/inference/tensorflow_inference.py | 48 +++++++++++++++++-- .../inference/tensorflow_inference_it_test.py | 13 +++-- .../ml/inference/tensorflow_inference_test.py | 13 ++--- 3 files changed, 56 insertions(+), 18 deletions(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index 457a582b643d..d7dd75b43431 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -17,6 +17,7 @@ # pytype: skip-file +from cmath import inf from typing import Any from typing import Callable from typing import Dict @@ -74,6 +75,22 @@ def __init__( model_uri: str, *, inference_fn: TensorInferenceFn = default_numpy_inference_fn): + """Implementation of the ModelHandler interface for Tensorflow. + + Example Usage:: + + pcoll | RunInference(TFModelHandlerNumpy(model_uri="my_uri")) + + See https://www.tensorflow.org/tutorials/keras/save_and_load for details. + + Args: + model_uri (str): path to the trained model. + inference_fn (TensorInferenceFn, optional): inference function to use + during RunInference. Defaults to default_numpy_inference_fn. + + **Supported Versions:** RunInference APIs in Apache Beam have been tested + with Tensorflow 2.11. 
+ """ self._model_uri = model_uri self._inference_fn = inference_fn @@ -94,20 +111,22 @@ def run_inference( Runs inferences on a batch of numpy array and returns an Iterable of numpy array Predictions. - This method stacks the n-dimensional np-array in a vectorized format to + This method stacks the n-dimensional numpy array in a vectorized format to optimize the inference call. Args: batch: A sequence of numpy nd-array. These should be batchable, as this method will call `numpy.stack()` and pass in batched numpy nd-array with dimensions (batch_size, n_features, etc.) into the model's - forward() function. - model: A TF model. + predict() function. + model: A Tensorflow model. inference_args: any additional arguments for an inference. Returns: An Iterable of type PredictionResult. """ + inference_args = {} if not inference_args else inference_args + return self._inference_fn(model, batch, inference_args, self._model_uri) def get_num_bytes(self, batch: Sequence[numpy.ndarray]) -> int: @@ -135,6 +154,22 @@ def __init__( model_uri: str, *, inference_fn: TensorInferenceFn = default_tensor_inference_fn): + """Implementation of the ModelHandler interface for Tensorflow. + + Example Usage:: + + pcoll | RunInference(TFModelHandlerTensor(model_uri="my_uri")) + + See https://www.tensorflow.org/tutorials/keras/save_and_load for details. + + Args: + model_uri (str): path to the trained model. + inference_fn (TensorInferenceFn, optional): inference function to use + during RunInference. Defaults to default_numpy_inference_fn. + + **Supported Versions:** RunInference APIs in Apache Beam have been tested + with Tensorflow 2.11. + """ self._model_uri = model_uri self._inference_fn = inference_fn @@ -154,12 +189,14 @@ def run_inference( """ Runs inferences on a batch of tf.Tensor and returns an Iterable of Tensor Predictions. + This method stacks the list of Tensors in a vectorized format to optimize the inference call. + Args: batch: A sequence of Tensors. These Tensors should be batchable, as this method will call `tf.stack()` and pass in batched Tensors with - dimensions (batch_size, n_features, etc.) into the model's forward() + dimensions (batch_size, n_features, etc.) into the model's predict() function. model: A Tensorflow model. inference_args: Non-batchable arguments required as inputs to the model's @@ -185,4 +222,5 @@ def get_metrics_namespace(self) -> str: return 'BeamML_TF_Tensors' def validate_inference_args(self, inference_args: Optional[Dict[str, Any]]): - pass \ No newline at end of file + pass + \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index d5dc3bea3167..04eb5ab13cfa 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -22,12 +22,14 @@ from typing import Tuple from typing import List +import pytest import unittest import uuid from apache_beam.io.filesystems import FileSystems from apache_beam.testing.test_pipeline import TestPipeline +# pylint: disable=ungrouped-imports try: import tensorflow as tf from apache_beam.examples.inference import tensorflow_mnist_classification @@ -46,12 +48,8 @@ def process_outputs(filepath): tf is None, 'Missing dependencies. 
' 'Test depends on tensorflow') class TensorflowInference(unittest.TestCase): - def process_input(self, row: str) -> Tuple[int, List[int]]: - data = row.split(',') - label, pixels = int(data[0]), data[1:] - pixels = [int(pixel) for pixel in pixels] - return label, pixels - + @pytest.mark.uses_tensorflow + @pytest.mark.it_postcommit def test_tf_mnist_classification(self): test_pipeline = TestPipeline(is_integration_test=True) input_file = 'gs://clouddfe-riteshghorse/tf/mnist/dataset/testing_inputs_it_mnist_data.csv' # pylint: disable=line-too-long @@ -86,4 +84,5 @@ def test_tf_mnist_classification(self): if __name__ == '__main__': logging.getLogger().setLevel(logging.DEBUG) - unittest.main() \ No newline at end of file + unittest.main() + \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index 0e53ee058c34..f48c8b510705 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -19,16 +19,16 @@ import unittest - import numpy +import pytest try: import tensorflow as tf - from apache_beam.ml.inference.sklearn_inference_test import compare_prediction_result + from apache_beam.ml.inference.sklearn_inference_test import _compare_prediction_result from apache_beam.ml.inference.base import KeyedModelHandler, PredictionResult from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy, TFModelHandlerTensor except ImportError: - raise unittest.SkipTest('PyTorch dependencies are not installed') + raise unittest.SkipTest('Tensorflow dependencies are not installed') class FakeTFNumpyModel: @@ -59,9 +59,9 @@ def test_predict_numpy(self): ] inferences = inference_runner.run_inference(batched_examples, fake_model) for actual, expected in zip(inferences, expected_predictions): - self.assertTrue(compare_prediction_result(actual, expected)) - + self.assertTrue(_compare_prediction_result(actual, expected)) + @pytest.mark.uses_tensorflow def test_predict_tensor(self): fake_model = FakeTFTensorModel() inference_runner = TFModelHandlerTensor(model_uri='unused') @@ -97,8 +97,9 @@ def test_predict_keyed_numpy(self): ] inferences = inference_runner.run_inference(batched_examples, fake_model) for actual, expected in zip(inferences, expected_predictions): - self.assertTrue(compare_prediction_result(actual[1], expected[1])) + self.assertTrue(_compare_prediction_result(actual[1], expected[1])) + @pytest.mark.uses_tensorflow def test_predict_keyed_tensor(self): fake_model = FakeTFTensorModel() inference_runner = KeyedModelHandler( From e1ec1685541b69ad54c3c2b12ad67c55fda6057d Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Tue, 7 Feb 2023 17:44:24 -0500 Subject: [PATCH 06/45] auto formatted with yapf --- .../tensorflow_mnist_classification.py | 4 +- .../ml/inference/tensorflow_inference.py | 10 +++-- .../inference/tensorflow_inference_it_test.py | 2 +- .../ml/inference/tensorflow_inference_test.py | 38 ++++++++++--------- 4 files changed, 30 insertions(+), 24 deletions(-) diff --git a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py index 4ec0de7d7de9..c5d7cc539a9e 100644 --- a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py +++ b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py @@ -15,7 +15,6 @@ # limitations under the 
License. # - import argparse import logging from typing import Iterable, Tuple @@ -42,11 +41,13 @@ class PostProcessor(beam.DoFn): """Process the PredictionResult to get the predicted label. Returns a comma separated string with true label and predicted label. """ + def process(self, element: Tuple[int, PredictionResult]) -> Iterable[str]: label, prediction_result = element prediction = numpy.argmax(prediction_result.inference, axis=0) yield '{},{}'.format(label, prediction) + def parse_known_args(argv): """Parses args for the workflow.""" parser = argparse.ArgumentParser() @@ -110,4 +111,3 @@ def run( if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) run() - \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index d7dd75b43431..419e0a99023e 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -39,10 +39,11 @@ ] TensorInferenceFn = Callable[[ - tf.Module, Sequence[numpy.ndarray], Optional[Dict[str, Any]], Optional[str] + tf.Module, Sequence[numpy.ndarray], Optional[Dict[str, Any]], Optional[str] ], Iterable[PredictionResult]] + def _load_model(model_uri): return tf.keras.models.load_model(model_uri) @@ -53,7 +54,7 @@ def default_numpy_inference_fn( inference_args: Optional[Dict[str, Any]] = None, model_id: Optional[str] = None) -> Iterable[PredictionResult]: vectorized_batch = numpy.stack(batch, axis=0) - return utils._convert_to_result( + return utils._convert_to_result( batch, model.predict(vectorized_batch), model_id) @@ -70,6 +71,7 @@ def default_tensor_inference_fn( class TFModelHandlerNumpy(ModelHandler[numpy.ndarray, PredictionResult, tf.Module]): + def __init__( self, model_uri: str, @@ -126,7 +128,7 @@ def run_inference( An Iterable of type PredictionResult. """ inference_args = {} if not inference_args else inference_args - + return self._inference_fn(model, batch, inference_args, self._model_uri) def get_num_bytes(self, batch: Sequence[numpy.ndarray]) -> int: @@ -149,6 +151,7 @@ def validate_inference_args(self, inference_args: Optional[Dict[str, Any]]): class TFModelHandlerTensor(ModelHandler[tf.Tensor, PredictionResult, tf.Module]): + def __init__( self, model_uri: str, @@ -223,4 +226,3 @@ def get_metrics_namespace(self) -> str: def validate_inference_args(self, inference_args: Optional[Dict[str, Any]]): pass - \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index 04eb5ab13cfa..70a4dceda9bb 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -48,6 +48,7 @@ def process_outputs(filepath): tf is None, 'Missing dependencies. 
' 'Test depends on tensorflow') class TensorflowInference(unittest.TestCase): + @pytest.mark.uses_tensorflow @pytest.mark.it_postcommit def test_tf_mnist_classification(self): @@ -85,4 +86,3 @@ def test_tf_mnist_classification(self): if __name__ == '__main__': logging.getLogger().setLevel(logging.DEBUG) unittest.main() - \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index f48c8b510705..3c6f14017373 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -32,11 +32,13 @@ class FakeTFNumpyModel: + def predict(self, input: numpy.ndarray): return numpy.multiply(input, 10) class FakeTFTensorModel: + def predict(self, input: tf.Tensor): return tf.math.multiply(input, 10) @@ -46,12 +48,11 @@ def _compare_tensor_prediction_result(x, y): class TFRunInferenceTest(unittest.TestCase): + def test_predict_numpy(self): fake_model = FakeTFNumpyModel() inference_runner = TFModelHandlerNumpy(model_uri='unused') - batched_examples = [ - numpy.array([1]), numpy.array([10]), numpy.array([100]) - ] + batched_examples = [numpy.array([1]), numpy.array([10]), numpy.array([100])] expected_predictions = [ PredictionResult(numpy.array([1]), 10), PredictionResult(numpy.array([10]), 100), @@ -71,8 +72,9 @@ def test_predict_tensor(self): tf.convert_to_tensor(numpy.array([100])), ] expected_predictions = [ - PredictionResult(ex, pred) - for ex, pred in zip(batched_examples, + PredictionResult(ex, pred) for ex, + pred in zip( + batched_examples, [tf.math.multiply(n, 10) for n in batched_examples]) ] @@ -80,19 +82,19 @@ def test_predict_tensor(self): for actual, expected in zip(inferences, expected_predictions): self.assertTrue(_compare_tensor_prediction_result(actual, expected)) - def test_predict_keyed_numpy(self): fake_model = FakeTFNumpyModel() inference_runner = KeyedModelHandler( - TFModelHandlerNumpy(model_uri='unused')) + TFModelHandlerNumpy(model_uri='unused')) batched_examples = [ - ('k1', numpy.array([1], dtype=numpy.int64)), - ('k2', numpy.array([10], dtype=numpy.int64)), - ('k3', numpy.array([100], dtype=numpy.int64)), + ('k1', numpy.array([1], dtype=numpy.int64)), + ('k2', numpy.array([10], dtype=numpy.int64)), + ('k3', numpy.array([100], dtype=numpy.int64)), ] expected_predictions = [ - (ex[0],PredictionResult(ex[1], pred)) - for ex, pred in zip(batched_examples, + (ex[0], PredictionResult(ex[1], pred)) for ex, + pred in zip( + batched_examples, [numpy.multiply(n[1], 10) for n in batched_examples]) ] inferences = inference_runner.run_inference(batched_examples, fake_model) @@ -105,18 +107,20 @@ def test_predict_keyed_tensor(self): inference_runner = KeyedModelHandler( TFModelHandlerTensor(model_uri='unused')) batched_examples = [ - ('k1', tf.convert_to_tensor(numpy.array([1]))), - ('k2', tf.convert_to_tensor(numpy.array([10]))), - ('k3', tf.convert_to_tensor(numpy.array([100]))), + ('k1', tf.convert_to_tensor(numpy.array([1]))), + ('k2', tf.convert_to_tensor(numpy.array([10]))), + ('k3', tf.convert_to_tensor(numpy.array([100]))), ] expected_predictions = [ - (ex[0],PredictionResult(ex[1], pred)) - for ex, pred in zip(batched_examples, + (ex[0], PredictionResult(ex[1], pred)) for ex, + pred in zip( + batched_examples, [tf.math.multiply(n[1], 10) for n in batched_examples]) ] inferences = inference_runner.run_inference(batched_examples, fake_model) for actual, expected in zip(inferences, 
expected_predictions): self.assertTrue(_compare_tensor_prediction_result(actual[1], expected[1])) + if __name__ == '__main__': unittest.main() From e7b5cf0cdcb5cf63451cd142ecb6a6415892271f Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Tue, 7 Feb 2023 18:39:51 -0500 Subject: [PATCH 07/45] rm spare lines --- sdks/python/apache_beam/ml/inference/tensorflow_inference.py | 2 -- .../apache_beam/ml/inference/tensorflow_inference_it_test.py | 1 - .../apache_beam/ml/inference/tensorflow_inference_test.py | 3 --- 3 files changed, 6 deletions(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index 419e0a99023e..e4b340dc297d 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -71,7 +71,6 @@ def default_tensor_inference_fn( class TFModelHandlerNumpy(ModelHandler[numpy.ndarray, PredictionResult, tf.Module]): - def __init__( self, model_uri: str, @@ -151,7 +150,6 @@ def validate_inference_args(self, inference_args: Optional[Dict[str, Any]]): class TFModelHandlerTensor(ModelHandler[tf.Tensor, PredictionResult, tf.Module]): - def __init__( self, model_uri: str, diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index 70a4dceda9bb..82910cfa63f1 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -48,7 +48,6 @@ def process_outputs(filepath): tf is None, 'Missing dependencies. ' 'Test depends on tensorflow') class TensorflowInference(unittest.TestCase): - @pytest.mark.uses_tensorflow @pytest.mark.it_postcommit def test_tf_mnist_classification(self): diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index 3c6f14017373..926622fe0d61 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -32,13 +32,11 @@ class FakeTFNumpyModel: - def predict(self, input: numpy.ndarray): return numpy.multiply(input, 10) class FakeTFTensorModel: - def predict(self, input: tf.Tensor): return tf.math.multiply(input, 10) @@ -48,7 +46,6 @@ def _compare_tensor_prediction_result(x, y): class TFRunInferenceTest(unittest.TestCase): - def test_predict_numpy(self): fake_model = FakeTFNumpyModel() inference_runner = TFModelHandlerNumpy(model_uri='unused') From 70edea4cb037f29b98ada3dd74977707aa941be1 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Tue, 7 Feb 2023 18:51:40 -0500 Subject: [PATCH 08/45] add readme file --- .../apache_beam/examples/inference/README.md | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/sdks/python/apache_beam/examples/inference/README.md b/sdks/python/apache_beam/examples/inference/README.md index 9326af2ab77f..3f68916ce872 100644 --- a/sdks/python/apache_beam/examples/inference/README.md +++ b/sdks/python/apache_beam/examples/inference/README.md @@ -32,6 +32,15 @@ because the `apache_beam.examples.inference` module was added in that release. pip install apache-beam==2.40.0 ``` +### Tensorflow dependencies + +The following installation requirement is for the Tensorflow model handler examples. + +The RunInference API supports the Tensorflow framework. To use Tensorflow locally, first install `tensorflow`. 
+```
+pip install tensorflow==2.11.0
+```
+
 ### PyTorch dependencies
 
 The following installation requirements are for the files used in these examples.
@@ -374,3 +383,51 @@ True Price 31000000.0, Predicted Price 25654277.256461
 ...
 ```
+## MNIST digit classification with Tensorflow
+[`tensorflow_mnist_classification.py`](./tensorflow_mnist_classification.py) contains an implementation for a RunInference pipeline that performs image classification on handwritten digits from the [MNIST](https://en.wikipedia.org/wiki/MNIST_database) database.
+
+The pipeline reads rows of pixels corresponding to a digit, performs basic preprocessing (reshapes the input to 28x28x1, the shape the model expects), passes the pixels to the trained Tensorflow model with RunInference, and then writes the predictions to a text file.
+
+### Dataset and model for MNIST classification
+
+To use this transform, you need a dataset and a model trained for MNIST digit classification.
+
+1. Create a file named `INPUT.csv` that contains labels and pixels to feed into the model. Each row should have comma-separated elements. The first element is the label. All other elements are pixel values. The csv should not have column headers. The content of the file should be similar to the following example:
+```
+1,0,0,0...
+0,0,0,0...
+1,0,0,0...
+4,0,0,0...
+...
+```
+2. Save the trained tensorflow model to a directory `MODEL_DIR`.
+
+
+### Running `tensorflow_mnist_classification.py`
+
+To run the MNIST classification pipeline locally, use the following command:
+```sh
+python -m apache_beam.examples.inference.tensorflow_mnist_classification \
+  --input INPUT \
+  --output OUTPUT \
+  --model_path MODEL_DIR
+```
+For example:
+```sh
+python -m apache_beam.examples.inference.tensorflow_mnist_classification \
+  --input INPUT.csv \
+  --output predictions.txt \
+  --model_path MODEL_DIR
+```
+
+This writes the output to `predictions.txt` with contents like:
+```
+1,1
+4,4
+0,0
+7,7
+3,3
+5,5
+...
+```
+Each line has two comma-separated values: the first is the actual label of the digit, the second is the predicted label of the digit.

From 3ed3160f64a53c02c798ab51d9b90d1b3478e311 Mon Sep 17 00:00:00 2001
From: riteshghorse
Date: Wed, 8 Feb 2023 10:01:54 -0500
Subject: [PATCH 09/45] test requirement file

---
 .../tensorflow_tests_requirements.txt | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt

diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt b/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt
new file mode 100644
index 000000000000..40bbab86d080
--- /dev/null
+++ b/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt
@@ -0,0 +1,18 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +tensorflow>=2.11.0 From 800cc3a8d8c4c9f38d8c1fdd2134b103289a087d Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 8 Feb 2023 10:10:40 -0500 Subject: [PATCH 10/45] add test to gradle --- sdks/python/test-suites/direct/common.gradle | 30 +++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/sdks/python/test-suites/direct/common.gradle b/sdks/python/test-suites/direct/common.gradle index f164a1d358b4..75ab9992ab8e 100644 --- a/sdks/python/test-suites/direct/common.gradle +++ b/sdks/python/test-suites/direct/common.gradle @@ -280,11 +280,39 @@ task tfxInferenceTest { } } +// TensorFlow RunInference IT tests +task tensorflowInferenceTest { + dependsOn 'installGcpTest' + dependsOn ':sdks:python:sdist' + def requirementsFile = "${rootDir}/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt" + doFirst { + exec { + executable 'sh' + args '-c', ". ${envdir}/bin/activate && pip install -r $requirementsFile" + } + } + doLast { + def testOpts = basicTestOpts + def argMap = [ + "test_opts": testOpts, + "suite": "postCommitIT-direct-py${pythonVersionSuffix}", + "collect": "uses_tensorflow and it_postcommit" , + "runner": "TestDirectRunner" + ] + def cmdArgs = mapToArgString(argMap) + exec { + executable 'sh' + args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs" + } + } +} + // Add all the RunInference framework IT tests to this gradle task that runs on Direct Runner Post commit suite. project.tasks.register("inferencePostCommitIT") { dependsOn = [ 'torchInferenceTest', 'sklearnInferenceTest', - 'tfxInferenceTest' + 'tfxInferenceTest', + 'tensorflowInferenceTest' ] } From 1bc4adf4af8712416e745558a84c1a51108ddf2c Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 8 Feb 2023 10:28:44 -0500 Subject: [PATCH 11/45] add test tasks for tf --- .../ml/inference/tensorflow_inference_it_test.py | 2 +- .../ml/inference/tensorflow_inference_test.py | 4 ++-- sdks/python/test-suites/direct/common.gradle | 2 +- sdks/python/tox.ini | 12 ++++++++++++ 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index 82910cfa63f1..a86e176830c9 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -48,7 +48,7 @@ def process_outputs(filepath): tf is None, 'Missing dependencies. 
' 'Test depends on tensorflow') class TensorflowInference(unittest.TestCase): - @pytest.mark.uses_tensorflow + @pytest.mark.uses_tf @pytest.mark.it_postcommit def test_tf_mnist_classification(self): test_pipeline = TestPipeline(is_integration_test=True) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index 926622fe0d61..372ae8a0af07 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -59,7 +59,7 @@ def test_predict_numpy(self): for actual, expected in zip(inferences, expected_predictions): self.assertTrue(_compare_prediction_result(actual, expected)) - @pytest.mark.uses_tensorflow + @pytest.mark.uses_tf def test_predict_tensor(self): fake_model = FakeTFTensorModel() inference_runner = TFModelHandlerTensor(model_uri='unused') @@ -98,7 +98,7 @@ def test_predict_keyed_numpy(self): for actual, expected in zip(inferences, expected_predictions): self.assertTrue(_compare_prediction_result(actual[1], expected[1])) - @pytest.mark.uses_tensorflow + @pytest.mark.uses_tf def test_predict_keyed_tensor(self): fake_model = FakeTFTensorModel() inference_runner = KeyedModelHandler( diff --git a/sdks/python/test-suites/direct/common.gradle b/sdks/python/test-suites/direct/common.gradle index 75ab9992ab8e..1cfd8d7748fa 100644 --- a/sdks/python/test-suites/direct/common.gradle +++ b/sdks/python/test-suites/direct/common.gradle @@ -296,7 +296,7 @@ task tensorflowInferenceTest { def argMap = [ "test_opts": testOpts, "suite": "postCommitIT-direct-py${pythonVersionSuffix}", - "collect": "uses_tensorflow and it_postcommit" , + "collect": "uses_tf and it_postcommit" , "runner": "TestDirectRunner" ] def cmdArgs = mapToArgString(argMap) diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 9844d69e1c56..16cd2e80c87e 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -329,3 +329,15 @@ commands = # Run all PyTorch unit tests # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories. /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_pytorch {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' + +[testenv:py{37,38,39,310}-tf-{211}] +deps = + -r build-requirements.txt + 211: tensorflow>=2.11.0 +extras = test,gcp +commands = + # Log torch version for debugging + /bin/sh -c "pip freeze | grep -E torch" + # Run all Tensorflow unit tests + # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories. 
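+  # (the -m uses_tf filter selects only the tests marked with @pytest.mark.uses_tf)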
+ /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_tf {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' From 70b5a2b553cfe5f787e3f2e3e12b99c7dbfcf17f Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 8 Feb 2023 11:36:59 -0500 Subject: [PATCH 12/45] unit test --- .../ml/inference/tensorflow_inference.py | 31 ++++++++++++++----- .../ml/inference/tensorflow_inference_test.py | 24 +++++++++++++- sdks/python/pytest.ini | 1 + 3 files changed, 48 insertions(+), 8 deletions(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index e4b340dc297d..dbdb9f36cad8 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -18,6 +18,7 @@ # pytype: skip-file from cmath import inf +import enum from typing import Any from typing import Callable from typing import Dict @@ -44,8 +45,16 @@ Iterable[PredictionResult]] -def _load_model(model_uri): - return tf.keras.models.load_model(model_uri) +class ModelType(enum.Enum): + """Defines how a model file should be loaded.""" + SAVED_MODEL = 1 + + +def _load_model(model_uri, model_type): + if model_type == ModelType.SAVED_MODEL: + return tf.keras.models.load_model(model_uri) + else: + raise AssertionError('Unsupported model type for loading.') def default_numpy_inference_fn( @@ -55,7 +64,7 @@ def default_numpy_inference_fn( model_id: Optional[str] = None) -> Iterable[PredictionResult]: vectorized_batch = numpy.stack(batch, axis=0) return utils._convert_to_result( - batch, model.predict(vectorized_batch), model_id) + batch, model.predict(vectorized_batch, **inference_args), model_id) def default_tensor_inference_fn( @@ -65,7 +74,7 @@ def default_tensor_inference_fn( model_id: Optional[str] = None) -> Iterable[PredictionResult]: vectorized_batch = tf.stack(batch, axis=0) return utils._convert_to_result( - batch, model.predict(vectorized_batch), model_id) + batch, model.predict(vectorized_batch, **inference_args), model_id) class TFModelHandlerNumpy(ModelHandler[numpy.ndarray, @@ -74,6 +83,7 @@ class TFModelHandlerNumpy(ModelHandler[numpy.ndarray, def __init__( self, model_uri: str, + model_type: ModelType = ModelType.SAVED_MODEL, *, inference_fn: TensorInferenceFn = default_numpy_inference_fn): """Implementation of the ModelHandler interface for Tensorflow. @@ -86,6 +96,8 @@ def __init__( Args: model_uri (str): path to the trained model. + model_type (ModelType): type of model to be loaded. + Defaults to SAVED_MODEL. inference_fn (TensorInferenceFn, optional): inference function to use during RunInference. Defaults to default_numpy_inference_fn. @@ -93,11 +105,12 @@ def __init__( with Tensorflow 2.11. """ self._model_uri = model_uri + self._model_type = model_type self._inference_fn = inference_fn def load_model(self) -> tf.Module: """Loads and initializes a Tensorflow model for processing.""" - return _load_model(self._model_uri) + return _load_model(self._model_uri, self._model_type) def update_model_path(self, model_path: Optional[str] = None): self._model_uri = model_path if model_path else self._model_uri @@ -153,6 +166,7 @@ class TFModelHandlerTensor(ModelHandler[tf.Tensor, PredictionResult, def __init__( self, model_uri: str, + model_type: ModelType = ModelType.SAVED_MODEL, *, inference_fn: TensorInferenceFn = default_tensor_inference_fn): """Implementation of the ModelHandler interface for Tensorflow. 
@@ -165,6 +179,8 @@ def __init__( Args: model_uri (str): path to the trained model. + model_type (ModelType): type of model to be loaded. + Defaults to SAVED_MODEL. inference_fn (TensorInferenceFn, optional): inference function to use during RunInference. Defaults to default_numpy_inference_fn. @@ -172,11 +188,12 @@ def __init__( with Tensorflow 2.11. """ self._model_uri = model_uri + self._model_type = model_type self._inference_fn = inference_fn def load_model(self) -> tf.Module: """Loads and initializes a tensorflow model for processing.""" - return _load_model(self._model_uri) + return _load_model(self._model_uri, self._model_type) def update_model_path(self, model_path: Optional[str] = None): self._model_uri = model_path if model_path else self._model_uri @@ -220,7 +237,7 @@ def get_metrics_namespace(self) -> str: Returns: A namespace for metrics collected by the RunInference transform. """ - return 'BeamML_TF_Tensors' + return 'BeamML_TF_Tensor' def validate_inference_args(self, inference_args: Optional[Dict[str, Any]]): pass diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index 372ae8a0af07..2dd524af3a6c 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -37,7 +37,9 @@ def predict(self, input: numpy.ndarray): class FakeTFTensorModel: - def predict(self, input: tf.Tensor): + def predict(self, input: tf.Tensor, add=False): + if add: + return tf.math.add(tf.math.multiply(input, 10), 10) return tf.math.multiply(input, 10) @@ -78,6 +80,26 @@ def test_predict_tensor(self): inferences = inference_runner.run_inference(batched_examples, fake_model) for actual, expected in zip(inferences, expected_predictions): self.assertTrue(_compare_tensor_prediction_result(actual, expected)) + + @pytest.mark.uses_tf + def test_predict_tensor_with_args(self): + fake_model = FakeTFTensorModel() + inference_runner = TFModelHandlerTensor(model_uri='unused') + batched_examples = [ + tf.convert_to_tensor(numpy.array([1])), + tf.convert_to_tensor(numpy.array([10])), + tf.convert_to_tensor(numpy.array([100])), + ] + expected_predictions = [ + PredictionResult(ex, pred) for ex, + pred in zip( + batched_examples, + [tf.math.add(tf.math.multiply(n, 10), 10) for n in batched_examples]) + ] + + inferences = inference_runner.run_inference(batched_examples, fake_model, inference_args={"add":True}) + for actual, expected in zip(inferences, expected_predictions): + self.assertTrue(_compare_tensor_prediction_result(actual, expected)) def test_predict_keyed_numpy(self): fake_model = FakeTFNumpyModel() diff --git a/sdks/python/pytest.ini b/sdks/python/pytest.ini index 915b49c8f16a..1735a24885c7 100644 --- a/sdks/python/pytest.ini +++ b/sdks/python/pytest.ini @@ -52,6 +52,7 @@ markers = uses_sklearn: tests that utilize scikit-learn in some way uses_tensorflow: tests that utilize tensorflow in some way uses_tft: tests that utilizes tensorflow transforms in some way. + uses_tf: tests that utilize tensorflow # Default timeout intended for unit tests. 
# If certain tests need a different value, please see the docs on how to From f62c366b541048bba33737bd911da47a70ca53d0 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 8 Feb 2023 13:03:37 -0500 Subject: [PATCH 13/45] lints --- .../ml/inference/tensorflow_inference.py | 19 +++++++++---------- .../ml/inference/tensorflow_inference_test.py | 11 +++++++---- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index dbdb9f36cad8..2d4d7444ca50 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -17,7 +17,6 @@ # pytype: skip-file -from cmath import inf import enum from typing import Any from typing import Callable @@ -48,7 +47,7 @@ class ModelType(enum.Enum): """Defines how a model file should be loaded.""" SAVED_MODEL = 1 - + def _load_model(model_uri, model_type): if model_type == ModelType.SAVED_MODEL: @@ -93,14 +92,14 @@ def __init__( pcoll | RunInference(TFModelHandlerNumpy(model_uri="my_uri")) See https://www.tensorflow.org/tutorials/keras/save_and_load for details. - + Args: model_uri (str): path to the trained model. - model_type (ModelType): type of model to be loaded. + model_type (ModelType): type of model to be loaded. Defaults to SAVED_MODEL. inference_fn (TensorInferenceFn, optional): inference function to use during RunInference. Defaults to default_numpy_inference_fn. - + **Supported Versions:** RunInference APIs in Apache Beam have been tested with Tensorflow 2.11. """ @@ -176,14 +175,14 @@ def __init__( pcoll | RunInference(TFModelHandlerTensor(model_uri="my_uri")) See https://www.tensorflow.org/tutorials/keras/save_and_load for details. - + Args: model_uri (str): path to the trained model. - model_type (ModelType): type of model to be loaded. + model_type (ModelType): type of model to be loaded. Defaults to SAVED_MODEL. inference_fn (TensorInferenceFn, optional): inference function to use during RunInference. Defaults to default_numpy_inference_fn. - + **Supported Versions:** RunInference APIs in Apache Beam have been tested with Tensorflow 2.11. """ @@ -207,10 +206,10 @@ def run_inference( """ Runs inferences on a batch of tf.Tensor and returns an Iterable of Tensor Predictions. - + This method stacks the list of Tensors in a vectorized format to optimize the inference call. - + Args: batch: A sequence of Tensors. 
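        For example, stacking three tensors of shape (1,) yields a single model input of shape (3, 1).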
These Tensors should be batchable, as this method will call `tf.stack()` and pass in batched Tensors with diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index 2dd524af3a6c..9770b4fc898f 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -80,7 +80,7 @@ def test_predict_tensor(self): inferences = inference_runner.run_inference(batched_examples, fake_model) for actual, expected in zip(inferences, expected_predictions): self.assertTrue(_compare_tensor_prediction_result(actual, expected)) - + @pytest.mark.uses_tf def test_predict_tensor_with_args(self): fake_model = FakeTFTensorModel() @@ -93,11 +93,14 @@ def test_predict_tensor_with_args(self): expected_predictions = [ PredictionResult(ex, pred) for ex, pred in zip( - batched_examples, - [tf.math.add(tf.math.multiply(n, 10), 10) for n in batched_examples]) + batched_examples, [ + tf.math.add(tf.math.multiply(n, 10), 10) + for n in batched_examples + ]) ] - inferences = inference_runner.run_inference(batched_examples, fake_model, inference_args={"add":True}) + inferences = inference_runner.run_inference( + batched_examples, fake_model, inference_args={"add": True}) for actual, expected in zip(inferences, expected_predictions): self.assertTrue(_compare_tensor_prediction_result(actual, expected)) From eef7a2548a1de6fb648d2c7263d02e38478a83ac Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 8 Feb 2023 13:17:40 -0500 Subject: [PATCH 14/45] updated inferenceFn type --- .../apache_beam/ml/inference/tensorflow_inference.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index 2d4d7444ca50..88c247bfddc1 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -18,7 +18,7 @@ # pytype: skip-file import enum -from typing import Any +from typing import Any, Union from typing import Callable from typing import Dict from typing import Iterable @@ -39,7 +39,10 @@ ] TensorInferenceFn = Callable[[ - tf.Module, Sequence[numpy.ndarray], Optional[Dict[str, Any]], Optional[str] + tf.Module, + Sequence[Union[numpy.ndarray, tf.Tensor]], + Optional[Dict[str, Any]], + Optional[str] ], Iterable[PredictionResult]] From 1169246b5505583f46ef3319a3efd2ecffabc176 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 8 Feb 2023 13:31:26 -0500 Subject: [PATCH 15/45] add tox info for py38 --- sdks/python/test-suites/tox/py38/build.gradle | 13 +++++++++++++ sdks/python/tox.ini | 6 ++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/sdks/python/test-suites/tox/py38/build.gradle b/sdks/python/test-suites/tox/py38/build.gradle index e1b890e8959c..dbeda102c1ab 100644 --- a/sdks/python/test-suites/tox/py38/build.gradle +++ b/sdks/python/test-suites/tox/py38/build.gradle @@ -126,6 +126,19 @@ toxTask "testPy38pytorch-113", "py38-pytorch-113", "${posargs}" test.dependsOn "testPy38pytorch-113" preCommitPyCoverage.dependsOn "testPy38pytorch-113" +// Create a test task for each minor version of tensorflow +toxTask "testPy38tensorflow-29", "py38-tensorflow-29", "${posargs}" +test.dependsOn "testPy38tensorflow-29" +preCommitPyCoverage.dependsOn "testPy38tensorflow-29" + +toxTask "testPy38tensorflow-210", "py38-tensorflow-210", 
"${posargs}" +test.dependsOn "testPy38tensorflow-210" +preCommitPyCoverage.dependsOn "testPy38tensorflow-210" + +toxTask "testPy38tensorflow-211", "py38-tensorflow-211", "${posargs}" +test.dependsOn "testPy38tensorflow-211" +preCommitPyCoverage.dependsOn "testPy38tensorflow-211" + toxTask "whitespacelint", "whitespacelint", "${posargs}" task archiveFilesToLint(type: Zip) { diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 16cd2e80c87e..9e9a02abb8d9 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -330,10 +330,12 @@ commands = # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories. /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_pytorch {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' -[testenv:py{37,38,39,310}-tf-{211}] +[testenv:py{37,38,39,310}-tensorflow-{29,210,211}] deps = -r build-requirements.txt - 211: tensorflow>=2.11.0 + 29: tensorflow>=2.9.0,<2.10.0 + 210: tensorflow>=2.10.0,<2.11.0 + 211: tensorflow>=2.11.0,<2.12.0 extras = test,gcp commands = # Log torch version for debugging From 520e1921e26ef1cd96c49ab49f7b95c46e71d214 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 8 Feb 2023 14:06:59 -0500 Subject: [PATCH 16/45] pylint --- .../examples/inference/tensorflow_mnist_classification.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py index c5d7cc539a9e..600a16c2fb67 100644 --- a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py +++ b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py @@ -32,8 +32,8 @@ def process_input(row: str) -> Tuple[int, numpy.ndarray]: data = row.split(',') label, pixels = int(data[0]), data[1:] pixels = [int(pixel) for pixel in pixels] - # the trained model accepts the input of shape 28x28x1 - pixels = numpy.array(pixels).reshape(28, 28, 1) + # the trained model accepts the input of shape 28x28 + pixels = numpy.array(pixels).reshape((28, 28, 1)) return label, pixels @@ -41,7 +41,6 @@ class PostProcessor(beam.DoFn): """Process the PredictionResult to get the predicted label. Returns a comma separated string with true label and predicted label. 
""" - def process(self, element: Tuple[int, PredictionResult]) -> Iterable[str]: label, prediction_result = element prediction = numpy.argmax(prediction_result.inference, axis=0) From 4c43cc1f271f2202de049a8f695f15db60bb1e93 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 8 Feb 2023 16:57:40 -0500 Subject: [PATCH 17/45] lints --- .../examples/inference/tensorflow_mnist_classification.py | 3 +-- sdks/python/apache_beam/ml/inference/tensorflow_inference.py | 2 +- .../apache_beam/ml/inference/tensorflow_inference_it_test.py | 4 ---- .../apache_beam/ml/inference/tensorflow_inference_test.py | 3 +-- sdks/python/tox.ini | 5 +++-- 5 files changed, 6 insertions(+), 11 deletions(-) diff --git a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py index 600a16c2fb67..7f4f1bb341da 100644 --- a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py +++ b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py @@ -16,9 +16,8 @@ # import argparse -import logging from typing import Iterable, Tuple - +import logging import numpy import apache_beam as beam diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index 88c247bfddc1..45967b2a8c56 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -25,8 +25,8 @@ from typing import Optional from typing import Sequence -import sys import numpy +import sys import tensorflow as tf from apache_beam.ml.inference import utils diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index a86e176830c9..0a932dad7c53 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -18,10 +18,6 @@ """End-to-End test for Tensorflow Inference""" import logging - -from typing import Tuple -from typing import List - import pytest import unittest import uuid diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index 9770b4fc898f..3813931f55ae 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -17,9 +17,8 @@ # pytype: skip-file -import unittest - import numpy +import unittest import pytest try: diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 9e9a02abb8d9..4239382f0226 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -151,6 +151,7 @@ deps = docutils<0.18 Jinja2==3.0.3 # TODO(https://github.com/apache/beam/issues/21587): Sphinx version is too old. torch + tensorflow commands = time {toxinidir}/scripts/generate_pydoc.sh @@ -338,8 +339,8 @@ deps = 211: tensorflow>=2.11.0,<2.12.0 extras = test,gcp commands = - # Log torch version for debugging - /bin/sh -c "pip freeze | grep -E torch" + # Log tensorflow version for debugging + /bin/sh -c "pip freeze | grep -E tensorflow" # Run all Tensorflow unit tests # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories. 
+  /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_tf {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'

From 8017a4ddfb793df448a97dd02c14c69055c12b1a Mon Sep 17 00:00:00 2001
From: riteshghorse
Date: Fri, 10 Feb 2023 15:49:09 -0500
Subject: [PATCH 18/45] using tfhub

---
 sdks/python/apache_beam/examples/inference/README.md | 2 +-
 .../examples/inference/tensorflow_mnist_classification.py | 5 +++--
 sdks/python/apache_beam/ml/inference/tensorflow_inference.py | 3 ++-
 .../apache_beam/ml/inference/tensorflow_inference_it_test.py | 4 ++--
 .../ml/inference/tensorflow_tests_requirements.txt | 3 ++-
 5 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/sdks/python/apache_beam/examples/inference/README.md b/sdks/python/apache_beam/examples/inference/README.md
index 3f68916ce872..f239a76a8e85 100644
--- a/sdks/python/apache_beam/examples/inference/README.md
+++ b/sdks/python/apache_beam/examples/inference/README.md
@@ -392,7 +392,7 @@ The pipeline reads rows of pixels corresponding to a digit, performs basic prepr
 
 To use this transform, you need a dataset and a model trained for MNIST digit classification.
 
-1. Create a file named `INPUT.csv` that contains labels and pixels to feed into the model. Each row should have comma-separated elements. The first element is the label. All other elements are pixel values. The csv should not have column headers. The content of the file should be similar to the following example:
+1. Create a file named [`INPUT.csv`](gs://apache-beam-ml/testing/inputs/it_mnist_data.csv) that contains labels and pixels to feed into the model. Each row should have comma-separated elements. The first element is the label. All other elements are pixel values. The csv should not have column headers. The content of the file should be similar to the following example:
 ```
 1,0,0,0...
 0,0,0,0...
diff --git a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py
index 7f4f1bb341da..eb026c3158f6 100644
--- a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py
+++ b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py
@@ -22,7 +22,7 @@
 import apache_beam as beam
 from apache_beam.ml.inference.base import KeyedModelHandler, PredictionResult, RunInference
-from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy
+from apache_beam.ml.inference.tensorflow_inference import ModelType, TFModelHandlerNumpy
 from apache_beam.options.pipeline_options import PipelineOptions, SetupOptions
@@ -82,7 +82,8 @@ def run(
   # In this example we pass keyed inputs to RunInference transform.
   # Therefore, we use KeyedModelHandler wrapper over TFModelHandlerNumpy.
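  # The keys (the true labels here) pass through the transform unchanged and are re-attached to each prediction.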
model_loader = KeyedModelHandler( - TFModelHandlerNumpy(model_uri=known_args.model_path)) + TFModelHandlerNumpy( + model_uri=known_args.model_path, model_type=ModelType.SAVED_MODEL)) pipeline = test_pipeline if not test_pipeline: diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index 45967b2a8c56..5c307f33210e 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -28,6 +28,7 @@ import numpy import sys import tensorflow as tf +import tensorflow_hub as hub from apache_beam.ml.inference import utils from apache_beam.ml.inference.base import ModelHandler @@ -54,7 +55,7 @@ class ModelType(enum.Enum): def _load_model(model_uri, model_type): if model_type == ModelType.SAVED_MODEL: - return tf.keras.models.load_model(model_uri) + return tf.keras.models.load_model(hub.resolve(model_uri)) else: raise AssertionError('Unsupported model type for loading.') diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index 0a932dad7c53..350aaf1dabc4 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -43,9 +43,9 @@ def process_outputs(filepath): @unittest.skipIf( tf is None, 'Missing dependencies. ' 'Test depends on tensorflow') +@pytest.mark.uses_tf +@pytest.mark.it_postcommit class TensorflowInference(unittest.TestCase): - @pytest.mark.uses_tf - @pytest.mark.it_postcommit def test_tf_mnist_classification(self): test_pipeline = TestPipeline(is_integration_test=True) input_file = 'gs://clouddfe-riteshghorse/tf/mnist/dataset/testing_inputs_it_mnist_data.csv' # pylint: disable=line-too-long diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt b/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt index 40bbab86d080..efe7ad4fcfcc 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt +++ b/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt @@ -15,4 +15,5 @@ # limitations under the License. 
 #
-tensorflow>=2.11.0
+tensorflow>=2.0.0
+tensorflow_hub>=0.10.0

From 1d98cdb9955df4a72685a6817da6ce99cd3d27ed Mon Sep 17 00:00:00 2001
From: riteshghorse
Date: Tue, 7 Feb 2023 13:59:53 -0500
Subject: [PATCH 19/45] added tf model handler and tests

---
 .../tensorflow_mnist_classification.py        | 113 +++++++++++
 .../ml/inference/tensorflow_inference.py      | 189 ++++++++++++++++++
 .../inference/tensorflow_inference_it_test.py |  85 ++++++++
 .../ml/inference/tensorflow_inference_test.py | 111 ++++++++++
 4 files changed, 498 insertions(+)
 create mode 100644 sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py
 create mode 100644 sdks/python/apache_beam/ml/inference/tensorflow_inference.py
 create mode 100644 sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py
 create mode 100644 sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py

diff --git a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py
new file mode 100644
index 000000000000..a39b3fc6733e
--- /dev/null
+++ b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py
@@ -0,0 +1,113 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import argparse
+import logging
+from typing import Iterable, List, Tuple
+
+import numpy
+
+import apache_beam as beam
+from apache_beam.ml.inference.base import KeyedModelHandler, PredictionResult, RunInference
+from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy
+from apache_beam.options.pipeline_options import PipelineOptions, SetupOptions
+
+from apache_beam.runners.runner import PipelineResult
+
+
+def process_input(row: str) -> Tuple[int, numpy.ndarray]:
+  data = row.split(',')
+  label, pixels = int(data[0]), data[1:]
+  pixels = [int(pixel) for pixel in pixels]
+  # the trained model accepts the input of shape 28x28x1
+  pixels = numpy.array(pixels).reshape(28, 28, 1)
+  return label, pixels
+
+
+class PostProcessor(beam.DoFn):
+  """Process the PredictionResult to get the predicted label.
+  Returns a comma separated string with true label and predicted label.
+ """ + def process(self, element: Tuple[int, PredictionResult]) -> Iterable[str]: + label, prediction_result = element + prediction = numpy.argmax(prediction_result.inference, axis=0) + yield '{},{}'.format(label, prediction) + +def parse_known_args(argv): + """Parses args for the workflow.""" + parser = argparse.ArgumentParser() + parser.add_argument( + '--input', + dest='input', + required=True, + help='text file with comma separated int values.') + parser.add_argument( + '--output', + dest='output', + required=True, + help='Path to save output predictions.') + parser.add_argument( + '--model_path', + dest='model_path', + required=True, + help='Path to load the Tensorflow model for Inference.') + return parser.parse_known_args(argv) + + +def run( + argv=None, save_main_session=True, test_pipeline=None) -> PipelineResult: + """ + Args: + argv: Command line arguments defined for this example. + save_main_session: Used for internal testing. + test_pipeline: Used for internal testing. + """ + known_args, pipeline_args = parse_known_args(argv) + pipeline_options = PipelineOptions(pipeline_args) + pipeline_options.view_as(SetupOptions).save_main_session = save_main_session + + # In this example we pass keyed inputs to RunInference transform. + # Therefore, we use KeyedModelHandler wrapper over TFModelHandlerNumpy. + model_loader = KeyedModelHandler( + TFModelHandlerNumpy(model_uri=known_args.model_path)) + + pipeline = test_pipeline + if not test_pipeline: + pipeline = beam.Pipeline(options=pipeline_options) + + label_pixel_tuple = ( + pipeline + | "ReadFromInput" >> beam.io.ReadFromText(known_args.input) + | "PreProcessInputs" >> beam.Map(process_input)) + + predictions = ( + label_pixel_tuple + | "RunInference" >> RunInference(model_loader) + | "PostProcessOutputs" >> beam.ParDo(PostProcessor())) + + _ = predictions | "WriteOutput" >> beam.io.WriteToText( + known_args.output, shard_name_template='', append_trailing_newlines=True) + + result = pipeline.run() + result.wait_until_finish() + return result + + +if __name__ == '__main__': + logging.getLogger().setLevel(logging.INFO) + run() \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py new file mode 100644 index 000000000000..c933423843b9 --- /dev/null +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -0,0 +1,189 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# pytype: skip-file + +import logging +from collections import defaultdict +from typing import Any +from typing import Callable +from typing import Dict +from typing import Iterable +from typing import Optional +from typing import Sequence +from typing import Union + +import sys +from apache_beam.ml.inference import utils +import tensorflow as tf +import numpy +from apache_beam.io.filesystems import FileSystems +from apache_beam.ml.inference.base import ModelHandler +from apache_beam.ml.inference.base import PredictionResult +from apache_beam.utils.annotations import experimental + +__all__ = [ + 'TFModelHandlerNumpy', + 'TFModelHandlerTensor', +] + +TensorInferenceFn = Callable[ + [tf.Module, Sequence[numpy.ndarray], Optional[Dict[str, Any]], Optional[str]], + Iterable[PredictionResult]] + +def _load_model(model_uri): + return tf.keras.models.load_model(model_uri) + + +def default_numpy_inference_fn( + model: tf.Module, + batch: Sequence[numpy.ndarray], + inference_args: Optional[Dict[str,Any]] = None, + model_id: Optional[str] = None) -> Iterable[PredictionResult]: + vectorized_batch = numpy.stack(batch, axis=0) + return utils._convert_to_result(batch, model.predict(vectorized_batch), model_id) + + +def default_tensor_inference_fn( + model: tf.Module, + batch: Sequence[tf.Tensor], + inference_args: Optional[Dict[str,Any]] = None, + model_id: Optional[str] = None) -> Iterable[PredictionResult]: + vectorized_batch = tf.stack(batch, axis=0) + return utils._convert_to_result(batch, model.predict(vectorized_batch), model_id) + +class TFModelHandlerNumpy(ModelHandler[numpy.ndarray, + PredictionResult, + tf.Module]): + def __init__( + self, + model_uri: str, + *, + inference_fn: TensorInferenceFn = default_numpy_inference_fn): + self._model_uri = model_uri + self._inference_fn = inference_fn + + def load_model(self) -> tf.Module: + """Loads and initializes a Tensorflow model for processing.""" + return _load_model(self._model_uri) + + def update_model_path(self, model_path: Optional[str] = None): + self._model_uri = model_path if model_path else self._model_uri + + def run_inference( + self, + batch: Sequence[numpy.ndarray], + model: tf.Module, + inference_args: Optional[Dict[str, Any]] = None + ) -> Iterable[PredictionResult]: + """ + Runs inferences on a batch of numpy array and returns an Iterable of + numpy array Predictions. + + This method stacks the n-dimensional np-array in a vectorized format to optimize + the inference call. + + Args: + batch: A sequence of numpy nd-array. These should be batchable, as this + method will call `numpy.stack()` and pass in batched numpy nd-array with + dimensions (batch_size, n_features, etc.) into the model's forward() + function. + model: A TF model. + inference_args: any additional arguments for an inference. + + Returns: + An Iterable of type PredictionResult. + """ + return self._inference_fn(model, batch, inference_args, self._model_uri) + + def get_num_bytes(self, batch: Sequence[numpy.ndarray]) -> int: + """ + Returns: + The number of bytes of data for a batch of numpy arrays. + """ + return sum(sys.getsizeof(element) for element in batch) + + def get_metrics_namespace(self) -> str: + """ + Returns: + A namespace for metrics collected by the RunInference transform. 
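+      (RunInference reports metrics such as inference counts and latency under this namespace.)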
+ """ + return 'BeamML_TF_Numpy' + + def validate_inference_args(self, inference_args: Optional[Dict[str, Any]]): + pass + + +class TFModelHandlerTensor(ModelHandler[tf.Tensor, + PredictionResult, + tf.Module]): + def __init__( + self, + model_uri: str, + *, + inference_fn: TensorInferenceFn = default_tensor_inference_fn): + self._model_uri = model_uri + self._inference_fn = inference_fn + + def load_model(self) -> tf.Module: + """Loads and initializes a tensorflow model for processing.""" + return _load_model(self._model_uri) + + def update_model_path(self, model_path: Optional[str] = None): + self._model_uri = model_path if model_path else self._model_uri + + def run_inference( + self, + batch: Sequence[tf.Tensor], + model: tf.Module, + inference_args: Optional[Dict[str, Any]] = None, + ) -> Iterable[PredictionResult]: + """ + Runs inferences on a batch of tf.Tensor and returns an Iterable of + Tensor Predictions. + This method stacks the list of Tensors in a vectorized format to optimize + the inference call. + Args: + batch: A sequence of Tensors. These Tensors should be batchable, as this + method will call `tf.stack()` and pass in batched Tensors with + dimensions (batch_size, n_features, etc.) into the model's forward() + function. + model: A Tensorflow model. + inference_args: Non-batchable arguments required as inputs to the model's + forward() function. Unlike Tensors in `batch`, these parameters will + not be dynamically batched + Returns: + An Iterable of type PredictionResult. + """ + return self._inference_fn(model, batch, inference_args, self._model_uri) + + def get_num_bytes(self, batch: Sequence[tf.Tensor]) -> int: + """ + Returns: + The number of bytes of data for a batch of Tensors. + """ + return sum(sys.getsizeof(element) for element in batch) + + def get_metrics_namespace(self) -> str: + """ + Returns: + A namespace for metrics collected by the RunInference transform. + """ + return 'BeamML_TF_Tensors' + + def validate_inference_args(self, inference_args: Optional[Dict[str, Any]]): + pass \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py new file mode 100644 index 000000000000..fcd8dda0875c --- /dev/null +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -0,0 +1,85 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +"""End-to-End test for Tensorflow Inference""" + +from cgi import test +from typing import Tuple +import logging +from typing import List +import unittest +import uuid + + +import pytest + +import apache_beam as beam +from apache_beam.examples.inference import tensorflow_mnist_classification +from apache_beam.io.filesystems import FileSystems + +from apache_beam.testing.test_pipeline import TestPipeline + + +def process_outputs(filepath): + with FileSystems().open(filepath) as f: + lines = f.readlines() + lines = [l.decode('utf-8').strip('\n') for l in lines] + return lines + + +class TensorflowInference(unittest.TestCase): + def process_input(self, row: str) -> Tuple[int, List[int]]: + data = row.split(',') + label, pixels = int(data[0]), data[1:] + pixels = [int(pixel) for pixel in pixels] + return label, pixels + + + def test_tf_mnist_classification(self): + test_pipeline = TestPipeline(is_integration_test=True) + input_file = 'gs://clouddfe-riteshghorse/tf/mnist/dataset/testing_inputs_it_mnist_data.csv' + output_file_dir = 'gs://clouddfe-riteshghorse/tf/mnist/output/' + output_file = '/'.join([output_file_dir, str(uuid.uuid4()), 'result.txt']) + model_path = 'gs://clouddfe-riteshghorse/tf/mnist/model/' + extra_opts = { + 'input': input_file, + 'output': output_file, + 'model_path': model_path, + } + tensorflow_mnist_classification.run( + test_pipeline.get_full_options_as_args(**extra_opts), + save_main_session=False) + self.assertEqual(FileSystems().exists(output_file), True) + + expected_output_filepath = 'gs://clouddfe-riteshghorse/tf/mnist/output/testing_expected_outputs_test_sklearn_mnist_classification_actuals.txt' # pylint: disable=line-too-long + expected_outputs = process_outputs(expected_output_filepath) + + predicted_outputs = process_outputs(output_file) + self.assertEqual(len(expected_outputs), len(predicted_outputs)) + + predictions_dict = {} + for i in range(len(predicted_outputs)): + true_label, prediction = predicted_outputs[i].split(',') + predictions_dict[true_label] = prediction + + for i in range(len(expected_outputs)): + true_label, expected_prediction = expected_outputs[i].split(',') + self.assertEqual(predictions_dict[true_label], expected_prediction) + +if __name__ == '__main__': + logging.getLogger().setLevel(logging.DEBUG) + unittest.main() \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py new file mode 100644 index 000000000000..b6fce47aee6c --- /dev/null +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -0,0 +1,111 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# pytype: skip-file + +import unittest +from apache_beam.examples import inference +from apache_beam.ml.inference.sklearn_inference_test import compare_prediction_result + + +import numpy +import tensorflow as tf + + +from apache_beam.ml.inference.base import KeyedModelHandler, PredictionResult +from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy, TFModelHandlerTensor + +class FakeTFNumpyModel: + def predict(self, input: numpy.ndarray): + return numpy.multiply(input, 10) + + +class FakeTFTensorModel: + def predict(self, input: tf.Tensor): + return tf.math.multiply(input, 10) + + +def _compare_tensor_prediction_result(x, y): + return tf.math.equal(x.inference, y.inference) + + +class TFRunInferenceTest(unittest.TestCase): + def test_predict_numpy(self): + fake_model = FakeTFNumpyModel() + inference_runner = TFModelHandlerNumpy(model_uri='unused') + batched_examples = [ + numpy.array([1]), numpy.array([10]), numpy.array([100]) + ] + expected_predictions = [ + PredictionResult(numpy.array([1]), 10), + PredictionResult(numpy.array([10]), 100), + PredictionResult(numpy.array([100]), 1000) + ] + inferences = inference_runner.run_inference(batched_examples, fake_model) + for actual, expected in zip(inferences, expected_predictions): + self.assertTrue(compare_prediction_result(actual, expected)) + + + def test_predict_tensor(self): + fake_model = FakeTFTensorModel() + inference_runner = TFModelHandlerTensor(model_uri='unused') + batched_examples = [ + tf.convert_to_tensor(numpy.array([1])), + tf.convert_to_tensor(numpy.array([10])), + tf.convert_to_tensor(numpy.array([100])), + ] + expected_predictions = [ + PredictionResult(ex, pred) for ex, pred in zip(batched_examples, [tf.math.multiply(n, 10) for n in batched_examples]) + ] + + inferences = inference_runner.run_inference(batched_examples, fake_model) + for actual, expected in zip(inferences, expected_predictions): + self.assertTrue(_compare_tensor_prediction_result(actual, expected)) + + + def test_predict_keyed_numpy(self): + fake_model = FakeTFNumpyModel() + inference_runner = KeyedModelHandler(TFModelHandlerNumpy(model_uri='unused')) + batched_examples = [ + ('k1', numpy.array([1], dtype=numpy.int64)), + ('k2', numpy.array([10], dtype=numpy.int64)), + ('k3', numpy.array([100], dtype=numpy.int64)), + ] + expected_predictions = [ + (ex[0],PredictionResult(ex[1], pred)) for ex, pred in zip(batched_examples, [numpy.multiply(n[1], 10) for n in batched_examples]) + ] + inferences = inference_runner.run_inference(batched_examples, fake_model) + for actual, expected in zip(inferences, expected_predictions): + self.assertTrue(compare_prediction_result(actual[1], expected[1])) + + def test_predict_keyed_tensor(self): + fake_model = FakeTFTensorModel() + inference_runner = KeyedModelHandler(TFModelHandlerTensor(model_uri='unused')) + batched_examples = [ + ('k1', tf.convert_to_tensor(numpy.array([1]))), + ('k2', tf.convert_to_tensor(numpy.array([10]))), + ('k3', tf.convert_to_tensor(numpy.array([100]))), + ] + expected_predictions = [ + (ex[0],PredictionResult(ex[1], pred)) for ex, pred in zip(batched_examples, [tf.math.multiply(n[1], 10) for n in batched_examples]) + ] + inferences = inference_runner.run_inference(batched_examples, fake_model) + for actual, expected in zip(inferences, expected_predictions): + self.assertTrue(_compare_tensor_prediction_result(actual[1], expected[1])) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From 5b56a2f3f03df2a8748352e8e994115870b43b4d Mon Sep 17 
00:00:00 2001 From: riteshghorse Date: Tue, 7 Feb 2023 15:23:37 -0500 Subject: [PATCH 20/45] lint and formatting changes --- .../tensorflow_mnist_classification.py | 6 +-- .../ml/inference/tensorflow_inference.py | 39 ++++++++-------- .../inference/tensorflow_inference_it_test.py | 31 +++++++------ .../ml/inference/tensorflow_inference_test.py | 46 +++++++++++-------- 4 files changed, 68 insertions(+), 54 deletions(-) diff --git a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py index a39b3fc6733e..4ec0de7d7de9 100644 --- a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py +++ b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py @@ -18,7 +18,7 @@ import argparse import logging -from typing import Iterable, List, Tuple +from typing import Iterable, Tuple import numpy @@ -26,7 +26,6 @@ from apache_beam.ml.inference.base import KeyedModelHandler, PredictionResult, RunInference from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy from apache_beam.options.pipeline_options import PipelineOptions, SetupOptions - from apache_beam.runners.runner import PipelineResult @@ -110,4 +109,5 @@ def run( if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) - run() \ No newline at end of file + run() + \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index c933423843b9..0f02f5a8ffc2 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -17,24 +17,21 @@ # pytype: skip-file -import logging -from collections import defaultdict from typing import Any from typing import Callable from typing import Dict from typing import Iterable from typing import Optional from typing import Sequence -from typing import Union import sys -from apache_beam.ml.inference import utils -import tensorflow as tf import numpy -from apache_beam.io.filesystems import FileSystems +import tensorflow as tf + +from apache_beam.ml.inference import utils from apache_beam.ml.inference.base import ModelHandler from apache_beam.ml.inference.base import PredictionResult -from apache_beam.utils.annotations import experimental + __all__ = [ 'TFModelHandlerNumpy', @@ -42,7 +39,9 @@ ] TensorInferenceFn = Callable[ - [tf.Module, Sequence[numpy.ndarray], Optional[Dict[str, Any]], Optional[str]], + [tf.Module, Sequence[numpy.ndarray], + Optional[Dict[str, Any]], + Optional[str]], Iterable[PredictionResult]] def _load_model(model_uri): @@ -55,7 +54,8 @@ def default_numpy_inference_fn( inference_args: Optional[Dict[str,Any]] = None, model_id: Optional[str] = None) -> Iterable[PredictionResult]: vectorized_batch = numpy.stack(batch, axis=0) - return utils._convert_to_result(batch, model.predict(vectorized_batch), model_id) + return utils._convert_to_result(batch, model.predict(vectorized_batch), + model_id) def default_tensor_inference_fn( @@ -64,7 +64,9 @@ def default_tensor_inference_fn( inference_args: Optional[Dict[str,Any]] = None, model_id: Optional[str] = None) -> Iterable[PredictionResult]: vectorized_batch = tf.stack(batch, axis=0) - return utils._convert_to_result(batch, model.predict(vectorized_batch), model_id) + return utils._convert_to_result(batch, model.predict(vectorized_batch), + model_id) + class 
TFModelHandlerNumpy(ModelHandler[numpy.ndarray, PredictionResult, @@ -83,7 +85,7 @@ def load_model(self) -> tf.Module: def update_model_path(self, model_path: Optional[str] = None): self._model_uri = model_path if model_path else self._model_uri - + def run_inference( self, batch: Sequence[numpy.ndarray], @@ -94,14 +96,14 @@ def run_inference( Runs inferences on a batch of numpy array and returns an Iterable of numpy array Predictions. - This method stacks the n-dimensional np-array in a vectorized format to optimize - the inference call. + This method stacks the n-dimensional np-array in a vectorized format to + optimize the inference call. Args: batch: A sequence of numpy nd-array. These should be batchable, as this - method will call `numpy.stack()` and pass in batched numpy nd-array with - dimensions (batch_size, n_features, etc.) into the model's forward() - function. + method will call `numpy.stack()` and pass in batched numpy nd-array + with dimensions (batch_size, n_features, etc.) into the model's + forward() function. model: A TF model. inference_args: any additional arguments for an inference. @@ -145,7 +147,7 @@ def load_model(self) -> tf.Module: def update_model_path(self, model_path: Optional[str] = None): self._model_uri = model_path if model_path else self._model_uri - + def run_inference( self, batch: Sequence[tf.Tensor], @@ -186,4 +188,5 @@ def get_metrics_namespace(self) -> str: return 'BeamML_TF_Tensors' def validate_inference_args(self, inference_args: Optional[Dict[str, Any]]): - pass \ No newline at end of file + pass + \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index fcd8dda0875c..ef41bc5b50b9 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -17,22 +17,22 @@ """End-to-End test for Tensorflow Inference""" -from cgi import test -from typing import Tuple import logging + +from typing import Tuple from typing import List + import unittest import uuid - -import pytest - -import apache_beam as beam -from apache_beam.examples.inference import tensorflow_mnist_classification from apache_beam.io.filesystems import FileSystems - from apache_beam.testing.test_pipeline import TestPipeline +try: + import tensorflow as tf + from apache_beam.examples.inference import tensorflow_mnist_classification +except ImportError as e: + tf = None def process_outputs(filepath): with FileSystems().open(filepath) as f: @@ -41,17 +41,21 @@ def process_outputs(filepath): return lines +@unittest.skipIf( + tf is None, + 'Missing dependencies. 
' + 'Test depends on tensorflow') class TensorflowInference(unittest.TestCase): def process_input(self, row: str) -> Tuple[int, List[int]]: data = row.split(',') label, pixels = int(data[0]), data[1:] pixels = [int(pixel) for pixel in pixels] return label, pixels - - + + def test_tf_mnist_classification(self): test_pipeline = TestPipeline(is_integration_test=True) - input_file = 'gs://clouddfe-riteshghorse/tf/mnist/dataset/testing_inputs_it_mnist_data.csv' + input_file = 'gs://clouddfe-riteshghorse/tf/mnist/dataset/testing_inputs_it_mnist_data.csv' # pylint: disable=line-too-long output_file_dir = 'gs://clouddfe-riteshghorse/tf/mnist/output/' output_file = '/'.join([output_file_dir, str(uuid.uuid4()), 'result.txt']) model_path = 'gs://clouddfe-riteshghorse/tf/mnist/model/' @@ -64,7 +68,7 @@ def test_tf_mnist_classification(self): test_pipeline.get_full_options_as_args(**extra_opts), save_main_session=False) self.assertEqual(FileSystems().exists(output_file), True) - + expected_output_filepath = 'gs://clouddfe-riteshghorse/tf/mnist/output/testing_expected_outputs_test_sklearn_mnist_classification_actuals.txt' # pylint: disable=line-too-long expected_outputs = process_outputs(expected_output_filepath) @@ -82,4 +86,5 @@ def test_tf_mnist_classification(self): if __name__ == '__main__': logging.getLogger().setLevel(logging.DEBUG) - unittest.main() \ No newline at end of file + unittest.main() + \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index b6fce47aee6c..1a5d4f06885f 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -18,26 +18,24 @@ # pytype: skip-file import unittest -from apache_beam.examples import inference -from apache_beam.ml.inference.sklearn_inference_test import compare_prediction_result import numpy import tensorflow as tf - +from apache_beam.ml.inference.sklearn_inference_test import compare_prediction_result from apache_beam.ml.inference.base import KeyedModelHandler, PredictionResult from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy, TFModelHandlerTensor class FakeTFNumpyModel: - def predict(self, input: numpy.ndarray): - return numpy.multiply(input, 10) + def predict(self, input: numpy.ndarray): + return numpy.multiply(input, 10) class FakeTFTensorModel: def predict(self, input: tf.Tensor): return tf.math.multiply(input, 10) - + def _compare_tensor_prediction_result(x, y): return tf.math.equal(x.inference, y.inference) @@ -58,54 +56,62 @@ def test_predict_numpy(self): inferences = inference_runner.run_inference(batched_examples, fake_model) for actual, expected in zip(inferences, expected_predictions): self.assertTrue(compare_prediction_result(actual, expected)) - - + + def test_predict_tensor(self): fake_model = FakeTFTensorModel() inference_runner = TFModelHandlerTensor(model_uri='unused') batched_examples = [ tf.convert_to_tensor(numpy.array([1])), tf.convert_to_tensor(numpy.array([10])), - tf.convert_to_tensor(numpy.array([100])), + tf.convert_to_tensor(numpy.array([100])), ] expected_predictions = [ - PredictionResult(ex, pred) for ex, pred in zip(batched_examples, [tf.math.multiply(n, 10) for n in batched_examples]) + PredictionResult(ex, pred) + for ex, pred in zip(batched_examples, + [tf.math.multiply(n, 10) for n in batched_examples]) ] - + inferences = inference_runner.run_inference(batched_examples, 
fake_model) for actual, expected in zip(inferences, expected_predictions): self.assertTrue(_compare_tensor_prediction_result(actual, expected)) - - + + def test_predict_keyed_numpy(self): fake_model = FakeTFNumpyModel() - inference_runner = KeyedModelHandler(TFModelHandlerNumpy(model_uri='unused')) + inference_runner = KeyedModelHandler( + TFModelHandlerNumpy(model_uri='unused')) batched_examples = [ ('k1', numpy.array([1], dtype=numpy.int64)), ('k2', numpy.array([10], dtype=numpy.int64)), ('k3', numpy.array([100], dtype=numpy.int64)), ] expected_predictions = [ - (ex[0],PredictionResult(ex[1], pred)) for ex, pred in zip(batched_examples, [numpy.multiply(n[1], 10) for n in batched_examples]) + (ex[0],PredictionResult(ex[1], pred)) + for ex, pred in zip(batched_examples, + [numpy.multiply(n[1], 10) for n in batched_examples]) ] inferences = inference_runner.run_inference(batched_examples, fake_model) for actual, expected in zip(inferences, expected_predictions): self.assertTrue(compare_prediction_result(actual[1], expected[1])) - + def test_predict_keyed_tensor(self): fake_model = FakeTFTensorModel() - inference_runner = KeyedModelHandler(TFModelHandlerTensor(model_uri='unused')) + inference_runner = KeyedModelHandler( + TFModelHandlerTensor(model_uri='unused')) batched_examples = [ ('k1', tf.convert_to_tensor(numpy.array([1]))), ('k2', tf.convert_to_tensor(numpy.array([10]))), ('k3', tf.convert_to_tensor(numpy.array([100]))), ] expected_predictions = [ - (ex[0],PredictionResult(ex[1], pred)) for ex, pred in zip(batched_examples, [tf.math.multiply(n[1], 10) for n in batched_examples]) + (ex[0],PredictionResult(ex[1], pred)) + for ex, pred in zip(batched_examples, + [tf.math.multiply(n[1], 10) for n in batched_examples]) ] inferences = inference_runner.run_inference(batched_examples, fake_model) for actual, expected in zip(inferences, expected_predictions): self.assertTrue(_compare_tensor_prediction_result(actual[1], expected[1])) - + if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() From 3ada0169358abcc96e7337b3325a3a0ad2d2d802 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Tue, 7 Feb 2023 16:56:52 -0500 Subject: [PATCH 21/45] correct lints --- .../ml/inference/tensorflow_inference.py | 34 ++++++++----------- .../inference/tensorflow_inference_it_test.py | 9 +++-- .../ml/inference/tensorflow_inference_test.py | 12 ++++--- 3 files changed, 27 insertions(+), 28 deletions(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index 0f02f5a8ffc2..457a582b643d 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -32,17 +32,15 @@ from apache_beam.ml.inference.base import ModelHandler from apache_beam.ml.inference.base import PredictionResult - __all__ = [ 'TFModelHandlerNumpy', 'TFModelHandlerTensor', ] -TensorInferenceFn = Callable[ - [tf.Module, Sequence[numpy.ndarray], - Optional[Dict[str, Any]], - Optional[str]], - Iterable[PredictionResult]] +TensorInferenceFn = Callable[[ + tf.Module, Sequence[numpy.ndarray], Optional[Dict[str, Any]], Optional[str] +], + Iterable[PredictionResult]] def _load_model(model_uri): return tf.keras.models.load_model(model_uri) @@ -51,26 +49,26 @@ def _load_model(model_uri): def default_numpy_inference_fn( model: tf.Module, batch: Sequence[numpy.ndarray], - inference_args: Optional[Dict[str,Any]] = None, + inference_args: Optional[Dict[str, Any]] = 
None, model_id: Optional[str] = None) -> Iterable[PredictionResult]: vectorized_batch = numpy.stack(batch, axis=0) - return utils._convert_to_result(batch, model.predict(vectorized_batch), - model_id) + return utils._convert_to_result( + batch, model.predict(vectorized_batch), model_id) def default_tensor_inference_fn( model: tf.Module, batch: Sequence[tf.Tensor], - inference_args: Optional[Dict[str,Any]] = None, + inference_args: Optional[Dict[str, Any]] = None, model_id: Optional[str] = None) -> Iterable[PredictionResult]: vectorized_batch = tf.stack(batch, axis=0) - return utils._convert_to_result(batch, model.predict(vectorized_batch), - model_id) + return utils._convert_to_result( + batch, model.predict(vectorized_batch), model_id) class TFModelHandlerNumpy(ModelHandler[numpy.ndarray, - PredictionResult, - tf.Module]): + PredictionResult, + tf.Module]): def __init__( self, model_uri: str, @@ -130,9 +128,8 @@ def validate_inference_args(self, inference_args: Optional[Dict[str, Any]]): pass -class TFModelHandlerTensor(ModelHandler[tf.Tensor, - PredictionResult, - tf.Module]): +class TFModelHandlerTensor(ModelHandler[tf.Tensor, PredictionResult, + tf.Module]): def __init__( self, model_uri: str, @@ -188,5 +185,4 @@ def get_metrics_namespace(self) -> str: return 'BeamML_TF_Tensors' def validate_inference_args(self, inference_args: Optional[Dict[str, Any]]): - pass - \ No newline at end of file + pass \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index ef41bc5b50b9..d5dc3bea3167 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -34,6 +34,7 @@ except ImportError as e: tf = None + def process_outputs(filepath): with FileSystems().open(filepath) as f: lines = f.readlines() @@ -42,8 +43,7 @@ def process_outputs(filepath): @unittest.skipIf( - tf is None, - 'Missing dependencies. ' + tf is None, 'Missing dependencies. 
' 'Test depends on tensorflow') class TensorflowInference(unittest.TestCase): def process_input(self, row: str) -> Tuple[int, List[int]]: @@ -52,7 +52,6 @@ def process_input(self, row: str) -> Tuple[int, List[int]]: pixels = [int(pixel) for pixel in pixels] return label, pixels - def test_tf_mnist_classification(self): test_pipeline = TestPipeline(is_integration_test=True) input_file = 'gs://clouddfe-riteshghorse/tf/mnist/dataset/testing_inputs_it_mnist_data.csv' # pylint: disable=line-too-long @@ -84,7 +83,7 @@ def test_tf_mnist_classification(self): true_label, expected_prediction = expected_outputs[i].split(',') self.assertEqual(predictions_dict[true_label], expected_prediction) + if __name__ == '__main__': logging.getLogger().setLevel(logging.DEBUG) - unittest.main() - \ No newline at end of file + unittest.main() \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index 1a5d4f06885f..0e53ee058c34 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -21,11 +21,15 @@ import numpy -import tensorflow as tf -from apache_beam.ml.inference.sklearn_inference_test import compare_prediction_result -from apache_beam.ml.inference.base import KeyedModelHandler, PredictionResult -from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy, TFModelHandlerTensor +try: + import tensorflow as tf + from apache_beam.ml.inference.sklearn_inference_test import compare_prediction_result + from apache_beam.ml.inference.base import KeyedModelHandler, PredictionResult + from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy, TFModelHandlerTensor +except ImportError: + raise unittest.SkipTest('PyTorch dependencies are not installed') + class FakeTFNumpyModel: def predict(self, input: numpy.ndarray): From e8cee7be5e64b08d416cfa0d50aefaced325ac0b Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Tue, 7 Feb 2023 17:25:06 -0500 Subject: [PATCH 22/45] more lints and formats --- .../ml/inference/tensorflow_inference.py | 48 +++++++++++++++++-- .../inference/tensorflow_inference_it_test.py | 13 +++-- .../ml/inference/tensorflow_inference_test.py | 13 ++--- 3 files changed, 56 insertions(+), 18 deletions(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index 457a582b643d..d7dd75b43431 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -17,6 +17,7 @@ # pytype: skip-file +from cmath import inf from typing import Any from typing import Callable from typing import Dict @@ -74,6 +75,22 @@ def __init__( model_uri: str, *, inference_fn: TensorInferenceFn = default_numpy_inference_fn): + """Implementation of the ModelHandler interface for Tensorflow. + + Example Usage:: + + pcoll | RunInference(TFModelHandlerNumpy(model_uri="my_uri")) + + See https://www.tensorflow.org/tutorials/keras/save_and_load for details. + + Args: + model_uri (str): path to the trained model. + inference_fn (TensorInferenceFn, optional): inference function to use + during RunInference. Defaults to default_numpy_inference_fn. + + **Supported Versions:** RunInference APIs in Apache Beam have been tested + with Tensorflow 2.11. 
+ """ self._model_uri = model_uri self._inference_fn = inference_fn @@ -94,20 +111,22 @@ def run_inference( Runs inferences on a batch of numpy array and returns an Iterable of numpy array Predictions. - This method stacks the n-dimensional np-array in a vectorized format to + This method stacks the n-dimensional numpy array in a vectorized format to optimize the inference call. Args: batch: A sequence of numpy nd-array. These should be batchable, as this method will call `numpy.stack()` and pass in batched numpy nd-array with dimensions (batch_size, n_features, etc.) into the model's - forward() function. - model: A TF model. + predict() function. + model: A Tensorflow model. inference_args: any additional arguments for an inference. Returns: An Iterable of type PredictionResult. """ + inference_args = {} if not inference_args else inference_args + return self._inference_fn(model, batch, inference_args, self._model_uri) def get_num_bytes(self, batch: Sequence[numpy.ndarray]) -> int: @@ -135,6 +154,22 @@ def __init__( model_uri: str, *, inference_fn: TensorInferenceFn = default_tensor_inference_fn): + """Implementation of the ModelHandler interface for Tensorflow. + + Example Usage:: + + pcoll | RunInference(TFModelHandlerTensor(model_uri="my_uri")) + + See https://www.tensorflow.org/tutorials/keras/save_and_load for details. + + Args: + model_uri (str): path to the trained model. + inference_fn (TensorInferenceFn, optional): inference function to use + during RunInference. Defaults to default_numpy_inference_fn. + + **Supported Versions:** RunInference APIs in Apache Beam have been tested + with Tensorflow 2.11. + """ self._model_uri = model_uri self._inference_fn = inference_fn @@ -154,12 +189,14 @@ def run_inference( """ Runs inferences on a batch of tf.Tensor and returns an Iterable of Tensor Predictions. + This method stacks the list of Tensors in a vectorized format to optimize the inference call. + Args: batch: A sequence of Tensors. These Tensors should be batchable, as this method will call `tf.stack()` and pass in batched Tensors with - dimensions (batch_size, n_features, etc.) into the model's forward() + dimensions (batch_size, n_features, etc.) into the model's predict() function. model: A Tensorflow model. inference_args: Non-batchable arguments required as inputs to the model's @@ -185,4 +222,5 @@ def get_metrics_namespace(self) -> str: return 'BeamML_TF_Tensors' def validate_inference_args(self, inference_args: Optional[Dict[str, Any]]): - pass \ No newline at end of file + pass + \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index d5dc3bea3167..04eb5ab13cfa 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -22,12 +22,14 @@ from typing import Tuple from typing import List +import pytest import unittest import uuid from apache_beam.io.filesystems import FileSystems from apache_beam.testing.test_pipeline import TestPipeline +# pylint: disable=ungrouped-imports try: import tensorflow as tf from apache_beam.examples.inference import tensorflow_mnist_classification @@ -46,12 +48,8 @@ def process_outputs(filepath): tf is None, 'Missing dependencies. 
' 'Test depends on tensorflow') class TensorflowInference(unittest.TestCase): - def process_input(self, row: str) -> Tuple[int, List[int]]: - data = row.split(',') - label, pixels = int(data[0]), data[1:] - pixels = [int(pixel) for pixel in pixels] - return label, pixels - + @pytest.mark.uses_tensorflow + @pytest.mark.it_postcommit def test_tf_mnist_classification(self): test_pipeline = TestPipeline(is_integration_test=True) input_file = 'gs://clouddfe-riteshghorse/tf/mnist/dataset/testing_inputs_it_mnist_data.csv' # pylint: disable=line-too-long @@ -86,4 +84,5 @@ def test_tf_mnist_classification(self): if __name__ == '__main__': logging.getLogger().setLevel(logging.DEBUG) - unittest.main() \ No newline at end of file + unittest.main() + \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index 0e53ee058c34..f48c8b510705 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -19,16 +19,16 @@ import unittest - import numpy +import pytest try: import tensorflow as tf - from apache_beam.ml.inference.sklearn_inference_test import compare_prediction_result + from apache_beam.ml.inference.sklearn_inference_test import _compare_prediction_result from apache_beam.ml.inference.base import KeyedModelHandler, PredictionResult from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy, TFModelHandlerTensor except ImportError: - raise unittest.SkipTest('PyTorch dependencies are not installed') + raise unittest.SkipTest('Tensorflow dependencies are not installed') class FakeTFNumpyModel: @@ -59,9 +59,9 @@ def test_predict_numpy(self): ] inferences = inference_runner.run_inference(batched_examples, fake_model) for actual, expected in zip(inferences, expected_predictions): - self.assertTrue(compare_prediction_result(actual, expected)) - + self.assertTrue(_compare_prediction_result(actual, expected)) + @pytest.mark.uses_tensorflow def test_predict_tensor(self): fake_model = FakeTFTensorModel() inference_runner = TFModelHandlerTensor(model_uri='unused') @@ -97,8 +97,9 @@ def test_predict_keyed_numpy(self): ] inferences = inference_runner.run_inference(batched_examples, fake_model) for actual, expected in zip(inferences, expected_predictions): - self.assertTrue(compare_prediction_result(actual[1], expected[1])) + self.assertTrue(_compare_prediction_result(actual[1], expected[1])) + @pytest.mark.uses_tensorflow def test_predict_keyed_tensor(self): fake_model = FakeTFTensorModel() inference_runner = KeyedModelHandler( From 7a2c1a17b1a4dafa5c993fc6f5069831af427123 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Tue, 7 Feb 2023 17:44:24 -0500 Subject: [PATCH 23/45] auto formatted with yapf --- .../tensorflow_mnist_classification.py | 4 +- .../ml/inference/tensorflow_inference.py | 10 +++-- .../inference/tensorflow_inference_it_test.py | 2 +- .../ml/inference/tensorflow_inference_test.py | 38 ++++++++++--------- 4 files changed, 30 insertions(+), 24 deletions(-) diff --git a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py index 4ec0de7d7de9..c5d7cc539a9e 100644 --- a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py +++ b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py @@ -15,7 +15,6 @@ # limitations under the 
License. # - import argparse import logging from typing import Iterable, Tuple @@ -42,11 +41,13 @@ class PostProcessor(beam.DoFn): """Process the PredictionResult to get the predicted label. Returns a comma separated string with true label and predicted label. """ + def process(self, element: Tuple[int, PredictionResult]) -> Iterable[str]: label, prediction_result = element prediction = numpy.argmax(prediction_result.inference, axis=0) yield '{},{}'.format(label, prediction) + def parse_known_args(argv): """Parses args for the workflow.""" parser = argparse.ArgumentParser() @@ -110,4 +111,3 @@ def run( if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) run() - \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index d7dd75b43431..419e0a99023e 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -39,10 +39,11 @@ ] TensorInferenceFn = Callable[[ - tf.Module, Sequence[numpy.ndarray], Optional[Dict[str, Any]], Optional[str] + tf.Module, Sequence[numpy.ndarray], Optional[Dict[str, Any]], Optional[str] ], Iterable[PredictionResult]] + def _load_model(model_uri): return tf.keras.models.load_model(model_uri) @@ -53,7 +54,7 @@ def default_numpy_inference_fn( inference_args: Optional[Dict[str, Any]] = None, model_id: Optional[str] = None) -> Iterable[PredictionResult]: vectorized_batch = numpy.stack(batch, axis=0) - return utils._convert_to_result( + return utils._convert_to_result( batch, model.predict(vectorized_batch), model_id) @@ -70,6 +71,7 @@ def default_tensor_inference_fn( class TFModelHandlerNumpy(ModelHandler[numpy.ndarray, PredictionResult, tf.Module]): + def __init__( self, model_uri: str, @@ -126,7 +128,7 @@ def run_inference( An Iterable of type PredictionResult. """ inference_args = {} if not inference_args else inference_args - + return self._inference_fn(model, batch, inference_args, self._model_uri) def get_num_bytes(self, batch: Sequence[numpy.ndarray]) -> int: @@ -149,6 +151,7 @@ def validate_inference_args(self, inference_args: Optional[Dict[str, Any]]): class TFModelHandlerTensor(ModelHandler[tf.Tensor, PredictionResult, tf.Module]): + def __init__( self, model_uri: str, @@ -223,4 +226,3 @@ def get_metrics_namespace(self) -> str: def validate_inference_args(self, inference_args: Optional[Dict[str, Any]]): pass - \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index 04eb5ab13cfa..70a4dceda9bb 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -48,6 +48,7 @@ def process_outputs(filepath): tf is None, 'Missing dependencies. 
' 'Test depends on tensorflow') class TensorflowInference(unittest.TestCase): + @pytest.mark.uses_tensorflow @pytest.mark.it_postcommit def test_tf_mnist_classification(self): @@ -85,4 +86,3 @@ def test_tf_mnist_classification(self): if __name__ == '__main__': logging.getLogger().setLevel(logging.DEBUG) unittest.main() - \ No newline at end of file diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index f48c8b510705..3c6f14017373 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -32,11 +32,13 @@ class FakeTFNumpyModel: + def predict(self, input: numpy.ndarray): return numpy.multiply(input, 10) class FakeTFTensorModel: + def predict(self, input: tf.Tensor): return tf.math.multiply(input, 10) @@ -46,12 +48,11 @@ def _compare_tensor_prediction_result(x, y): class TFRunInferenceTest(unittest.TestCase): + def test_predict_numpy(self): fake_model = FakeTFNumpyModel() inference_runner = TFModelHandlerNumpy(model_uri='unused') - batched_examples = [ - numpy.array([1]), numpy.array([10]), numpy.array([100]) - ] + batched_examples = [numpy.array([1]), numpy.array([10]), numpy.array([100])] expected_predictions = [ PredictionResult(numpy.array([1]), 10), PredictionResult(numpy.array([10]), 100), @@ -71,8 +72,9 @@ def test_predict_tensor(self): tf.convert_to_tensor(numpy.array([100])), ] expected_predictions = [ - PredictionResult(ex, pred) - for ex, pred in zip(batched_examples, + PredictionResult(ex, pred) for ex, + pred in zip( + batched_examples, [tf.math.multiply(n, 10) for n in batched_examples]) ] @@ -80,19 +82,19 @@ def test_predict_tensor(self): for actual, expected in zip(inferences, expected_predictions): self.assertTrue(_compare_tensor_prediction_result(actual, expected)) - def test_predict_keyed_numpy(self): fake_model = FakeTFNumpyModel() inference_runner = KeyedModelHandler( - TFModelHandlerNumpy(model_uri='unused')) + TFModelHandlerNumpy(model_uri='unused')) batched_examples = [ - ('k1', numpy.array([1], dtype=numpy.int64)), - ('k2', numpy.array([10], dtype=numpy.int64)), - ('k3', numpy.array([100], dtype=numpy.int64)), + ('k1', numpy.array([1], dtype=numpy.int64)), + ('k2', numpy.array([10], dtype=numpy.int64)), + ('k3', numpy.array([100], dtype=numpy.int64)), ] expected_predictions = [ - (ex[0],PredictionResult(ex[1], pred)) - for ex, pred in zip(batched_examples, + (ex[0], PredictionResult(ex[1], pred)) for ex, + pred in zip( + batched_examples, [numpy.multiply(n[1], 10) for n in batched_examples]) ] inferences = inference_runner.run_inference(batched_examples, fake_model) @@ -105,18 +107,20 @@ def test_predict_keyed_tensor(self): inference_runner = KeyedModelHandler( TFModelHandlerTensor(model_uri='unused')) batched_examples = [ - ('k1', tf.convert_to_tensor(numpy.array([1]))), - ('k2', tf.convert_to_tensor(numpy.array([10]))), - ('k3', tf.convert_to_tensor(numpy.array([100]))), + ('k1', tf.convert_to_tensor(numpy.array([1]))), + ('k2', tf.convert_to_tensor(numpy.array([10]))), + ('k3', tf.convert_to_tensor(numpy.array([100]))), ] expected_predictions = [ - (ex[0],PredictionResult(ex[1], pred)) - for ex, pred in zip(batched_examples, + (ex[0], PredictionResult(ex[1], pred)) for ex, + pred in zip( + batched_examples, [tf.math.multiply(n[1], 10) for n in batched_examples]) ] inferences = inference_runner.run_inference(batched_examples, fake_model) for actual, expected in zip(inferences, 
expected_predictions): self.assertTrue(_compare_tensor_prediction_result(actual[1], expected[1])) + if __name__ == '__main__': unittest.main() From ee905eed9e98e6b02c1292b0695c962fd9f77466 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Tue, 7 Feb 2023 18:39:51 -0500 Subject: [PATCH 24/45] rm spare lines --- sdks/python/apache_beam/ml/inference/tensorflow_inference.py | 2 -- .../apache_beam/ml/inference/tensorflow_inference_it_test.py | 1 - .../apache_beam/ml/inference/tensorflow_inference_test.py | 3 --- 3 files changed, 6 deletions(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index 419e0a99023e..e4b340dc297d 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -71,7 +71,6 @@ def default_tensor_inference_fn( class TFModelHandlerNumpy(ModelHandler[numpy.ndarray, PredictionResult, tf.Module]): - def __init__( self, model_uri: str, @@ -151,7 +150,6 @@ def validate_inference_args(self, inference_args: Optional[Dict[str, Any]]): class TFModelHandlerTensor(ModelHandler[tf.Tensor, PredictionResult, tf.Module]): - def __init__( self, model_uri: str, diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index 70a4dceda9bb..82910cfa63f1 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -48,7 +48,6 @@ def process_outputs(filepath): tf is None, 'Missing dependencies. ' 'Test depends on tensorflow') class TensorflowInference(unittest.TestCase): - @pytest.mark.uses_tensorflow @pytest.mark.it_postcommit def test_tf_mnist_classification(self): diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index 3c6f14017373..926622fe0d61 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -32,13 +32,11 @@ class FakeTFNumpyModel: - def predict(self, input: numpy.ndarray): return numpy.multiply(input, 10) class FakeTFTensorModel: - def predict(self, input: tf.Tensor): return tf.math.multiply(input, 10) @@ -48,7 +46,6 @@ def _compare_tensor_prediction_result(x, y): class TFRunInferenceTest(unittest.TestCase): - def test_predict_numpy(self): fake_model = FakeTFNumpyModel() inference_runner = TFModelHandlerNumpy(model_uri='unused') From b54436f189a97397e32bee17c31b2676e6337e28 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Fri, 10 Feb 2023 15:55:54 -0500 Subject: [PATCH 25/45] merge master --- .../apache_beam/examples/inference/README.md | 60 ++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/examples/inference/README.md b/sdks/python/apache_beam/examples/inference/README.md index 7d71b1d2826f..1d89e0c2ec64 100644 --- a/sdks/python/apache_beam/examples/inference/README.md +++ b/sdks/python/apache_beam/examples/inference/README.md @@ -32,6 +32,15 @@ because the `apache_beam.examples.inference` module was added in that release. pip install apache-beam==2.40.0 ``` +### Tensorflow dependencies + +The following installation requirement is for the Tensorflow model handler examples. + +The RunInference API supports the Tensorflow framework. 
To use Tensorflow locally, first install `tensorflow`. +``` +pip install tensorflow==2.11.0 +``` + ### PyTorch dependencies The following installation requirements are for the files used in these examples. @@ -417,4 +426,53 @@ python -m apache_beam.examples.inference.onnx_sentiment_classification.py \ This writes the output to the output file path with contents like: ``` A comedy-drama of nearly epic proportions rooted in a sincere performance by the title character undergoing midlife crisis .;1 -``` \ No newline at end of file +``` + +## MNIST digit classification with Tensorflow +[`tensorflow_mnist_classification.py`](./tensorflow_mnist_classification.py) contains an implementation for a RunInference pipeline that performs image classification on handwritten digits from the [MNIST](https://en.wikipedia.org/wiki/MNIST_database) database. + +The pipeline reads rows of pixels corresponding to a digit, performs basic preprocessing(converts the input shape to 28x28), passes the pixels to the trained Tensorflow model with RunInference, and then writes the predictions to a text file. + +### Dataset and model for language modeling + +To use this transform, you need a dataset and model for language modeling. + +1. Create a file named `INPUT.csv` that contains labels and pixels to feed into the model. Each row should have comma-separated elements. The first element is the label. All other elements are pixel values. The csv should not have column headers. The content of the file should be similar to the following example: +``` +1,0,0,0... +0,0,0,0... +1,0,0,0... +4,0,0,0... +... +``` +2. Save the trained tensorflow model to a directory `MODEL_DIR` . + + +### Running `tensorflow_mnist_classification.py` + +To run the MNIST classification pipeline locally, use the following command: +```sh +python -m apache_beam.examples.inference.tensorflow_mnist_classification.py \ + --input INPUT \ + --output OUTPUT \ + --model_path MODEL_DIR +``` +For example: +```sh +python -m apache_beam.examples.inference.tensorflow_mnist_classification.py \ + --input INPUT.csv \ + --output predictions.txt \ + --model_path MODEL_DIR +``` + +This writes the output to the `predictions.txt` with contents like: +``` +1,1 +4,4 +0,0 +7,7 +3,3 +5,5 +... +``` +Each line has data separated by a comma ",". The first item is the actual label of the digit. The second item is the predicted label of the digit. From dd7c49d6a4a974a7997502bb979dc3ad36fdc0ad Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 8 Feb 2023 10:01:54 -0500 Subject: [PATCH 26/45] test requirement file --- .../tensorflow_tests_requirements.txt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt b/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt new file mode 100644 index 000000000000..40bbab86d080 --- /dev/null +++ b/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt @@ -0,0 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +tensorflow>=2.11.0 From 86d7329d33a68aa48b5e60ad86c41263e84b5fe8 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 8 Feb 2023 10:10:40 -0500 Subject: [PATCH 27/45] add test to gradle --- sdks/python/test-suites/direct/common.gradle | 30 +++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/sdks/python/test-suites/direct/common.gradle b/sdks/python/test-suites/direct/common.gradle index 9281355ad654..6fc22c285044 100644 --- a/sdks/python/test-suites/direct/common.gradle +++ b/sdks/python/test-suites/direct/common.gradle @@ -281,11 +281,39 @@ task tfxInferenceTest { } } +// TensorFlow RunInference IT tests +task tensorflowInferenceTest { + dependsOn 'installGcpTest' + dependsOn ':sdks:python:sdist' + def requirementsFile = "${rootDir}/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt" + doFirst { + exec { + executable 'sh' + args '-c', ". ${envdir}/bin/activate && pip install -r $requirementsFile" + } + } + doLast { + def testOpts = basicTestOpts + def argMap = [ + "test_opts": testOpts, + "suite": "postCommitIT-direct-py${pythonVersionSuffix}", + "collect": "uses_tensorflow and it_postcommit" , + "runner": "TestDirectRunner" + ] + def cmdArgs = mapToArgString(argMap) + exec { + executable 'sh' + args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs" + } + } +} + // Add all the RunInference framework IT tests to this gradle task that runs on Direct Runner Post commit suite. project.tasks.register("inferencePostCommitIT") { dependsOn = [ 'torchInferenceTest', 'sklearnInferenceTest', - 'tfxInferenceTest' + 'tfxInferenceTest', + 'tensorflowInferenceTest' ] } From 8ca2a1d48adbd2a85421eb57b951d151cfc1eff4 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 8 Feb 2023 10:28:44 -0500 Subject: [PATCH 28/45] add test tasks for tf --- .../ml/inference/tensorflow_inference_it_test.py | 2 +- .../ml/inference/tensorflow_inference_test.py | 4 ++-- sdks/python/test-suites/direct/common.gradle | 2 +- sdks/python/tox.ini | 13 ++++++++++++- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index 82910cfa63f1..a86e176830c9 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -48,7 +48,7 @@ def process_outputs(filepath): tf is None, 'Missing dependencies. 
' 'Test depends on tensorflow') class TensorflowInference(unittest.TestCase): - @pytest.mark.uses_tensorflow + @pytest.mark.uses_tf @pytest.mark.it_postcommit def test_tf_mnist_classification(self): test_pipeline = TestPipeline(is_integration_test=True) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index 926622fe0d61..372ae8a0af07 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -59,7 +59,7 @@ def test_predict_numpy(self): for actual, expected in zip(inferences, expected_predictions): self.assertTrue(_compare_prediction_result(actual, expected)) - @pytest.mark.uses_tensorflow + @pytest.mark.uses_tf def test_predict_tensor(self): fake_model = FakeTFTensorModel() inference_runner = TFModelHandlerTensor(model_uri='unused') @@ -98,7 +98,7 @@ def test_predict_keyed_numpy(self): for actual, expected in zip(inferences, expected_predictions): self.assertTrue(_compare_prediction_result(actual[1], expected[1])) - @pytest.mark.uses_tensorflow + @pytest.mark.uses_tf def test_predict_keyed_tensor(self): fake_model = FakeTFTensorModel() inference_runner = KeyedModelHandler( diff --git a/sdks/python/test-suites/direct/common.gradle b/sdks/python/test-suites/direct/common.gradle index 6fc22c285044..80e0bf052e57 100644 --- a/sdks/python/test-suites/direct/common.gradle +++ b/sdks/python/test-suites/direct/common.gradle @@ -297,7 +297,7 @@ task tensorflowInferenceTest { def argMap = [ "test_opts": testOpts, "suite": "postCommitIT-direct-py${pythonVersionSuffix}", - "collect": "uses_tensorflow and it_postcommit" , + "collect": "uses_tf and it_postcommit" , "runner": "TestDirectRunner" ] def cmdArgs = mapToArgString(argMap) diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 7c842ef5d6f9..6028321c4025 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -338,4 +338,15 @@ commands = /bin/sh -c "pip freeze | grep -E onnx" # Run all ONNX unit tests pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_onnx {posargs} - \ No newline at end of file + +[testenv:py{37,38,39,310}-tf-{211}] +deps = + -r build-requirements.txt + 211: tensorflow>=2.11.0 +extras = test,gcp +commands = + # Log torch version for debugging + /bin/sh -c "pip freeze | grep -E torch" + # Run all Tensorflow unit tests + # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories. 
+ /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_tf {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' From 613068f41501e6ab40047771277ffb0b8c857eb3 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 8 Feb 2023 11:36:59 -0500 Subject: [PATCH 29/45] unit test --- .../ml/inference/tensorflow_inference.py | 31 ++++++++++++++----- .../ml/inference/tensorflow_inference_test.py | 24 +++++++++++++- sdks/python/pytest.ini | 1 + 3 files changed, 48 insertions(+), 8 deletions(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index e4b340dc297d..dbdb9f36cad8 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -18,6 +18,7 @@ # pytype: skip-file from cmath import inf +import enum from typing import Any from typing import Callable from typing import Dict @@ -44,8 +45,16 @@ Iterable[PredictionResult]] -def _load_model(model_uri): - return tf.keras.models.load_model(model_uri) +class ModelType(enum.Enum): + """Defines how a model file should be loaded.""" + SAVED_MODEL = 1 + + +def _load_model(model_uri, model_type): + if model_type == ModelType.SAVED_MODEL: + return tf.keras.models.load_model(model_uri) + else: + raise AssertionError('Unsupported model type for loading.') def default_numpy_inference_fn( @@ -55,7 +64,7 @@ def default_numpy_inference_fn( model_id: Optional[str] = None) -> Iterable[PredictionResult]: vectorized_batch = numpy.stack(batch, axis=0) return utils._convert_to_result( - batch, model.predict(vectorized_batch), model_id) + batch, model.predict(vectorized_batch, **inference_args), model_id) def default_tensor_inference_fn( @@ -65,7 +74,7 @@ def default_tensor_inference_fn( model_id: Optional[str] = None) -> Iterable[PredictionResult]: vectorized_batch = tf.stack(batch, axis=0) return utils._convert_to_result( - batch, model.predict(vectorized_batch), model_id) + batch, model.predict(vectorized_batch, **inference_args), model_id) class TFModelHandlerNumpy(ModelHandler[numpy.ndarray, @@ -74,6 +83,7 @@ class TFModelHandlerNumpy(ModelHandler[numpy.ndarray, def __init__( self, model_uri: str, + model_type: ModelType = ModelType.SAVED_MODEL, *, inference_fn: TensorInferenceFn = default_numpy_inference_fn): """Implementation of the ModelHandler interface for Tensorflow. @@ -86,6 +96,8 @@ def __init__( Args: model_uri (str): path to the trained model. + model_type (ModelType): type of model to be loaded. + Defaults to SAVED_MODEL. inference_fn (TensorInferenceFn, optional): inference function to use during RunInference. Defaults to default_numpy_inference_fn. @@ -93,11 +105,12 @@ def __init__( with Tensorflow 2.11. """ self._model_uri = model_uri + self._model_type = model_type self._inference_fn = inference_fn def load_model(self) -> tf.Module: """Loads and initializes a Tensorflow model for processing.""" - return _load_model(self._model_uri) + return _load_model(self._model_uri, self._model_type) def update_model_path(self, model_path: Optional[str] = None): self._model_uri = model_path if model_path else self._model_uri @@ -153,6 +166,7 @@ class TFModelHandlerTensor(ModelHandler[tf.Tensor, PredictionResult, def __init__( self, model_uri: str, + model_type: ModelType = ModelType.SAVED_MODEL, *, inference_fn: TensorInferenceFn = default_tensor_inference_fn): """Implementation of the ModelHandler interface for Tensorflow. 
@@ -165,6 +179,8 @@ def __init__( Args: model_uri (str): path to the trained model. + model_type (ModelType): type of model to be loaded. + Defaults to SAVED_MODEL. inference_fn (TensorInferenceFn, optional): inference function to use during RunInference. Defaults to default_numpy_inference_fn. @@ -172,11 +188,12 @@ def __init__( with Tensorflow 2.11. """ self._model_uri = model_uri + self._model_type = model_type self._inference_fn = inference_fn def load_model(self) -> tf.Module: """Loads and initializes a tensorflow model for processing.""" - return _load_model(self._model_uri) + return _load_model(self._model_uri, self._model_type) def update_model_path(self, model_path: Optional[str] = None): self._model_uri = model_path if model_path else self._model_uri @@ -220,7 +237,7 @@ def get_metrics_namespace(self) -> str: Returns: A namespace for metrics collected by the RunInference transform. """ - return 'BeamML_TF_Tensors' + return 'BeamML_TF_Tensor' def validate_inference_args(self, inference_args: Optional[Dict[str, Any]]): pass diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index 372ae8a0af07..2dd524af3a6c 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -37,7 +37,9 @@ def predict(self, input: numpy.ndarray): class FakeTFTensorModel: - def predict(self, input: tf.Tensor): + def predict(self, input: tf.Tensor, add=False): + if add: + return tf.math.add(tf.math.multiply(input, 10), 10) return tf.math.multiply(input, 10) @@ -78,6 +80,26 @@ def test_predict_tensor(self): inferences = inference_runner.run_inference(batched_examples, fake_model) for actual, expected in zip(inferences, expected_predictions): self.assertTrue(_compare_tensor_prediction_result(actual, expected)) + + @pytest.mark.uses_tf + def test_predict_tensor_with_args(self): + fake_model = FakeTFTensorModel() + inference_runner = TFModelHandlerTensor(model_uri='unused') + batched_examples = [ + tf.convert_to_tensor(numpy.array([1])), + tf.convert_to_tensor(numpy.array([10])), + tf.convert_to_tensor(numpy.array([100])), + ] + expected_predictions = [ + PredictionResult(ex, pred) for ex, + pred in zip( + batched_examples, + [tf.math.add(tf.math.multiply(n, 10), 10) for n in batched_examples]) + ] + + inferences = inference_runner.run_inference(batched_examples, fake_model, inference_args={"add":True}) + for actual, expected in zip(inferences, expected_predictions): + self.assertTrue(_compare_tensor_prediction_result(actual, expected)) def test_predict_keyed_numpy(self): fake_model = FakeTFNumpyModel() diff --git a/sdks/python/pytest.ini b/sdks/python/pytest.ini index 560ef2f187af..2733b2511d05 100644 --- a/sdks/python/pytest.ini +++ b/sdks/python/pytest.ini @@ -53,6 +53,7 @@ markers = uses_tensorflow: tests that utilize tensorflow in some way uses_tft: tests that utilizes tensorflow transforms in some way. uses_onnx: tests that utilizes onnx in some way. + uses_tf: tests that utilize tensorflow # Default timeout intended for unit tests. 
# If certain tests need a different value, please see the docs on how to From 0fd2b3080037abea4b41b3d9981e7251b8e5834e Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 8 Feb 2023 13:03:37 -0500 Subject: [PATCH 30/45] lints --- .../ml/inference/tensorflow_inference.py | 19 +++++++++---------- .../ml/inference/tensorflow_inference_test.py | 11 +++++++---- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index dbdb9f36cad8..2d4d7444ca50 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -17,7 +17,6 @@ # pytype: skip-file -from cmath import inf import enum from typing import Any from typing import Callable @@ -48,7 +47,7 @@ class ModelType(enum.Enum): """Defines how a model file should be loaded.""" SAVED_MODEL = 1 - + def _load_model(model_uri, model_type): if model_type == ModelType.SAVED_MODEL: @@ -93,14 +92,14 @@ def __init__( pcoll | RunInference(TFModelHandlerNumpy(model_uri="my_uri")) See https://www.tensorflow.org/tutorials/keras/save_and_load for details. - + Args: model_uri (str): path to the trained model. - model_type (ModelType): type of model to be loaded. + model_type (ModelType): type of model to be loaded. Defaults to SAVED_MODEL. inference_fn (TensorInferenceFn, optional): inference function to use during RunInference. Defaults to default_numpy_inference_fn. - + **Supported Versions:** RunInference APIs in Apache Beam have been tested with Tensorflow 2.11. """ @@ -176,14 +175,14 @@ def __init__( pcoll | RunInference(TFModelHandlerTensor(model_uri="my_uri")) See https://www.tensorflow.org/tutorials/keras/save_and_load for details. - + Args: model_uri (str): path to the trained model. - model_type (ModelType): type of model to be loaded. + model_type (ModelType): type of model to be loaded. Defaults to SAVED_MODEL. inference_fn (TensorInferenceFn, optional): inference function to use during RunInference. Defaults to default_numpy_inference_fn. - + **Supported Versions:** RunInference APIs in Apache Beam have been tested with Tensorflow 2.11. """ @@ -207,10 +206,10 @@ def run_inference( """ Runs inferences on a batch of tf.Tensor and returns an Iterable of Tensor Predictions. - + This method stacks the list of Tensors in a vectorized format to optimize the inference call. - + Args: batch: A sequence of Tensors. 
These Tensors should be batchable, as this method will call `tf.stack()` and pass in batched Tensors with diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index 2dd524af3a6c..9770b4fc898f 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -80,7 +80,7 @@ def test_predict_tensor(self): inferences = inference_runner.run_inference(batched_examples, fake_model) for actual, expected in zip(inferences, expected_predictions): self.assertTrue(_compare_tensor_prediction_result(actual, expected)) - + @pytest.mark.uses_tf def test_predict_tensor_with_args(self): fake_model = FakeTFTensorModel() @@ -93,11 +93,14 @@ def test_predict_tensor_with_args(self): expected_predictions = [ PredictionResult(ex, pred) for ex, pred in zip( - batched_examples, - [tf.math.add(tf.math.multiply(n, 10), 10) for n in batched_examples]) + batched_examples, [ + tf.math.add(tf.math.multiply(n, 10), 10) + for n in batched_examples + ]) ] - inferences = inference_runner.run_inference(batched_examples, fake_model, inference_args={"add":True}) + inferences = inference_runner.run_inference( + batched_examples, fake_model, inference_args={"add": True}) for actual, expected in zip(inferences, expected_predictions): self.assertTrue(_compare_tensor_prediction_result(actual, expected)) From 1e80e707d91aaf4c483547d1ea341d06a2a306e1 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 8 Feb 2023 13:17:40 -0500 Subject: [PATCH 31/45] updated inferenceFn type --- .../apache_beam/ml/inference/tensorflow_inference.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index 2d4d7444ca50..88c247bfddc1 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -18,7 +18,7 @@ # pytype: skip-file import enum -from typing import Any +from typing import Any, Union from typing import Callable from typing import Dict from typing import Iterable @@ -39,7 +39,10 @@ ] TensorInferenceFn = Callable[[ - tf.Module, Sequence[numpy.ndarray], Optional[Dict[str, Any]], Optional[str] + tf.Module, + Sequence[Union[numpy.ndarray, tf.Tensor]], + Optional[Dict[str, Any]], + Optional[str] ], Iterable[PredictionResult]] From 38210fcd0cf7235e60010b06f2ce27e7df4adfe1 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 8 Feb 2023 13:31:26 -0500 Subject: [PATCH 32/45] add tox info for py38 --- sdks/python/test-suites/tox/py38/build.gradle | 12 ++++++++++++ sdks/python/tox.ini | 5 ++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/sdks/python/test-suites/tox/py38/build.gradle b/sdks/python/test-suites/tox/py38/build.gradle index ea803faabc52..7d582bd89c1a 100644 --- a/sdks/python/test-suites/tox/py38/build.gradle +++ b/sdks/python/test-suites/tox/py38/build.gradle @@ -106,6 +106,18 @@ preCommitPyCoverage.dependsOn "testPy38pytorch-113" toxTask "testPy38onnx-113", "py38-onnx-113", "${posargs}" test.dependsOn "testPy38onnx-113" preCommitPyCoverage.dependsOn "testPy38onnx-113" +// Create a test task for each minor version of tensorflow +toxTask "testPy38tensorflow-29", "py38-tensorflow-29", "${posargs}" +test.dependsOn "testPy38tensorflow-29" +preCommitPyCoverage.dependsOn "testPy38tensorflow-29" + +toxTask 
"testPy38tensorflow-210", "py38-tensorflow-210", "${posargs}" +test.dependsOn "testPy38tensorflow-210" +preCommitPyCoverage.dependsOn "testPy38tensorflow-210" + +toxTask "testPy38tensorflow-211", "py38-tensorflow-211", "${posargs}" +test.dependsOn "testPy38tensorflow-211" +preCommitPyCoverage.dependsOn "testPy38tensorflow-211" toxTask "whitespacelint", "whitespacelint", "${posargs}" diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 6028321c4025..adb02cc18cf1 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -340,9 +340,12 @@ commands = pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_onnx {posargs} [testenv:py{37,38,39,310}-tf-{211}] +[testenv:py{37,38,39,310}-tensorflow-{29,210,211}] deps = -r build-requirements.txt - 211: tensorflow>=2.11.0 + 29: tensorflow>=2.9.0,<2.10.0 + 210: tensorflow>=2.10.0,<2.11.0 + 211: tensorflow>=2.11.0,<2.12.0 extras = test,gcp commands = # Log torch version for debugging From 521bd78b831d812ba88fc29d10d80bb9a62c5a02 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 8 Feb 2023 14:06:59 -0500 Subject: [PATCH 33/45] pylint --- .../examples/inference/tensorflow_mnist_classification.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py index c5d7cc539a9e..600a16c2fb67 100644 --- a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py +++ b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py @@ -32,8 +32,8 @@ def process_input(row: str) -> Tuple[int, numpy.ndarray]: data = row.split(',') label, pixels = int(data[0]), data[1:] pixels = [int(pixel) for pixel in pixels] - # the trained model accepts the input of shape 28x28x1 - pixels = numpy.array(pixels).reshape(28, 28, 1) + # the trained model accepts the input of shape 28x28 + pixels = numpy.array(pixels).reshape((28, 28, 1)) return label, pixels @@ -41,7 +41,6 @@ class PostProcessor(beam.DoFn): """Process the PredictionResult to get the predicted label. Returns a comma separated string with true label and predicted label. 
""" - def process(self, element: Tuple[int, PredictionResult]) -> Iterable[str]: label, prediction_result = element prediction = numpy.argmax(prediction_result.inference, axis=0) From 029cc958b19e18032c7dbefb9379582914eba6da Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 8 Feb 2023 16:57:40 -0500 Subject: [PATCH 34/45] lints --- .../examples/inference/tensorflow_mnist_classification.py | 3 +-- sdks/python/apache_beam/ml/inference/tensorflow_inference.py | 2 +- .../apache_beam/ml/inference/tensorflow_inference_it_test.py | 4 ---- .../apache_beam/ml/inference/tensorflow_inference_test.py | 3 +-- sdks/python/tox.ini | 5 +++-- 5 files changed, 6 insertions(+), 11 deletions(-) diff --git a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py index 600a16c2fb67..7f4f1bb341da 100644 --- a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py +++ b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py @@ -16,9 +16,8 @@ # import argparse -import logging from typing import Iterable, Tuple - +import logging import numpy import apache_beam as beam diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index 88c247bfddc1..45967b2a8c56 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -25,8 +25,8 @@ from typing import Optional from typing import Sequence -import sys import numpy +import sys import tensorflow as tf from apache_beam.ml.inference import utils diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index a86e176830c9..0a932dad7c53 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -18,10 +18,6 @@ """End-to-End test for Tensorflow Inference""" import logging - -from typing import Tuple -from typing import List - import pytest import unittest import uuid diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index 9770b4fc898f..3813931f55ae 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -17,9 +17,8 @@ # pytype: skip-file -import unittest - import numpy +import unittest import pytest try: diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index adb02cc18cf1..eb59d07b252a 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -153,6 +153,7 @@ deps = torch onnxruntime onnx + tensorflow commands = time {toxinidir}/scripts/generate_pydoc.sh @@ -348,8 +349,8 @@ deps = 211: tensorflow>=2.11.0,<2.12.0 extras = test,gcp commands = - # Log torch version for debugging - /bin/sh -c "pip freeze | grep -E torch" + # Log tensorflow version for debugging + /bin/sh -c "pip freeze | grep -E tensorflow" # Run all Tensorflow unit tests # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories. 
/bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_tf {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' From efec494358e3e7d6c97c5bf331bc137fe17d500d Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Fri, 10 Feb 2023 15:49:09 -0500 Subject: [PATCH 35/45] using tfhub --- sdks/python/apache_beam/examples/inference/README.md | 2 +- .../examples/inference/tensorflow_mnist_classification.py | 5 +++-- sdks/python/apache_beam/ml/inference/tensorflow_inference.py | 3 ++- .../apache_beam/ml/inference/tensorflow_inference_it_test.py | 4 ++-- .../ml/inference/tensorflow_tests_requirements.txt | 3 ++- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/sdks/python/apache_beam/examples/inference/README.md b/sdks/python/apache_beam/examples/inference/README.md index 1d89e0c2ec64..e9a257e97e67 100644 --- a/sdks/python/apache_beam/examples/inference/README.md +++ b/sdks/python/apache_beam/examples/inference/README.md @@ -437,7 +437,7 @@ The pipeline reads rows of pixels corresponding to a digit, performs basic prepr To use this transform, you need a dataset and model for language modeling. -1. Create a file named `INPUT.csv` that contains labels and pixels to feed into the model. Each row should have comma-separated elements. The first element is the label. All other elements are pixel values. The csv should not have column headers. The content of the file should be similar to the following example: +1. Create a file named [`INPUT.csv`](gs://apache-beam-ml/testing/inputs/it_mnist_data.csv) that contains labels and pixels to feed into the model. Each row should have comma-separated elements. The first element is the label. All other elements are pixel values. The csv should not have column headers. The content of the file should be similar to the following example: ``` 1,0,0,0... 0,0,0,0... diff --git a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py index 7f4f1bb341da..eb026c3158f6 100644 --- a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py +++ b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py @@ -22,7 +22,7 @@ import apache_beam as beam from apache_beam.ml.inference.base import KeyedModelHandler, PredictionResult, RunInference -from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy +from apache_beam.ml.inference.tensorflow_inference import ModelType, TFModelHandlerNumpy from apache_beam.options.pipeline_options import PipelineOptions, SetupOptions from apache_beam.runners.runner import PipelineResult @@ -82,7 +82,8 @@ def run( # In this example we pass keyed inputs to RunInference transform. # Therefore, we use KeyedModelHandler wrapper over TFModelHandlerNumpy. 
model_loader = KeyedModelHandler( - TFModelHandlerNumpy(model_uri=known_args.model_path)) + TFModelHandlerNumpy( + model_uri=known_args.model_path, model_type=ModelType.SAVED_MODEL)) pipeline = test_pipeline if not test_pipeline: diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index 45967b2a8c56..5c307f33210e 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -28,6 +28,7 @@ import numpy import sys import tensorflow as tf +import tensorflow_hub as hub from apache_beam.ml.inference import utils from apache_beam.ml.inference.base import ModelHandler @@ -54,7 +55,7 @@ class ModelType(enum.Enum): def _load_model(model_uri, model_type): if model_type == ModelType.SAVED_MODEL: - return tf.keras.models.load_model(model_uri) + return tf.keras.models.load_model(hub.resolve(model_uri)) else: raise AssertionError('Unsupported model type for loading.') diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index 0a932dad7c53..350aaf1dabc4 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -43,9 +43,9 @@ def process_outputs(filepath): @unittest.skipIf( tf is None, 'Missing dependencies. ' 'Test depends on tensorflow') +@pytest.mark.uses_tf +@pytest.mark.it_postcommit class TensorflowInference(unittest.TestCase): - @pytest.mark.uses_tf - @pytest.mark.it_postcommit def test_tf_mnist_classification(self): test_pipeline = TestPipeline(is_integration_test=True) input_file = 'gs://clouddfe-riteshghorse/tf/mnist/dataset/testing_inputs_it_mnist_data.csv' # pylint: disable=line-too-long diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt b/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt index 40bbab86d080..efe7ad4fcfcc 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt +++ b/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt @@ -15,4 +15,5 @@ # limitations under the License. 
#

-tensorflow>=2.11.0
+tensorflow>=2.0.0
+tensorflow_hub>-0.10.0

From 40568d4509415312bea14e2a1754db52dff9b022 Mon Sep 17 00:00:00 2001
From: riteshghorse
Date: Mon, 13 Feb 2023 12:04:06 -0500
Subject: [PATCH 36/45] tfhub example

---
 .../apache_beam/examples/inference/README.md  | 47 +++++++
 .../tensorflow_imagenet_segmentation.py       | 125 ++++++++++++++++++
 .../ml/inference/tensorflow_inference.py      | 14 +-
 .../inference/tensorflow_inference_it_test.py | 28 ++++
 .../tensorflow_tests_requirements.txt         | 4 +-
 5 files changed, 213 insertions(+), 5 deletions(-)
 create mode 100644 sdks/python/apache_beam/examples/inference/tensorflow_imagenet_segmentation.py

diff --git a/sdks/python/apache_beam/examples/inference/README.md b/sdks/python/apache_beam/examples/inference/README.md
index e9a257e97e67..b67c961a7750 100644
--- a/sdks/python/apache_beam/examples/inference/README.md
+++ b/sdks/python/apache_beam/examples/inference/README.md
@@ -428,6 +428,7 @@ This writes the output to the output file path with contents like:
 A comedy-drama of nearly epic proportions rooted in a sincere performance by the title character undergoing midlife crisis .;1
 ```
 
+---
 ## MNIST digit classification with Tensorflow
 
 [`tensorflow_mnist_classification.py`](./tensorflow_mnist_classification.py) contains an implementation for a RunInference pipeline that performs image classification on handwritten digits from the [MNIST](https://en.wikipedia.org/wiki/MNIST_database) database.
@@ -476,3 +477,49 @@ This writes the output to the `predictions.txt` with contents like:
 ...
 ```
 Each line has data separated by a comma ",". The first item is the actual label of the digit. The second item is the predicted label of the digit.
+
+---
+## Image segmentation with Tensorflow and TensorflowHub
+
+[`tensorflow_image_segmentation.py`](./tensorflow_image_segmentation.py) contains an implementation for a RunInference pipeline that performs image segmentation using the [`mobilenet_v2`]("https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4") architecture from the tensorflow hub.
+
+The pipeline reads images, performs basic preprocessing, passes the images to the Tensorflow implementation of RunInference, and then writes predictions to a text file.
+
+### Dataset and model for image segmentation
+
+To use this transform, you need a dataset and model for image segmentation.
+
+1. Create a directory named `IMAGE_DIR`. Create or download images and put them in this directory. We
+will use the [example images]("https://storage.googleapis.com/download.tensorflow.org/example_images/") provided by TensorFlow.
+2. Create a file named `IMAGE_FILE_NAMES.txt` that contains the names of each of the images in `IMAGE_DIR` that you want to use to run image segmentation. For example:
+```
+grace_hopper.jpg
+```
+3. Choose a TensorFlow `MODEL_PATH`; we will use the [mobilenet]("https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4") model (a loading sketch follows this list).
+4. Note the path to the `OUTPUT` file. This file is used by the pipeline to write the predictions.
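To make steps 3 and 4 concrete, here is a minimal loading sketch, assuming `tensorflow` and `tensorflow_hub` are installed and the network is reachable. It loads the TF Hub handle the same way this PR's `_load_model` does (`tf.keras.models.load_model(hub.resolve(model_uri))`) and pushes one dummy batch through; the `(224, 224, 3)` shape and `[0, 1]` scaling mirror `read_image` in the example file, and the printed shape is indicative only.

```python
import numpy
import tensorflow as tf
import tensorflow_hub as hub

MODEL_PATH = 'https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4'

# hub.resolve downloads and caches the model, returning a local directory
# that tf.keras.models.load_model can open like an on-disk SavedModel.
model = tf.keras.models.load_model(hub.resolve(MODEL_PATH))

# read_image in the example produces 224x224 RGB images scaled to [0, 1];
# a zero-filled batch is enough to confirm the input/output shapes line up.
batch = numpy.zeros((1, 224, 224, 3), dtype=numpy.float32)
logits = model(batch)
print(logits.shape)  # one score per ImageNet label, e.g. (1, 1001)
```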
+
+### Running `tensorflow_imagenet_segmentation.py`
+
+To run the image segmentation pipeline locally, use the following command:
+```sh
+python -m apache_beam.examples.inference.tensorflow_imagenet_segmentation \
+  --input IMAGE_FILE_NAMES \
+  --image_dir IMAGES_DIR \
+  --output OUTPUT \
+  --model_path MODEL_PATH
+```
+
+For example, if you've followed the naming conventions recommended above:
+```sh
+python -m apache_beam.examples.inference.tensorflow_imagenet_segmentation \
+  --input IMAGE_FILE_NAMES.txt \
+  --image_dir "https://storage.googleapis.com/download.tensorflow.org/example_images/" \
+  --output predictions.txt \
+  --model_path "https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4"
+```
+This writes the output to `predictions.txt` with contents like:
+```
+background
+...
+```
+Each line contains a predicted label.
diff --git a/sdks/python/apache_beam/examples/inference/tensorflow_imagenet_segmentation.py b/sdks/python/apache_beam/examples/inference/tensorflow_imagenet_segmentation.py
new file mode 100644
index 000000000000..3a2143eb69c4
--- /dev/null
+++ b/sdks/python/apache_beam/examples/inference/tensorflow_imagenet_segmentation.py
@@ -0,0 +1,125 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import argparse
+from typing import Iterable, Iterator
+import logging
+import numpy
+import tensorflow as tf
+
+import apache_beam as beam
+from apache_beam.ml.inference.base import PredictionResult, RunInference
+from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerTensor
+from apache_beam.options.pipeline_options import PipelineOptions, SetupOptions
+from apache_beam.runners.runner import PipelineResult
+
+
+class PostProcessor(beam.DoFn):
+  """Process the PredictionResult to get the predicted label.
+  Returns a comma separated string with true label and predicted label.
+ """ + def process(self, element: PredictionResult) -> Iterable[str]: + print("prediction result---->: %", element) + predicted_class = numpy.argmax(element.inference[0], axis=-1) + labels_path = tf.keras.utils.get_file( + 'ImageNetLabels.txt', + 'https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt' # pylint: disable=line-too-long + ) + imagenet_labels = numpy.array(open(labels_path).read().splitlines()) + predicted_class_name = imagenet_labels[predicted_class] + return predicted_class_name.title() + + +def parse_known_args(argv): + """Parses args for the workflow.""" + parser = argparse.ArgumentParser() + parser.add_argument( + '--input', + dest='input', + required=True, + help='Path to the text file containing image names.') + parser.add_argument( + '--output', + dest='output', + required=True, + help='Path to save output predictions.') + parser.add_argument( + '--model_path', + dest='model_path', + required=True, + help='Path to load the Tensorflow model for Inference.') + parser.add_argument( + '--image_dir', help='Path to the directory where images are stored.') + return parser.parse_known_args(argv) + + +def filter_empty_lines(text: str) -> Iterator[str]: + if len(text.strip()) > 0: + yield text + + +def read_image(image_name, image_dir): + from PIL import Image + img = tf.keras.utils.get_file(image_name, image_dir + image_name) + img = Image.open(img).resize((224, 224)) + img = numpy.array(img) / 255.0 + img_tensor = tf.cast(tf.convert_to_tensor(img[...]), dtype=tf.float32) + return img_tensor + + +def run( + argv=None, save_main_session=True, test_pipeline=None) -> PipelineResult: + """ + Args: + argv: Command line arguments defined for this example. + save_main_session: Used for internal testing. + test_pipeline: Used for internal testing. + """ + known_args, pipeline_args = parse_known_args(argv) + pipeline_options = PipelineOptions(pipeline_args) + pipeline_options.view_as(SetupOptions).save_main_session = save_main_session + + # In this example we will use the TensorflowHub model URL. 
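+  # The handler resolves this URI with tensorflow_hub before loading it
+  # (tf.keras.models.load_model(hub.resolve(model_uri))), so model_path may
+  # be a local SavedModel directory or a TF Hub handle such as the
+  # mobilenet_v2 URL used in this example.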
+ model_loader = TFModelHandlerTensor(model_uri=known_args.model_path) + + pipeline = test_pipeline + if not test_pipeline: + pipeline = beam.Pipeline(options=pipeline_options) + + image = ( + pipeline + | 'ReadImageNames' >> beam.io.ReadFromText(known_args.input) + | 'FilterEmptyLines' >> beam.ParDo(filter_empty_lines) + | "PreProcessInputs" >> + beam.Map(lambda image_name: read_image(image_name, known_args.image_dir))) + + predictions = ( + image + | "RunInference" >> RunInference(model_loader) + | "PostProcessOutputs" >> beam.ParDo(PostProcessor())) + + _ = predictions | "WriteOutput" >> beam.io.WriteToText( + known_args.output, shard_name_template='', append_trailing_newlines=False) + + result = pipeline.run() + result.wait_until_finish() + return result + + +if __name__ == '__main__': + logging.getLogger().setLevel(logging.INFO) + run() diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index 5c307f33210e..139c39c2a190 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -66,8 +66,11 @@ def default_numpy_inference_fn( inference_args: Optional[Dict[str, Any]] = None, model_id: Optional[str] = None) -> Iterable[PredictionResult]: vectorized_batch = numpy.stack(batch, axis=0) - return utils._convert_to_result( - batch, model.predict(vectorized_batch, **inference_args), model_id) + if inference_args: + predictions = model(vectorized_batch, **inference_args) + else: + predictions = model(vectorized_batch) + return utils._convert_to_result(batch, predictions, model_id) def default_tensor_inference_fn( @@ -76,8 +79,11 @@ def default_tensor_inference_fn( inference_args: Optional[Dict[str, Any]] = None, model_id: Optional[str] = None) -> Iterable[PredictionResult]: vectorized_batch = tf.stack(batch, axis=0) - return utils._convert_to_result( - batch, model.predict(vectorized_batch, **inference_args), model_id) + if inference_args: + predictions = model(vectorized_batch, **inference_args) + else: + predictions = model(vectorized_batch) + return utils._convert_to_result(batch, predictions, model_id) class TFModelHandlerNumpy(ModelHandler[numpy.ndarray, diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index 350aaf1dabc4..c7914d9792f2 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -23,6 +23,7 @@ import uuid from apache_beam.io.filesystems import FileSystems +from apache_beam.examples.inference import tensorflow_imagenet_segmentation from apache_beam.testing.test_pipeline import TestPipeline # pylint: disable=ungrouped-imports @@ -77,6 +78,33 @@ def test_tf_mnist_classification(self): true_label, expected_prediction = expected_outputs[i].split(',') self.assertEqual(predictions_dict[true_label], expected_prediction) + def test_tf_imagenet_image_classification(self): + test_pipeline = TestPipeline(is_integration_test=True) + input_file = 'gs://clouddfe-riteshghorse/tf/imagenet/input/input_labels.txt' # pylint: disable=line-too-long + image_dir = 'https://storage.googleapis.com/download.tensorflow.org/example_images/' # pylint: disable=line-too-long + output_file_dir = 'gs://clouddfe-riteshghorse/tf/imagenet/output' + output_file = '/'.join([output_file_dir, str(uuid.uuid4()), 'result.txt']) + model_path = 
'https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4' # pylint: disable=line-too-long + extra_opts = { + 'input': input_file, + 'output': output_file, + 'model_path': model_path, + 'image_dir': image_dir + } + tensorflow_imagenet_segmentation.run( + test_pipeline.get_full_options_as_args(**extra_opts), + save_main_session=False) + self.assertEqual(FileSystems().exists(output_file), True) + + expected_output_filepath = 'gs://clouddfe-riteshghorse/tf/imagenet/output/actuals.txt' # pylint: disable=line-too-long + expected_outputs = process_outputs(expected_output_filepath) + + predicted_outputs = process_outputs(output_file) + self.assertEqual(len(expected_outputs), len(predicted_outputs)) + + for true_label, predicted_label in zip(expected_outputs, predicted_outputs): + self.assertEqual(true_label, predicted_label) + if __name__ == '__main__': logging.getLogger().setLevel(logging.DEBUG) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt b/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt index efe7ad4fcfcc..0db3237052c1 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt +++ b/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt @@ -15,5 +15,7 @@ # limitations under the License. # -tensorflow>=2.0.0 +tensorflow>=1.0.0 tensorflow_hub>-0.10.0 +Pillow>=9.0.0 + From 4fe8a1d4d3a6adc5b7931036fe2a160392cb9180 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Mon, 13 Feb 2023 12:11:12 -0500 Subject: [PATCH 37/45] update doc --- .../examples/inference/tensorflow_imagenet_segmentation.py | 2 +- sdks/python/apache_beam/ml/inference/tensorflow_inference.py | 4 ++-- .../apache_beam/ml/inference/tensorflow_inference_test.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sdks/python/apache_beam/examples/inference/tensorflow_imagenet_segmentation.py b/sdks/python/apache_beam/examples/inference/tensorflow_imagenet_segmentation.py index 3a2143eb69c4..3255da8f2b8d 100644 --- a/sdks/python/apache_beam/examples/inference/tensorflow_imagenet_segmentation.py +++ b/sdks/python/apache_beam/examples/inference/tensorflow_imagenet_segmentation.py @@ -30,7 +30,7 @@ class PostProcessor(beam.DoFn): """Process the PredictionResult to get the predicted label. - Returns a comma separated string with true label and predicted label. + Returns predicted label. """ def process(self, element: PredictionResult) -> Iterable[str]: print("prediction result---->: %", element) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index 139c39c2a190..38fa87b2848d 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -107,7 +107,7 @@ def __init__( model_uri (str): path to the trained model. model_type (ModelType): type of model to be loaded. Defaults to SAVED_MODEL. - inference_fn (TensorInferenceFn, optional): inference function to use + inference_fn (TensorInferenceFn, Optional): inference function to use during RunInference. Defaults to default_numpy_inference_fn. **Supported Versions:** RunInference APIs in Apache Beam have been tested @@ -190,7 +190,7 @@ def __init__( model_uri (str): path to the trained model. model_type (ModelType): type of model to be loaded. Defaults to SAVED_MODEL. 
- inference_fn (TensorInferenceFn, optional): inference function to use + inference_fn (TensorInferenceFn, Optional): inference function to use during RunInference. Defaults to default_numpy_inference_fn. **Supported Versions:** RunInference APIs in Apache Beam have been tested diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index 3813931f55ae..9369624e5df5 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -18,8 +18,8 @@ # pytype: skip-file import numpy -import unittest import pytest +import unittest try: import tensorflow as tf From 368d87dd1c960f527a881bb090a09bae803d403a Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Mon, 13 Feb 2023 13:31:42 -0500 Subject: [PATCH 38/45] sort imports --- .../inference/tensorflow_imagenet_segmentation.py | 12 ++++++++---- .../inference/tensorflow_mnist_classification.py | 14 ++++++++++---- .../ml/inference/tensorflow_inference.py | 6 +++--- .../ml/inference/tensorflow_inference_it_test.py | 5 +++-- .../ml/inference/tensorflow_inference_test.py | 6 +++--- 5 files changed, 27 insertions(+), 16 deletions(-) diff --git a/sdks/python/apache_beam/examples/inference/tensorflow_imagenet_segmentation.py b/sdks/python/apache_beam/examples/inference/tensorflow_imagenet_segmentation.py index 3255da8f2b8d..70e6797d2a2f 100644 --- a/sdks/python/apache_beam/examples/inference/tensorflow_imagenet_segmentation.py +++ b/sdks/python/apache_beam/examples/inference/tensorflow_imagenet_segmentation.py @@ -16,15 +16,19 @@ # import argparse -from typing import Iterable, Iterator import logging +from typing import Iterable +from typing import Iterator + import numpy -import tensorflow as tf import apache_beam as beam -from apache_beam.ml.inference.base import PredictionResult, RunInference +import tensorflow as tf +from apache_beam.ml.inference.base import PredictionResult +from apache_beam.ml.inference.base import RunInference from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerTensor -from apache_beam.options.pipeline_options import PipelineOptions, SetupOptions +from apache_beam.options.pipeline_options import PipelineOptions +from apache_beam.options.pipeline_options import SetupOptions from apache_beam.runners.runner import PipelineResult diff --git a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py index eb026c3158f6..174d21b26af2 100644 --- a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py +++ b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_classification.py @@ -16,14 +16,20 @@ # import argparse -from typing import Iterable, Tuple import logging +from typing import Iterable +from typing import Tuple + import numpy import apache_beam as beam -from apache_beam.ml.inference.base import KeyedModelHandler, PredictionResult, RunInference -from apache_beam.ml.inference.tensorflow_inference import ModelType, TFModelHandlerNumpy -from apache_beam.options.pipeline_options import PipelineOptions, SetupOptions +from apache_beam.ml.inference.base import KeyedModelHandler +from apache_beam.ml.inference.base import PredictionResult +from apache_beam.ml.inference.base import RunInference +from apache_beam.ml.inference.tensorflow_inference import ModelType +from apache_beam.ml.inference.tensorflow_inference import 
TFModelHandlerNumpy +from apache_beam.options.pipeline_options import PipelineOptions +from apache_beam.options.pipeline_options import SetupOptions from apache_beam.runners.runner import PipelineResult diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index 38fa87b2848d..c53a0f088e95 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -18,18 +18,18 @@ # pytype: skip-file import enum -from typing import Any, Union +import sys +from typing import Any from typing import Callable from typing import Dict from typing import Iterable from typing import Optional from typing import Sequence +from typing import Union import numpy -import sys import tensorflow as tf import tensorflow_hub as hub - from apache_beam.ml.inference import utils from apache_beam.ml.inference.base import ModelHandler from apache_beam.ml.inference.base import PredictionResult diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index c7914d9792f2..1ddf01fa3112 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -18,12 +18,13 @@ """End-to-End test for Tensorflow Inference""" import logging -import pytest import unittest import uuid -from apache_beam.io.filesystems import FileSystems +import pytest + from apache_beam.examples.inference import tensorflow_imagenet_segmentation +from apache_beam.io.filesystems import FileSystems from apache_beam.testing.test_pipeline import TestPipeline # pylint: disable=ungrouped-imports diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index 9369624e5df5..47e027b4e499 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -17,9 +17,10 @@ # pytype: skip-file +import unittest + import numpy import pytest -import unittest try: import tensorflow as tf @@ -46,6 +47,7 @@ def _compare_tensor_prediction_result(x, y): return tf.math.equal(x.inference, y.inference) +@pytest.mark.uses_tf class TFRunInferenceTest(unittest.TestCase): def test_predict_numpy(self): fake_model = FakeTFNumpyModel() @@ -60,7 +62,6 @@ def test_predict_numpy(self): for actual, expected in zip(inferences, expected_predictions): self.assertTrue(_compare_prediction_result(actual, expected)) - @pytest.mark.uses_tf def test_predict_tensor(self): fake_model = FakeTFTensorModel() inference_runner = TFModelHandlerTensor(model_uri='unused') @@ -80,7 +81,6 @@ def test_predict_tensor(self): for actual, expected in zip(inferences, expected_predictions): self.assertTrue(_compare_tensor_prediction_result(actual, expected)) - @pytest.mark.uses_tf def test_predict_tensor_with_args(self): fake_model = FakeTFTensorModel() inference_runner = TFModelHandlerTensor(model_uri='unused') From a557fadac6a2f37a9319517a93564b950ec5a1d0 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Tue, 14 Feb 2023 10:04:12 -0500 Subject: [PATCH 39/45] resolve pydoc,precommit --- .../apache_beam/ml/inference/tensorflow_tests_requirements.txt | 2 +- sdks/python/tox.ini | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git 
a/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt b/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt index 0db3237052c1..8a9deba61dd8 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt +++ b/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt @@ -16,6 +16,6 @@ # tensorflow>=1.0.0 -tensorflow_hub>-0.10.0 +tensorflow_hub>=0.10.0 Pillow>=9.0.0 diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index eb59d07b252a..f32532f3af49 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -154,6 +154,7 @@ deps = onnxruntime onnx tensorflow + tensorflow_hub commands = time {toxinidir}/scripts/generate_pydoc.sh From 46fbde9d135473ff5428493f9b3cf582260d0aae Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Tue, 14 Feb 2023 10:52:13 -0500 Subject: [PATCH 40/45] fix import --- .../apache_beam/ml/inference/tensorflow_inference_it_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index 1ddf01fa3112..3308dbdd1215 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -23,13 +23,13 @@ import pytest -from apache_beam.examples.inference import tensorflow_imagenet_segmentation from apache_beam.io.filesystems import FileSystems from apache_beam.testing.test_pipeline import TestPipeline # pylint: disable=ungrouped-imports try: import tensorflow as tf + from apache_beam.examples.inference import tensorflow_imagenet_segmentation from apache_beam.examples.inference import tensorflow_mnist_classification except ImportError as e: tf = None From 34e45058b5102f8a447d0670f4e1a7a0d1244b62 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Tue, 14 Feb 2023 13:27:00 -0500 Subject: [PATCH 41/45] fix lint --- sdks/python/apache_beam/ml/inference/tensorflow_inference.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index c53a0f088e95..08813da70dc5 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -28,6 +28,7 @@ from typing import Union import numpy + import tensorflow as tf import tensorflow_hub as hub from apache_beam.ml.inference import utils From 0fbb3d989a272bb2385af15f492ef20cfd14116a Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Tue, 14 Feb 2023 14:22:20 -0500 Subject: [PATCH 42/45] address comments --- .../apache_beam/examples/inference/README.md | 2 +- .../tensorflow_imagenet_segmentation.py | 3 +-- .../ml/inference/tensorflow_inference.py | 2 +- .../inference/tensorflow_inference_it_test.py | 17 ++++++++++++----- .../ml/inference/tensorflow_inference_test.py | 1 - 5 files changed, 15 insertions(+), 10 deletions(-) diff --git a/sdks/python/apache_beam/examples/inference/README.md b/sdks/python/apache_beam/examples/inference/README.md index b67c961a7750..69cd773593bd 100644 --- a/sdks/python/apache_beam/examples/inference/README.md +++ b/sdks/python/apache_beam/examples/inference/README.md @@ -481,7 +481,7 @@ Each line has data separated by a comma ",". 
The first item is the actual label
 
 ---
 ## Image segmentation with Tensorflow and TensorflowHub
 
-[`tensorflow_image_segmentation.py`](./tensorflow_image_segmentation.py) contains an implementation for a RunInference pipeline that performs image segmentation using the [`mobilenet_v2`]("https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4") architecture from the tensorflow hub.
+[`tensorflow_imagenet_segmentation.py`](./tensorflow_imagenet_segmentation.py) contains an implementation for a RunInference pipeline that performs image segmentation using the [`mobilenet_v2`]("https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4") architecture from the tensorflow hub.
 
 The pipeline reads images, performs basic preprocessing, passes the images to the Tensorflow implementation of RunInference, and then writes predictions to a text file.
 
diff --git a/sdks/python/apache_beam/examples/inference/tensorflow_imagenet_segmentation.py b/sdks/python/apache_beam/examples/inference/tensorflow_imagenet_segmentation.py
index 70e6797d2a2f..e059d2433bfa 100644
--- a/sdks/python/apache_beam/examples/inference/tensorflow_imagenet_segmentation.py
+++ b/sdks/python/apache_beam/examples/inference/tensorflow_imagenet_segmentation.py
@@ -21,6 +21,7 @@
 from typing import Iterator
 
 import numpy
+from PIL import Image
 
 import apache_beam as beam
 import tensorflow as tf
@@ -37,7 +38,6 @@ class PostProcessor(beam.DoFn):
   Returns predicted label.
   """
   def process(self, element: PredictionResult) -> Iterable[str]:
-    print("prediction result---->: %", element)
     predicted_class = numpy.argmax(element.inference[0], axis=-1)
     labels_path = tf.keras.utils.get_file(
         'ImageNetLabels.txt',
@@ -77,7 +77,6 @@ def filter_empty_lines(text: str) -> Iterator[str]:
 
 
 def read_image(image_name, image_dir):
-  from PIL import Image
   img = tf.keras.utils.get_file(image_name, image_dir + image_name)
   img = Image.open(img).resize((224, 224))
   img = numpy.array(img) / 255.0
diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py
index 08813da70dc5..c1bb3d6ab480 100644
--- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py
+++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py
@@ -112,7 +112,7 @@ def __init__(
       during RunInference. Defaults to default_numpy_inference_fn.
 
     **Supported Versions:** RunInference APIs in Apache Beam have been tested
-    with Tensorflow 2.11.
+    with Tensorflow 2.9, 2.10, 2.11.
""" self._model_uri = model_uri self._model_type = model_type diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index 3308dbdd1215..d2d42524868b 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -64,7 +64,9 @@ def test_tf_mnist_classification(self): save_main_session=False) self.assertEqual(FileSystems().exists(output_file), True) - expected_output_filepath = 'gs://clouddfe-riteshghorse/tf/mnist/output/testing_expected_outputs_test_sklearn_mnist_classification_actuals.txt' # pylint: disable=line-too-long + expected_output_filepath = ( + 'gs://clouddfe-riteshghorse/tf/mnist/output/testing_expected_outputs_test_sklearn_mnist_classification_actuals.txt' # pylint: disable=line-too-long + ) expected_outputs = process_outputs(expected_output_filepath) predicted_outputs = process_outputs(output_file) @@ -81,11 +83,15 @@ def test_tf_mnist_classification(self): def test_tf_imagenet_image_classification(self): test_pipeline = TestPipeline(is_integration_test=True) - input_file = 'gs://clouddfe-riteshghorse/tf/imagenet/input/input_labels.txt' # pylint: disable=line-too-long - image_dir = 'https://storage.googleapis.com/download.tensorflow.org/example_images/' # pylint: disable=line-too-long + input_file = ( + 'gs://clouddfe-riteshghorse/tf/imagenet/input/input_labels.txt') + image_dir = ( + 'https://storage.googleapis.com/download.tensorflow.org/example_images/' + ) output_file_dir = 'gs://clouddfe-riteshghorse/tf/imagenet/output' output_file = '/'.join([output_file_dir, str(uuid.uuid4()), 'result.txt']) - model_path = 'https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4' # pylint: disable=line-too-long + model_path = ( + 'https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4') extra_opts = { 'input': input_file, 'output': output_file, @@ -97,7 +103,8 @@ def test_tf_imagenet_image_classification(self): save_main_session=False) self.assertEqual(FileSystems().exists(output_file), True) - expected_output_filepath = 'gs://clouddfe-riteshghorse/tf/imagenet/output/actuals.txt' # pylint: disable=line-too-long + expected_output_filepath = ( + 'gs://clouddfe-riteshghorse/tf/imagenet/output/actuals.txt') expected_outputs = process_outputs(expected_output_filepath) predicted_outputs = process_outputs(output_file) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index 47e027b4e499..842de7fe611b 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -122,7 +122,6 @@ def test_predict_keyed_numpy(self): for actual, expected in zip(inferences, expected_predictions): self.assertTrue(_compare_prediction_result(actual[1], expected[1])) - @pytest.mark.uses_tf def test_predict_keyed_tensor(self): fake_model = FakeTFTensorModel() inference_runner = KeyedModelHandler( From d298e42dd0e236c486c41f4a10aa01bc1cf42cc4 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 15 Feb 2023 01:18:11 -0500 Subject: [PATCH 43/45] fix optional inference args --- .../tensorflow_imagenet_segmentation.py | 2 +- .../ml/inference/tensorflow_inference.py | 20 +++++++------------ 2 files changed, 8 insertions(+), 14 deletions(-) diff --git 
a/sdks/python/apache_beam/examples/inference/tensorflow_imagenet_segmentation.py b/sdks/python/apache_beam/examples/inference/tensorflow_imagenet_segmentation.py index e059d2433bfa..bfdaefe861e2 100644 --- a/sdks/python/apache_beam/examples/inference/tensorflow_imagenet_segmentation.py +++ b/sdks/python/apache_beam/examples/inference/tensorflow_imagenet_segmentation.py @@ -21,7 +21,6 @@ from typing import Iterator import numpy -from PIL import Image import apache_beam as beam import tensorflow as tf @@ -31,6 +30,7 @@ from apache_beam.options.pipeline_options import PipelineOptions from apache_beam.options.pipeline_options import SetupOptions from apache_beam.runners.runner import PipelineResult +from PIL import Image class PostProcessor(beam.DoFn): diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index c1bb3d6ab480..a9c3ff1e1ac2 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -43,7 +43,7 @@ TensorInferenceFn = Callable[[ tf.Module, Sequence[Union[numpy.ndarray, tf.Tensor]], - Optional[Dict[str, Any]], + Dict[str, Any], Optional[str] ], Iterable[PredictionResult]] @@ -64,26 +64,20 @@ def _load_model(model_uri, model_type): def default_numpy_inference_fn( model: tf.Module, batch: Sequence[numpy.ndarray], - inference_args: Optional[Dict[str, Any]] = None, + inference_args: Dict[str, Any], model_id: Optional[str] = None) -> Iterable[PredictionResult]: vectorized_batch = numpy.stack(batch, axis=0) - if inference_args: - predictions = model(vectorized_batch, **inference_args) - else: - predictions = model(vectorized_batch) + predictions = model(vectorized_batch, **inference_args) return utils._convert_to_result(batch, predictions, model_id) def default_tensor_inference_fn( model: tf.Module, batch: Sequence[tf.Tensor], - inference_args: Optional[Dict[str, Any]] = None, + inference_args: Dict[str, Any], model_id: Optional[str] = None) -> Iterable[PredictionResult]: vectorized_batch = tf.stack(batch, axis=0) - if inference_args: - predictions = model(vectorized_batch, **inference_args) - else: - predictions = model(vectorized_batch) + predictions = model(vectorized_batch, **inference_args) return utils._convert_to_result(batch, predictions, model_id) @@ -150,7 +144,6 @@ def run_inference( An Iterable of type PredictionResult. """ inference_args = {} if not inference_args else inference_args - return self._inference_fn(model, batch, inference_args, self._model_uri) def get_num_bytes(self, batch: Sequence[numpy.ndarray]) -> int: @@ -212,7 +205,7 @@ def run_inference( self, batch: Sequence[tf.Tensor], model: tf.Module, - inference_args: Optional[Dict[str, Any]] = None, + inference_args: Optional[Dict[str, Any]] = None ) -> Iterable[PredictionResult]: """ Runs inferences on a batch of tf.Tensor and returns an Iterable of @@ -233,6 +226,7 @@ def run_inference( Returns: An Iterable of type PredictionResult. 
""" + inference_args = {} if not inference_args else inference_args return self._inference_fn(model, batch, inference_args, self._model_uri) def get_num_bytes(self, batch: Sequence[tf.Tensor]) -> int: From 2556534c72a1cf72a208d0b62f623a3461f67878 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 15 Feb 2023 10:15:09 -0500 Subject: [PATCH 44/45] change to ml bucket --- .../inference/tensorflow_inference_it_test.py | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index d2d42524868b..7b4b13ce2e1e 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -50,10 +50,10 @@ def process_outputs(filepath): class TensorflowInference(unittest.TestCase): def test_tf_mnist_classification(self): test_pipeline = TestPipeline(is_integration_test=True) - input_file = 'gs://clouddfe-riteshghorse/tf/mnist/dataset/testing_inputs_it_mnist_data.csv' # pylint: disable=line-too-long - output_file_dir = 'gs://clouddfe-riteshghorse/tf/mnist/output/' + input_file = 'gs://apache-beam-ml/testing/inputs/it_mnist_data.csv' + output_file_dir = 'apache-beam-ml/testing/outputs' output_file = '/'.join([output_file_dir, str(uuid.uuid4()), 'result.txt']) - model_path = 'gs://clouddfe-riteshghorse/tf/mnist/model/' + model_path = 'apache-beam-ml/models/tensorflow/mnist/' extra_opts = { 'input': input_file, 'output': output_file, @@ -64,11 +64,8 @@ def test_tf_mnist_classification(self): save_main_session=False) self.assertEqual(FileSystems().exists(output_file), True) - expected_output_filepath = ( - 'gs://clouddfe-riteshghorse/tf/mnist/output/testing_expected_outputs_test_sklearn_mnist_classification_actuals.txt' # pylint: disable=line-too-long - ) + expected_output_filepath = 'gs://apache-beam-ml/testing/expected_outputs/test_sklearn_mnist_classification_actuals.txt' # pylint: disable=line-too-long expected_outputs = process_outputs(expected_output_filepath) - predicted_outputs = process_outputs(output_file) self.assertEqual(len(expected_outputs), len(predicted_outputs)) @@ -81,14 +78,14 @@ def test_tf_mnist_classification(self): true_label, expected_prediction = expected_outputs[i].split(',') self.assertEqual(predictions_dict[true_label], expected_prediction) - def test_tf_imagenet_image_classification(self): + def test_tf_imagenet_image_segmentation(self): test_pipeline = TestPipeline(is_integration_test=True) input_file = ( - 'gs://clouddfe-riteshghorse/tf/imagenet/input/input_labels.txt') + 'gs://apache-beam-ml/testing/inputs/it_imagenet_input_labels.txt') image_dir = ( 'https://storage.googleapis.com/download.tensorflow.org/example_images/' ) - output_file_dir = 'gs://clouddfe-riteshghorse/tf/imagenet/output' + output_file_dir = 'apache-beam-ml/testing/outputs' output_file = '/'.join([output_file_dir, str(uuid.uuid4()), 'result.txt']) model_path = ( 'https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4') @@ -103,10 +100,8 @@ def test_tf_imagenet_image_classification(self): save_main_session=False) self.assertEqual(FileSystems().exists(output_file), True) - expected_output_filepath = ( - 'gs://clouddfe-riteshghorse/tf/imagenet/output/actuals.txt') + expected_output_filepath = 'gs://apache-beam-ml/testing/expected_outputs/test_tf_imagenet_image_segmentation.txt' # pylint: disable=line-too-long expected_outputs = 
process_outputs(expected_output_filepath) - predicted_outputs = process_outputs(output_file) self.assertEqual(len(expected_outputs), len(predicted_outputs)) From 627fdd931d891f78325743480316fc48899dc793 Mon Sep 17 00:00:00 2001 From: riteshghorse Date: Wed, 15 Feb 2023 10:33:51 -0500 Subject: [PATCH 45/45] fix doc --- .../ml/inference/tensorflow_inference.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index a9c3ff1e1ac2..ee33c53cadb0 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -100,10 +100,9 @@ def __init__( Args: model_uri (str): path to the trained model. - model_type (ModelType): type of model to be loaded. - Defaults to SAVED_MODEL. - inference_fn (TensorInferenceFn, Optional): inference function to use - during RunInference. Defaults to default_numpy_inference_fn. + model_type: type of model to be loaded. Defaults to SAVED_MODEL. + inference_fn: inference function to use during RunInference. + Defaults to default_numpy_inference_fn. **Supported Versions:** RunInference APIs in Apache Beam have been tested with Tensorflow 2.9, 2.10, 2.11. @@ -182,10 +181,10 @@ def __init__( Args: model_uri (str): path to the trained model. - model_type (ModelType): type of model to be loaded. + model_type: type of model to be loaded. Defaults to SAVED_MODEL. - inference_fn (TensorInferenceFn, Optional): inference function to use - during RunInference. Defaults to default_numpy_inference_fn. + inference_fn: inference function to use during RunInference. + Defaults to default_numpy_inference_fn. **Supported Versions:** RunInference APIs in Apache Beam have been tested with Tensorflow 2.11.
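To round off the docstrings patched above, here is a minimal usage sketch for `TFModelHandlerNumpy` with `RunInference`. The `./saved_model` path and the `(28, 28, 1)` input shape are assumptions for illustration (they mirror the MNIST example); any Keras SavedModel that `tf.keras.models.load_model` accepts would work the same way.

```python
import numpy

import apache_beam as beam
from apache_beam.ml.inference.base import RunInference
from apache_beam.ml.inference.tensorflow_inference import ModelType
from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy

# Assumed: a Keras SavedModel at ./saved_model mapping a (28, 28, 1) array
# to ten class scores, as in the MNIST example.
model_handler = TFModelHandlerNumpy(
    model_uri='./saved_model', model_type=ModelType.SAVED_MODEL)

with beam.Pipeline() as pipeline:
  _ = (
      pipeline
      | 'CreateExamples' >> beam.Create(
          [numpy.zeros((28, 28, 1), dtype=numpy.float32)])
      | 'RunInference' >> RunInference(model_handler)
      # Each PredictionResult pairs the input example with the model output;
      # argmax over the scores recovers the predicted digit.
      | 'ExtractLabel' >> beam.Map(
          lambda result: numpy.argmax(result.inference, axis=0))
      | 'Print' >> beam.Map(print))
```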