Support the model conversion from Tensorflow model to ONNX model (#399)

Signed-off-by: Lv, Liang1 <[email protected]>
Co-authored-by: zehao-intel <[email protected]>
Co-authored-by: chensuyue <[email protected]>
1 parent: 836aa55
Commit: 33a2352

Showing 46 changed files with 5,408 additions and 2 deletions.
Changed file — 1 addition (spelling wordlist):

```diff
@@ -1101,6 +1101,7 @@ mobilenet
 MobileNet
 mobilenetv
 Mobilenetv
+MobilenetV
 MobileNetv
 MobileNetV
 modalities
```
New file — 52 additions:

```json
{
  "tf2onnx": {
    "resnet50v1.0": {
      "model_src_dir": "image_recognition/tensorflow_models/resnet50_v1/export",
      "source_model_dataset": "/tf_dataset/dataset/imagenet",
      "target_model_dataset": "/tf_dataset/dataset/imagenet",
      "input_model": "/tf_dataset/pre-trained-models/resnet50/fp32/freezed_resnet50.pb",
      "main_script": "main.py",
      "batch_size": 32
    },
    "resnet50v1.5": {
      "model_src_dir": "image_recognition/tensorflow_models/resnet50_v1_5/export",
      "source_model_dataset": "/tf_dataset/dataset/imagenet",
      "target_model_dataset": "/tf_dataset/dataset/imagenet",
      "input_model": "/tf_dataset/pre-trained-models/resnet50v1_5/fp32/resnet50_v1.pb",
      "main_script": "main.py",
      "batch_size": 32
    },
    "mobilenetv2": {
      "model_src_dir": "image_recognition/tensorflow_models/mobilenet_v2/export",
      "source_model_dataset": "/tf_dataset/dataset/imagenet",
      "target_model_dataset": "/tf_dataset/dataset/imagenet",
      "input_model": "/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_mobilenet_v2.pb",
      "main_script": "main.py",
      "batch_size": 32
    },
    "vgg16": {
      "model_src_dir": "image_recognition/tensorflow_models/vgg16/export",
      "source_model_dataset": "/tf_dataset/dataset/imagenet",
      "target_model_dataset": "/tf_dataset/dataset/imagenet",
      "input_model": "/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_vgg16.pb",
      "main_script": "main.py",
      "batch_size": 32
    },
    "faster_rcnn_resnet50": {
      "model_src_dir": "object_detection/tensorflow_models/faster_rcnn_resnet50/export",
      "source_model_dataset": "/tf_dataset/tensorflow/coco_val.record",
      "target_model_dataset": "/tf_dataset/tensorflow/coco_val.record",
      "input_model": "/tf_dataset/pre-train-model-oob/object_detection/faster_rcnn_resnet50/frozen_inference_graph.pb",
      "main_script": "main.py",
      "batch_size": 10
    },
    "ssd_mobilenet_v1": {
      "model_src_dir": "object_detection/tensorflow_models/ssd_mobilenet_v1/export",
      "source_model_dataset": "/tf_dataset/tensorflow/coco_val.record",
      "target_model_dataset": "/tf_dataset/tensorflow/coco_val.record",
      "input_model": "/tf_dataset/pre-train-model-oob/object_detection/ssd_mobilenet_v1/frozen_inference_graph.pb",
      "main_script": "main.py",
      "batch_size": 10
    }
  }
}
```
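This config maps each example model to its source directory, dataset paths, entry script, and batch size. As a minimal sketch of how a CI harness might consume it (the filename `run_config.json` is hypothetical; the commit does not show where this file lives):

```python
import json

# Hypothetical filename; adjust to wherever this config is stored.
with open("run_config.json") as f:
    cfg = json.load(f)

# Assemble the command a harness might run for each tf2onnx entry.
# The paths are the dataset-server paths hard-coded in the config.
for name, entry in cfg["tf2onnx"].items():
    cmd = (
        f"cd {entry['model_src_dir']} && "
        f"python {entry['main_script']} "
        f"--input-graph {entry['input_model']} "
        f"--dataset_location {entry['source_model_dataset']} "
        f"--batch_size {entry['batch_size']}"
    )
    print(name, "->", cmd)
```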
119 additions — examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/README.md
Step-by-Step
============

This document shows how to export a TensorFlow INT8 QDQ model to an ONNX INT8 QDQ model using Intel® Neural Compressor.

# Prerequisite

## 1. Environment

### Installation
Python 3.8 or a higher version is recommended.
```shell
# Install Intel® Neural Compressor
pip install neural-compressor
```
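Optionally, verify the installation (assuming the package exposes `__version__`, as current releases do):
```shell
python -c "import neural_compressor; print(neural_compressor.__version__)"
```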

### Install requirements
TensorFlow and intel-extension-for-tensorflow must both be installed to run this export-to-ONNX-INT8 example.
Intel Extension for TensorFlow for Intel CPUs is installed by default.
```shell
pip install -r requirements.txt
```

### Install Intel Extension for Tensorflow
Intel Extension for TensorFlow is required for exporting a TensorFlow model to ONNX.
```shell
pip install --upgrade intel-extension-for-tensorflow[cpu]
```
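A quick optional sanity check that the extension is importable (the module name below is the one the pip package ships):
```shell
python -c "import intel_extension_for_tensorflow as itex; print(itex.__version__)"
```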

## 2. Prepare Pretrained model

The mobilenet_v2 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models).
We can get the pb file by converting the checkpoint file.

1. Download the checkpoint file from [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models)
```shell
wget https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz
tar -xvf mobilenet_v2_1.4_224.tgz
```

2. Export the inference graph
```shell
git clone https://github.com/tensorflow/models
cd models/research/slim
python export_inference_graph.py \
    --alsologtostderr \
    --model_name=mobilenet_v2 \
    --output_file=/tmp/mobilenet_v2_inf_graph.pb
```
Make sure to use intel-tensorflow v1.15, and `pip install tf_slim`.
#### Install Intel Tensorflow 1.15 up2
Check your Python version and install 1.15.0 up2 from one of the wheels below:
https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp36-cp36m-manylinux2010_x86_64.whl
https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl
https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl
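For example, on Python 3.7:
```shell
# Pick the wheel matching your interpreter (cp35/cp36/cp37)
pip install https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl
pip install tf_slim
```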
> Please note: The ImageNet dataset has 1001 classes, while the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So for those models we need to add the `--labels_offset=1` flag to the inference graph export command.

3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer names of the inference graph pb; for mobilenet_v2 the output layer name is `MobilenetV2/Predictions/Reshape_1` (a programmatic alternative is sketched below)
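As a rough sketch of that alternative (plain TensorFlow, using the TF1-compatibility protobuf API), you can list candidate input and output nodes directly:

```python
import tensorflow as tf

# Load the exported inference graph and inspect its nodes.
graph_def = tf.compat.v1.GraphDef()
with tf.io.gfile.GFile("/tmp/mobilenet_v2_inf_graph.pb", "rb") as f:
    graph_def.ParseFromString(f.read())

# Placeholders are typically the inputs; the final node is typically the output.
for node in graph_def.node:
    if node.op == "Placeholder":
        print("input:", node.name)
print("last node (likely output):", graph_def.node[-1].name)
```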

4. Freeze the exported graph using the `freeze_graph.py` tool from the [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo
```shell
python freeze_graph.py \
    --input_graph=/tmp/mobilenet_v2_inf_graph.pb \
    --input_checkpoint=./mobilenet_v2.ckpt \
    --input_binary=true \
    --output_graph=./frozen_mobilenet_v2.pb \
    --output_node_names=MobilenetV2/Predictions/Reshape_1
```
## 3. Prepare Dataset | ||
|
||
TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. | ||
We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. | ||
|
||
```shell | ||
cd examples/tensorflow/image_recognition/tensorflow_models/ | ||
# convert validation subset | ||
bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation | ||
# convert train subset | ||
bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train | ||
cd mobilenet_v2/export | ||
``` | ||
|

# Run Command
Please note the dataset must be in TF records format for running quantization and benchmarks.

### Export Tensorflow FP32 model to ONNX FP32 model
```shell
bash run_export.sh --input_model=./frozen_mobilenet_v2.pb --output_model=./mobilenet_v2.onnx --dtype=fp32 --quant_format=qdq
```
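Under the hood the script drives the Neural Compressor export API (see `main.py` later in this commit); the FP32 path condenses to:

```python
from neural_compressor.model import Model
from neural_compressor.config import TF2ONNXConfig

# Wrap the frozen pb and export it to ONNX FP32 (mirrors main.py in this commit).
inc_model = Model("./frozen_mobilenet_v2.pb")
config = TF2ONNXConfig(dtype="fp32", input_names="input[-1,224,224,3]")
inc_model.export("./mobilenet_v2.onnx", config)
```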

### Run benchmark for Tensorflow FP32 model
```shell
bash run_benchmark.sh --input_model=./frozen_mobilenet_v2.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32
bash run_benchmark.sh --input_model=./frozen_mobilenet_v2.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1
```

### Run benchmark for ONNX FP32 model
```shell
bash run_benchmark.sh --input_model=./mobilenet_v2.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32
bash run_benchmark.sh --input_model=./mobilenet_v2.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1
```

### Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model
```shell
bash run_export.sh --input_model=./frozen_mobilenet_v2.pb --output_model=./mobilenet_v2_int8.onnx --dtype=int8 --quant_format=qdq --dataset_location=/path/to/imagenet/
```
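The INT8 path first quantizes the TensorFlow model (saving the intermediate `./tf-quant.pb` used in the next benchmark step) and then exports it to ONNX; a condensed view of the same flow from `main.py`:

```python
from neural_compressor import quantization
from neural_compressor.config import PostTrainingQuantConfig, TF2ONNXConfig
from neural_compressor.utils.create_obj_from_config import create_dataloader

# Calibration/eval dataloader over the TF records dataset (as in main.py).
dataloader = create_dataloader('tensorflow', {
    'batch_size': 10,
    'dataset': {"ImageRecord": {'root': "/path/to/imagenet/"}},
    'transform': {'BilinearImagenet': {'height': 224, 'width': 224}},
    'filter': None,
})

conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100])
q_model = quantization.fit("./frozen_mobilenet_v2.pb", conf=conf,
                           calib_dataloader=dataloader, eval_dataloader=dataloader)
q_model.save("./tf-quant.pb")  # intermediate TF INT8 model, benchmarked below
q_model.export("./mobilenet_v2_int8.onnx",
               TF2ONNXConfig(dtype="int8", input_names="input[-1,224,224,3]"))
```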

### Run benchmark for Tensorflow INT8 model
```shell
bash run_benchmark.sh --input_model=./tf-quant.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32
bash run_benchmark.sh --input_model=./tf-quant.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1
```

### Run benchmark for ONNX INT8 QDQ model
```shell
bash run_benchmark.sh --input_model=./mobilenet_v2_int8.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32
bash run_benchmark.sh --input_model=./mobilenet_v2_int8.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1
```
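Optionally, a structural sanity check of the exported file with the `onnx` package from requirements.txt:

```python
import onnx

# Load the exported model and run ONNX's built-in validation.
model = onnx.load("./mobilenet_v2_int8.onnx")
onnx.checker.check_model(model)
print("ONNX model is well-formed;", len(model.graph.node), "nodes")
```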

146 additions — examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py
```python
#
# -*- coding: utf-8 -*-
#
# Copyright (c) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from argparse import ArgumentParser
import tensorflow as tf
import onnx
import os
import onnxruntime as ort
import numpy as np

tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)


def eval_func_onnx(model, dataloader, metric, postprocess=None):
    """Evaluate an ONNX model with onnxruntime and return the metric result."""
    metric.reset()
    sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers())
    input_names = [i.name for i in sess.get_inputs()]
    for input_data, label in dataloader:
        output = sess.run(None, dict(zip(input_names, [input_data])))
        if postprocess:
            output, label = postprocess((output, label))
        metric.update(output, label)
    return metric.result()


def eval_func_tf(model, dataloader, metric, postprocess=None):
    """Evaluate a TensorFlow frozen graph and return the metric result."""
    from neural_compressor.model import Model
    model = Model(model)
    input_tensor = model.input_tensor
    output_tensor = model.output_tensor if len(model.output_tensor) > 1 else \
        model.output_tensor[0]

    for _, (inputs, labels) in enumerate(dataloader):
        # dataloader should keep the order and len of inputs same with input_tensor
        inputs = np.array([inputs])
        feed_dict = dict(zip(input_tensor, inputs))
        predictions = model.sess.run(output_tensor, feed_dict)
        metric.update(predictions, labels)
    acc = metric.result()
    return acc


class eval_classifier_optimized_graph:
    """Evaluate image classifier with optimized TensorFlow graph."""

    def __init__(self):
        """Initialization."""
        arg_parser = ArgumentParser(description='Parse args')
        arg_parser.add_argument('-g', "--input-graph",
                                help='Specify the input graph for the transform tool',
                                dest='input_graph')
        arg_parser.add_argument("--output-graph",
                                help='Specify tune result model save dir',
                                dest='output_graph')
        arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark')
        arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode')
        arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.')
        arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.')
        arg_parser.add_argument('--dataset_location', dest='dataset_location',
                                help='location of calibration dataset and evaluate dataset')
        arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark')
        arg_parser.add_argument('--dtype', dest='dtype', default='fp32', help='the data type of export')
        arg_parser.add_argument('--quant_format', dest='quant_format', default='qdq', help='the quant format of export')
        self.args = arg_parser.parse_args()

    def run(self):
        """Run tuning, export, or benchmark with neural_compressor, depending on the parsed args."""
        if self.args.quant_format != 'qdq':
            raise ValueError("Only support tensorflow export to ONNX for QDQ format, "
                             "please make sure input the correct quant_format.")

        if self.args.export:
            if self.args.dtype == 'int8':
                # INT8 path: quantize the TF model first, then export the QDQ model to ONNX.
                from neural_compressor import quantization
                from neural_compressor.config import PostTrainingQuantConfig
                from neural_compressor.utils.create_obj_from_config import create_dataloader
                dataloader_args = {
                    'batch_size': 10,
                    'dataset': {"ImageRecord": {'root': self.args.dataset_location}},
                    'transform': {'BilinearImagenet':
                                      {'height': 224, 'width': 224}},
                    'filter': None
                }
                dataloader = create_dataloader('tensorflow', dataloader_args)
                conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100])
                q_model = quantization.fit(self.args.input_graph, conf=conf, calib_dataloader=dataloader,
                                           eval_dataloader=dataloader)
                q_model.save("./tf-quant.pb")
                from neural_compressor.config import TF2ONNXConfig
                config = TF2ONNXConfig(dtype=self.args.dtype, input_names='input[-1,224,224,3]')
                q_model.export(self.args.output_graph, config)
            else:
                # FP32 path: wrap the frozen graph and export it to ONNX directly.
                from neural_compressor.model import Model
                from neural_compressor.config import TF2ONNXConfig
                inc_model = Model(self.args.input_graph)
                config = TF2ONNXConfig(dtype="fp32", input_names='input[-1,224,224,3]')
                inc_model.export(self.args.output_graph, config)

        if self.args.benchmark:
            if self.args.input_graph.endswith('.onnx'):
                model = onnx.load(self.args.input_graph)
            else:
                model = self.args.input_graph

            from neural_compressor.utils.create_obj_from_config import create_dataloader
            dataloader_args = {
                'batch_size': self.args.batch_size,
                'dataset': {"ImageRecord": {'root': self.args.dataset_location}},
                'transform': {'BilinearImagenet': {'height': 224, 'width': 224}},
                'filter': None
            }
            dataloader = create_dataloader('tensorflow', dataloader_args)
            from neural_compressor.metric import TensorflowTopK
            top1 = TensorflowTopK(k=1)

            def eval(model):
                # Dispatch on model type: a path string is a TF graph, a loaded proto is ONNX.
                if isinstance(model, str):
                    return eval_func_tf(model, dataloader, top1)
                else:
                    return eval_func_onnx(model, dataloader, top1)

            if self.args.mode == 'performance':
                from neural_compressor.benchmark import fit
                from neural_compressor.config import BenchmarkConfig
                conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7)
                fit(self.args.input_graph, conf, b_dataloader=dataloader)
            elif self.args.mode == 'accuracy':
                acc_result = eval(model)
                print("Batch size = %d" % dataloader.batch_size)
                print("Accuracy: %.5f" % acc_result)


if __name__ == "__main__":
    evaluate_opt_graph = eval_classifier_optimized_graph()
    evaluate_opt_graph.run()
```
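For reference, the wrapper scripts in this directory end up invoking `main.py` roughly like this (arguments mirror `run_benchmark.sh` below and the README; `run_export.sh` itself is not shown in this view):

```shell
# Export TF FP32 -> ONNX FP32 (presumably what run_export.sh drives for dtype=fp32)
python main.py --input-graph ./frozen_mobilenet_v2.pb --output-graph ./mobilenet_v2.onnx \
    --dtype fp32 --quant_format qdq --export
# Accuracy benchmark of the exported ONNX model
python main.py --input-graph ./mobilenet_v2.onnx --mode accuracy \
    --dataset_location /path/to/imagenet/ --batch_size 32 --benchmark
```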

6 additions — examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/requirements.txt
```text
tensorflow
intel-extension-for-tensorflow[cpu]
tf2onnx
onnx
onnxruntime
onnxruntime-extensions; python_version < '3.10'
```

42 additions — examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/run_benchmark.sh
```bash
#!/bin/bash
set -x

function main {
  init_params "$@"
  run_benchmark
}

# init params
function init_params {
  for var in "$@"
  do
    case $var in
      --input_model=*)
          input_model=$(echo $var |cut -f2 -d=)
          ;;
      --mode=*)
          mode=$(echo $var |cut -f2 -d=)
          ;;
      --dataset_location=*)
          dataset_location=$(echo $var |cut -f2 -d=)
          ;;
      --batch_size=*)
          batch_size=$(echo $var |cut -f2 -d=)
          ;;
    esac
  done
}

# run benchmark
function run_benchmark {
  python main.py \
      --input-graph ${input_model} \
      --mode ${mode} \
      --dataset_location ${dataset_location} \
      --batch_size ${batch_size} \
      --benchmark
}

main "$@"
```
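Usage matches the README, e.g.:

```shell
bash run_benchmark.sh --input_model=./frozen_mobilenet_v2.pb --mode=accuracy \
    --dataset_location=/path/to/imagenet/ --batch_size=32
```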