migrate export to 2x and 3x from deprecated #1845

Merged: 8 commits, Jun 13, 2024
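This PR migrates the export helpers out of the deprecated neural_compressor.experimental.export package: a new neural_compressor.utils.export package carries the 2.x helpers, the 3.x PT2E export module is renamed from _export to pt2e_export, and internal imports are updated. As a minimal sketch of what the move means for code that imports the helpers directly (paths taken from the diffs below):

# old, deprecated location
from neural_compressor.experimental.export import tf_to_fp32_onnx, onnx_qlinear_to_qdq

# new location introduced by this PR
from neural_compressor.utils.export import tf_to_fp32_onnx, onnx_qlinear_to_qdq
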
2 changes: 2 additions & 0 deletions neural_compressor/experimental/export/qlinear2qdq.py
@@ -14,6 +14,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+# pragma: no cover
 """Helper functions to export onnx model from QLinearops to QDQ."""
 from deprecated import deprecated

2 changes: 2 additions & 0 deletions neural_compressor/experimental/export/tf2onnx.py
@@ -14,6 +14,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+# pragma: no cover
 """Helper functions to export model from TensorFlow to ONNX."""

 import re

2 changes: 2 additions & 0 deletions neural_compressor/experimental/export/torch2onnx.py
@@ -14,6 +14,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+# pragma: no cover
 """Helper functions to export model from PyTorch/TensorFlow to ONNX."""

 import os

2 changes: 1 addition & 1 deletion neural_compressor/model/onnx_model.py
@@ -827,7 +827,7 @@ def find_ffn_matmul(self, attention_index, attention_matmul_list, block_len):
 def export(self, save_path, conf):
 """Export Qlinear to QDQ model."""
 from neural_compressor.config import ONNXQlinear2QDQConfig
-from neural_compressor.experimental.export import onnx_qlinear_to_qdq
+from neural_compressor.utils.export import onnx_qlinear_to_qdq
 
 if isinstance(conf, ONNXQlinear2QDQConfig):
 add_nodes, remove_nodes, inits = onnx_qlinear_to_qdq(self._model, self._input_name_to_nodes)

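For reference, a hedged sketch of the user-level call that reaches this export path (the model path and save path are placeholders, and ONNXQlinear2QDQConfig is assumed to be constructible with its defaults):

from neural_compressor.config import ONNXQlinear2QDQConfig
from neural_compressor.model.onnx_model import ONNXModel

inc_model = ONNXModel("int8_qlinear_model.onnx")  # placeholder: an int8 QLinear-format model
inc_model.export("int8_qdq_model.onnx", ONNXQlinear2QDQConfig())
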
2 changes: 1 addition & 1 deletion neural_compressor/model/tensorflow_model.py
@@ -1009,7 +1009,7 @@ def export(self, save_path, conf):
 + "we reset opset_version={} here".format(conf.opset_version)
 )
 
-from neural_compressor.experimental.export import tf_to_fp32_onnx, tf_to_int8_onnx
+from neural_compressor.utils.export import tf_to_fp32_onnx, tf_to_int8_onnx
 
 inputs_as_nchw = conf.kwargs.get("inputs_as_nchw", None)
 if conf.dtype == "int8":

2 changes: 1 addition & 1 deletion neural_compressor/model/torch_model.py
@@ -418,7 +418,7 @@ def export(
 "but the torch version found is {}".format(Version("1.12.0"), version)
 )
 
-from neural_compressor.experimental.export import torch_to_fp32_onnx, torch_to_int8_onnx
+from neural_compressor.utils.export import torch_to_fp32_onnx, torch_to_int8_onnx
 
 if conf.dtype == "int8":
 torch_to_int8_onnx(

2 changes: 1 addition & 1 deletion neural_compressor/onnxrt/utils/onnx_model.py
@@ -648,7 +648,7 @@ def find_ffn_matmul(self, attention_index, attention_matmul_list, block_len):
 def export(self, save_path, conf):
 """Export Qlinear to QDQ model."""
 from neural_compressor.config import ONNXQlinear2QDQConfig
-from neural_compressor.experimental.export import onnx_qlinear_to_qdq
+from neural_compressor.utils.export import onnx_qlinear_to_qdq
 
 if isinstance(conf, ONNXQlinear2QDQConfig):
 if len(self._input_name_to_nodes) == 0:

2 changes: 1 addition & 1 deletion neural_compressor/torch/export/__init__.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from neural_compressor.torch.export._export import export_model_for_pt2e_quant, export
+from neural_compressor.torch.export.pt2e_export import export_model_for_pt2e_quant, export

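Because __init__.py re-exports the same names, the public 3.x import path should be unaffected by the module rename; for illustration:

# still valid after this PR; the symbols now come from pt2e_export
from neural_compressor.torch.export import export, export_model_for_pt2e_quant
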
3 changes: 1 addition & 2 deletions neural_compressor/torch/utils/utility.py
@@ -22,8 +22,7 @@
 from torch.ao.quantization.quantizer.x86_inductor_quantizer import QuantizationConfig, X86InductorQuantizer
 from typing_extensions import TypeAlias
 
-from neural_compressor.common import logger
-from neural_compressor.common.utils import Mode
+from neural_compressor.common.utils import LazyImport, Mode, logger
 
 OP_NAME_AND_TYPE_TUPLE_TYPE: TypeAlias = Tuple[str, Union[torch.nn.Module, Callable]]

21 changes: 21 additions & 0 deletions neural_compressor/utils/export/__init__.py
@@ -0,0 +1,21 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Intel Neural Compressor Export."""

from .torch2onnx import torch_to_fp32_onnx, torch_to_int8_onnx
from .qlinear2qdq import onnx_qlinear_to_qdq
from .tf2onnx import tf_to_fp32_onnx, tf_to_int8_onnx
82 changes: 82 additions & 0 deletions neural_compressor/utils/export/qlinear2qdq.py
@@ -0,0 +1,82 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper functions to export onnx model from QLinear ops to QDQ."""
from neural_compressor.adaptor.ox_utils.util import find_by_name
from neural_compressor.utils import logger
from neural_compressor.utils.utility import LazyImport

numpy_helper = LazyImport("onnx.numpy_helper")


def check_model(model):
    """Check optype for input model.

    Args:
        model (ModelProto): onnx model.
    """
    has_integerop = False
    has_qlinearop = False
    for node in model.graph.node:
        if node.op_type.endswith("Integer"):
            has_integerop = True
        elif node.op_type.startswith("QLinear"):
            has_qlinearop = True
        elif node.op_type in ["QAttention", "QGemm", "QEmbedLayerNormalization"]:
            has_qlinearop = True
        elif node.op_type in ["Gather"]:
            input_data = find_by_name(node.input[0], model.graph.initializer)
            if input_data is not None and numpy_helper.to_array(input_data).dtype in ["int8", "uint8"]:
                has_qlinearop = True
    if has_integerop:
        logger.info("This model has Integer ops, these ops will be skipped.")
    if has_qlinearop:
        return True
    else:
        logger.info("This model has no QLinear ops, save the original model.")
        return False


def onnx_qlinear_to_qdq(
    model,
    input_name_to_nodes,
):
    """Export ONNX QLinearops model into QDQ model.

    Args:
        model (ModelProto): int8 onnx model.
        input_name_to_nodes (dict): the mapping of tensor name and its destination nodes.
    """
    from neural_compressor.adaptor.ox_utils.operators import QOPERATORS

    add_nodes = []
    remove_nodes = []
    inits = []
    if check_model(model):
        for node in model.graph.node:
            if node.op_type in QOPERATORS:
                if node.output[0] not in input_name_to_nodes:
                    continue
                children = []
                for out in node.output:
                    children.extend(input_name_to_nodes[node.output[0]])
                converter = QOPERATORS[node.op_type](node, children, model.graph.initializer)
                done, add_node, init = converter.convert()
                if done:
                    add_nodes.extend(add_node)
                    inits.extend(init)
                    remove_nodes.append(node)
    return add_nodes, remove_nodes, inits
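
As an illustration of how the three returned lists might be consumed, a hypothetical sketch (inside the library, the ONNXModel.export path shown earlier performs the equivalent graph surgery):

import onnx

from neural_compressor.utils.export import onnx_qlinear_to_qdq

model = onnx.load("int8_qlinear_model.onnx")  # placeholder path

# Build the tensor-name -> consumer-nodes mapping the converter expects.
input_name_to_nodes = {}
for node in model.graph.node:
    for name in node.input:
        input_name_to_nodes.setdefault(name, []).append(node)

add_nodes, remove_nodes, inits = onnx_qlinear_to_qdq(model, input_name_to_nodes)

# Drop the converted QLinear nodes and splice in their QDQ replacements.
for node in remove_nodes:
    model.graph.node.remove(node)
model.graph.node.extend(add_nodes)
model.graph.initializer.extend(inits)
onnx.save(model, "int8_qdq_model.onnx")
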
118 changes: 118 additions & 0 deletions neural_compressor/utils/export/tf2onnx.py
@@ -0,0 +1,118 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper functions to export model from TensorFlow to ONNX."""

import re

from neural_compressor.utils import logger
from neural_compressor.utils.utility import LazyImport

t2o = LazyImport("tf2onnx")


def _split_nodename_and_shape(name):
    """Split input name with shape into name and shape."""
    # pattern for a node name
    inputs = []
    shapes = {}
    # input takes in most cases the format name:0, where 0 is the output number
    # in some cases placeholders don't have a rank which onnx can't handle so we let uses override the shape
    # by appending the same, ie : [1,28,28,3]
    name_pattern = r"(?:([\w\d/\-\._:]+)(\[[\-\d,]+\])?),?"
    splits = re.split(name_pattern, name)
    for i in range(1, len(splits), 3):
        inputs.append(splits[i] + ":0")
        if splits[i + 1] is not None:
            shape = [int(n) for n in splits[i + 1][1:-1].split(",")]
            shape = [n if n >= 0 else None for n in shape]
            shapes[splits[i] + ":0"] = shape
    if not shapes:
        shapes = None
    return inputs, shapes


def tf_to_fp32_onnx(graph_def, save_path, opset_version=14, input_names=None, output_names=None, inputs_as_nchw=None):
    """Export FP32 Tensorflow model into FP32 ONNX model using tf2onnx tool.

    Args:
        graph_def (graph_def to convert): fp32 graph_def.
        save_path (str): save path of ONNX model.
        opset_version (int, optional): opset version. Defaults to 14.
        input_names (list, optional): input names. Defaults to None.
        output_names (list, optional): output names. Defaults to None.
        inputs_as_nchw (list, optional): transpose the input. Defaults to None.
    """
    shape_override = None
    if isinstance(input_names, str):
        input_names, shape_override = _split_nodename_and_shape(input_names)
    else:
        input_names[:] = [o + ":0" for o in input_names]
    output_names[:] = [o + ":0" for o in output_names]
    t2o.convert.from_graph_def(
        graph_def=graph_def,
        input_names=input_names,
        output_names=output_names,
        inputs_as_nchw=inputs_as_nchw,
        shape_override=shape_override,
        opset=opset_version,
        output_path=save_path,
    )
    info = "The FP32 ONNX Model exported to path: {0}".format(save_path)
    logger.info("*" * len(info))
    logger.info(info)
    logger.info("*" * len(info))


def tf_to_int8_onnx(
    int8_model, save_path, opset_version: int = 14, input_names=None, output_names=None, inputs_as_nchw=None
):
    """Export INT8 Tensorflow model into INT8 ONNX model.

    Args:
        int8_model (tensorflow ITEX QDQ model): int8 model.
        save_path (str): save path of ONNX model.
        opset_version (int, optional): opset version. Defaults to 14.
        input_names (list, optional): input names. Defaults to None.
        output_names (list, optional): output names. Defaults to None.
        inputs_as_nchw (list, optional): transpose the input. Defaults to None.
    """
    shape_override = None
    if isinstance(input_names, str):
        input_names, shape_override = _split_nodename_and_shape(input_names)
    else:
        input_names[:] = [o + ":0" for o in input_names]
    output_names[:] = [o + ":0" for o in output_names]
    onnx_convert_graph = "./converted_graph.onnx"
    from neural_compressor.adaptor.tf_utils.tf2onnx_converter import TensorflowQDQToOnnxQDQConverter

    TensorflowQDQToOnnxQDQConverter(
        int8_model, input_names, output_names, shape_override, inputs_as_nchw, opset_version
    ).convert(onnx_convert_graph)

    import onnxruntime as ort

    sess_options = ort.SessionOptions()
    sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    sess_options.optimized_model_filepath = save_path
    import onnx

    model = onnx.load(onnx_convert_graph)
    ort.InferenceSession(model.SerializeToString(), sess_options)
    info = "The INT8 ONNX Model is exported to path: {0}".format(save_path)
    logger.info("*" * len(info))
    logger.info(info)
    logger.info("*" * len(info))
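
A short usage sketch for the relocated FP32 helper (frozen_graph_def and the input/output names are placeholders; the ":0" tensor suffix is appended by the helper itself):

from neural_compressor.utils.export import tf_to_fp32_onnx

# frozen_graph_def stands in for a frozen tf.compat.v1.GraphDef.
tf_to_fp32_onnx(
    graph_def=frozen_graph_def,
    save_path="model_fp32.onnx",
    opset_version=14,
    input_names=["input"],  # or "input[1,224,224,3]" to override a missing shape
    output_names=["output"],
)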