diff --git a/neural_compressor/experimental/export/qlinear2qdq.py b/neural_compressor/experimental/export/qlinear2qdq.py
index cafc50ab3f4..d9edce935cd 100644
--- a/neural_compressor/experimental/export/qlinear2qdq.py
+++ b/neural_compressor/experimental/export/qlinear2qdq.py
@@ -14,6 +14,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+# pragma: no cover
 """Helper functions to export onnx model from QLinearops to QDQ."""
 from deprecated import deprecated
 
diff --git a/neural_compressor/experimental/export/tf2onnx.py b/neural_compressor/experimental/export/tf2onnx.py
index 35dc3661ef9..111d203f083 100644
--- a/neural_compressor/experimental/export/tf2onnx.py
+++ b/neural_compressor/experimental/export/tf2onnx.py
@@ -14,6 +14,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+# pragma: no cover
 """Helper functions to export model from TensorFlow to ONNX."""
 
 import re
diff --git a/neural_compressor/experimental/export/torch2onnx.py b/neural_compressor/experimental/export/torch2onnx.py
index e23758130be..143b5dab5a8 100644
--- a/neural_compressor/experimental/export/torch2onnx.py
+++ b/neural_compressor/experimental/export/torch2onnx.py
@@ -14,6 +14,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+# pragma: no cover
 """Helper functions to export model from PyTorch/TensorFlow to ONNX."""
 
 import os
diff --git a/neural_compressor/model/onnx_model.py b/neural_compressor/model/onnx_model.py
index c3cbf5f9b27..69dad4b743d 100644
--- a/neural_compressor/model/onnx_model.py
+++ b/neural_compressor/model/onnx_model.py
@@ -827,7 +827,7 @@ def find_ffn_matmul(self, attention_index, attention_matmul_list, block_len):
     def export(self, save_path, conf):
         """Export Qlinear to QDQ model."""
         from neural_compressor.config import ONNXQlinear2QDQConfig
-        from neural_compressor.experimental.export import onnx_qlinear_to_qdq
+        from neural_compressor.utils.export import onnx_qlinear_to_qdq
 
         if isinstance(conf, ONNXQlinear2QDQConfig):
             add_nodes, remove_nodes, inits = onnx_qlinear_to_qdq(self._model, self._input_name_to_nodes)
diff --git a/neural_compressor/model/tensorflow_model.py b/neural_compressor/model/tensorflow_model.py
index 0caadc53dbd..740d72d1339 100644
--- a/neural_compressor/model/tensorflow_model.py
+++ b/neural_compressor/model/tensorflow_model.py
@@ -1009,7 +1009,7 @@ def export(self, save_path, conf):
                     + "we reset opset_version={} here".format(conf.opset_version)
                 )
 
-            from neural_compressor.experimental.export import tf_to_fp32_onnx, tf_to_int8_onnx
+            from neural_compressor.utils.export import tf_to_fp32_onnx, tf_to_int8_onnx
 
             inputs_as_nchw = conf.kwargs.get("inputs_as_nchw", None)
             if conf.dtype == "int8":
diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py
index 31502dbae6b..7338f196d46 100644
--- a/neural_compressor/model/torch_model.py
+++ b/neural_compressor/model/torch_model.py
@@ -418,7 +418,7 @@ def export(
                 "but the torch version found is {}".format(Version("1.12.0"), version)
             )
 
-        from neural_compressor.experimental.export import torch_to_fp32_onnx, torch_to_int8_onnx
+        from neural_compressor.utils.export import torch_to_fp32_onnx, torch_to_int8_onnx
 
         if conf.dtype == "int8":
             torch_to_int8_onnx(
diff --git a/neural_compressor/onnxrt/utils/onnx_model.py b/neural_compressor/onnxrt/utils/onnx_model.py
index 65e1115a692..801416f7f64 100644
--- a/neural_compressor/onnxrt/utils/onnx_model.py
+++ b/neural_compressor/onnxrt/utils/onnx_model.py
@@ -648,7 +648,7 @@ def find_ffn_matmul(self, attention_index, attention_matmul_list, block_len):
     def export(self, save_path, conf):
         """Export Qlinear to QDQ model."""
         from neural_compressor.config import ONNXQlinear2QDQConfig
-        from neural_compressor.experimental.export import onnx_qlinear_to_qdq
+        from neural_compressor.utils.export import onnx_qlinear_to_qdq
 
         if isinstance(conf, ONNXQlinear2QDQConfig):
             if len(self._input_name_to_nodes) == 0:
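Note: the hunks above only change where the export helpers are imported from (neural_compressor.utils.export instead of neural_compressor.experimental.export); the public model.export(save_path, conf) entry point keeps the same signature. A minimal caller-side sketch, assuming `q_model` is an already-quantized INC ONNX model object and the output file name is hypothetical:

    from neural_compressor.config import ONNXQlinear2QDQConfig

    # export() now resolves onnx_qlinear_to_qdq from neural_compressor.utils.export internally.
    q_model.export("int8_qdq_model.onnx", ONNXQlinear2QDQConfig())
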
conf.dtype == "int8": torch_to_int8_onnx( diff --git a/neural_compressor/onnxrt/utils/onnx_model.py b/neural_compressor/onnxrt/utils/onnx_model.py index 65e1115a692..801416f7f64 100644 --- a/neural_compressor/onnxrt/utils/onnx_model.py +++ b/neural_compressor/onnxrt/utils/onnx_model.py @@ -648,7 +648,7 @@ def find_ffn_matmul(self, attention_index, attention_matmul_list, block_len): def export(self, save_path, conf): """Export Qlinear to QDQ model.""" from neural_compressor.config import ONNXQlinear2QDQConfig - from neural_compressor.experimental.export import onnx_qlinear_to_qdq + from neural_compressor.utils.export import onnx_qlinear_to_qdq if isinstance(conf, ONNXQlinear2QDQConfig): if len(self._input_name_to_nodes) == 0: diff --git a/neural_compressor/torch/export/__init__.py b/neural_compressor/torch/export/__init__.py index 6d7af54f5c5..e3e4775e986 100644 --- a/neural_compressor/torch/export/__init__.py +++ b/neural_compressor/torch/export/__init__.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from neural_compressor.torch.export._export import export_model_for_pt2e_quant, export +from neural_compressor.torch.export.pt2e_export import export_model_for_pt2e_quant, export diff --git a/neural_compressor/torch/export/_export.py b/neural_compressor/torch/export/pt2e_export.py similarity index 100% rename from neural_compressor/torch/export/_export.py rename to neural_compressor/torch/export/pt2e_export.py diff --git a/neural_compressor/torch/utils/utility.py b/neural_compressor/torch/utils/utility.py index f88c768cfed..6e0b4cb18b9 100644 --- a/neural_compressor/torch/utils/utility.py +++ b/neural_compressor/torch/utils/utility.py @@ -22,8 +22,7 @@ from torch.ao.quantization.quantizer.x86_inductor_quantizer import QuantizationConfig, X86InductorQuantizer from typing_extensions import TypeAlias -from neural_compressor.common import logger -from neural_compressor.common.utils import Mode +from neural_compressor.common.utils import LazyImport, Mode, logger OP_NAME_AND_TYPE_TUPLE_TYPE: TypeAlias = Tuple[str, Union[torch.nn.Module, Callable]] diff --git a/neural_compressor/utils/export/__init__.py b/neural_compressor/utils/export/__init__.py new file mode 100644 index 00000000000..56c3604ffe8 --- /dev/null +++ b/neural_compressor/utils/export/__init__.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Intel Neural Compressor Export.""" + +from .torch2onnx import torch_to_fp32_onnx, torch_to_int8_onnx +from .qlinear2qdq import onnx_qlinear_to_qdq +from .tf2onnx import tf_to_fp32_onnx, tf_to_int8_onnx diff --git a/neural_compressor/utils/export/qlinear2qdq.py b/neural_compressor/utils/export/qlinear2qdq.py new file mode 100644 index 00000000000..a6f189596ce --- /dev/null +++ b/neural_compressor/utils/export/qlinear2qdq.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Helper functions to export onnx model from QLinear ops to QDQ.""" +from neural_compressor.adaptor.ox_utils.util import find_by_name +from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport + +numpy_helper = LazyImport("onnx.numpy_helper") + + +def check_model(model): + """Check optype for input model. + + Args: + model (ModelProto): onnx model. + """ + has_integerop = False + has_qlinearop = False + for node in model.graph.node: + if node.op_type.endswith("Integer"): + has_integerop = True + elif node.op_type.startswith("QLinear"): + has_qlinearop = True + elif node.op_type in ["QAttention", "QGemm", "QEmbedLayerNormalization"]: + has_qlinearop = True + elif node.op_type in ["Gather"]: + input_data = find_by_name(node.input[0], model.graph.initializer) + if input_data is not None and numpy_helper.to_array(input_data).dtype in ["int8", "uint8"]: + has_qlinearop = True + if has_integerop: + logger.info("This model has Integer ops, these ops will be skipped.") + if has_qlinearop: + return True + else: + logger.info("This model has no QLinear ops, save the original model.") + return False + + +def onnx_qlinear_to_qdq( + model, + input_name_to_nodes, +): + """Export ONNX QLinearops model into QDQ model. + + Args: + model (ModelProto): int8 onnx model. + input_name_to_nodes (dict): the mapping of tensor name and its destination nodes. 
+ """ + from neural_compressor.adaptor.ox_utils.operators import QOPERATORS + + add_nodes = [] + remove_nodes = [] + inits = [] + if check_model(model): + for node in model.graph.node: + if node.op_type in QOPERATORS: + if node.output[0] not in input_name_to_nodes: + continue + children = [] + for out in node.output: + children.extend(input_name_to_nodes[node.output[0]]) + converter = QOPERATORS[node.op_type](node, children, model.graph.initializer) + done, add_node, init = converter.convert() + if done: + add_nodes.extend(add_node) + inits.extend(init) + remove_nodes.append(node) + return add_nodes, remove_nodes, inits diff --git a/neural_compressor/utils/export/tf2onnx.py b/neural_compressor/utils/export/tf2onnx.py new file mode 100644 index 00000000000..8e0bf77b8e5 --- /dev/null +++ b/neural_compressor/utils/export/tf2onnx.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Helper functions to export model from TensorFlow to ONNX.""" + +import re + +from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport + +t2o = LazyImport("tf2onnx") + + +def _split_nodename_and_shape(name): + """Split input name with shape into name and shape.""" + # pattern for a node name + inputs = [] + shapes = {} + # input takes in most cases the format name:0, where 0 is the output number + # in some cases placeholders don't have a rank which onnx can't handle so we let uses override the shape + # by appending the same, ie : [1,28,28,3] + name_pattern = r"(?:([\w\d/\-\._:]+)(\[[\-\d,]+\])?),?" + splits = re.split(name_pattern, name) + for i in range(1, len(splits), 3): + inputs.append(splits[i] + ":0") + if splits[i + 1] is not None: + shape = [int(n) for n in splits[i + 1][1:-1].split(",")] + shape = [n if n >= 0 else None for n in shape] + shapes[splits[i] + ":0"] = shape + if not shapes: + shapes = None + return inputs, shapes + + +def tf_to_fp32_onnx(graph_def, save_path, opset_version=14, input_names=None, output_names=None, inputs_as_nchw=None): + """Export FP32 Tensorflow model into FP32 ONNX model using tf2onnx tool. + + Args: + graph_def (graph_def to convert): fp32 graph_def. + save_path (str): save path of ONNX model. + opset_version (int, optional): opset version. Defaults to 14. + input_names (list, optional): input names. Defaults to None. + output_names (list, optional): output names. Defaults to None. + inputs_as_nchw (list, optional): transpose the input. Defaults to None. 
+ """ + shape_override = None + if isinstance(input_names, str): + input_names, shape_override = _split_nodename_and_shape(input_names) + else: + input_names[:] = [o + ":0" for o in input_names] + output_names[:] = [o + ":0" for o in output_names] + t2o.convert.from_graph_def( + graph_def=graph_def, + input_names=input_names, + output_names=output_names, + inputs_as_nchw=inputs_as_nchw, + shape_override=shape_override, + opset=opset_version, + output_path=save_path, + ) + info = "The FP32 ONNX Model exported to path: {0}".format(save_path) + logger.info("*" * len(info)) + logger.info(info) + logger.info("*" * len(info)) + + +def tf_to_int8_onnx( + int8_model, save_path, opset_version: int = 14, input_names=None, output_names=None, inputs_as_nchw=None +): + """Export INT8 Tensorflow model into INT8 ONNX model. + + Args: + int8_model (tensorflow ITEX QDQ model): int8 model. + save_path (str): save path of ONNX model. + opset_version (int, optional): opset version. Defaults to 14. + input_names (list, optional): input names. Defaults to None. + output_names (list, optional): output names. Defaults to None. + inputs_as_nchw (list, optional): transpose the input. Defaults to None. + """ + shape_override = None + if isinstance(input_names, str): + input_names, shape_override = _split_nodename_and_shape(input_names) + else: + input_names[:] = [o + ":0" for o in input_names] + output_names[:] = [o + ":0" for o in output_names] + onnx_convert_graph = "./converted_graph.onnx" + from neural_compressor.adaptor.tf_utils.tf2onnx_converter import TensorflowQDQToOnnxQDQConverter + + TensorflowQDQToOnnxQDQConverter( + int8_model, input_names, output_names, shape_override, inputs_as_nchw, opset_version + ).convert(onnx_convert_graph) + + import onnxruntime as ort + + sess_options = ort.SessionOptions() + sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL + sess_options.optimized_model_filepath = save_path + import onnx + + model = onnx.load(onnx_convert_graph) + ort.InferenceSession(model.SerializeToString(), sess_options) + info = "The INT8 ONNX Model is exported to path: {0}".format(save_path) + logger.info("*" * len(info)) + logger.info(info) + logger.info("*" * len(info)) diff --git a/neural_compressor/utils/export/torch2onnx.py b/neural_compressor/utils/export/torch2onnx.py new file mode 100644 index 00000000000..fdd332faa20 --- /dev/null +++ b/neural_compressor/utils/export/torch2onnx.py @@ -0,0 +1,423 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Helper functions to export model from PyTorch/TensorFlow to ONNX.""" + +import os +from collections import UserDict + +from neural_compressor.adaptor.torch_utils.util import input2tuple +from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport + +torch = LazyImport("torch") +onnx = LazyImport("onnx") +ort = LazyImport("onnxruntime") +ortq = LazyImport("onnxruntime.quantization") + + +def _prepare_inputs(pt_model, input_names, example_inputs): + """Prepare input_names and example_inputs.""" + if isinstance(example_inputs, dict) or isinstance(example_inputs, UserDict): + input_names = input_names or list(example_inputs.keys()) + if isinstance(example_inputs, UserDict): + example_inputs = dict(example_inputs) + # match input_names with inspected input_order, especially for bert in hugginface. + elif input_names and len(input_names) > 1: + import inspect + + input_order = inspect.signature(pt_model.forward).parameters.keys() + flag = [name in input_order for name in input_names] # whether should be checked + if all(flag): + new_input_names = [] + new_example_inputs = [] + for name in input_order: + if name in input_names: + new_input_names.append(name) + id = input_names.index(name) + new_example_inputs.append(example_inputs[id]) + input_names = new_input_names + example_inputs = new_example_inputs + example_inputs = input2tuple(example_inputs) + return input_names, example_inputs + + +def get_node_mapping( + fp32_model, + fp32_onnx_path, +): + """Get PyTorch module and ONNX node mapping. + + Args: + fp32_model (torch.nn.Module): quantization configuration from PyTorch. + fp32_onnx_path (str): path to fp32 onnx model. + + Returns: + module_node_mapping: op mapping from PyTorch to ONNX. + """ + + def check_data(op_type, data, module_dict): + for name, value in module_dict.items(): + if value.shape == data.shape: + if (value == data).all(): + module_dict.pop(name) + return name + return None + + module_dict = {} + for name, module in fp32_model.named_modules(): + if ( + "Conv" in str(module.__class__.__name__) + or "Embedding" in str(module.__class__.__name__) + or "Linear" in str(module.__class__.__name__) + ): + if hasattr(module, "weight"): + value = module.weight.detach().cpu().numpy() + module_dict[name] = value + + module_node_mapping = {} + fp32_onnx_model = onnx.load(fp32_onnx_path) + initializer_data = {tensor.name: tensor for tensor in fp32_onnx_model.graph.initializer} + from onnx import numpy_helper + + for node in fp32_onnx_model.graph.node: + if node.op_type in op_types_to_quantize: + if node.op_type == "MatMul" and node.input[1] in initializer_data: + data = numpy_helper.to_array(initializer_data[node.input[1]]).T + elif node.op_type == "Gather" and node.input[0] in initializer_data: + data = numpy_helper.to_array(initializer_data[node.input[0]]) + elif node.op_type in ["Gemm"]: + data = numpy_helper.to_array(initializer_data[node.input[1]]) + else: # pragma: no cover + continue + pt_name = check_data(node.op_type, data, module_dict) + if pt_name: + module_node_mapping[pt_name] = node.name + return module_node_mapping + + +def get_quantizable_onnx_ops(int8_model, module_node_mapping): + """Get quantizable onnx ops. + + Args: + int8_model (torch.nn.Module): PyTorch int8 model. + module_node_mapping (dict): op mapping from PyTorch to ONNX. + + Returns: + quantize_nodes: all onnx node that should be quantized. 
+ """ + quantize_nodes = [] + for name, module in int8_model.named_modules(): + if ( + "Conv" in str(module.__class__.__name__) + or "Embedding" in str(module.__class__.__name__) + or "Linear" in str(module.__class__.__name__) + ): + if hasattr(module, "weight") and callable(module.weight): + if module.weight().dtype in [torch.qint8, torch.quint8]: + if name.split(".module")[0] in module_node_mapping: + node = module_node_mapping[name.split(".module")[0]] + quantize_nodes.append(node) + return quantize_nodes + + +def dynamic_quant_export( + pt_fp32_model, + pt_int8_model, + save_path, + example_inputs, + q_config, + opset_version, + dynamic_axes, + input_names, + output_names, + weight_type, +): + """Export dynamic quantized model. + + Args: + pt_fp32_model (torch.nn.module): PyTorch FP32 model. + pt_int8_model (torch.nn.module): PyTorch INT8 model. + save_path (str): save path of ONNX model. + example_inputs (dict|list|tuple|torch.Tensor): used to trace torch model. + q_config (dict): containing quantization configuration. + opset_version (int, optional): opset version. Defaults to 14. + dynamic_axes (dict, optional): dynamic axes. Defaults to + {"input": {0: "batch_size"}, "output": {0: "batch_size"}}. + input_names (dict, optional): input names. Defaults to None. + output_names (dict, optional): output names. Defaults to None. + weight_type (str, optional): data types of weight of ONNX model + (only needed for exporting dynamic quantized model). Defaults to 'S8'. + """ + global op_types_to_quantize + op_types_to_quantize = ["MatMul", "Gemm", "Gather"] + + # pylint: disable=E1101 + fp32_onnx_path = save_path + ".tmp" if save_path else "int8-model.onnx.tmp" + torch_to_fp32_onnx( + pt_fp32_model, + fp32_onnx_path, + example_inputs, + opset_version=opset_version, + input_names=input_names, + output_names=output_names, + dynamic_axes=dynamic_axes, + verbose=False, + ) + + module_node_mapping = get_node_mapping(pt_fp32_model, fp32_onnx_path) + quantize_nodes = get_quantizable_onnx_ops(pt_int8_model, module_node_mapping) + + REDUCE_RANGE = q_config["reduce_range"] + if REDUCE_RANGE: + logger.info("Reduce range is {}".format(str(REDUCE_RANGE))) + + logger.info("Quantization format is not available when executing dynamic quantization.") + + if weight_type.upper() == "S8": + weight_type = ortq.QuantType.QInt8 + elif weight_type.upper() == "U8": + weight_type = ortq.QuantType.QUInt8 + else: + assert False, "Right now, we don't support weight type: {}, " "please use S8/U8.".format(weight_type) + + ortq.quantize_dynamic( + fp32_onnx_path, + save_path, + per_channel=True, + reduce_range=REDUCE_RANGE, + weight_type=weight_type, + nodes_to_quantize=quantize_nodes, + nodes_to_exclude=[], + extra_options={}, + ) + + os.remove(fp32_onnx_path) + + +def static_quant_export( + pt_int8_model, + save_path, + example_inputs, + q_config, + opset_version, + dynamic_axes, + input_names, + output_names, + quant_format, +): + """Export static quantized model. + + Args: + pt_int8_model (torch.nn.module): PyTorch INT8 model. + save_path (str): save path of ONNX model. + example_inputs (dict|list|tuple|torch.Tensor): used to trace torch model. + q_config (dict): containing quantization configuration. + opset_version (int, optional): opset version. Defaults to 14. + dynamic_axes (dict, optional): dynamic axes. Defaults to + {"input": {0: "batch_size"}, "output": {0: "batch_size"}}. + input_names (dict, optional): input names. Defaults to None. + output_names (dict, optional): output names. Defaults to None. 
+
+
+def torch_to_fp32_onnx(
+    pt_fp32_model,
+    save_path,
+    example_inputs,
+    opset_version=14,
+    dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
+    input_names=None,
+    output_names=None,
+    do_constant_folding=True,
+    verbose=True,
+):
+    """Export FP32 PyTorch model into FP32 ONNX model.
+
+    Args:
+        pt_fp32_model (torch.nn.module): PyTorch FP32 model.
+        save_path (str): save path of ONNX model.
+        example_inputs (dict|list|tuple|torch.Tensor): used to trace torch model.
+        opset_version (int, optional): opset version. Defaults to 14.
+        dynamic_axes (dict, optional): dynamic axes. Defaults to
+            {"input": {0: "batch_size"}, "output": {0: "batch_size"}}.
+        input_names (list, optional): input names. Defaults to None.
+        output_names (list, optional): output names. Defaults to None.
+        do_constant_folding (bool, optional): do constant folding or not. Defaults to True.
+        verbose (bool, optional): dump verbose or not. Defaults to True.
+    """
+    from neural_compressor.utils.pytorch import is_int8_model
+
+    assert is_int8_model(pt_fp32_model) is False, (
+        "The fp32 model is replaced during quantization. "
+        + "please customize an eval_func when quantizing, e.g. `lambda x: 1`."
+    )
+
+    input_names, example_inputs = _prepare_inputs(pt_fp32_model, input_names, example_inputs)
+
+    with torch.no_grad():
+        torch.onnx.export(
+            pt_fp32_model,
+            example_inputs,
+            save_path,
+            opset_version=opset_version,
+            input_names=input_names,
+            output_names=output_names,
+            dynamic_axes=dynamic_axes,
+            do_constant_folding=do_constant_folding,
+        )
+
+    if verbose:
+        info = "The FP32 ONNX Model exported to path: {0}".format(save_path)
+        logger.info("*" * len(info))
+        logger.info(info)
+        logger.info("*" * len(info))
+
+
+def torch_to_int8_onnx(
+    pt_fp32_model,
+    pt_int8_model,
+    save_path,
+    example_inputs,
+    q_config,
+    opset_version=14,
+    dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
+    input_names=None,
+    output_names=None,
+    quant_format: str = "QDQ",
+    weight_type: str = "S8",
+    verbose=True,
+):
+    """Export INT8 PyTorch model into INT8 ONNX model.
+
+    Args:
+        pt_fp32_model (torch.nn.module): PyTorch FP32 model.
+        pt_int8_model (torch.nn.module): PyTorch INT8 model.
+        save_path (str): save path of ONNX model.
+        example_inputs (dict|list|tuple|torch.Tensor): used to trace torch model.
+        q_config (dict): containing quantization configuration.
+        opset_version (int, optional): opset version. Defaults to 14.
+        dynamic_axes (dict, optional): dynamic axes. Defaults to
+            {"input": {0: "batch_size"}, "output": {0: "batch_size"}}.
+        input_names (list, optional): input names. Defaults to None.
+        output_names (list, optional): output names. Defaults to None.
+        quant_format (str, optional): quantization format of ONNX model. Defaults to 'QDQ'.
+        weight_type (str, optional): data types of weight of ONNX model
+            (only needed for exporting dynamic quantized model). Defaults to 'S8'.
+        verbose (bool, optional): dump verbose or not. Defaults to True.
+    """
+    from neural_compressor.utils.pytorch import is_int8_model
+
+    assert is_int8_model(pt_int8_model), (
+        "The exported model is not an INT8 model, " "please reset 'dtype' to 'FP32' or check your model."
+    )
+
+    assert q_config is not None, "'q_config' is needed when exporting an INT8 model."
+
+    quant_format = quant_format.upper()
+    if quant_format == "QDQ" and opset_version < 13:  # pragma: no cover
+        opset_version = 13
+        logger.warning(
+            "QDQ format requires opset_version >= 13, " + "we reset opset_version={} here".format(opset_version)
+        )
+
+    if q_config["approach"] == "post_training_dynamic_quant":
+        # dynamic quantization export follows these steps:
+        # "1. export FP32 PyTorch model to FP32 ONNX model. "
+        # "2. use FP32 ONNX model as the input model for post training dynamic quantization."
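A minimal usage sketch for the relocated PyTorch FP32 export helper; the toy model, tensor names, and output path are purely illustrative:

    import torch

    from neural_compressor.utils.export import torch_to_fp32_onnx

    # A small FP32 model and dummy input purely for illustration.
    model = torch.nn.Linear(4, 2)
    example_inputs = torch.randn(1, 4)

    # input/output names match the keys of the default dynamic_axes mapping.
    torch_to_fp32_onnx(
        model,
        "fp32_model.onnx",
        example_inputs,
        input_names=["input"],
        output_names=["output"],
    )
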
+        # TODO: will be removed once torch supports dynamic quantization export
+        dynamic_quant_export(
+            pt_fp32_model,
+            pt_int8_model,
+            save_path,
+            example_inputs,
+            q_config,
+            opset_version,
+            dynamic_axes,
+            input_names,
+            output_names,
+            weight_type,
+        )
+    else:
+        static_quant_export(
+            pt_int8_model,
+            save_path,
+            example_inputs,
+            q_config,
+            opset_version,
+            dynamic_axes,
+            input_names,
+            output_names,
+            quant_format,
+        )
+
+    if verbose:
+        info = "The INT8 ONNX Model exported to path: {0}".format(save_path)
+        logger.info("*" * len(info))
+        logger.info(info)
+        logger.info("*" * len(info))
diff --git a/test/3x/torch/quantization/test_pt2e_quant.py b/test/3x/torch/quantization/test_pt2e_quant.py
index 7d1aab562d3..3857832598a 100644
--- a/test/3x/torch/quantization/test_pt2e_quant.py
+++ b/test/3x/torch/quantization/test_pt2e_quant.py
@@ -29,7 +29,7 @@ def _is_ipex_imported():
     monkeypatch.setattr("neural_compressor.torch.quantization.config.is_ipex_imported", _is_ipex_imported)
     monkeypatch.setattr("neural_compressor.torch.quantization.algorithm_entry.is_ipex_imported", _is_ipex_imported)
-    monkeypatch.setattr("neural_compressor.torch.export._export.is_ipex_imported", _is_ipex_imported)
+    monkeypatch.setattr("neural_compressor.torch.export.pt2e_export.is_ipex_imported", _is_ipex_imported)
 
 
 class TestPT2EQuantization: