diff --git a/neural_compressor/adaptor/onnxrt.py b/neural_compressor/adaptor/onnxrt.py
index af8a43b2ac7..e28f7213272 100644
--- a/neural_compressor/adaptor/onnxrt.py
+++ b/neural_compressor/adaptor/onnxrt.py
@@ -95,6 +95,8 @@ def __init__(self, framework_specific_info):
             config_file = 'onnxrt_cuda.yaml'
         elif self.backend == 'DnnlExecutionProvider':
             config_file = 'onnxrt_dnnl.yaml'
+        elif self.backend == 'DmlExecutionProvider':
+            config_file = 'onnxrt_dml.yaml'
         else:  # pragma: no cover
             assert False, "{} provider is not supported in current environment, " \
                 "supported providers: {}".format(self.backend,
diff --git a/neural_compressor/adaptor/onnxrt_dml.yaml b/neural_compressor/adaptor/onnxrt_dml.yaml
new file mode 100644
index 00000000000..9040692d881
--- /dev/null
+++ b/neural_compressor/adaptor/onnxrt_dml.yaml
@@ -0,0 +1,67 @@
+## Copyright (c) 2021 Intel Corporation
+##
+## Licensed under the Apache License, Version 2.0 (the "License");
+## you may not use this file except in compliance with the License.
+## You may obtain a copy of the License at
+##
+##   http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing, software
+## distributed under the License is distributed on an "AS IS" BASIS,
+## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+## See the License for the specific language governing permissions and
+## limitations under the License.
+##
+#
+
+-
+  version:
+    name: '1.13.1'
+  int8: &ref_1_13 {
+    'static': &ref_1_13_static {
+      'Conv': &default_static_qlinear_qdq {
+        'weight': &int8_sym_pertensor_minmax {
+          'dtype': ['int8'],
+          'scheme': ['sym'],
+          'granularity': ['per_tensor'],
+          'algorithm': ['minmax']
+        },
+        'activation': &uint8_asym_pertensor_minmax {
+          'dtype': ['uint8'],
+          'scheme': ['asym'],
+          'granularity': ['per_tensor'],
+          'algorithm': ['minmax']
+        },
+        'mode': ['QDQ', 'QLinear']
+      },
+      'MatMul': {
+        'weight': *int8_sym_pertensor_minmax,
+        'activation': *uint8_asym_pertensor_minmax,
+        'mode': ['QDQ', 'QLinear']
+      },
+      'Mul': &default_static_qlinear {
+        'weight': *int8_sym_pertensor_minmax,
+        'activation': *uint8_asym_pertensor_minmax,
+        'mode': ['QLinear']
+      },
+      'Relu': *default_static_qlinear_qdq,
+      'Clip': *default_static_qlinear_qdq,
+      'MaxPool': *default_static_qlinear_qdq,
+      'Add': *default_static_qlinear,
+    },
+  }
+
+  fp16: &common_fp16 ['Add', 'GlobalAveragePool', 'AveragePool', 'SpaceToDepth', 'Sigmoid', 'Mul',
+    'Softmax', 'Gemm', 'MatMul', 'Conv', 'Concat', 'Upsample', 'Pow', 'Sqrt', 'DepthToSpace',
+    'Clip', 'BatchNormalization', 'Transpose', 'Softmax', 'AveragePool', 'Squeeze', 'MaxPool',
+    'Relu', 'Concat']
+
+  recipes: &default_optimization
+    graph_optimization:  # from onnxruntime graph_optimization_level
+      level: ['DISABLE_ALL', 'ENABLE_BASIC', 'ENABLE_EXTENDED', 'ENABLE_ALL']
+
+-
+  version:
+    name: 'default'
+  int8: *ref_1_13
+  recipes:
+    <<: *default_optimization
diff --git a/neural_compressor/adaptor/ox_utils/operators/direct_q8.py b/neural_compressor/adaptor/ox_utils/operators/direct_q8.py
index 625061ed999..a4b4c5bea6a 100644
--- a/neural_compressor/adaptor/ox_utils/operators/direct_q8.py
+++ b/neural_compressor/adaptor/ox_utils/operators/direct_q8.py
@@ -18,7 +18,9 @@
 
 from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, qop_registry, QOperator
 
-@op_registry(op_types="Reshape, Transpose, Squeeze, Unsqueeze, Flatten, Expand, Slice")
+
+@op_registry(op_types="Reshape, Transpose, Squeeze, Unsqueeze, Flatten, Expand, Slice, " \
+                      "SpaceToDepth, DepthToSpace, Upsample")
 class Direct8BitOperator(Operator):
     """Direct8Bit Operator."""
 
diff --git a/neural_compressor/adaptor/ox_utils/util.py b/neural_compressor/adaptor/ox_utils/util.py
index 0040db9f21e..8fe8d8443bb 100644
--- a/neural_compressor/adaptor/ox_utils/util.py
+++ b/neural_compressor/adaptor/ox_utils/util.py
@@ -72,13 +72,15 @@
     'onnxrt_trt_ep': 'TensorrtExecutionProvider',
     'onnxrt_dnnl_ep': 'DnnlExecutionProvider',
     'onnxrt_cuda_ep': 'CUDAExecutionProvider',
+    'onnxrt_dml_ep': 'DmlExecutionProvider'
 }
 
 ONNXRT_BACKENDS = {
     'CPUExecutionProvider': 'default',
     'TensorrtExecutionProvider': 'onnxrt_trt_ep',
     'CUDAExecutionProvider': 'onnxrt_cuda_ep',
-    'DnnlExecutionProvider': 'onnxrt_dnnl_ep'
+    'DnnlExecutionProvider': 'onnxrt_dnnl_ep',
+    'DmlExecutionProvider': 'onnxrt_dml_ep'
 }
 
 def dtype_to_name(dtype_mapping, dtype):
@@ -536,4 +538,4 @@ def to_numpy(data):
         assert False, "The input data for onnx model is {}, which is not supported " \
                       "to convert to numpy ndarrays.".format(type(data))
     else:
-        return data
\ No newline at end of file
+        return data
diff --git a/neural_compressor/config.py b/neural_compressor/config.py
index fde44331395..c3d4c08951a 100644
--- a/neural_compressor/config.py
+++ b/neural_compressor/config.py
@@ -259,7 +259,7 @@ class BenchmarkConfig:
         inputs (list, optional): A list of strings containing the inputs of model. Default is an empty list.
         outputs (list, optional): A list of strings containing the outputs of model. Default is an empty list.
         backend (str, optional): Backend name for model execution. Supported values include: 'default', 'itex',
-                                 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep'.
+                                 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep', 'onnxrt_dml_ep'.
                                  Default value is 'default'.
         warmup (int, optional): The number of iterations to perform warmup before running performance tests.
                                 Default value is 5.
@@ -328,7 +328,7 @@ def backend(self):
     def backend(self, backend):
         """Set backend."""
         if _check_value('backend', backend, str, [
-                'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep']):
+                'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep', 'onnxrt_dml_ep']):
             self._backend = backend
 
     @property
@@ -694,7 +694,8 @@ class _BaseQuantizationConfig:
         inputs: Inputs of model, only required in tensorflow.
         outputs: Outputs of model, only required in tensorflow.
         backend: Backend for model execution.
-                 Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep'
+                 Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep',
+                 'onnxrt_dml_ep'
         domain: Model domain. Support 'auto', 'cv', 'object_detection', 'nlp' and 'recommendation_system'.
                 Adaptor will use specific quantization settings for different domains automatically, and
                 explicitly specified quantization settings will override the automatic setting.
@@ -1102,7 +1103,7 @@ def backend(self):
     @backend.setter
     def backend(self, backend):
         if _check_value('backend', backend, str, [
-                'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep']):
+                'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep', 'onnxrt_dml_ep']):
             self._backend = backend
 
     @property
@@ -1148,7 +1149,8 @@ class PostTrainingQuantConfig(_BaseQuantizationConfig):
     Args:
         device: Support 'cpu' and 'gpu'.
        backend: Backend for model execution.
-                 Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep'
+                 Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep',
+                 'onnxrt_dml_ep'
         domain: Model domain. Support 'auto', 'cv', 'object_detection', 'nlp' and 'recommendation_system'.
                 Adaptor will use specific quantization settings for different domains automatically, and
                 explicitly specified quantization settings will override the automatic setting.
@@ -1309,7 +1311,8 @@ class QuantizationAwareTrainingConfig(_BaseQuantizationConfig):
     Args:
         device: Support 'cpu' and 'gpu'.
         backend: Backend for model execution.
-                 Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep'
+                 Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep',
+                 'onnxrt_dml_ep'
         inputs: Inputs of model, only required in tensorflow.
         outputs: Outputs of model, only required in tensorflow.
         op_type_dict: Tuning constraints on optype-wise for advance user to reduce tuning space.
@@ -1779,8 +1782,8 @@ class MixedPrecisionConfig(object):
         device (str, optional): Device for execution.
                                 Support 'cpu' and 'gpu', default is 'cpu'.
         backend (str, optional): Backend for model execution.
-                                 Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep'
-                                 default is 'default'.
+                                 Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep',
+                                 'onnxrt_dml_ep'. Default is 'default'.
         precisions ([str, list], optional): Target precision for mix precision conversion.
                                 Support 'bf16' and 'fp16', default is 'bf16'.
         model_name (str, optional): The name of the model. Default value is empty.
@@ -1939,7 +1942,7 @@ def backend(self):
     def backend(self, backend):
         """Set backend."""
         if _check_value('backend', backend, str, [
-                'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep']):
+                'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep', 'onnxrt_dml_ep']):
             self._backend = backend
 
     @property
diff --git a/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py b/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py
index f527e22195b..01d148c433b 100644
--- a/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py
+++ b/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py
@@ -1482,7 +1482,7 @@ def test_backend(self, mock_warning):
         with self.assertRaises(AssertionError) as context:
             adaptor = FRAMEWORKS[framework](framework_specific_info)
         self.assertEqual(str(context.exception), "'test_backend' backend is not supported, "\
-            "supported backends include ['default', 'onnxrt_trt_ep', 'onnxrt_dnnl_ep', 'onnxrt_cuda_ep']")
+            "supported backends include ['default', 'onnxrt_trt_ep', 'onnxrt_dnnl_ep', 'onnxrt_cuda_ep', 'onnxrt_dml_ep']")
 
         framework_specific_info = {"device": "cpu",
                                    "backend": "onnxrt_trt_ep",
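
Usage sketch (illustrative only): the new 'onnxrt_dml_ep' string is the value accepted by the
backend validators added in config.py, and the ONNX Runtime adaptor maps it to
DmlExecutionProvider and loads onnxrt_dml.yaml. Assuming the standard "fit" entry point from
neural_compressor.quantization, selecting the DirectML backend for post-training quantization
would look roughly like the snippet below; "model.onnx" and calib_dataloader are placeholders,
not files or objects from this patch.

    from neural_compressor.config import PostTrainingQuantConfig
    from neural_compressor.quantization import fit

    # 'onnxrt_dml_ep' is the backend value introduced by this change.
    conf = PostTrainingQuantConfig(backend="onnxrt_dml_ep")

    # "model.onnx" and calib_dataloader are illustrative placeholders supplied by the user.
    q_model = fit(model="model.onnx", conf=conf, calib_dataloader=calib_dataloader)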