From 4ce9de5feb472dbab57a3bb9369c8b7ba1c57305 Mon Sep 17 00:00:00 2001
From: yuwenzho
Date: Mon, 21 Aug 2023 11:39:34 +0800
Subject: [PATCH] Enhance ONNXRT backend check (#1160)

Signed-off-by: yuwenzho
---
 neural_compressor/adaptor/onnxrt.py           | 23 ++++++++++---
 .../onnxrt_adaptor/test_adaptor_onnxrt.py     | 27 +++++++++++++++
 .../onnxrt_adaptor/test_onnxrt_operators.py   | 34 +++++++++----------
 3 files changed, 61 insertions(+), 23 deletions(-)

diff --git a/neural_compressor/adaptor/onnxrt.py b/neural_compressor/adaptor/onnxrt.py
index 75d9b6e6cb3..af8a43b2ac7 100644
--- a/neural_compressor/adaptor/onnxrt.py
+++ b/neural_compressor/adaptor/onnxrt.py
@@ -64,17 +64,13 @@ def __init__(self, framework_specific_info):
         self.dynamic = framework_specific_info["approach"] == "post_training_dynamic_quant"
         self.domain = framework_specific_info.get("domain", "auto")
         self.recipes = framework_specific_info.get("recipes", {})
+        self._check_backend_available(framework_specific_info["backend"])
         self.backend = PROVIDERS[framework_specific_info["backend"]]
         self.performance_only = framework_specific_info.get("performance_only", False)
         self.use_bf16 = framework_specific_info.get("use_bf16", False) and \
             self.backend in ort.get_available_providers()
         self.use_fp16 = framework_specific_info.get("use_fp16", False)
 
-        if self.backend not in ort.get_all_providers():
-            logger.warning("{} backend is not supported in current environment, "
-                "supported backends: {}".format(ONNXRT_BACKENDS[self.backend],
-                [ONNXRT_BACKENDS[i] for i in ort.get_all_providers() if i in ONNXRT_BACKENDS]))
-
         # get quantization format according to framework_specific_info
         if (not self.dynamic and "format" in framework_specific_info and \
             framework_specific_info["format"].lower() == 'qdq') or \
@@ -324,6 +320,23 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None):
         tmp_model.topological_sort()
         return tmp_model
 
+    def _check_backend_available(self, backend):
+        """Check backend is available or not."""
+        if backend not in PROVIDERS:
+            assert False, "'{}' backend is not supported, " \
+                "supported backends include {}".format(backend, \
+                [provider for provider in PROVIDERS.keys()])
+
+        if backend in ["onnxrt_trt_ep", "onnxrt_cuda_ep"] and \
+            self.device != "gpu":
+            logger.warning("Backend `{}` requires a GPU device. Reset device to 'gpu'.".format(backend))
+            self.device = "gpu"
+
+        ep = PROVIDERS[backend]
+        if ep not in ort.get_available_providers():
+            logger.warning("Specified provider '{}' is not in available provider names. "\
+                "Fallback to available providers: '{}'".format(ep, ", ".join(ort.get_available_providers())))
+
     def _reset_calib_iter(self, data_loader, cfg_calib_sampling_size, cfg_calib_iter):
         """Check and reset calibration iterations according to calib_sampleing_size and dataloader batch_size."""
         if isinstance(data_loader, BaseDataLoader):
diff --git a/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py b/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py
index 2f0663f6204..f527e22195b 100644
--- a/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py
+++ b/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py
@@ -1,6 +1,7 @@
 import os
 import shutil
 import unittest
+from unittest.mock import patch
 import onnxruntime as ort
 import torch
 import torchvision
@@ -1471,6 +1472,32 @@ def test_dataloader_input(self):
         q_model = quantizer.fit()
         self.assertNotEqual(q_model, None)
 
+    @patch('logging.Logger.warning')
+    def test_backend(self, mock_warning):
+        framework_specific_info = {"device": "cpu",
+                                   "backend": "test_backend",
+                                   "approach": "post_training_static_quant",
+                                   "workspace_path": './nc_workspace'}
+        framework = "onnxrt_qlinearops"
+        with self.assertRaises(AssertionError) as context:
+            adaptor = FRAMEWORKS[framework](framework_specific_info)
+        self.assertEqual(str(context.exception), "'test_backend' backend is not supported, "\
+            "supported backends include ['default', 'onnxrt_trt_ep', 'onnxrt_dnnl_ep', 'onnxrt_cuda_ep']")
+
+        framework_specific_info = {"device": "cpu",
+                                   "backend": "onnxrt_trt_ep",
+                                   "approach": "post_training_static_quant",
+                                   "workspace_path": './nc_workspace'}
+        framework = "onnxrt_qlinearops"
+        adaptor = FRAMEWORKS[framework](framework_specific_info)
+
+        call_args_list = mock_warning.call_args_list
+        first_warning_args = call_args_list[0][0]
+        self.assertEqual(first_warning_args[0], "Backend `onnxrt_trt_ep` requires a GPU device. Reset device to 'gpu'.")
+        second_warning_args = call_args_list[1][0]
+        self.assertIn("not in available provider names. Fallback to available providers", second_warning_args[0])
+
+        self.assertEqual(mock_warning.call_count, 2)
 
 if __name__ == "__main__":
diff --git a/test/adaptor/onnxrt_adaptor/test_onnxrt_operators.py b/test/adaptor/onnxrt_adaptor/test_onnxrt_operators.py
index da43afa2f19..46dd63c24f0 100644
--- a/test/adaptor/onnxrt_adaptor/test_onnxrt_operators.py
+++ b/test/adaptor/onnxrt_adaptor/test_onnxrt_operators.py
@@ -1377,8 +1377,6 @@ def get_fp16_mixed_precision_model(self, model):
         converted_model = fit(model, config)
         return converted_model
 
-    @unittest.skipIf('CUDAExecutionProvider' not in ort.get_all_providers(),
-                     "skip since CUDAExecutionProvider is not supported")
     def test_fp16(self):
         optypes = ['Sum', 'Sub', 'Div', 'Pow', 'Add']
         for optype in optypes:
@@ -1391,7 +1389,7 @@ def test_fp16(self):
         convert_model = self.get_fp16_mixed_precision_model(model)
         self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()]))
         self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast']))
-        session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['CUDAExecutionProvider'])
+        session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers())
         outputs = session.run(None, input_data)
 
         optypes = ['Equal', 'Greater', 'GreaterOrEqual', 'Less', 'LessOrEqual']
@@ -1405,7 +1403,7 @@ def test_fp16(self):
         convert_model = self.get_fp16_mixed_precision_model(model)
         self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()]))
         self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast']))
-        session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['CUDAExecutionProvider'])
+        session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers())
         outputs = session.run(None, input_data)
 
         optypes = ['Abs', 'Exp', 'Log', 'Round', 'Sqrt', 'Softmax', 'Exp', 'Tanh', 'Sigmoid', 'LeakyRelu', 'Round']
@@ -1418,7 +1416,7 @@ def test_fp16(self):
         convert_model = self.get_fp16_mixed_precision_model(model)
         self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()]))
         self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast']))
-        session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['CUDAExecutionProvider'])
+        session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers())
         outputs = session.run(None, input_data)
 
         optypes = ['ReduceMean', 'ReduceL1', 'ReduceL2', 'ReduceLogSum', 'ReduceLogSumExp', 'ReduceMax', 'ReduceProd', \
@@ -1432,7 +1430,7 @@ def test_fp16(self):
         convert_model = self.get_fp16_mixed_precision_model(model)
         self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()]))
         self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast']))
-        session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['CUDAExecutionProvider'])
+        session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers())
         outputs = session.run(None, input_data)
 
         optypes = ['Gelu']
@@ -1445,7 +1443,7 @@ def test_fp16(self):
         convert_model = self.get_fp16_mixed_precision_model(model)
         self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()]))
         self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast']))
-        session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['CUDAExecutionProvider'])
+        session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers())
         outputs = session.run(None, input_data)
 
         optypes = ['BiasGelu', 'FastGelu']
@@ -1459,7 +1457,7 @@ def test_fp16(self):
         convert_model = self.get_fp16_mixed_precision_model(model)
         self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()]))
         self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast']))
-        session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['CUDAExecutionProvider'])
+        session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers())
         outputs = session.run(None, input_data)
 
@@ -1474,7 +1472,7 @@ def test_fp16(self):
         convert_model = self.get_fp16_mixed_precision_model(model)
         self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()]))
         self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast']))
-        session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['CUDAExecutionProvider'])
+        session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers())
         outputs = session.run(None, input_data)
 
         optypes = ['FusedMatMul']
@@ -1489,22 +1487,22 @@ def test_fp16(self):
         convert_model = self.get_fp16_mixed_precision_model(model)
         self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()]))
         self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast']))
-        session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['CUDAExecutionProvider'])
+        session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers())
         outputs = session.run(None, input_data)
 
         optypes = ['Gemm']
         for optype in optypes:
             inps = [['input1', TensorProto.FLOAT, (1,2)]]
-            outs = [['output', TensorProto.FLOAT, (1,2)]]
+            outs = [['output', TensorProto.FLOAT, (1,1)]]
             weights = [['input2', TensorProto.FLOAT, (2,1), np.random.random((2))],
-                       ['input3', TensorProto.FLOAT, (1,2), np.random.random((2))]]
+                       ['input3', TensorProto.FLOAT, (1,1), np.random.random((1))]]
             node_infos = [['test', ['input1', 'input2', 'input3'], ['output'], optype]]
             model = self.build_model(inps, outs, weights, node_infos)
             input_data = self.build_test_data(['input1'], [(1,2)], ['float32'])
             convert_model = self.get_fp16_mixed_precision_model(model)
             self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()]))
             self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast']))
-            session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['CUDAExecutionProvider'])
+            session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers())
             outputs = session.run(None, input_data)
 
         optypes = ['LayerNormalization']
@@ -1519,7 +1517,7 @@ def test_fp16(self):
         convert_model = self.get_fp16_mixed_precision_model(model)
         self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()]))
         self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast']))
-        session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['CUDAExecutionProvider'])
+        session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers())
         outputs = session.run(None, input_data)
 
         optypes = ['BatchNormalization']
@@ -1537,7 +1535,7 @@ def test_fp16(self):
         convert_model = self.get_fp16_mixed_precision_model(model)
         self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()]))
         self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast']))
-        session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['CUDAExecutionProvider'])
+        session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers())
         outputs = session.run(None, input_data)
 
     def get_bf16_mixed_precision_model(self, model):
@@ -1547,7 +1545,7 @@ def get_bf16_mixed_precision_model(self, model):
         converted_model = fit(model, config)
         return converted_model
 
-    @unittest.skipIf(not CpuInfo().bf16 or 'DnnlExecutionProvider' not in ort.get_all_providers(),
+    @unittest.skipIf(not CpuInfo().bf16 or 'DnnlExecutionProvider' not in ort.get_available_providers(),
                      "skip since DnnlExecutionProvider is not supported")
     def test_bf16(self):
         optypes = ['Sum', 'Sub', 'Div', 'Pow', 'Add']
@@ -1665,9 +1663,9 @@ def test_bf16(self):
         optypes = ['Gemm']
         for optype in optypes:
             inps = [['input1', TensorProto.FLOAT, (1,2)]]
-            outs = [['output', TensorProto.FLOAT, (1,2)]]
+            outs = [['output', TensorProto.FLOAT, (1,1)]]
             weights = [['input2', TensorProto.FLOAT, (2,1), np.random.random((2))],
-                       ['input3', TensorProto.FLOAT, [], np.random.random((1))]]
+                       ['input3', TensorProto.FLOAT, (1,1), np.random.random((1))]]
             node_infos = [['test', ['input1', 'input2', 'input3'], ['output'], optype]]
             model = self.build_model(inps, outs, weights, node_infos)
             input_data = self.build_test_data(['input1'], [(1,2)], ['float32'])
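
Usage sketch (not part of the patch): the new backend check can be exercised the same way the added unit test does. This is a minimal sketch assuming the FRAMEWORKS registry is importable from neural_compressor.adaptor, as in the test file above; keys and messages follow the patch.

    from neural_compressor.adaptor import FRAMEWORKS

    # A backend that is not a key of PROVIDERS now fails fast with an AssertionError
    # listing the supported backends, instead of only logging a warning later.
    framework_specific_info = {"device": "cpu",
                               "backend": "test_backend",
                               "approach": "post_training_static_quant",
                               "workspace_path": './nc_workspace'}
    try:
        FRAMEWORKS["onnxrt_qlinearops"](framework_specific_info)
    except AssertionError as err:
        print(err)  # "'test_backend' backend is not supported, supported backends include [...]"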