From d1f315f359440382d713a0a20c7927c7c0d252a1 Mon Sep 17 00:00:00 2001
From: yuwenzho
Date: Thu, 3 Aug 2023 10:33:54 +0800
Subject: [PATCH] Fix precision tuning bug for ONNX CUDA EP (#1133)

Signed-off-by: yuwenzho
---
 neural_compressor/strategy/utils/constant.py       | 2 +-
 test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/neural_compressor/strategy/utils/constant.py b/neural_compressor/strategy/utils/constant.py
index 771e27cbeda..0d6474f9f30 100644
--- a/neural_compressor/strategy/utils/constant.py
+++ b/neural_compressor/strategy/utils/constant.py
@@ -25,7 +25,7 @@
                     ('weight','scheme'), ('weight','algorithm'), ('weight','granularity'),
                     ('weight','bits'), ('weight','group_size'),
                     'sampling_size']
-PRECISION_SET_V2_0 = {'fp32', 'bf16'}
+PRECISION_SET_V2_0 = {'fp32', 'bf16', 'fp16'}
 
 auto_query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32']
 static_query_order = ['static', 'bf16', 'fp16', 'fp32']
diff --git a/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py b/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py
index 80d988a84f3..2f0663f6204 100644
--- a/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py
+++ b/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py
@@ -1231,6 +1231,12 @@ def eval(model):
                                    calib_dataloader=self.matmul_dataloader, eval_func=eval)
         self.assertTrue('QLinearMatMul' not in [i.op_type for i in q_model.nodes()])
 
+        config = PostTrainingQuantConfig(approach='static', backend='onnxrt_cuda_ep', device='gpu', quant_level=1)
+        q_model = quantization.fit(self.distilbert_model, config,
+                                   calib_dataloader=DummyNLPDataloader_dict("distilbert-base-uncased-finetuned-sst-2-english"),
+                                   eval_func=eval)
+        self.assertTrue('QLinearMatMul' in [i.op_type for i in q_model.nodes()])
+
         config = PostTrainingQuantConfig(approach='static', recipes={'optypes_to_exclude_output_quant': ['MatMul']})
         q_model = quantization.fit(self.matmul_model, config,
                                    calib_dataloader=self.matmul_dataloader, eval_func=eval)
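
Usage note (not part of the patch): a minimal sketch of how the fixed path is typically exercised through the neural_compressor 2.x API, based on the configuration used in the added test. "model.onnx" and CalibDataloader are placeholder names for a user-supplied ONNX model and calibration dataloader, and an onnxruntime build with the CUDA execution provider is assumed. With 'fp16' added to PRECISION_SET_V2_0, quant_level=1 tuning on onnxrt_cuda_ep treats fp16 as a precision dtype, and the new test checks that INT8 kernels such as QLinearMatMul are still produced.

    # Minimal sketch of post-training static quantization on the ONNX CUDA EP.
    # Placeholders (assumptions, not from the patch): "model.onnx", CalibDataloader.
    from neural_compressor import quantization
    from neural_compressor.config import PostTrainingQuantConfig

    config = PostTrainingQuantConfig(approach='static',
                                     backend='onnxrt_cuda_ep',
                                     device='gpu',
                                     quant_level=1)
    q_model = quantization.fit('model.onnx',                      # path or onnx.ModelProto
                               config,
                               calib_dataloader=CalibDataloader())
    q_model.save('model_int8.onnx')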