From 3cb6d38f3e5a74c5657b0614c012c207dae4d5b1 Mon Sep 17 00:00:00 2001 From: "Wang, Chang" Date: Wed, 27 Dec 2023 20:35:46 +0800 Subject: [PATCH] Fix llm model accuracy regression with IPEX 2.1.100 (#1499) Signed-off-by: Wang, Chang1 --- neural_compressor/adaptor/pytorch.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 8839444bc2e..7acfd68d72f 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -3365,7 +3365,6 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func): self._cfg_to_qconfig(tune_cfg, smooth_quant=True) update_sq_scale(self.ipex_config_path, smoothquant_scale_info) model._model.load_qconf_summary(qconf_summary=self.ipex_config_path) - # real calibration for other operators try: # IPEX may raise an error on the second iteration. @@ -3383,8 +3382,10 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func): + "using scale info from SmoothQuant for Linear and " + "one iter calibration for other ops." ) - model._model.save_qconf_summary(qconf_summary=self.ipex_config_path) + if self.version.release > Version("2.1.0").release: + update_sq_scale(self.ipex_config_path, smoothquant_scale_info) + model._model.load_qconf_summary(qconf_summary=self.ipex_config_path) self._ipex_post_quant_process(model, q_model, dataloader, inplace=inplace) with open(self.ipex_config_path, "r") as f: