From 3cb6d38f3e5a74c5657b0614c012c207dae4d5b1 Mon Sep 17 00:00:00 2001
From: "Wang, Chang" <chang1.wang@intel.com>
Date: Wed, 27 Dec 2023 20:35:46 +0800
Subject: [PATCH] Fix LLM model accuracy regression with IPEX 2.1.100 (#1499)

Signed-off-by: Wang, Chang1 <chang1.wang@intel.com>
---
 neural_compressor/adaptor/pytorch.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
index 8839444bc2e..7acfd68d72f 100644
--- a/neural_compressor/adaptor/pytorch.py
+++ b/neural_compressor/adaptor/pytorch.py
@@ -3365,7 +3365,6 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
         self._cfg_to_qconfig(tune_cfg, smooth_quant=True)
         update_sq_scale(self.ipex_config_path, smoothquant_scale_info)
         model._model.load_qconf_summary(qconf_summary=self.ipex_config_path)
-
         # real calibration for other operators
         try:
             # IPEX may raise an error on the second iteration.
@@ -3383,8 +3382,10 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
                 + "using scale info from SmoothQuant for Linear and "
                 + "one iter calibration for other ops."
             )
-
         model._model.save_qconf_summary(qconf_summary=self.ipex_config_path)
+        if self.version.release > Version("2.1.0").release:
+            update_sq_scale(self.ipex_config_path, smoothquant_scale_info)
+            model._model.load_qconf_summary(qconf_summary=self.ipex_config_path)
         self._ipex_post_quant_process(model, q_model, dataloader, inplace=inplace)
 
         with open(self.ipex_config_path, "r") as f: