Skip to content

Commit

Permalink
Fix LLM model accuracy regression with IPEX 2.1.100 (#1499)
Browse files: browse the repository at this point in the history
Signed-off-by: Wang, Chang1 <[email protected]>
  • Loading branch information
changwangss authored Dec 27, 2023
1 parent e827887 commit 3cb6d38
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions neural_compressor/adaptor/pytorch.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3365,7 +3365,6 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
self._cfg_to_qconfig(tune_cfg, smooth_quant=True)
update_sq_scale(self.ipex_config_path, smoothquant_scale_info)
model._model.load_qconf_summary(qconf_summary=self.ipex_config_path)

# real calibration for other operators
try:
# IPEX may raise an error on the second iteration.
Expand All @@ -3383,8 +3382,10 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
+ "using scale info from SmoothQuant for Linear and "
+ "one iter calibration for other ops."
)

model._model.save_qconf_summary(qconf_summary=self.ipex_config_path)
if self.version.release > Version("2.1.0").release:
update_sq_scale(self.ipex_config_path, smoothquant_scale_info)
model._model.load_qconf_summary(qconf_summary=self.ipex_config_path)
self._ipex_post_quant_process(model, q_model, dataloader, inplace=inplace)

with open(self.ipex_config_path, "r") as f:
Expand Down

0 comments on commit 3cb6d38

Please sign in to comment.