diff --git a/neural_compressor/torch/algorithms/weight_only/gptq.py b/neural_compressor/torch/algorithms/weight_only/gptq.py
index 1dbd7511663..d0e133b1758 100644
--- a/neural_compressor/torch/algorithms/weight_only/gptq.py
+++ b/neural_compressor/torch/algorithms/weight_only/gptq.py
@@ -930,12 +930,7 @@ def tmp(_, inp, out):
 
         logger.info("Quantization done")
         # self.model.config.use_cache = self.use_cache
-
-        # obtain model (all weight only quantization API function should return)
-        for k, v in gptq_config.items():
-            for m, n in v.items():
-                gptq_config[k][m] = n.tolist()
-        return self.model, gptq_config
+        return self.model
 
 
 class GPTQ:
@@ -1379,9 +1374,8 @@ def convert(self, model, *args, **kwargs):
 
         self.gptq_quantizer.model = model
         self.gptq_quantizer.remove_prepare_for_calibration()
-        q_model, gptq_config = self.gptq_quantizer.execute_quantization()
+        q_model = self.gptq_quantizer.execute_quantization()
         if not self.gptq_quantizer.use_layer_wise:
             q_model = q_model.to(self.model_device)
-        q_model.gptq_config = gptq_config
         logger.info("GPTQ quantizing done.")
         return q_model
diff --git a/neural_compressor/torch/algorithms/weight_only/save_load.py b/neural_compressor/torch/algorithms/weight_only/save_load.py
index d515d91f9a7..feb4b907b7e 100644
--- a/neural_compressor/torch/algorithms/weight_only/save_load.py
+++ b/neural_compressor/torch/algorithms/weight_only/save_load.py
@@ -53,11 +53,6 @@ def save(model, output_dir="./saved_results"):
     # saving process
     save_config_mapping(model.qconfig, qconfig_file_path)
 
-    if hasattr(model, "gptq_config") and model.gptq_config:
-        gptq_config_path = os.path.join(os.path.abspath(os.path.expanduser(output_dir)), "gptq_config.json")
-        with open(gptq_config_path, "w") as f:
-            json.dump(model.gptq_config, f, indent=4)
-
     # MethodType 'save' not in state_dict
     del model.save
     torch.save(model.state_dict(), qmodel_weight_file_path)
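
Note: this change makes execute_quantization() return only the quantized model; the per-layer gptq_config dict is no longer built, attached as q_model.gptq_config, or dumped to gptq_config.json during save. Any downstream code that unpacked the old two-value return needs updating. The shim below is a hypothetical compatibility sketch, not part of the PR; run_quantization and its duck-typed handling of both return shapes are assumptions for illustration.

def run_quantization(quantizer):
    # Call the quantizer; shape of the return value depends on the
    # library version the caller is running against.
    result = quantizer.execute_quantization()
    if isinstance(result, tuple):
        # Legacy path: (model, gptq_config) was returned; the config
        # dict is discarded here since newer versions drop it entirely.
        model, _gptq_config = result
    else:
        # Current path after this PR: only the model is returned.
        model = result
    return model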