refine autoround export (#1711)
Signed-off-by: changwangss <[email protected]>
changwangss authored Apr 3, 2024
1 parent 7ee7215 commit 4092311
Showing 1 changed file with 48 additions and 14 deletions.
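In outline, the commit splits the export dispatch so that AutoRound-quantized models no longer ride the GPTQ branch when device is "xpu". A structural sketch of the control flow after the change (the function name is hypothetical; the branch shape is taken directly from the diff below):

    def export_dispatch(gptq_config, autoround_config, device):
        # Mirrors the branch structure after this commit (illustration only).
        if gptq_config:
            return "gptq: per-layer WeightOnlyLinear repack"
        elif autoround_config:
            if device == "xpu":
                return "autoround: per-layer WeightOnlyLinear repack (new branch)"
            else:
                return "autoround: auto_round pack_model export"
        else:
            return "default: per-layer repack"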
neural_compressor/model/torch_model.py: 62 changes (48 additions & 14 deletions)
@@ -498,8 +498,7 @@ def export_compressed_model(
         gptq_config = self.gptq_config if hasattr(self, "gptq_config") else {}

         autoround_config = self.autoround_config if hasattr(self, "autoround_config") else {}
-
-        if gptq_config or (autoround_config and device == "xpu"):
+        if gptq_config:
             for k, v in weight_config.items():
                 logger.debug(f"Compressing {k} on device {device}")
                 if v["dtype"] == "fp32":
@@ -558,19 +557,54 @@ def export_compressed_model(
                     )
                     new_module.pack(int_weight, gptq_scale, gptq_zp, m.bias, gptq_perm)
                     set_module(self.model, k, new_module)
-        elif autoround_config and (device == "cpu" or device == "auto"):
-            from auto_round.export.export_to_itrex.export import pack_model  # pylint: disable=E0401
+        elif autoround_config:
+            if device == "xpu":
+                for k, v in weight_config.items():
+                    logger.debug(f"Compressing {k} on device {device}")
+                    if v["dtype"] == "fp32":
+                        continue
+                    else:
+                        dtype = v["dtype"]
+                        num_bits = v["bits"]
+                        group_size = v["group_size"]
+                        scheme = v["scheme"]
+                        m = fetch_module(self.model, k)
+                        autoround_conf = autoround_config[k]
+                        fp32_weight = m.weight.data
+                        autoround_scale = torch.tensor(autoround_conf["scale"], dtype=torch.float32)
+                        autoround_zp = None if scheme == "sym" else torch.tensor(autoround_conf["zero"], dtype=torch.int32)
+                        int_weight = quant_weight_w_scale(fp32_weight, autoround_scale, autoround_zp, group_size)
+                        int_weight = int_weight.type(torch.int32)
+                        new_module = WeightOnlyLinear(
+                            m.in_features,
+                            m.out_features,
+                            num_bits,
+                            group_size,
+                            dtype=dtype,
+                            zp=autoround_zp is not None,
+                            bias=m.bias is not None,
+                            g_idx=None,
+                            compression_dtype=compression_dtype,
+                            compression_dim=compression_dim,
+                            scale_dtype=scale_dtype,
+                            device=device,
+                            use_optimum_format=use_optimum_format,
+                        )
+                        new_module.pack(int_weight, autoround_scale, autoround_zp, m.bias, None)
+                        set_module(self.model, k, new_module)
+            else:
+                from auto_round.export.export_to_itrex.export import pack_model  # pylint: disable=E0401

-            self.model = pack_model(
-                self.model,
-                weight_config=autoround_config,
-                enable_full_range=enable_full_range,
-                compression_dtype=compression_dtype,
-                compression_dim=compression_dim,
-                device=device,
-                use_optimum_format=use_optimum_format,
-                inplace=True,
-            )
+                self.model = pack_model(
+                    self.model,
+                    weight_config=autoround_config,
+                    enable_full_range=enable_full_range,
+                    compression_dtype=compression_dtype,
+                    compression_dim=compression_dim,
+                    device=device,
+                    use_optimum_format=use_optimum_format,
+                    inplace=True,
+                )
         else:
             for k, v in weight_config.items():
                 logger.debug(f"Compressing {k} on device {device}")
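For context on the new xpu branch: quant_weight_w_scale re-quantizes each layer's fp32 weight against the stored AutoRound scale, adding the zero point only for asymmetric schemes. A minimal sketch of that grouped round-to-nearest step, written from the call site above (the helper's real body is not part of this diff, and the tensor shapes are assumptions):

    import torch

    def quant_weight_w_scale_sketch(weight, scale, zp, group_size):
        # weight: (out_features, in_features) fp32 tensor
        # scale:  (out_features, n_groups) per-group scales
        # zp:     (out_features, n_groups) integer zero points, or None for "sym"
        # Assumes group_size evenly divides in_features.
        _, in_features = weight.shape
        int_weight = torch.zeros_like(weight)
        for start in range(0, in_features, group_size):
            g = start // group_size
            chunk = weight[:, start : start + group_size]
            q = torch.round(chunk / scale[:, g].unsqueeze(1))
            if zp is not None:
                q = q + zp[:, g].unsqueeze(1)
            int_weight[:, start : start + group_size] = q
        return int_weight  # the caller then casts to torch.int32 before pack()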

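A hedged usage sketch of the refined export path. Here q_model stands for the neural_compressor PyTorch model wrapper that carries autoround_config after an AutoRound run; the keyword names match the parameters visible in the diff, but the values shown are assumptions, not documented defaults:

    import torch

    # q_model is hypothetical; obtain it from an AutoRound quantization run.
    compressed = q_model.export_compressed_model(
        compression_dtype=torch.int32,  # storage dtype for the packed weight
        compression_dim=1,              # pack along the in_features dimension
        scale_dtype=torch.float32,
        device="cpu",                   # non-xpu devices take the pack_model path
        use_optimum_format=True,
    )
    # device="xpu" instead takes the new per-layer WeightOnlyLinear loop.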