Fix bugs for gptq exporting with static_groups (#1614)
Signed-off-by: YIYANGCAI <[email protected]>
YIYANGCAI authored Feb 28, 2024
1 parent f812e67 commit b4e37b7
Showing 1 changed file with 7 additions and 1 deletion.
neural_compressor/adaptor/torch_utils/gptq.py
@@ -668,7 +668,8 @@ def tmp(_, inp, out):
                 gptq_config[self.get_full_layer_name(layer_name, block_idx)] = {"scale": scale}
                 if not weight_config_this_layer["sym"]:
                     gptq_config[self.get_full_layer_name(layer_name, block_idx)]["zero"] = zp
-                if weight_config_this_layer["act_order"]: # save perm for restoring the weights
+                if weight_config_this_layer["act_order"] and not weight_config_this_layer["static_groups"]:
+                    # save perm for restoring the weights, but only when static_groups is not enabled.
                     gptq_config[self.get_full_layer_name(layer_name, block_idx)]["perm"] = gptq_for_this_block[
                         layer_name
                     ].perm
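
As context for this hunk, here is a minimal sketch of why perm only has to be saved when static_groups is off: with act_order and dynamic groups, the group boundaries follow the permuted column order, so mapping an original column to its scale/zero requires perm; static_groups pre-computes the groups in the original column order, so no permutation has to be stored. The helper below is a hypothetical illustration, not part of neural_compressor.

import torch


def group_index_of_column(col: int, groupsize: int, perm=None) -> int:
    """Return which per-group scale/zero applies to original weight column `col` (illustrative only)."""
    if perm is not None:
        # act_order without static_groups: group boundaries were formed in the
        # permuted column order, so map the original column to its permuted position
        permuted_pos = torch.argsort(perm)[col].item()
        return permuted_pos // groupsize
    # static_groups (or no act_order): groups were formed in the original column order
    return col // groupsize
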
@@ -828,6 +829,11 @@ def fasterquant(self, W, blocksize=128, percdamp=0.01, groupsize=-1, act_order=F
                             zero.append(self.quantizer.zero)
                     else:
                         idx = i1 + i
+                        if (i1 + i) % groupsize == 0:
+                            # load the pre-calculated quantization parameters in groups
+                            static_quantizer = groups[(i1 + i) // groupsize]
+                            scale.append(static_quantizer.scale)
+                            zero.append(static_quantizer.zero)
                         if act_order:
                             idx = perm[idx]
                         self.quantizer = groups[idx // groupsize]
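
The second hunk's logic, sketched as a small, self-contained example (names such as GroupQuantizer and collect_static_group_params are illustrative stand-ins, not the library code): with static_groups, the per-group quantizers were pre-computed on the original column order, so scale and zero are collected group by group in that order, while act_order only changes which pre-computed quantizer a given column reuses. This is also why the first hunk can skip saving perm when static_groups is enabled.

from dataclasses import dataclass

import torch


@dataclass
class GroupQuantizer:  # stand-in for the per-group quantizer objects stored in `groups`
    scale: torch.Tensor
    zero: torch.Tensor


def collect_static_group_params(n_cols, groupsize, groups, perm, act_order):
    scale, zero = [], []
    for i in range(n_cols):
        idx = i
        if i % groupsize == 0:
            # load the pre-calculated parameters of the group starting at this
            # (unpermuted) column position, mirroring the added lines above
            static_quantizer = groups[i // groupsize]
            scale.append(static_quantizer.scale)
            zero.append(static_quantizer.zero)
        if act_order:
            idx = perm[i].item()  # map the permuted column back to its original index
        quantizer = groups[idx // groupsize]  # the real loop would quantize column i with this
    return scale, zero


# Toy usage: 8 columns, two static groups of 4, with an activation-order permutation.
groups = [GroupQuantizer(torch.tensor(0.1), torch.tensor(0)), GroupQuantizer(torch.tensor(0.2), torch.tensor(0))]
perm = torch.randperm(8)
scale, zero = collect_static_group_params(8, 4, groups, perm, act_order=True)
print(len(scale), len(zero))  # 2 2 -> one entry per group, in the original group order
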
