From cb7b4859bf3c9c6b6ca6d4140c4d896d97364e74 Mon Sep 17 00:00:00 2001 From: yuwenzho Date: Wed, 17 Aug 2022 13:37:49 +0800 Subject: [PATCH] fix quantization from fp64 to fp32 (#1153) --- neural_compressor/adaptor/ox_utils/util.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/neural_compressor/adaptor/ox_utils/util.py b/neural_compressor/adaptor/ox_utils/util.py index c4cf6570512..87c7bf962bb 100644 --- a/neural_compressor/adaptor/ox_utils/util.py +++ b/neural_compressor/adaptor/ox_utils/util.py @@ -155,11 +155,12 @@ def quantize_data_with_scale_zero(data, qType, scheme, scale, zero_point): - when data type == int8, from [-m , m] -> [-(2^{b-1}-1), 2^{b-1}-1] where m = max(abs(rmin), abs(rmax)) ''' + data = np.asarray(data) if qType == onnx_proto.TensorProto.INT8 and scheme == 'sym': # signed byte type - quantized_data = (np.asarray(data) / scale).round().astype('b') + quantized_data = (data.astype(np.float32) / scale).round().astype('b') elif qType == onnx_proto.TensorProto.UINT8 and scheme == 'asym': - quantized_data = ((np.asarray(data) / scale).round() + zero_point).astype('B') + quantized_data = ((data.astype(np.float32) / scale).round() + zero_point).astype('B') else: raise ValueError("Unexpected combination of data type {} and scheme {}.".format( qType, scheme))