fix model failure (intel#1105)
mengniwang95 authored Jul 26, 2022
1 parent 4e331ec commit 09a0629
Showing 5 changed files with 86 additions and 41 deletions.
2 changes: 2 additions & 0 deletions neural_compressor/adaptor/onnxrt.py
@@ -31,6 +31,7 @@
 from neural_compressor.utils.utility import Statistics
 from neural_compressor.experimental.data.dataloaders.base_dataloader import BaseDataLoader
 from neural_compressor.conf.dotdict import deep_get
+from neural_compressor.adaptor.ox_utils.util import split_shared_bias
 import math
 
 onnx = LazyImport("onnx")
@@ -465,6 +466,7 @@ def _pre_optimize(self, model, level=1):
             if self.graph_optimization.gemm2matmul else tmp_model
         model.model = self._rename_node(model.model)
         model = self._revert_fusedconv(model)
+        model = split_shared_bias(model)
         model.topological_sort()
         self.pre_optimized_model = model
 
14 changes: 13 additions & 1 deletion neural_compressor/adaptor/ox_utils/operators/split.py
@@ -21,6 +21,18 @@
     attribute_to_kwarg
 from .base_operator import QuantOperatorBase
 from neural_compressor.adaptor.ox_utils.util import QuantizedValue
+from .qdq_base_operator import QDQOperatorBase
+
+class QDQSplit(QDQOperatorBase):
+    def __init__(self, onnx_quantizer, onnx_node):
+        super().__init__(onnx_quantizer, onnx_node)
+
+    def quantize(self):
+        node = self.node
+        self.quantizer.quantize_inputs(node, [0])
+        if not self.disable_qdq_for_node_output or self.quantizer != 'qdq':
+            self.quantizer.quantize_outputs(self.node, direct_int8=True)
+        node.name = node.name + "_quant"
 
 class QSplit(QuantOperatorBase):
     def __init__(self, onnx_quantizer, onnx_node):
@@ -30,7 +42,7 @@ def convert(self):
         node = self.node
         parent = self.quantizer.model.get_parents(node)[0]
         children = self.quantizer.model.get_children(node)
-        if len(children) == 0:
+        if parent.op_type != 'DequantizeLinear' or len(children) == 0:
             return
         kwargs = {}
         for attribute in node.attribute:
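For orientation, a minimal sketch (not part of the commit; tensor and node names are invented) of the graph pattern the tightened QSplit.convert guard expects: a Split whose parent is a DequantizeLinear node. With any other parent, or with no children, convert now returns without touching the graph.

# Editor sketch only: a DequantizeLinear -> Split pair, the one case QSplit.convert still rewrites.
import numpy as np
from onnx import TensorProto, helper, numpy_helper

scale = numpy_helper.from_array(np.array(0.02, dtype=np.float32), name="x_scale")
zp = numpy_helper.from_array(np.array(0, dtype=np.int8), name="x_zp")
dq = helper.make_node("DequantizeLinear", ["x_q", "x_scale", "x_zp"], ["x_dq"],
                      name="x_DequantizeLinear")
split = helper.make_node("Split", ["x_dq"], ["y0", "y1"], name="split0", axis=0)
graph = helper.make_graph(
    [dq, split], "split_pattern",
    [helper.make_tensor_value_info("x_q", TensorProto.INT8, [2, 4])],
    [helper.make_tensor_value_info("y0", TensorProto.FLOAT, [1, 4]),
     helper.make_tensor_value_info("y1", TensorProto.FLOAT, [1, 4])],
    initializer=[scale, zp])
model = helper.make_model(graph)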
89 changes: 51 additions & 38 deletions neural_compressor/adaptor/ox_utils/quantizer.py
@@ -442,6 +442,13 @@ def quantize_inputs(self, node, indices=None,
                     [tensor_name + "_dequantized"])
                 self.replace_input.append([node, tensor_name, dequant_node.output[0]])
                 self.new_nodes.extend([qlinear_node, dequant_node])
+                quantized_value = QuantizedValue(weight.name, q_weight_name,
+                                                 scale_name,
+                                                 zp_name,
+                                                 QuantizedValueType.Initializer,
+                                                 None, dtype)
+                if weight.name not in self.quantized_value_map:
+                    self.quantized_value_map[weight.name] = quantized_value
             else:
                 weight = self._get_quantized_weight(initializer, dtype, scheme)
                 self._update_weight(weight)
@@ -557,13 +564,12 @@ def quantize_bias_tensor(self, node):
             [bias_name + '_dequantized'], bias_name + '_DequantizeLinear')
         self.new_nodes.append(dequant_node)
         self.replace_input.append([find_by_name(node.name, self.model.nodes()),
-                                   bias_name, dequant_node.output[0]])
+                                   bias_name, bias_name + '_dequantized'])
 
     def quantize_bias(self, bias_name, input_name, weight_name, new_node_list=[]):
         '''
         Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
         '''
-
         # get scale for weight
         weight_scale_initializer = find_by_name(weight_name + '_scale', self.model.initializer())
         weight_scale = self.tensor_proto_to_array(weight_scale_initializer)
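As a worked illustration of the docstring above (editor-added; the numbers are made up), the bias scale is the product of the input scale and the weight scale, and the bias is stored as INT32 with a zero point of 0:

# Illustrative values only, following 'Scale == Input_Scale * Weight_Scale' above.
import numpy as np

input_scale, weight_scale = 0.05, 0.002
bias = np.array([0.12, -0.03], dtype=np.float32)

bias_scale = input_scale * weight_scale                  # 0.0001
q_bias = np.round(bias / bias_scale).astype(np.int32)    # [1200, -300], zero point 0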
@@ -624,8 +630,6 @@ def quantize_bias(self, bias_name, input_name, weight_name, new_node_list=[]):
                                          quantized_bias_zp_name,
                                          QuantizedValueType.Initializer,
                                          None, onnx_proto.TensorProto.INT32)
-        if bias_name not in self.quantized_value_map:
-            self.quantized_value_map[bias_name] = quantized_value
         return quantized_bias_name, quantized_value
 
     def _dynamic_quantize_bias(self, input_name, weight_scale_name, \
@@ -701,40 +705,49 @@ def quantize_weight_per_channel(self, weight_name, weight_qType, scheme, channel_axis):
         if initializer is None:
             raise ValueError("{} is not an initializer", weight_name)
 
-        weights = self.tensor_proto_to_array(initializer)
-        channel_count = weights.shape[channel_axis]
-        rmin_list = []
-        rmax_list = []
-        zero_point_list = []
-        scale_list = []
-        quantized_per_channel_data_list = []
-        for i in range(channel_count):
-            per_channel_data = weights.take(i, channel_axis)
-            rmin, rmax, zero_point, scale, quantized_per_channel_data = quantize_data(
-                per_channel_data.flatten().tolist(), _get_qrange_for_qType(weight_qType,
-                self.reduce_range), weight_qType, scheme)
-            rmin_list.append(rmin)
-            rmax_list.append(rmax)
-            zero_point_list.append(zero_point)
-            scale_list.append(scale)
-            quantized_per_channel_data_list.append(quantized_per_channel_data)
-
-        # combine per_channel_data into one
-        reshape_dims = list(weights.shape)  # deep copy
-        reshape_dims[channel_axis] = 1  # only one per channel for reshape
-        quantized_weights = np.asarray(quantized_per_channel_data_list[0]).reshape(reshape_dims)
-        for i in range(1, len(quantized_per_channel_data_list)):
-            channel_weights = np.asarray(quantized_per_channel_data_list[i]).reshape(reshape_dims)
-            quantized_weights = np.concatenate((quantized_weights, channel_weights), channel_axis)
-
-        weight = QuantizedInitializer(initializer.name, initializer, rmin_list, rmax_list,
-                                      zero_point_list, scale_list,
-                                      weights,
-                                      quantized_weights.flatten().tolist(),
-                                      channel_axis, weight_qType)
-
-        self._update_weight(weight)
-        return (weight.name + "_quantized", weight.name + "_zero_point", weight.name + "_scale")
+        if initializer.name not in self.quantized_value_map:
+            weights = self.tensor_proto_to_array(initializer)
+            channel_count = weights.shape[channel_axis]
+            rmin_list = []
+            rmax_list = []
+            zero_point_list = []
+            scale_list = []
+            quantized_per_channel_data_list = []
+            for i in range(channel_count):
+                per_channel_data = weights.take(i, channel_axis)
+                rmin, rmax, zero_point, scale, quantized_per_channel_data = quantize_data(
+                    per_channel_data.flatten().tolist(), _get_qrange_for_qType(weight_qType,
+                    self.reduce_range), weight_qType, scheme)
+                rmin_list.append(rmin)
+                rmax_list.append(rmax)
+                zero_point_list.append(zero_point)
+                scale_list.append(scale)
+                quantized_per_channel_data_list.append(quantized_per_channel_data)
+
+            # combine per_channel_data into one
+            reshape_dims = list(weights.shape)  # deep copy
+            reshape_dims[channel_axis] = 1  # only one per channel for reshape
+            quantized_weights = np.asarray(quantized_per_channel_data_list[0]).reshape(reshape_dims)
+            for i in range(1, len(quantized_per_channel_data_list)):
+                channel_weights = np.asarray(quantized_per_channel_data_list[i]).reshape(reshape_dims)
+                quantized_weights = np.concatenate((quantized_weights, channel_weights), channel_axis)
+
+            weight = QuantizedInitializer(initializer.name, initializer, rmin_list, rmax_list,
+                                          zero_point_list, scale_list,
+                                          weights,
+                                          quantized_weights.flatten().tolist(),
+                                          channel_axis, weight_qType)
+
+            self._update_weight(weight)
+            quantized_value = QuantizedValue(weight.name, weight.name + "_quantized",
+                                             weight.name + "_scale",
+                                             weight.name + "_zero_point",
+                                             QuantizedValueType.Initializer,
+                                             None, weight_qType)
+            self.quantized_value_map[weight.name] = quantized_value
+
+        return (initializer.name + "_quantized", initializer.name + "_zero_point",
+                initializer.name + "_scale")
 
     def _update_weight(self, weight):
         '''
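The rewritten quantize_weight_per_channel keeps the same per-channel math but now runs it at most once per initializer, caching the result in quantized_value_map and always returning the initializer-derived tensor names. A compact numpy sketch of that per-channel loop (editor illustration assuming a symmetric int8 scheme, not the library's quantize_data helper):

# Editor sketch: one scale per slice along channel_axis, mirroring the loop above.
import numpy as np

def per_channel_quantize_s8(weights, channel_axis=0):
    scales, q_slices = [], []
    for i in range(weights.shape[channel_axis]):
        data = weights.take(i, axis=channel_axis)
        rmax = max(abs(float(data.min())), abs(float(data.max())), 1e-8)
        scale = rmax / 127.0                        # zero point is 0 for the symmetric scheme
        scales.append(scale)
        q_slices.append(np.round(data / scale).clip(-127, 127).astype(np.int8))
    return np.stack(q_slices, axis=channel_axis), np.array(scales, dtype=np.float32)

w = np.random.randn(4, 3, 3, 3).astype(np.float32)   # e.g. a Conv weight, one scale per output channel
q_w, w_scales = per_channel_quantize_s8(w, channel_axis=0)

Because the cached entry is keyed by the initializer name, a weight shared by several nodes is quantized once and every consumer is wired to the same _quantized, _scale and _zero_point tensors.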
4 changes: 2 additions & 2 deletions neural_compressor/adaptor/ox_utils/registry.py
@@ -28,7 +28,7 @@
 from .operators.maxpool import QMaxPool, QDQMaxPool
 from .operators.gavgpool import QGlobalAveragePool
 from .operators.lstm import LSTMQuant, QDQLSTM
-from .operators.split import QSplit
+from .operators.split import QSplit, QDQSplit
 from .operators.concat import QLinearConcat, QDQConcat
 from .operators.pad import QPad, QDQPad
 from .operators.pooling import QLinearPool, QDQPool
@@ -92,7 +92,7 @@
     "AveragePool": QDQPool,
     "Unsqueeze" : QDQDirect8BitOp,
     "Concat": QDQConcat,
-    "Split": QDQDirect8BitOp
+    "Split": QDQSplit
 }
 
 CastRegistry = {
18 changes: 18 additions & 0 deletions neural_compressor/adaptor/ox_utils/util.py
@@ -60,6 +60,24 @@ def _get_qrange_for_qType(qType, reduce_range=False):
     else:
         raise ValueError('unsupported quantization data type')
 
+def split_shared_bias(model):
+    for input_name, node_list in model.input_name_to_nodes.items():
+        if len(node_list) > 1 and input_name in [i.name for i in model.model.graph.initializer]:
+            for node in node_list[1:]:
+                if node.op_type not in ['Conv', 'FusedConv']:
+                    continue
+                if node.input[2] == input_name:
+                    new_input_name = node.input[2] + '_nc_split_' + node.name
+                    new_input = helper.make_tensor(
+                                    new_input_name,
+                                    model.get_initializer(input_name).data_type,
+                                    model.get_initializer(input_name).dims,
+                                    model.get_initializer(input_name).raw_data,
+                                    True)
+                    model.add_initializer(new_input)
+                    node.input[2] = new_input_name
+    return model
+
 def convert_np_to_float16(np_array, min_positive_val=1e-7, max_finite_val=1e4): # pragma: no cover
     '''
     Convert float32 numpy array to float16 without changing sign or finiteness.
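A small usage sketch of the new helper (editor-added; the toy graph is invented, and it assumes the ONNXModel wrapper that _pre_optimize passes in, e.g. neural_compressor.model.onnx_model.ONNXModel, exposing input_name_to_nodes, get_initializer and add_initializer as used above): two Conv nodes share one bias initializer, and after split_shared_bias the second Conv points at its own copy, so later bias quantization no longer collides across nodes.

# Editor sketch under the assumptions stated above; not part of the commit.
import numpy as np
from onnx import TensorProto, helper, numpy_helper
from neural_compressor.adaptor.ox_utils.util import split_shared_bias
from neural_compressor.model.onnx_model import ONNXModel   # assumed wrapper

w = numpy_helper.from_array(np.ones((1, 1, 3, 3), dtype=np.float32), name="w")
b = numpy_helper.from_array(np.zeros((1,), dtype=np.float32), name="b")   # bias shared by both Convs
conv1 = helper.make_node("Conv", ["x", "w", "b"], ["c1"], name="conv1", pads=[1, 1, 1, 1])
conv2 = helper.make_node("Conv", ["c1", "w", "b"], ["c2"], name="conv2", pads=[1, 1, 1, 1])
graph = helper.make_graph(
    [conv1, conv2], "shared_bias",
    [helper.make_tensor_value_info("x", TensorProto.FLOAT, [1, 1, 8, 8])],
    [helper.make_tensor_value_info("c2", TensorProto.FLOAT, [1, 1, 8, 8])],
    initializer=[w, b])
model = split_shared_bias(ONNXModel(helper.make_model(graph)))
print(model.model.graph.node[1].input[2])   # expected: b_nc_split_conv2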
