output check added after sq transform (#954)
Signed-off-by: chensuyue <[email protected]>
Co-authored-by: wenhuach21 <[email protected]>
maktukmak and wenhuach21 authored Jun 15, 2023
1 parent 6d7fd72 commit 5c04acd
Showing 3 changed files with 201 additions and 111 deletions.
58 changes: 51 additions & 7 deletions neural_compressor/adaptor/torch_utils/smooth_quant.py
@@ -72,6 +72,16 @@ def model_forward(model, dataloader, iters, device):
break


def model_forward_per_sample(model, sample, device):
try:
output = forward_wrapper(model, sample, device)
return output

except Exception as e:
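##the sample may be an (input, label) pair from the dataloader; fall back to its first element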
output = forward_wrapper(model, sample[0], device)
return output


def quant_dequant_w(m, num_bits=8, scheme='asym'): ##TODO take sym as default
eps = torch.finfo(torch.float32).eps
if isinstance(m, torch.nn.Linear):
@@ -688,7 +698,7 @@ def transform(self, alpha=0.5, folding=False, percentile=99.999, op_types=['Line
self.absorb_to_layer, no_absorb_layers = self._trace(
op_types) ##TODO we need to insert mul layer for no_absorb_layers later
if self.absorb_to_layer == None and no_absorb_layers == None:
logger.warning("sorry, could not trace the model, smooth quant is ignored")
logger.warning("sorry, could not trace the model, smooth quant is skipped")
logger.warning("if you are using huggingface model,"
"you could set torchscript to True "
"when loading the model or set the return_dict to False")
@@ -701,7 +711,8 @@ def transform(self, alpha=0.5, folding=False, percentile=99.999, op_types=['Line
to_absorb_cnt += len(item)

logger.info(
f"find {to_absorb_cnt} could be absorbed in {to_absorb_cnt + len(no_absorb_layers)}")
f" {to_absorb_cnt} out of {to_absorb_cnt + len(no_absorb_layers)} "
f"layers could be absorbed in smooth quant")

# remove self.self_absorb_layers if it exists in self.absorb_to_layer
for k, v in self.absorb_to_layer.items():
@@ -735,12 +746,39 @@ def transform(self, alpha=0.5, folding=False, percentile=99.999, op_types=['Line

if alpha == 'auto':
alpha = self.alpha_per_layer
example_inputs = self._get_example_input()
if example_inputs != None:
out_pre_sq = model_forward_per_sample(self.model, example_inputs, self.device)

self.weight_scale_info, self.absorb_scales_info = self._adjust_parameters(self.absorb_to_layer,
input_maxes, alpha)
if example_inputs != None:
# Check mathematical equivalence
out_post_sq = model_forward_per_sample(self.model, example_inputs, self.device)

if not self.output_is_equal(out_post_sq, out_pre_sq):
logger.warning(
"Mathematical equivelancy of Smoothquant is not preserved. "
" Please kindly report this issue to github, sq is skipped")
self.recover()
# else:
# logger.info("Mathematical equivelancy of Smoothquant is preserved.")

else:
logger.warning(" Could not get example input, equivelancy check is skipped")

self.input_values, self.output_values = {}, {}
return self.model

def output_is_equal(self, out1, out2, atol=1e-05):
if isinstance(out1, tuple):
return all(torch.all(torch.isclose(out1[i], out2[i], atol=atol)) for i in range(len(out1)))
elif isinstance(out1, dict):
return all(torch.all(torch.isclose(out1[k], out2[k], atol=atol)) for k in out1.keys())
elif isinstance(out1, torch.Tensor):
return torch.all(torch.isclose(out1, out2, atol=atol))
return False

def recover(self):
"""
recover the model weights
@@ -768,6 +806,16 @@ def _get_all_layer_names(self, op_types=['Linear']):
self_absorb_layer[name] = [name]
return self_absorb_layer

def _get_example_input(self):
if self.dataloader == None and self.example_inputs == None:
return None
if self.example_inputs is None:
##assert self.dataloader, "Please provide dataloader or example_inputs"
for idx, input in enumerate(self.dataloader):
self.example_inputs = input
break  ##one calibration sample is enough for the equivalence check

return self.example_inputs

def _trace(self, op_types):
"""
Trace the model to find the layers which can be smooth quantized.
@@ -777,11 +825,7 @@ def _trace(self, op_types):
no_absorb_layers: A list saving the layers which could not find the absorb layer
"""
tg = GraphTrace()
if self.example_inputs is None:
assert self.dataloader, "Please provide dataloader or example_inputs"
for idx, input in enumerate(self.dataloader):
self.example_inputs = input
break
self._get_example_input()
absorb_to_layer, no_absorb_layers = tg.get_absorb_to_layer(self.traced_model, self.example_inputs, op_types)
return absorb_to_layer, no_absorb_layers
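
For context, the check introduced above relies on SmoothQuant being a mathematically neutral rewrite: dividing activations by a per-channel scale while folding the same scale into the absorbing weight should leave layer outputs unchanged up to floating-point error, which is exactly what the out_pre_sq / out_post_sq comparison verifies. Below is a minimal, self-contained sketch of that invariant using the same torch.isclose-style tolerance as output_is_equal; the tensor names and the simple max-based scale (no alpha) are illustrative assumptions, not code from this patch.

import torch

torch.manual_seed(0)
x = torch.rand(4, 8)                      # calibration activations
w = torch.rand(8, 16)                     # weight of the absorbing linear layer (in_features x out_features layout for clarity)
s = x.abs().amax(dim=0).clamp(min=1e-5)   # per-input-channel smoothing scale (illustrative, alpha omitted)

y_ref = x @ w                             # output before the transform
y_sq = (x / s) @ (w * s[:, None])         # scaled activations, scale absorbed into the weight

# same tolerance-based comparison idea as output_is_equal above
print(torch.all(torch.isclose(y_ref, y_sq, atol=1e-05)).item())  # expected: True

If the scales were applied inconsistently (for example to the wrong weight axis), this comparison would fail; in the patched transform that failure triggers the warning and recover() restores the original weights.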

104 changes: 0 additions & 104 deletions test/algorithm/test_smooth_quant.py
@@ -1,122 +1,18 @@
import copy
import unittest
import onnx
import numpy as np
import shutil
import torch
from onnx import onnx_pb as onnx_proto
from onnx import helper, TensorProto, numpy_helper
from neural_compressor.data import Datasets, DATALOADERS
from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader
from neural_compressor.adaptor.torch_utils.smooth_quant import TorchSmoothQuant
from neural_compressor.adaptor.ox_utils.smooth_quant import ORTSmoothQuant

try:
import intel_extension_for_pytorch as ipex
TEST_IPEX = True
except:
TEST_IPEX = False

def build_onnx_model():
A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 5, 5])
C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [1, 5, 2])
H = helper.make_tensor_value_info('H', TensorProto.FLOAT, [1, 5, 2])

g_value = np.random.uniform(low=0.001, high=0.5, size=(25)).astype(np.float32)
G_init = helper.make_tensor('G', TensorProto.FLOAT, [5, 5], g_value.reshape(25).tolist())
matmul_node = onnx.helper.make_node('MatMul', ['A', 'G'], ['C'], name='Matmul')

b_value = np.random.uniform(low=0.001, high=0.5, size=(10)).astype(np.float32)
B_init = helper.make_tensor('B', TensorProto.FLOAT, [5, 2], b_value.reshape(10).tolist())
matmul_node2 = onnx.helper.make_node('MatMul', ['C', 'B'], ['I'], name='Matmul2')

e_value = np.random.uniform(low=0.001, high=0.5, size=(10)).astype(np.float32)
E_init = helper.make_tensor('E', TensorProto.FLOAT, [5, 2], e_value.reshape(10).tolist())
matmul_node3 = onnx.helper.make_node('MatMul', ['C', 'E'], ['K'], name='Matmul3')

add = onnx.helper.make_node('Add', ['I', 'E'], ['D'], name='add')

f_value = np.random.uniform(low=0.001, high=0.5, size=(10)).astype(np.float32)
F_init = helper.make_tensor('F', TensorProto.FLOAT, [5, 2], f_value.reshape(10).tolist())
add2 = onnx.helper.make_node('Add', ['D', 'F'], ['H'], name='add2')

graph = helper.make_graph([matmul_node, matmul_node2, matmul_node3, add, add2], 'test_graph_1', [A], [H], [B_init, E_init, F_init, G_init])
model = helper.make_model(graph)
model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]})
return model

class TestORTSq(unittest.TestCase):
@classmethod
def setUpClass(self):
self.model = build_onnx_model()
dataset = Datasets("onnxrt_qdq")["dummy_v2"]((5,5), (5,1))
self.dataloader = DATALOADERS['onnxrt_qlinearops'](dataset)

@classmethod
def tearDownClass(self):
shutil.rmtree("./nc_workspace", ignore_errors=True)

def test_sq(self):
sq = ORTSmoothQuant(copy.deepcopy(self.model), self.dataloader)
model = sq.transform(calib_iter=5, scales_per_op=False)
self.assertEqual(len([i for i in model.model.graph.node if i.op_type == 'Mul']), 1)
sq.recover()
self.assertEqual(len(sq.model.nodes()), len(self.model.graph.node))
for init in self.model.graph.initializer:
tensor = numpy_helper.to_array(init)
sq_tensor = numpy_helper.to_array(sq.model.get_initializer(init.name))
self.assertAlmostEqual(tensor[0][0], sq_tensor[0][0], 4)

sq = ORTSmoothQuant(copy.deepcopy(self.model), self.dataloader)
model = sq.transform(calib_iter=5, folding=False, scales_per_op=False)
self.assertEqual(len([i for i in model.model.graph.node if i.op_type == 'Mul']), 2)
sq.recover()
self.assertEqual(len(sq.model.nodes()), len(self.model.graph.node))
for init in self.model.graph.initializer:
tensor = numpy_helper.to_array(init)
sq_tensor = numpy_helper.to_array(sq.model.get_initializer(init.name))
self.assertAlmostEqual(tensor[0][0], sq_tensor[0][0], 4)

sq = ORTSmoothQuant(copy.deepcopy(self.model), self.dataloader)
model = sq.transform(calib_iter=5, folding=False, scales_per_op=True)
self.assertEqual(len([i for i in model.model.graph.node if i.op_type == 'Mul']), 3)
sq.recover()
self.assertEqual(len(sq.model.nodes()), len(self.model.graph.node))
for init in self.model.graph.initializer:
tensor = numpy_helper.to_array(init)
sq_tensor = numpy_helper.to_array(sq.model.get_initializer(init.name))
self.assertAlmostEqual(tensor[0][0], sq_tensor[0][0], 4)

sq = ORTSmoothQuant(copy.deepcopy(self.model), self.dataloader)
model = sq.transform(calib_iter=5, scales_per_op=True)
self.assertEqual(len([i for i in model.model.graph.node if i.op_type == 'Mul']), 3)
sq.recover()
self.assertEqual(len(sq.model.nodes()), len(self.model.graph.node))
for init in self.model.graph.initializer:
tensor = numpy_helper.to_array(init)
sq_tensor = numpy_helper.to_array(sq.model.get_initializer(init.name))
self.assertAlmostEqual(tensor[0][0], sq_tensor[0][0], 4)

sq = ORTSmoothQuant(copy.deepcopy(self.model), self.dataloader)
model = sq.transform(calib_iter=5, scales_per_op=True, alpha='auto')
self.assertEqual(len([i for i in model.model.graph.node if i.op_type == 'Mul']), 3)
sq.recover()
self.assertEqual(len(sq.model.nodes()), len(self.model.graph.node))
for init in self.model.graph.initializer:
tensor = numpy_helper.to_array(init)
sq_tensor = numpy_helper.to_array(sq.model.get_initializer(init.name))
self.assertAlmostEqual(tensor[0][0], sq_tensor[0][0], 4)


sq = ORTSmoothQuant(copy.deepcopy(self.model), self.dataloader)
model = sq.transform(calib_iter=5, alpha='auto', scales_per_op=False)
self.assertEqual(len([i for i in model.model.graph.node if i.op_type == 'Mul']), 1)
sq.recover()
self.assertEqual(len(sq.model.nodes()), len(self.model.graph.node))
for init in self.model.graph.initializer:
tensor = numpy_helper.to_array(init)
sq_tensor = numpy_helper.to_array(sq.model.get_initializer(init.name))
self.assertAlmostEqual(tensor[0][0], sq_tensor[0][0], 4)

class TestSqDepthwiseConv(unittest.TestCase):
@classmethod
150 changes: 150 additions & 0 deletions test/algorithm/test_smooth_quant_onnx.py
@@ -0,0 +1,150 @@
import onnx
from onnx import onnx_pb as onnx_proto
from onnx import helper, TensorProto, numpy_helper
import copy
import unittest
import numpy as np
import shutil
import torch
from neural_compressor.data import Datasets, DATALOADERS
from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader
from neural_compressor.adaptor.torch_utils.smooth_quant import TorchSmoothQuant
from neural_compressor.adaptor.ox_utils.smooth_quant import ORTSmoothQuant
def build_onnx_model():
A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 5, 5])
C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [1, 5, 2])
H = helper.make_tensor_value_info('H', TensorProto.FLOAT, [1, 5, 2])

g_value = np.random.uniform(low=0.001, high=0.5, size=(25)).astype(np.float32)
G_init = helper.make_tensor('G', TensorProto.FLOAT, [5, 5], g_value.reshape(25).tolist())
matmul_node = onnx.helper.make_node('MatMul', ['A', 'G'], ['C'], name='Matmul')

b_value = np.random.uniform(low=0.001, high=0.5, size=(10)).astype(np.float32)
B_init = helper.make_tensor('B', TensorProto.FLOAT, [5, 2], b_value.reshape(10).tolist())
matmul_node2 = onnx.helper.make_node('MatMul', ['C', 'B'], ['I'], name='Matmul2')

e_value = np.random.uniform(low=0.001, high=0.5, size=(10)).astype(np.float32)
E_init = helper.make_tensor('E', TensorProto.FLOAT, [5, 2], e_value.reshape(10).tolist())
matmul_node3 = onnx.helper.make_node('MatMul', ['C', 'E'], ['K'], name='Matmul3')

add = onnx.helper.make_node('Add', ['I', 'E'], ['D'], name='add')

f_value = np.random.uniform(low=0.001, high=0.5, size=(10)).astype(np.float32)
F_init = helper.make_tensor('F', TensorProto.FLOAT, [5, 2], f_value.reshape(10).tolist())
add2 = onnx.helper.make_node('Add', ['D', 'F'], ['H'], name='add2')

graph = helper.make_graph([matmul_node, matmul_node2, matmul_node3, add, add2], 'test_graph_1', [A], [H], [B_init, E_init, F_init, G_init])
model = helper.make_model(graph)
model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]})
return model

class TestORTSq(unittest.TestCase):
@classmethod
def setUpClass(self):
self.model = build_onnx_model()
dataset = Datasets("onnxrt_qdq")["dummy_v2"]((5,5), (5,1))
self.dataloader = DATALOADERS['onnxrt_qlinearops'](dataset)

@classmethod
def tearDownClass(self):
shutil.rmtree("./nc_workspace", ignore_errors=True)

def test_sq(self):
sq = ORTSmoothQuant(copy.deepcopy(self.model), self.dataloader)
model = sq.transform(calib_iter=5)
self.assertEqual(len([i for i in model.model.graph.node if i.op_type == 'Mul']), 1)
sq.recover()
self.assertEqual(len(sq.model.nodes()), len(self.model.graph.node))
for init in self.model.graph.initializer:
tensor = numpy_helper.to_array(init)
sq_tensor = numpy_helper.to_array(sq.model.get_initializer(init.name))
self.assertAlmostEqual(tensor[0][0], sq_tensor[0][0], 4)

sq = ORTSmoothQuant(copy.deepcopy(self.model), self.dataloader)
model = sq.transform(calib_iter=5, folding=False)
self.assertEqual(len([i for i in model.model.graph.node if i.op_type == 'Mul']), 2)
sq.recover()
self.assertEqual(len(sq.model.nodes()), len(self.model.graph.node))
for init in self.model.graph.initializer:
tensor = numpy_helper.to_array(init)
sq_tensor = numpy_helper.to_array(sq.model.get_initializer(init.name))
self.assertAlmostEqual(tensor[0][0], sq_tensor[0][0], 4)

sq = ORTSmoothQuant(copy.deepcopy(self.model), self.dataloader)
model = sq.transform(calib_iter=5, folding=False, scales_per_op=True)
self.assertEqual(len([i for i in model.model.graph.node if i.op_type == 'Mul']), 3)
sq.recover()
self.assertEqual(len(sq.model.nodes()), len(self.model.graph.node))
for init in self.model.graph.initializer:
tensor = numpy_helper.to_array(init)
sq_tensor = numpy_helper.to_array(sq.model.get_initializer(init.name))
self.assertAlmostEqual(tensor[0][0], sq_tensor[0][0], 4)

sq = ORTSmoothQuant(copy.deepcopy(self.model), self.dataloader)
model = sq.transform(calib_iter=5, scales_per_op=True)
self.assertEqual(len([i for i in model.model.graph.node if i.op_type == 'Mul']), 3)
sq.recover()
self.assertEqual(len(sq.model.nodes()), len(self.model.graph.node))
for init in self.model.graph.initializer:
tensor = numpy_helper.to_array(init)
sq_tensor = numpy_helper.to_array(sq.model.get_initializer(init.name))
self.assertAlmostEqual(tensor[0][0], sq_tensor[0][0], 4)

sq = ORTSmoothQuant(copy.deepcopy(self.model), self.dataloader)
model = sq.transform(calib_iter=5, scales_per_op=True, alpha='auto')
self.assertEqual(len([i for i in model.model.graph.node if i.op_type == 'Mul']), 3)
sq.recover()
self.assertEqual(len(sq.model.nodes()), len(self.model.graph.node))
for init in self.model.graph.initializer:
tensor = numpy_helper.to_array(init)
sq_tensor = numpy_helper.to_array(sq.model.get_initializer(init.name))
self.assertAlmostEqual(tensor[0][0], sq_tensor[0][0], 4)


sq = ORTSmoothQuant(copy.deepcopy(self.model), self.dataloader)
model = sq.transform(calib_iter=5, alpha='auto')
self.assertEqual(len([i for i in model.model.graph.node if i.op_type == 'Mul']), 1)
sq.recover()
self.assertEqual(len(sq.model.nodes()), len(self.model.graph.node))
for init in self.model.graph.initializer:
tensor = numpy_helper.to_array(init)
sq_tensor = numpy_helper.to_array(sq.model.get_initializer(init.name))
self.assertAlmostEqual(tensor[0][0], sq_tensor[0][0], 4)
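
For reference, the relocated ONNX checks can also be exercised outside unittest with the same pattern as test_sq above. This is a sketch under the assumption that build_onnx_model from this file is defined in the session; the dataset and dataloader names mirror setUpClass, and the expected Mul-node count comes from the scales_per_op=True case in the test.

import copy
from onnx import numpy_helper
from neural_compressor.data import Datasets, DATALOADERS
from neural_compressor.adaptor.ox_utils.smooth_quant import ORTSmoothQuant

model = build_onnx_model()                                    # helper defined in the test file above
dataset = Datasets("onnxrt_qdq")["dummy_v2"]((5, 5), (5, 1))  # dummy calibration data, as in setUpClass
dataloader = DATALOADERS["onnxrt_qlinearops"](dataset)

sq = ORTSmoothQuant(copy.deepcopy(model), dataloader)
out = sq.transform(calib_iter=5, scales_per_op=True)          # inserts per-op Mul scaling nodes
mul_nodes = [n for n in out.model.graph.node if n.op_type == "Mul"]
print("inserted Mul nodes:", len(mul_nodes))                  # the test above expects 3

sq.recover()                                                  # undo the scaling
for init in model.graph.initializer:                          # initializers should match the originals again
    restored = numpy_helper.to_array(sq.model.get_initializer(init.name))
    original = numpy_helper.to_array(init)
    assert abs(original[0][0] - restored[0][0]) < 1e-4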
