support Conv [BiasAdd, Sum, LeakyRelu] int8 (#1118)
lvliang-intel authored Aug 16, 2022
1 parent a6f7476 commit c72453e
Showing 6 changed files with 132 additions and 60 deletions.
2 changes: 2 additions & 0 deletions neural_compressor/adaptor/inteltensorflow.yaml
@@ -211,6 +211,8 @@
'Dequantize + Conv2D + BiasAdd + Relu6 + QuantizeV2',
'Dequantize + Conv2D + Relu6 + QuantizeV2',
'Dequantize + Conv2D + BiasAdd + LeakyRelu + QuantizeV2',
'Dequantize + Conv2D + BiasAdd + Add + LeakyRelu + QuantizeV2',
'Dequantize + Conv2D + BiasAdd + AddV2 + LeakyRelu + QuantizeV2',
'Dequantize + Conv2D + LeakyRelu + QuantizeV2',
'Dequantize + Conv2D + BiasAdd + Sigmoid + QuantizeV2',
'Dequantize + Conv2D + Sigmoid + QuantizeV2',
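For reference, a minimal sketch (illustrative only, not part of this commit) of a float graph whose QDQ-quantized form should match the newly listed 'Dequantize + Conv2D + BiasAdd + AddV2 + LeakyRelu + QuantizeV2' pattern; all names and shapes here are assumptions:

import tensorflow as tf

# Float graph: Conv2D -> BiasAdd -> AddV2 -> LeakyRelu (illustrative).
tf.compat.v1.disable_eager_execution()
x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input")
w = tf.compat.v1.get_variable("weight", [3, 3, 16, 16],
                              initializer=tf.compat.v1.random_normal_initializer())
b = tf.compat.v1.get_variable("bias", [16],
                              initializer=tf.compat.v1.zeros_initializer())
conv = tf.nn.conv2d(x, w, strides=[1, 2, 2, 1], padding="SAME")
biased = tf.nn.bias_add(conv, b)
summed = tf.raw_ops.AddV2(x=biased, y=tf.ones_like(biased), name="addv2")
out = tf.nn.leaky_relu(summed, name="op_to_store")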
2 changes: 1 addition & 1 deletion neural_compressor/adaptor/tf_utils/graph_converter.py
@@ -556,7 +556,7 @@ def _freeze_requantization_ranges(self, additional_data=None):
self._tmp_graph_def = ScaleProPagationTransformer(
self._tmp_graph_def).do_transformation()

if debug:
if debug and not self.new_api:
self._tmp_graph_def.library.CopyFrom(self.model.graph_def.library)
self._tmp_model.graph_def = self._tmp_graph_def
self._tmp_model.save(self._int8_frozen_range_model_path)
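The guard above keeps the intermediate int8 frozen-range model from being written when the new API is in use. As a generic, hedged sketch of dumping such an intermediate GraphDef for debugging (the helper name and path handling are assumptions, not this file's code):

import tensorflow as tf
from tensorflow.core.framework import graph_pb2

def save_graph_def_snapshot(graph_def: graph_pb2.GraphDef, path: str) -> None:
    # Serialize an intermediate GraphDef so the partially converted graph can be inspected.
    with tf.io.gfile.GFile(path, 'wb') as f:
        f.write(graph_def.SerializeToString())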
@@ -40,6 +40,7 @@ class FuseConvRequantizeTransformer(GraphRewriterBase):
fuse_sum_op_types = (
[b'BiasAdd', b'Sum'],
[b'BiasAdd', b'Sum', b'Relu'],
[b'BiasAdd', b'Sum', b'LeakyRelu'],
[b'BiasAdd', b'Relu', b'Sum'],
[b'BiasAdd', b'LeakyRelu', b'Sum']
)
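A hedged sketch (the helper below is illustrative, not part of this file) of checking a node's fused_ops byte strings against the tuple above, including the newly added [b'BiasAdd', b'Sum', b'LeakyRelu'] entry:

fuse_sum_op_types = (
    [b'BiasAdd', b'Sum'],
    [b'BiasAdd', b'Sum', b'Relu'],
    [b'BiasAdd', b'Sum', b'LeakyRelu'],  # newly supported fusion
    [b'BiasAdd', b'Relu', b'Sum'],
    [b'BiasAdd', b'LeakyRelu', b'Sum'],
)

def is_supported_sum_fusion(fused_ops):
    # True when the fused op sequence matches one of the supported Conv + Sum fusions.
    return list(fused_ops) in [list(p) for p in fuse_sum_op_types]

print(is_supported_sum_fusion([b'BiasAdd', b'Sum', b'LeakyRelu']))  # True
print(is_supported_sum_fusion([b'BiasAdd', b'LeakyRelu']))          # False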
@@ -346,14 +347,6 @@ def do_transformation(self):
dtypes.float32.as_datatype_enum,
dtypes.float32.as_datatype_enum,
])
Helper.set_attr_type_list(new_node, 'Thost_outputs', [
requantize_node.attr['out_type'].type,
dtypes.float32.as_datatype_enum,
dtypes.float32.as_datatype_enum ])
Helper.set_attr_dtype(new_node, "out_type", \
dtype_map_dict[requantize_node.attr['out_type'].type])
Helper.set_attr_dtype(new_node, "Tsummand", \
dtype_map_dict[requantize_node.attr['out_type'].type])
elif str(quantized_node.attr['fused_ops'].list.s) == str([b"BiasAdd"]):
Helper.set_attr_string_list(new_node, 'fused_ops', [b'BiasAdd', b'Requantize'])
Helper.set_attr_type_list(new_node, 'Thost_inputs', [
@@ -368,14 +361,6 @@ def do_transformation(self):
dtypes.float32.as_datatype_enum,
dtypes.float32.as_datatype_enum,
])
Helper.set_attr_type_list(new_node, 'Thost_outputs', [
requantize_node.attr['out_type'].type,
dtypes.float32.as_datatype_enum,
dtypes.float32.as_datatype_enum ])
Helper.set_attr_dtype(new_node, "out_type", \
dtype_map_dict[requantize_node.attr['out_type'].type])
Helper.set_attr_dtype(new_node, "Tsummand", \
dtype_map_dict[requantize_node.attr['out_type'].type])
elif str(quantized_node.attr['fused_ops'].list.s) == str([b"BiasAdd", b"Relu"]):
Helper.set_attr_string_list(new_node, 'fused_ops', [b'BiasAdd', b'Relu', b'Requantize'])
Helper.set_attr_type_list(new_node, 'Thost_inputs', [
@@ -390,14 +375,15 @@ def do_transformation(self):
dtypes.float32.as_datatype_enum,
dtypes.float32.as_datatype_enum,
])
Helper.set_attr_type_list(new_node, 'Thost_outputs', [
requantize_node.attr['out_type'].type,
dtypes.float32.as_datatype_enum,
dtypes.float32.as_datatype_enum ])
Helper.set_attr_dtype(new_node, "out_type", \
dtype_map_dict[requantize_node.attr['out_type'].type])
Helper.set_attr_dtype(new_node, "Tsummand", \
dtype_map_dict[requantize_node.attr['out_type'].type])

Helper.set_attr_type_list(new_node, 'Thost_outputs', [
requantize_node.attr['out_type'].type,
dtypes.float32.as_datatype_enum,
dtypes.float32.as_datatype_enum ])
Helper.set_attr_dtype(new_node, "out_type", \
dtype_map_dict[requantize_node.attr['out_type'].type])
Helper.set_attr_dtype(new_node, "Tsummand", \
dtype_map_dict[requantize_node.attr['out_type'].type])

if quantized_node.op == "QuantizedConv2D" or \
quantized_node.op == "QuantizedConv2DWithBias" or \
@@ -525,43 +511,28 @@ def do_transformation(self):
else dtypes.qint8)
if str(quantized_node.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Sum', b'Relu']):
self.fused_ops = [b'BiasAdd', b'Sum', b'Relu', b'Requantize']
Helper.set_attr_type_list(new_node, 'Thost_outputs', [
requantize_node.attr['out_type'].type,
dtypes.float32.as_datatype_enum,
dtypes.float32.as_datatype_enum ])
Helper.set_attr_dtype(new_node, "out_type", \
dtype_map_dict[requantize_node.attr['out_type'].type])
elif str(quantized_node.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Sum', b'LeakyRelu']):
self.fused_ops = [b'BiasAdd', b'Sum', b'LeakyRelu', b'Requantize']
elif str(quantized_node.attr['fused_ops'].list.s) == str([b'BiasAdd', b'LeakyRelu', b'Sum']):
self.fused_ops = [b'BiasAdd', b'LeakyRelu', b'Sum', b'Requantize']
elif str(quantized_node.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Relu', b'Sum']):
self.fused_ops = [b'BiasAdd', b'Relu', b'Sum', b'Requantize']
Helper.set_attr_type_list(new_node, 'Thost_outputs', [
requantize_node.attr['out_type'].type,
dtypes.float32.as_datatype_enum,
dtypes.float32.as_datatype_enum ])
Helper.set_attr_dtype(new_node, "out_type", \
dtype_map_dict[requantize_node.attr['out_type'].type])
elif str(quantized_node.attr['fused_ops'].list.s) == str([b'BiasAdd', b'LeakyRelu', b'Sum']):
self.fused_ops = [b'BiasAdd', b'LeakyRelu', b'Sum', b'Requantize']
Helper.set_attr_type_list(new_node, 'Thost_outputs', [
requantize_node.attr['out_type'].type,
dtypes.float32.as_datatype_enum,
dtypes.float32.as_datatype_enum ])
Helper.set_attr_dtype(new_node, "out_type", \
dtype_map_dict[requantize_node.attr['out_type'].type])
#Current fusion requires the summand to have the same dtype as the output if the output is qint8
Helper.set_attr_dtype(new_node, "Tsummand", \
dtype_map_dict[requantize_node.attr['out_type'].type])

elif str(quantized_node.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Sum']):
self.fused_ops = [b'BiasAdd', b'Sum', b'Requantize']
Helper.set_attr_type_list(new_node, 'Thost_outputs', [
requantize_node.attr['out_type'].type,
dtypes.float32.as_datatype_enum,
dtypes.float32.as_datatype_enum ])
Helper.set_attr_dtype(new_node, "out_type", \
dtype_map_dict[requantize_node.attr['out_type'].type])
#Current fusion requires the summand to have the same dtype as the output if the output is qint8
Helper.set_attr_dtype(new_node, "Tsummand", \
dtype_map_dict[requantize_node.attr['out_type'].type])
Helper.set_attr_type_list(new_node, 'Thost_outputs', [
requantize_node.attr['out_type'].type,
dtypes.float32.as_datatype_enum,
dtypes.float32.as_datatype_enum ])
Helper.set_attr_dtype(new_node, "out_type", \
dtype_map_dict[requantize_node.attr['out_type'].type])
Helper.set_attr_string_list(new_node, 'fused_ops', self.fused_ops)

if not self.new_api:
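The hunks above drop the per-branch Thost_outputs / out_type / Tsummand settings and apply them once after the branch selection. A hedged illustration of the enum-to-dtype lookup this relies on (the mapping shown is an assumption, not copied from the file):

from tensorflow.python.framework import dtypes

# Assumed shape of dtype_map_dict: DataType enum -> DType, so out_type and Tsummand
# can be derived from the Requantize node's out_type attribute.
dtype_map_dict = {
    dtypes.quint8.as_datatype_enum: dtypes.quint8,
    dtypes.qint8.as_datatype_enum: dtypes.qint8,
    dtypes.qint32.as_datatype_enum: dtypes.qint32,
    dtypes.float32.as_datatype_enum: dtypes.float32,
}

out_enum = dtypes.qint8.as_datatype_enum  # stand-in for requantize_node.attr['out_type'].type
print(dtype_map_dict[out_enum])           # <dtype: 'qint8'>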
@@ -55,6 +55,8 @@ def __init__(self, **kwargs):
'DequantizeConv2DBiasAddSigmoidQuantizeV2': self.apply_newly_conv_biasadd_relu_fusion,
'DequantizeConv2DSigmoidQuantizeV2': self.apply_newly_conv_biasadd_relu_fusion,
'DequantizeConv2DBiasAddLeakyReluAddV2QuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion,
'DequantizeConv2DBiasAddAddLeakyReluQuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion,
'DequantizeConv2DBiasAddAddV2LeakyReluQuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion,
'DequantizeConv2DLeakyReluAddV2QuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion,
'DequantizeConv2DAddRelu6QuantizeV2': self.apply_newly_conv_biasadd_relu_fusion,
'DequantizeConv2DAddReluQuantizeV2': self.apply_newly_conv_biasadd_relu_fusion,
@@ -1189,8 +1191,9 @@ def apply_newly_conv_biasadd_addn_relu_fusion(self, match_node_name):
return self.apply_newly_conv_biasadd_fusion(match_node_name[:3] + [match_node_name[-1]])

forth_node = self.node_name_mapping[match_node_name[4]].node
if third_node.op != 'LeakyRelu' and not self._find_relu_node(matched_node.node):
return self.apply_newly_conv_biasadd_fusion(match_node_name[:3] + [match_node_name[-1]])
if forth_node.op != 'LeakyRelu':
if third_node.op != 'LeakyRelu' and not self._find_relu_node(matched_node.node):
return self.apply_newly_conv_biasadd_fusion(match_node_name[:3])

is_leakyrelu_add_fusion = third_node.op == 'LeakyRelu' and forth_node.op.find('Add') != -1
is_relu_add_fusion = third_node.op == 'Relu' and forth_node.op.find('Add') != -1
@@ -1227,6 +1230,7 @@ def apply_newly_conv_biasadd_addn_relu_fusion(self, match_node_name):
else:
relu_node_name = match_node_name[4]
is_relu6 = self.node_name_mapping[relu_node_name].node.op == "Relu6"
is_leakyrelu = self.node_name_mapping[relu_node_name].node.op == "LeakyRelu"

sum_index = 1 if match_node_name[2 + relu_offset] == self.node_name_mapping[
match_node_name[3 + relu_offset]].node.input[0] else 0
@@ -1265,7 +1269,10 @@ def apply_newly_conv_biasadd_addn_relu_fusion(self, match_node_name):
if "alpha" in self.node_name_mapping[relu_node_name].node.attr:
helper.copy_attr(quantized_conv_node, "alpha",
self.node_name_mapping[relu_node_name].node.attr["alpha"])
helper.set_attr_string_list(quantized_conv_node, 'fused_ops', [b'BiasAdd', b'Sum', b'Relu'])
if is_leakyrelu:
helper.set_attr_string_list(quantized_conv_node, 'fused_ops', [b'BiasAdd', b'Sum', b'LeakyRelu'])
else:
helper.set_attr_string_list(quantized_conv_node, 'fused_ops', [b'BiasAdd', b'Sum', b'Relu'])
helper.set_attr_dtype(quantized_conv_node, "Tbias", dtypes.float32)
# if self.device == 'gpu' else dtypes.qint32)
helper.set_attr_dtype(quantized_conv_node, "Tsummand", dtypes.qint32)
@@ -1291,7 +1298,7 @@ def apply_newly_conv_biasadd_addn_relu_fusion(self, match_node_name):

self.add_output_graph_node(quantized_conv_node)

if is_leakyrelu_add_fusion:
if is_leakyrelu_add_fusion or is_leakyrelu:
quantize_down_name = self._add_quantize_down_nodes(
node, quantized_node_name, dtypes.qint8, False)
self._intel_cpu_add_dequantize_result_node(
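When the fused pattern ends in LeakyRelu, the hunks above quantize the result down to signed qint8 (_add_quantize_down_nodes(..., dtypes.qint8, False)) instead of unsigned quint8. A short numerical sketch of why a signed range is needed, with an illustrative alpha:

import numpy as np

# Unlike Relu, LeakyRelu keeps scaled negative values, so its output needs a
# signed quantized range (qint8) rather than an unsigned one (quint8).
x = np.array([-10.0, -1.0, 0.0, 1.0, 10.0], dtype=np.float32)
alpha = 0.2  # illustrative slope
leaky = np.where(x >= 0, x, alpha * x)
print(leaky)             # [-2.  -0.2  0.   1.  10.] -> negative values present
print(np.maximum(x, 0))  # [ 0.   0.   0.   1.  10.] -> Relu output fits quint8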
7 changes: 1 addition & 6 deletions test/tfnewapi/test_tensorflow_graph_qdq_conv_fusion.py
@@ -8,14 +8,9 @@
import tensorflow as tf
import logging

from neural_compressor.adaptor.tf_utils.quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel
from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.strip_unused_nodes import StripUnusedNodesOptimizer
from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fold_batch_norm import FoldBatchNormNodesOptimizer
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import function
from neural_compressor.adaptor.tensorflow import TensorflowQuery
from neural_compressor.adaptor.tf_utils.util import disable_random
from pkg_resources import parse_version

def build_fake_yaml():
fake_yaml = '''
@@ -326,7 +321,7 @@ def test_conv_biasadd_addv2_relu_fallback_fusion_1(self):
found_conv_fusion = True
break
self.assertEqual(found_conv_fusion, True)

@disable_random()
def test_conv_fusion_with_last_conv(self):
logging.getLogger().info("test_conv_fusion_with_last_conv")
97 changes: 97 additions & 0 deletions test/tfnewapi/test_tensorflow_graph_qdq_new_conv_fusion.py
@@ -0,0 +1,97 @@
#
# -*- coding: utf-8 -*-
#
import unittest
import os
import yaml
import tensorflow as tf
import logging

from tensorflow.python.framework import graph_util
from tensorflow.python.framework import function
from neural_compressor.adaptor.tf_utils.util import disable_random

def build_fake_yaml():
    fake_yaml = '''
        model:
          name: fake_yaml
          framework: inteltensorflow
          inputs: input
        device: cpu
        quantization:
          model_wise:
            weight:
              granularity: per_tensor
              scheme: sym
              dtype: int8
              algorithm: minmax
        evaluation:
          accuracy:
            metric:
              topk: 1
        tuning:
          strategy:
            name: basic
          accuracy_criterion:
            relative: 0.1
          exit_policy:
            performance_only: True
          workspace:
            path: saved
        '''

    y = yaml.load(fake_yaml, Loader=yaml.SafeLoader)

    with open('fake_yaml.yaml', "w", encoding="utf-8") as f:
        yaml.dump(y, f)

    f.close()


class TestTensorflowNewQdqConvFusion(unittest.TestCase):

    @classmethod
    def setUpClass(self):
        build_fake_yaml()

    @classmethod
    def tearDownClass(self):
        os.remove('fake_yaml.yaml')

    @disable_random()
    def test_conv_biasadd_add_leakyrelu_fusion(self):
        logging.getLogger().info("test_conv_biasadd_add_leakyrelu_fusion")
        x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input")
        conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16],
                                                 initializer=tf.compat.v1.random_normal_initializer())
        conv = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME")
        normed = tf.compat.v1.layers.batch_normalization(conv)
        conv2_weights = tf.compat.v1.get_variable("weight_conv2", [3, 3, 16, 16],
                                                  initializer=tf.compat.v1.random_normal_initializer())
        conv2 = tf.nn.conv2d(x, conv2_weights, strides=[1, 2, 2, 1], padding="SAME")
        sumadd = tf.raw_ops.AddV2(x=normed, y=conv2, name='addv2')
        leaky_relu = tf.nn.leaky_relu(sumadd, name='op_to_store')

        out_name = leaky_relu.name.split(':')[0]
        with tf.compat.v1.Session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            output_graph_def = graph_util.convert_variables_to_constants(
                sess=sess,
                input_graph_def=sess.graph_def,
                output_node_names=[out_name])
            from neural_compressor.experimental import Quantization, common
            quantizer = Quantization('fake_yaml.yaml')
            dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True)
            quantizer.eval_dataloader = common.DataLoader(dataset)
            quantizer.calib_dataloader = common.DataLoader(dataset)
            quantizer.model = output_graph_def
            output_graph = quantizer.fit()
            found_conv_fusion = True

            for i in output_graph.graph_def.node:
                if i.op == 'LeakyRelu':
                    found_conv_fusion = False
                    break

            self.assertEqual(found_conv_fusion, True)
if __name__ == '__main__':
    unittest.main()
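A minimal way to run only the new test file through unittest discovery, assuming the repository root as the working directory and an installed neural_compressor plus an Intel TensorFlow build that provides the new quantized ops:

import unittest

suite = unittest.defaultTestLoader.discover(
    'test/tfnewapi', pattern='test_tensorflow_graph_qdq_new_conv_fusion.py')
unittest.TextTestRunner(verbosity=2).run(suite)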
