From 9b529388bf3a6589e2a25cd4c6391c11d63b2b93 Mon Sep 17 00:00:00 2001 From: lvliang-intel Date: Wed, 21 Sep 2022 18:56:40 +0800 Subject: [PATCH] Fix transformer_lt_mlperf accuracy drop issue (#1248) --- .../tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py index fd025995737..6c1f4ea8a6b 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py @@ -32,6 +32,17 @@ def __init__(self, **kwargs): self.sorted_patterns = sorted(self.patterns, key=lambda i: len(i), reverse=True) + # TODO Remove this when TFDO supports output_quantization_mode 'MIN_FIRST' + # Root cause of the transformer_lt_mlperf model accuracy drop: + # MatMul + Relu fusion ==> the output quantization mode can only be set to 'SCALED', + # if the input_quantization_mode of the next _QuantizedMatMul is set to 'MIN_FIRST'. + # The mismatch will cause the accuracy drop. + if not self.performance_only: + if ['Dequantize', 'MatMul', 'Relu', 'QuantizeV2'] in self.sorted_patterns: + self.sorted_patterns.remove(['Dequantize', 'MatMul', 'Relu', 'QuantizeV2']) + if ['Dequantize', 'MatMul', 'BiasAdd', 'Relu', 'QuantizeV2'] in self.sorted_patterns: + self.sorted_patterns.remove(['Dequantize', 'MatMul', 'BiasAdd', 'Relu', 'QuantizeV2']) + self.exclude_matmul_nodes = [] self.fusion_op_type = set(fusion[1] for fusion in self.patterns) self.fusion_mapping = {