From 4dbe9b24b47637a78199afc0ec4a152770fd6699 Mon Sep 17 00:00:00 2001 From: irexyc Date: Thu, 1 Jun 2023 01:33:35 +0800 Subject: [PATCH 1/3] fix satrn onnxruntime batch inference --- .../mmocr/models/text_recognition/__init__.py | 2 + .../models/text_recognition/nrtr_decoder.py | 36 ++++++++++++++++ .../models/text_recognition/satrn_encoder.py | 42 +++++++++++++++++++ 3 files changed, 80 insertions(+) create mode 100644 mmdeploy/codebase/mmocr/models/text_recognition/nrtr_decoder.py create mode 100644 mmdeploy/codebase/mmocr/models/text_recognition/satrn_encoder.py diff --git a/mmdeploy/codebase/mmocr/models/text_recognition/__init__.py b/mmdeploy/codebase/mmocr/models/text_recognition/__init__.py index b2991af218..e08cd24191 100644 --- a/mmdeploy/codebase/mmocr/models/text_recognition/__init__.py +++ b/mmdeploy/codebase/mmocr/models/text_recognition/__init__.py @@ -4,6 +4,8 @@ from . import crnn_decoder # noqa: F401,F403 from . import encoder_decoder_recognizer # noqa: F401,F403 from . import lstm_layer # noqa: F401,F403 +from . import nrtr_decoder # noqa: F401,F403 from . import sar_decoder # noqa: F401,F403 from . import sar_encoder # noqa: F401,F403 +from . import satrn_encoder # noqa: F401,F403 from . import transformer_module # noqa: F401,F403 diff --git a/mmdeploy/codebase/mmocr/models/text_recognition/nrtr_decoder.py b/mmdeploy/codebase/mmocr/models/text_recognition/nrtr_decoder.py new file mode 100644 index 0000000000..3291b2c8bb --- /dev/null +++ b/mmdeploy/codebase/mmocr/models/text_recognition/nrtr_decoder.py @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math +from typing import Sequence + +import torch + +from mmdeploy.core import FUNCTION_REWRITER + + +@FUNCTION_REWRITER.register_rewriter( + func_name='mmocr.models.textrecog.NRTRDecoder._get_source_mask') +def nrtr_decoder___get_source_mask( + self, src_seq: torch.Tensor, + valid_ratios: Sequence[float]) -> torch.Tensor: + """Generate mask for source sequence. + + Args: + src_seq (torch.Tensor): Image sequence. Shape :math:`(N, T, C)`. + valid_ratios (list[float]): The valid ratio of input image. For + example, if the width of the original image is w1 and the width + after padding is w2, then valid_ratio = w1/w2. Source mask is + used to cover the area of the padding region. + + Returns: + Tensor or None: Source mask. Shape :math:`(N, T)`. The region of + padding area are False, and the rest are True. + """ + + N, T, _ = src_seq.size() + mask = None + if len(valid_ratios) > 0: + mask = src_seq.new_zeros((N, T), device=src_seq.device) + valid_width = min(T, math.ceil(T * valid_ratios[0])) + mask[:, :valid_width] = 1 + + return mask diff --git a/mmdeploy/codebase/mmocr/models/text_recognition/satrn_encoder.py b/mmdeploy/codebase/mmocr/models/text_recognition/satrn_encoder.py new file mode 100644 index 0000000000..c965487442 --- /dev/null +++ b/mmdeploy/codebase/mmocr/models/text_recognition/satrn_encoder.py @@ -0,0 +1,42 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math +from typing import List + +from mmocr.structures import TextRecogDataSample +from torch import Tensor + +from mmdeploy.core import FUNCTION_REWRITER + + +@FUNCTION_REWRITER.register_rewriter( + func_name='mmocr.models.textrecog.SATRNEncoder.forward') +def satrn_encoder__forward( + self, + feat: Tensor, + data_samples: List[TextRecogDataSample] = None) -> Tensor: + """Forward propagation of encoder. + + Args: + feat (Tensor): Feature tensor of shape :math:`(N, D_m, H, W)`. + data_samples (list[TextRecogDataSample]): Batch of + TextRecogDataSample, containing `valid_ratio` information. + Defaults to None. + + Returns: + Tensor: A tensor of shape :math:`(N, T, D_m)`. + """ + valid_ratio = 1.0 + feat = self.position_enc(feat) + n, c, h, w = feat.size() + mask = feat.new_zeros((n, h, w)) + valid_width = min(w, math.ceil(w * valid_ratio)) + mask[:, :, :valid_width] = 1 + mask = mask.view(n, h * w) + feat = feat.view(n, c, h * w) + + output = feat.permute(0, 2, 1).contiguous() + for enc_layer in self.layer_stack: + output = enc_layer(output, h, w, mask) + output = self.layer_norm(output) + + return output From 1f9c4edb595500aad604a10a1ffb389996c60882 Mon Sep 17 00:00:00 2001 From: irexyc Date: Fri, 9 Jun 2023 17:58:25 +0800 Subject: [PATCH 2/3] add ut --- .../test_mmocr/test_mmocr_models.py | 80 ++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/tests/test_codebase/test_mmocr/test_mmocr_models.py b/tests/test_codebase/test_mmocr/test_mmocr_models.py index 1d07d5f9e6..e56584121d 100644 --- a/tests/test_codebase/test_mmocr/test_mmocr_models.py +++ b/tests/test_codebase/test_mmocr/test_mmocr_models.py @@ -10,7 +10,8 @@ from mmdeploy.core import RewriterContext, patch_model from mmdeploy.utils import Backend, Codebase from mmdeploy.utils.config_utils import load_config -from mmdeploy.utils.test import (WrapModel, check_backend, get_model_outputs, +from mmdeploy.utils.test import (WrapModel, check_backend, get_backend_outputs, + get_model_outputs, get_onnx_model, get_rewrite_outputs) try: @@ -155,6 +156,83 @@ def test_bidirectionallstm(backend: Backend): assert rewrite_outputs is not None +@pytest.mark.parametrize('backend', [Backend.ONNXRUNTIME]) +def test_nrtr_decoder__get_source_mask(backend: Backend): + from mmocr.models.textrecog import NRTRDecoder + deploy_cfg = mmengine.Config( + dict( + onnx_config=dict( + input_names=['input'], + output_names=['output'], + input_shape=None, + dynamic_axes={ + 'input': { + 0: 'batch', + }, + 'output': { + 0: 'batch', + } + }), + backend_config=dict(type=backend.value, model_inputs=None), + codebase_config=dict(type='mmocr', task='TextRecognition'))) + src_seq = torch.rand(1, 200, 256) + batch_src_seq = src_seq.expand(3, 200, 256) + decoder = NRTRDecoder( + dictionary=dict( + type='Dictionary', + dict_file='tests/test_codebase/test_mmocr/' + 'data/lower_english_digits.txt', + with_start=True, + with_end=True, + same_start_end=True, + with_padding=True, + with_unknown=True)) + + wrapped_model = WrapModel(decoder, '_get_source_mask') + model_inputs = {'src_seq': src_seq, 'valid_ratios': torch.Tensor([1.0])} + batch_model_inputs = {'input': batch_src_seq} + ir_file_path = get_onnx_model(wrapped_model, model_inputs, deploy_cfg) + backend_outputs = get_backend_outputs(ir_file_path, batch_model_inputs, + deploy_cfg)[0].numpy() + # batch results should be same + assert np.sum(backend_outputs[0] == backend_outputs[1]) and np.sum( + backend_outputs[1] == backend_outputs[2]) + + +@pytest.mark.parametrize('backend', [Backend.ONNXRUNTIME]) +def test_satrn_encoder__get_source_mask(backend: Backend): + from mmocr.models.textrecog import SATRNEncoder + + deploy_cfg = mmengine.Config( + dict( + onnx_config=dict( + input_names=['input'], + output_names=['output'], + input_shape=None, + dynamic_axes={ + 'input': { + 0: 'batch', + }, + 'output': { + 0: 'batch', + } + }), + backend_config=dict(type=backend.value, model_inputs=None), + codebase_config=dict(type='mmocr', task='TextRecognition'))) + encoder = SATRNEncoder(d_k=4, d_v=4, d_model=32, d_inner=32 * 4) + feat = torch.randn(1, 32, 32, 32) + batch_feat = feat.expand(3, 32, 32, 32) + wrapped_model = WrapModel(encoder, 'forward') + model_inputs = {'feat': feat} + batch_model_inputs = {'input': batch_feat} + ir_file_path = get_onnx_model(wrapped_model, model_inputs, deploy_cfg) + backend_outputs = get_backend_outputs(ir_file_path, batch_model_inputs, + deploy_cfg)[0].numpy() + # batch results should be same + assert np.sum(backend_outputs[0] == backend_outputs[1]) and np.sum( + backend_outputs[1] == backend_outputs[2]) + + @pytest.mark.parametrize('backend', [Backend.ONNXRUNTIME]) def test_simple_test_of_single_stage_text_detector(backend: Backend): """Test simple_test single_stage_text_detector.""" From 08433188ec8d3dc2ea36bd9a7317aa6326b4f426 Mon Sep 17 00:00:00 2001 From: irexyc Date: Fri, 9 Jun 2023 18:11:40 +0800 Subject: [PATCH 3/3] better compare --- tests/test_codebase/test_mmocr/test_mmocr_models.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/test_codebase/test_mmocr/test_mmocr_models.py b/tests/test_codebase/test_mmocr/test_mmocr_models.py index e56584121d..565e665977 100644 --- a/tests/test_codebase/test_mmocr/test_mmocr_models.py +++ b/tests/test_codebase/test_mmocr/test_mmocr_models.py @@ -194,9 +194,10 @@ def test_nrtr_decoder__get_source_mask(backend: Backend): ir_file_path = get_onnx_model(wrapped_model, model_inputs, deploy_cfg) backend_outputs = get_backend_outputs(ir_file_path, batch_model_inputs, deploy_cfg)[0].numpy() + num_elements = np.prod(backend_outputs.shape[1:]) # batch results should be same - assert np.sum(backend_outputs[0] == backend_outputs[1]) and np.sum( - backend_outputs[1] == backend_outputs[2]) + assert np.sum(backend_outputs[0] == backend_outputs[1]) == num_elements \ + and np.sum(backend_outputs[1] == backend_outputs[2]) == num_elements @pytest.mark.parametrize('backend', [Backend.ONNXRUNTIME]) @@ -228,9 +229,10 @@ def test_satrn_encoder__get_source_mask(backend: Backend): ir_file_path = get_onnx_model(wrapped_model, model_inputs, deploy_cfg) backend_outputs = get_backend_outputs(ir_file_path, batch_model_inputs, deploy_cfg)[0].numpy() + num_elements = np.prod(backend_outputs.shape[1:]) # batch results should be same - assert np.sum(backend_outputs[0] == backend_outputs[1]) and np.sum( - backend_outputs[1] == backend_outputs[2]) + assert np.sum(backend_outputs[0] == backend_outputs[1]) == num_elements \ + and np.sum(backend_outputs[1] == backend_outputs[2]) == num_elements @pytest.mark.parametrize('backend', [Backend.ONNXRUNTIME])