Skip to content

Commit

Permalink
[Feature] Support more MMOCR models: DBNet++, TextSnake, ABINet, MRCNN (#1534)
Browse files Browse the repository at this point in the history

* WIP ocr

* add mrcnn rewrite

* add any rewrite for abinet

* export abinet to onnx

* fix abinet onnx export

* support abinet to tensorrt static and modify mmocr.yml

* add textsnake and dbnetpp

* support mrcnn in ORT and TRT

* add a condition before update data_preprocessor scope

* update doc and mmocr.yml

* add ut

* markdown and simple config

* write build_pytorch_model in child class

* update any_default

* remove where in abi_language_decoder___get_length__default

* keep where

* fix UT

* fix UT

* fix UT

* update mmocr.yml and config description

* tensorrt-fp32 -> tensorrt

* update doc
  • Loading branch information
AllentDan authored Jan 16, 2023
1 parent 7e66cfc commit ff95bf4
Show file tree
Hide file tree
Showing 40 changed files with 1,246 additions and 67 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Text-detection deployment config targeting ONNX Runtime.
# Exports three outputs (dets / labels / masks) and overrides the axes of the
# static base config so batch, detection count and image size are dynamic.
_base_ = [
    './text-detection_static.py',
    '../../_base_/backends/onnxruntime.py',
]

onnx_config = dict(
    output_names=['dets', 'labels', 'masks'],
    # Axis index -> symbolic name for every dimension left dynamic at export.
    dynamic_axes=dict(
        input={0: 'batch', 2: 'height', 3: 'width'},
        dets={0: 'batch', 1: 'num_dets'},
        labels={0: 'batch', 1: 'num_dets'},
        masks={0: 'batch', 1: 'num_dets', 2: 'height', 3: 'width'},
    ),
)

# Post-processing parameters attached to the exported model.
codebase_config = dict(
    post_processing=dict(
        score_threshold=0.05,
        confidence_threshold=0.005,
        iou_threshold=0.5,
        max_output_boxes_per_class=200,
        pre_top_k=5000,
        keep_top_k=100,
        background_label_id=-1,
        # Mask post-processing is not baked into the exported graph.
        export_postprocess_mask=False,
    ),
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Half-precision variant: inherits everything from the dynamic
# 320x320-2240x2240 Mask R-CNN TensorRT config and only turns on fp16_mode.
_base_ = [
    './text-detection_mrcnn_tensorrt_dynamic-320x320-2240x2240.py',
]

backend_config = dict(
    common_config=dict(
        fp16_mode=True,
    ),
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# INT8 variant: inherits everything from the dynamic 320x320-2240x2240
# Mask R-CNN TensorRT config, then enables both fp16_mode and int8_mode.
_base_ = [
    './text-detection_mrcnn_tensorrt_dynamic-320x320-2240x2240.py',
]

backend_config = dict(
    common_config=dict(
        fp16_mode=True,
        int8_mode=True,
    ),
)

# Calibration settings: creation is switched on and the calibration file is
# named calib_data.h5.
calib_config = dict(
    create_calib=True,
    calib_file='calib_data.h5',
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Text-detection deployment config targeting TensorRT with dynamic shapes.
# Exports three outputs (dets / labels / masks); the input profile ranges
# from 320x320 up to 2240x2240.
_base_ = [
    './text-detection_static.py',
    '../../_base_/backends/tensorrt.py',
]

onnx_config = dict(
    output_names=['dets', 'labels', 'masks'],
    # Axis index -> symbolic name for every dimension left dynamic at export.
    dynamic_axes=dict(
        input={0: 'batch', 2: 'height', 3: 'width'},
        dets={0: 'batch', 1: 'num_dets'},
        labels={0: 'batch', 1: 'num_dets'},
        masks={0: 'batch', 1: 'num_dets', 2: 'height', 3: 'width'},
    ),
)

backend_config = dict(
    # 1 << 30 == 1073741824 (presumably bytes, i.e. 1 GiB -- confirm).
    common_config=dict(max_workspace_size=1 << 30),
    model_inputs=[
        dict(
            input_shapes=dict(
                input=dict(
                    min_shape=[1, 3, 320, 320],
                    opt_shape=[1, 3, 600, 800],
                    max_shape=[1, 3, 2240, 2240],
                ),
            ),
        ),
    ],
)

# Post-processing parameters attached to the exported model.
codebase_config = dict(
    post_processing=dict(
        score_threshold=0.05,
        confidence_threshold=0.005,
        iou_threshold=0.5,
        max_output_boxes_per_class=200,
        pre_top_k=5000,
        keep_top_k=100,
        background_label_id=-1,
        # Mask post-processing is not baked into the exported graph.
        export_postprocess_mask=False,
    ),
)
18 changes: 18 additions & 0 deletions configs/mmocr/text-detection/text-detection_mrcnn_torchscript.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Mask R-CNN text-detection deployment config for the TorchScript backend.
_base_ = [
    '../../_base_/torchscript_config.py',
    '../../_base_/backends/torchscript.py',
]

# No fixed input shape is set; the traced model exposes three outputs.
ir_config = dict(
    input_shape=None,
    output_names=['dets', 'labels', 'masks'],
)

codebase_config = dict(
    type='mmocr',
    task='TextDetection',
    post_processing=dict(
        score_threshold=0.05,
        confidence_threshold=0.005,
        iou_threshold=0.5,
        max_output_boxes_per_class=200,
        pre_top_k=5000,
        keep_top_k=100,
        background_label_id=-1,
        # Mask post-processing is not baked into the exported graph.
        export_postprocess_mask=False,
    ),
)
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# 1 channel input for CRNN models
_base_ = [
'./text-recognition_dynamic.py', '../../_base_/backends/tensorrt-fp16.py'
]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# 3 channel and 32 height input for SATRN models
_base_ = [
'./text-recognition_dynamic.py', '../../_base_/backends/tensorrt-fp16.py'
]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# 3 channel and 48 height input for SAR models (FP16 TensorRT backend).
# Only the last dimension of the input varies: 64 at minimum, 640 at maximum.
_base_ = [
    './text-recognition_dynamic.py',
    '../../_base_/backends/tensorrt-fp16.py',
]

backend_config = dict(
    # 1 << 30 == 1073741824 (presumably bytes, i.e. 1 GiB -- confirm).
    common_config=dict(max_workspace_size=1 << 30),
    model_inputs=[
        dict(
            input_shapes=dict(
                input=dict(
                    min_shape=[1, 3, 48, 64],
                    opt_shape=[1, 3, 48, 64],
                    max_shape=[1, 3, 48, 640],
                ),
            ),
        ),
    ],
)
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# 1 channel input for CRNN models
_base_ = [
'./text-recognition_static.py', '../../_base_/backends/tensorrt-fp16.py'
]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# ABINet models use static input 32x128
_base_ = [
    './text-recognition_static.py',
    '../../_base_/backends/tensorrt-fp16.py',
]

# input_shape is [128, 32] while the engine shapes below are [1, 3, 32, 128],
# so input_shape appears to be ordered [width, height] -- confirm.
onnx_config = dict(input_shape=[128, 32])

backend_config = dict(
    # 1 << 30 == 1073741824 (presumably bytes, i.e. 1 GiB -- confirm).
    common_config=dict(max_workspace_size=1 << 30),
    model_inputs=[
        dict(
            input_shapes=dict(
                # min / opt / max are identical: the input is fully static.
                input=dict(
                    min_shape=[1, 3, 32, 128],
                    opt_shape=[1, 3, 32, 128],
                    max_shape=[1, 3, 32, 128],
                ),
            ),
        ),
    ],
)
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# 1 channel input for CRNN models
_base_ = [
'./text-recognition_dynamic.py', '../../_base_/backends/tensorrt-int8.py'
]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# 3 channel and 32 height input for SATRN models
_base_ = [
'./text-recognition_dynamic.py', '../../_base_/backends/tensorrt-int8.py'
]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# 3 channel and 48 height input for SAR models (INT8 TensorRT backend).
# Only the last dimension of the input varies: 64 at minimum, 640 at maximum.
_base_ = [
    './text-recognition_dynamic.py',
    '../../_base_/backends/tensorrt-int8.py',
]

backend_config = dict(
    # 1 << 30 == 1073741824 (presumably bytes, i.e. 1 GiB -- confirm).
    common_config=dict(max_workspace_size=1 << 30),
    model_inputs=[
        dict(
            input_shapes=dict(
                input=dict(
                    min_shape=[1, 3, 48, 64],
                    opt_shape=[1, 3, 48, 64],
                    max_shape=[1, 3, 48, 640],
                ),
            ),
        ),
    ],
)
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# 1 channel input for CRNN models
_base_ = [
'./text-recognition_static.py', '../../_base_/backends/tensorrt-int8.py'
]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# ABINet models use static input 32x128 (INT8 TensorRT backend).
_base_ = [
    './text-recognition_static.py',
    '../../_base_/backends/tensorrt-int8.py',
]

# input_shape is [128, 32] while the engine shapes below are [1, 3, 32, 128],
# so input_shape appears to be ordered [width, height] -- confirm.
onnx_config = dict(input_shape=[128, 32])

backend_config = dict(
    # 1 << 30 == 1073741824 (presumably bytes, i.e. 1 GiB -- confirm).
    common_config=dict(max_workspace_size=1 << 30),
    model_inputs=[
        dict(
            input_shapes=dict(
                # min / opt / max are identical: the input is fully static.
                input=dict(
                    min_shape=[1, 3, 32, 128],
                    opt_shape=[1, 3, 32, 128],
                    max_shape=[1, 3, 32, 128],
                ),
            ),
        ),
    ],
)
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# 1 channel input for CRNN models
_base_ = ['./text-recognition_dynamic.py', '../../_base_/backends/tensorrt.py']
backend_config = dict(
common_config=dict(max_workspace_size=1 << 30),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# 3 channel and 32 height input for SATRN models
_base_ = ['./text-recognition_dynamic.py', '../../_base_/backends/tensorrt.py']
backend_config = dict(
common_config=dict(max_workspace_size=1 << 30),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# 3 channel and 48 height input for SAR models (default TensorRT backend).
# Only the last dimension of the input varies: 64 at minimum, 640 at maximum.
_base_ = [
    './text-recognition_dynamic.py',
    '../../_base_/backends/tensorrt.py',
]

backend_config = dict(
    # 1 << 30 == 1073741824 (presumably bytes, i.e. 1 GiB -- confirm).
    common_config=dict(max_workspace_size=1 << 30),
    model_inputs=[
        dict(
            input_shapes=dict(
                input=dict(
                    min_shape=[1, 3, 48, 64],
                    opt_shape=[1, 3, 48, 64],
                    max_shape=[1, 3, 48, 640],
                ),
            ),
        ),
    ],
)
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# 1 channel input for CRNN models
_base_ = ['./text-recognition_static.py', '../../_base_/backends/tensorrt.py']

onnx_config = dict(input_shape=[32, 32])
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# ABINet models use static input 32x128 (default TensorRT backend).
_base_ = [
    './text-recognition_static.py',
    '../../_base_/backends/tensorrt.py',
]

# input_shape is [128, 32] while the engine shapes below are [1, 3, 32, 128],
# so input_shape appears to be ordered [width, height] -- confirm.
onnx_config = dict(input_shape=[128, 32])

backend_config = dict(
    # 1 << 30 == 1073741824 (presumably bytes, i.e. 1 GiB -- confirm).
    common_config=dict(max_workspace_size=1 << 30),
    model_inputs=[
        dict(
            input_shapes=dict(
                # min / opt / max are identical: the input is fully static.
                input=dict(
                    min_shape=[1, 3, 32, 128],
                    opt_shape=[1, 3, 32, 128],
                    max_shape=[1, 3, 32, 128],
                ),
            ),
        ),
    ],
)
122 changes: 122 additions & 0 deletions docs/en/03-benchmark/benchmark.md
Original file line number Diff line number Diff line change
Expand Up @@ -1178,6 +1178,42 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../
<td align="center">0.7949</td>
<td align="center">0.7950</td>
</tr>
<tr>
<td align="center" rowspan="3"><a href="https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015.py">DBNetpp</a></td>
<td align="center" rowspan="3">TextDetection</td>
<td align="center" rowspan="3">ICDAR2015</td>
<td align="center">recall</td>
<td align="center">0.8209</td>
<td align="center">0.8209</td>
<td align="center">0.8209</td>
<td align="center">0.8199</td>
<td align="center">0.8204</td>
<td align="center">0.8204</td>
<td align="center">-</td>
<td align="center">0.8209</td>
</tr>
<tr>
<td align="center">precision</td>
<td align="center">0.9079</td>
<td align="center">0.9079</td>
<td align="center">0.9079</td>
<td align="center">0.9117</td>
<td align="center">0.9117</td>
<td align="center">0.9142</td>
<td align="center">-</td>
<td align="center">0.9079</td>
</tr>
<tr>
<td align="center">hmean</td>
<td align="center">0.8622</td>
<td align="center">0.8622</td>
<td align="center">0.8622</td>
<td align="center">0.8634</td>
<td align="center">0.8637</td>
<td align="center">0.8648</td>
<td align="center">-</td>
<td align="center">0.8622</td>
</tr>
<tr>
<td align="center" rowspan="3"><a href="https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015.py">PSENet</a></td>
<td align="center" rowspan="3">TextDetection</td>
Expand Down Expand Up @@ -1250,6 +1286,78 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../
<td align="center">-</td>
<td align="center">0.7955</td>
</tr>
<tr>
<td align="center" rowspan="3"><a href="https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/textsnake/textsnake_resnet50_fpn-unet_1200e_ctw1500.py">TextSnake</a></td>
<td align="center" rowspan="3">TextDetection</td>
<td align="center" rowspan="3">CTW1500</td>
<td align="center">recall</td>
<td align="center">0.8052</td>
<td align="center">0.8052</td>
<td align="center">0.8052</td>
<td align="center">0.8055</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="center">precision</td>
<td align="center">0.8535</td>
<td align="center">0.8535</td>
<td align="center">0.8535</td>
<td align="center">0.8538</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="center">hmean</td>
<td align="center">0.8286</td>
<td align="center">0.8286</td>
<td align="center">0.8286</td>
<td align="center">0.8290</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="center" rowspan="3"><a href="https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015.py">MaskRCNN</a></td>
<td align="center" rowspan="3">TextDetection</td>
<td align="center" rowspan="3">ICDAR2015</td>
<td align="center">recall</td>
<td align="center">0.7766</td>
<td align="center">0.7766</td>
<td align="center">0.7766</td>
<td align="center">0.7766</td>
<td align="center">0.7761</td>
<td align="center">0.7670</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="center">precision</td>
<td align="center">0.8644</td>
<td align="center">0.8644</td>
<td align="center">0.8644</td>
<td align="center">0.8644</td>
<td align="center">0.8630</td>
<td align="center">0.8705</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="center">hmean</td>
<td align="center">0.8182</td>
<td align="center">0.8182</td>
<td align="center">0.8182</td>
<td align="center">0.8182</td>
<td align="center">0.8172</td>
<td align="center">0.8155</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="center"><a href="https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/crnn/crnn_mini-vgg_5e_mj.py">CRNN</a></td>
<td align="center">TextRecognition</td>
Expand Down Expand Up @@ -1292,6 +1400,20 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="center"><a href="https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/abinet/abinet_20e_st-an_mj.py">ABINet</a></td>
<td align="center">TextRecognition</td>
<td align="center">IIIT5K</td>
<td align="center">acc</td>
<td align="center">0.9603</td>
<td align="center">0.9563</td>
<td align="center">0.9563</td>
<td align="center">0.9573</td>
<td align="center">0.9507</td>
<td align="center">0.9510</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
</tbody>
</table>
</div>
Expand Down
Loading

0 comments on commit ff95bf4

Please sign in to comment.