Skip to content

Commit

Permalink
ViPNAS ResNet (open-mmlab#742)
Browse files Browse the repository at this point in the history
Add ViPNAS ResNet models. (https://github.com/luminxu/ViPNAS)
  • Loading branch information
luminxu authored Jun 30, 2021
1 parent 51b268f commit 4620884
Show file tree
Hide file tree
Showing 13 changed files with 1,676 additions and 6 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<!-- [ALGORITHM] -->

<details>
<summary align="right">ViPNAS (CVPR'2021)</summary>

```bibtex
@article{xu2021vipnas,
title={ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search},
author={Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
year={2021}
}
```

</details>

<!-- [DATASET] -->

<details>
<summary align="right">COCO (ECCV'2014)</summary>

```bibtex
@inproceedings{lin2014microsoft,
title={Microsoft coco: Common objects in context},
author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
booktitle={European conference on computer vision},
pages={740--755},
year={2014},
organization={Springer}
}
```

</details>

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log |
| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
| [S-VipNAS-Res50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/s_vipnas_res50_coco_256x192.py) | 256x192 | 0.711 | 0.893 | 0.789 | 0.769 | 0.769 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_coco_256x192-cc43b466_20210624.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_coco_256x192_20210624.log.json) |
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
log_level = 'INFO'
load_from = None
resume_from = None
dist_params = dict(backend='nccl')
workflow = [('train', 1)]
checkpoint_config = dict(interval=10)
evaluation = dict(interval=10, metric='mAP', key_indicator='AP')

optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])

channel_cfg = dict(
num_output_channels=17,
dataset_joints=17,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
])

# model settings
model = dict(
type='TopDown',
pretrained=None,
backbone=dict(type='ViPNAS_ResNet', depth=50),
keypoint_head=dict(
type='ViPNASHeatmapSimpleHead',
in_channels=608,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))

data_cfg = dict(
image_size=[192, 256],
heatmap_size=[48, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
soft_nms=False,
nms_thr=1.0,
oks_thr=0.9,
vis_thr=0.2,
use_gt_bbox=False,
det_bbox_thr=0.0,
bbox_file='data/coco/person_detection_results/'
'COCO_val2017_detections_AP_H_56_person.json',
)

train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownHalfBodyTransform',
num_joints_half_body=8,
prob_half_body=0.3),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=30,
scale_factor=0.25),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]

val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]

test_pipeline = val_pipeline

data_root = 'data/coco'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='TopDownCocoDataset',
ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
img_prefix=f'{data_root}/train2017/',
data_cfg=data_cfg,
pipeline=train_pipeline),
val=dict(
type='TopDownCocoDataset',
ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
img_prefix=f'{data_root}/val2017/',
data_cfg=data_cfg,
pipeline=val_pipeline),
test=dict(
type='TopDownCocoDataset',
ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
img_prefix=f'{data_root}/val2017/',
data_cfg=data_cfg,
pipeline=val_pipeline),
)
19 changes: 19 additions & 0 deletions docs/papers/backbones/vipnas.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search

## Introduction

<!-- [ALGORITHM] -->

<details>
<summary align="right">ViPNAS (CVPR'2021)</summary>

```bibtex
@article{xu2021vipnas,
title={ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search},
author={Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
year={2021}
}
```

</details>
4 changes: 3 additions & 1 deletion mmpose/core/evaluation/top_down_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,9 @@ def keypoints_from_heatmaps(heatmaps,

N, K, H, W = heatmaps.shape
if use_udp:
assert target_type in ['GaussianHeatMap', 'CombinedTarget']
assert target_type.lower() in [
'GaussianHeatMap'.lower(), 'CombinedTarget'.lower()
]
if target_type == 'GaussianHeatMap':
preds, maxvals = _get_max_preds(heatmaps)
preds = post_dark_udp(preds, heatmaps, kernel=kernel)
Expand Down
3 changes: 2 additions & 1 deletion mmpose/core/post_processing/post_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ def flip_back(output_flipped, flip_pairs, target_type='GaussianHeatMap'):
"""
assert output_flipped.ndim == 4, \
'output_flipped should be [batch_size, num_keypoints, height, width]'
assert target_type in ('GaussianHeatMap', 'CombinedTarget')
assert target_type.lower() in ('GaussianHeatMap'.lower(),
'CombinedTarget'.lower())
shape_ori = output_flipped.shape
channels = 1
if target_type == 'CombinedTarget':
Expand Down
4 changes: 3 additions & 1 deletion mmpose/datasets/pipelines/top_down_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,9 @@ def _udp_generate_target(self, cfg, joints_3d, joints_3d_visible, factor,
target_weight = np.ones((num_joints, 1), dtype=np.float32)
target_weight[:, 0] = joints_3d_visible[:, 0]

assert target_type in ['GaussianHeatMap', 'CombinedTarget']
assert target_type.lower() in [
'GaussianHeatMap'.lower(), 'CombinedTarget'.lower()
]

if target_type == 'GaussianHeatMap':
target = np.zeros((num_joints, heatmap_size[1], heatmap_size[0]),
Expand Down
3 changes: 2 additions & 1 deletion mmpose/models/backbones/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@
from .shufflenet_v2 import ShuffleNetV2
from .tcn import TCN
from .vgg import VGG
from .vipnas_resnet import ViPNAS_ResNet

__all__ = [
'AlexNet', 'HourglassNet', 'HRNet', 'MobileNetV2', 'MobileNetV3', 'RegNet',
'ResNet', 'ResNetV1d', 'ResNeXt', 'SCNet', 'SEResNet', 'SEResNeXt',
'ShuffleNetV1', 'ShuffleNetV2', 'CPM', 'RSN', 'MSPN', 'ResNeSt', 'VGG',
'TCN'
'TCN', 'ViPNAS_ResNet'
]
Loading

0 comments on commit 4620884

Please sign in to comment.