Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor multi branch #8634

Merged
merged 27 commits into from
Aug 26, 2022
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
8fb235a
refactor dataflow
hhaAndroid Aug 23, 2022
d17d1e9
fix docstr
hhaAndroid Aug 23, 2022
ce1e068
fix commit
hhaAndroid Aug 23, 2022
d2780bf
fix commit
hhaAndroid Aug 24, 2022
f33fe2b
fix visualizer hook
hhaAndroid Aug 24, 2022
3cbdf31
fix UT
hhaAndroid Aug 24, 2022
9723433
fix UT
hhaAndroid Aug 24, 2022
1e661bb
resolve conflicts
hhaAndroid Aug 24, 2022
0b90fb2
fix UT error
hhaAndroid Aug 24, 2022
f8c5e7b
fix bug
hhaAndroid Aug 24, 2022
6d21f79
Refactor semi data flow
Czm369 Aug 24, 2022
485117d
Merge branch 'data_flow' of github.com:hhaAndroid/mmdetection into re…
Czm369 Aug 24, 2022
adc8ffa
update to mmengine main
hhaAndroid Aug 25, 2022
3ef2233
update typehint
hhaAndroid Aug 25, 2022
53777ab
replace data preprocess output type to dict
hhaAndroid Aug 25, 2022
b133fd8
update
hhaAndroid Aug 25, 2022
2f1add5
fix typehint
hhaAndroid Aug 25, 2022
d4ccf31
Merge branch 'data_flow' of github.com:hhaAndroid/mmdetection into re…
Czm369 Aug 25, 2022
f57acb6
Refactor MultiBranchDataPreprocessor again
Czm369 Aug 25, 2022
bca7bde
Solve thr conflict
Czm369 Aug 25, 2022
d4e67ee
Add some docstring
Czm369 Aug 25, 2022
8e3097a
Add some examples
Czm369 Aug 26, 2022
e267a81
Fix some commits
Czm369 Aug 26, 2022
cc418f4
Merge branch 'dev-3.x' of github.com:open-mmlab/mmdetection into refa…
Czm369 Aug 26, 2022
88880a9
Merge branch 'dev-3.x' of github.com:open-mmlab/mmdetection into refa…
Czm369 Aug 26, 2022
c7c4390
fix some commits
Czm369 Aug 26, 2022
afa69c5
Merge branch 'dev-3.x' of github.com:open-mmlab/mmdetection into refa…
Czm369 Aug 26, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion configs/_base_/datasets/semi_coco_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

scale = [(1333, 400), (1333, 1200)]

branch_field = ['sup', 'unsup_teacher', 'unsup_student']
# pipeline used to augment labeled data,
# which will be sent to student model for supervised training.
sup_pipeline = [
Expand All @@ -41,7 +42,10 @@
dict(type='RandomFlip', prob=0.5),
dict(type='RandAugment', aug_space=color_space, aug_num=1),
dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2)),
dict(type='MultiBranch', sup=dict(type='PackDetInputs'))
dict(
type='MultiBranch',
branch_field=branch_field,
sup=dict(type='PackDetInputs'))
]

# pipeline used to augment unlabeled data weakly,
Expand Down Expand Up @@ -82,6 +86,7 @@
dict(type='LoadEmptyAnnotations'),
dict(
type='MultiBranch',
branch_field=branch_field,
unsup_teacher=weak_pipeline,
unsup_student=strong_pipeline,
)
Expand Down
94 changes: 90 additions & 4 deletions mmdet/datasets/transforms/wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,37 +14,123 @@ class MultiBranch(BaseTransform):
r"""Multiple branch pipeline wrapper.

Generate multiple data-augmented versions of the same image.
`MultiBranch` needs to specify the branch names of all
pipelines of the dataset, perform corresponding data augmentation
for the current branch, and return None for other branches,
which ensures the consistency of return format across
different samples.

Args:
branch_field (list): List of branch names.
branch_pipelines (dict): Dict of different pipeline configs
to be composed.

Examples:
>>> branch_field = ['sup', 'unsup_teacher', 'unsup_student']
>>> sup_pipeline = [
>>> dict(type='LoadImageFromFile',
>>> file_client_args=dict(backend='disk')),
>>> dict(type='LoadAnnotations', with_bbox=True),
>>> dict(type='Resize', scale=(1333, 800), keep_ratio=True),
>>> dict(type='RandomFlip', prob=0.5),
>>> dict(
>>> type='MultiBranch',
>>> branch_field=branch_field,
>>> sup=dict(type='PackDetInputs'))
>>> ]
>>> weak_pipeline = [
>>> dict(type='LoadImageFromFile',
>>> file_client_args=dict(backend='disk')),
>>> dict(type='LoadAnnotations', with_bbox=True),
>>> dict(type='Resize', scale=(1333, 800), keep_ratio=True),
>>> dict(type='RandomFlip', prob=0.0),
>>> dict(
>>> type='MultiBranch',
>>> branch_field=branch_field,
>>> sup=dict(type='PackDetInputs'))
>>> ]
>>> strong_pipeline = [
>>> dict(type='LoadImageFromFile',
>>> file_client_args=dict(backend='disk')),
>>> dict(type='LoadAnnotations', with_bbox=True),
>>> dict(type='Resize', scale=(1333, 800), keep_ratio=True),
>>> dict(type='RandomFlip', prob=1.0),
>>> dict(
>>> type='MultiBranch',
>>> branch_field=branch_field,
>>> sup=dict(type='PackDetInputs'))
>>> ]
>>> unsup_pipeline = [
>>> dict(type='LoadImageFromFile',
>>> file_client_args=file_client_args),
>>> dict(type='LoadEmptyAnnotations'),
>>> dict(
>>> type='MultiBranch',
>>> branch_field=branch_field,
>>> unsup_teacher=weak_pipeline,
>>> unsup_student=strong_pipeline)
>>> ]
>>> from mmcv.transforms import Compose
>>> sup_branch = Compose(sup_pipeline)
>>> unsup_branch = Compose(unsup_pipeline)
>>> print(sup_branch)
>>> Compose(
>>> LoadImageFromFile(ignore_empty=False, to_float32=False, color_type='color', imdecode_backend='cv2', file_client_args={'backend': 'disk'}) # noqa
>>> LoadAnnotations(with_bbox=True, with_label=True, with_mask=False, with_seg=False, poly2mask=True, imdecode_backend='cv2', file_client_args={'backend': 'disk'}) # noqa
>>> Resize(scale=(1333, 800), scale_factor=None, keep_ratio=True, clip_object_border=True), backend=cv2), interpolation=bilinear) # noqa
>>> RandomFlip(prob=0.5, direction=horizontal)
>>> MultiBranch(branch_pipelines=['sup'])
>>> )
>>> print(unsup_branch)
>>> Compose(
>>> LoadImageFromFile(ignore_empty=False, to_float32=False, color_type='color', imdecode_backend='cv2', file_client_args={'backend': 'disk'}) # noqa
>>> LoadEmptyAnnotations(with_bbox=True, with_label=True, with_mask=False, with_seg=False, seg_ignore_label=255) # noqa
>>> MultiBranch(branch_pipelines=['unsup_teacher', 'unsup_student'])
>>> )
"""

def __init__(self, branch_field: List[str],
             **branch_pipelines: dict) -> None:
    """Compose one data pipeline per branch.

    Args:
        branch_field (list): Names of all branches that samples from
            this dataset may carry. Branches listed here but absent
            from ``branch_pipelines`` are filled with ``None`` at
            transform time so every sample shares one format.
        **branch_pipelines (dict): Mapping from branch name to the
            pipeline config for that branch; each is compiled with
            ``Compose``.
    """
    self.branch_field = branch_field
    self.branch_pipelines = {
        branch: Compose(pipeline)
        for branch, pipeline in branch_pipelines.items()
    }

def transform(self, results: dict) -> Optional[dict]:
    """Apply every branch pipeline to a deep copy of ``results``.

    Args:
        results (dict): Result dict from the loading pipeline.

    Returns:
        dict | None:

        - 'inputs' (Dict[str, obj:`torch.Tensor`]): The forward data of
          models from different branches.
        - 'data_samples' (Dict[str, obj:`DetDataSample`]): The annotation
          info of the sample from different branches.

        Returns ``None`` when any branch pipeline returns ``None``, so
        the dataset samples another piece of data instead.
    """
    # Pre-fill every declared branch with empty entries so that the
    # returned format is identical across samples, even for branches
    # this particular pipeline does not produce.
    multi_results = {}
    for branch in self.branch_field:
        multi_results[branch] = {'inputs': None, 'data_samples': None}
    for branch, pipeline in self.branch_pipelines.items():
        branch_results = pipeline(copy.deepcopy(results))
        # If one branch pipeline returns None,
        # it will sample another data from dataset.
        if branch_results is None:
            return None
        multi_results[branch] = branch_results

    # Regroup from {branch: {key: value}} to {key: {branch: value}}.
    # NOTE: the loop variable is deliberately NOT named `results` to
    # avoid shadowing this method's parameter.
    format_results = {}
    for branch, branch_results in multi_results.items():
        for key in branch_results.keys():
            if format_results.get(key, None) is None:
                format_results[key] = {branch: branch_results[key]}
            else:
                format_results[key][branch] = branch_results[key]
    return format_results

def __repr__(self) -> str:
repr_str = self.__class__.__name__
Expand Down
144 changes: 126 additions & 18 deletions mmdet/models/data_preprocessors/data_preprocessor.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) OpenMMLab. All rights reserved.
import random
from numbers import Number
from typing import Dict, List, Optional, Sequence, Tuple, Union
from typing import List, Optional, Sequence, Tuple, Union

import numpy as np
import torch
Expand Down Expand Up @@ -368,6 +368,91 @@ def forward(
class MultiBranchDataPreprocessor(BaseDataPreprocessor):
"""DataPreprocessor wrapper for multi-branch data.

Take semi-supervised object detection as an example, assume that
the ratio of labeled data and unlabeled data in a batch is 1:2,
`sup` indicates the branch where the labeled data is augmented,
`unsup_teacher` and `unsup_student` indicate the branches where
the unlabeled data is augmented by different pipeline.

The input format of multi-branch data is shown as below :

.. code-block:: none
{
'inputs':
{
'sup': [Tensor, None, None],
'unsup_teacher': [None, Tensor, Tensor],
'unsup_student': [None, Tensor, Tensor],
},
'data_sample':
{
'sup': [DetDataSample, None, None],
'unsup_teacher': [None, DetDataSample, DetDataSample],
                'unsup_student': [None, DetDataSample, DetDataSample],
}
}

The format of multi-branch data
after filtering None is shown as below :

.. code-block:: none
{
'inputs':
{
'sup': [Tensor],
'unsup_teacher': [Tensor, Tensor],
'unsup_student': [Tensor, Tensor],
},
'data_sample':
{
'sup': [DetDataSample],
'unsup_teacher': [DetDataSample, DetDataSample],
'unsup_student': [DetDataSample, DetDataSample],
}
}

In order to reuse `DetDataPreprocessor` for the data
from different branches, the format of multi-branch data
grouped by branch as below :

.. code-block:: none
{
'sup':
{
'inputs': [Tensor]
'data_sample': [DetDataSample, DetDataSample]
},
'unsup_teacher':
{
'inputs': [Tensor, Tensor]
'data_sample': [DetDataSample, DetDataSample]
},
'unsup_student':
{
'inputs': [Tensor, Tensor]
'data_sample': [DetDataSample, DetDataSample]
},
}

After preprocessing data from different branches,
the multi-branch data needs to be reformatted as:

.. code-block:: none
{
'inputs':
{
'sup': [Tensor],
'unsup_teacher': [Tensor, Tensor],
'unsup_student': [Tensor, Tensor],
},
'data_sample':
{
'sup': [DetDataSample],
'unsup_teacher': [DetDataSample, DetDataSample],
'unsup_student': [DetDataSample, DetDataSample],
}
}

Args:
data_preprocessor (:obj:`ConfigDict` or dict): Config of
:class:`DetDataPreprocessor` to process the input data.
def __init__(self, data_preprocessor: ConfigType) -> None:
    """Build the wrapped single-branch data preprocessor.

    Args:
        data_preprocessor (:obj:`ConfigDict` or dict): Config of
            :class:`DetDataPreprocessor` to process the input data.
    """
    super().__init__()
    self.data_preprocessor = MODELS.build(data_preprocessor)

def forward(self, data: dict, training: bool = False) -> dict:
    """Perform normalization, padding and bgr2rgb conversion based on
    ``BaseDataPreprocessor`` for multi-branch data.

    Args:
        data (dict): Data sampled from dataloader.
        training (bool): Whether to enable training time augmentation.

    Returns:
        dict:

        - 'inputs' (Dict[str, obj:`torch.Tensor`]): The forward data of
          models from different branches.
        - 'data_samples' (Dict[str, obj:`DetDataSample`]): The annotation
          info of the sample from different branches.
    """
    # At test time the data is single-branch; delegate directly.
    if training is False:
        return self.data_preprocessor(data, training)

    # Filter out branch entries with a value of None (samples that do
    # not belong to that branch).
    for key in data.keys():
        for branch in data[key].keys():
            data[key][branch] = list(
                filter(lambda x: x is not None, data[key][branch]))

    # Group data by branch: {key: {branch: [...]}} -> {branch: {key: [...]}}
    # so that each branch can be fed to the wrapped preprocessor as a
    # regular single-branch batch.
    multi_branch_data = {}
    for key in data.keys():
        for branch in data[key].keys():
            if multi_branch_data.get(branch, None) is None:
                multi_branch_data[branch] = {key: data[key][branch]}
            elif multi_branch_data[branch].get(key, None) is None:
                multi_branch_data[branch][key] = data[key][branch]
            else:
                multi_branch_data[branch][key].append(data[key][branch])

    # Preprocess data from different branches. `_data` is deliberately
    # not named `data` to avoid shadowing this method's parameter.
    for branch, _data in multi_branch_data.items():
        multi_branch_data[branch] = self.data_preprocessor(_data, training)

    # Format data back by key: {branch: {key: ...}} -> {key: {branch: ...}}
    format_data = {}
    for branch in multi_branch_data.keys():
        for key in multi_branch_data[branch].keys():
            if format_data.get(key, None) is None:
                format_data[key] = {branch: multi_branch_data[branch][key]}
            elif format_data[key].get(branch, None) is None:
                format_data[key][branch] = multi_branch_data[branch][key]
            else:
                format_data[key][branch].append(
                    multi_branch_data[branch][key])

    return format_data

@property
def device(self):
Expand Down
Loading