diff --git a/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py b/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py
index 2bf8cb7f8..3fca98f06 100644
--- a/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py
+++ b/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py
@@ -1,42 +1,103 @@
 _base_ = '../_base_/default_runtime.py'
 
-# dataset settings
-data_root = 'data/coco/'
-dataset_type = 'YOLOv5CocoDataset'
+# ========================Frequently modified parameters======================
+# -----data related-----
+data_root = 'data/coco/'  # Root path of data
+# Path of train annotation file
+train_ann_file = 'annotations/instances_train2017.json'
+train_data_prefix = 'train2017/'  # Prefix of train image path
+# Path of val annotation file
+val_ann_file = 'annotations/instances_val2017.json'
+val_data_prefix = 'val2017/'  # Prefix of val image path
 
-# parameters that often need to be modified
-img_scale = (640, 640)  # width, height
-max_epochs = 300
-save_epoch_intervals = 10
+num_classes = 80  # Number of classes for classification
+# Batch size of a single GPU during training
 train_batch_size_per_gpu = 16
+# Worker to pre-fetch data for each single GPU during training
 train_num_workers = 8
-# persistent_workers must be False if num_workers is 0.
+# persistent_workers must be False if num_workers is 0
 persistent_workers = True
+
+# -----model related-----
+# Basic size of multi-scale prior box
+anchors = [
+    [(12, 16), (19, 36), (40, 28)],  # P3/8
+    [(36, 75), (76, 55), (72, 146)],  # P4/16
+    [(142, 110), (192, 243), (459, 401)]  # P5/32
+]
+# -----train val related-----
+# Base learning rate for optim_wrapper. Corresponding to 8xb16=64 bs
+base_lr = 0.01
+max_epochs = 300  # Maximum training epochs
+
+num_epoch_stage2 = 30  # The last 30 epochs switch evaluation interval
+val_interval_stage2 = 1  # Evaluation interval
+
+model_test_cfg = dict(
+    # The config of multi-label for multi-class prediction.
+    multi_label=True,
+    # The number of boxes before NMS.
+    nms_pre=30000,
+    score_thr=0.001,  # Threshold to filter out boxes.
+    nms=dict(type='nms', iou_threshold=0.65),  # NMS type and threshold
+    max_per_img=300)  # Max number of detections of each image
+
+# ========================Possible modified parameters========================
+# -----data related-----
+img_scale = (640, 640)  # width, height
+# Dataset type, this will be used to define the dataset
+dataset_type = 'YOLOv5CocoDataset'
+# Batch size of a single GPU during validation
 val_batch_size_per_gpu = 1
+# Worker to pre-fetch data for each single GPU during validation
 val_num_workers = 2
-# only on Val
+# Config of batch shapes. Only on val.
+# It means not used if batch_shapes_cfg is None.
 batch_shapes_cfg = dict(
     type='BatchShapePolicy',
     batch_size=val_batch_size_per_gpu,
     img_size=img_scale[0],
+    # The image scale of padding should be divided by pad_size_divisor
     size_divisor=32,
+    # Additional paddings for pixel scale
     extra_pad_ratio=0.5)
 
-# different from yolov5
-anchors = [
-    [(12, 16), (19, 36), (40, 28)],  # P3/8
-    [(36, 75), (76, 55), (72, 146)],  # P4/16
-    [(142, 110), (192, 243), (459, 401)]  # P5/32
-]
-strides = [8, 16, 32]
-num_det_layers = 3
-num_classes = 80
+# -----model related-----
+strides = [8, 16, 32]  # Strides of multi-scale prior box
+num_det_layers = 3  # The number of model output scales
+norm_cfg = dict(type='BN', momentum=0.03, eps=0.001)
+
+# Data augmentation
+max_translate_ratio = 0.2  # YOLOv5RandomAffine
+scaling_ratio_range = (0.1, 2.0)  # YOLOv5RandomAffine
+mixup_prob = 0.15  # YOLOv5MixUp
+randchoice_mosaic_prob = [0.8, 0.2]
+mixup_alpha = 8.0  # YOLOv5MixUp
+mixup_beta = 8.0  # YOLOv5MixUp
+
+# -----train val related-----
+loss_cls_weight = 0.3
+loss_bbox_weight = 0.05
+loss_obj_weight = 0.7
+# BatchYOLOv7Assigner params
+simota_candidate_topk = 10
+simota_iou_weight = 3.0
+simota_cls_weight = 1.0
+prior_match_thr = 4.  # Priori box matching threshold
+obj_level_weights = [4., 1.,
+                     0.4]  # The obj loss weights of the three output layers
 
-# single-scale training is recommended to
+lr_factor = 0.1  # Learning rate scaling factor
+weight_decay = 0.0005
+save_epoch_intervals = 1  # Save model checkpoint and validation intervals
+max_keep_ckpts = 3  # The maximum checkpoints to keep.
+
+# Single-scale training is recommended to
 # be turned on, which can speed up training.
 env_cfg = dict(cudnn_benchmark=True)
 
+# ===============================Unmodified in most cases====================
 model = dict(
     type='YOLODetector',
     data_preprocessor=dict(
@@ -47,7 +108,7 @@
     backbone=dict(
         type='YOLOv7Backbone',
         arch='L',
-        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
+        norm_cfg=norm_cfg,
         act_cfg=dict(type='SiLU', inplace=True)),
     neck=dict(
         type='YOLOv7PAFPN',
@@ -61,7 +122,7 @@
         in_channels=[512, 1024, 1024],
         # The real output channel will be multiplied by 2
         out_channels=[128, 256, 512],
-        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
+        norm_cfg=norm_cfg,
         act_cfg=dict(type='SiLU', inplace=True)),
     bbox_head=dict(
         type='YOLOv7Head',
@@ -80,31 +141,28 @@
             type='mmdet.CrossEntropyLoss',
             use_sigmoid=True,
             reduction='mean',
-            loss_weight=0.3 * (num_classes / 80 * 3 / num_det_layers)),
+            loss_weight=loss_cls_weight *
+            (num_classes / 80 * 3 / num_det_layers)),
         loss_bbox=dict(
             type='IoULoss',
             iou_mode='ciou',
             bbox_format='xywh',
             reduction='mean',
-            loss_weight=0.05 * (3 / num_det_layers),
+            loss_weight=loss_bbox_weight * (3 / num_det_layers),
             return_iou=True),
         loss_obj=dict(
             type='mmdet.CrossEntropyLoss',
             use_sigmoid=True,
             reduction='mean',
-            loss_weight=0.7 * ((img_scale[0] / 640)**2 * 3 / num_det_layers)),
-        obj_level_weights=[4., 1., 0.4],
+            loss_weight=loss_obj_weight *
+            ((img_scale[0] / 640)**2 * 3 / num_det_layers)),
+        prior_match_thr=prior_match_thr,
+        obj_level_weights=obj_level_weights,
         # BatchYOLOv7Assigner params
-        prior_match_thr=4.,
-        simota_candidate_topk=10,
-        simota_iou_weight=3.0,
-        simota_cls_weight=1.0),
-    test_cfg=dict(
-        multi_label=True,
-        nms_pre=30000,
-        score_thr=0.001,
-        nms=dict(type='nms', iou_threshold=0.65),
-        max_per_img=300))
+        simota_candidate_topk=simota_candidate_topk,
+        simota_iou_weight=simota_iou_weight,
+        simota_cls_weight=simota_cls_weight),
+    test_cfg=model_test_cfg)
 
 pre_transform = [
     dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
@@ -121,8 +179,8 @@
         type='YOLOv5RandomAffine',
         max_rotate_degree=0.0,
         max_shear_degree=0.0,
-        max_translate_ratio=0.2,  # note
-        scaling_ratio_range=(0.1, 2.0),  # note
+        max_translate_ratio=max_translate_ratio,  # note
+        scaling_ratio_range=scaling_ratio_range,  # note
         # img_scale is (width, height)
         border=(-img_scale[0] // 2, -img_scale[1] // 2),
         border_val=(114, 114, 114)),
@@ -138,8 +196,8 @@
         type='YOLOv5RandomAffine',
         max_rotate_degree=0.0,
         max_shear_degree=0.0,
-        max_translate_ratio=0.2,  # note
-        scaling_ratio_range=(0.1, 2.0),  # note
+        max_translate_ratio=max_translate_ratio,  # note
+        scaling_ratio_range=scaling_ratio_range,  # note
         # img_scale is (width, height)
         border=(-img_scale[0] // 2, -img_scale[1] // 2),
         border_val=(114, 114, 114)),
@@ -148,16 +206,16 @@
 randchoice_mosaic_pipeline = dict(
     type='RandomChoice',
     transforms=[mosiac4_pipeline, mosiac9_pipeline],
-    prob=[0.8, 0.2])
+    prob=randchoice_mosaic_prob)
 
 train_pipeline = [
     *pre_transform,
     randchoice_mosaic_pipeline,
     dict(
         type='YOLOv5MixUp',
-        alpha=8.0,  # note
-        beta=8.0,  # note
-        prob=0.15,
+        alpha=mixup_alpha,  # note
+        beta=mixup_beta,  # note
+        prob=mixup_prob,
         pre_transform=[*pre_transform, randchoice_mosaic_pipeline]),
     dict(type='YOLOv5HSVRandomAug'),
     dict(type='mmdet.RandomFlip', prob=0.5),
@@ -177,8 +235,8 @@
     dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        ann_file='annotations/instances_train2017.json',
-        data_prefix=dict(img='train2017/'),
+        ann_file=train_ann_file,
+        data_prefix=dict(img=train_data_prefix),
         filter_cfg=dict(filter_empty_gt=False, min_size=32),
         pipeline=train_pipeline))
 
@@ -208,8 +266,8 @@
         type=dataset_type,
         data_root=data_root,
         test_mode=True,
-        data_prefix=dict(img='val2017/'),
-        ann_file='annotations/instances_val2017.json',
+        data_prefix=dict(img=val_data_prefix),
+        ann_file=val_ann_file,
         pipeline=test_pipeline,
         batch_shapes_cfg=batch_shapes_cfg))
 
@@ -220,9 +278,9 @@
     type='OptimWrapper',
     optimizer=dict(
         type='SGD',
-        lr=0.01,
+        lr=base_lr,
         momentum=0.937,
-        weight_decay=0.0005,
+        weight_decay=weight_decay,
         nesterov=True,
         batch_size_per_gpu=train_batch_size_per_gpu),
     constructor='YOLOv7OptimWrapperConstructor')
@@ -231,27 +289,14 @@
     param_scheduler=dict(
         type='YOLOv5ParamSchedulerHook',
         scheduler_type='cosine',
-        lr_factor=0.1,  # note
+        lr_factor=lr_factor,  # note
         max_epochs=max_epochs),
     checkpoint=dict(
         type='CheckpointHook',
         save_param_scheduler=False,
-        interval=1,
+        interval=save_epoch_intervals,
         save_best='auto',
-        max_keep_ckpts=3))
-
-val_evaluator = dict(
-    type='mmdet.CocoMetric',
-    proposal_nums=(100, 1, 10),  # Can be accelerated
-    ann_file=data_root + 'annotations/instances_val2017.json',
-    metric='bbox')
-test_evaluator = val_evaluator
-
-train_cfg = dict(
-    type='EpochBasedTrainLoop',
-    max_epochs=max_epochs,
-    val_interval=save_epoch_intervals,
-    dynamic_intervals=[(270, 1)])
+        max_keep_ckpts=max_keep_ckpts))
 
 custom_hooks = [
     dict(
@@ -263,7 +308,17 @@
         priority=49)
 ]
 
+val_evaluator = dict(
+    type='mmdet.CocoMetric',
+    proposal_nums=(100, 1, 10),  # Can be accelerated
+    ann_file=data_root + val_ann_file,
+    metric='bbox')
+test_evaluator = val_evaluator
+
+train_cfg = dict(
+    type='EpochBasedTrainLoop',
+    max_epochs=max_epochs,
+    val_interval=save_epoch_intervals,
+    dynamic_intervals=[(max_epochs - num_epoch_stage2, val_interval_stage2)])
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
-
-# randomness = dict(seed=1, deterministic=True)
diff --git a/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py b/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py
index afb004024..b9e9f10e2 100644
--- a/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py
+++ b/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py
@@ -1,10 +1,26 @@
 _base_ = './yolov7_l_syncbn_fast_8x16b-300e_coco.py'
 
+# ========================modified parameters========================
+
+# -----model related-----
+# Data augmentation
+max_translate_ratio = 0.1  # YOLOv5RandomAffine
+scaling_ratio_range = (0.5, 1.6)  # YOLOv5RandomAffine
+mixup_prob = 0.05  # YOLOv5MixUp
+randchoice_mosaic_prob = [0.8, 0.2]
+mixup_alpha = 8.0  # YOLOv5MixUp
+mixup_beta = 8.0  # YOLOv5MixUp
+
+# -----train val related-----
+loss_cls_weight = 0.5
+loss_obj_weight = 1.0
+
+lr_factor = 0.01  # Learning rate scaling factor
+# ===============================Unmodified in most cases====================
 num_classes = _base_.num_classes
 num_det_layers = _base_.num_det_layers
 img_scale = _base_.img_scale
 pre_transform = _base_.pre_transform
-
 model = dict(
     backbone=dict(
         arch='Tiny', act_cfg=dict(type='LeakyReLU', negative_slope=0.1)),
@@ -18,9 +34,9 @@
         use_repconv_outs=False),
     bbox_head=dict(
         head_module=dict(in_channels=[128, 256, 512]),
-        loss_cls=dict(loss_weight=0.5 *
+        loss_cls=dict(loss_weight=loss_cls_weight *
                       (num_classes / 80 * 3 / num_det_layers)),
-        loss_obj=dict(loss_weight=1.0 *
+        loss_obj=dict(loss_weight=loss_obj_weight *
                       ((img_scale[0] / 640)**2 * 3 / num_det_layers))))
 
 mosiac4_pipeline = [
@@ -33,8 +49,8 @@
         type='YOLOv5RandomAffine',
         max_rotate_degree=0.0,
         max_shear_degree=0.0,
-        max_translate_ratio=0.1,  # change
-        scaling_ratio_range=(0.5, 1.6),  # change
+        max_translate_ratio=max_translate_ratio,  # change
+        scaling_ratio_range=scaling_ratio_range,  # change
         # img_scale is (width, height)
         border=(-img_scale[0] // 2, -img_scale[1] // 2),
         border_val=(114, 114, 114)),
@@ -50,8 +66,8 @@
         type='YOLOv5RandomAffine',
         max_rotate_degree=0.0,
         max_shear_degree=0.0,
-        max_translate_ratio=0.1,  # change
-        scaling_ratio_range=(0.5, 1.6),  # change
+        max_translate_ratio=max_translate_ratio,  # change
+        scaling_ratio_range=scaling_ratio_range,  # change
         border=(-img_scale[0] // 2, -img_scale[1] // 2),
         border_val=(114, 114, 114)),
 ]
@@ -59,16 +75,16 @@
 randchoice_mosaic_pipeline = dict(
     type='RandomChoice',
     transforms=[mosiac4_pipeline, mosiac9_pipeline],
-    prob=[0.8, 0.2])
+    prob=randchoice_mosaic_prob)
 
 train_pipeline = [
     *pre_transform,
     randchoice_mosaic_pipeline,
     dict(
         type='YOLOv5MixUp',
-        alpha=8.0,
-        beta=8.0,
-        prob=0.05,  # change
+        alpha=mixup_alpha,
+        beta=mixup_beta,
+        prob=mixup_prob,  # change
         pre_transform=[*pre_transform, randchoice_mosaic_pipeline]),
     dict(type='YOLOv5HSVRandomAug'),
     dict(type='mmdet.RandomFlip', prob=0.5),
@@ -79,4 +95,4 @@
 ]
 
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
-default_hooks = dict(param_scheduler=dict(lr_factor=0.01))
+default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor))
diff --git a/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py b/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py
index 6536c0937..17cb84da5 100644
--- a/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py
+++ b/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py
@@ -1,19 +1,42 @@
 _base_ = './yolov7_l_syncbn_fast_8x16b-300e_coco.py'
 
+# ========================modified parameters========================
+# -----data related-----
 img_scale = (1280, 1280)  # height, width
-num_classes = 80
-# only on Val
-batch_shapes_cfg = dict(img_size=img_scale[0], size_divisor=64)
+num_classes = 80  # Number of classes for classification
+# Config of batch shapes. Only on val
+# It means not used if batch_shapes_cfg is None.
+batch_shapes_cfg = dict(
+    img_size=img_scale[
+        0],  # The image scale of padding should be divided by pad_size_divisor
+    size_divisor=64)  # Additional paddings for pixel scale
 
+# -----model related-----
+# Basic size of multi-scale prior box
 anchors = [
     [(19, 27), (44, 40), (38, 94)],  # P3/8
     [(96, 68), (86, 152), (180, 137)],  # P4/16
     [(140, 301), (303, 264), (238, 542)],  # P5/32
     [(436, 615), (739, 380), (925, 792)]  # P6/64
 ]
-strides = [8, 16, 32, 64]
-num_det_layers = 4
+strides = [8, 16, 32, 64]  # Strides of multi-scale prior box
+num_det_layers = 4  # The number of model output scales
+norm_cfg = dict(type='BN', momentum=0.03, eps=0.001)
 
+# Data augmentation
+max_translate_ratio = 0.2  # YOLOv5RandomAffine
+scaling_ratio_range = (0.1, 2.0)  # YOLOv5RandomAffine
+mixup_prob = 0.15  # YOLOv5MixUp
+randchoice_mosaic_prob = [0.8, 0.2]
+mixup_alpha = 8.0  # YOLOv5MixUp
+mixup_beta = 8.0  # YOLOv5MixUp
+
+# -----train val related-----
+loss_cls_weight = 0.3
+loss_bbox_weight = 0.05
+loss_obj_weight = 0.7
+
+# ===============================Unmodified in most cases====================
 model = dict(
     backbone=dict(arch='W', out_indices=(2, 3, 4, 5)),
     neck=dict(
@@ -26,15 +49,15 @@
             type='YOLOv7p6HeadModule',
             in_channels=[128, 256, 384, 512],
             featmap_strides=strides,
-            norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
+            norm_cfg=norm_cfg,
             act_cfg=dict(type='SiLU', inplace=True)),
         prior_generator=dict(base_sizes=anchors, strides=strides),
         simota_candidate_topk=20,  # note
         # scaled based on number of detection layers
-        loss_cls=dict(loss_weight=0.3 *
+        loss_cls=dict(loss_weight=loss_cls_weight *
                       (num_classes / 80 * 3 / num_det_layers)),
-        loss_bbox=dict(loss_weight=0.05 * (3 / num_det_layers)),
-        loss_obj=dict(loss_weight=0.7 *
+        loss_bbox=dict(loss_weight=loss_bbox_weight * (3 / num_det_layers)),
+        loss_obj=dict(loss_weight=loss_obj_weight *
                       ((img_scale[0] / 640)**2 * 3 / num_det_layers)),
         obj_level_weights=[4.0, 1.0, 0.25, 0.06]))
 
@@ -50,8 +73,8 @@
         type='YOLOv5RandomAffine',
         max_rotate_degree=0.0,
         max_shear_degree=0.0,
-        max_translate_ratio=0.2,  # note
-        scaling_ratio_range=(0.1, 2.0),  # note
+        max_translate_ratio=max_translate_ratio,  # note
+        scaling_ratio_range=scaling_ratio_range,  # note
         # img_scale is (width, height)
         border=(-img_scale[0] // 2, -img_scale[1] // 2),
         border_val=(114, 114, 114)),
@@ -67,8 +90,8 @@
         type='YOLOv5RandomAffine',
         max_rotate_degree=0.0,
         max_shear_degree=0.0,
-        max_translate_ratio=0.2,  # note
-        scaling_ratio_range=(0.1, 2.0),  # note
+        max_translate_ratio=max_translate_ratio,  # note
+        scaling_ratio_range=scaling_ratio_range,  # note
         # img_scale is (width, height)
         border=(-img_scale[0] // 2, -img_scale[1] // 2),
         border_val=(114, 114, 114)),
@@ -77,16 +100,16 @@
 randchoice_mosaic_pipeline = dict(
     type='RandomChoice',
     transforms=[mosiac4_pipeline, mosiac9_pipeline],
-    prob=[0.8, 0.2])
+    prob=randchoice_mosaic_prob)
 
 train_pipeline = [
     *pre_transform,
     randchoice_mosaic_pipeline,
     dict(
         type='YOLOv5MixUp',
-        alpha=8.0,  # note
-        beta=8.0,  # note
-        prob=0.15,
+        alpha=mixup_alpha,  # note
+        beta=mixup_beta,  # note
+        prob=mixup_prob,
         pre_transform=[*pre_transform, randchoice_mosaic_pipeline]),
     dict(type='YOLOv5HSVRandomAug'),
     dict(type='mmdet.RandomFlip', prob=0.5),
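Note on usage: the refactor above only hoists values to module-level variables; MMEngine still evaluates the base config first, so a downstream config that changes one of these variables must also feed it back into the nested dicts that consumed it, exactly as the tiny and W-P6 configs do for the loss weights and pipelines. Below is a minimal sketch of a hypothetical user config built on top of the refactored base; the file name, dataset paths, class count and learning rate are placeholders for illustration, not values from this PR.

# yolov7_l_custom.py (hypothetical example, not part of this patch)
_base_ = './yolov7_l_syncbn_fast_8x16b-300e_coco.py'

# Override the hoisted variables for a made-up 3-class dataset.
data_root = 'data/custom/'
train_ann_file = 'annotations/train.json'
val_ann_file = 'annotations/val.json'
num_classes = 3
base_lr = 0.0025  # e.g. 1/4 of the 8-GPU base_lr for a 2-GPU run

# Re-assigning a module-level name does not rewrite the nested dicts that
# already consumed it in the base config, so plumb the new values back in.
model = dict(bbox_head=dict(head_module=dict(num_classes=num_classes)))
train_dataloader = dict(
    dataset=dict(data_root=data_root, ann_file=train_ann_file))
val_dataloader = dict(
    dataset=dict(data_root=data_root, ann_file=val_ann_file))
test_dataloader = val_dataloader
val_evaluator = dict(ann_file=data_root + val_ann_file)
test_evaluator = val_evaluator
optim_wrapper = dict(optimizer=dict(lr=base_lr))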