From 9a3bcd1074fa0f2345cf15b19e999b0ce6f12a8e Mon Sep 17 00:00:00 2001
From: sangbumchoi
Date: Thu, 4 Apr 2024 00:26:26 +0000
Subject: [PATCH 1/8] make style

---
 src/transformers/models/deta/modeling_deta.py          |  1 -
 src/transformers/models/detr/image_processing_detr.py  | 10 ++++------
 .../models/mask2former/image_processing_mask2former.py |  6 +++---
 src/transformers/models/yolov6/modeling_yolov6.py      |  1 -
 4 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/src/transformers/models/deta/modeling_deta.py b/src/transformers/models/deta/modeling_deta.py
index 7a2491d37ecefd..ece9d325746fd3 100644
--- a/src/transformers/models/deta/modeling_deta.py
+++ b/src/transformers/models/deta/modeling_deta.py
@@ -46,7 +46,6 @@
 from ...utils import is_accelerate_available, is_ninja_available, is_torchvision_available, logging, requires_backends
 from ...utils.backbone_utils import load_backbone
 from .configuration_deta import DetaConfig
-from .load_custom import load_cuda_kernels


 logger = logging.get_logger(__name__)
diff --git a/src/transformers/models/detr/image_processing_detr.py b/src/transformers/models/detr/image_processing_detr.py
index 778a26b68f7bc9..10bd588ec9b603 100644
--- a/src/transformers/models/detr/image_processing_detr.py
+++ b/src/transformers/models/detr/image_processing_detr.py
@@ -1847,9 +1847,9 @@ def post_process_instance_segmentation(
             raise ValueError("return_coco_annotation and return_binary_maps can not be both set to True.")

         # [batch_size, num_queries, num_classes+1]
-        class_queries_logits = outputs.logits
-        # [batch_size, num_queries, height, width]
-        masks_queries_logits = outputs.pred_masks
+        class_queries_logits = outputs.logits
+        # [batch_size, num_queries, height, width]
+        masks_queries_logits = outputs.pred_masks
         device = masks_queries_logits.device
         num_classes = class_queries_logits.shape[-1] - 1
@@ -1887,9 +1887,7 @@ def post_process_instance_segmentation(
             if target_sizes is not None:
                 size = target_sizes[i] if isinstance(target_sizes[i], tuple) else target_sizes[i].cpu().tolist()
                 segmentation = torch.zeros(size) - 1
-                pred_masks = torch.nn.functional.interpolate(
-                    pred_masks.unsqueeze(0), size=size, mode="nearest"
-                )[0]
+                pred_masks = torch.nn.functional.interpolate(pred_masks.unsqueeze(0), size=size, mode="nearest")[0]

             instance_maps, segments = [], []
             current_segment_id = 0
diff --git a/src/transformers/models/mask2former/image_processing_mask2former.py b/src/transformers/models/mask2former/image_processing_mask2former.py
index 880837eae541b1..1e884371f40a05 100644
--- a/src/transformers/models/mask2former/image_processing_mask2former.py
+++ b/src/transformers/models/mask2former/image_processing_mask2former.py
@@ -1129,9 +1129,9 @@ def post_process_instance_segmentation(
             if target_sizes is not None:
                 size = target_sizes[i] if isinstance(target_sizes[i], tuple) else target_sizes[i].cpu().tolist()
                 segmentation = torch.zeros(size) - 1
-                pred_masks = torch.nn.functional.interpolate(
-                    pred_masks.unsqueeze(0).cpu(), size=size, mode="nearest"
-                )[0]
+                pred_masks = torch.nn.functional.interpolate(pred_masks.unsqueeze(0).cpu(), size=size, mode="nearest")[
+                    0
+                ]

             instance_maps, segments = [], []
             current_segment_id = 0
diff --git a/src/transformers/models/yolov6/modeling_yolov6.py b/src/transformers/models/yolov6/modeling_yolov6.py
index da1b36483b13c7..475de9356466ca 100755
--- a/src/transformers/models/yolov6/modeling_yolov6.py
+++ b/src/transformers/models/yolov6/modeling_yolov6.py
@@ -1549,7 +1549,6 @@ def df_loss(pred_dist, target, reg_max):
         return (loss_left + loss_right).mean(-1, keepdim=True)


-# Copied from transformers.models.detr.modeling_detr.DetrLoss with Detr->Yolos
 class Yolov6Loss(nn.Module):
     """
     This class computes the losses for Yolov6ForObjectDetection. The process happens in two steps: 1)

From ce260aebe1d0c563e457cf8febb323e42277b151 Mon Sep 17 00:00:00 2001
From: sangbumchoi
Date: Thu, 4 Apr 2024 06:16:45 +0000
Subject: [PATCH 2/8] change minor configuration

---
 src/transformers/models/yolov6/configuration_yolov6.py      | 2 +-
 src/transformers/models/yolov6/convert_yolov6_to_pytorch.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/transformers/models/yolov6/configuration_yolov6.py b/src/transformers/models/yolov6/configuration_yolov6.py
index 20b7fb5cd9f5f9..d5eb6c45fc9fd8 100644
--- a/src/transformers/models/yolov6/configuration_yolov6.py
+++ b/src/transformers/models/yolov6/configuration_yolov6.py
@@ -132,7 +132,7 @@ def __init__(
         reg_max_proj=16,
         class_loss_coefficient=1.0,
         iou_loss_coefficient=2.5,
-        dfl_loss_coefficient=1.0,
+        dfl_loss_coefficient=0.5,
         initializer_range=0.02,
         forward_fuse=False,
         export=False,
diff --git a/src/transformers/models/yolov6/convert_yolov6_to_pytorch.py b/src/transformers/models/yolov6/convert_yolov6_to_pytorch.py
index fd31b35e8ed583..bd9f19cacca8ea 100644
--- a/src/transformers/models/yolov6/convert_yolov6_to_pytorch.py
+++ b/src/transformers/models/yolov6/convert_yolov6_to_pytorch.py
@@ -64,7 +64,7 @@ def get_yolov6_config(yolov6_name: str) -> Yolov6Config:
     config.head_anchors = 1
     config.head_strides = [8, 16, 32, 64]
     config.iou_type = "giou"
-    config.atss_warmup_epoch = 0
+    config.atss_warmup_epoch = 4
     config.use_dfl = True
     config.reg_max = 16


From d58f0818e3b01528e0be015107a5b726d2a3a6e1 Mon Sep 17 00:00:00 2001
From: sangbumchoi
Date: Thu, 4 Apr 2024 06:16:59 +0000
Subject: [PATCH 3/8] set TaskAlignedAssigner when warmup_epoch is 0 (which is yolov6n)

---
 .../models/yolov6/modeling_yolov6.py | 27 +++++++++++--------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/src/transformers/models/yolov6/modeling_yolov6.py b/src/transformers/models/yolov6/modeling_yolov6.py
index 475de9356466ca..f7c15aa041071c 100755
--- a/src/transformers/models/yolov6/modeling_yolov6.py
+++ b/src/transformers/models/yolov6/modeling_yolov6.py
@@ -1081,12 +1081,9 @@ class Yolov6PreTrainedModel(PreTrainedModel):

     def _init_weights(self, module: Union[nn.Linear, nn.Conv2d, nn.LayerNorm]) -> None:
         """Initialize the weights"""
-        if isinstance(module, (nn.Linear, nn.Conv2d)):
-            # Slightly different from the TF version which uses truncated_normal for initialization
-            # cf https://github.com/pytorch/pytorch/pull/5617
-            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
-            if module.bias is not None:
-                module.bias.data.zero_()
+        if isinstance(module, nn.BatchNorm2d):
+            module.eps = 1e-3
+            module.momentum = 3e-2
         elif isinstance(module, nn.LayerNorm):
             module.bias.data.zero_()
             module.weight.data.fill_(1.0)
@@ -1564,14 +1561,22 @@ class Yolov6Loss(nn.Module):


     Args:
-        matcher (`YolosHungarianMatcher`):
-            Module able to compute a matching between targets and proposals.
         num_classes (`int`):
             Number of object categories, omitting the special no-object category.
-        eos_coef (`float`):
-            Relative classification weight applied to the no-object category.
+        warmup_epoch (`int`):
+            Number of warmup epochs during which the ATSSAssigner is used instead of the TaskAlignedAssigner. However, it is not used here due to an incompatibility.
+        use_dfl (`bool`):
+            Whether to use the distribution focal loss (DFL).
+        iou_type (`str`):
+            Type of IoU loss to use, such as `"giou"`, `"ciou"` or `"diou"`.
+        fpn_strides (`List[int]`):
+            Strides of the feature pyramid network levels used to generate anchors.
+        reg_max (`int`):
+            Maximum value of the regression distribution used for the distribution focal loss.
         losses (`List[str]`):
             List of all the losses to be applied. See `get_loss` for a list of all available losses.
+        training (`bool`):
+            Whether the module is in training or inference mode.
     """

     def __init__(
@@ -1782,7 +1787,7 @@ def forward(self, outputs, targets):
         outputs["anchor_points_s"] = anchor_points_s
         pred_bboxes = self.bbox_decode(anchor_points_s, pred_distri)  # xyxy
         outputs["pred_bboxes"] = pred_bboxes
-        if not self.training:
+        if not self.training or self.warmup_epoch == 0:
             target_labels, target_bboxes, target_scores, fg_mask = self.formal_assigner(
                 pred_scores.detach(),
                 pred_bboxes.detach() * stride_tensor,

From 68a9803d9bc90aa9f8b713eee8846ee037bba4ce Mon Sep 17 00:00:00 2001
From: sangbumchoi
Date: Fri, 5 Apr 2024 02:46:20 +0000
Subject: [PATCH 4/8] minor update with assigner

---
 .../models/yolov6/modeling_yolov6.py | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/src/transformers/models/yolov6/modeling_yolov6.py b/src/transformers/models/yolov6/modeling_yolov6.py
index f7c15aa041071c..9bd05de7d6d568 100755
--- a/src/transformers/models/yolov6/modeling_yolov6.py
+++ b/src/transformers/models/yolov6/modeling_yolov6.py
@@ -27,7 +27,7 @@
 from torch import Tensor, nn

 from ...activations import ACT2FN
-from ...modeling_outputs import BackboneOutput, BaseModelOutputWithNoAttention
+from ...modeling_outputs import BaseModelOutputWithNoAttention
 from ...modeling_utils import PreTrainedModel
 from ...utils import (
     ModelOutput,
@@ -65,11 +65,6 @@
 ]


-@dataclass
-class Yolov6ModelOutput(BackboneOutput):
-    loss: Optional[torch.FloatTensor] = None
-
-
 @dataclass
 class Yolov6ObjectDetectionOutput(ModelOutput):
     """
@@ -89,10 +84,6 @@ class Yolov6ObjectDetectionOutput(ModelOutput):
             values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding
             possible padding). You can use [`~YolosImageProcessor.post_process`] to retrieve the unnormalized bounding
             boxes.
-        auxiliary_outputs (`list[Dict]`, *optional*):
-            Optional, only returned when auxilary losses are activated (i.e. `config.auxiliary_loss` is set to `True`)
-            and labels are provided. It is a list of dictionaries containing the two above keys (`logits` and
-            `pred_boxes`) for each decoder layer.
         last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
             Sequence of hidden-states at the output of the last layer of the decoder of the model.
         hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
@@ -1778,7 +1778,8 @@ def forward(self, outputs, targets):
         outputs["anchor_points_s"] = anchor_points_s
         pred_bboxes = self.bbox_decode(anchor_points_s, pred_distri)  # xyxy
         outputs["pred_bboxes"] = pred_bboxes
-        if not self.training or self.warmup_epoch == 0:
+        # original implementation of assigner is using warmup_epoch
+        try:
             target_labels, target_bboxes, target_scores, fg_mask = self.formal_assigner(
                 pred_scores.detach(),
                 pred_bboxes.detach() * stride_tensor,
                 anchor_points,
                 gt_labels,
                 gt_bboxes,
                 mask_gt,
             )
-        else:
+        except:
             target_labels, target_bboxes, target_scores, fg_mask = self.warmup_assigner(
                 anchors,
                 n_anchors_list,
                 gt_labels,
                 gt_bboxes,
                 mask_gt,
                 pred_bboxes.detach() * stride_tensor,
             )

From 97f1f94d4f25bb5d66466fe01fc8c3ae2550b8a6 Mon Sep 17 00:00:00 2001
From: sangbumchoi
Date: Fri, 5 Apr 2024 02:47:53 +0000
Subject: [PATCH 5/8] make style

---
 src/transformers/models/yolov6/modeling_yolov6.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/models/yolov6/modeling_yolov6.py b/src/transformers/models/yolov6/modeling_yolov6.py
index 9bd05de7d6d568..5d033483819962 100755
--- a/src/transformers/models/yolov6/modeling_yolov6.py
+++ b/src/transformers/models/yolov6/modeling_yolov6.py
@@ -1788,7 +1788,7 @@ def forward(self, outputs, targets):
                 gt_bboxes,
                 mask_gt,
             )
-        except:
+        except BaseException:
             target_labels, target_bboxes, target_scores, fg_mask = self.warmup_assigner(
                 anchors,
                 n_anchors_list,

From e8447a143219fb17d08ed054c07a88a787969a8c Mon Sep 17 00:00:00 2001
From: sangbumchoi
Date: Thu, 11 Apr 2024 05:46:06 +0000
Subject: [PATCH 6/8] roll back to use only warmup assigner

---
 .../models/yolov6/modeling_yolov6.py | 36 +++++++++----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/src/transformers/models/yolov6/modeling_yolov6.py b/src/transformers/models/yolov6/modeling_yolov6.py
index 5d033483819962..89a5bce39b6453 100755
--- a/src/transformers/models/yolov6/modeling_yolov6.py
+++ b/src/transformers/models/yolov6/modeling_yolov6.py
@@ -1779,24 +1779,24 @@ def forward(self, outputs, targets):
         pred_bboxes = self.bbox_decode(anchor_points_s, pred_distri)  # xyxy
         outputs["pred_bboxes"] = pred_bboxes
         # original implementation of assigner is using warmup_epoch
-        try:
-            target_labels, target_bboxes, target_scores, fg_mask = self.formal_assigner(
-                pred_scores.detach(),
-                pred_bboxes.detach() * stride_tensor,
-                anchor_points,
-                gt_labels,
-                gt_bboxes,
-                mask_gt,
-            )
-        except BaseException:
-            target_labels, target_bboxes, target_scores, fg_mask = self.warmup_assigner(
-                anchors,
-                n_anchors_list,
-                gt_labels,
-                gt_bboxes,
-                mask_gt,
-                pred_bboxes.detach() * stride_tensor,
-            )
+        # try:
+        #     target_labels, target_bboxes, target_scores, fg_mask = self.formal_assigner(
+        #         pred_scores.detach(),
+        #         pred_bboxes.detach() * stride_tensor,
+        #         anchor_points,
+        #         gt_labels,
+        #         gt_bboxes,
+        #         mask_gt,
+        #     )
+        # except BaseException:
+        target_labels, target_bboxes, target_scores, fg_mask = self.warmup_assigner(
+            anchors,
+            n_anchors_list,
+            gt_labels,
+            gt_bboxes,
+            mask_gt,
+            pred_bboxes.detach() * stride_tensor,
+        )

         # rescale bbox
         target_bboxes /= stride_tensor

From 6dc62be890e38b1d4005afe92797079817d55c0d Mon Sep 17 00:00:00 2001
From: sangbumchoi
Date: Thu, 11 Apr 2024 06:25:25 +0000
Subject: [PATCH 7/8] fix 'anchors' not working in evaluation

---
 .../models/yolov6/modeling_yolov6.py | 36 +++++++++----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/src/transformers/models/yolov6/modeling_yolov6.py b/src/transformers/models/yolov6/modeling_yolov6.py
index 89a5bce39b6453..794c0ea707c787 100755
--- a/src/transformers/models/yolov6/modeling_yolov6.py
+++ b/src/transformers/models/yolov6/modeling_yolov6.py
@@ -1779,24 +1779,24 @@ def forward(self, outputs, targets):
         pred_bboxes = self.bbox_decode(anchor_points_s, pred_distri)  # xyxy
         outputs["pred_bboxes"] = pred_bboxes
         # original implementation of assigner is using warmup_epoch
-        # try:
-        #     target_labels, target_bboxes, target_scores, fg_mask = self.formal_assigner(
-        #         pred_scores.detach(),
-        #         pred_bboxes.detach() * stride_tensor,
-        #         anchor_points,
-        #         gt_labels,
-        #         gt_bboxes,
-        #         mask_gt,
-        #     )
-        # except BaseException:
-        target_labels, target_bboxes, target_scores, fg_mask = self.warmup_assigner(
-            anchors,
-            n_anchors_list,
-            gt_labels,
-            gt_bboxes,
-            mask_gt,
-            pred_bboxes.detach() * stride_tensor,
-        )
+        if not self.training:
+            target_labels, target_bboxes, target_scores, fg_mask = self.formal_assigner(
+                pred_scores.detach(),
+                pred_bboxes.detach() * stride_tensor,
+                anchor_points,
+                gt_labels,
+                gt_bboxes,
+                mask_gt,
+            )
+        else:
+            target_labels, target_bboxes, target_scores, fg_mask = self.warmup_assigner(
+                anchors,
+                n_anchors_list,
+                gt_labels,
+                gt_bboxes,
+                mask_gt,
+                pred_bboxes.detach() * stride_tensor,
+            )

         # rescale bbox
         target_bboxes /= stride_tensor

From 110eb5c27fb637469af0350ceb8fd8727859afd1 Mon Sep 17 00:00:00 2001
From: sangbumchoi
Date: Mon, 15 Apr 2024 03:06:20 +0000
Subject: [PATCH 8/8] roll-back the initialization method

---
 src/transformers/models/yolov6/modeling_yolov6.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/transformers/models/yolov6/modeling_yolov6.py b/src/transformers/models/yolov6/modeling_yolov6.py
index 794c0ea707c787..9ff3c05566279e 100755
--- a/src/transformers/models/yolov6/modeling_yolov6.py
+++ b/src/transformers/models/yolov6/modeling_yolov6.py
@@ -1072,9 +1072,12 @@ class Yolov6PreTrainedModel(PreTrainedModel):

     def _init_weights(self, module: Union[nn.Linear, nn.Conv2d, nn.LayerNorm]) -> None:
         """Initialize the weights"""
-        if isinstance(module, nn.BatchNorm2d):
-            module.eps = 1e-3
-            module.momentum = 3e-2
+        if isinstance(module, (nn.Linear, nn.Conv2d)):
+            # Slightly different from the TF version which uses truncated_normal for initialization
+            # cf https://github.com/pytorch/pytorch/pull/5617
+            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+            if module.bias is not None:
+                module.bias.data.zero_()
         elif isinstance(module, nn.LayerNorm):
             module.bias.data.zero_()
             module.weight.data.fill_(1.0)
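
Note on the assigner back-and-forth in PATCH 3/8 through PATCH 7/8: the series ends with the warmup (ATSS) assigner used during training and the formal (task-aligned) assigner used during evaluation, since per the PATCH 7/8 subject the anchors input needed by the warmup path is not available at evaluation time. The snippet below is a minimal, illustrative sketch of that control flow only; it is not code from the patches, and AssignerSelectionSketch with its stand-in assigners is hypothetical.

# Illustrative sketch only -- not part of the patches above. It reduces the
# assigner selection that PATCH 7/8 settles on to its bare control flow:
# the "formal" assigner in evaluation, the "warmup" assigner in training.
# AssignerSelectionSketch and the stand-in assigners are hypothetical.
import torch
from torch import nn


class AssignerSelectionSketch(nn.Module):
    def __init__(self):
        super().__init__()
        # Stand-ins for Yolov6Loss.warmup_assigner (ATSS) and
        # Yolov6Loss.formal_assigner (task-aligned) in modeling_yolov6.py.
        self.warmup_assigner = lambda boxes: ("warmup", boxes.shape)
        self.formal_assigner = lambda boxes: ("formal", boxes.shape)

    def forward(self, pred_bboxes: torch.Tensor):
        # Mirrors PATCH 7/8: evaluation cannot use the warmup path (its
        # anchors input is not produced there), so it falls back to the
        # formal assigner; training keeps the warmup assigner.
        if not self.training:
            return self.formal_assigner(pred_bboxes.detach())
        return self.warmup_assigner(pred_bboxes.detach())


sketch = AssignerSelectionSketch()
sketch.train()
print(sketch(torch.rand(2, 4)))  # -> ('warmup', torch.Size([2, 4]))
sketch.eval()
print(sketch(torch.rand(2, 4)))  # -> ('formal', torch.Size([2, 4]))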