
Commit

Merge pull request huggingface#8 from Superb-AI-Suite/develop
Deployment
SangbumChoi authored Apr 15, 2024
2 parents 3f13ad3 + 110eb5c commit d3fa1d9
Showing 6 changed files with 24 additions and 28 deletions.
1 change: 0 additions & 1 deletion src/transformers/models/deta/modeling_deta.py
@@ -46,7 +46,6 @@
from ...utils import is_accelerate_available, is_ninja_available, is_torchvision_available, logging, requires_backends
from ...utils.backbone_utils import load_backbone
from .configuration_deta import DetaConfig
from .load_custom import load_cuda_kernels


logger = logging.get_logger(__name__)
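
The removed import suggests the custom CUDA kernels are no longer pulled in when modeling_deta.py is imported. A minimal sketch of the lazy-loading pattern such a change points toward, assuming a hypothetical `load_cuda_kernels` helper and kernel source path (this is not the repository's code, only the general shape of the idea):

```python
from functools import lru_cache

from torch.utils.cpp_extension import load
from transformers.utils import is_ninja_available


@lru_cache(maxsize=1)
def load_cuda_kernels():
    # Compile and cache the extension the first time it is actually needed,
    # instead of at module import time.
    if not is_ninja_available():
        raise ImportError("ninja is required to compile the custom CUDA kernels.")
    return load(
        name="deformable_attention",          # hypothetical extension name
        sources=["ms_deform_attn_cuda.cu"],   # hypothetical kernel source file
        with_cuda=True,
    )
```
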
10 changes: 4 additions & 6 deletions src/transformers/models/detr/image_processing_detr.py
@@ -1847,9 +1847,9 @@ def post_process_instance_segmentation(
raise ValueError("return_coco_annotation and return_binary_maps can not be both set to True.")

# [batch_size, num_queries, num_classes+1]
class_queries_logits = outputs.logits
# [batch_size, num_queries, height, width]
masks_queries_logits = outputs.pred_masks
class_queries_logits = outputs.logits
# [batch_size, num_queries, height, width]
masks_queries_logits = outputs.pred_masks

device = masks_queries_logits.device
num_classes = class_queries_logits.shape[-1] - 1
@@ -1887,9 +1887,7 @@ def post_process_instance_segmentation(
if target_sizes is not None:
size = target_sizes[i] if isinstance(target_sizes[i], tuple) else target_sizes[i].cpu().tolist()
segmentation = torch.zeros(size) - 1
pred_masks = torch.nn.functional.interpolate(
pred_masks.unsqueeze(0), size=size, mode="nearest"
)[0]
pred_masks = torch.nn.functional.interpolate(pred_masks.unsqueeze(0), size=size, mode="nearest")[0]

instance_maps, segments = [], []
current_segment_id = 0
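
For context, the call reformatted above resizes each image's predicted instance masks to the requested target size with nearest-neighbor interpolation, which keeps the mask values binary. A small self-contained illustration (the shapes are made up, not taken from the model):

```python
import torch

pred_masks = torch.randint(0, 2, (5, 96, 96)).float()  # 5 hypothetical instance masks
size = (480, 640)                                       # hypothetical target (height, width)

# unsqueeze to (1, num_masks, H, W) so interpolate treats the masks as channels,
# then drop the batch dimension again.
resized = torch.nn.functional.interpolate(pred_masks.unsqueeze(0), size=size, mode="nearest")[0]
print(resized.shape)  # torch.Size([5, 480, 640])
```
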
@@ -1129,9 +1129,9 @@ def post_process_instance_segmentation(
if target_sizes is not None:
size = target_sizes[i] if isinstance(target_sizes[i], tuple) else target_sizes[i].cpu().tolist()
segmentation = torch.zeros(size) - 1
pred_masks = torch.nn.functional.interpolate(
pred_masks.unsqueeze(0).cpu(), size=size, mode="nearest"
)[0]
pred_masks = torch.nn.functional.interpolate(pred_masks.unsqueeze(0).cpu(), size=size, mode="nearest")[
0
]

instance_maps, segments = [], []
current_segment_id = 0
2 changes: 1 addition & 1 deletion src/transformers/models/yolov6/configuration_yolov6.py
@@ -132,7 +132,7 @@ def __init__(
reg_max_proj=16,
class_loss_coefficient=1.0,
iou_loss_coefficient=2.5,
dfl_loss_coefficient=1.0,
dfl_loss_coefficient=0.5,
initializer_range=0.02,
forward_fuse=False,
export=False,
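
The value changed here weights the distribution focal loss term when the YOLOv6 detection losses are combined. A hedged sketch of how such coefficients are typically applied (the exact combination lives in modeling_yolov6.py and may differ):

```python
class_loss_coefficient = 1.0
iou_loss_coefficient = 2.5
dfl_loss_coefficient = 0.5  # new default from this change

# Hypothetical per-term loss values; in practice these come from Yolov6Loss.
loss_class, loss_iou, loss_dfl = 0.42, 0.31, 0.18

total_loss = (
    class_loss_coefficient * loss_class
    + iou_loss_coefficient * loss_iou
    + dfl_loss_coefficient * loss_dfl
)
print(total_loss)  # ~1.285
```
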
@@ -64,7 +64,7 @@ def get_yolov6_config(yolov6_name: str) -> Yolov6Config:
config.head_anchors = 1
config.head_strides = [8, 16, 32, 64]
config.iou_type = "giou"
config.atss_warmup_epoch = 0
config.atss_warmup_epoch = 4
config.use_dfl = True
config.reg_max = 16

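
`atss_warmup_epoch` comes from the reference YOLOv6 training recipe, where the ATSS assigner handles label assignment for the first few epochs before the task-aligned assigner takes over (the Yolov6Loss docstring below notes that this port does not actually use the warmup path). A small sketch of that switch, purely for illustration:

```python
def pick_assigner(epoch: int, atss_warmup_epoch: int = 4) -> str:
    """Return the label assigner the reference YOLOv6 recipe would use at this epoch."""
    return "ATSSAssigner" if epoch < atss_warmup_epoch else "TaskAlignedAssigner"


print([pick_assigner(epoch) for epoch in range(6)])
# ['ATSSAssigner', 'ATSSAssigner', 'ATSSAssigner', 'ATSSAssigner',
#  'TaskAlignedAssigner', 'TaskAlignedAssigner']
```
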
31 changes: 15 additions & 16 deletions src/transformers/models/yolov6/modeling_yolov6.py
@@ -27,7 +27,7 @@
from torch import Tensor, nn

from ...activations import ACT2FN
from ...modeling_outputs import BackboneOutput, BaseModelOutputWithNoAttention
from ...modeling_outputs import BaseModelOutputWithNoAttention
from ...modeling_utils import PreTrainedModel
from ...utils import (
ModelOutput,
@@ -65,11 +65,6 @@
]


@dataclass
class Yolov6ModelOutput(BackboneOutput):
loss: Optional[torch.FloatTensor] = None


@dataclass
class Yolov6ObjectDetectionOutput(ModelOutput):
"""
@@ -89,10 +84,6 @@ class Yolov6ObjectDetectionOutput(ModelOutput):
values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding
possible padding). You can use [`~YolosImageProcessor.post_process`] to retrieve the unnormalized bounding
boxes.
auxiliary_outputs (`list[Dict]`, *optional*):
Optional, only returned when auxilary losses are activated (i.e. `config.auxiliary_loss` is set to `True`)
and labels are provided. It is a list of dictionaries containing the two above keys (`logits` and
`pred_boxes`) for each decoder layer.
last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the decoder of the model.
hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
@@ -1086,7 +1077,7 @@ def _init_weights(self, module: Union[nn.Linear, nn.Conv2d, nn.LayerNorm]) -> None:
# cf https://github.com/pytorch/pytorch/pull/5617
module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
if module.bias is not None:
module.bias.data.zero_()
module.bias.data.zero_()
elif isinstance(module, nn.LayerNorm):
module.bias.data.zero_()
module.weight.data.fill_(1.0)
@@ -1549,7 +1540,6 @@ def df_loss(pred_dist, target, reg_max):
return (loss_left + loss_right).mean(-1, keepdim=True)


# Copied from transformers.models.detr.modeling_detr.DetrLoss with Detr->Yolos
class Yolov6Loss(nn.Module):
"""
This class computes the losses for Yolov6ForObjectDetection. The process happens in two steps: 1)
@@ -1565,14 +1555,22 @@ class Yolov6Loss(nn.Module):
Args:
matcher (`YolosHungarianMatcher`):
Module able to compute a matching between targets and proposals.
num_classes (`int`):
Number of object categories, omitting the special no-object category.
eos_coef (`float`):
Relative classification weight applied to the no-object category.
warmup_epoch (`int`):
Number of warmup epochs during which the ATSSAssigner would be used before switching to the TaskAlignedAssigner. However, it is not used here due to incompatibility.
use_dfl (`bool`):
Whether to use the distribution focal loss (`dfl_loss`).
iou_type (`str`):
Type of IoU loss to use, such as giou, ciou, or diou.
fpn_strides (`List[int]`):
List of strides for the feature pyramid network levels.
reg_max (`int`):
Maximum value of the regression range used by the distribution focal loss.
losses (`List[str]`):
List of all the losses to be applied. See `get_loss` for a list of all available losses.
training (`bool`):
Whether the module is in the training or inference stage.
"""

def __init__(
@@ -1783,6 +1781,7 @@ def forward(self, outputs, targets):
outputs["anchor_points_s"] = anchor_points_s
pred_bboxes = self.bbox_decode(anchor_points_s, pred_distri) # xyxy
outputs["pred_bboxes"] = pred_bboxes
# the original implementation selects the assigner based on warmup_epoch
if not self.training:
target_labels, target_bboxes, target_scores, fg_mask = self.formal_assigner(
pred_scores.detach(),
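
For reference, the `df_loss` touched above implements the distribution focal loss used when `use_dfl` is enabled: the continuous regression target is split between its two neighbouring integer bins, and the cross-entropy against each bin is weighted by how close the target is to it. A hedged, self-contained sketch of that computation (the argument shapes are assumptions, not taken from the model):

```python
import torch
import torch.nn.functional as F


def df_loss_sketch(pred_dist: torch.Tensor, target: torch.Tensor, reg_max: int = 16) -> torch.Tensor:
    # pred_dist: (num_boxes, 4, reg_max + 1) logits over discrete distance bins
    # target:    (num_boxes, 4) continuous distances in [0, reg_max)
    target_left = target.long()                  # lower neighbouring bin
    target_right = target_left + 1               # upper neighbouring bin
    weight_left = target_right.float() - target  # the closer the target, the larger the weight
    weight_right = 1.0 - weight_left
    loss_left = F.cross_entropy(
        pred_dist.view(-1, reg_max + 1), target_left.view(-1), reduction="none"
    ).view(target_left.shape) * weight_left
    loss_right = F.cross_entropy(
        pred_dist.view(-1, reg_max + 1), target_right.view(-1), reduction="none"
    ).view(target_left.shape) * weight_right
    return (loss_left + loss_right).mean(-1, keepdim=True)


pred = torch.randn(2, 4, 17)  # 2 hypothetical boxes, 4 box sides, reg_max + 1 = 17 bins
tgt = torch.rand(2, 4) * 15   # continuous targets strictly below reg_max
print(df_loss_sketch(pred, tgt).shape)  # torch.Size([2, 1])
```
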
