diff --git a/src/super_gradients/training/utils/detection_utils.py b/src/super_gradients/training/utils/detection_utils.py
index ac2a757c7b..06eb758d3e 100755
--- a/src/super_gradients/training/utils/detection_utils.py
+++ b/src/super_gradients/training/utils/detection_utils.py
@@ -25,6 +25,7 @@ class DetectionTargetsFormat(Enum):
     For example:
     LABEL_NORMALIZED_XYXY means [class_idx,x1,y1,x2,y2]
     """
+
     LABEL_XYXY = "LABEL_XYXY"
     XYXY_LABEL = "XYXY_LABEL"
     LABEL_NORMALIZED_XYXY = "LABEL_NORMALIZED_XYXY"
@@ -88,8 +89,7 @@ def _iou(CIoU: bool, DIoU: bool, GIoU: bool, b1_x1, b1_x2, b1_y1, b1_y2, b2_x1,
     DO NOT CALL THIS FUNCTIONS DIRECTLY - use one of the functions mentioned above
     """
     # Intersection area
-    intersection_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
-                        (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
+    intersection_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
     # Union Area
     w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1
     w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1
@@ -105,16 +105,16 @@ def _iou(CIoU: bool, DIoU: bool, GIoU: bool, b1_x1, b1_x2, b1_y1, b1_y2, b2_x1,
         # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
         if DIoU or CIoU:
             # convex diagonal squared
-            c2 = cw ** 2 + ch ** 2 + eps
+            c2 = cw**2 + ch**2 + eps
             # centerpoint distance squared
             rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4
             if DIoU:
                 iou -= rho2 / c2  # DIoU
             elif CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
-                v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
+                v = (4 / math.pi**2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
                 with torch.no_grad():
                     alpha = v / ((1 + eps) - iou + v)
-                iou -= (rho2 / c2 + v * alpha)  # CIoU
+                iou -= rho2 / c2 + v * alpha  # CIoU
     return iou


@@ -159,8 +159,7 @@ def calc_bbox_iou_matrix(pred: torch.Tensor):
     b2_x2 = b1_x2.transpose(2, 1)
     b2_y1 = b1_y1.transpose(2, 1)
     b2_y2 = b1_y2.transpose(2, 1)
-    intersection_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
-                        (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
+    intersection_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
     # Union Area
     w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1
     w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1
@@ -177,7 +176,6 @@ def change_bbox_bounds_for_image_size(boxes, img_shape):


 class DetectionPostPredictionCallback(ABC, nn.Module):
-
     def __init__(self) -> None:
         super().__init__()

@@ -234,8 +232,7 @@ def box_area(box):
     return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)


-def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6,
-                        multi_label_per_box: bool = True, with_confidence: bool = False):
+def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6, multi_label_per_box: bool = True, with_confidence: bool = False):
     """
     Performs Non-Maximum Suppression (NMS) on inference results
     :param prediction: raw model prediction
@@ -285,8 +282,7 @@ def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6,
     return output


-def matrix_non_max_suppression(pred, conf_thres: float = 0.1, kernel: str = 'gaussian',
-                               sigma: float = 3.0, max_num_of_detections: int = 500):
+def matrix_non_max_suppression(pred, conf_thres: float = 0.1, kernel: str = "gaussian", sigma: float = 3.0, max_num_of_detections: int = 500):
     """Performs Matrix Non-Maximum Suppression (NMS) on inference results
     https://arxiv.org/pdf/1912.04488.pdf
     :param pred: raw model prediction (in test mode) - a Tensor of shape [batch, num_predictions, 85]
@@ -326,9 +322,9 @@ def matrix_non_max_suppression(pred, conf_thres: float = 0.1, kernel: str = 'gau
     ious_cmax, _ = ious.max(1)
     ious_cmax = ious_cmax.unsqueeze(2).repeat(1, 1, max_num_of_detections)

-    if kernel == 'gaussian':
-        decay_matrix = torch.exp(-1 * sigma * (ious ** 2))
-        compensate_matrix = torch.exp(-1 * sigma * (ious_cmax ** 2))
+    if kernel == "gaussian":
+        decay_matrix = torch.exp(-1 * sigma * (ious**2))
+        compensate_matrix = torch.exp(-1 * sigma * (ious_cmax**2))
         decay, _ = (decay_matrix / compensate_matrix).min(dim=1)
     else:
         decay = (1 - ious) / (1 - ious_cmax)
@@ -345,8 +341,9 @@ class NMS_Type(str, Enum):
     """
     Type of non max suppression algorithm that can be used for post processing detection
     """
-    ITERATIVE = 'iterative'
-    MATRIX = 'matrix'
+
+    ITERATIVE = "iterative"
+    MATRIX = "matrix"


 def undo_image_preprocessing(im_tensor: torch.Tensor) -> np.ndarray:
@@ -356,7 +353,7 @@ def undo_image_preprocessing(im_tensor: torch.Tensor) -> np.ndarray:
     """
     im_np = im_tensor.cpu().numpy()
     im_np = im_np[:, ::-1, :, :].transpose(0, 2, 3, 1)
-    im_np *= 255.
+    im_np *= 255.0

     return np.ascontiguousarray(im_np, dtype=np.uint8)

@@ -366,14 +363,24 @@ def _generate_color_mapping(num_classes: int) -> List[Tuple[int]]:
         """
         Generate a unique BGR color for each class
         """
-        cmap = plt.cm.get_cmap('gist_rainbow', num_classes)
+        cmap = plt.cm.get_cmap("gist_rainbow", num_classes)
         colors = [cmap(i, bytes=True)[:3][::-1] for i in range(num_classes)]
         return [tuple(int(v) for v in c) for c in colors]

     @staticmethod
-    def _draw_box_title(color_mapping: List[Tuple[int]], class_names: List[str], box_thickness: int,
-                        image_np: np.ndarray, x1: int, y1: int, x2: int, y2: int, class_id: int,
-                        pred_conf: float = None, is_target: bool = False):
+    def _draw_box_title(
+        color_mapping: List[Tuple[int]],
+        class_names: List[str],
+        box_thickness: int,
+        image_np: np.ndarray,
+        x1: int,
+        y1: int,
+        x2: int,
+        y2: int,
+        class_id: int,
+        pred_conf: float = None,
+        is_target: bool = False,
+    ):
         color = color_mapping[class_id]
         class_name = class_names[class_id]

@@ -384,35 +391,43 @@ def _draw_box_title(color_mapping: List[Tuple[int]], class_names: List[str], box
         text_color = (255, 255, 255)  # white

         if is_target:
-            title = f'[GT] {class_name}'
+            title = f"[GT] {class_name}"
         if not is_target:
             title = f'[Pred] {class_name} {str(round(pred_conf, 2)) if pred_conf is not None else ""}'

         image_np = cv2.rectangle(image_np, (x1, y1 - 15), (x1 + len(title) * 10, y1), color, cv2.FILLED)
-        image_np = cv2.putText(image_np, title, (x1, y1 - box_thickness), 2, .5, text_color, 1, lineType=cv2.LINE_AA)
+        image_np = cv2.putText(image_np, title, (x1, y1 - box_thickness), 2, 0.5, text_color, 1, lineType=cv2.LINE_AA)

         return image_np

     @staticmethod
-    def _visualize_image(image_np: np.ndarray, pred_boxes: np.ndarray, target_boxes: np.ndarray,
-                         class_names: List[str], box_thickness: int, gt_alpha: float, image_scale: float,
-                         checkpoint_dir: str, image_name: str):
+    def _visualize_image(
+        image_np: np.ndarray,
+        pred_boxes: np.ndarray,
+        target_boxes: np.ndarray,
+        class_names: List[str],
+        box_thickness: int,
+        gt_alpha: float,
+        image_scale: float,
+        checkpoint_dir: str,
+        image_name: str,
+    ):
         image_np = cv2.resize(image_np, (0, 0), fx=image_scale, fy=image_scale, interpolation=cv2.INTER_NEAREST)
         color_mapping = DetectionVisualization._generate_color_mapping(len(class_names))

         # Draw predictions
         pred_boxes[:, :4] *= image_scale
         for box in pred_boxes:
-            image_np = DetectionVisualization._draw_box_title(color_mapping, class_names, box_thickness,
-                                                              image_np, *box[:4].astype(int),
-                                                              class_id=int(box[5]), pred_conf=box[4])
+            image_np = DetectionVisualization._draw_box_title(
+                color_mapping, class_names, box_thickness, image_np, *box[:4].astype(int), class_id=int(box[5]), pred_conf=box[4]
+            )

         # Draw ground truths
         target_boxes_image = np.zeros_like(image_np, np.uint8)
         for box in target_boxes:
-            target_boxes_image = DetectionVisualization._draw_box_title(color_mapping, class_names, box_thickness,
-                                                                        target_boxes_image, *box[2:],
-                                                                        class_id=box[1], is_target=True)
+            target_boxes_image = DetectionVisualization._draw_box_title(
+                color_mapping, class_names, box_thickness, target_boxes_image, *box[2:], class_id=box[1], is_target=True
+            )

         # Transparent overlay of ground truth boxes
         mask = target_boxes_image.astype(bool)
@@ -422,7 +437,7 @@ def _visualize_image(image_np: np.ndarray, pred_boxes: np.ndarray, target_boxes:
             return image_np
         else:
             pathlib.Path(checkpoint_dir).mkdir(parents=True, exist_ok=True)
-            cv2.imwrite(os.path.join(checkpoint_dir, str(image_name) + '.jpg'), image_np)
+            cv2.imwrite(os.path.join(checkpoint_dir, str(image_name) + ".jpg"), image_np)

     @staticmethod
     def _scaled_ccwh_to_xyxy(target_boxes: np.ndarray, h: int, w: int, image_scale: float) -> np.ndarray:
@@ -450,10 +465,18 @@ def _scaled_ccwh_to_xyxy(target_boxes: np.ndarray, h: int, w: int, image_scale:
         return target_boxes

     @staticmethod
-    def visualize_batch(image_tensor: torch.Tensor, pred_boxes: List[torch.Tensor], target_boxes: torch.Tensor,
-                        batch_name: Union[int, str], class_names: List[str], checkpoint_dir: str = None,
-                        undo_preprocessing_func: Callable[[torch.Tensor], np.ndarray] = undo_image_preprocessing,
-                        box_thickness: int = 2, image_scale: float = 1., gt_alpha: float = .4):
+    def visualize_batch(
+        image_tensor: torch.Tensor,
+        pred_boxes: List[torch.Tensor],
+        target_boxes: torch.Tensor,
+        batch_name: Union[int, str],
+        class_names: List[str],
+        checkpoint_dir: str = None,
+        undo_preprocessing_func: Callable[[torch.Tensor], np.ndarray] = undo_image_preprocessing,
+        box_thickness: int = 2,
+        image_scale: float = 1.0,
+        gt_alpha: float = 0.4,
+    ):
         """
         A helper function to visualize detections predicted by a network:
         saves images into a given path with a name that is {batch_name}_{imade_idx_in_the_batch}.jpg, one batch per call.
@@ -483,17 +506,17 @@ def visualize_batch(image_tensor: torch.Tensor, pred_boxes: List[torch.Tensor],
                                 0 for invisible, 1 for fully opaque
         """
         image_np = undo_preprocessing_func(image_tensor.detach())
-        targets = DetectionVisualization._scaled_ccwh_to_xyxy(target_boxes.detach().cpu().numpy(), *image_np.shape[1:3],
-                                                              image_scale)
+        targets = DetectionVisualization._scaled_ccwh_to_xyxy(target_boxes.detach().cpu().numpy(), *image_np.shape[1:3], image_scale)

         out_images = []
         for i in range(image_np.shape[0]):
             preds = pred_boxes[i].detach().cpu().numpy() if pred_boxes[i] is not None else np.empty((0, 6))
             targets_cur = targets[targets[:, 0] == i]

-            image_name = '_'.join([str(batch_name), str(i)])
-            res_image = DetectionVisualization._visualize_image(image_np[i], preds, targets_cur, class_names, box_thickness, gt_alpha, image_scale,
-                                                                checkpoint_dir, image_name)
+            image_name = "_".join([str(batch_name), str(i)])
+            res_image = DetectionVisualization._visualize_image(
+                image_np[i], preds, targets_cur, class_names, box_thickness, gt_alpha, image_scale, checkpoint_dir, image_name
+            )
             if res_image is not None:
                 out_images.append(res_image)

@@ -531,14 +554,14 @@ def __init__(self, anchors_list: List[List], strides: List[int]):
     def _check_all_lists(anchors: list) -> bool:
         for a in anchors:
             if not isinstance(a, (list, ListConfig)):
-                raise RuntimeError('All objects of anchors_list must be lists')
+                raise RuntimeError("All objects of anchors_list must be lists")

     @staticmethod
     def _check_all_len_equal_and_even(anchors: list) -> bool:
         len_of_first = len(anchors[0])
         for a in anchors:
             if len(a) % 2 == 1 or len(a) != len_of_first:
-                raise RuntimeError('All objects of anchors_list must be of the same even length')
+                raise RuntimeError("All objects of anchors_list must be of the same even length")

     @property
     def stride(self) -> nn.Parameter:
@@ -644,6 +667,7 @@ class DetectionCollateFN:
     """
     Collate function for Yolox training
     """
+
     def __call__(self, data) -> Tuple[torch.Tensor, torch.Tensor]:
         batch = default_collate(data)
         ims, targets = batch[0:2]
@@ -653,7 +677,7 @@ def _format_targets(self, targets: torch.Tensor) -> torch.Tensor:
         nlabel = (targets.sum(dim=2) > 0).sum(dim=1)  # number of label per image
         targets_merged = []
         for i in range(targets.shape[0]):
-            targets_im = targets[i, :nlabel[i]]
+            targets_im = targets[i, : nlabel[i]]
             batch_column = targets.new_ones((targets_im.shape[0], 1)) * i
             targets_merged.append(torch.cat((batch_column, targets_im), 1))
         return torch.cat(targets_merged, 0)
@@ -663,6 +687,7 @@ class CrowdDetectionCollateFN(DetectionCollateFN):
     """
     Collate function for Yolox training with additional_batch_items that includes crowd targets
     """
+
     def __call__(self, data) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, torch.Tensor]]:
         batch = default_collate(data)
         ims, targets, crowd_targets = batch[0:3]
@@ -674,7 +699,7 @@ def compute_box_area(box: torch.Tensor) -> torch.Tensor:
     :param box: One or many boxes, shape = (4, ?), each box in format (x1, y1, x2, y2)
     Returns:
         Area of every box, shape = (1, ?)
- """ + """ # box = 4xn return (box[2] - box[0]) * (box[3] - box[1]) @@ -693,8 +718,7 @@ def crowd_ioa(det_box: torch.Tensor, crowd_box: torch.Tensor) -> torch.Tensor: det_area = compute_box_area(det_box.T) # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) - inter = (torch.min(det_box[:, None, 2:], crowd_box[:, 2:]) - torch.max(det_box[:, None, :2], crowd_box[:, :2])) \ - .clamp(0).prod(2) + inter = (torch.min(det_box[:, None, 2:], crowd_box[:, 2:]) - torch.max(det_box[:, None, :2], crowd_box[:, :2])).clamp(0).prod(2) return inter / det_area[:, None] # crowd_ioa = inter / det_area @@ -758,7 +782,7 @@ def compute_detection_matching( device=device, iou_thresholds=iou_thresholds, top_k=top_k, - return_on_cpu=return_on_cpu + return_on_cpu=return_on_cpu, ) batch_metrics.append(img_matching_tensors) @@ -766,16 +790,16 @@ def compute_detection_matching( def compute_img_detection_matching( - preds: torch.Tensor, - targets: torch.Tensor, - crowd_targets: torch.Tensor, - height: int, - width: int, - iou_thresholds: torch.Tensor, - device: str, - denormalize_targets: bool, - top_k: int = 100, - return_on_cpu: bool = True + preds: torch.Tensor, + targets: torch.Tensor, + crowd_targets: torch.Tensor, + height: int, + width: int, + iou_thresholds: torch.Tensor, + device: str, + denormalize_targets: bool, + top_k: int = 100, + return_on_cpu: bool = True, ) -> Tuple: """ Match predictions (NMS output) and the targets (ground truth) with respect to IoU and confidence score @@ -851,7 +875,7 @@ def compute_img_detection_matching( # Fill IoU values at index (i, j) with 0 when the prediction (i) and target(j) are of different class # Filling with 0 is equivalent to ignore these values since with want IoU > iou_threshold > 0 - cls_mismatch = (preds_cls[preds_idx_to_use].view(-1, 1) != targets_cls.view(1, -1)) + cls_mismatch = preds_cls[preds_idx_to_use].view(-1, 1) != targets_cls.view(1, -1) iou[cls_mismatch] = 0 # The matching priority is first detection confidence and then IoU value. @@ -892,7 +916,7 @@ def compute_img_detection_matching( # Fill IoA values at index (i, j) with 0 when the prediction (i) and target(j) are of different class # Filling with 0 is equivalent to ignore these values since with want IoA > threshold > 0 - cls_mismatch = (preds_cls[preds_idx_to_use].view(-1, 1) != crowd_targets_cls.view(1, -1)) + cls_mismatch = preds_cls[preds_idx_to_use].view(-1, 1) != crowd_targets_cls.view(1, -1) ioa[cls_mismatch] = 0 # For each prediction, we keep it's highest score with any crowd target (of same class) @@ -901,7 +925,7 @@ def compute_img_detection_matching( # If a prediction has IoA higher than threshold (with any target of same class), then there is a match # shape = (n_preds_to_use x iou_thresholds) - is_matching_with_crowd = (best_ioa.view(-1, 1) > iou_thresholds.view(1, -1)) + is_matching_with_crowd = best_ioa.view(-1, 1) > iou_thresholds.view(1, -1) preds_to_ignore[preds_idx_to_use] = torch.logical_or(preds_to_ignore[preds_idx_to_use], is_matching_with_crowd) @@ -925,7 +949,7 @@ def get_top_k_idx_per_cls(preds_scores: torch.Tensor, preds_cls: torch.Tensor, t :return top_k_idx: Indexes of the top k predictions. 
length <= (k * n_unique_class) """ n_unique_cls = torch.max(preds_cls) - mask = (preds_cls.view(-1, 1) == torch.arange(n_unique_cls + 1, device=preds_scores.device).view(1, -1)) + mask = preds_cls.view(-1, 1) == torch.arange(n_unique_cls + 1, device=preds_scores.device).view(1, -1) preds_scores_per_cls = preds_scores.view(-1, 1) * mask sorted_scores_per_cls, sorting_idx = preds_scores_per_cls.sort(0, descending=True) @@ -984,7 +1008,7 @@ def compute_detection_metrics( n_targets=cls_targets_idx.sum(), recall_thresholds=recall_thresholds, score_threshold=score_threshold, - device=device + device=device, ) ap[cls_i, :] = cls_ap precision[cls_i, :] = cls_precision @@ -996,13 +1020,13 @@ def compute_detection_metrics( def compute_detection_metrics_per_cls( - preds_matched: torch.Tensor, - preds_to_ignore: torch.Tensor, - preds_scores: torch.Tensor, - n_targets: int, - recall_thresholds: torch.Tensor, - score_threshold: float, - device: str, + preds_matched: torch.Tensor, + preds_to_ignore: torch.Tensor, + preds_scores: torch.Tensor, + n_targets: int, + recall_thresholds: torch.Tensor, + score_threshold: float, + device: str, ): """ Compute the list of precision, recall and MaP of a given class for every recall IoU threshold. @@ -1035,7 +1059,7 @@ def compute_detection_metrics_per_cls( sort_ind = torch.argsort(preds_scores.to(dtype), descending=True) tps = tps[sort_ind, :] fps = fps[sort_ind, :] - preds_scores = preds_scores[sort_ind] + preds_scores = preds_scores[sort_ind].contiguous() # Rolling sum over the predictions rolling_tps = torch.cumsum(tps, axis=0, dtype=torch.float) @@ -1070,7 +1094,7 @@ def compute_detection_metrics_per_cls( # We want the index i so that: rolling_recalls[i-1] < recall_thresholds[k] <= rolling_recalls[i] # Note: when recall_thresholds[k] > max(rolling_recalls), i = len(rolling_recalls) # Note2: we work with transpose (.T) to apply torch.searchsorted on first dim instead of the last one - recall_threshold_idx = torch.searchsorted(rolling_recalls.T, recall_thresholds, right=False).T + recall_threshold_idx = torch.searchsorted(rolling_recalls.T.contiguous(), recall_thresholds, right=False).T # When recall_thresholds[k] > max(rolling_recalls), rolling_precisions[i] is not defined, and we want precision = 0 rolling_precisions = torch.cat((rolling_precisions, torch.zeros(1, nb_iou_thrs, device=device)), dim=0)
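
Reviewer's note (outside the diff): beyond Black-style reformatting and quote normalization, the only edits that appear to change behavior are the two `.contiguous()` calls added in `compute_detection_metrics_per_cls`. `Tensor.T` returns a non-contiguous view, and `torch.searchsorted` expects its boundaries tensor to be contiguous; some PyTorch versions raise a `RuntimeError` on a non-contiguous one. A minimal sketch of the failure mode and the fix follows; the shapes and values are illustrative only and are not taken from this PR.

```python
import torch

# Toy stand-ins for the tensors used in compute_detection_metrics_per_cls.
rolling_recalls = torch.tensor([[0.1, 0.3], [0.4, 0.6], [0.9, 1.0]])  # (n_preds, nb_iou_thrs)
recall_thresholds = torch.tensor([0.25, 0.50, 0.75])                  # (n_recall_thresholds,)

boundaries = rolling_recalls.T  # (nb_iou_thrs, n_preds): a non-contiguous view
assert not boundaries.is_contiguous()

# Broadcast the thresholds so the leading dimension matches the boundaries tensor.
values = recall_thresholds.repeat(boundaries.shape[0], 1)

# On PyTorch versions that enforce contiguity, this call raises a RuntimeError.
try:
    torch.searchsorted(boundaries, values, right=False)
except RuntimeError as err:
    print(f"non-contiguous boundaries rejected: {err}")

# The fix applied in the diff: materialize a contiguous copy of the transpose first.
idx = torch.searchsorted(boundaries.contiguous(), values, right=False)
print(idx)  # per row: smallest i such that values[row, k] <= boundaries[row, i]
```

The extra copy is cheap next to the cumulative sums in that function, and `preds_scores[sort_ind].contiguous()` is the same defensive pattern applied to the sorted score tensor.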