diff --git a/src/otx/core/model/rotated_detection.py b/src/otx/core/model/rotated_detection.py index ff475e3b85b..7b44ca92d6f 100644 --- a/src/otx/core/model/rotated_detection.py +++ b/src/otx/core/model/rotated_detection.py @@ -9,11 +9,12 @@ from datumaro import Polygon from torchvision import tv_tensors +from otx.algo.instance_segmentation.maskrcnn import MaskRCNN, MaskRCNNEfficientNet, MaskRCNNResNet50 from otx.core.data.entity.instance_segmentation import InstanceSegBatchPredEntity -from otx.core.model.instance_segmentation import OTXInstanceSegModel, OVInstanceSegmentationModel +from otx.core.model.instance_segmentation import OVInstanceSegmentationModel -class OTXRotatedDetModel(OTXInstanceSegModel): +class RotatedMaskRCNNModel(MaskRCNN): """Base class for the rotated detection models used in OTX.""" def predict_step(self, *args: torch.Any, **kwargs: torch.Any) -> InstanceSegBatchPredEntity: @@ -93,6 +94,14 @@ def predict_step(self, *args: torch.Any, **kwargs: torch.Any) -> InstanceSegBatc ) +class RotatedMaskRCNNResNet50(RotatedMaskRCNNModel, MaskRCNNResNet50): + """Rotated MaskRCNN model with ResNet50 backbone.""" + + +class RotatedMaskRCNNEfficientNet(RotatedMaskRCNNModel, MaskRCNNEfficientNet): + """Rotated MaskRCNN model with EfficientNet backbone.""" + + class OVRotatedDetectionModel(OVInstanceSegmentationModel): """Rotated Detection model compatible for OpenVINO IR Inference. 
diff --git a/src/otx/recipe/_base_/data/rotated_detection.yaml b/src/otx/recipe/_base_/data/rotated_detection.yaml deleted file mode 100644 index 8ac4759ffc5..00000000000 --- a/src/otx/recipe/_base_/data/rotated_detection.yaml +++ /dev/null @@ -1,91 +0,0 @@ -task: ROTATED_DETECTION -input_size: - - 1024 - - 1024 -mem_cache_size: 1GB -mem_cache_img_max_size: null -image_color_channel: RGB -stack_images: true -data_format: coco_instances -include_polygons: true -unannotated_items_ratio: 0.0 -train_subset: - subset_name: train - transform_lib_type: TORCHVISION - to_tv_image: false - transforms: - - class_path: otx.core.data.transform_libs.torchvision.Resize - init_args: - keep_ratio: true - transform_bbox: true - transform_mask: true - scale: $(input_size) - - class_path: otx.core.data.transform_libs.torchvision.Pad - init_args: - size_divisor: 32 - transform_mask: true - - class_path: otx.core.data.transform_libs.torchvision.RandomFlip - init_args: - prob: 0.5 - is_numpy_to_tvtensor: true - - class_path: torchvision.transforms.v2.ToDtype - init_args: - dtype: ${as_torch_dtype:torch.float32} - - class_path: torchvision.transforms.v2.Normalize - init_args: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] - batch_size: 1 - num_workers: 2 - sampler: - class_path: torch.utils.data.RandomSampler - -val_subset: - subset_name: val - transform_lib_type: TORCHVISION - to_tv_image: false - transforms: - - class_path: otx.core.data.transform_libs.torchvision.Resize - init_args: - keep_ratio: true - scale: $(input_size) - - class_path: otx.core.data.transform_libs.torchvision.Pad - init_args: - size_divisor: 32 - is_numpy_to_tvtensor: true - - class_path: torchvision.transforms.v2.ToDtype - init_args: - dtype: ${as_torch_dtype:torch.float32} - - class_path: torchvision.transforms.v2.Normalize - init_args: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] - batch_size: 1 - num_workers: 2 - sampler: - class_path: torch.utils.data.RandomSampler - 
-test_subset: - subset_name: test - transform_lib_type: TORCHVISION - to_tv_image: false - transforms: - - class_path: otx.core.data.transform_libs.torchvision.Resize - init_args: - keep_ratio: true - scale: $(input_size) - - class_path: otx.core.data.transform_libs.torchvision.Pad - init_args: - size_divisor: 32 - is_numpy_to_tvtensor: true - - class_path: torchvision.transforms.v2.ToDtype - init_args: - dtype: ${as_torch_dtype:torch.float32} - - class_path: torchvision.transforms.v2.Normalize - init_args: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] - batch_size: 1 - num_workers: 2 - sampler: - class_path: torch.utils.data.RandomSampler diff --git a/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml b/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml index 13abae757d5..d071a72ae30 100644 --- a/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml +++ b/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml @@ -1,5 +1,5 @@ model: - class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNEfficientNet + class_path: otx.core.model.rotated_detection.RotatedMaskRCNNEfficientNet init_args: label_info: 80 @@ -28,13 +28,18 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/rotated_detection.yaml +data: ../_base_/data/instance_segmentation.yaml overrides: + task: ROTATED_DETECTION max_epochs: 100 data: train_subset: batch_size: 4 + num_workers: 8 transforms: + - class_path: otx.core.data.transform_libs.torchvision.Pad + init_args: + size_divisor: 32 - class_path: torchvision.transforms.v2.Normalize init_args: std: [1.0, 1.0, 1.0] @@ -42,15 +47,21 @@ overrides: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler val_subset: - batch_size: 1 + num_workers: 4 transforms: + - class_path: otx.core.data.transform_libs.torchvision.Pad + init_args: + size_divisor: 32 - class_path: torchvision.transforms.v2.Normalize init_args: std: [1.0, 1.0, 1.0] test_subset: - batch_size: 1 + num_workers: 4 
transforms: + - class_path: otx.core.data.transform_libs.torchvision.Pad + init_args: + size_divisor: 32 - class_path: torchvision.transforms.v2.Normalize init_args: std: [1.0, 1.0, 1.0] diff --git a/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b_tile.yaml b/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b_tile.yaml new file mode 100644 index 00000000000..358d538bfd9 --- /dev/null +++ b/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b_tile.yaml @@ -0,0 +1,89 @@ +model: + class_path: otx.core.model.rotated_detection.RotatedMaskRCNNEfficientNet + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 100 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 9 + monitor: val/map_50 + +engine: + task: ROTATED_DETECTION + device: auto + +callback_monitor: val/map_50 + +data: ../_base_/data/instance_segmentation.yaml +overrides: + task: ROTATED_DETECTION + max_epochs: 100 + data: + input_size: + - 512 + - 512 + tile_config: + enable_tiler: true + enable_adaptive_tiling: true + + train_subset: + batch_size: 4 + num_workers: 8 + transforms: + - class_path: otx.core.data.transform_libs.torchvision.Resize + init_args: + keep_ratio: false + scale: $(input_size) + - class_path: otx.core.data.transform_libs.torchvision.Pad + init_args: + pad_to_square: false + size_divisor: 32 + - class_path: torchvision.transforms.v2.Normalize + init_args: + std: [1.0, 1.0, 1.0] + sampler: + class_path: otx.algo.samplers.balanced_sampler.BalancedSampler + + val_subset: + num_workers: 4 + transforms: + - class_path: otx.core.data.transform_libs.torchvision.Resize + init_args: + keep_ratio: false + scale: $(input_size) + - class_path: otx.core.data.transform_libs.torchvision.Pad + init_args: + 
pad_to_square: false + size_divisor: 32 + - class_path: torchvision.transforms.v2.Normalize + init_args: + std: [1.0, 1.0, 1.0] + + test_subset: + num_workers: 4 + transforms: + - class_path: otx.core.data.transform_libs.torchvision.Resize + init_args: + keep_ratio: false + scale: $(input_size) + - class_path: otx.core.data.transform_libs.torchvision.Pad + init_args: + pad_to_square: false + size_divisor: 32 + - class_path: torchvision.transforms.v2.Normalize + init_args: + std: [1.0, 1.0, 1.0] diff --git a/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml b/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml index f50a74deebd..03e03d5dbf1 100644 --- a/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml +++ b/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml @@ -1,5 +1,5 @@ model: - class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNResNet50 + class_path: otx.core.model.rotated_detection.RotatedMaskRCNNResNet50 init_args: label_info: 80 @@ -19,7 +19,7 @@ model: init_args: mode: max factor: 0.1 - patience: 9 + patience: 4 monitor: val/map_50 engine: @@ -28,15 +28,30 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/rotated_detection.yaml +data: ../_base_/data/instance_segmentation.yaml overrides: + task: ROTATED_DETECTION max_epochs: 100 + gradient_clip_val: 35.0 data: train_subset: batch_size: 4 + num_workers: 8 + transforms: + - class_path: otx.core.data.transform_libs.torchvision.Pad + init_args: + size_divisor: 32 val_subset: - batch_size: 1 + num_workers: 4 + transforms: + - class_path: otx.core.data.transform_libs.torchvision.Pad + init_args: + size_divisor: 32 test_subset: - batch_size: 1 + num_workers: 4 + transforms: + - class_path: otx.core.data.transform_libs.torchvision.Pad + init_args: + size_divisor: 32 diff --git a/src/otx/recipe/rotated_detection/maskrcnn_r50_tile.yaml b/src/otx/recipe/rotated_detection/maskrcnn_r50_tile.yaml new file mode 100644 index 00000000000..26b5e746613 --- /dev/null +++ 
b/src/otx/recipe/rotated_detection/maskrcnn_r50_tile.yaml @@ -0,0 +1,79 @@ +model: + class_path: otx.core.model.rotated_detection.RotatedMaskRCNNResNet50 + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 100 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 + +engine: + task: ROTATED_DETECTION + device: auto + +callback_monitor: val/map_50 + +data: ../_base_/data/instance_segmentation.yaml +overrides: + task: ROTATED_DETECTION + max_epochs: 100 + gradient_clip_val: 35.0 + data: + input_size: + - 512 + - 512 + tile_config: + enable_tiler: true + enable_adaptive_tiling: true + + train_subset: + batch_size: 4 + num_workers: 8 + transforms: + - class_path: otx.core.data.transform_libs.torchvision.Resize + init_args: + keep_ratio: false + scale: $(input_size) + - class_path: otx.core.data.transform_libs.torchvision.Pad + init_args: + pad_to_square: false + size_divisor: 32 + + val_subset: + num_workers: 4 + transforms: + - class_path: otx.core.data.transform_libs.torchvision.Resize + init_args: + keep_ratio: false + scale: $(input_size) + - class_path: otx.core.data.transform_libs.torchvision.Pad + init_args: + pad_to_square: false + size_divisor: 32 + + test_subset: + num_workers: 4 + transforms: + - class_path: otx.core.data.transform_libs.torchvision.Resize + init_args: + keep_ratio: false + scale: $(input_size) + - class_path: otx.core.data.transform_libs.torchvision.Pad + init_args: + pad_to_square: false + size_divisor: 32 diff --git a/src/otx/recipe/rotated_detection/openvino_model.yaml b/src/otx/recipe/rotated_detection/openvino_model.yaml index b6c445bab1c..a678d25a97b 100644 --- a/src/otx/recipe/rotated_detection/openvino_model.yaml +++ 
b/src/otx/recipe/rotated_detection/openvino_model.yaml @@ -1,5 +1,5 @@ model: - class_path: otx.core.model.instance_segmentation.OVInstanceSegmentationModel + class_path: otx.core.model.rotated_detection.OVRotatedDetectionModel init_args: label_info: 80 model_name: openvino.xml @@ -13,8 +13,9 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/rotated_detection.yaml +data: ../_base_/data/instance_segmentation.yaml overrides: + task: ROTATED_DETECTION reset: - data.train_subset.transforms - data.val_subset.transforms