Adding MedSAM2 to the model zoo (#4828)

* added Med-SAM2 model to manifest-torch.json (#4733)
* Adding documentation for MedSAM2
* Added some needed imports

---------

Co-authored-by: Evatt Harvey-Salinger <[email protected]>
Co-authored-by: danielgural <[email protected]>
voxel51 · Sep 24, 2024 · 65cfa91 · 65cfa91
1 parent 54e2760
commit 65cfa91
Show file tree

Hide file tree

Showing 3 changed files with 72 additions and 6 deletions.
diff --git a/docs/scripts/make_model_zoo_docs.py b/docs/scripts/make_model_zoo_docs.py
@@ -92,6 +92,13 @@
     from fiftyone import ViewField as F
 {% endif %}
 
+{% if 'med-sam' in name %}
+    from fiftyone import ViewField as F
+    from fiftyone.utils.huggingface import load_from_hub
+    
+    dataset = load_from_hub("Voxel51/BTCV-CT-as-video-MedSAM2-dataset")[:2]
+{% endif %}
+
 {% if 'imagenet' in name %}
     dataset = foz.load_zoo_dataset(
         "imagenet-sample",
@@ -109,6 +116,17 @@
         .set_field("frames.detections", None)
         .save()
     )
+{% elif 'med-sam' in name %}
+
+    # Retain detections only on a single frame in the middle (frame 100)
+    # Note that SAM2 only propagates segmentation masks forward in a video
+    (
+        dataset
+        .match_frames(F("frame_number") != 100)
+        .set_field("frames.gt_detections", None)
+        .save()
+    )
+
 {% else %}
     dataset = foz.load_zoo_dataset(
         "coco-2017",
@@ -133,7 +151,7 @@
     dataset.apply_model(model, label_field="auto")
 
     session = fo.launch_app(dataset)
-{% elif 'segment-anything' in tags and 'video' in tags %}
+{% elif 'segment-anything' in tags and 'video' in tags  and 'med-SAM' not in tags %}
     model = foz.load_zoo_model("{{ name }}")
 
     # Segment inside boxes and propagate to all frames
@@ -143,6 +161,17 @@
         prompt_field="frames.detections",  # can contain Detections or Keypoints
     )
 
+    session = fo.launch_app(dataset)
+{% elif 'med-sam' in name %}
+    model = foz.load_zoo_model("{{ name }}")
+
+    # Segment inside boxes and propagate to all frames
+    dataset.apply_model(
+        model,
+        label_field="pred_segmentations",
+        prompt_field="frames.gt_detections",
+    )
+
     session = fo.launch_app(dataset)
 {% elif 'dinov2' in name %}
     model = foz.load_zoo_model("{{ name }}")

diff --git a/fiftyone/utils/sam2.py b/fiftyone/utils/sam2.py
@@ -7,19 +7,18 @@
 |
 """
 
-import cv2
-import numpy as np
-
 import logging
 
+import cv2
 import eta.core.utils as etau
+import numpy as np
 
 import fiftyone.core.labels as fol
+import fiftyone.core.models as fom
 import fiftyone.core.utils as fou
-import fiftyone.utils.torch as fout
 import fiftyone.utils.sam as fosam
+import fiftyone.utils.torch as fout
 import fiftyone.zoo.models as fozm
-import fiftyone.core.models as fom
 
 fou.ensure_torch()
 import torch

diff --git a/fiftyone/zoo/models/manifest-torch.json b/fiftyone/zoo/models/manifest-torch.json
@@ -448,6 +448,44 @@
             "tags": ["segment-anything", "torch", "zero-shot", "video"],
             "date_added": "2024-08-05 14:38:20"
         },
+        {
+            "base_name": "med-sam-2-video-torch",
+            "base_filename": "med-sam-2_pretrain.pth",
+            "version": null,
+            "description": "Fine-tuned SAM2-hiera-tiny model from paper: `Medical SAM 2 - Segment Medical Images as Video via Segment Anything Model 2 <https://arxiv.org/abs/2408.00874>`_",
+            "source": "https://github.com/MedicineToken/Medical-SAM2",
+            "size_bytes": 155906050,
+            "manager": {
+                "type": "fiftyone.core.models.ModelManager",
+                "config": {
+                    "url": "https://huggingface.co/jiayuanz3/MedSAM2_pretrain/resolve/main/MedSAM2_pretrain.pth?download=true"
+                }
+            },
+            "default_deployment_config_dict": {
+                "type": "fiftyone.utils.sam2.SegmentAnything2VideoModel",
+                "config": {
+                    "entrypoint_fcn": "sam2.build_sam.build_sam2_video_predictor",
+                    "entrypoint_args": { "model_cfg": "sam2_hiera_t.yaml" }
+                }
+            },
+            "requirements": {
+                "packages": ["torch", "torchvision"],
+                "cpu": {
+                    "support": true
+                },
+                "gpu": {
+                    "support": true
+                }
+            },
+            "tags": [
+                "segment-anything",
+                "torch",
+                "zero-shot",
+                "video",
+                "med-SAM"
+            ],
+            "date_added": "2024-08-17 14:48:00"
+        },
         {
             "base_name": "deeplabv3-resnet50-coco-torch",
             "base_filename": "deeplabv3_resnet50_coco-cd0a2569.pth",