freemocap · philipqueen · Oct 10, 2024 · Oct 10, 2024
diff --git a/skellytracker/trackers/base_tracker/base_tracker.py b/skellytracker/trackers/base_tracker/base_tracker.py
@@ -37,11 +37,12 @@ def __init__(
             self.tracked_objects[name] = TrackedObject(object_id=name)
 
     @abstractmethod
-    def process_image(self, image: np.ndarray, **kwargs) -> Dict[str, TrackedObject]:
+    def process_image(self, image: np.ndarray, annotate_image: bool = True, **kwargs) -> Dict[str, TrackedObject]:
         """
         Process the input image and apply the tracking algorithm.
 
         :param image: An input image.
+        :param annotate_image: Whether to annotate a copy of the image with the tracking results (defaults to True); pass False to skip annotation.
         :return: A dictionary of tracked objects
         """
         pass

diff --git a/skellytracker/trackers/charuco_tracker/charuco_tracker.py b/skellytracker/trackers/charuco_tracker/charuco_tracker.py
@@ -10,6 +10,7 @@
 
 default_aruco_dictionary = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_4X4_250)
 
+
 class CharucoTracker(BaseTracker):
     def __init__(
         self,
@@ -36,7 +37,9 @@ def __init__(
         self.tracked_object_names = tracked_object_names
         self.dictionary = dictionary
 
-    def process_image(self, image: np.ndarray, **kwargs) -> Dict[str, TrackedObject]:
+    def process_image(
+        self, image: np.ndarray, annotate_image: bool = True, **kwargs
+    ) -> Dict[str, TrackedObject]:
         # Convert the image to grayscale
         gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 
@@ -59,9 +62,10 @@ def process_image(self, image: np.ndarray, **kwargs) -> Dict[str, TrackedObject]
                 self.tracked_objects[object_id].pixel_x = corner[0][0]
                 self.tracked_objects[object_id].pixel_y = corner[0][1]
 
-        self.annotated_image = self.annotate_image(
-            image=image, tracked_objects=self.tracked_objects
-        )
+        if annotate_image:
+            self.annotated_image = self.annotate_image(
+                image=image, tracked_objects=self.tracked_objects
+            )
 
         return self.tracked_objects
 

diff --git a/skellytracker/trackers/mediapipe_blendshape_tracker/mediapipe_blendshape_tracker.py b/skellytracker/trackers/mediapipe_blendshape_tracker/mediapipe_blendshape_tracker.py
@@ -46,7 +46,9 @@ def __init__(
         )
         self.detector = vision.FaceLandmarker.create_from_options(options)
 
-    def process_image(self, image: np.ndarray, **kwargs) -> Dict[str, TrackedObject]:
+    def process_image(
+        self, image: np.ndarray, annotate_image: bool = True, **kwargs
+    ) -> Dict[str, TrackedObject]:
         rgb_image = cv2.cvtColor(
             image, cv2.COLOR_BGR2RGB
         )  # TODO: may need to convert this into an `mp.Image`, but can't find documentation about that
@@ -63,11 +65,12 @@ def process_image(self, image: np.ndarray, **kwargs) -> Dict[str, TrackedObject]
             blendshape.score for blendshape in results.face_blendshapes[0]
         ]  # TODO: assumes we're only interested in 1 face, but docs say this works for multiple faces??
 
-        self.annotated_image = self.annotate_image(
-            image=image,
-            tracked_objects=self.tracked_objects,
-            face_landmarks=results.face_landmarks[0],
-        )
+        if annotate_image:
+            self.annotated_image = self.annotate_image(
+                image=image,
+                tracked_objects=self.tracked_objects,
+                face_landmarks=results.face_landmarks[0],
+            )
 
         return self.tracked_objects
 
@@ -127,7 +130,7 @@ def get_or_download_mediapipe_blendshape_model(self) -> Path:
             r.raise_for_status()
             model_path.write_bytes(r.content)
         return model_path
-    
+
 
 if __name__ == "__main__":
     MediapipeBlendshapeTracker().demo()
diff --git a/skellytracker/trackers/mediapipe_tracker/mediapipe_holistic_tracker.py b/skellytracker/trackers/mediapipe_tracker/mediapipe_holistic_tracker.py
@@ -36,7 +36,9 @@ def __init__(
             smooth_landmarks=smooth_landmarks,
         )
 
-    def process_image(self, image: np.ndarray, **kwargs) -> Dict[str, TrackedObject]:
+    def process_image(
+        self, image: np.ndarray, annotate_image: bool = True, **kwargs
+    ) -> Dict[str, TrackedObject]:
         # Convert the image to RGB
         rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
 
@@ -57,9 +59,10 @@ def process_image(self, image: np.ndarray, **kwargs) -> Dict[str, TrackedObject]
             "landmarks"
         ] = results.right_hand_landmarks
 
-        self.annotated_image = self.annotate_image(
-            image=image, tracked_objects=self.tracked_objects
-        )
+        if annotate_image:
+            self.annotated_image = self.annotate_image(
+                image=image, tracked_objects=self.tracked_objects
+            )
 
         return self.tracked_objects
 

diff --git a/skellytracker/trackers/yolo_mediapipe_combo_tracker/yolo_mediapipe_combo_tracker.py b/skellytracker/trackers/yolo_mediapipe_combo_tracker/yolo_mediapipe_combo_tracker.py
@@ -52,7 +52,9 @@ def __init__(
         self.bounding_box_buffer_percentage = bounding_box_buffer_percentage
         self.buffer_size_method = buffer_size_method
 
-    def process_image(self, image: np.ndarray, **kwargs) -> Dict[str, TrackedObject]:
+    def process_image(
+        self, image: np.ndarray, annotate_image: bool = True, **kwargs
+    ) -> Dict[str, TrackedObject]:
 
         yolo_results = self.model(image, classes=0, max_det=1, verbose=False)
         box_xyxy = np.asarray(yolo_results[0].boxes.xyxy.cpu()).flatten()
@@ -119,9 +121,10 @@ def process_image(self, image: np.ndarray, **kwargs) -> Dict[str, TrackedObject]
 
         bbox_image = buffered_yolo_results[0].plot()
 
-        self.annotated_image = self.annotate_image(
-            image=bbox_image, tracked_objects=self.tracked_objects
-        )
+        if annotate_image:
+            self.annotated_image = self.annotate_image(
+                image=bbox_image, tracked_objects=self.tracked_objects
+            )
 
         return self.tracked_objects
 

diff --git a/skellytracker/trackers/yolo_object_tracker/yolo_object_tracker.py b/skellytracker/trackers/yolo_object_tracker/yolo_object_tracker.py
@@ -31,7 +31,9 @@ def __init__(
         else:
             self.classes = None  # None includes all classes
 
-    def process_image(self, image, **kwargs) -> Dict[str, TrackedObject]:
+    def process_image(
+        self, image, annotate_image: bool = True, **kwargs
+    ) -> Dict[str, TrackedObject]:
         results = self.model(
             image,
             classes=self.classes,
@@ -53,7 +55,8 @@ def process_image(self, image, **kwargs) -> Dict[str, TrackedObject]:
             0
         ].boxes.orig_shape
 
-        self.annotated_image = self.annotate_image(image, results=results, **kwargs)
+        if annotate_image:
+            self.annotated_image = self.annotate_image(image, results=results, **kwargs)
 
         return self.tracked_objects
 

diff --git a/skellytracker/trackers/yolo_tracker/yolo_tracker.py b/skellytracker/trackers/yolo_tracker/yolo_tracker.py
@@ -15,15 +15,18 @@ def __init__(self, model_size: str = "nano"):
         pytorch_model = YOLOModelInfo.model_dictionary[model_size]
         self.model = YOLO(pytorch_model)
 
-    def process_image(self, image: np.ndarray, **kwargs) -> Dict[str, TrackedObject]:
+    def process_image(
+        self, image: np.ndarray, annotate_image: bool = True, **kwargs
+    ) -> Dict[str, TrackedObject]:
         # "max_det=1" argument to limit to single person tracking for now
         results = self.model(image, max_det=1, verbose=False)
 
         self.unpack_results(results)
 
-        self.annotated_image = self.annotate_image(
-            image=image, results=results, **kwargs
-        )
+        if annotate_image:
+            self.annotated_image = self.annotate_image(
+                image=image, results=results, **kwargs
+            )
 
         return self.tracked_objects