Merge pull request #9 from occ-ai/roy.box_border_ui_errs
Refactor code to handle text detection target result states and updat…
royshil authored May 23, 2024
2 parents b81852f + d5e4028 commit b7b346c
Showing 12 changed files with 304 additions and 312 deletions.
16 changes: 11 additions & 5 deletions README.md
@@ -94,13 +94,19 @@ There are some extra steps for installation on Windows:

 ### Running from source
 
-1. Once everything is installed launch the application:
+1. Compile the UI files into Python:
 
-   ```shell
-   python main.py
-   ```
+   ```powershell
+   ./scripts/compile_ui.ps1
+   ```
+1. Launch the application:
+   ```shell
+   python main.py
+   ```
-2. Follow the on-screen instructions to load an image of the scoreboard and extract the text.
+1. Follow the on-screen instructions to load an image of the scoreboard and extract the text.
 ### Build an executable
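A note on the new first step: `main.py` imports the generated UI modules, so the Qt Designer `.ui` files must be compiled before launch. The PowerShell script itself is not shown in this diff; a hypothetical cross-platform stand-in, assuming `pyside6-uic` from PySide6 and a flat layout of `.ui` files, might look like:

```python
# Hypothetical stand-in for scripts/compile_ui.ps1 (the real script is not in
# this diff): compile each Qt Designer .ui file into a Python module using
# pyside6-uic, the UI compiler that ships with PySide6.
import glob
import subprocess

for ui_file in glob.glob("*.ui"):
    out_file = ui_file.replace(".ui", "_ui.py")  # output naming is an assumption
    subprocess.run(["pyside6-uic", ui_file, "-o", out_file], check=True)
```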
75 changes: 7 additions & 68 deletions camera_view.py
@@ -1,5 +1,3 @@
-import platform
-import time
 from PySide6.QtWidgets import (
     QGraphicsView,
     QGraphicsScene,
@@ -8,19 +6,22 @@
 from PySide6.QtCore import Qt
 from PySide6.QtGui import QImage, QPixmap, QPainter
 from PySide6.QtCore import QThread, Signal
 
+import platform
+import time
 import cv2
 import numpy as np
 import datetime
 from datetime import datetime
 
 from camera_info import CameraInfo
 from ndi import NDICapture
 from screen_capture_source import ScreenCapture
 
 from storage import TextDetectionTargetMemoryStorage
 from tesseract import TextDetector
-import datetime
-from datetime import datetime
 
 from text_detection_target import TextDetectionTargetWithResult
 from sc_logging import logger
+from frame_stabilizer import FrameStabilizer
 
 
 # Function to set the resolution
@@ -80,68 +81,6 @@ def set_camera_highest_resolution(cap):
     set_resolution(cap, *highest_res)
 
 
-class FrameStabilizer:
-    def __init__(self):
-        self.stabilizationFrame = None
-        self.stabilizationFrameCount = 0
-        self.stabilizationBurnInCompleted = False
-        self.stabilizationKPs = None
-        self.stabilizationDesc = None
-        self.orb = None
-        self.matcher = None
-
-    def reset(self):
-        self.stabilizationFrame = None
-        self.stabilizationFrameCount = 0
-        self.stabilizationBurnInCompleted = False
-        self.stabilizationKPs = None
-        self.stabilizationDesc = None
-
-    def stabilize_frame(self, frame_rgb):
-        if self.stabilizationFrame is None:
-            self.stabilizationFrame = frame_rgb
-            self.stabilizationFrameCount = 0
-        elif not self.stabilizationBurnInCompleted:
-            self.stabilizationFrameCount += 1
-            # add the new frame to the stabilization frame
-            frame_rgb = cv2.addWeighted(frame_rgb, 0.5, self.stabilizationFrame, 0.5, 0)
-            if self.stabilizationFrameCount == 10:
-                self.stabilizationBurnInCompleted = True
-                # extract ORB features from the stabilization frame
-                self.orb = cv2.ORB_create()
-                self.stabilizationKPs, self.stabilizationDesc = (
-                    self.orb.detectAndCompute(self.stabilizationFrame, None)
-                )
-                self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
-
-        if self.stabilizationBurnInCompleted:
-            # stabilization burn-in period is over, start stabilization
-            # extract features from the current frame
-            kps, desc = self.orb.detectAndCompute(frame_rgb, None)
-            # match the features
-            matches = self.matcher.match(self.stabilizationDesc, desc)
-            # sort the matches by distance
-            matches = sorted(matches, key=lambda x: x.distance)
-            # calculate an affine transform from the matched keypoints
-            src_pts = np.float32(
-                [self.stabilizationKPs[m.queryIdx].pt for m in matches]
-            ).reshape(-1, 1, 2)
-            dst_pts = np.float32([kps[m.trainIdx].pt for m in matches]).reshape(
-                -1, 1, 2
-            )
-            h, _ = cv2.estimateAffinePartial2D(src_pts, dst_pts)
-            # warp the frame
-            if h is not None:
-                frame_rgb = cv2.warpAffine(
-                    frame_rgb,
-                    h,
-                    (frame_rgb.shape[1], frame_rgb.shape[0]),
-                    flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR,
-                )
-
-        return frame_rgb
 
 
 class TimerThread(QThread):
     update_signal = Signal(object)
     update_error = Signal(object)
74 changes: 74 additions & 0 deletions frame_stabilizer.py
@@ -0,0 +1,74 @@
+import cv2
+import numpy as np
+
+
+# This class is used to stabilize the frames of the video.
+# It uses ORB features to match keypoints between frames and calculate an affine transform to
+# warp the frame.
+class FrameStabilizer:
+    def __init__(self):
+        self.stabilizationFrame = None
+        self.stabilizationFrameCount = 0
+        self.stabilizationBurnInCompleted = False
+        self.stabilizationKPs = None
+        self.stabilizationDesc = None
+        self.orb = None
+        self.matcher = None
+
+    def reset(self):
+        self.stabilizationFrame = None
+        self.stabilizationFrameCount = 0
+        self.stabilizationBurnInCompleted = False
+        self.stabilizationKPs = None
+        self.stabilizationDesc = None
+
+    def stabilize_frame(self, frame_rgb):
+        if self.stabilizationFrame is None:
+            self.stabilizationFrame = frame_rgb
+            self.stabilizationFrameCount = 0
+        elif not self.stabilizationBurnInCompleted:
+            self.stabilizationFrameCount += 1
+            # add the new frame to the stabilization frame
+            frame_rgb = cv2.addWeighted(frame_rgb, 0.5, self.stabilizationFrame, 0.5, 0)
+            if self.stabilizationFrameCount == 10:
+                self.stabilizationBurnInCompleted = True
+                # extract ORB features from the stabilization frame
+                self.orb = cv2.ORB_create()
+                self.stabilizationKPs, self.stabilizationDesc = (
+                    self.orb.detectAndCompute(self.stabilizationFrame, None)
+                )
+                self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
+
+        if (
+            self.stabilizationBurnInCompleted
+            and self.stabilizationFrame is not None
+            and self.orb is not None
+            and self.matcher is not None
+            and self.stabilizationKPs is not None
+            and self.stabilizationDesc is not None
+        ):
+            # stabilization burn-in period is over, start stabilization
+            # extract features from the current frame
+            kps, desc = self.orb.detectAndCompute(frame_rgb, None)
+            # match the features
+            matches = self.matcher.match(self.stabilizationDesc, desc)
+            # sort the matches by distance
+            matches = sorted(matches, key=lambda x: x.distance)
+            # calculate an affine transform from the matched keypoints
+            src_pts = np.float32(
+                [self.stabilizationKPs[m.queryIdx].pt for m in matches]
+            ).reshape(-1, 1, 2)
+            dst_pts = np.float32([kps[m.trainIdx].pt for m in matches]).reshape(
+                -1, 1, 2
+            )
+            h, _ = cv2.estimateAffinePartial2D(src_pts, dst_pts)
+            # warp the frame
+            if h is not None:
+                frame_rgb = cv2.warpAffine(
+                    frame_rgb,
+                    h,
+                    (frame_rgb.shape[1], frame_rgb.shape[0]),
+                    flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR,
+                )
+
+        return frame_rgb
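With the stabilizer in its own module it can be exercised in isolation. A minimal usage sketch, assuming any OpenCV-readable source ("video.mp4" is a placeholder path, not part of the PR):

```python
# Minimal sketch: run frames from an OpenCV source through the extracted
# FrameStabilizer. "video.mp4" is a placeholder path.
import cv2

from frame_stabilizer import FrameStabilizer

stabilizer = FrameStabilizer()
cap = cv2.VideoCapture("video.mp4")
while True:
    ok, frame = cap.read()
    if not ok:
        break
    # the first frame becomes the reference; frames 2-10 are averaged in as a
    # burn-in, after which every frame is warped to align with the reference
    stable = stabilizer.stabilize_frame(frame)
    cv2.imshow("stabilized", stable)
    if cv2.waitKey(1) == 27:  # Esc to quit
        break
cap.release()
cv2.destroyAllWindows()
```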
37 changes: 27 additions & 10 deletions main.py
@@ -408,7 +408,7 @@ def selectOutputFolder(self):
         folder = QFileDialog.getExistingDirectory(
             self,
             "Select Output Folder",
-            fetch_data("scoresight.json", "output_folder"),
+            fetch_data("scoresight.json", "output_folder", ""),
             options=QFileDialog.Option.ShowDirsOnly,
         )
         if folder and len(folder) > 0:
@@ -522,7 +522,7 @@ def vmixUiSetup(self):
         if mapping:
             self.vmixUpdater.set_field_mapping(mapping)
 
-        self.ui.tableView_vmixMapping.model().itemChanged.connect(
+        self.ui.tableView_vmixMapping.model().dataChanged.connect(
             self.vmixMappingChanged
         )

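Context for this one-line change: `itemChanged` exists only on `QStandardItemModel`, whereas `dataChanged` is declared on the `QAbstractItemModel` base class and is emitted by any model. A small sketch, using `QStringListModel` as a stand-in for the mapping table's model:

```python
# Sketch: dataChanged is declared on QAbstractItemModel, so every model has
# it; itemChanged is specific to QStandardItemModel. QStringListModel here is
# a stand-in for whatever model backs tableView_vmixMapping.
from PySide6.QtCore import QStringListModel

model = QStringListModel(["home", "away"])
model.dataChanged.connect(lambda top_left, bottom_right, roles=None: print("edited"))
model.setData(model.index(0), "HOME")  # emits dataChanged -> prints "edited"
# model.itemChanged.connect(...)       # AttributeError: no such signal here
```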
@@ -759,9 +759,9 @@ def connectObs(self):
         if self.obs_connect_modal is not None:
             self.obs_websocket_client = open_obs_websocket(
                 {
-                    "ip": self.obs_modal_ui.obs_connect_modal.lineEdit_ip.text(),
-                    "port": self.obs_modal_ui.obs_connect_modal.lineEdit_port.text(),
-                    "password": self.obs_modal_ui.obs_connect_modal.lineEdit_password.text(),
+                    "ip": self.obs_modal_ui.lineEdit_ip.text(),
+                    "port": self.obs_modal_ui.lineEdit_port.text(),
+                    "password": self.obs_modal_ui.lineEdit_password.text(),
                 }
             )
         else:
@@ -865,23 +865,26 @@ def sourceChanged(self, index):
self, "Open Video File", "", "Video Files (*.mp4 *.avi *.mov)"
)
if not file:
# no file selected - change source to "Select a source"
self.ui.comboBox_camera_source.setCurrentText("Select a source")
return
self.source_name = file
if self.source_name == "URL Source (HTTP, RTSP)":
# open a dialog to enter the url
url_dialog = QDialog()
ui_urlsource = Ui_UrlSource()
ui_urlsource.setupUi(url_dialog)

url_dialog.setWindowTitle("URL Source")
# focus on url input
ui_urlsource.lineEdit_url.setFocus()
url_dialog.exec() # wait for the dialog to close
# check if the dialog was accepted
if url_dialog.result() != QDialog.DialogCode.Accepted:
self.ui.comboBox_camera_source.setCurrentText("Select a source")
return
self.source_name = ui_urlsource.lineEdit_url.text()
if self.source_name == "":
self.ui.comboBox_camera_source.setCurrentText("Select a source")
return
if self.source_name == "Screen Capture":
# open a dialog to select the screen
@@ -898,6 +901,7 @@ def sourceChanged(self, index):
             screen_dialog.exec()
             # check if the dialog was accepted
             if screen_dialog.result() != QDialog.DialogCode.Accepted:
+                self.ui.comboBox_camera_source.setCurrentText("Select a source")
                 return
             # get the window ID from the comboBox_window
             window_id = ui_screencapture.comboBox_window.currentData()
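All three source dialogs (file, URL, screen capture) now roll the combo box back to the placeholder when the user cancels. A self-contained sketch of that accept/reject pattern (the combo box here is a stand-in, not the app's real widget):

```python
# Sketch of the cancel-handling this PR adds to the source dialogs:
# on rejection, restore the placeholder entry instead of keeping a
# half-configured source selected.
import sys

from PySide6.QtWidgets import QApplication, QComboBox, QDialog

app = QApplication(sys.argv)
combo = QComboBox()
combo.addItems(["Select a source", "URL Source (HTTP, RTSP)", "Screen Capture"])

dialog = QDialog()
dialog.exec()  # blocks until the user closes the dialog
if dialog.result() != QDialog.DialogCode.Accepted:
    combo.setCurrentText("Select a source")  # roll the selection back
```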
@@ -932,20 +936,29 @@ def sourceSelectionSucessful(self):
         self.ui.frame_source_view.setEnabled(False)
 
         if self.ui.comboBox_camera_source.currentData() == "file":
+            if self.source_name is None:
+                logger.error("No file selected")
+                return
             camera_info = CameraInfo(
                 self.source_name,
                 self.source_name,
                 self.source_name,
                 CameraInfo.CameraType.FILE,
             )
         elif self.ui.comboBox_camera_source.currentData() == "url":
+            if self.source_name is None:
+                logger.error("No url entered")
+                return
             camera_info = CameraInfo(
                 self.source_name,
                 self.source_name,
                 self.source_name,
                 CameraInfo.CameraType.URL,
             )
         elif self.ui.comboBox_camera_source.currentData() == "screen_capture":
+            if self.source_name is None:
+                logger.error("No screen capture selected")
+                return
             camera_info = CameraInfo(
                 self.source_name,
                 self.source_name,
@@ -1056,7 +1069,8 @@ def ocrResult(self, results: list[TextDetectionTargetWithResult]):
             if targetWithResult.result is None:
                 continue
             if (
-                "skip_empty" in targetWithResult.settings
+                targetWithResult.settings is not None
+                and "skip_empty" in targetWithResult.settings
                 and targetWithResult.settings["skip_empty"]
                 and len(targetWithResult.result) == 0
             ):
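The reordering works because `and` short-circuits: the `None` check runs before the membership test, so targets without settings no longer raise. For example:

```python
# Why the reordering matters: a membership test on None raises, but the
# None check short-circuits before "in" is ever evaluated.
settings = None
# "skip_empty" in settings  # TypeError: argument of type 'NoneType' is not iterable
print(settings is not None and "skip_empty" in settings)  # False, no exception
```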
@@ -1067,7 +1081,10 @@ def ocrResult(self, results: list[TextDetectionTargetWithResult]):
             ):
                 continue
 
-            if self.obs_websocket_client is not None:
+            if (
+                self.obs_websocket_client is not None
+                and targetWithResult.settings is not None
+            ):
                 # find the source name for the target from the default boxes
                 update_text_source(
                     self.obs_websocket_client,
@@ -1202,12 +1219,12 @@ def removeBox(self):
             self.detectionTargetsStorage.remove_item(item.text())
 
     def createOBSScene(self):
-        self.ui.statusbar().showMessage("Creating OBS scene")
+        self.ui.statusbar.showMessage("Creating OBS scene")
         # get the scene name from the lineEdit_sceneName
         scene_name = self.ui.lineEdit_sceneName.text()
         # clear or create a new scene
         create_obs_scene_from_export(self.obs_websocket_client, scene_name)
-        self.ui.statusbar().showMessage("Finished creating scene")
+        self.ui.statusbar.showMessage("Finished creating scene")
 
     # on destroy, close the OBS connection
     def closeEvent(self, event):
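The `statusbar` change fixes a call on a non-callable: in uic-generated UI classes the status bar is a plain attribute holding a `QStatusBar`, not a getter method. A hedged reproduction (the attribute assignment mimics what generated setup code provides):

```python
# Sketch of the bug createOBSScene hit: the generated UI exposes the status
# bar as an attribute, and calling it raised a TypeError.
import sys

from PySide6.QtWidgets import QApplication, QMainWindow, QStatusBar

app = QApplication(sys.argv)
window = QMainWindow()
window.statusbar = QStatusBar(window)  # mimics uic-generated setup code
window.statusbar.showMessage("Creating OBS scene")  # correct usage
# window.statusbar().showMessage(...)  # TypeError: 'QStatusBar' object is not callable
```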