Merge pull request #9 from occ-ai/roy.box_border_ui_errs
Refactor code to handle text detection target result states and updat…
royshil authored May 23, 2024
2 parents b81852f + d5e4028 commit b7b346c
Showing 12 changed files with 304 additions and 312 deletions.
16 changes: 11 additions & 5 deletions README.md
@@ -94,13 +94,19 @@ There are some extra steps for installation on Windows:

 ### Running from source
 
-1. Once everything is installed launch the application:
+1. Compile the UI files into Python:
 
-   ```shell
-   python main.py
-   ```
+   ```powershell
+   ./scripts/compile_ui.ps1
+   ```
+1. Launch the application:
+   ```shell
+   python main.py
+   ```
-2. Follow the on-screen instructions to load an image of the scoreboard and extract the text.
+1. Follow the on-screen instructions to load an image of the scoreboard and extract the text.
 ### Build an executable
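A note on the new first step: `main.py` imports the generated UI modules, so the Qt Designer `.ui` files must be compiled before launch. The PowerShell script itself is not shown in this diff; a hypothetical cross-platform stand-in, assuming `pyside6-uic` from PySide6 and a flat layout of `.ui` files, might look like:

```python
# Hypothetical stand-in for scripts/compile_ui.ps1 (the real script is not in
# this diff): compile each Qt Designer .ui file into a Python module using
# pyside6-uic, the UI compiler that ships with PySide6.
import glob
import subprocess

for ui_file in glob.glob("*.ui"):
    out_file = ui_file.replace(".ui", "_ui.py")  # output naming is an assumption
    subprocess.run(["pyside6-uic", ui_file, "-o", out_file], check=True)
```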
75 changes: 7 additions & 68 deletions camera_view.py
@@ -1,5 +1,3 @@
-import platform
-import time
 from PySide6.QtWidgets import (
     QGraphicsView,
     QGraphicsScene,
@@ -8,19 +6,22 @@
 from PySide6.QtCore import Qt
 from PySide6.QtGui import QImage, QPixmap, QPainter
 from PySide6.QtCore import QThread, Signal
 
+import platform
+import time
 import cv2
 import numpy as np
 import datetime
 from datetime import datetime
 
 from camera_info import CameraInfo
 from ndi import NDICapture
 from screen_capture_source import ScreenCapture
 
 from storage import TextDetectionTargetMemoryStorage
 from tesseract import TextDetector
-import datetime
-from datetime import datetime
 
 from text_detection_target import TextDetectionTargetWithResult
 from sc_logging import logger
+from frame_stabilizer import FrameStabilizer
 
 
 # Function to set the resolution
@@ -80,68 +81,6 @@ def set_camera_highest_resolution(cap):
     set_resolution(cap, *highest_res)
 
 
-class FrameStabilizer:
-    def __init__(self):
-        self.stabilizationFrame = None
-        self.stabilizationFrameCount = 0
-        self.stabilizationBurnInCompleted = False
-        self.stabilizationKPs = None
-        self.stabilizationDesc = None
-        self.orb = None
-        self.matcher = None
-
-    def reset(self):
-        self.stabilizationFrame = None
-        self.stabilizationFrameCount = 0
-        self.stabilizationBurnInCompleted = False
-        self.stabilizationKPs = None
-        self.stabilizationDesc = None
-
-    def stabilize_frame(self, frame_rgb):
-        if self.stabilizationFrame is None:
-            self.stabilizationFrame = frame_rgb
-            self.stabilizationFrameCount = 0
-        elif not self.stabilizationBurnInCompleted:
-            self.stabilizationFrameCount += 1
-            # add the new frame to the stabilization frame
-            frame_rgb = cv2.addWeighted(frame_rgb, 0.5, self.stabilizationFrame, 0.5, 0)
-            if self.stabilizationFrameCount == 10:
-                self.stabilizationBurnInCompleted = True
-                # extract ORB features from the stabilization frame
-                self.orb = cv2.ORB_create()
-                self.stabilizationKPs, self.stabilizationDesc = (
-                    self.orb.detectAndCompute(self.stabilizationFrame, None)
-                )
-                self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
-
-        if self.stabilizationBurnInCompleted:
-            # stabilization burn-in period is over, start stabilization
-            # extract features from the current frame
-            kps, desc = self.orb.detectAndCompute(frame_rgb, None)
-            # match the features
-            matches = self.matcher.match(self.stabilizationDesc, desc)
-            # sort the matches by distance
-            matches = sorted(matches, key=lambda x: x.distance)
-            # calculate an affine transform from the matched keypoints
-            src_pts = np.float32(
-                [self.stabilizationKPs[m.queryIdx].pt for m in matches]
-            ).reshape(-1, 1, 2)
-            dst_pts = np.float32([kps[m.trainIdx].pt for m in matches]).reshape(
-                -1, 1, 2
-            )
-            h, _ = cv2.estimateAffinePartial2D(src_pts, dst_pts)
-            # warp the frame
-            if h is not None:
-                frame_rgb = cv2.warpAffine(
-                    frame_rgb,
-                    h,
-                    (frame_rgb.shape[1], frame_rgb.shape[0]),
-                    flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR,
-                )
-
-        return frame_rgb
 
 
 class TimerThread(QThread):
     update_signal = Signal(object)
     update_error = Signal(object)
74 changes: 74 additions & 0 deletions frame_stabilizer.py
@@ -0,0 +1,74 @@
+import cv2
+import numpy as np
+
+
+# This class is used to stabilize the frames of the video.
+# It uses ORB features to match keypoints between frames and calculate an affine transform to
+# warp the frame.
+class FrameStabilizer:
+    def __init__(self):
+        self.stabilizationFrame = None
+        self.stabilizationFrameCount = 0
+        self.stabilizationBurnInCompleted = False
+        self.stabilizationKPs = None
+        self.stabilizationDesc = None
+        self.orb = None
+        self.matcher = None
+
+    def reset(self):
+        self.stabilizationFrame = None
+        self.stabilizationFrameCount = 0
+        self.stabilizationBurnInCompleted = False
+        self.stabilizationKPs = None
+        self.stabilizationDesc = None
+
+    def stabilize_frame(self, frame_rgb):
+        if self.stabilizationFrame is None:
+            self.stabilizationFrame = frame_rgb
+            self.stabilizationFrameCount = 0
+        elif not self.stabilizationBurnInCompleted:
+            self.stabilizationFrameCount += 1
+            # add the new frame to the stabilization frame
+            frame_rgb = cv2.addWeighted(frame_rgb, 0.5, self.stabilizationFrame, 0.5, 0)
+            if self.stabilizationFrameCount == 10:
+                self.stabilizationBurnInCompleted = True
+                # extract ORB features from the stabilization frame
+                self.orb = cv2.ORB_create()
+                self.stabilizationKPs, self.stabilizationDesc = (
+                    self.orb.detectAndCompute(self.stabilizationFrame, None)
+                )
+                self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
+
+        if (
+            self.stabilizationBurnInCompleted
+            and self.stabilizationFrame is not None
+            and self.orb is not None
+            and self.matcher is not None
+            and self.stabilizationKPs is not None
+            and self.stabilizationDesc is not None
+        ):
+            # stabilization burn-in period is over, start stabilization
+            # extract features from the current frame
+            kps, desc = self.orb.detectAndCompute(frame_rgb, None)
+            # match the features
+            matches = self.matcher.match(self.stabilizationDesc, desc)
+            # sort the matches by distance
+            matches = sorted(matches, key=lambda x: x.distance)
+            # calculate an affine transform from the matched keypoints
+            src_pts = np.float32(
+                [self.stabilizationKPs[m.queryIdx].pt for m in matches]
+            ).reshape(-1, 1, 2)
+            dst_pts = np.float32([kps[m.trainIdx].pt for m in matches]).reshape(
+                -1, 1, 2
+            )
+            h, _ = cv2.estimateAffinePartial2D(src_pts, dst_pts)
+            # warp the frame
+            if h is not None:
+                frame_rgb = cv2.warpAffine(
+                    frame_rgb,
+                    h,
+                    (frame_rgb.shape[1], frame_rgb.shape[0]),
+                    flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR,
+                )
+
+        return frame_rgb
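With the stabilizer in its own module it can be exercised in isolation. A minimal usage sketch, assuming any OpenCV-readable source ("video.mp4" is a placeholder path, not part of the PR):

```python
# Minimal sketch: run frames from an OpenCV source through the extracted
# FrameStabilizer. "video.mp4" is a placeholder path.
import cv2

from frame_stabilizer import FrameStabilizer

stabilizer = FrameStabilizer()
cap = cv2.VideoCapture("video.mp4")
while True:
    ok, frame = cap.read()
    if not ok:
        break
    # the first frame becomes the reference; frames 2-10 are averaged in as a
    # burn-in, after which every frame is warped to align with the reference
    stable = stabilizer.stabilize_frame(frame)
    cv2.imshow("stabilized", stable)
    if cv2.waitKey(1) == 27:  # Esc to quit
        break
cap.release()
cv2.destroyAllWindows()
```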
37 changes: 27 additions & 10 deletions main.py
@@ -408,7 +408,7 @@ def selectOutputFolder(self):
         folder = QFileDialog.getExistingDirectory(
             self,
             "Select Output Folder",
-            fetch_data("scoresight.json", "output_folder"),
+            fetch_data("scoresight.json", "output_folder", ""),
             options=QFileDialog.Option.ShowDirsOnly,
         )
         if folder and len(folder) > 0:
@@ -522,7 +522,7 @@ def vmixUiSetup(self):
         if mapping:
             self.vmixUpdater.set_field_mapping(mapping)
 
-        self.ui.tableView_vmixMapping.model().itemChanged.connect(
+        self.ui.tableView_vmixMapping.model().dataChanged.connect(
             self.vmixMappingChanged
         )

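Context for this one-line change: `itemChanged` exists only on `QStandardItemModel`, whereas `dataChanged` is declared on the `QAbstractItemModel` base class and is emitted by any model. A small sketch, using `QStringListModel` as a stand-in for the mapping table's model:

```python
# Sketch: dataChanged is declared on QAbstractItemModel, so every model has
# it; itemChanged is specific to QStandardItemModel. QStringListModel here is
# a stand-in for whatever model backs tableView_vmixMapping.
from PySide6.QtCore import QStringListModel

model = QStringListModel(["home", "away"])
model.dataChanged.connect(lambda top_left, bottom_right, roles=None: print("edited"))
model.setData(model.index(0), "HOME")  # emits dataChanged -> prints "edited"
# model.itemChanged.connect(...)       # AttributeError: no such signal here
```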
@@ -759,9 +759,9 @@ def connectObs(self):
         if self.obs_connect_modal is not None:
             self.obs_websocket_client = open_obs_websocket(
                 {
-                    "ip": self.obs_modal_ui.obs_connect_modal.lineEdit_ip.text(),
-                    "port": self.obs_modal_ui.obs_connect_modal.lineEdit_port.text(),
-                    "password": self.obs_modal_ui.obs_connect_modal.lineEdit_password.text(),
+                    "ip": self.obs_modal_ui.lineEdit_ip.text(),
+                    "port": self.obs_modal_ui.lineEdit_port.text(),
+                    "password": self.obs_modal_ui.lineEdit_password.text(),
                 }
             )
         else:
@@ -865,23 +865,26 @@ def sourceChanged(self, index):
self, "Open Video File", "", "Video Files (*.mp4 *.avi *.mov)"
)
if not file:
# no file selected - change source to "Select a source"
self.ui.comboBox_camera_source.setCurrentText("Select a source")
return
self.source_name = file
if self.source_name == "URL Source (HTTP, RTSP)":
# open a dialog to enter the url
url_dialog = QDialog()
ui_urlsource = Ui_UrlSource()
ui_urlsource.setupUi(url_dialog)

url_dialog.setWindowTitle("URL Source")
# focus on url input
ui_urlsource.lineEdit_url.setFocus()
url_dialog.exec() # wait for the dialog to close
# check if the dialog was accepted
if url_dialog.result() != QDialog.DialogCode.Accepted:
self.ui.comboBox_camera_source.setCurrentText("Select a source")
return
self.source_name = ui_urlsource.lineEdit_url.text()
if self.source_name == "":
self.ui.comboBox_camera_source.setCurrentText("Select a source")
return
if self.source_name == "Screen Capture":
# open a dialog to select the screen
@@ -898,6 +901,7 @@ def sourceChanged(self, index):
             screen_dialog.exec()
             # check if the dialog was accepted
             if screen_dialog.result() != QDialog.DialogCode.Accepted:
+                self.ui.comboBox_camera_source.setCurrentText("Select a source")
                 return
             # get the window ID from the comboBox_window
             window_id = ui_screencapture.comboBox_window.currentData()
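All three source dialogs (file, URL, screen capture) now roll the combo box back to the placeholder when the user cancels. A self-contained sketch of that accept/reject pattern (the combo box here is a stand-in, not the app's real widget):

```python
# Sketch of the cancel-handling this PR adds to the source dialogs:
# on rejection, restore the placeholder entry instead of keeping a
# half-configured source selected.
import sys

from PySide6.QtWidgets import QApplication, QComboBox, QDialog

app = QApplication(sys.argv)
combo = QComboBox()
combo.addItems(["Select a source", "URL Source (HTTP, RTSP)", "Screen Capture"])

dialog = QDialog()
dialog.exec()  # blocks until the user closes the dialog
if dialog.result() != QDialog.DialogCode.Accepted:
    combo.setCurrentText("Select a source")  # roll the selection back
```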
@@ -932,20 +936,29 @@ def sourceSelectionSucessful(self):
         self.ui.frame_source_view.setEnabled(False)
 
         if self.ui.comboBox_camera_source.currentData() == "file":
+            if self.source_name is None:
+                logger.error("No file selected")
+                return
             camera_info = CameraInfo(
                 self.source_name,
                 self.source_name,
                 self.source_name,
                 CameraInfo.CameraType.FILE,
             )
         elif self.ui.comboBox_camera_source.currentData() == "url":
+            if self.source_name is None:
+                logger.error("No url entered")
+                return
             camera_info = CameraInfo(
                 self.source_name,
                 self.source_name,
                 self.source_name,
                 CameraInfo.CameraType.URL,
             )
         elif self.ui.comboBox_camera_source.currentData() == "screen_capture":
+            if self.source_name is None:
+                logger.error("No screen capture selected")
+                return
             camera_info = CameraInfo(
                 self.source_name,
                 self.source_name,
@@ -1056,7 +1069,8 @@ def ocrResult(self, results: list[TextDetectionTargetWithResult]):
             if targetWithResult.result is None:
                 continue
             if (
-                "skip_empty" in targetWithResult.settings
+                targetWithResult.settings is not None
+                and "skip_empty" in targetWithResult.settings
                 and targetWithResult.settings["skip_empty"]
                 and len(targetWithResult.result) == 0
             ):
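The reordering works because `and` short-circuits: the `None` check runs before the membership test, so targets without settings no longer raise. For example:

```python
# Why the reordering matters: a membership test on None raises, but the
# None check short-circuits before "in" is ever evaluated.
settings = None
# "skip_empty" in settings  # TypeError: argument of type 'NoneType' is not iterable
print(settings is not None and "skip_empty" in settings)  # False, no exception
```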
@@ -1067,7 +1081,10 @@ def ocrResult(self, results: list[TextDetectionTargetWithResult]):
             ):
                 continue
 
-            if self.obs_websocket_client is not None:
+            if (
+                self.obs_websocket_client is not None
+                and targetWithResult.settings is not None
+            ):
                 # find the source name for the target from the default boxes
                 update_text_source(
                     self.obs_websocket_client,
@@ -1202,12 +1219,12 @@ def removeBox(self):
             self.detectionTargetsStorage.remove_item(item.text())
 
     def createOBSScene(self):
-        self.ui.statusbar().showMessage("Creating OBS scene")
+        self.ui.statusbar.showMessage("Creating OBS scene")
         # get the scene name from the lineEdit_sceneName
         scene_name = self.ui.lineEdit_sceneName.text()
         # clear or create a new scene
         create_obs_scene_from_export(self.obs_websocket_client, scene_name)
-        self.ui.statusbar().showMessage("Finished creating scene")
+        self.ui.statusbar.showMessage("Finished creating scene")
 
     # on destroy, close the OBS connection
     def closeEvent(self, event):
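The `statusbar` change fixes a call on a non-callable: in uic-generated UI classes the status bar is a plain attribute holding a `QStatusBar`, not a getter method. A hedged reproduction (the attribute assignment mimics what generated setup code provides):

```python
# Sketch of the bug createOBSScene hit: the generated UI exposes the status
# bar as an attribute, and calling it raised a TypeError.
import sys

from PySide6.QtWidgets import QApplication, QMainWindow, QStatusBar

app = QApplication(sys.argv)
window = QMainWindow()
window.statusbar = QStatusBar(window)  # mimics uic-generated setup code
window.statusbar.showMessage("Creating OBS scene")  # correct usage
# window.statusbar().showMessage(...)  # TypeError: 'QStatusBar' object is not callable
```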