Skip to content
This repository has been archived by the owner on Dec 18, 2024. It is now read-only.

Commit

Permalink
Merge pull request #17 from occ-ai/roy.crop_region
Browse files Browse the repository at this point in the history
Crop region
  • Loading branch information
royshil authored May 30, 2024
2 parents ec749c9 + ebe7880 commit 1d80702
Show file tree
Hide file tree
Showing 6 changed files with 147 additions and 11 deletions.
10 changes: 8 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,13 @@ endif()
target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE vendor)

target_sources(
${CMAKE_PROJECT_NAME} PRIVATE src/plugin-main.c src/detect-filter.cpp src/detect-filter-info.c
src/obs-utils/obs-utils.cpp src/edgeyolo/edgeyolo_onnxruntime.cpp src/sort/Sort.cpp)
${CMAKE_PROJECT_NAME}
PRIVATE src/plugin-main.c
src/detect-filter.cpp
src/detect-filter-info.c
src/detect-filter-utils.cpp
src/obs-utils/obs-utils.cpp
src/edgeyolo/edgeyolo_onnxruntime.cpp
src/sort/Sort.cpp)

set_target_properties_plugin(${CMAKE_PROJECT_NAME} PROPERTIES OUTPUT_NAME ${_name})
5 changes: 5 additions & 0 deletions data/locale/en-US.ini
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,8 @@ ExternalModel="External Model"
ModelPath="Model Path"
ShowUnseenObjects="Show Currently Undetected Objects"
SaveDetectionsPath="Save Detections Path"
CropGroup="Crop Region"
CropLeft="Left"
CropTop="Top"
CropRight="Right"
CropBottom="Bottom"
5 changes: 5 additions & 0 deletions src/FilterData.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ struct filter_data {
bool sortTracking;
bool showUnseenObjects;
std::string saveDetectionsPath;
bool crop_enabled;
int crop_left;
int crop_right;
int crop_top;
int crop_bottom;

// create SORT tracker
Sort tracker;
Expand Down
40 changes: 40 additions & 0 deletions src/detect-filter-utils.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#include <opencv2/opencv.hpp>
using namespace cv;

/// Draw a dashed line from pt1 to pt2 on img.
/// Dashes of length `dashLength` alternate with equally sized gaps,
/// starting with a drawn dash at pt1. The final dash is clipped to the
/// line's endpoint.
/// @param img        image to draw on (modified in place)
/// @param pt1        start point of the line
/// @param pt2        end point of the line
/// @param color      line color
/// @param thickness  stroke thickness passed to cv::line
/// @param lineType   line type passed to cv::line (e.g. cv::LINE_8)
/// @param dashLength length in pixels of each dash and each gap;
///                   values <= 0 are treated as a solid line
void drawDashedLine(Mat &img, Point pt1, Point pt2, Scalar color, int thickness, int lineType,
		    int dashLength)
{
	// Guard: a non-positive dash length would never advance the loop
	// below (infinite loop). Fall back to a solid line instead.
	if (dashLength <= 0) {
		line(img, pt1, pt2, color, thickness, lineType);
		return;
	}

	double lineLength = norm(pt1 - pt2);
	double angle = atan2(pt2.y - pt1.y, pt2.x - pt1.x);
	// Direction cosines are loop-invariant; compute them once.
	const double dirX = cos(angle);
	const double dirY = sin(angle);

	Point p1 = pt1;
	Point p2;
	bool draw = true; // alternates: dash, gap, dash, ...

	for (double d = 0; d < lineLength; d += dashLength) {
		// End of the current segment, clipped to the line's end.
		const double segEnd = std::min(d + dashLength, lineLength);
		if (draw) {
			p2.x = pt1.x + static_cast<int>(dirX * segEnd);
			p2.y = pt1.y + static_cast<int>(dirY * segEnd);
			line(img, p1, p2, color, thickness, lineType);
		}
		// Advance the segment start point for the next iteration.
		p1.x = pt1.x + static_cast<int>(dirX * (d + dashLength));
		p1.y = pt1.y + static_cast<int>(dirY * (d + dashLength));
		draw = !draw;
	}
}

/// Draw the outline of `rect` on `img` as a dashed rectangle.
/// Each of the four edges is rendered with drawDashedLine using the
/// same color, thickness, line type and dash length.
void drawDashedRectangle(Mat &img, Rect rect, Scalar color, int thickness, int lineType,
			 int dashLength)
{
	// The four corners in clockwise order starting at the top-left.
	const Point corners[4] = {
		Point(rect.x, rect.y),
		Point(rect.x + rect.width, rect.y),
		Point(rect.x + rect.width, rect.y + rect.height),
		Point(rect.x, rect.y + rect.height),
	};

	// Connect each corner to the next, wrapping back to the first.
	for (int i = 0; i < 4; i++) {
		drawDashedLine(img, corners[i], corners[(i + 1) % 4], color, thickness, lineType,
			       dashLength);
	}
}
12 changes: 12 additions & 0 deletions src/detect-filter-utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#ifndef DETECT_FILTER_UTILS_H
#define DETECT_FILTER_UTILS_H

#include <opencv2/core/types.hpp>

/// Drawing helpers for visualizing the crop region on preview frames.

/// Draw a dashed line from pt1 to pt2 on img. Dashes of `dashLength`
/// pixels alternate with equally sized gaps; thickness and lineType are
/// forwarded to cv::line.
void drawDashedLine(cv::Mat &img, cv::Point pt1, cv::Point pt2, cv::Scalar color, int thickness = 1,
		    int lineType = 8, int dashLength = 10);

/// Draw the outline of `rect` on img as four dashed edges, using
/// drawDashedLine with the given color, thickness, lineType and dashLength.
void drawDashedRectangle(cv::Mat &img, cv::Rect rect, cv::Scalar color, int thickness = 1,
			 int lineType = 8, int dashLength = 10);

#endif // DETECT_FILTER_UTILS_H
86 changes: 77 additions & 9 deletions src/detect-filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "consts.h"
#include "obs-utils/obs-utils.h"
#include "edgeyolo/utils.hpp"
#include "detect-filter-utils.h"

#define EXTERNAL_MODEL_SIZE "!!!EXTERNAL_MODEL!!!"

Expand Down Expand Up @@ -54,7 +55,7 @@ static bool enable_advanced_settings(obs_properties_t *ppts, obs_property_t *p,

for (const char *prop_name :
{"threshold", "useGPU", "numThreads", "model_size", "detected_object", "sort_tracking",
"max_unseen_frames", "show_unseen_objects", "save_detections_path"}) {
"max_unseen_frames", "show_unseen_objects", "save_detections_path", "crop_group"}) {
p = obs_properties_get(ppts, prop_name);
obs_property_set_visible(p, enabled);
}
Expand Down Expand Up @@ -247,6 +248,33 @@ obs_properties_t *detect_filter_properties(void *data)
// If advanced is selected show the advanced settings, otherwise hide them
obs_property_set_modified_callback(advanced, enable_advanced_settings);

// add a checkable group for crop region settings
obs_properties_t *crop_group_props = obs_properties_create();
obs_property_t *crop_group =
obs_properties_add_group(props, "crop_group", obs_module_text("CropGroup"),
OBS_GROUP_CHECKABLE, crop_group_props);

// add callback to show/hide crop region options
obs_property_set_modified_callback(crop_group, [](obs_properties_t *props_,
obs_property_t *, obs_data_t *settings) {
const bool enabled = obs_data_get_bool(settings, "crop_group");
for (auto prop_name : {"crop_left", "crop_right", "crop_top", "crop_bottom"}) {
obs_property_t *prop = obs_properties_get(props_, prop_name);
obs_property_set_visible(prop, enabled);
}
return true;
});

// add crop region settings
obs_properties_add_int_slider(crop_group_props, "crop_left", obs_module_text("CropLeft"), 0,
1000, 1);
obs_properties_add_int_slider(crop_group_props, "crop_right", obs_module_text("CropRight"),
0, 1000, 1);
obs_properties_add_int_slider(crop_group_props, "crop_top", obs_module_text("CropTop"), 0,
1000, 1);
obs_properties_add_int_slider(crop_group_props, "crop_bottom",
obs_module_text("CropBottom"), 0, 1000, 1);

// add a text input for the currently detected object
obs_property_t *detected_obj_prop = obs_properties_add_text(
props, "detected_object", obs_module_text("DetectedObject"), OBS_TEXT_DEFAULT);
Expand Down Expand Up @@ -388,6 +416,11 @@ void detect_filter_defaults(obs_data_t *settings)
obs_data_set_default_double(settings, "zoom_speed_factor", 0.05);
obs_data_set_default_string(settings, "zoom_object", "single");
obs_data_set_default_string(settings, "save_detections_path", "");
obs_data_set_default_bool(settings, "crop_group", false);
obs_data_set_default_int(settings, "crop_left", 0);
obs_data_set_default_int(settings, "crop_right", 0);
obs_data_set_default_int(settings, "crop_top", 0);
obs_data_set_default_int(settings, "crop_bottom", 0);
}

void detect_filter_update(void *data, obs_data_t *settings)
Expand Down Expand Up @@ -416,6 +449,11 @@ void detect_filter_update(void *data, obs_data_t *settings)
}
tf->showUnseenObjects = obs_data_get_bool(settings, "show_unseen_objects");
tf->saveDetectionsPath = obs_data_get_string(settings, "save_detections_path");
tf->crop_enabled = obs_data_get_bool(settings, "crop_group");
tf->crop_left = (int)obs_data_get_int(settings, "crop_left");
tf->crop_right = (int)obs_data_get_int(settings, "crop_right");
tf->crop_top = (int)obs_data_get_int(settings, "crop_top");
tf->crop_bottom = (int)obs_data_get_int(settings, "crop_bottom");

// check if tracking state has changed
if (tf->trackingEnabled != newTrackingEnabled) {
Expand Down Expand Up @@ -730,19 +768,39 @@ void detect_filter_video_tick(void *data, float seconds)
imageBGRA = tf->inputBGRA.clone();
}

cv::Mat frame;
cv::cvtColor(imageBGRA, frame, cv::COLOR_BGRA2BGR);
cv::Mat inferenceFrame;

cv::Rect cropRect(0, 0, imageBGRA.cols, imageBGRA.rows);
if (tf->crop_enabled) {
cropRect = cv::Rect(tf->crop_left, tf->crop_top,
imageBGRA.cols - tf->crop_left - tf->crop_right,
imageBGRA.rows - tf->crop_top - tf->crop_bottom);
obs_log(LOG_INFO, "Crop: %d %d %d %d", cropRect.x, cropRect.y, cropRect.width,
cropRect.height);
cv::cvtColor(imageBGRA(cropRect), inferenceFrame, cv::COLOR_BGRA2BGR);
} else {
cv::cvtColor(imageBGRA, inferenceFrame, cv::COLOR_BGRA2BGR);
}

std::vector<edgeyolo_cpp::Object> objects;

try {
std::unique_lock<std::mutex> lock(tf->modelMutex);
objects = tf->edgeyolo->inference(frame);
objects = tf->edgeyolo->inference(inferenceFrame);
} catch (const Ort::Exception &e) {
obs_log(LOG_ERROR, "ONNXRuntime Exception: %s", e.what());
} catch (const std::exception &e) {
obs_log(LOG_ERROR, "%s", e.what());
}

if (tf->crop_enabled) {
// translate the detected objects to the original frame
for (edgeyolo_cpp::Object &obj : objects) {
obj.rect.x += (float)cropRect.x;
obj.rect.y += (float)cropRect.y;
}
}

// update the detected object text input
if (objects.size() > 0) {
if (tf->lastDetectedObjectId != objects[0].label) {
Expand Down Expand Up @@ -811,6 +869,13 @@ void detect_filter_video_tick(void *data, float seconds)
}

if (tf->preview || tf->maskingEnabled) {
cv::Mat frame;
cv::cvtColor(imageBGRA, frame, cv::COLOR_BGRA2BGR);

if (tf->preview && tf->crop_enabled) {
// draw the crop rectangle on the frame in a dashed line
drawDashedRectangle(frame, cropRect, cv::Scalar(0, 255, 0), 5, 8, 15);
}
if (tf->preview && objects.size() > 0) {
edgeyolo_cpp::utils::draw_objects(frame, objects, tf->classNames);
}
Expand All @@ -828,7 +893,10 @@ void detect_filter_video_tick(void *data, float seconds)
}

if (tf->trackingEnabled && tf->trackingFilter) {
cv::Rect2f boundingBox = cv::Rect2f(0, 0, (float)frame.cols, (float)frame.rows);
const int width = imageBGRA.cols;
const int height = imageBGRA.rows;

cv::Rect2f boundingBox = cv::Rect2f(0, 0, (float)width, (float)height);
// get location of the objects
if (tf->zoomObject == "single") {
if (objects.size() > 0) {
Expand All @@ -847,11 +915,11 @@ void detect_filter_video_tick(void *data, float seconds)
// the zooming box should maintain the aspect ratio of the image
// with the tf->zoomFactor controlling the effective buffer around the bounding box
// the bounding box is the center of the zooming box
float frameAspectRatio = (float)frame.cols / (float)frame.rows;
float frameAspectRatio = (float)width / (float)height;
// calculate an aspect ratio box around the object using its height
float boxHeight = boundingBox.height;
// calculate the zooming box size
float dh = (float)frame.rows - boxHeight;
float dh = (float)height - boxHeight;
float buffer = dh * (1.0f - tf->zoomFactor);
float zh = boxHeight + buffer;
float zw = zh * frameAspectRatio;
Expand Down Expand Up @@ -882,11 +950,11 @@ void detect_filter_video_tick(void *data, float seconds)
// right = image width - (zx + zw)
obs_data_set_int(
crop_pad_settings, "right",
(int)((float)frame.cols - (tf->trackingRect.x + tf->trackingRect.width)));
(int)((float)width - (tf->trackingRect.x + tf->trackingRect.width)));
// bottom = image height - (zy + zh)
obs_data_set_int(
crop_pad_settings, "bottom",
(int)((float)frame.rows - (tf->trackingRect.y + tf->trackingRect.height)));
(int)((float)height - (tf->trackingRect.y + tf->trackingRect.height)));
// apply the settings
obs_source_update(tf->trackingFilter, crop_pad_settings);
obs_data_release(crop_pad_settings);
Expand Down

0 comments on commit 1d80702

Please sign in to comment.