Skip to content

Commit

Permalink
Update OCR Plugin features and add new localization strings (#21)
Browse files Browse the repository at this point in the history
* Update OCR Plugin features and add new localization strings

* Refactor OCR filter properties callback in ocr-filter.cpp and include necessary headers in text-render-helper.cpp
  • Loading branch information
royshil authored Apr 11, 2024
1 parent ab93e5d commit 16c63d6
Show file tree
Hide file tree
Showing 11 changed files with 163 additions and 24 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,6 @@ include(cmake/BuildInja.cmake)
target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE inja)

target_sources(${CMAKE_PROJECT_NAME} PRIVATE src/plugin-main.c src/obs-utils.cpp src/tesseract-ocr-utils.cpp
src/ocr-filter.cpp src/ocr-filter-info.c)
src/ocr-filter.cpp src/ocr-filter-info.c src/text-render-helper.cpp)

set_target_properties_plugin(${CMAKE_PROJECT_NAME} PROPERTIES OUTPUT_NAME ${_name})
4 changes: 4 additions & 0 deletions data/locale/en-US.ini
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,7 @@ PreviewBinarization="Preview Binarization"
RescaleImage="Rescale Image"
RescaleTargetSize="Rescale Target Size"
DilationIterations="Dilation Iterations"
ImageOutputOption="Image Output Option"
DetectionBoxesMask="Detection Boxes Mask"
TextRendering="Text Overlay"
TextWithBackground="Text with Background"
4 changes: 4 additions & 0 deletions src/consts.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,8 @@ const char *const WHITELIST_CHARS_PORTUGUESE =
const char *const WHITELIST_CHARS_RUSSIAN =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()_+-=[]{}|;':\",./<>?`~\\абвгдеёжзийклмнопрстуфхцчшщъыьэюя ";

/*
 * Values of the "image_output_option" filter setting. They are offered as the
 * entries of the image output option list in the filter properties and are
 * read back into filter_data::output_image_option.
 */
/* Filled white rectangles for every detected box on an opaque black image. */
const int OUTPUT_IMAGE_OPTION_DETECTION_MASK = 0;
/* The recognized text of every box rendered onto a transparent image. */
const int OUTPUT_IMAGE_OPTION_TEXT_OVERLAY = 1;
/*
 * Listed in the properties as "Text with Background".
 * NOTE(review): the matching branch in tesseract_thread appears to be commented
 * out, so this value currently seems to fall through to the detection mask
 * output - confirm.
 */
const int OUTPUT_IMAGE_OPTION_TEXT_BACKGROUND = 2;

#endif /* CONSTS_H */
1 change: 1 addition & 0 deletions src/filter-data.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ struct filter_data {
std::string output_format_template;
bool update_on_change;
int update_on_change_threshold;
int output_image_option;

bool isDisabled;

Expand Down
17 changes: 13 additions & 4 deletions src/obs-utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,9 @@ void setTextCallback(const std::string &str, struct filter_data *usd)
obs_source_release(target);
};

void setTextDetectionMaskCallback(const cv::Mat &mask, struct filter_data *usd)
void setTextDetectionMaskCallback(const cv::Mat &mask_rgba, struct filter_data *usd)
{
UNUSED_PARAMETER(mask);
UNUSED_PARAMETER(mask_rgba);
if (!usd->output_source_mutex) {
obs_log(LOG_ERROR, "output_source_mutex is null");
return;
Expand Down Expand Up @@ -191,7 +191,7 @@ void setTextDetectionMaskCallback(const cv::Mat &mask, struct filter_data *usd)
std::string config_folder = obs_module_config_path("");
std::string filename = config_folder + "/" + usd->unique_id + ".png";
// write the file
write_png_file(filename.c_str(), mask.data, mask.cols, mask.rows);
write_png_file_rgba(filename.c_str(), mask_rgba.data, mask_rgba.cols, mask_rgba.rows);

// set the image source settings
auto image_settings = obs_source_get_settings(target);
Expand Down Expand Up @@ -301,9 +301,18 @@ void check_plugin_config_folder_exists()
}
}

void write_png_file(const char *filename, const unsigned char *image8uc1, int width, int height)
void write_png_file_8uc1(const char *filename, const unsigned char *image8uc1, int width,
int height)
{
QImage image(image8uc1, width, height, QImage::Format_Grayscale8);
QString qfilename(filename);
image.save(qfilename);
}

void write_png_file_rgba(const char *filename, const unsigned char *imageRGBA, int width,
int height)
{
QImage image(imageRGBA, width, height, QImage::Format_RGBA8888);
QString qfilename(filename);
image.save(qfilename);
}
5 changes: 4 additions & 1 deletion src/obs-utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ void update_image_source_on_settings(struct filter_data *usd, obs_data_t *settin

void check_plugin_config_folder_exists();

void write_png_file(const char *filename, const unsigned char *image8uc3, int width, int height);
/**
 * Save a single-channel, 8-bit-per-pixel (grayscale) image to `filename`.
 *
 * @param filename  Destination path
 * @param image8uc1 Pixel data, one byte per pixel
 * @param width     Image width in pixels
 * @param height    Image height in pixels
 */
void write_png_file_8uc1(const char *filename, const unsigned char *image8uc1, int width,
			 int height);
/**
 * Save a 4-channel, 8-bit-per-channel RGBA image to `filename`.
 *
 * @param filename  Destination path
 * @param imageRGBA Pixel data in R, G, B, A byte order
 * @param width     Image width in pixels
 * @param height    Image height in pixels
 */
void write_png_file_rgba(const char *filename, const unsigned char *imageRGBA, int width,
int height);

#endif /* OBS_UTILS_H */
34 changes: 34 additions & 0 deletions src/ocr-filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,8 @@ obs_properties_t *ocr_filter_properties(void *data)
// Output formatting
obs_properties_add_text(props, "output_formatting", obs_module_text("OutputFormatting"),
OBS_TEXT_MULTILINE);
// hide the output formatting property by default
obs_property_set_visible(obs_properties_get(props, "output_formatting"), false);

// Add a property for the output text source
obs_property_t *text_sources =
Expand Down Expand Up @@ -253,6 +255,12 @@ obs_properties_t *ocr_filter_properties(void *data)
obs_property_set_visible(obs_properties_get(props_modified,
"output_file_path"),
save_to_file);
// show/hide "output_formatting" property based on the selected output source
bool show_output_formatting =
strcmp(obs_data_get_string(settings, "text_sources"), "none") != 0;
obs_property_set_visible(obs_properties_get(props_modified,
"output_formatting"),
show_output_formatting);
UNUSED_PARAMETER(property);
return true;
});
Expand All @@ -267,6 +275,30 @@ obs_properties_t *ocr_filter_properties(void *data)
obs_property_list_add_string(image_sources, obs_module_text("NoOutput"), "none");
// Add the sources
obs_enum_sources(add_image_sources_to_list, image_sources);
// add change callback for the image sources
obs_property_set_modified_callback(
obs_properties_get(props, "text_detection_mask_sources"),
[](obs_properties_t *props_modified, obs_property_t *, obs_data_t *settings) {
// hide/show the image_output_option property based on the selected image source
bool show_image_output_option =
strcmp(obs_data_get_string(settings, "text_detection_mask_sources"),
"none") != 0;
obs_property_set_visible(obs_properties_get(props_modified,
"image_output_option"),
show_image_output_option);
return true;
});

// add a choice for the image output format: detection boxes mask, text rendering, or text with background
obs_property_t *output_format = obs_properties_add_list(
props, "image_output_option", obs_module_text("ImageOutputOption"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);
obs_property_list_add_int(output_format, obs_module_text("DetectionBoxesMask"),
OUTPUT_IMAGE_OPTION_DETECTION_MASK);
obs_property_list_add_int(output_format, obs_module_text("TextRendering"),
OUTPUT_IMAGE_OPTION_TEXT_OVERLAY);
obs_property_list_add_int(output_format, obs_module_text("TextWithBackground"),
OUTPUT_IMAGE_OPTION_TEXT_BACKGROUND);

// Add informative text about the plugin
obs_properties_add_text(
Expand Down Expand Up @@ -302,6 +334,7 @@ void ocr_filter_defaults(obs_data_t *settings)
obs_data_set_default_int(settings, "word_length", 5);
obs_data_set_default_int(settings, "window_size", 10);
obs_data_set_default_string(settings, "output_formatting", "{{output}}");
obs_data_set_default_int(settings, "image_output_option", 0);
}

void ocr_filter_update(void *data, obs_data_t *settings)
Expand Down Expand Up @@ -340,6 +373,7 @@ void ocr_filter_update(void *data, obs_data_t *settings)
tf->update_on_change = obs_data_get_bool(settings, "update_on_change");
tf->update_on_change_threshold =
(int)obs_data_get_int(settings, "update_on_change_threshold");
tf->output_image_option = (int)obs_data_get_int(settings, "image_output_option");

// Initialize the Tesseract OCR model
initialize_tesseract_ocr(tf, hard_tesseract_init_required);
Expand Down
61 changes: 44 additions & 17 deletions src/tesseract-ocr-utils.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#include "tesseract-ocr-utils.h"
#include "plugin-support.h"
#include "obs-utils.h"
#include "consts.h"
#include "text-render-helper.h"

#include <obs-module.h>

Expand Down Expand Up @@ -179,19 +181,18 @@ std::string run_tesseract_ocr(filter_data *tf, const cv::Mat &image)
return recognitionResult;
}

std::vector<std::vector<cv::Point>> extract_text_detection_boxes(filter_data *tf,
cv::Size imageSize)
std::vector<OCRBox> extract_text_detection_boxes(filter_data *tf, cv::Size imageSize)
{
// extract the text detection boxes
tesseract::ResultIterator *ri = tf->tesseract_model->GetIterator();
if (ri == nullptr) {
return std::vector<std::vector<cv::Point>>();
return std::vector<OCRBox>();
}
tesseract::PageIteratorLevel level = tesseract::RIL_WORD;
if (tf->pageSegmentationMode == tesseract::PSM_SINGLE_CHAR) {
level = tesseract::RIL_SYMBOL;
}
std::vector<std::vector<cv::Point>> boxes;
std::vector<OCRBox> boxes;
do {
if (ri->Empty(level)) {
continue;
Expand All @@ -204,13 +205,13 @@ std::vector<std::vector<cv::Point>> extract_text_detection_boxes(filter_data *tf
continue;
}
}
std::vector<cv::Point> box(4);
OCRBox box;
int left, top, right, bottom;
ri->BoundingBox(level, &left, &top, &right, &bottom);
box[0] = cv::Point(left, top);
box[1] = cv::Point(right, top);
box[2] = cv::Point(right, bottom);
box[3] = cv::Point(left, bottom);
box.box = cv::Rect(left, top, right - left, bottom - top);
// get the text of the box
const char *text = ri->GetUTF8Text(level);
box.text = text;
// get area of box
const int area = (right - left) * (bottom - top);
// if the area is too small or too big, relative to the image size - skip the box
Expand Down Expand Up @@ -415,19 +416,45 @@ void tesseract_thread(void *data)
std::string ocr_result = run_tesseract_ocr(tf, imageForOCR);

if (is_valid_output_source_name(tf->output_image_source_name)) {
cv::Mat text_detection_output(imageBGRA.rows,
imageBGRA.cols, CV_8UC4,
cv::Scalar(0, 0, 0, 0));

// Extract the text detection boxes
std::vector<std::vector<cv::Point>> boxes =
std::vector<OCRBox> boxes =
extract_text_detection_boxes(tf, imageBGRA.size());

// Create a text detection binary mask
cv::Mat text_detection_mask(imageBGRA.rows, imageBGRA.cols,
CV_8UC1, cv::Scalar(0));
for (const std::vector<cv::Point> &box : boxes) {
cv::fillConvexPoly(text_detection_mask, box,
cv::Scalar(255));
if (tf->output_image_option ==
OUTPUT_IMAGE_OPTION_TEXT_OVERLAY) {
// Create a text overlay image
QImage text_overlay_image =
render_boxes_with_qtextdocument(
boxes, imageBGRA.cols,
imageBGRA.rows);
cv::Mat text_overlay_image_mat(
text_overlay_image.height(),
text_overlay_image.width(), CV_8UC4,
text_overlay_image.bits(),
text_overlay_image.bytesPerLine());
text_overlay_image_mat.copyTo(
text_detection_output);
// } else if (tf->output_image_option ==
// OUTPUT_IMAGE_OPTION_TEXT_BACKGROUND) {
// // Draw the text detection boxes on the image with a background

} else {
text_detection_output.setTo(
cv::Scalar(0, 0, 0, 255));

// Create a text detection binary mask
for (const auto &box : boxes) {
cv::rectangle(
text_detection_output, box.box,
cv::Scalar(255, 255, 255, 255), -1);
}
}

setTextDetectionMaskCallback(text_detection_mask, tf);
setTextDetectionMaskCallback(text_detection_output, tf);
}

if (!ocr_result.empty() &&
Expand Down
7 changes: 6 additions & 1 deletion src/tesseract-ocr-utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,15 @@
#include <deque>
#include <string>

/**
 * One text detection produced by extract_text_detection_boxes(): the text
 * recognized for a word (or symbol) together with its bounding rectangle.
 */
struct OCRBox {
// UTF-8 text recognized for this box
std::string text;
// Bounding rectangle built from the left/top/right/bottom values reported by
// the OCR result iterator
cv::Rect box;
};

void cleanup_config_files(const std::string &unique_id);
void initialize_tesseract_ocr(filter_data *tf, bool hard_tesseract_init_required = false);
std::string run_tesseract_ocr(filter_data *tf, const cv::Mat &imageBGRA);
std::vector<std::vector<cv::Point>> extract_text_detection_boxes(filter_data *tf);
// Returns the text detection boxes (bounding rectangle + recognized text) found
// by the most recent OCR run on tf's tesseract model.
// NOTE(review): the definition in tesseract-ocr-utils.cpp takes
// (filter_data *tf, cv::Size imageSize); this one-argument declaration looks
// stale - confirm and align the two signatures.
std::vector<OCRBox> extract_text_detection_boxes(filter_data *tf);
std::string strip(const std::string &str);
void stop_and_join_tesseract_thread(struct filter_data *tf);
void tesseract_thread(void *data);
Expand Down
38 changes: 38 additions & 0 deletions src/text-render-helper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#include "text-render-helper.h"

#include <QPainter>
#include <QPixmap>

/**
 * Render the recognized text of every OCR box onto a transparent canvas.
 *
 * Each box's text is drawn in blue, using a font whose pixel size equals the
 * box height, with the text baseline starting at the bottom-left corner of
 * the box.
 *
 * The canvas is a QImage instead of a QPixmap for two reasons:
 *  - the function is called from tesseract_thread, and QImage is the paint
 *    device Qt documents as safe to paint on outside the GUI thread;
 *  - the caller wraps the returned bytes as a 4-channel matrix and saves them
 *    as QImage::Format_RGBA8888, so the pixels must already be laid out as
 *    non-premultiplied R, G, B, A. QPixmap::toImage() yields a platform-native
 *    format instead (typically premultiplied ARGB32).
 *
 * @param boxes  OCR boxes (text + bounding rectangle) to draw
 * @param width  Output image width in pixels
 * @param height Output image height in pixels
 * @return A width x height image in QImage::Format_RGBA8888, or a null image
 *         if the canvas could not be created
 */
QImage render_boxes_with_qtextdocument(const std::vector<OCRBox> &boxes, uint32_t width,
				       uint32_t height)
{
	QImage canvas(static_cast<int>(width), static_cast<int>(height),
		      QImage::Format_RGBA8888);
	if (canvas.isNull()) {
		// zero-sized or unallocatable canvas: nothing to paint on
		return canvas;
	}
	canvas.fill(Qt::transparent);

	QPainter painter;
	if (!painter.begin(&canvas)) {
		return canvas;
	}
	// Source mode: drawn pixels replace the transparent background outright
	painter.setCompositionMode(QPainter::CompositionMode_Source);
	painter.setPen(Qt::blue);

	// draw the text of each individual box
	for (const OCRBox &box : boxes) {
		if (box.box.height <= 0) {
			// QFont::setPixelSize() rejects non-positive sizes
			continue;
		}
		// set the character size according to the box height
		QFont font = painter.font();
		font.setPixelSize(box.box.height);
		painter.setFont(font);
		painter.drawText(box.box.x, box.box.y + box.box.height,
				 QString::fromStdString(box.text));
	}

	painter.end();

	return canvas;
}
14 changes: 14 additions & 0 deletions src/text-render-helper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#ifndef TEXT_RENDER_HELPER_H
#define TEXT_RENDER_HELPER_H

// Provides the OCRBox type used by the declaration below
#include "tesseract-ocr-utils.h"

#include <string>
#include <vector>

#include <QImage>

/**
 * Render the recognized text of every OCR box onto a transparent image.
 *
 * Each box's text is drawn in blue with a font pixel size equal to the box
 * height, starting at the bottom-left corner of the box.
 *
 * @param boxes  OCR boxes (text + bounding rectangle) to draw
 * @param width  Output image width in pixels
 * @param height Output image height in pixels
 * @return The rendered image
 */
QImage render_boxes_with_qtextdocument(const std::vector<OCRBox> &boxes, uint32_t width,
uint32_t height);

#endif // TEXT_RENDER_HELPER_H

0 comments on commit 16c63d6

Please sign in to comment.