Skip to content

Commit

Permalink
Add image dilation and rescaling capabilities (#17)
Browse files Browse the repository at this point in the history
* Add image dilation and rescaling capabilities

* Refactor filter_data struct in filter-data.h
  • Loading branch information
royshil authored Mar 22, 2024
1 parent 85b86d8 commit de22dcf
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 5 deletions.
12 changes: 8 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,17 +59,21 @@ Available now:
- "Semantic Smoothing": getting more consistent outputs with higher accuracy and confidence by "averaging" several text outputs
- Timing/Running modes: per X-milliseconds
- Output formatting (with inja): e.g. "Score: {{score}}"
- Output text detection to image source
- Binarization methods (threshold, Otsu, Triangle, adaptive)
- Image Dilation
- Rescale (Tesseract performs best when characters are roughly 35 pixels tall)

Coming soon:
- More languages built-in (pretrained Tesseract models)
- Allowing external model files
- More output capabilities e.g. Parsing, saving a file, websocket event, etc.
- Output to more built-in OBS sources (Image, etc.)
- More output capabilities e.g. Parsing, websocket event, etc.
- Extracting text from complex image layouts
- Different timing/run modes: per X-frames, image change, etc.
- Image stabilization
- Image processing: Perspective warping, auto-cropping, binarization, etc.
- Text detection mask output: Use for blurring / hiding text on scene
- Optical flow tracking for fast moving text
- Image processing: Perspective warping, auto-cropping, etc.
- Advanced binarization: Niblack, Sauvola

Check out our other plugins:
- [Background Removal](https://github.com/occ-ai/obs-backgroundremoval) removes background from webcam without a green screen.
Expand Down
3 changes: 3 additions & 0 deletions data/locale/en-US.ini
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,6 @@ BinarizationMode="Binarization Mode"
BinarizationThreshold="Binarization Threshold"
BinarizationBlockSize="Binarization Block Size"
PreviewBinarization="Preview Binarization"
RescaleImage="Rescale Image"
RescaleTargetSize="Rescale Target Size"
DilationIterations="Dilation Iterations"
3 changes: 3 additions & 0 deletions src/filter-data.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ struct filter_data {
int binarizationThreshold;
int binarizationBlockSize;
bool previewBinarization;
int dilationIterations;
bool rescaleImage;
int rescaleTargetSize;
std::string char_whitelist;
std::string user_patterns;
int conf_threshold;
Expand Down
45 changes: 44 additions & 1 deletion src/ocr-filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,16 @@ bool binarization_mode_modified(obs_properties_t *props, obs_property_t *propert
return true;
}

// Property-modified callback for the "rescale_image" checkbox.
// Shows the "rescale_target_size" property only while rescaling is enabled in
// `settings`. `property` is unused. Always returns true.
bool rescale_modified(obs_properties_t *props_modified, obs_property_t *property,
		      obs_data_t *settings)
{
	UNUSED_PARAMETER(property);

	obs_property_t *target_size_prop =
		obs_properties_get(props_modified, "rescale_target_size");
	const bool show_target_size = obs_data_get_bool(settings, "rescale_image");

	obs_property_set_visible(target_size_prop, show_target_size);
	return true;
}

obs_properties_t *ocr_filter_properties(void *data)
{
obs_properties_t *props = obs_properties_create();
Expand Down Expand Up @@ -107,14 +117,17 @@ obs_properties_t *ocr_filter_properties(void *data)
{"page_segmentation_mode", "char_whitelist", "conf_threshold",
"user_patterns", "enable_smoothing", "word_length", "window_size",
"update_on_change", "binarization_mode", "preview_binarization",
"binarization_threshold", "binarization_block_size"}) {
"binarization_threshold", "binarization_block_size", "rescale_image",
"rescale_target_size", "update_on_change_threshold",
"dilation_iterations"}) {
obs_property_set_visible(obs_properties_get(props_modified, prop),
advanced_settings);
}
if (advanced_settings) {
enable_smoothing_modified(props_modified, nullptr, settings);
update_on_change_modified(props_modified, nullptr, settings);
binarization_mode_modified(props_modified, nullptr, settings);
rescale_modified(props_modified, nullptr, settings);
}
UNUSED_PARAMETER(property);
return true;
Expand Down Expand Up @@ -163,10 +176,34 @@ obs_properties_t *ocr_filter_properties(void *data)
obs_property_set_modified_callback(obs_properties_get(props, "binarization_mode"),
binarization_mode_modified);

// add dilation iterations
obs_properties_add_int_slider(props, "dilation_iterations",
obs_module_text("DilationIterations"), 0, 10, 1);

// Add option for previewing the binarization
obs_properties_add_bool(props, "preview_binarization",
obs_module_text("PreviewBinarization"));

// add option for rescaling the image
obs_properties_add_bool(props, "rescale_image", obs_module_text("RescaleImage"));

// add rescale target size slider
obs_properties_add_int_slider(props, "rescale_target_size",
obs_module_text("RescaleTargetSize"), 10, 100, 1);

// Show or hide the rescale target size slider whenever "rescale_image" changes.
// Reuse rescale_modified (also invoked from the advanced-settings refresh path)
// instead of an inline lambda duplicating the same logic, so both paths cannot
// drift apart. This mirrors how binarization_mode_modified is registered.
obs_property_set_modified_callback(obs_properties_get(props, "rescale_image"),
				   rescale_modified);

// Add character whitelist
obs_properties_add_text(props, "char_whitelist", obs_module_text("CharWhitelist"),
OBS_TEXT_DEFAULT);
Expand Down Expand Up @@ -253,6 +290,9 @@ void ocr_filter_defaults(obs_data_t *settings)
obs_data_set_default_int(settings, "binarization_threshold", 127);
obs_data_set_default_int(settings, "binarization_block_size", 15);
obs_data_set_default_bool(settings, "preview_binarization", false);
obs_data_set_default_int(settings, "dilation_iterations", 0);
obs_data_set_default_bool(settings, "rescale_image", true);
obs_data_set_default_int(settings, "rescale_target_size", 35);
obs_data_set_default_string(settings, "text_sources", "none");
obs_data_set_default_string(settings, "text_detection_mask_sources", "none");
obs_data_set_default_string(
Expand Down Expand Up @@ -288,6 +328,9 @@ void ocr_filter_update(void *data, obs_data_t *settings)
tf->binarizationThreshold = (int)obs_data_get_int(settings, "binarization_threshold");
tf->binarizationBlockSize = (int)obs_data_get_int(settings, "binarization_block_size");
tf->previewBinarization = obs_data_get_bool(settings, "preview_binarization");
tf->dilationIterations = (int)obs_data_get_int(settings, "dilation_iterations");
tf->rescaleImage = obs_data_get_bool(settings, "rescale_image");
tf->rescaleTargetSize = (int)obs_data_get_int(settings, "rescale_target_size");
tf->char_whitelist = obs_data_get_string(settings, "char_whitelist");
tf->conf_threshold = (int)obs_data_get_int(settings, "conf_threshold");
tf->enable_smoothing = obs_data_get_bool(settings, "enable_smoothing");
Expand Down
16 changes: 16 additions & 0 deletions src/tesseract-ocr-utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,13 @@ void tesseract_thread(void *data)
cv::THRESH_BINARY | cv::THRESH_OTSU);
}

// Optional morphological dilation of the image before OCR.
// Skipped entirely when dilationIterations is 0 (the default setting).
if (tf->dilationIterations > 0) {
// 3x3 rectangular structuring element
cv::Mat element = cv::getStructuringElement(cv::MORPH_RECT,
cv::Size(3, 3));
// Dilate in place; cv::Point(-1, -1) is OpenCV's default anchor (kernel
// center), and the operation is applied dilationIterations times.
cv::dilate(imageForOCR, imageForOCR, element,
cv::Point(-1, -1), tf->dilationIterations);
}

if (tf->previewBinarization) {
// lock the outputPreviewBGRALock
std::lock_guard<std::mutex> lock(tf->outputPreviewBGRALock);
Expand All @@ -395,6 +402,15 @@ void tesseract_thread(void *data)
}
}

// Optionally rescale the image so its height equals tf->rescaleTargetSize
// pixels, keeping the aspect ratio (same factor on both axes).
// Guard against an empty image (rows == 0 would divide by zero) and a
// non-positive target size: either would hand cv::resize an invalid scale and
// make it throw inside the OCR thread. In those cases the image is passed
// through unscaled.
if (tf->rescaleImage && !imageForOCR.empty() && tf->rescaleTargetSize > 0) {
	const float scale = static_cast<float>(tf->rescaleTargetSize) /
			    static_cast<float>(imageForOCR.rows);
	cv::Mat resized;
	// An empty dsize makes cv::resize derive the output size from fx/fy
	cv::resize(imageForOCR, resized, cv::Size(), scale, scale);
	imageForOCR = resized;
}

// Process the image
std::string ocr_result = run_tesseract_ocr(tf, imageForOCR);

Expand Down

0 comments on commit de22dcf

Please sign in to comment.