
Add depth estimation model #366

Merged
merged 14 commits on Jul 4, 2023
2 changes: 2 additions & 0 deletions .gitignore
@@ -25,3 +25,5 @@

# Exclude CMake build number cache
/cmake/.CMakeBuildNumber

src/*.generated.*
5 changes: 3 additions & 2 deletions README.md
@@ -16,7 +16,7 @@ A plugin for [OBS Studio](https://obsproject.com/) that allows you to replace th
- [MacOSX](#mac-osx)
- [Linux (Ubuntu, Arch, openSUSE)](#linux)
- [Windows](#windows)

🚧 Check out our experimental [CleanStream](https://github.com/royshil/obs-cleanstream) OBS plugin for real-time removal of filler words (uh, um) and profanity from a live audio stream 🚧

## Download
@@ -78,14 +78,15 @@ The pretrained model weights used for portrait foreground segmentation are taken
- https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.7/contrib/PP-HumanSeg
- https://github.com/PINTO0309/PINTO_model_zoo/tree/main/082_MediaPipe_Meet_Segmentation
- https://github.com/PeterL1n/RobustVideoMatting
- https://github.com/PINTO0309/PINTO_model_zoo/tree/main/384_TCMonoDepth and https://github.com/yu-li/TCMonoDepth

Image enhancement (low light) models are taken from:
- https://github.com/PINTO0309/PINTO_model_zoo/tree/main/213_TBEFN
- https://github.com/PINTO0309/PINTO_model_zoo/tree/main/372_URetinex-Net
- https://github.com/PINTO0309/PINTO_model_zoo/tree/main/370_Semantic-Guided-Low-Light-Image-Enhancement
- https://github.com/PINTO0309/PINTO_model_zoo/tree/main/243_Zero-DCE-improved

Some more information about how I built it: https://www.morethantechnical.com/2021/04/15/obs-plugin-for-portrait-background-removal-with-onnx-sinet-model/
Some more information about how I built it: https://www.morethantechnical.com/2021/04/15/obs-plugin-for-portrait-background-removal-with-onnx-sinet-model/ and https://www.morethantechnical.com/2023/05/20/building-an-obs-background-removal-plugin-a-walkthrough/

## Building

34 changes: 32 additions & 2 deletions data/effects/kawase_blur.effect
@@ -1,9 +1,14 @@
uniform float4x4 ViewProj;
uniform texture2d image;
uniform texture2d focalmask; // focal (depth) mask

uniform float xOffset;
uniform float yOffset;

uniform int blurIter; // Current blur iteration
uniform int blurTotal; // Total number of blur iterations
uniform float blurFocusPoint; // Focus point for the blur. 0 = back, 1 = front

sampler_state textureSampler {
Filter = Linear;
AddressU = Clamp;
@@ -28,8 +33,33 @@ VertDataOut VSDefault(VertDataOut v_in)
return vert_out;
}

float4 PSKawaseBlur(VertDataOut v_in) : TARGET
/**
* Kawase focal blur
* The blur amount is based on the estimated depth of the pixel and the focus point.
* The focus point is a value between 0 and 1, where 0 is the back of the image and 1 is the front.
* The blur amount grows with the absolute difference between the focus point and the estimated depth of the pixel.
*/
float4 PSKawaseFocalBlur(VertDataOut v_in) : TARGET
{
float blurIterF = (float)blurIter / (float)blurTotal;

// Smooth the focal mask with a 5-tap average, otherwise aliasing occurs
float blurValue = focalmask.Sample(textureSampler, v_in.uv).r;
blurValue += focalmask.Sample(textureSampler, v_in.uv + float2( 0.01, 0.01)).r;
blurValue += focalmask.Sample(textureSampler, v_in.uv + float2(-0.01, 0.01)).r;
blurValue += focalmask.Sample(textureSampler, v_in.uv + float2( 0.01, -0.01)).r;
blurValue += focalmask.Sample(textureSampler, v_in.uv + float2(-0.01, -0.01)).r;
blurValue *= 0.2; // average of the 5 taps

// Calculate the distance from the focus point for this pixel
float blurFocusDistance = clamp(abs(blurValue - blurFocusPoint), 0.0, 1.0);

if (blurIterF > blurFocusDistance) {
// Past this pixel's focus distance: return the pixel as-is and stop accumulating blur
return image.Sample(textureSampler, v_in.uv);
}

// Calculate the blur value from neighboring pixels
float4 sum = float4(0.0, 0.0, 0.0, 0.0);
sum += image.Sample(textureSampler, v_in.uv + float2( xOffset, yOffset));
sum += image.Sample(textureSampler, v_in.uv + float2(-xOffset, yOffset));
@@ -44,6 +74,6 @@ technique Draw
pass
{
vertex_shader = VSDefault(v_in);
pixel_shader = PSKawaseBlur(v_in);
pixel_shader = PSKawaseFocalBlur(v_in);
}
}
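For clarity, the gating that `PSKawaseFocalBlur` applies per pass can be modeled outside the shader. Below is a minimal C++ sketch of the same logic; the function name and CPU framing are illustrative only, not part of the plugin:

```cpp
#include <algorithm>
#include <cmath>

// Models the focal gating in PSKawaseFocalBlur: a pixel keeps receiving
// blur passes only while the normalized pass index stays below its
// distance from the focus point, so in-focus pixels stop blurring early.
bool receivesBlurPass(int blurIter, int blurTotal,
		      float maskDepth,      // smoothed focal-mask value, 0..1
		      float blurFocusPoint) // 0 = back, 1 = front
{
	float blurIterF = (float)blurIter / (float)blurTotal;
	float focusDistance =
		std::clamp(std::fabs(maskDepth - blurFocusPoint), 0.0f, 1.0f);
	return blurIterF <= focusDistance;
}
```

A pixel exactly at the focus point receives at most the first pass, while a pixel at the opposite depth extreme receives all `blurTotal` passes; everything in between stops after a proportional number of passes.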
10 changes: 2 additions & 8 deletions data/effects/mask_alpha_filter.effect
@@ -33,14 +33,8 @@ VertDataOut VSDefault(VertDataIn v_in)

float4 PSAlphaMaskRGBAWithBlur(VertDataOut v_in) : TARGET
{
float4 inputRGBA = image.Sample(textureSampler, v_in.uv);
inputRGBA.rgb = max(float3(0.0, 0.0, 0.0), inputRGBA.rgb / inputRGBA.a);

float4 outputRGBA;
float a = (1.0 - alphamask.Sample(textureSampler, v_in.uv).r) * inputRGBA.a;
outputRGBA.rgb = inputRGBA.rgb * a + blurredBackground.Sample(textureSampler, v_in.uv).rgb * (1.0 - a);
outputRGBA.a = 1;
return outputRGBA;
// Return the blurred image; the focal mask was already applied during the blur passes
return float4(blurredBackground.Sample(textureSampler, v_in.uv).rgb, 1.0);
}

float4 PSAlphaMaskRGBAWithoutBlur(VertDataOut v_in) : TARGET
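The removed blend was standard alpha compositing of the sharp input over the blurred background; since `PSKawaseFocalBlur` now returns in-focus pixels unchanged, the final shader can pass the blurred texture straight through. A minimal C++ sketch contrasting the two paths (all names illustrative, not part of the effect API):

```cpp
struct RGB {
	float r, g, b;
};

// Old path: blend the sharp input over the blurred background using an
// alpha derived from the segmentation mask.
RGB compositeOld(RGB input, RGB blurred, float mask, float inputAlpha)
{
	float a = (1.0f - mask) * inputAlpha;
	return {input.r * a + blurred.r * (1.0f - a),
		input.g * a + blurred.g * (1.0f - a),
		input.b * a + blurred.b * (1.0f - a)};
}

// New path: the focal blur already left in-focus pixels unblurred, so
// the blurred texture is itself the final composite.
RGB compositeNew(RGB blurred)
{
	return blurred;
}
```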
2 changes: 2 additions & 0 deletions data/locale/en-US.ini
@@ -28,3 +28,5 @@ URETINEX="URetinex-Net"
SGLLIE="Semantic Guided Enhancement"
ZERODCE="Zero-DCE"
EnableThreshold="Enable threshold"
BlurFocusPoint="Blur focus point"
TCMonoDepth="TCMonoDepth (Depth)"
Binary file added data/models/tcmonodepth_tcsmallnet_192x320.onnx
70 changes: 53 additions & 17 deletions src/background-filter.cpp
@@ -23,6 +23,7 @@
#include "models/ModelSelfie.h"
#include "models/ModelRVM.h"
#include "models/ModelPPHumanSeg.h"
#include "models/ModelTCMonoDepth.h"
#include "FilterData.h"
#include "ort-utils/ort-session-utils.h"
#include "obs-utils/obs-utils.h"
@@ -40,6 +41,7 @@ struct background_removal_filter : public filter_data {
int maskEveryXFrames = 1;
int maskEveryXFramesCount = 0;
int64_t blurBackground = 0;
float blurFocusPoint = 0.1f;

gs_effect_t *effect;
gs_effect_t *kawaseBlurEffect;
@@ -75,6 +77,7 @@ obs_properties_t *background_filter_properties(void *data)
{
obs_properties_t *props = obs_properties_create();

/* Threshold props */
obs_property_t *p = obs_properties_add_bool(
props, "enable_threshold", obs_module_text("EnableThreshold"));
obs_property_set_modified_callback(p, enable_threshold_modified);
@@ -96,6 +99,7 @@ obs_properties_t *background_filter_properties(void *data)
props, "feather", obs_module_text("FeatherBlendSilhouette"),
0.0, 1.0, 0.05);

/* GPU, CPU and performance Props */
obs_property_t *p_use_gpu = obs_properties_add_list(
props, "useGPU", obs_module_text("InferenceDevice"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
@@ -115,6 +119,13 @@ obs_properties_t *background_filter_properties(void *data)
USEGPU_COREML);
#endif

obs_properties_add_int(props, "mask_every_x_frames",
obs_module_text("CalculateMaskEveryXFrame"), 1,
300, 1);
obs_properties_add_int_slider(props, "numThreads",
obs_module_text("NumThreads"), 0, 8, 1);

/* Model selection Props */
obs_property_t *p_model_select = obs_properties_add_list(
props, "model_select", obs_module_text("SegmentationModel"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
@@ -132,17 +143,19 @@ obs_properties_t *background_filter_properties(void *data)
obs_property_list_add_string(p_model_select,
obs_module_text("Robust Video Matting"),
MODEL_RVM);
obs_property_list_add_string(p_model_select,
obs_module_text("TCMonoDepth"),
MODEL_DEPTH_TCMONODEPTH);

obs_properties_add_int(props, "mask_every_x_frames",
obs_module_text("CalculateMaskEveryXFrame"), 1,
300, 1);

/* Background Blur Props */
obs_properties_add_int_slider(
props, "blur_background",
obs_module_text("BlurBackgroundFactor0NoBlurUseColor"), 0, 20,
1);
obs_properties_add_int_slider(props, "numThreads",
obs_module_text("NumThreads"), 0, 8, 1);

obs_properties_add_float_slider(props, "blur_focus_point",
obs_module_text("BlurFocusPoint"), 0.0,
1.0, 0.05);

UNUSED_PARAMETER(data);
return props;
@@ -167,6 +180,7 @@
obs_data_set_default_int(settings, "mask_every_x_frames", 1);
obs_data_set_default_int(settings, "blur_background", 0);
obs_data_set_default_int(settings, "numThreads", 1);
obs_data_set_default_double(settings, "blur_focus_point", 0.1);
}

void background_filter_update(void *data, obs_data_t *settings)
@@ -186,6 +200,8 @@ void background_filter_update(void *data, obs_data_t *settings)
(int)obs_data_get_int(settings, "mask_every_x_frames");
tf->maskEveryXFramesCount = (int)(0);
tf->blurBackground = obs_data_get_int(settings, "blur_background");
tf->blurFocusPoint =
(float)obs_data_get_double(settings, "blur_focus_point");

const std::string newUseGpu = obs_data_get_string(settings, "useGPU");
const std::string newModel =
@@ -215,6 +231,9 @@ void background_filter_update(void *data, obs_data_t *settings)
if (tf->modelSelection == MODEL_PPHUMANSEG) {
tf->model.reset(new ModelPPHumanSeg);
}
if (tf->modelSelection == MODEL_DEPTH_TCMONODEPTH) {
tf->model.reset(new ModelTCMonoDepth);
}

createOrtSession(tf);
}
@@ -437,7 +456,8 @@ void background_filter_video_tick(void *data, float seconds)
}

static gs_texture_t *blur_background(struct background_removal_filter *tf,
uint32_t width, uint32_t height)
uint32_t width, uint32_t height,
gs_texture_t *alphaTexture)
{
if (tf->blurBackground == 0 || !tf->kawaseBlurEffect) {
return nullptr;
@@ -448,10 +468,18 @@ static gs_texture_t *blur_background(struct background_removal_filter *tf,
gs_texrender_get_texture(tf->texrender));
gs_eparam_t *image =
gs_effect_get_param_by_name(tf->kawaseBlurEffect, "image");
gs_eparam_t *focalmask =
gs_effect_get_param_by_name(tf->kawaseBlurEffect, "focalmask");
gs_eparam_t *xOffset =
gs_effect_get_param_by_name(tf->kawaseBlurEffect, "xOffset");
gs_eparam_t *yOffset =
gs_effect_get_param_by_name(tf->kawaseBlurEffect, "yOffset");
gs_eparam_t *blurIter =
gs_effect_get_param_by_name(tf->kawaseBlurEffect, "blurIter");
gs_eparam_t *blurTotal =
gs_effect_get_param_by_name(tf->kawaseBlurEffect, "blurTotal");
gs_eparam_t *blurFocusPointParam = gs_effect_get_param_by_name(
tf->kawaseBlurEffect, "blurFocusPoint");

for (int i = 0; i < (int)tf->blurBackground; i++) {
gs_texrender_reset(tf->texrender);
@@ -462,8 +490,12 @@ static gs_texture_t *blur_background(struct background_removal_filter *tf,
}

gs_effect_set_texture(image, blurredTexture);
gs_effect_set_texture(focalmask, alphaTexture);
gs_effect_set_float(xOffset, ((float)i + 0.5f) / (float)width);
gs_effect_set_float(yOffset, ((float)i + 0.5f) / (float)height);
gs_effect_set_int(blurIter, i);
gs_effect_set_int(blurTotal, (int)tf->blurBackground);
gs_effect_set_float(blurFocusPointParam, tf->blurFocusPoint);

struct vec4 background;
vec4_zero(&background);
@@ -496,22 +528,12 @@ void background_filter_video_render(void *data, gs_effect_t *_effect)
return;
}

// Output the masked image

gs_texture_t *blurredTexture = blur_background(tf, width, height);

if (!tf->effect) {
// Effect failed to load, skip rendering
obs_source_skip_video_filter(tf->source);
return;
}

if (!obs_source_process_filter_begin(tf->source, GS_RGBA,
OBS_ALLOW_DIRECT_RENDERING)) {
obs_source_skip_video_filter(tf->source);
return;
}

gs_texture_t *alphaTexture = nullptr;
{
std::lock_guard<std::mutex> lock(tf->outputLock);
@@ -524,6 +546,17 @@ void background_filter_video_render(void *data, gs_effect_t *_effect)
return;
}
}

// Output the masked image
gs_texture_t *blurredTexture =
blur_background(tf, width, height, alphaTexture);

if (!obs_source_process_filter_begin(tf->source, GS_RGBA,
OBS_ALLOW_DIRECT_RENDERING)) {
obs_source_skip_video_filter(tf->source);
return;
}

gs_eparam_t *alphamask =
gs_effect_get_param_by_name(tf->effect, "alphamask");
gs_eparam_t *blurSize =
@@ -534,13 +567,16 @@ void background_filter_video_render(void *data, gs_effect_t *_effect)
gs_effect_get_param_by_name(tf->effect, "yTexelSize");
gs_eparam_t *blurredBackground =
gs_effect_get_param_by_name(tf->effect, "blurredBackground");
gs_eparam_t *blurFocusPointParam =
gs_effect_get_param_by_name(tf->effect, "blurFocusPoint");

gs_effect_set_texture(alphamask, alphaTexture);
gs_effect_set_int(blurSize, (int)tf->blurBackground);
gs_effect_set_float(xTexelSize, 1.0f / (float)width);
gs_effect_set_float(yTexelSize, 1.0f / (float)height);
if (tf->blurBackground > 0) {
gs_effect_set_texture(blurredBackground, blurredTexture);
gs_effect_set_float(blurFocusPointParam, tf->blurFocusPoint);
}

gs_blend_state_push();
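The per-pass sampling offsets in `blur_background` follow the usual Kawase pattern: the offset grows by one texel each pass, and the extra 0.5 centers each tap between texels so bilinear filtering averages four neighbors for free. A standalone sketch of the values the loop feeds the shader (frame size and pass count are illustrative):

```cpp
#include <cstdio>

int main()
{
	const int width = 1280, height = 720; // illustrative frame size
	const int blurTotal = 5;              // corresponds to tf->blurBackground
	for (int i = 0; i < blurTotal; i++) {
		// Same arithmetic as the gs_effect_set_float calls above.
		float xOffset = ((float)i + 0.5f) / (float)width;
		float yOffset = ((float)i + 0.5f) / (float)height;
		printf("pass %d: offset = (%.6f, %.6f)\n", i, xOffset, yOffset);
	}
	return 0;
}
```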
2 changes: 2 additions & 0 deletions src/consts.h
@@ -11,6 +11,8 @@ const char *const MODEL_ENHANCE_URETINEX = "models/uretinex_net_180x320.onnx";
const char *const MODEL_ENHANCE_SGLLIE =
"models/semantic_guided_llie_180x324.onnx";
const char *const MODEL_ENHANCE_ZERODCE = "models/zero_dce_180x320.onnx";
const char *const MODEL_DEPTH_TCMONODEPTH =
"models/tcmonodepth_tcsmallnet_192x320.onnx";

const char *const USEGPU_CPU = "cpu";
const char *const USEGPU_DML = "dml";
28 changes: 28 additions & 0 deletions src/models/ModelTCMonoDepth.h
@@ -0,0 +1,28 @@
#ifndef MODELTCMONODEPTH_H
#define MODELTCMONODEPTH_H

#include "Model.h"

class ModelTCMonoDepth : public ModelBCHW {
private:
/* data */
public:
ModelTCMonoDepth(/* args */) {}
~ModelTCMonoDepth() {}

virtual void prepareInputToNetwork(cv::Mat &resizedImage,
cv::Mat &preprocessedImage)
{
// Do not normalize from [0, 255] to [0, 1]; the network takes unscaled pixel values.

hwc_to_chw(resizedImage, preprocessedImage);
}

virtual void postprocessOutput(cv::Mat &outputImage)
{
// Min-max normalize the raw depth map into [0, 1] for use as a mask.
cv::normalize(outputImage, outputImage, 1.0, 0.0,
cv::NORM_MINMAX);
}
};

#endif // MODELTCMONODEPTH_H
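For reference, the post-processing above can be exercised on its own: min-max normalization rescales the raw depth output into [0, 1], the range the focal mask expects. A small OpenCV sketch with synthetic input (whether 0 means far or near depends on the network's depth convention):

```cpp
#include <opencv2/core.hpp>
#include <cstdio>

int main()
{
	// Synthetic "depth" output with an arbitrary value range.
	cv::Mat depth = (cv::Mat_<float>(2, 2) << 3.0f, 7.5f, 12.0f, 9.0f);

	// Same call as ModelTCMonoDepth::postprocessOutput: the minimum
	// maps to 0.0 and the maximum maps to 1.0.
	cv::normalize(depth, depth, 1.0, 0.0, cv::NORM_MINMAX);

	for (int r = 0; r < depth.rows; r++)
		for (int c = 0; c < depth.cols; c++)
			printf("%.3f ", depth.at<float>(r, c));
	return 0;
}
```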