separate_video.py
import torch
import cv2
import numpy as np
from torchvision import models
from torchvision import transforms as T
# Preprocess a frame into a normalized input tensor for the model
def preprocess(image, device):
    # OpenCV frames are BGR; the model expects RGB with ImageNet normalization
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    transform = T.Compose([
        T.ToPILImage(),
        T.Resize((520, 520)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    return transform(image).unsqueeze(0).to(device)
# Convert the model output into a class-index mask at the original frame size
def postprocess(output, orig_shape):
    output = output.squeeze().cpu().detach().numpy()
    # argmax yields an int64 array, which cv2.resize does not support
    output = output.argmax(0).astype(np.uint8)
    output = cv2.resize(output, orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
    return output
def separate_sub_bg(path):
    background_video = 'background_video.mp4'
    subject_video = 'subject_video.mp4'
    # Use the MPS (Metal Performance Shaders) backend if available, otherwise fall back to CPU
    device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")
    print(f"Using device: {device}")
    # Load the pre-trained DeepLabV3 model and move it to the selected device
    # (weights="DEFAULT" replaces the deprecated pretrained=True argument)
    model = models.segmentation.deeplabv3_resnet101(weights="DEFAULT").to(device).eval()
    # Open the local video file
    cap = cv2.VideoCapture(path)
    if not cap.isOpened():
        raise IOError(f"Error: Could not open video: {path}")
    # Get video properties (keep fps as a float so rates like 29.97 are not truncated)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    # Define the codec and create one VideoWriter per output
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out_subject = cv2.VideoWriter(subject_video, fourcc, fps, (frame_width, frame_height))
    out_background = cv2.VideoWriter(background_video, fourcc, fps, (frame_width, frame_height))
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        orig_shape = frame.shape[:2]
        input_tensor = preprocess(frame, device)
        with torch.no_grad():
            output = model(input_tensor)['out']
        # Get the segmentation mask at the original frame resolution
        seg_mask = postprocess(output, orig_shape)
        # Create a binary mask for the subject (index 15 is the 'person' class
        # in the PASCAL VOC label set these weights predict)
        binary_mask = (seg_mask == 15).astype(np.uint8) * 255
        # Extract the subject using the binary mask
        subject = cv2.bitwise_and(frame, frame, mask=binary_mask)
        # warmer_subject = subject.copy().astype(np.float32)
        # warmer_subject[:, :, 2] *= 1.2  # Increase red channel
        # warmer_subject[:, :, 1] *= 1.1  # Increase green channel slightly
        # warmer_subject[:, :, 0] *= 0.9  # Decrease blue channel slightly
        # # Clamp pixel values to the valid range [0, 255]
        # warmer_subject = np.clip(warmer_subject, 0, 255).astype(np.uint8)
        # Create the background by inverting the binary mask
        background_mask = cv2.bitwise_not(binary_mask)
        background = cv2.bitwise_and(frame, frame, mask=background_mask)
        # Write the frames to their respective output files
        out_subject.write(subject)
        out_background.write(background)
    # Release everything once the job is finished
    cap.release()
    out_subject.release()
    out_background.release()
    return subject_video, background_video
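
# Example usage: a minimal sketch of calling the function directly.
# "input_video.mp4" is a hypothetical path, not part of the original script.
if __name__ == "__main__":
    subject_path, background_path = separate_sub_bg("input_video.mp4")
    print(f"Subject saved to {subject_path}, background saved to {background_path}")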