-
Notifications
You must be signed in to change notification settings - Fork 44
/
Copy pathfeatureExtractor.py
129 lines (99 loc) · 4.55 KB
/
featureExtractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# Written by S. Emre Eskimez, in 2017 - University of Rochester
# This script extracts features from the GRID dataset.
# It only keeps videos that yield exactly 75 frames; if you intend to use other videos of arbitrary length, you need to modify this script.
# Usage: python featureExtractor.py -vp path-to-video-files/ -sp path-to-shape-predictor-68-face-landmarks-dat -o output-file-name.hdf5
# You can find shape_predictor_68_face_landmarks.dat online from various sources.
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.animation as manimation
from copy import deepcopy
import sys
import os
import dlib
import glob
# from skimage import io
import numpy as np
import h5py
import pylab
import librosa
import imageio
import utils
import argparse, fnmatch, shutil
from tqdm import tqdm
import subprocess
# Command-line interface.
# The file has no module docstring, so __doc__ would be None; give argparse an
# explicit description instead. All three paths are needed further down
# (shape predictor, video folder walk, HDF5 output), so they are marked
# required: a missing one is reported immediately by argparse rather than
# surfacing later as an error on a None path.
parser = argparse.ArgumentParser(
    description='Extract mel-feature deltas and 68-point face landmarks '
                'from GRID videos into an HDF5 file.')
parser.add_argument("-vp", "--video-path", type=str, required=True, help="video folder path")
parser.add_argument("-sp", "--sp-path", type=str, required=True, help="shape_predictor_68_face_landmarks.dat path")
parser.add_argument("-o", "--output-path", type=str, required=True, help="Output file path")
args = parser.parse_args()
predictor_path = args.sp_path  # e.g. '../data/shape_predictor_68_face_landmarks.dat'
video_folder_path = args.video_path
dataset_path = args.output_path
# Reference face shape (68 landmarks, x/y) that is later handed to
# transferExpression. The mean face is used here, but any other face shape
# could be substituted.
fnorm = utils.faceNormalizer()
mean_shape_raw = np.load('mean_shape.npy')
# alignEyePoints is fed a stack of shapes, so wrap the single shape in a
# leading axis of length 1 and take the first entry of the result back out.
ms = fnorm.alignEyePoints(mean_shape_raw.reshape((1, 68, 2)))[0, ...]
# Start from a clean output file: delete any dataset left by a previous run.
# Only filesystem errors are tolerated (the usual one being that no previous
# file exists); anything else, e.g. Ctrl-C or a non-path argument, propagates.
try:
    os.remove(dataset_path)
except OSError:
    print ('Exception when deleting previous dataset...')
# Window size and hop size handed to utils.melSpectra.
# NOTE(review): presumably seconds (0.04 s would be one frame at 25 fps) - confirm in utils.
wsize = 0.04
hsize = 0.04

# np.diff with n=1 drops one row and with n=2 drops two rows; these zero rows
# are inserted at the front of the delta and double-delta features so both keep
# the same number of rows as the mel frames they were computed from.
zeroVecD = np.zeros((1, 64))
zeroVecDD = np.zeros((2, 64))

# Open the output explicitly for writing. Without a mode argument, h5py >= 3.0
# opens files read-only, which makes the create_dataset calls below fail.
# Any previous file has already been removed above, so 'w' creates it fresh.
dataHandler = h5py.File(dataset_path, 'w')

# One row per video, growable along the first axis:
#   MelFeatures: 75 frames x 128 values (64 delta + 64 double-delta)
#   flmark:      75 frames x 136 values (68 landmarks x 2 coordinates)
speechData = dataHandler.create_dataset('MelFeatures', (1, 75, 128), maxshape=(None, 75, 128))
lmarkData = dataHandler.create_dataset('flmark', (1, 75, 136), maxshape=(None, 75, 136))

# dlib face detector and 68-point landmark predictor.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(predictor_path)

# Not referenced anywhere else in this script; kept for compatibility.
points_old = np.zeros((68, 2), dtype=np.float32)

# Index of the next row to write in both datasets.
fileCtr = 0
# Walk the video folder and store one sample per usable video.
# A video is usable when exactly one face is found in each of 75 frames and
# its audio yields exactly 75 feature rows; everything else is skipped.
# The HDF5 file is closed in all cases so that written samples are flushed
# even if the run is interrupted.
try:
    for root, dirnames, filenames in os.walk(video_folder_path):
        for filename in filenames:
            # You can add the file type of your videos here:
            if os.path.splitext(filename)[1] not in ('.mpg', '.mp4'):
                continue
            f = os.path.join(root, filename)

            # ---- Face landmarks: one (68, 2) array per successfully processed frame ----
            point_seq = []
            vid = imageio.get_reader(f, 'ffmpeg')
            try:
                # NOTE(review): newer imageio releases may report an infinite
                # length for ffmpeg readers, which range() rejects - confirm
                # against the installed imageio version.
                for frm_cnt in tqdm(range(0, vid.get_length())):
                    try:
                        img = vid.get_data(frm_cnt)
                    except Exception:
                        # Unreadable frame: drop it (the video will then fail
                        # the 75-frame check below). Ctrl-C is not swallowed.
                        print('FRAME EXCEPTION!!')
                        continue
                    dets = detector(img, 1)
                    if len(dets) != 1:
                        print('FACE DETECTION FAILED!!')
                        continue
                    shape = predictor(img, dets[0])
                    point_seq.append(np.array(
                        [(shape.part(i).x, shape.part(i).y) for i in range(68)],
                        dtype=np.float32))
            finally:
                # Release the reader (and its ffmpeg process) for every video.
                vid.close()

            # No landmarks at all: nothing to normalize, skip the video.
            if not point_seq:
                continue

            # ---- Normalize landmarks; done before the audio step so videos
            # with the wrong frame count are rejected without running ffmpeg ----
            frames = np.array(point_seq)
            fnorm = utils.faceNormalizer()
            aligned_frames = fnorm.alignEyePoints(frames)
            transferredFrames = fnorm.transferExpression(aligned_frames, ms)
            frames = fnorm.unitNorm(transferredFrames)
            if frames.shape[0] != 75:
                continue

            # ---- Audio: extract mono 44.1 kHz 16-bit PCM to a temporary wav ----
            # The command is passed as an argument list (no shell), so paths
            # containing spaces or shell metacharacters are handled correctly.
            cmd = ['ffmpeg', '-y', '-i', f, '-vn', '-acodec', 'pcm_s16le',
                   '-ac', '1', '-ar', '44100', 'temp.wav']
            subprocess.call(cmd)
            if not os.path.isfile('temp.wav'):
                # ffmpeg produced no audio for this file; skip it instead of
                # aborting the whole run.
                print('AUDIO EXTRACTION FAILED!!')
                continue
            try:
                y, sr = librosa.load('temp.wav', sr=44100)
            finally:
                # Always clean up the temporary file, even if loading fails.
                os.remove('temp.wav')

            # ---- Speech features: delta and double-delta of the mel frames,
            # zero-padded at the front to keep one row per frame ----
            melFrames = np.transpose(utils.melSpectra(y, sr, wsize, hsize))
            melDelta = np.insert(np.diff(melFrames, n=1, axis=0), 0, zeroVecD, axis=0)
            melDDelta = np.insert(np.diff(melFrames, n=2, axis=0), 0, zeroVecDD, axis=0)
            melFeatures = np.concatenate((melDelta, melDDelta), axis=1)
            if melFeatures.shape[0] != 75:
                continue

            # Flatten each frame's 68 (x, y) points into 136 values.
            lmarkRow = np.reshape(frames, (75, 136))

            # ---- Store the sample ----
            # Grow both datasets to exactly fileCtr + 1 rows before writing, so
            # the file never ends with an unused all-zero row after the last
            # sample. (If no video is usable, the single initial row remains.)
            speechData.resize((fileCtr + 1, 75, 128))
            lmarkData.resize((fileCtr + 1, 75, 136))
            speechData[fileCtr, :, :] = melFeatures
            lmarkData[fileCtr, :, :] = lmarkRow
            fileCtr += 1
finally:
    dataHandler.close()