-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathVoiceClassification.py
100 lines (73 loc) · 2.97 KB
/
VoiceClassification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from tools.data_manager import *
from tools.dataCreation import *
from tools.wordSeparation import *
import keras
def create_model(n_features, n_classes):
    """
    Build and compile a fully connected softmax classifier.

    :param n_features: int
        number of features per input sample; also used as the width of the
        first dense layer
    :param n_classes: int
        number of classes/speakers/labels, i.e. the width of the softmax
        output layer
    :return:
        compiled keras Sequential model
    """
    # (units, dropout rate applied right after the layer) for each hidden
    # dense layer that follows the input-sized layer.
    hidden_stack = (
        (2048, 0.4),
        (1024, 0.4),
        (2048, 0.5),
    )

    network = Sequential()

    # Input-sized layer followed by a light dropout.
    network.add(Dense(n_features, input_shape=(n_features,), activation='relu'))
    network.add(Dropout(0.1))

    for width, drop_rate in hidden_stack:
        network.add(Dense(width, activation='relu'))
        network.add(Dropout(drop_rate))

    # One output unit per class; softmax pairs with the categorical
    # cross-entropy loss used below.
    network.add(Dense(n_classes, activation='softmax'))

    # No optimizer is passed, so keras uses the default of compile().
    network.compile(loss='categorical_crossentropy', metrics=['accuracy'])
    return network
class word_model:
    """
    A class to represent a model for multiple words

    Attributes:
        ss: StandardScaler
            scaler instance handed to get_features for both the training and
            the validation files
        train_labels: list
            list of labels (speakers) in the training set
        train_labels_encoded: list
            list of one hot encoded labels
        model: keras model
            model fit to the training data, or loaded from disk when
            load=True
        accuracy:
            load=False: the per-epoch 'val_accuracy' list from the training
            history; load=True: whatever model.evaluate returns on the
            validation split. NOTE: the two branches store different shapes.
    """

    def __init__(self, words, load=False, path=""):
        """
        constructs one keras model for many words using data found in directory words/'word'/ for each 'word'

        :param words: list of str
            list of words to use for training, ASSUMES data for each word exists in words/'word'
        :param load: bool
            when True, load a saved model from ``path`` instead of training a
            new one
        :param path: str
            location passed to keras.models.load_model; only used when
            ``load`` is True
        :raises ValueError:
            if ``words`` is empty
        """
        frames = [get_data_from_dir('words/{}/'.format(word)) for word in words]
        if not frames:
            raise ValueError("word_model needs at least one word to build its data set")

        # DataFrame.append was removed in pandas 2.0; a single concat gives
        # the same stacked frame (original indices kept) without copying the
        # accumulated data on every iteration.
        data = pd.concat(frames)

        # Stratify on speaker so every speaker is represented in both splits.
        train, val = train_test_split(data, test_size=0.25, stratify=data['speaker'])

        self.ss = StandardScaler()
        # NOTE(review): the same scaler object is passed for both splits;
        # whether get_features fits or only applies it is not visible here.
        train_features = get_features(train['filepath'], self.ss)
        val_features = get_features(val['filepath'], self.ss)

        self.train_labels = train['speaker'].tolist()
        self.train_labels_encoded = get_encoded_labels(train['speaker'])
        val_labels = get_encoded_labels(val['speaker'])

        if load:
            self.model = keras.models.load_model(path)
            # NOTE(review): the split above is drawn fresh on every run, so
            # this validation set may overlap with whatever data the saved
            # model was trained on - confirm before trusting this number.
            self.accuracy = self.model.evaluate(val_features, val_labels)
        else:
            # Output width is taken from the length of one encoded label.
            self.model = create_model(N_MFCCS, len(self.train_labels_encoded[0]))
            history = self.model.fit(
                train_features,
                self.train_labels_encoded,
                epochs=50,
                validation_data=(val_features, val_labels),
            )
            self.accuracy = history.history['val_accuracy']
# Absolute directory that contains this script (symlinks resolved).
dir_path = os.path.split(os.path.realpath(__file__))[0]


if __name__ == "__main__":
    # Optional manual steps, kept disabled; enable one at a time as needed.
    # play_recordings(['sentence'])
    # words = ["alexa","the", "be", "to", "of", "and"]

    # Train a model on the recordings stored under words/sentence/.
    model = word_model(['sentence'])

    # Classify one held-out recording and show the result.
    prediction = predict_speaker("test/eli.wav", model)
    print(prediction)

    # Further manual checks, kept disabled:
    # print(predict_speaker("test/harley.wav", model))
    # print(predict_speaker("test/alex.wav", model))
    # live_input(model)
    # separate_words(f"{dir_path}\\test\\Sentence.wav")
    # record_and_save("harley")