forked from dscripka/openWakeWord
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added some scripts and functions to assist with collection false acti…
…vations
- Loading branch information
Showing
5 changed files
with
240 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
# Copyright 2022 David Scripka. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# Imports | ||
import numpy as np | ||
import os | ||
import scipy.io.wavfile | ||
import tempfile | ||
import openwakeword | ||
import argparse | ||
import time | ||
from speechbrain.dataio.dataio import read_audio | ||
import collections | ||
from tqdm import tqdm | ||
|
||
# Parse input arguments | ||
parser=argparse.ArgumentParser() | ||
parser.add_argument( | ||
"--input_files", | ||
help="""A text file where each line is a full path to an audio file to mine for false-positives.""", | ||
type=str, | ||
default="./", | ||
required=True | ||
) | ||
parser.add_argument( | ||
"--skip_files", | ||
help="""A text file where each line is a full path to an audio file that should be skipped.""", | ||
type=str, | ||
required=False | ||
) | ||
parser.add_argument( | ||
"--output_dir", | ||
help="""Where to save the audio features from a false-positive. | ||
By default, will be saved as <model_name>.npy files of shape N_clips x frames x features""", | ||
type=str, | ||
default="./", | ||
required=True | ||
) | ||
parser.add_argument( | ||
"--n_threads", | ||
help="""The number of CPU threads to use when processing.""", | ||
type=int, | ||
default=1, | ||
required=False | ||
) | ||
parser.add_argument( | ||
"--max_wall_time", | ||
help="""The total amount of wall-clock time (in hours) to mine for false-positives. When this limit is reached | ||
the examples found up to this point will be saved.""", | ||
type=float, | ||
default=1, | ||
required=False | ||
) | ||
parser.add_argument( | ||
"--max_feature_size", | ||
help="""The maximum size (in MB) for the false-positive features. If the total collected is larger | ||
is than this, processing will stop.""", | ||
type=float, | ||
default=5000, | ||
required=False | ||
) | ||
args=parser.parse_args() | ||
|
||
if __name__ == "__main__": | ||
# Get audio files to mine from input list | ||
with open(args.input_files, 'r') as f: | ||
input_files = [i.strip() for i in f.readlines()] | ||
|
||
# Get audio files to skip and adjust input file list | ||
if args.skip_files: | ||
with open(args.skip_files, 'r') as f: | ||
skip_files = [i.strip() for i in f.readlines()] | ||
input_files = [i for i in input_files if i not in skip_files] | ||
|
||
# Set starting time | ||
start_time = time.time() | ||
|
||
# Begin processing files | ||
bs = int(args.n_threads*2) | ||
combined_features = collections.defaultdict(list) | ||
for i in tqdm(range(0, len(input_files), bs)): | ||
with tempfile.TemporaryDirectory() as tmp_dir: | ||
batch = input_files[i:i+bs] | ||
batch_data = [] | ||
tmp_file_paths = [] | ||
for i in batch: | ||
dat = read_audio(i).numpy() | ||
if len(dat.shape) > 1: | ||
dat = dat[:, 0] | ||
dat = (dat*32767).astype(np.int16) # convert to 16-khz, 16-bit audio | ||
|
||
# Save audio to temporary .wav files | ||
tmp_fname = os.path.join(tmp_dir, i.split(os.path.sep)[-1]) | ||
scipy.io.wavfile.write(tmp_fname, 16000, dat) | ||
tmp_file_paths.append(tmp_fname) | ||
|
||
# Predict on temporary files | ||
predictions = openwakeword.utils.bulk_predict( | ||
file_paths=tmp_file_paths, | ||
wakeword_model_paths=[], # loads all default models | ||
prediction_function="_get_positive_prediction_frames", | ||
ncpu=args.n_threads | ||
) | ||
|
||
# Combine and store features | ||
for fl in predictions.keys(): | ||
for lbl in predictions[fl].keys(): | ||
combined_features[lbl].append(predictions[fl][lbl]) | ||
|
||
# Check for maximum processing time | ||
if (time.time() - start_time)/3600 > args.max_wall_time: | ||
print("\nMaximum wall-time reached. Saving mined false-positives and exiting...") | ||
break | ||
|
||
# Check for maximum features size in memory | ||
size = 0 | ||
for key in combined_features.keys(): | ||
for i in combined_features[key]: | ||
size += i.nbytes/1e6 | ||
if size > args.max_feature_size: | ||
print("\nMaximum feature size (in MB) reached. Saving mined false-positives and exiting...") | ||
break | ||
|
||
# Combine mined features into single numpy arrays | ||
for lbl in combined_features.keys(): | ||
combined_features[lbl] = np.concatenate(combined_features[lbl], axis=0) | ||
|
||
# Save results to .npy files | ||
if not os.path.exists(args.output_dir): | ||
os.mkdir(args.output_dir) | ||
|
||
for key in combined_features.keys(): | ||
np.save(f"{args.output_dir}{os.path.sep}{key}.npy", combined_features[key]) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters