-
Notifications
You must be signed in to change notification settings - Fork 1
/
filelist.py
30 lines (26 loc) · 1.13 KB
/
filelist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import argparse
import glob
import os
import tqdm
import soundfile as sf
if __name__ == "__main__":
    # Scan a dataset directory for .wav files and write the paths of those
    # that are long enough (measured at the target sample rate) to a text
    # file list, one path per line. Prints the accumulated duration of the
    # listed files when done.
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', type=str, default="./dataset", help='Dataset path')
    parser.add_argument('-o', '--output', type=str, default="./filelists/48k_audio_filelist.txt", help='File list output path')
    parser.add_argument('-s', '--sr', type=int, default=48000, help='File target sample rate')
    args = parser.parse_args()

    # "**" with recursive=True also matches .wav files directly under the input dir.
    audio_files = glob.glob(os.path.join(args.input, "**/*.wav"), recursive=True)
    target_sr = args.sr
    # Files shorter than this many samples (counted at the target rate) are skipped.
    # NOTE(review): 16384 is presumably a downstream segment length - confirm.
    min_samples = 16384 * 1.2
    total_time = 0

    # Create the output directory up front so open() does not fail on a fresh
    # checkout; dirname is empty when the output is a bare file name.
    output_dir = os.path.dirname(args.output)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(args.output, "w", encoding="utf-8") as f:
        for audio_path in tqdm.tqdm(audio_files):
            # Only the header is needed; close the handle right away so that
            # large datasets do not exhaust the open-file limit.
            with sf.SoundFile(audio_path) as audio:
                sec = audio.frames / audio.samplerate
            if sec * target_sr < min_samples:
                continue
            # No upper bound on duration is applied (a 30 s cap was disabled).
            # Normalize Windows separators so the list is portable.
            audio_path = audio_path.replace("\\", "/")
            f.write(f"{audio_path}\n")
            total_time += sec
    print(f"Total time: {total_time//3600}h")