-
Notifications
You must be signed in to change notification settings - Fork 0
/
export_tsv.py
38 lines (30 loc) · 1.25 KB
/
export_tsv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from typing import List
import librosa
import glob
import tqdm
import os
import librosa
import wave
def write_tsv(out_path: str, root_path: str, files: List[str], sample_rate: int = 48000):
    """Write a TSV manifest listing WAV files and their frame counts.

    The first line of the manifest is ``root_path``. Every following line
    holds the relative path of one audio file and its number of frames,
    separated by a tab. The relative path is the last three components of
    the input path, always joined with ``/``.

    Args:
        out_path: Destination manifest file. It is overwritten if it exists,
            and its parent directory is created when missing.
        root_path: Directory the relative paths are resolved against when
            opening the audio; also written as the manifest header line.
        files: Paths of the WAV files to list.
        sample_rate: Required frame rate in Hz. Files with any other rate
            are printed to stdout and left out of the manifest.

    Raises:
        wave.Error: If a file cannot be parsed by the ``wave`` module.
        OSError: If the manifest or an audio file cannot be opened.
    """
    # Opening the manifest would fail if its directory does not exist yet.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    nsamples = []
    with open(out_path, 'w', encoding='utf-8') as f:
        f.write(root_path + '\n')
        for wav_path in tqdm.tqdm(files):
            # Paths may use the OS-native separator (e.g. "\\" on Windows);
            # normalise before splitting so the manifest always uses "/".
            p = "/".join(wav_path.replace(os.sep, "/").split("/")[-3:])
            with wave.open(os.path.join(root_path, p), 'r') as audio_file:
                sr = audio_file.getframerate()
                nsample = audio_file.getnframes()
            if sr != sample_rate:
                # Report the rejected file and keep it out of the manifest.
                print(p)
                continue
            nsamples.append(nsample)
            f.write(f'{p}\t{nsample}\n')
    # default=0 avoids a ValueError when no file was accepted.
    print("Max Sample: ", max(nsamples, default=0))
def main():
    """Export the manifest ``tsv_dir/genshin.tsv`` for the Genshin dataset.

    Collects every ``*.wav`` file under the ``chinese``, ``english``,
    ``japanese`` and ``korean`` ``wav`` folders of
    ``datasets/genshin-20220915`` (relative to the current working
    directory) and passes them to ``write_tsv`` together with the absolute
    dataset root.
    """
    dataset_dir = "datasets/genshin-20220915"
    languages = ("chinese", "english", "japanese", "korean")

    wav_files = []
    for language in languages:
        # glob returns matches in arbitrary order; sort each language's
        # files so the generated manifest is reproducible between runs.
        wav_files.extend(sorted(glob.glob(f"{dataset_dir}/{language}/wav/*.wav")))

    write_tsv("tsv_dir/genshin.tsv", os.path.abspath(dataset_dir), wav_files)
# Script entry point: run the export only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()