-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy path01_download_data_model.sh
executable file
·86 lines (75 loc) · 2.89 KB
/
01_download_data_model.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/bin/bash
source env.sh
# Fetch and prepare the libri_dev and libri_test data sets.
# For each set whose data/<set>/wav.scp is missing:
#   1. prompt for the organisers' password unless $password is already
#      non-empty,
#   2. download corpora/<set>.tar.gz over SFTP unless it is already cached,
#   3. unpack it and normalise the transcripts in data/<set>/text.
# Exits with status 1 as soon as any of these steps fails.
for data_set in libri_dev libri_test; do
  dir=data/$data_set
  archive=corpora/$data_set.tar.gz
  if [[ ! -f "$dir/wav.scp" ]]; then
    if [[ -z "${password:-}" ]]; then
      echo "Enter password provided by the organisers (check README.md registration):"
      # -r keeps backslashes in the password literal; -s hides the typed input.
      read -rs password
      echo
    fi

    # Drop any half-prepared directory left over from an earlier run.
    if [[ -d "$dir" ]]; then
      rm -r -- "$dir"
    fi

    if [[ ! -f "$archive" ]]; then
      mkdir -p corpora || exit 1
      # NOTE(review): "[email protected]" looks like an e-mail-obfuscation
      # placeholder rather than a real user@host -- restore the actual SFTP
      # address before running.
      # The subshell keeps the caller's working directory untouched, even when
      # the transfer fails. The here-document must stay at column 0.
      (
        cd corpora || exit 1
        sshpass -p "$password" sftp [email protected] <<EOF
cd /challengedata/corpora
get $data_set.tar.gz
bye
EOF
      )
      sftp_status=$?
      # sftp fed from stdin may return 0 even when "get" failed, so also
      # verify that the archive really arrived. Remove a partial file so the
      # next run does not mistake it for a complete download.
      if (( sftp_status != 0 )) || [[ ! -f "$archive" ]]; then
        echo "Failed to download $data_set.tar.gz" >&2
        rm -f -- "$archive"
        exit 1
      fi
    fi

    echo " Unpacking $data_set data set..."
    tar -xf "$archive" || exit 1
    if [[ ! -f "$dir/text" ]]; then
      echo "File $dir/text does not exist" >&2
      exit 1
    fi

    # Leave the first space-separated field of every line untouched; in the
    # rest of the line replace , ! ? . with spaces, squeeze runs of spaces
    # and upper-case the text, then glue both halves back together.
    cut -d' ' -f1 "$dir/text" > "$dir/text1" || exit 1
    cut -d' ' -f2- "$dir/text" \
      | sed -r 's/,|!|\?|\./ /g' \
      | sed -r 's/ +/ /g' \
      | awk '{print toupper($0)}' > "$dir/text2" || exit 1
    paste -d' ' "$dir/text1" "$dir/text2" > "$dir/text" || exit 1
    rm -- "$dir/text1" "$dir/text2"
  fi
done
# Download and unpack LibriSpeech train-clean-360 into corpora/ unless the
# extracted directory is already present. Exits with status 1 if the
# download or the extraction fails.
check=corpora/LibriSpeech/train-clean-360
if [[ ! -d "$check" ]]; then
  mkdir -p corpora || exit 1
  if [[ ! -f corpora/train-clean-360.tar.gz ]]; then
    echo "Download train-clean-360..."
    # Download under a temporary name and rename only on success, so an
    # interrupted transfer is never mistaken for a complete archive later.
    # NOTE(review): --no-check-certificate disables TLS verification; it is
    # kept from the original command -- confirm it is still required.
    if ! wget --no-check-certificate \
        -O corpora/train-clean-360.tar.gz.part \
        https://www.openslr.org/resources/12/train-clean-360.tar.gz; then
      echo "Failed to download train-clean-360.tar.gz" >&2
      rm -f -- corpora/train-clean-360.tar.gz.part
      exit 1
    fi
    mv -- corpora/train-clean-360.tar.gz.part corpora/train-clean-360.tar.gz || exit 1
  fi
  echo "Unpacking train-clean-360"
  # -C extracts inside corpora/ without changing the working directory.
  tar -xvzf corpora/train-clean-360.tar.gz -C corpora || exit 1
fi
# Marker directories: when one exists, the matching archive is treated as
# already downloaded and unpacked.
check_data=data/libri_dev_enrolls
check_model=exp/asv_pre_ecapa

# Download and unpack the kaldi-format data directories (data.zip) unless
# $check_data already exists. Exits with status 1 on failure.
if [[ ! -d "$check_data" ]]; then
  if [[ ! -f data.zip ]]; then
    echo "Download VPC kaldi format datadir..."
    # Download under a temporary name and rename only on success, so a
    # truncated data.zip is never cached as if it were complete.
    if ! wget -O data.zip.part \
        https://github.com/DigitalPhonetics/VoicePAT/releases/download/v2/data.zip; then
      echo "Failed to download data.zip" >&2
      rm -f -- data.zip.part
      exit 1
    fi
    mv -- data.zip.part data.zip || exit 1
  fi
  echo "Unpacking data"
  unzip data.zip
  unzip_status=$?
  # unzip returns 1 for warnings that still completed; anything above that
  # is a real failure.
  if (( unzip_status > 1 )); then
    echo "Failed to unpack data.zip" >&2
    exit 1
  fi
fi
# Download and unpack the pretrained ASV/ASR evaluation models
# (pre_model.zip) unless $check_model already exists.
# Exits with status 1 on failure.
if [[ ! -d "$check_model" ]]; then
  if [[ ! -f pre_model.zip ]]; then
    echo "Download pretrained ASV & ASR models trained using original train-clean-360..."
    # Download under a temporary name and rename only on success, so a
    # truncated pre_model.zip is never cached as if it were complete.
    if ! wget -O pre_model.zip.part \
        https://github.com/DigitalPhonetics/VoicePAT/releases/download/v2/pre_model.zip; then
      echo "Failed to download pre_model.zip" >&2
      rm -f -- pre_model.zip.part
      exit 1
    fi
    mv -- pre_model.zip.part pre_model.zip || exit 1
  fi
  echo "Unpacking pretrained evaluation models"
  unzip pre_model.zip
  unzip_status=$?
  # unzip returns 1 for warnings that still completed; anything above that
  # is a real failure.
  if (( unzip_status > 1 )); then
    echo "Failed to unpack pre_model.zip" >&2
    exit 1
  fi
fi
# Download the pretrained models of the GAN-based anonymization system into
# models/. They are only needed when GAN anonymization is performed.

#######################################
# Fetch the three release archives into models/, unpack them there and
# delete the archives afterwards.
# Returns: 0 on success, 1 as soon as a download or an extraction fails.
#######################################
fetch_gan_models() {
  local release=https://github.com/DigitalPhonetics/speaker-anonymization/releases/download/v2.0
  local name status

  mkdir -p models || return 1
  for name in anonymization asr tts; do
    wget -q -O "models/$name.zip" "$release/$name.zip" || return 1
  done
  for name in asr tts anonymization; do
    unzip -oq "models/$name.zip" -d models
    status=$?
    # unzip returns 1 for warnings that still completed; treat only higher
    # codes as failures.
    (( status <= 1 )) || return 1
  done
  rm -- models/*.zip
}

if [[ ! -d models ]]; then
  echo "Download pretrained models of GAN-basd speaker anonymization system, only if you use this method to anonymize data.."
  if ! fetch_gan_models; then
    echo "Failed to download or unpack the GAN anonymization models" >&2
    # The existence of models/ is what skips this step on later runs, so a
    # half-filled directory must not survive a failure. It did not exist
    # before this block ran, so everything in it was created here.
    rm -rf -- models
    exit 1
  fi
fi