update jnv and jvnv

bagustris · Feb 4, 2025 · 3978a91 · 3978a91
1 parent badb8fa
commit 3978a91
Show file tree

Hide file tree

Showing 5 changed files with 21 additions and 9 deletions.
diff --git a/data/jnv/exp.ini b/data/jnv/exp.ini
@@ -1,6 +1,6 @@
 [EXP]
-root = ./tmp
-name = exp_jnv_hubert_knn
+root = ./results
+name = exp_jnv_hubert
 runs = 1
 epochs = 1
 save = True
@@ -19,12 +19,13 @@ test.type = csv
 test.absolute_path = False
 test.split_strategy = test
 target = emotion
-; labels = ['angry', 'disgust', 'surprise', 'sad']
+labels = ['angry', 'disgust', 'surprise', 'sad']
 [FEATS]
 type = ['hubert-xlarge-ll60k']
 ; scale = standard
 [MODEL]
-type = knn
+type = svm
 ; save = True
+C_val = 10
 [RESAMPLE]
 replace = True
diff --git a/data/jnv/process_database.py b/data/jnv/process_database.py
@@ -28,6 +28,12 @@ def main(args):
     train_df, dev_df, test_df = np.split(df.sample(frac=1, random_state=42), [
                                          int(.8 * len(df)), int(.9 * len(df))])
 
+    # print number of files in each set
+    print(f"Number of files in train set: {len(train_df)}")
+    print(f"Number of files in dev set: {len(dev_df)}")
+    print(f"Number of files in test set: {len(test_df)}")
+    print(f"Number of files in total: {len(df)}")
+
     train_df.to_csv(os.path.join(output_dir, "jnv_train.csv"), index=False)
     dev_df.to_csv(os.path.join(output_dir, "jnv_dev.csv"), index=False)
     test_df.to_csv(os.path.join(output_dir, "jnv_test.csv"), index=False)
@@ -38,7 +44,7 @@ def main(args):
     parser.add_argument(
         "--data_dir",
         type=str,
-        default="jnv_corpus_ver2/JNV/",
+        default="JNV/",
         help="Directory containing audio files")
     parser.add_argument(
         "--output_dir",

diff --git a/data/jvnv/README.md b/data/jvnv/README.md
@@ -2,6 +2,7 @@
 
 ```bash
 wget https://ss-takashi.sakura.ne.jp/corpus/jvnv/jvnv_ver1.zip
+unzip jvnv_ver1.zip
 # process the database (nkululeko must be importable, since the script uses its find_files function)
 python3 process_database.py
 cd ../..

diff --git a/data/jvnv/exp.ini b/data/jvnv/exp.ini
@@ -1,6 +1,6 @@
 [EXP]
-root = /tmp/results/
-name = exp_jvnv_hubert_all
+root = ./results/
+name = exp_jvnv_hubert
 ; runs = 1
 ; epochs = 1
 ; save = True
@@ -19,14 +19,15 @@ test.type = csv
 test.absolute_path = False
 test.split_strategy = test
 target = emotion
-; labels = ['anger', 'fear', 'sad', 'happy']
+labels = ['anger', 'fear', 'sad', 'happy']
 ; get the number of classes from the target column automatically
 [FEATS]
 type = ['hubert-xlarge-ll60k']
 ; no_reuse = False
 scale = standard
 [MODEL]
-type = knn
+type = svm
 ; save = True
+C_val = 10
 [RESAMPLE]
 replace = True
diff --git a/data/jvnv/process_database.py b/data/jvnv/process_database.py
@@ -48,6 +48,9 @@ def process_database(data_dir, output_dir):
             writer.writerow(["file", "emotion", "gender"])
             writer.writerows(set_data)
 
+    # print number of files in each set
+    for set_name, set_data in data.items():
+        print(f"Number of files in {set_name} set: {len(set_data)}")
     print("Database processing completed.")