-
Notifications
You must be signed in to change notification settings - Fork 20
/
data_process.py
37 lines (29 loc) · 1.18 KB
/
data_process.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import os
import shutil
import subprocess
def generate_data(datasets_dir):
    """Run the ``make_dataset.py`` script of every dataset under *datasets_dir*.

    Each immediate subdirectory of ``datasets_dir`` is checked for a file
    named ``make_dataset.py``. If the file exists it is executed in a child
    process (the working directory is left unchanged) and a success or
    failure line is printed; if it does not exist a notice is printed.
    Entries of ``datasets_dir`` that are not directories are ignored.

    A failing script does not stop the run: the remaining datasets are still
    processed. ``KeyboardInterrupt`` is deliberately not caught, so Ctrl-C
    aborts the whole run.

    Args:
        datasets_dir: Path of the directory whose subdirectories are scanned.

    Raises:
        OSError: If ``datasets_dir`` cannot be listed.
    """
    import sys

    # Launch the scripts with the interpreter that runs this file, so they see
    # the same environment; fall back to PATH lookup if it is unknown.
    python = sys.executable or "python"

    for subdir in os.listdir(datasets_dir):
        subdir_path = os.path.join(datasets_dir, subdir)
        if not os.path.isdir(subdir_path):
            continue

        script_path = os.path.join(subdir_path, "make_dataset.py")
        if not os.path.isfile(script_path):
            print(f"*******No make_dataset.py in {subdir_path}*******")
            continue

        try:
            subprocess.run([python, script_path], check=True)
        except (subprocess.CalledProcessError, OSError):
            # Non-zero exit status, or the interpreter could not be started.
            print(f"Fail make_dataset.py in {subdir_path}")
        else:
            print(f"Success make_dataset.py in {subdir_path}")
def del_data(datasets_dir):
    """Delete ``data`` folders under *datasets_dir* that contain ``.jsonl`` entries.

    The tree rooted at ``datasets_dir`` is walked top-down. Whenever a
    directory has a child named ``data``, the direct entries of that child
    are inspected; if at least one entry name ends with ``.jsonl`` the whole
    ``data`` folder is removed recursively and a confirmation line is
    printed. ``data`` folders without such an entry are left in place and
    are still traversed.

    Args:
        datasets_dir: Root directory of the tree to clean.

    Raises:
        OSError: If a ``data`` folder cannot be listed or removed.
    """
    for root, dirs, _files in os.walk(datasets_dir):
        if "data" not in dirs:
            continue

        data_dir_path = os.path.join(root, "data")
        has_jsonl = any(
            name.endswith(".jsonl") for name in os.listdir(data_dir_path)
        )
        if has_jsonl:
            shutil.rmtree(data_dir_path)
            print(f"success delete data folder {data_dir_path}")
            # Prune in place so os.walk does not try to descend into the
            # directory that was just removed.
            dirs.remove("data")
if __name__ == "__main__":
    # The dataset root is resolved relative to the current working directory.
    generate_data("datasets")
    # Cleanup is opt-in: uncomment to remove the generated data folders.
    # del_data("datasets")