-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path8_upload.py
37 lines (29 loc) · 1.02 KB
/
8_upload.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from huggingface_hub import HfApi, CommitOperationAdd
import utils
REPO_ID = "wormtooth/MNBVC-judgment"
def get_uploaded_files(api):
    """Return the names of the entries already present under ``data/`` in the repo.

    Args:
        api: client exposing ``list_repo_tree`` (an ``HfApi`` in this script).

    Returns:
        A set with the last path component of every entry listed under the
        ``data`` folder of the ``REPO_ID`` dataset repo. The set is empty when
        that folder does not exist yet.
    """
    # Imported here so the function brings its own dependency into scope.
    from huggingface_hub.utils import EntryNotFoundError

    try:
        # list_repo_tree is consumed lazily, so the lookup error for a missing
        # folder surfaces during iteration; keep the iteration inside the try.
        return {
            entry.path.rsplit("/", 1)[-1]
            for entry in api.list_repo_tree(
                REPO_ID, path_in_repo="data", repo_type="dataset"
            )
        }
    except EntryNotFoundError:
        # No "data" folder in the repo yet (e.g. very first upload):
        # nothing has been uploaded, so every local file is still pending.
        return set()
def upload_files(api, paths):
    """Upload local files into the ``data/`` folder of the repo in one commit.

    Args:
        api: client exposing ``create_commit`` (an ``HfApi`` in this script).
        paths: iterable of path objects with a ``name`` attribute; each file is
            stored in the repo as ``data/<name>``.

    Returns:
        None. When ``paths`` is empty nothing is sent to the Hub.
    """
    # Materialise once so any iterable is accepted and len() is always valid.
    paths = list(paths)
    if not paths:
        # Nothing to commit; avoid a pointless request for an empty batch.
        return

    operations = [
        CommitOperationAdd(path_in_repo=f"data/{path.name}", path_or_fileobj=path)
        for path in paths
    ]
    msg = f"Upload {len(paths)} files"
    api.create_commit(REPO_ID, operations, repo_type="dataset", commit_message=msg)
if __name__ == "__main__":
    # Upload every local "*.jsonl.gz" result file whose name is not already
    # listed in the dataset repo, one commit per batch of files.
    hub_api = HfApi()
    already_uploaded = get_uploaded_files(hub_api)

    local_data_dir = utils.get_results_path() / "data"

    # Keep only the archives that the repo does not list yet.
    pending = []
    for candidate in local_data_dir.glob("*.jsonl.gz"):
        if candidate.name in already_uploaded:
            continue
        pending.append(candidate)

    # Walk the pending list in consecutive slices of at most `chunk_size`.
    chunk_size = 20
    start = 0
    while start < len(pending):
        chunk = pending[start:start + chunk_size]
        upload_files(hub_api, chunk)
        start += chunk_size