diff --git a/config.json b/config.json
new file mode 100644
index 0000000000..fc3e2ce8c6
--- /dev/null
+++ b/config.json
@@ -0,0 +1,5 @@
+{
+    "outputs_path": "outputs",
+    "shard_level": 3,
+    "shard_fill": "z"
+}
\ No newline at end of file
diff --git a/scripts/shard_repo.py b/scripts/shard_repo.py
index 1aeb11ac49..489f333e26 100644
--- a/scripts/shard_repo.py
+++ b/scripts/shard_repo.py
@@ -5,19 +5,56 @@
 
 import os
 import glob
+import json
 import subprocess
+import sys
 
 import tqdm
 
-outputs = glob.glob("outputs/*.json")
+# Defaults used when config.json is absent, unreadable, or missing a key.
+DEFAULT_CONFIG = {
+    "outputs_path": "outputs",
+    "shard_level": 3,
+    "shard_fill": "z",
+}
+
+try:
+    with open("config.json", encoding="utf-8") as f:
+        loaded = json.load(f)
+except FileNotFoundError:
+    print("no config.json found, using defaults", file=sys.stderr)
+    loaded = {}
+except (OSError, ValueError) as err:
+    # An unreadable or malformed file still falls back to the defaults, but
+    # say so accurately instead of claiming the file is missing.
+    print(
+        f"could not load config.json ({err}), using defaults",
+        file=sys.stderr,
+    )
+    loaded = {}
+
+if not isinstance(loaded, dict):
+    print("config.json is not a JSON object, using defaults", file=sys.stderr)
+    loaded = {}
+
+# Keys absent from config.json keep their default values.
+config = {**DEFAULT_CONFIG, **loaded}
+
+outputs_path = config["outputs_path"]
+shard_level = config["shard_level"]
+shard_fill = config["shard_fill"]
+
+
+outputs = glob.glob(f"{outputs_path}/*.json")
 
 for orig_out in tqdm.tqdm(outputs):
     out = os.path.basename(orig_out)
     chars = [c for c in out if c.isalnum()]
-    while len(chars) < 3:
-        chars.append("z")
+    # Pad short names so each file has shard_level directory components.
+    while len(chars) < shard_level:
+        chars.append(shard_fill)
 
-    final_pth = os.path.join("outputs", chars[0], chars[1], chars[2])
+    final_pth = os.path.join(outputs_path, *chars[:shard_level])
     os.makedirs(final_pth, exist_ok=True)
 
     final_out = os.path.join(final_pth, out)