Skip to content

Commit

Permalink
fix: dump processed
Browse files: browse the repository at this point in the history
  • Loading branch information
Ming-Yan committed Apr 11, 2024
1 parent 67e478c commit d33074d
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions scripts/dump_processed.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,16 @@ def dump_dataset(output, fname, alljson):
jsonlist = glob.glob(alljson) if "*" in alljson else alljson.split(",")
print("Original jsons:", jsonlist)
oldf, newf = {}, {}
for j in jsonlist:
old = json.load(open(j))
for o in old.keys():
oldf[o] = old[o]
oldf = json.load(open(j))
for m in output.keys():
for f in output[m].keys():
newf[f] = list(output[m][f]["fname"])
newf[f] = list(set(output[m][f]["fname"]))
failed = {}
for t in oldf.keys():
failed[t] = list(set(oldf[t]) - set(newf[t]))
for f in oldf[t]:
if f in newf[t]:
oldf[t].remove(f)
failed[t] = oldf[t]

with open(f"{fname}_failed_dataset.json", "w") as outfile:
json.dump(failed, outfile, indent=4)
Expand Down

0 comments on commit d33074d

Please sign in to comment.