forked from chesvectain/PackingData
-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathgenerate-category-labels.py
executable file
·40 lines (32 loc) · 1.25 KB
/
generate-category-labels.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
from tinyscript import *
# Categories for which a labels file is written; any other category found in
# the labels table is skipped by the main script.
CATEGORIES = ("bundler", "compressor", "cryptor", "encoder", "mutator", "protector", "virtualizer")
# File read by parse_labels() to obtain the packer/categories table.
LABELS = ts.Path("packed/README.md")
# Folder walked by the main script; parts[1] of each walked file path is
# compared (after normalization with fmt) with the packer names.
SOURCE = ts.Path("packed")
def fmt(p):
    """Return *p* lowercased with every '-' and '_' removed.

    Used to compare packer names from the labels table with folder names
    regardless of case and of dash/underscore separators.
    """
    lowered = p.lower()
    return re.sub(r"[-_]", "", lowered)
def parse_labels():
    """Parse the packer/categories table from the LABELS file.

    Every line up to and including the first one starting with ``---`` (the
    table separator) is skipped. Each following non-blank line is expected to
    hold exactly two ``|``-separated cells: a packer name and a
    comma-separated list of categories.

    Returns:
        A dict mapping each category name to the list of packer names
        (normalized with ``fmt``) listed under it, in file order.

    Raises:
        ValueError: if a non-blank table row does not have exactly two cells.
    """
    packers, in_table = {}, False
    for raw in LABELS.read_lines():
        if not in_table:
            # Everything before (and including) the separator row is header.
            if raw.startswith(b"---"):
                in_table = True
            continue
        row = raw.decode().strip()
        if not row:
            # Blank lines (e.g. at the end of the file) carry no label.
            continue
        cells = row.split("|")
        if len(cells) != 2:
            raise ValueError(f"Malformed labels row (expected 'packer|categories'): {row!r}")
        # Strip the packer cell as well, so that 'UPX | compressor' yields
        # 'upx' and can match a folder name.
        packer = fmt(cells[0].strip())
        for category in cells[1].split(","):
            category = category.strip()
            if category:
                packers.setdefault(category, []).append(packer)
    return packers
if __name__ == '__main__':
    initialize()
    # Keep only the categories that get a labels file; packer names go into a
    # set because they are only used for membership tests below.
    selected = {c: set(names) for c, names in parse_labels().items() if c in CATEGORIES}
    # Hash every file once, up front: neither the digest nor the normalized
    # parts[1] of the path depends on the category, so redoing the walk for
    # each category would only repeat the same work.
    samples = []
    if selected:
        samples = [(hashlib.sha256_file(path), fmt(path.parts[1]))
                   for path in SOURCE.walk(filter_func=ts.is_file)]
    for category, packers in selected.items():
        logger.info(f"Processing category '{category}'...")
        # A file is labelled with the category when its folder name matches
        # one of the category's packers, and with None otherwise.
        labels = {digest: category if name in packers else None for digest, name in samples}
        logger.debug("Saving to JSON...")
        with ts.Path(f"labels-{category}.json").open('w') as f:
            json.dump(labels, f, indent=4)