-
Notifications
You must be signed in to change notification settings - Fork 3
/
benchmark.py
118 lines (102 loc) · 4.1 KB
/
benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import os
import pstats
import cProfile
import argparse
from contextlib import contextmanager
from tqdm import tqdm
from cidc_schemas.template import Template
from cidc_schemas.template_reader import XlTemplateReader
from cidc_schemas.prism import (
merge_clinical_trial_metadata,
merge_artifacts,
ArtifactInfo,
prismify,
set_prism_encrypt_key,
)
@contextmanager
def profiling(run_name: str, outdir: str = "benchmark"):
    """Profile the enclosed code with cProfile and write a report to disk.

    Everything executed inside the ``with`` body is profiled. On exit,
    whether or not the body raised, the collected statistics are sorted by
    the pstats "time" key and written to ``<outdir>/<run_name>.profile.txt``.

    Args:
        run_name: Label for this step; used in the console messages, the
            report header, and the report filename.
        outdir: Directory to write the report into (defaults to
            "benchmark"). It is created, including any missing parent
            directories, if it does not exist yet.

    Raises:
        Any exception raised by the enclosed code propagates unchanged
        after the report has been written.
    """
    # makedirs(exist_ok=True) copes with nested output paths and avoids the
    # check-then-create race of isdir() followed by mkdir().
    os.makedirs(outdir, exist_ok=True)

    profiler = cProfile.Profile()
    profiler.enable()
    try:
        print(f"Running step '{run_name}'")
        yield
    finally:
        # Always stop profiling and dump the report, even when the enclosed
        # code failed; the in-flight exception (if any) keeps propagating
        # once this clause completes.
        profiler.disable()
        filename = os.path.join(outdir, f"{run_name}.profile.txt")
        with open(filename, "w", encoding="utf-8") as outfile:
            outfile.write(f"[profiler output for '{run_name}']\n\n")
            ps = pstats.Stats(profiler, stream=outfile).sort_stats("time")
            ps.print_stats()
        print(f"Wrote profiler results to {filename}")
def run(ts_path: str, mif_path: str, he_path: str, outdir: str) -> None:
    """Run and profile a typical metadata validation and merging workload.

    The workload consists of five profiled steps, each wrapped in
    ``profiling`` so that it gets its own report under ``outdir``:

    1. prismify the tissue slide shipping manifest,
    2. prismify the mIF assay metadata spreadsheet,
    3. merge placeholder artifact info for the mIF files into the mIF patch,
    4. merge the mIF patch with the tissue slide metadata,
    5. merge H&E metadata with the combined result of step 4.

    Args:
        ts_path: Path to a tissue slide metadata spreadsheet.
        mif_path: Path to an mIF metadata spreadsheet.
        he_path: Path to an H&E metadata spreadsheet.
        outdir: Directory the per-step profiler reports are written to.
    """
    # Dummy key for benchmarking purposes.
    # NOTE(review): presumably prism needs a key set before prismify runs - confirm.
    set_prism_encrypt_key("foobar")

    with profiling("1_prismify_tissue_slide_shipping_manifest", outdir):
        ts_template = Template.from_type("tissue_slide")
        ts_spreadsheet, _ = XlTemplateReader.from_excel(ts_path)
        ts_metadata, _, _ = prismify(ts_spreadsheet, ts_template)
        ts_metadata["allowed_cohort_names"] = ["Not_reported"]
        ts_metadata["allowed_collection_event_names"] = ["Baseline"]

    with profiling("2_prismify_mif_assay_metadata_spreadsheet", outdir):
        mif_template = Template.from_type("mif")
        mif_spreadsheet, _ = XlTemplateReader.from_excel(mif_path)
        mif_metadata, files, _ = prismify(mif_spreadsheet, mif_template)

    with profiling("3_merge_mif_assay_artifacts_into_mif_metadata_patch", outdir):
        # tqdm gives us a stdout progress indicator as prism iterates through the array.
        # The ArtifactInfo values are placeholders built from each file's upload
        # placeholder. NOTE(review): the meaning of each positional field is
        # defined by ArtifactInfo in cidc_schemas.prism - confirm before changing.
        artifact_info = tqdm(
            [
                ArtifactInfo(
                    f.upload_placeholder,
                    f"object/url/{f.upload_placeholder}",
                    "",
                    0,
                    "",
                    "abcd",
                )
                for f in files
            ]
        )
        mif_metadata, _ = merge_artifacts(mif_metadata, artifact_info)

    with profiling("4_merge_mif_metadata_with_tissue_slide_metadata", outdir):
        combined_metadata, _ = merge_clinical_trial_metadata(mif_metadata, ts_metadata)

    # Prismifying is deliberately left unprofiled here (it is already covered by
    # steps 1 and 2): for H&E we are only interested in how long it takes to
    # merge the new metadata into existing trial metadata.
    he_template = Template.from_type("h_and_e")
    he_spreadsheet, _ = XlTemplateReader.from_excel(he_path)
    he_metadata, _, _ = prismify(he_spreadsheet, he_template)

    with profiling("5_merge_h_and_e_metadata_into_trial", outdir):
        merge_clinical_trial_metadata(he_metadata, combined_metadata)
if __name__ == "__main__":
    # Command-line entry point: collect the three input spreadsheets and the
    # output directory, then hand them to run().
    cli = argparse.ArgumentParser(
        description="Run and profile a typical metadata validation and merging workload."
    )

    # Mandatory inputs, registered in the order they should appear in --help.
    required_inputs = (
        ("--ts-path", "path to a tissue slide metadata spreadsheet"),
        (
            "--mif-path",
            "path to an mif metadata spreadsheet with samples from the tissue slide manifest",
        ),
        ("--he-path", "path to an h&e metadata spreadsheet"),
    )
    for flag, help_text in required_inputs:
        cli.add_argument(flag, help=help_text, required=True)

    # Optional: where the per-step profiler reports are written.
    cli.add_argument(
        "--out-dir",
        default="benchmark",
        help="root directory to write profile info to",
        required=False,
    )

    options = cli.parse_args()
    run(
        ts_path=options.ts_path,
        mif_path=options.mif_path,
        he_path=options.he_path,
        outdir=options.out_dir,
    )