#!/usr/bin/env python3
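"""Scan the throughput of a test program over numbers of threads and EDM streams.

For each (streams, threads) point the program is run (optionally pinned to
specific cores with taskset), the "Processed ... events" summary line is parsed
from its log file, and the measured throughput is appended to a JSON output
file, so a partially completed scan can be resumed.
"""
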
import os
import re
import json
import time
import argparse
import subprocess
import multiprocessing

# Defaults for the number of events per stream: n_blocks_per_stream[program] * n_events_unit
n_events_unit = 1000
n_blocks_per_stream = {
    "fwtest": 1,
    "cuda": {"": 100, "transfer": 100},
    "cudauvm": {"": 100, "transfer": 100},
    "cudacompat": {"": 8},
}

result_re = re.compile(r"Processed (?P<events>\d+) events in (?P<time>\S+) seconds, throughput (?P<throughput>\S+) events/s")
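# Example of a log line this pattern is meant to match (numbers are illustrative):
#   Processed 100000 events in 12.50 seconds, throughput 8000.0 events/s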

def printMessage(*args):
    print(time.strftime("%y-%m-%d %H:%M:%S"), *args)

def throughput(output):
    # Find the summary line in the log and return (throughput in events/s, wall time in seconds)
    for line in output:
        m = result_re.search(line)
        if m:
            printMessage(line.rstrip())
            return (float(m.group("throughput")), float(m.group("time")))
    raise Exception("Did not find throughput from the log")

def partition_cores(cores, nth):
    if nth >= len(cores):
        return (cores, [])
    return (cores[0:nth], cores[nth:])
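# e.g. partition_cores(["0", "1", "2", "3"], 2) -> (["0", "1"], ["2", "3"]);
# the first nth cores are assigned to the measured job, the remainder is returned as cores_bkg.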

def run(nev, nstr, cores_main, opts, logfilename):
    nth = len(cores_main)
    with open(logfilename, "w") as logfile:
        taskset = []
        nvprof = []
        # Assemble the command line; optionally pin the job to the given cores with taskset
        command = [opts.program, "--maxEvents", str(nev), "--numberOfStreams", str(nstr), "--numberOfThreads", str(nth)] + opts.args
        if opts.taskset:
            taskset = ["taskset", "-c", ",".join(cores_main)]
        logfile.write(" ".join(taskset+command))
        logfile.write("\n----\n")
        logfile.flush()
        if opts.dryRun:
            print(" ".join(taskset+command))
            return (0, 0)
        p = subprocess.Popen(taskset+command, stdout=logfile, stderr=subprocess.STDOUT, universal_newlines=True)
        try:
            p.wait()
        except KeyboardInterrupt:
            try:
                p.terminate()
            except OSError:
                pass
            p.wait()
    if p.returncode != 0:
        raise Exception("Got return code %d, see output in the log file %s" % (p.returncode, logfilename))
    # Parse the throughput and wall time back from the log file
    with open(logfilename) as logfile:
        return throughput(logfile)

def main(opts):
    cores = [str(x) for x in range(0, multiprocessing.cpu_count())]
    maxThreads = len(cores)
    if opts.maxThreads > 0:
        maxThreads = min(maxThreads, opts.maxThreads)

    # Build the list of thread counts to scan
    nthreads = range(opts.minThreads, maxThreads+1)
    if len(opts.numThreads) > 0:
        nthreads = [x for x in opts.numThreads if x >= opts.minThreads and x <= maxThreads]

    # By default use as many streams as threads; otherwise scan all (streams, threads) combinations
    n_streams_threads = [(i, i) for i in nthreads]
    if len(opts.numStreams) > 0:
        n_streams_threads = [(s, t) for t in nthreads for s in opts.numStreams]

    # Number of events per stream: from the command line, or from the per-program defaults at the top
    nev_per_stream = opts.eventsPerStream
    if nev_per_stream is None:
        tmp = n_blocks_per_stream.get(os.path.basename(opts.program), None)
        if tmp is None:
            raise Exception("No default number of event blocks for program %s, and --eventsPerStream was not given" % opts.program)
        if isinstance(tmp, dict):
            if "--transfer" in opts.args:
                eventBlocksPerStream = tmp["transfer"]
            else:
                eventBlocksPerStream = tmp[""]
        else:
            eventBlocksPerStream = tmp
        nev_per_stream = eventBlocksPerStream * n_events_unit

    data = dict(
        program=opts.program,
        args=" ".join(opts.args),
        results=[]
    )
    outputJson = opts.output+".json"

    # Unless overwriting, load earlier results and skip the points that are already there
    alreadyExists = set()
    if not opts.overwrite and os.path.exists(outputJson):
        with open(outputJson) as inp:
            data = json.load(inp)
        if not opts.append:
            for res in data["results"]:
                alreadyExists.add( (res["streams"], res["threads"]) )

    stop = False
    for nstr, nth in n_streams_threads:
        if nstr == 0:
            nstr = nth
        if (nstr, nth) in alreadyExists:
            continue
        if opts.maxStreamsToAddEvents > 0 and nstr > opts.maxStreamsToAddEvents:
            nev = nev_per_stream * opts.maxStreamsToAddEvents
        else:
            nev = nev_per_stream*nstr
        (cores_main, cores_bkg) = partition_cores(cores, nth)
        if opts.warmup:
            printMessage("Warming up")
            run(nev, nstr, cores_main, opts, opts.output+"_warmup.txt")
            print()
            opts.warmup = False
        msg = "Number of streams %d threads %d events %d" % (nstr, nth, nev)
        if opts.taskset:
            msg += ", running on cores %s" % ",".join(cores_main)
        printMessage(msg)
        throughputs = []
        for i in range(opts.repeat):
            tryAgain = opts.tryAgain
            while tryAgain > 0:
                try:
                    (th, wtime) = run(nev, nstr, cores_main, opts, opts.output+"_log_nstr%d_nth%d_n%d.txt"%(nstr, nth, i))
                    break
                except Exception as e:
                    tryAgain -= 1
                    if tryAgain == 0:
                        raise
                    print("Got exception (see below), trying again ({} times left)".format(tryAgain))
                    print("--------------------")
                    print(str(e))
                    print("--------------------")
            if opts.dryRun:
                continue

            throughputs.append(th)
            data["results"].append(dict(
                threads=nth,
                streams=nstr,
                events=nev,
                throughput=th
            ))
            # Save results after each test
            with open(outputJson, "w") as out:
                json.dump(data, out, indent=2)
            if opts.stopAfterWallTime > 0 and wtime > opts.stopAfterWallTime:
                stop = True
                break

        thr = 0
        if len(throughputs) > 0:
            thr = sum(throughputs)/len(throughputs)
        printMessage("Number of streams %d threads %d, average throughput %f" % (nstr, nth, thr))
        print()
        if stop:
            print("Reached max wall time of %d s, stopping scan" % opts.stopAfterWallTime)
            break

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run a scan of a given test program")
    parser.add_argument("program", type=str,
                        help="Path to the test program to run")
    parser.add_argument("-o", "--output", type=str, default="result",
                        help="Prefix of output JSON and log files. If the output JSON file exists, it will be updated (see also --overwrite) (default: 'result')")
    parser.add_argument("--overwrite", action="store_true",
                        help="Overwrite the output JSON instead of updating it")
    parser.add_argument("--append", action="store_true",
                        help="Append new (streams, threads) results instead of ignoring already existing points")
    parser.add_argument("--taskset", action="store_true",
                        help="Use taskset to explicitly set the cores to run on")
    parser.add_argument("--minThreads", type=int, default=1,
                        help="Minimum number of threads to use in the scan (default: 1)")
    parser.add_argument("--maxThreads", type=int, default=-1,
                        help="Maximum number of threads to use in the scan (default: -1 for the number of cores)")
    parser.add_argument("--numThreads", type=str, default="",
                        help="Comma-separated list of numbers of threads to use in the scan (default: empty for all)")
    parser.add_argument("--numStreams", type=str, default="",
                        help="Comma-separated list of numbers of streams to use in the scan (default: empty for always the same as the number of threads; a value of 0 also means the same as the number of threads). If both the numbers of threads and of streams have more than one element, a 2D scan is done over all combinations")
    parser.add_argument("--eventsPerStream", type=int, default=None,
                        help="Number of events to be used per EDM stream (default: the per-program values hardcoded at the top of this script)")
    parser.add_argument("--maxStreamsToAddEvents", type=int, default=-1,
                        help="Maximum number of streams for which events are added (default: -1 for no limit)")
    parser.add_argument("--stopAfterWallTime", type=int, default=-1,
                        help="Stop the scan after the wall time of a run exceeds this many seconds (default: -1 for no limit)")
    parser.add_argument("--repeat", type=int, default=1,
                        help="Repeat each point this many times (default: 1)")
    parser.add_argument("--tryAgain", type=int, default=1,
                        help="In case of failure, run a point at most this many times before giving up (default: 1)")
    parser.add_argument("--warmup", action="store_true",
                        help="Run the command once before starting the measurements")
    parser.add_argument("--dryRun", action="store_true",
                        help="Print out commands, don't actually run anything")
    parser.add_argument("args", nargs=argparse.REMAINDER)

    opts = parser.parse_args()
    if opts.minThreads <= 0:
        parser.error("minThreads must be > 0, got %d" % opts.minThreads)
    if opts.maxThreads <= 0 and opts.maxThreads != -1:
        parser.error("maxThreads must be > 0 or -1, got %d" % opts.maxThreads)
    if opts.numThreads != "":
        opts.numThreads = [int(x) for x in opts.numThreads.split(",")]
    if opts.numStreams != "":
        opts.numStreams = [int(x) for x in opts.numStreams.split(",")]

    main(opts)
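
# Example invocation (illustrative; the binary path, output prefix, and option values
# are placeholders, not recommendations). Because the trailing arguments are collected
# with argparse.REMAINDER and forwarded to the test program, the scan options should be
# given before the program path:
#   ./run-scan.py -o scan_cuda --taskset --numThreads 1,2,4,8 --repeat 3 ./cuda --transfer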