# Python script to run DaCapo benchmarks and collect performance statistics
# Usage: python3 runDaCapo.py
# Note: dacapo-9.12-MR1-bach.jar must be present in the current directory
# The script can be customized as follows:
# 1. Specify which benchmarks to run ==> change the "benchmarks" list below
# 2. Specify the number of iterations for each benchmark (to warm up the JVM) ==> change "benchmarkOpts" below
# 3. Specify the number of runs for each benchmark ==> change "numRuns" below
# 4. Specify the JVM options to use ==> change the "jvmOptions" list below
# 5. Specify which JDKs to use ==> change the "jdks" list below
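# Example customization (hypothetical paths): to compare two JDK builds on two
# benchmarks with a fixed 1 GB heap, one might set
#   benchmarks = ["fop", "luindex"]
#   jvmOptions = ["-Xms1G -Xmx1G"]
#   jdks       = ["/path/to/baseline/jdk", "/path/to/candidate/jdk"]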
import re # for regular expressions
import sys # for accessing parameters and exit
import shlex, subprocess
import logging
import numpy as np
numRuns = 100 # number of runs to use for each benchmark in each configuration
benchmarkOpts = "--iterations 1 -s default" # not all benchmarks can use size large. Better to use "default"
numlastIterForComputingAvg = 1 # number of last iterations for each JVM used for computing the average execution time
# Must not exceed --iterations in benchmarkOpts
doColdRun = True # If True, destroy the SCC before each benchmark
affinity = "taskset 0x3" # pin the benchmark JVM to CPUs 0 and 1
# Change level to logging.DEBUG below for more verbose output
logging.basicConfig(level=logging.INFO, format='%(asctime)s :: %(levelname)s :: (%(threadName)-6s) :: %(message)s',)
# List of benchmarks to run
# avrora batik eclipse fop h2 jython luindex lusearch lusearch-fix pmd sunflow tomcat tradebeans tradesoap xalan
benchmarks = [
#"avrora",
#"batik", Not working at all with OpenJDK
#"eclipse", # only working with Java8
"fop",
#"h2",
#"jython", # Does not run Java17
#"luindex",
#"lusearch-fix",
#"pmd",
#"sunflow",
#"tomcat", # does not work at all
#"tradebeans", # does not work at all
#"tradesoap", # does not work at all
#"xalan"
]
# List of JVM options to try
jvmOptions = [
#"",
"-Xms1G -Xmx1G",
]
jdks = [
#"/home/mpirvu/sdks/OpenJDK17U-jre_x64_linux_hotspot_17.0.8.1_1",
"/home/mpirvu/FullJava17/openj9-openjdk-jdk17/build/linux-x86_64-server-release/images/jdk"
]
def destroySCC(jvm, jvmOpts):
    print(jvm, jvmOpts)
    cmd = ""
    # Parse the options and try to figure out the location of the SCC
    # This should be something like -Xshareclasses:cacheDir=<name>
    # Match alphanumerics, '_', '.' and '/', but not a comma
    m = re.search(r'cacheDir=([a-zA-Z0-9_./]+)', jvmOpts)
    if m:
        cacheDir = m.group(1)
        cmd = f"{jvm}/bin/java -Xshareclasses:cacheDir={cacheDir},destroyall"
    else:
        cmd = f"{jvm}/bin/java -Xshareclasses:destroyall"
    #logging.info("Destroying SCC with: {cmd}".format(cmd=cmd))
    try:
        output = subprocess.check_output(shlex.split(cmd), universal_newlines=True, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        # If the SCC does not exist, we get a non-zero return code
        output = e.output
    except subprocess.SubprocessError as e:
        logging.warning("SubprocessError clearing SCC: {e}".format(e=e))
        output = str(e)
    logging.info("{output}".format(output=output))
    return 0
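# Example (hypothetical cache directory): a jvmOptions entry that keeps the SCC in a
# custom location, which destroySCC() above would detect and clear before a cold run:
#   "-Xshareclasses:cacheDir=/tmp/dacapo_scc -Xms1G -Xmx1G"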
def runBenchmarkOnce(benchmarkName, jvm, jvmOpts, benchIter):
    '''
    Returns the execution time in milliseconds as a float,
    or NaN if the experiment fails
    '''
    cmd = f"{affinity} {jvm}/bin/java {jvmOpts} -jar dacapo-9.12-MR1-bach.jar {benchmarkOpts} {benchmarkName}"
    logging.info("Starting: {cmd}".format(cmd=cmd))
    try:
        output = subprocess.check_output(shlex.split(cmd), universal_newlines=True, stderr=subprocess.STDOUT)
    except subprocess.SubprocessError as e:
        # A run that crashes or exits with a non-zero code counts as a failed experiment
        logging.warning("Benchmark run failed: {e}".format(e=e))
        return np.nan
    # Parse the output and look for "PASSED in nnnn msec" or "completed warmup nnn in nnnn msec"
    #===== DaCapo 9.12-MR1 fop starting warmup 1 =====
    #===== DaCapo 9.12-MR1 fop completed warmup 1 in 1672 msec =====
    #...
    #===== DaCapo 9.12-MR1 fop starting =====
    #===== DaCapo 9.12-MR1 fop PASSED in 242 msec =====
    lines = output.splitlines()
    pattern1 = re.compile(r'^===== DaCapo .+ PASSED in (\d+) msec ====')
    pattern2 = re.compile(r'^===== DaCapo .+ completed warmup \d+ in (\d+) msec ====')
    foundPassed = False
    runTimes = np.full(benchIter, fill_value=np.nan, dtype=float)
    i = 0
    for line in lines:
        #print(line)
        m = pattern1.match(line)
        if m:
            runTimes[i] = float(m.group(1))
            foundPassed = True
            i = i + 1
        else:
            m = pattern2.match(line)
            if m:
                runTimes[i] = float(m.group(1))
                i = i + 1
    # Compute the average time of the last N iterations
    avgTime = np.nanmean(runTimes[-numlastIterForComputingAvg:])
    print(avgTime)
    return avgTime if foundPassed else np.nan
    #print(output)
def tdistribution(degreesOfFreedom):
    table = [6.314, 2.92, 2.353, 2.132, 2.015, 1.943, 1.895, 1.860, 1.833, 1.812, 1.796, 1.782, 1.771, 1.761, 1.753, 1.746, 1.740, 1.734, 1.729, 1.725]
    if degreesOfFreedom < 1:
        return -1.0
    if degreesOfFreedom <= 20:
        return table[degreesOfFreedom-1]
    if degreesOfFreedom < 30:
        return 1.697
    if degreesOfFreedom < 40:
        return 1.684
    if degreesOfFreedom < 50:
        return 1.676
    if degreesOfFreedom < 60:
        return 1.671
    if degreesOfFreedom < 70:
        return 1.667
    if degreesOfFreedom < 80:
        return 1.664
    if degreesOfFreedom < 90:
        return 1.662
    if degreesOfFreedom < 100:
        return 1.660
    return 1.65
#import scipy.stats as st
#def computeCI95(a):
#    results = st.t.interval(0.95, len(a)-1, loc=0, scale=st.sem(a))
#    return 100.0 * results[1] / st.tmean(a)
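# Optional sketch (assumes scipy is installed and the import above is uncommented):
# the lookup table in tdistribution() can also be obtained from scipy, since
# st.t.ppf(0.95, df) returns the same one-tailed critical values
# (6.314 for df=1, 2.920 for df=2, ...).
#def tdistributionScipy(degreesOfFreedom):
#    if degreesOfFreedom < 1:
#        return -1.0
#    return float(st.t.ppf(0.95, degreesOfFreedom))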
# Determine the number of iterations to use for each benchmark
m = re.compile(r'--iterations (\d+)').search(benchmarkOpts)
benchIter = int(m.group(1)) if m else sys.exit('Cannot determine the number of iterations from benchmarkOpts')
if benchIter < numlastIterForComputingAvg:
    sys.exit('The number of iterations used for computing the average cannot exceed the total number of iterations')
if doColdRun:
    print("Will do a cold run (with the SCC destroyed) before each configuration and discard its result")
# multi-dimensional array of results
results = np.full((len(benchmarks), len(jdks), len(jvmOptions), numRuns), fill_value=np.nan, dtype=float)
for bench in range(len(benchmarks)):
    for jdk in range(len(jdks)):
        for opt in range(len(jvmOptions)):
            if doColdRun:
                destroySCC(jdks[jdk], jvmOptions[opt])
                runBenchmarkOnce(benchmarks[bench], jdks[jdk], jvmOptions[opt], benchIter) # discard the cold run
            for i in range(numRuns):
                execTime = runBenchmarkOnce(benchmarks[bench], jdks[jdk], jvmOptions[opt], benchIter)
                results[bench, jdk, opt, i] = execTime
# Stats ignoring NaN values, which are due to failed experiments
mean = np.nanmean(results, axis=3)
std = np.nanstd(results, axis=3)
minTime = np.nanmin(results, axis=3)
maxTime = np.nanmax(results, axis=3)
# Count valid experiments, excluding NaN values
numValidExperiments = np.count_nonzero(~np.isnan(results), axis=3)
#print(numValidExperiments)
# Vectorized helper that applies "tdistribution" to every element of an ndarray
tdist_vec = np.vectorize(tdistribution)
# Compute 95% confidence intervals as percentages of the mean value
ci95 = tdist_vec(numValidExperiments-1) * std / np.sqrt(numValidExperiments) / mean * 100.0
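# Worked example: with 10 valid runs, tdistribution(10-1) = 1.833, so
#   ci95 = 1.833 * std / sqrt(10) / mean * 100.0   (expressed as a percentage of the mean)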
# np.percentile(s1, [25, 50, 75], interpolation='midpoint')
# Count how many non-NaN values are in the array
# np.count_nonzero(~np.isnan(data))
for bench in range(len(benchmarks)):
    for jdk in range(len(jdks)):
        for opt in range(len(jvmOptions)):
            print("Bench =", benchmarks[bench], "JDK =", jdks[jdk], "Opt =", jvmOptions[opt])
            print("mean = {m:5.0f} \tCI95 = {ci:4.2f}% \tStdDev = {s:3.1f} \tMin = {mi:5.0f} \tMax = {ma:5.0f} \tNum = {n:2d}".
                  format(m=mean[bench, jdk, opt], ci=ci95[bench, jdk, opt], s=std[bench, jdk, opt],
                         mi=minTime[bench, jdk, opt], ma=maxTime[bench, jdk, opt], n=numValidExperiments[bench, jdk, opt]))