-
Notifications
You must be signed in to change notification settings - Fork 304
/
cart_utils.py
executable file
·476 lines (371 loc) · 15 KB
/
cart_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
#!/usr/bin/python
'''
(C) Copyright 2018-2021 Intel Corporation.
SPDX-License-Identifier: BSD-2-Clause-Patent
'''
from __future__ import print_function
# pylint: disable=broad-except
import time
from distutils.spawn import find_executable
import os
# MPI environment module needs this
#pylint: disable=unused-import
import re
#pylint: enable=unused-import
import shlex
import subprocess
import logging
import cart_logparse
import cart_logtest
import socket
from general_utils import stop_processes
from write_host_file import write_host_file
class CartUtils():
    """Utilities for building, launching and cleaning up CaRT test commands.

    The methods read their test configuration from a test object
    (``cartobj``) through ``cartobj.params.get(name, path)`` and take most
    runtime settings from environment variables.
    """

    def __init__(self):
        """Set up the loggers and the environment-modules state."""
        self.stdout = logging.getLogger('avocado.test.stdout')
        self.progress_log = logging.getLogger("progress")
        # True once init_mpi() has attempted to initialize environment-modules.
        self.module_init = False
        # Value of CRT_PHY_ADDR_STR captured by get_env(), or None.
        self.provider = None
        # Placeholder until init_mpi() installs the real "module" function;
        # it reports every module operation as unsuccessful.
        self.module = lambda *x: False

    @staticmethod
    def check_process(proc):
        """Check if a process is still running.

        Args:
            proc: process object providing poll() and returncode
                (e.g. subprocess.Popen).

        Returns:
            bool: True if the process has no return code yet, else False.
        """
        proc.poll()
        return proc.returncode is None

    @staticmethod
    def wait_process(proc, wait_time):
        """Wait up to wait_time seconds for a process to terminate.

        The process is polled once per second.

        Args:
            proc: process object providing poll() and returncode.
            wait_time (int): maximum number of seconds to wait; zero,
                negative or None means do not wait (and do not poll).

        Returns:
            The process return code, or None if it is still running when the
            time is up.
        """
        # Compare against zero instead of testing truthiness so a negative
        # or fractional wait_time cannot step past zero and loop forever.
        remaining = wait_time or 0
        while remaining > 0:
            proc.poll()
            if proc.returncode is not None:
                return proc.returncode
            time.sleep(1)
            remaining -= 1
        return None

    @staticmethod
    def cleanup_processes():
        """Clean up cart processes, in case avocado/apricot does not.

        Returns:
            list: error message strings; empty when the processes were
            stopped (or none were found).
        """
        error_list = []
        localhost = socket.gethostname().split(".")[0:1]
        processes = r"'\<(crt_launch|orterun)\>'"

        # NOTE(review): stop_processes() presumably returns a mapping keyed
        # by exit status (1: processes found and signalled, 0: none left) -
        # confirm against general_utils.
        for _ in range(2):
            result = stop_processes(localhost,
                                    processes,
                                    added_filter=r"'\<(grep|defunct)\>'")
            if 1 in result:
                print(
                    "Stopped '{}' processes on {}".format(
                        processes, str(result[1])))
                # Run once more to verify nothing is left running.
                continue
            if 0 in result:
                print("All '{}' processes have been stopped".format(processes))
            else:
                error_list.append("Error detecting/stopping cart processes")
            break
        else:
            # Both attempts still found processes to stop.
            error_list.append("Unable to stop cart processes!")
        return error_list

    @staticmethod
    def stop_process(proc):
        """Wait up to 60 seconds for a process to exit, then terminate it.

        Args:
            proc: process object providing poll(), returncode, terminate()
                and kill().

        Returns:
            The process return code if it exited on its own, otherwise -1
            after the process has been asked to terminate.
        """
        procrtn = CartUtils.wait_process(proc, 60)
        if procrtn is None:
            procrtn = -1
            try:
                proc.terminate()
            except Exception:
                # Best effort: fall back to a hard kill if terminate fails.
                proc.kill()
        return procrtn

    @staticmethod
    def _get_shared_dir():
        """Return the directory used for the group attach info file(s).

        DAOS_TEST_SHARED_DIR is preferred; HOME is only consulted (and only
        required to be set) when it is absent.
        (It can't be '.' or cwd(), it must be some place writable.)
        """
        if 'DAOS_TEST_SHARED_DIR' in os.environ:
            return os.environ['DAOS_TEST_SHARED_DIR']
        return os.environ['HOME']

    # What is special about pylint's 15 variable limit?
    # pylint: disable=too-many-locals
    def get_env(self, cartobj):
        """Return the basic orterun environment options from yaml/environment.

        Also stores the log directory in cartobj.log_path, creates it when
        missing, records CRT_PHY_ADDR_STR in self.provider and, when the
        "logparse" test parameter is set, removes the regular files found
        directly inside the log directory.

        Args:
            cartobj: test object providing params.get(name, path).

        Returns:
            str: orterun options (each preceded by a space).

        Raises:
            KeyError: if DAOS_TEST_LOG_DIR is not set, or neither
                DAOS_TEST_SHARED_DIR nor HOME is set.
        """
        env_CCSA = cartobj.params.get("env", "/run/env_CRT_CTX_SHARE_ADDR/*/")
        test_name = cartobj.params.get("name", "/run/tests/*/")

        if env_CCSA is not None:
            log_dir = "{}-{}".format(test_name, env_CCSA)
        else:
            # Ensure we don't try to + concat None and string
            env_CCSA = ""
            log_dir = "{}".format(test_name)

        # Write group attach info file(s) to HOME or DAOS_TEST_SHARED_DIR.
        daos_test_shared_dir = self._get_shared_dir()

        log_path = os.environ['DAOS_TEST_LOG_DIR']
        log_file = os.path.join(log_path, log_dir,
                                test_name + "_" + env_CCSA + "_cart.log")

        self.provider = os.environ.get("CRT_PHY_ADDR_STR")

        # Variables forwarded to orterun only when set in our environment;
        # the order matches the order in which the options are emitted.
        forwarded = [
            ("D_LOG_MASK", os.environ.get("D_LOG_MASK")),
            ("CRT_PHY_ADDR_STR", self.provider),
            ("OFI_INTERFACE", os.environ.get("OFI_INTERFACE")),
            ("OFI_DOMAIN", os.environ.get("OFI_DOMAIN")),
            ("CRT_CTX_SHARE_ADDR", os.environ.get("CRT_CTX_SHARE_ADDR")),
        ]

        # Do not use the standard .log file extension, otherwise it'll get
        # removed (cleaned up for disk space savings) before we can archive it.
        log_filename = test_name + "_" + env_CCSA + "_output.orterun_log"
        output_filename_path = os.path.join(log_path, log_dir, log_filename)

        env = " --output-filename {!s}".format(output_filename_path)
        env += " -x D_LOG_FILE={!s}".format(log_file)
        env += " -x D_LOG_FILE_APPEND_PID=1"
        for var_name, var_value in forwarded:
            if var_value is not None:
                env += " -x {}={!s}".format(var_name, var_value)
        env += " -x CRT_ATTACH_INFO_PATH={!s}".format(daos_test_shared_dir)

        cartobj.log_path = log_path

        if not os.path.exists(log_path):
            os.makedirs(log_path)

        # If the logparser is being used, make sure the log directory is empty
        logparse = cartobj.params.get("logparse", "/run/tests/*/")
        if logparse:
            for the_file in os.listdir(log_path):
                file_path = os.path.join(log_path, the_file)
                if os.path.isfile(file_path):
                    os.unlink(file_path)

        return env
    # pylint: enable=too-many-locals

    @staticmethod
    def get_srv_cnt(cartobj, host):
        """Get the server count.

        Args:
            cartobj: test object providing params.get(name, path).
            host (str): name of the host list parameter under /run/hosts.

        Returns:
            int: number of entries in the host list.
        """
        hostlist = cartobj.params.get("{}".format(host), "/run/hosts/*/")
        return len(hostlist)

    @staticmethod
    def get_yaml_list_elem(param, index):
        """Get n-th element from YAML param

        Args:
            param (str): yaml string or list value
            index (int): index into list or None (for a non-list param)

        Returns:
            value: n-th element of list or string value
        """
        if isinstance(param, list):
            return param[index]
        return param

    # pylint: disable=too-many-locals
    def build_cmd(self, cartobj, env, host, **kwargs):
        """Build the orterun command line for a test.

        Args:
            cartobj: test object providing params.get(name, path) and a
                log_path attribute (set by get_env()).
            env (str): orterun options, as returned by get_env().
            host (str): prefix of the yaml parameters to use
                ("<host>_bin", "<host>_arg", "<host>_env", "<host>_slt",
                "<host>_ppn") and name of the host list parameter.
            **kwargs: "index" (int) selects the element to use when a yaml
                parameter is a list.

        Returns:
            str: the complete command line.
        """
        index = kwargs.get('index', None)

        tst_vgd = " valgrind --xml=yes " + \
                  "--xml-file={}/".format(cartobj.log_path) + \
                  r"valgrind.%q\{PMIX_ID\}.memcheck " + \
                  "--fair-sched=try --partial-loads-ok=yes " + \
                  "--leak-check=yes --gen-suppressions=all " + \
                  "--suppressions=../etc/memcheck-cart.supp " + \
                  "--show-reachable=yes "

        self.init_mpi("openmpi")

        # Also search the usual openmpi3 install locations for orterun.
        openmpi_path = os.environ["PATH"]
        openmpi_path += ":/usr/lib64/openmpi3/bin"
        openmpi_path += ":/usr/lib64/mpi/gcc/openmpi3/bin"

        orterun_bin = find_executable("orterun", openmpi_path)
        if orterun_bin is None:
            # Keep going so the failure shows up when the command is run.
            orterun_bin = "orterun_not_installed"

        # If a yaml parameter is a list, use its index-th element.
        per_test = {}
        for suffix in ("bin", "arg", "env", "slt"):
            raw_value = cartobj.params.get("{}_{}".format(host, suffix),
                                           "/run/tests/*/")
            per_test[suffix] = self.get_yaml_list_elem(raw_value, index)

        # Number of contexts: "<host>_CRT_CTX_NUM" from the environment,
        # defaulting to 16.
        tst_ctx = os.environ.get("{}_CRT_CTX_NUM".format(host), "16")

        tst_host = cartobj.params.get("{}".format(host), "/run/hosts/*/")
        tst_ppn = cartobj.params.get("{}_ppn".format(host), "/run/tests/*/")
        logparse = cartobj.params.get("logparse", "/run/tests/*/")

        # Write group attach info file(s) to HOME or DAOS_TEST_SHARED_DIR.
        daos_test_shared_dir = self._get_shared_dir()

        # The host file uses the "<host>_slt" value when given, otherwise
        # "<host>_ppn".
        tst_slt = per_test["slt"]
        hostfile = write_host_file(
            tst_host,
            daos_test_shared_dir,
            tst_slt if tst_slt is not None else tst_ppn)

        mca_flags = "--mca btl self,tcp "
        if self.provider == "ofi+psm2":
            mca_flags += "--mca pml ob1 "

        tst_cmd = "{} {} -N {} --hostfile {} "\
                  .format(orterun_bin, mca_flags, tst_ppn, hostfile)
        tst_cmd += env
        tst_cmd += " -x CRT_CTX_NUM=" + tst_ctx

        if per_test["env"] is not None:
            tst_cmd += " " + per_test["env"]

        if logparse:
            tst_cmd += " -x D_LOG_FILE_APPEND_PID=1"

        if os.getenv("CART_TEST_MODE", "native") == "memcheck":
            tst_cmd += tst_vgd

        if per_test["bin"] is not None:
            tst_cmd += " " + per_test["bin"]

        if per_test["arg"] is not None:
            tst_cmd += " " + per_test["arg"]

        return tst_cmd
    # pylint: enable=too-many-locals

    def launch_srv_cli_test(self, cartobj, srvcmd, clicmd):
        """Launch the server in the background and the client in the foreground.

        Args:
            cartobj: test object providing fail(message).
            srvcmd (str): server command line.
            clicmd (str): client command line.

        Returns:
            int: 0 (failures are reported through cartobj.fail()).
        """
        srv_proc = self.launch_cmd_bg(cartobj, srvcmd)

        # Verify the server is still running.
        if not self.check_process(srv_proc):
            procrtn = self.stop_process(srv_proc)
            cartobj.fail("Server did not launch, return code %s"
                         % procrtn)

        cli_rtn = self.launch_test(cartobj, clicmd, srv_proc)

        srv_rtn = self.stop_process(srv_proc)
        if srv_rtn:
            cartobj.fail("Failed, return codes client %d " % cli_rtn +
                         "server %d" % srv_rtn)

        return 0

    def init_mpi_old(self, mpi):
        """Load mpi with older environment-modules.

        Args:
            mpi (str): name of the module to load.

        Returns:
            bool: always True.
        """
        self.print("Loading old %s" % mpi)
        self.module('purge')
        self.module('load', mpi)
        return True

    def init_mpi(self, mpi):
        """Load the requested MPI through environment-modules.

        Args:
            mpi (str): "mpich" to load mpich; any other value loads openmpi.

        Returns:
            bool: True if an MPI module is (or already was) loaded, False if
            environment-modules is not installed or no module could be
            loaded.
        """
        mpich = ['mpi/mpich-x86_64']
        openmpi = ['mpi/openmpi3-x86_64', 'mpi/openmpi-x86_64']
        init_file = '/usr/share/Modules/init/python.py'

        if mpi == "mpich":
            load = mpich
            unload = openmpi
        else:
            load = openmpi
            unload = mpich

        # initialize Modules
        if not os.path.exists(init_file):
            if not self.module_init:
                # Only report the missing installation once.
                self.print("Modules (environment-modules) is not installed")
            self.module_init = True
            return False

        if not self.module_init:
            with open(init_file) as init_script:
                init_code = init_script.read()
            # Run the init script in an explicit namespace: names defined by
            # a bare exec() inside a function are not visible to the function
            # on Python 3. The namespace is seeded with this module's globals
            # so the script's module() can still find os/re/subprocess.
            namespace = dict(globals())
            # pylint: disable=exec-used
            exec(init_code, namespace)  # nosec
            # pylint: enable=exec-used
            self.module = namespace['module']
            self.module_init = True

        try:
            with open(os.devnull, 'w') as devnull:
                subprocess.check_call(['/bin/sh', '-l', '-c', 'module -V'],
                                      stdout=devnull,
                                      stderr=devnull)
        except subprocess.CalledProcessError:
            # older version of module return -1
            return self.init_mpi_old(load[0])

        self.print("Checking for loaded modules")
        for to_load in load:
            if self.module('is-loaded', to_load):
                self.print("%s is already loaded" % to_load)
                return True

        for to_unload in unload:
            if self.module('is-loaded', to_unload):
                self.module('unload', to_unload)
                self.print("Unloading %s" % to_unload)

        for to_load in load:
            if self.module('load', to_load):
                self.print("Loaded %s" % to_load)
                return True

        self.print("No MPI found on system")
        return False

    def launch_test(self, cartobj, cmd, srv1=None, srv2=None):
        """Launch a test command in the foreground.

        On a non-zero return code the given server processes (if any) are
        stopped and the failure is reported through cartobj.fail().

        Args:
            cartobj: test object providing fail(message).
            cmd (str): command line to run.
            srv1: optional server process to stop on failure.
            srv2: optional second server process to stop on failure.

        Returns:
            int: return code of the command.
        """
        self.print("\nCMD : %s\n" % cmd)
        self.print("\nENV : %s\n" % os.environ)

        rtn = subprocess.call(shlex.split(cmd))

        if rtn:
            for server in (srv1, srv2):
                if server is not None:
                    self.stop_process(server)
            cartobj.fail("Failed, return codes %d " % rtn)

        return rtn

    def launch_cmd_bg(self, cartobj, cmd):
        """Launch the given cmd in the background.

        Args:
            cartobj: test object providing fail(message).
            cmd (str): command line to run.

        Returns:
            The subprocess.Popen object of the started command, or -1 if the
            command could not be started and cartobj.fail() returned.
        """
        self.print("\nCMD : %s\n" % cmd)

        try:
            # Popen never returns None; a command that cannot be started is
            # reported as an OSError.
            return subprocess.Popen(shlex.split(cmd))
        except OSError as error:
            self.print("Unable to start command: %s" % error)
            cartobj.fail("Failed to start command\n")
            return -1

    def print(self, cmd):
        """Log the given message to the stdout and progress loggers."""
        self.stdout.info(cmd)
        self.progress_log.info(cmd)

    @staticmethod
    def log_check(cartobj):
        """Check log files for consistency.

        Does nothing unless the "logparse" test parameter is set. Every
        regular file directly inside cartobj.log_path is parsed and checked.

        Args:
            cartobj: test object providing params.get(name, path) and a
                log_path attribute.
        """
        logparse = cartobj.params.get("logparse", "/run/tests/*/")
        if not logparse:
            return

        strict_test = False
        print("Parsing log path", cartobj.log_path)
        if not os.path.exists(cartobj.log_path):
            print("Path does not exist")
            return

        for filename in os.listdir(cartobj.log_path):
            log_file = os.path.join(cartobj.log_path, filename)
            if not os.path.isfile(log_file):
                print("File is a Directory. Skipping.... :", log_file)
                continue

            print("Parsing ", log_file)
            cl = cart_logparse.LogIter(log_file)
            c_log_test = cart_logtest.LogTest(cl)
            c_log_test.check_log_file(strict_test)