From 5734ed7f8a381aedaef52e82758f83621123e2aa Mon Sep 17 00:00:00 2001
From: Peter Portante
Date: Mon, 24 Oct 2022 10:40:34 -0400
Subject: [PATCH 1/4] Apply `black`

---
 drop_buffer_cache.py        |   35 +-
 fallocate.py                |   29 +-
 invoke_process.py           |   43 +-
 launch_smf_host.py          |   68 +-
 launcher_thread.py          |   15 +-
 multi_thread_workload.py    |   72 +-
 output_results.py           |  153 +--
 parse.py                    |  380 +++++---
 parse_slave.py              |   25 +-
 parser_data_types.py        |   38 +-
 profile_workload.py         |   37 +-
 smallfile.py                | 1778 +++++++++++++++++++----------------
 smallfile_cli.py            |  106 ++-
 smallfile_remote.py         |    8 +-
 smallfile_rsptimes_stats.py |  203 ++--
 smf_test_params.py          |  200 ++--
 ssh_thread.py               |   21 +-
 sync_files.py               |   41 +-
 yaml_parser.py              |  195 ++--
 19 files changed, 1902 insertions(+), 1545 deletions(-)

diff --git a/drop_buffer_cache.py b/drop_buffer_cache.py
index 60ab549..11165ac 100644
--- a/drop_buffer_cache.py
+++ b/drop_buffer_cache.py
@@ -4,9 +4,11 @@
 import os
 import sys
 
+
 class DropBufferCacheException(Exception):
     pass
 
+
 # Drop 'buffer' cache for the given range of the given file.
 
 POSIX_FADV_DONTNEED = 4
@@ -15,6 +17,7 @@ class DropBufferCacheException(Exception):
 
 
 # this function is used if we can't load the real libc function
 
+
 def noop_libc_function(*args):
     return 0
@@ -22,10 +25,11 @@ def noop_libc_function(*args):
 # I have no idea what this code really does, but strace says it works.
 # does this code work under Cygwin?
 
+
 def load_libc_function(func_name):
     func = noop_libc_function
     try:
-        libc = ctypes.CDLL(ctypes.util.find_library('c'))
+        libc = ctypes.CDLL(ctypes.util.find_library("c"))
         func = getattr(libc, func_name)
     except AttributeError:
         # print("Unable to locate %s in libc. Leaving as a no-op."% func_name)
@@ -35,28 +39,29 @@ def load_libc_function(func_name):
 
 # do this at module load time
 
-_posix_fadvise = load_libc_function('posix_fadvise64')
+_posix_fadvise = load_libc_function("posix_fadvise64")
 
 
 def drop_buffer_cache(fd, offset, length):
-    ret = _posix_fadvise(fd,
-                         ctypes.c_uint64(offset),
-                         ctypes.c_uint64(length),
-                         POSIX_FADV_DONTNEED)
+    ret = _posix_fadvise(
+        fd, ctypes.c_uint64(offset), ctypes.c_uint64(length), POSIX_FADV_DONTNEED
+    )
     if ret != OK:
-        raise DropBufferCacheException('posix_fadvise64(%s, %s, %s, 4) -> %s' %
-                                       (fd, offset, length, ret))
+        raise DropBufferCacheException(
+            "posix_fadvise64(%s, %s, %s, 4) -> %s" % (fd, offset, length, ret)
+        )
+
 
 # unit test
 
-if __name__ == '__main__':
-    fd = os.open('/tmp/foo', os.O_WRONLY | os.O_CREAT)
-    if sys.version.startswith('3'):
-        ret = os.write(fd, bytes('hi there', 'UTF-8'))
-    elif sys.version.startswith('2'):
-        ret = os.write(fd, 'hi there')
+if __name__ == "__main__":
+    fd = os.open("/tmp/foo", os.O_WRONLY | os.O_CREAT)
+    if sys.version.startswith("3"):
+        ret = os.write(fd, bytes("hi there", "UTF-8"))
+    elif sys.version.startswith("2"):
+        ret = os.write(fd, "hi there")
     else:
-        raise DropBufferCacheException('unrecognized python version %s' % sys.version)
+        raise DropBufferCacheException("unrecognized python version %s" % sys.version)
     assert ret == 8
     drop_buffer_cache(fd, 0, 8)
     os.close(fd)
diff --git a/fallocate.py b/fallocate.py
index 065705f..573852f 100644
--- a/fallocate.py
+++ b/fallocate.py
@@ -18,6 +18,7 @@
 
 
 # this function is used if we can't load the real libc function
 
+
 def noop_libc_function(*args):
     return OK
@@ -25,10 +26,11 @@ def noop_libc_function(*args):
 # I have no idea what this code really does, but strace says it works.
 # does this code work under Cygwin?
 
+
 def load_libc_function(func_name):
     func = noop_libc_function
     try:
-        libc = ctypes.CDLL(ctypes.util.find_library('c'))
+        libc = ctypes.CDLL(ctypes.util.find_library("c"))
         func = getattr(libc, func_name)
     except AttributeError:
         # print("Unable to locate %s in libc. Leaving as a no-op."% func_name)
@@ -38,31 +40,30 @@ def load_libc_function(func_name):
 
 # do this at module load time
 
-_posix_fallocate = load_libc_function('fallocate64')
+_posix_fallocate = load_libc_function("fallocate64")
 
 
 # mode is one of FALLOC constants above
 
+
 def fallocate(fd, mode, offset, length):
-    return _posix_fallocate(fd,
-                            mode,
-                            ctypes.c_uint64(offset),
-                            ctypes.c_uint64(length))
+    return _posix_fallocate(fd, mode, ctypes.c_uint64(offset), ctypes.c_uint64(length))
+
 
 # unit test
 
-if __name__ == '__main__':
-    fd = os.open('/tmp/foo', os.O_WRONLY | os.O_CREAT)
+if __name__ == "__main__":
+    fd = os.open("/tmp/foo", os.O_WRONLY | os.O_CREAT)
     assert fd > 0x02
     ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 8)
     assert ret == OK
-    if sys.version.startswith('3'):
-        ret = os.write(fd, bytes('hi there', 'UTF-8'))
-    elif sys.version.startswith('2'):
-        ret = os.write(fd, 'hi there')
+    if sys.version.startswith("3"):
+        ret = os.write(fd, bytes("hi there", "UTF-8"))
+    elif sys.version.startswith("2"):
+        ret = os.write(fd, "hi there")
     else:
-        print('unrecognized python version %s' % sys.version)
+        print("unrecognized python version %s" % sys.version)
         sys.exit(NOTOK)
     assert ret == 8
     os.close(fd)
-    print('SUCCESS')
+    print("SUCCESS")
diff --git a/invoke_process.py b/invoke_process.py
index e37ea07..9536068 100644
--- a/invoke_process.py
+++ b/invoke_process.py
@@ -1,12 +1,12 @@
 # -*- coding: utf-8 -*-
 
-'''
+"""
 invoke_process.py
 launch multiple subprocesses running SmallfileWorkload instance
 Copyright 2012 -- Ben England
 Licensed under the Apache License at http://www.apache.org/licenses/LICENSE-2.0
 See Appendix on this page for instructions pertaining to license.
-''' +""" import multiprocessing import shutil @@ -23,8 +23,8 @@ # it doesn't really use > 1 core because of the GIL (global lock) # occasional status reports could be sent back using pipe as well -class subprocess(multiprocessing.Process): +class subprocess(multiprocessing.Process): def __init__(self, invocation): multiprocessing.Process.__init__(self) (conn1, conn2) = multiprocessing.Pipe(False) @@ -38,11 +38,14 @@ def __init__(self, invocation): def run(self): try: self.invoke.do_workload() - self.invoke.log.debug('exiting subprocess and returning invoke ' - + str(self.invoke)) + self.invoke.log.debug( + "exiting subprocess and returning invoke " + str(self.invoke) + ) except Exception as e: - print('Exception seen in thread %s host %s (tail %s) ' % - (self.invoke.tid, self.invoke.onhost, self.invoke.log_fn())) + print( + "Exception seen in thread %s host %s (tail %s) " + % (self.invoke.tid, self.invoke.onhost, self.invoke.log_fn()) + ) self.invoke.log.error(str(e)) self.status = self.invoke.NOTOK finally: @@ -62,19 +65,19 @@ def run(self): # including multi-threaded test # to run, just do "python invoke_process.py" -class Test(unittest_module.TestCase): +class Test(unittest_module.TestCase): def setUp(self): self.invok = smallfile.SmallfileWorkload() self.invok.debug = True self.invok.verbose = True - self.invok.tid = 'regtest' + self.invok.tid = "regtest" self.invok.start_log() shutil.rmtree(self.invok.src_dirs[0], ignore_errors=True) os.makedirs(self.invok.src_dirs[0], 0o644) def test_multiproc_stonewall(self): - self.invok.log.info('starting stonewall test') + self.invok.log.info("starting stonewall test") thread_ready_timeout = 4 thread_count = 4 for tree in self.invok.top_dirs: @@ -85,8 +88,7 @@ def test_multiproc_stonewall(self): for dir in self.invok.dest_dirs: os.mkdir(dir) os.mkdir(self.invok.network_dir) - self.invok.starting_gate = os.path.join(self.invok.network_dir, - 'starting-gate') + self.invok.starting_gate = os.path.join(self.invok.network_dir, "starting-gate") sgate_file = self.invok.starting_gate invokeList = [] for j in range(0, thread_count): @@ -96,8 +98,8 @@ def test_multiproc_stonewall(self): s.verbose = True s.tid = str(j) - s.prefix = 'thr_' - s.suffix = 'foo' + s.prefix = "thr_" + s.suffix = "foo" s.iterations = 10 s.stonewall = False s.starting_gate = sgate_file @@ -118,8 +120,9 @@ def test_multiproc_stonewall(self): break time.sleep(1) if not threads_ready: - raise SMFRunException('threads did not show up within %d seconds' - % thread_ready_timeout) + raise SMFRunException( + "threads did not show up within %d seconds" % thread_ready_timeout + ) time.sleep(1) touch(sgate_file) for t in threadList: @@ -130,11 +133,13 @@ def test_multiproc_stonewall(self): assert rtnd_invok.rq_final is not None assert rtnd_invok.filenum_final is not None if rtnd_invok.status != rtnd_invok.OK: - raise SMFRunException('subprocess failure for %s invocation %s: ' - % (str(t), str(rtnd_invok))) + raise SMFRunException( + "subprocess failure for %s invocation %s: " + % (str(t), str(rtnd_invok)) + ) # so you can just do "python invoke_process.py" to test it -if __name__ == '__main__': +if __name__ == "__main__": unittest_module.main() diff --git a/launch_smf_host.py b/launch_smf_host.py index 6c6665a..2230720 100644 --- a/launch_smf_host.py +++ b/launch_smf_host.py @@ -10,7 +10,7 @@ # if your mountpoint for the shared storage is /mnt/fs: # CMD: python launch_smf_host.py --top $top_dir --as-host container$container_id # you could include this as the last line in your docker file -# 
and fill in top_dir and container_id as environment variables in +# and fill in top_dir and container_id as environment variables in # your docker run command using the -e option # # docker run -e top_dir=/mnt/fs/smf -e container_id="container-2" # @@ -20,7 +20,7 @@ # as the Windows clients, so you don't need to specify # --substitute_top in any other situation. # -# Example for Windows: +# Example for Windows: # if mountpoint on Linux test driver is /mnt/cifs/testshare # and mountpoint on Windows is z:\ # you run: @@ -28,7 +28,7 @@ # --top /mnt/cifs/testshare/smf # --substitute_top z:\smf # -# +# import sys import os import time @@ -40,39 +40,43 @@ OK = 0 NOTOK = 1 -def start_log(prefix = socket.gethostname()): + +def start_log(prefix=socket.gethostname()): log = logging.getLogger(prefix) - if os.getenv('LOGLEVEL_DEBUG') != None: + if os.getenv("LOGLEVEL_DEBUG") != None: log.setLevel(logging.DEBUG) else: log.setLevel(logging.INFO) - log_format = prefix + '%(asctime)s - %(levelname)s - %(message)s' + log_format = prefix + "%(asctime)s - %(levelname)s - %(message)s" formatter = logging.Formatter(log_format) h = logging.StreamHandler() h.setFormatter(formatter) log.addHandler(h) - h2 = logging.FileHandler('/var/tmp/launch_smf_host.%s.log' % prefix) + h2 = logging.FileHandler("/var/tmp/launch_smf_host.%s.log" % prefix) h2.setFormatter(formatter) log.addHandler(h2) - log.info('starting log') + log.info("starting log") return log + def usage(msg): print(msg) - print('usage: python launch_smf_host.py' - '--top top-directory ' - '[ --substitute-top synonym-directory ]' - '[ --as-host as-host-name ] ') + print( + "usage: python launch_smf_host.py" + "--top top-directory " + "[ --substitute-top synonym-directory ]" + "[ --as-host as-host-name ] " + ) sys.exit(NOTOK) # parse command line if len(sys.argv) < 3: - usage('required command line arguments missing') + usage("required command line arguments missing") substitute_dir = None top_dir = None @@ -80,56 +84,58 @@ def usage(msg): j = 1 while j < len(sys.argv): if len(sys.argv) == j + 1: - usage('every parameter name must have a value') + usage("every parameter name must have a value") nm = sys.argv[j] if len(nm) < 3: - usage('parameter name must be > 3 characters long and start with --') + usage("parameter name must be > 3 characters long and start with --") nm = nm[2:] val = sys.argv[j + 1] j += 2 - if nm == 'substitute-top': + if nm == "substitute-top": substitute_dir = val - elif nm == 'top': + elif nm == "top": top_dir = val - elif nm == 'as-host': + elif nm == "as-host": as_host = val else: - usage('unrecognized parameter --%s' % nm) + usage("unrecognized parameter --%s" % nm) if not top_dir: - usage('you must specify --top directory') + usage("you must specify --top directory") log = start_log(prefix=as_host) -log.info('substitute-top %s, top directory %s, as-host %s' % - (substitute_dir, top_dir, as_host)) +log.info( + "substitute-top %s, top directory %s, as-host %s" + % (substitute_dir, top_dir, as_host) +) # look for launch files, read smallfile_remote.py command from them, # and execute, substituting --shared directory for --top directory, # to allow samba to work with Linux test driver -network_shared_path = os.path.join(top_dir, 'network_shared') +network_shared_path = os.path.join(top_dir, "network_shared") -launch_fn = os.path.join(network_shared_path, as_host) + '.smf_launch' +launch_fn = os.path.join(network_shared_path, as_host) + ".smf_launch" if os.path.exists(launch_fn): # avoid left-over launch files os.unlink(launch_fn) 
-log.info('launch filename ' + launch_fn) +log.info("launch filename " + launch_fn) -shutdown_fn = os.path.join(network_shared_path, 'shutdown_launchers.tmp') -log.info('daemon shutdown filename ' + shutdown_fn) +shutdown_fn = os.path.join(network_shared_path, "shutdown_launchers.tmp") +log.info("daemon shutdown filename " + shutdown_fn) while True: try: - with open(launch_fn, 'r') as f: + with open(launch_fn, "r") as f: cmd = f.readline().strip() os.unlink(launch_fn) if substitute_dir != None: cmd = cmd.replace(substitute_dir, top_dir) - log.debug('spawning cmd: %s' % cmd) + log.debug("spawning cmd: %s" % cmd) rc = os.system(cmd) if rc != OK: - log.debug('ERROR: return code %d for cmd %s' % (rc, cmd)) + log.debug("ERROR: return code %d for cmd %s" % (rc, cmd)) except IOError as e: if e.errno != errno.ENOENT: raise e finally: if os.path.exists(shutdown_fn): # avoid left-over launch files - log.info('saw daemon shutdown file %s, exiting' % shutdown_fn) + log.info("saw daemon shutdown file %s, exiting" % shutdown_fn) sys.exit(0) time.sleep(1) diff --git a/launcher_thread.py b/launcher_thread.py index e45e71d..9802cab 100644 --- a/launcher_thread.py +++ b/launcher_thread.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -''' +""" launcher_thread.py manages parallel execution of shell commands on remote hosts it assumes there is a poller on each remote host, launch_smf_host.py, @@ -11,7 +11,7 @@ Copyright 2012 -- Ben England Licensed under the Apache License at http://www.apache.org/licenses/LICENSE-2.0 See Appendix on this page for instructions pertaining to license. -''' +""" import threading import os @@ -26,8 +26,8 @@ # for the remote host or container to run, # then waits for the result to appear in the same shared directory -class launcher_thread(threading.Thread): +class launcher_thread(threading.Thread): def __init__(self, prm, remote_host, remote_cmd_in): threading.Thread.__init__(self) self.prm = prm # test parameters @@ -37,14 +37,15 @@ def __init__(self, prm, remote_host, remote_cmd_in): def run(self): master_invoke = self.prm.master_invoke - launch_fn = os.path.join(master_invoke.network_dir, - self.remote_host) + '.smf_launch' + launch_fn = ( + os.path.join(master_invoke.network_dir, self.remote_host) + ".smf_launch" + ) pickle_fn = master_invoke.host_result_filename(self.remote_host) abortfn = master_invoke.abort_fn() ensure_deleted(launch_fn) ensure_deleted(pickle_fn) if self.prm.master_invoke.verbose: - print('wrote command %s to launch file %s' % (self.remote_cmd, launch_fn)) + print("wrote command %s to launch file %s" % (self.remote_cmd, launch_fn)) write_sync_file(launch_fn, self.remote_cmd) pickle_fn = master_invoke.host_result_filename(self.remote_host) # print('waiting for pickle file %s'%pickle_fn) @@ -53,7 +54,7 @@ def run(self): # print('%s not seen'%pickle_fn) if os.path.exists(abortfn): if master_invoke.verbose: - print('test abort seen by host ' + self.remote_host) + print("test abort seen by host " + self.remote_host) return time.sleep(1.0) self.status = master_invoke.OK # success! 
diff --git a/multi_thread_workload.py b/multi_thread_workload.py index 704b7f9..b28e07c 100644 --- a/multi_thread_workload.py +++ b/multi_thread_workload.py @@ -8,7 +8,13 @@ import smallfile from smallfile import OK, NOTOK, SMFResultException, SMFRunException, abort_test import invoke_process -from sync_files import touch, write_sync_file, write_pickle, ensure_dir_exists, ensure_deleted +from sync_files import ( + touch, + write_sync_file, + write_pickle, + ensure_dir_exists, + ensure_deleted, +) import output_results @@ -20,14 +26,16 @@ def create_worker_list(prm): thread_list = [] for k in range(0, prm.thread_count): nextinv = copy.copy(prm.master_invoke) - nextinv.tid = '%02d' % k + nextinv.tid = "%02d" % k if not prm.master_invoke.is_shared_dir: - nextinv.src_dirs = [d + os.sep + prm.master_invoke.onhost - + os.sep + 'thrd_' + nextinv.tid - for d in nextinv.src_dirs] - nextinv.dest_dirs = [d + os.sep + prm.master_invoke.onhost - + os.sep + 'thrd_' + nextinv.tid - for d in nextinv.dest_dirs] + nextinv.src_dirs = [ + d + os.sep + prm.master_invoke.onhost + os.sep + "thrd_" + nextinv.tid + for d in nextinv.src_dirs + ] + nextinv.dest_dirs = [ + d + os.sep + prm.master_invoke.onhost + os.sep + "thrd_" + nextinv.tid + for d in nextinv.dest_dirs + ] t = invoke_process.subprocess(nextinv) thread_list.append(t) ensure_deleted(nextinv.gen_thread_ready_fname(nextinv.tid)) @@ -36,6 +44,7 @@ def create_worker_list(prm): # what follows is code that gets done on each host + def run_multi_thread_workload(prm): master_invoke = prm.master_invoke @@ -54,7 +63,7 @@ def run_multi_thread_workload(prm): for d in dlist: ensure_dir_exists(d) if verbose: - print(host + ' saw ' + str(d)) + print(host + " saw " + str(d)) # for each thread set up SmallfileWorkload instance, # create a thread instance, and delete the thread-ready file @@ -70,15 +79,14 @@ def run_multi_thread_workload(prm): for t in thread_list: t.start() if verbose: - print('started %d worker threads on host %s' % - (len(thread_list), host)) + print("started %d worker threads on host %s" % (len(thread_list), host)) # wait for all threads to reach the starting gate # this makes it more likely that they will start simultaneously startup_timeout = prm.startup_timeout if smallfile.is_windows_os: - print('adding time for Windows synchronization') + print("adding time for Windows synchronization") startup_timeout += 30 abort_fname = my_host_invoke.abort_fn() thread_count = len(thread_list) @@ -89,7 +97,7 @@ def run_multi_thread_workload(prm): fn = t.invoke.gen_thread_ready_fname(t.invoke.tid) if not os.path.exists(fn): if verbose: - print('thread %d thread-ready file %s not found...' % (k, fn)) + print("thread %d thread-ready file %s not found..." 
% (k, fn)) break thread_to_wait_for = k + 1 if thread_to_wait_for == thread_count: @@ -102,33 +110,37 @@ def run_multi_thread_workload(prm): if thread_to_wait_for < thread_count: abort_test(abort_fname, thread_list) - raise SMFRunException('only %d threads reached starting gate within %d sec' - % (thread_to_wait_for, startup_timeout)) + raise SMFRunException( + "only %d threads reached starting gate within %d sec" + % (thread_to_wait_for, startup_timeout) + ) # declare that this host is at the starting gate if prm_slave: host_ready_fn = my_host_invoke.gen_host_ready_fname() if my_host_invoke.verbose: - print('host %s creating ready file %s' % - (my_host_invoke.onhost, host_ready_fn)) + print( + "host %s creating ready file %s" + % (my_host_invoke.onhost, host_ready_fn) + ) touch(host_ready_fn) sg = my_host_invoke.starting_gate if not prm_slave: # special case of no --host-set parameter try: - write_sync_file(sg, 'hi there') + write_sync_file(sg, "hi there") if verbose: - print('wrote starting gate file') + print("wrote starting gate file") except IOError as e: - print('error writing starting gate for threads: %s' % str(e)) + print("error writing starting gate for threads: %s" % str(e)) prm.test_start_time = time.time() # wait for starting_gate file to be created by test driver # every second we resume scan from last host file not found if verbose: - print('awaiting ' + sg) + print("awaiting " + sg) if prm_slave: for sec in range(0, prm.host_startup_timeout + 10): # hack to ensure that directory is up to date @@ -139,10 +151,11 @@ def run_multi_thread_workload(prm): time.sleep(0.5) if not os.path.exists(sg): abort_test(my_host_invoke.abort_fn(), thread_list) - raise SMFRunException('starting signal not seen within %d seconds' - % prm.host_startup_timeout) + raise SMFRunException( + "starting signal not seen within %d seconds" % prm.host_startup_timeout + ) if verbose: - print('starting test on host ' + host + ' in 2 seconds') + print("starting test on host " + host + " in 2 seconds") time.sleep(2 + random.random()) # let other hosts see starting gate file # FIXME: don't timeout the test, @@ -153,7 +166,7 @@ def run_multi_thread_workload(prm): for t in thread_list: if verbose: - print('waiting for thread %s' % t.invoke.tid) + print("waiting for thread %s" % t.invoke.tid) t.invoke = t.receiver.recv() # to get results from sub-process t.join() @@ -165,7 +178,7 @@ def run_multi_thread_workload(prm): invoke_list = [t.invoke for t in thread_list] output_results.output_results(invoke_list, prm) except SMFResultException as e: - print('ERROR: ' + str(e)) + print("ERROR: " + str(e)) exit_status = NOTOK else: @@ -173,13 +186,12 @@ def run_multi_thread_workload(prm): # then write out this host's result in pickle format # so test driver can pick up result - result_filename = \ - master_invoke.host_result_filename(prm.as_host) + result_filename = master_invoke.host_result_filename(prm.as_host) if verbose: - print('writing invokes to: ' + result_filename) + print("writing invokes to: " + result_filename) invok_list = [t.invoke for t in thread_list] if verbose: - print('saving result to filename %s' % result_filename) + print("saving result to filename %s" % result_filename) for ivk in invok_list: ivk.buf = None ivk.biggest_buf = None diff --git a/output_results.py b/output_results.py index 5f3b695..951e3aa 100644 --- a/output_results.py +++ b/output_results.py @@ -10,34 +10,40 @@ BYTES_PER_KiB = 1024.0 KiB_PER_MiB = 1024.0 + class result_stats: - # start with zeroing because we'll add + # start with 
zeroing because we'll add # other objects of this type to it def __init__(self): self.status = OK self.elapsed = 0.0 self.files = 0 - self.records = 0 + self.records = 0 self.files_per_sec = 0.0 self.IOPS = 0.0 self.MiBps = 0.0 def get_from_invoke(self, invk, record_sz_kb): if invk.elapsed_time is None: - print('WARNING: thread %s on host %s never completed' % - (invk.tid, invk.onhost)) + print( + "WARNING: thread %s on host %s never completed" + % (invk.tid, invk.onhost) + ) self.status = invk.status - self.elapsed = invk.elapsed_time if invk.elapsed_time is not None else 100000000.0 + self.elapsed = ( + invk.elapsed_time if invk.elapsed_time is not None else 100000000.0 + ) self.files = invk.filenum_final if invk.filenum_final is not None else 0 self.records = invk.rq_final if invk.rq_final is not None else 0 if invk.elapsed_time is not None and invk.elapsed_time > 0.0: self.files_per_sec = invk.filenum_final / invk.elapsed_time if invk.rq_final > 0: self.IOPS = invk.rq_final / invk.elapsed_time - self.MiBps = (invk.rq_final * record_sz_kb / KiB_PER_MiB) \ - / invk.elapsed_time + self.MiBps = ( + invk.rq_final * record_sz_kb / KiB_PER_MiB + ) / invk.elapsed_time # add component's fields to this object @@ -61,20 +67,19 @@ def add_to(self, component): def add_to_dict(self, target): if self.status != OK: - target['status'] = os.strerror(self.status) - target['elapsed'] = self.elapsed - target['files'] = self.files - target['records'] = self.records - target['filesPerSec'] = self.files_per_sec + target["status"] = os.strerror(self.status) + target["elapsed"] = self.elapsed + target["files"] = self.files + target["records"] = self.records + target["filesPerSec"] = self.files_per_sec if self.records > 0: - target['IOPS'] = self.IOPS - target['MiBps'] = self.MiBps + target["IOPS"] = self.IOPS + target["MiBps"] = self.MiBps def output_results(invoke_list, test_params): if len(invoke_list) < 1: - raise SMFResultException('no pickled invokes read, so no results' - ) + raise SMFResultException("no pickled invokes read, so no results") my_host_invoke = invoke_list[0] # pick a representative one rszkb = my_host_invoke.record_sz_kb if rszkb == 0: @@ -83,9 +88,9 @@ def output_results(invoke_list, test_params): rszkb = my_host_invoke.biggest_buf_size / my_host_invoke.BYTES_PER_KB rslt = {} - rslt['host'] = {} + rslt["host"] = {} stats_by_host = {} - cluster = stats_by_host['stats'] = result_stats() + cluster = stats_by_host["stats"] = result_stats() for invk in invoke_list: # for each parallel SmallfileWorkload @@ -93,16 +98,24 @@ def output_results(invoke_list, test_params): # and determine time interval over which test ran if not isinstance(invk, smallfile.SmallfileWorkload): - raise SMFResultException('invoke is of wrong type: %s' % str(invk)) + raise SMFResultException("invoke is of wrong type: %s" % str(invk)) if invk.status: - status = 'ERR: ' + os.strerror(invk.status) + status = "ERR: " + os.strerror(invk.status) else: - status = 'ok' - fmt = 'host = %s,thr = %s,elapsed = %s' - fmt += ',files = %s,records = %s,status = %s' - print(fmt % - (invk.onhost, invk.tid, str(invk.elapsed_time), - str(invk.filenum_final), str(invk.rq_final), status)) + status = "ok" + fmt = "host = %s,thr = %s,elapsed = %s" + fmt += ",files = %s,records = %s,status = %s" + print( + fmt + % ( + invk.onhost, + invk.tid, + str(invk.elapsed_time), + str(invk.filenum_final), + str(invk.rq_final), + status, + ) + ) per_thread = result_stats() per_thread.get_from_invoke(invk, rszkb) @@ -114,10 +127,10 @@ def 
output_results(invoke_list, test_params): except KeyError: # first time this host was seen stats_by_host[invk.onhost] = per_host = {} - per_host['thread'] = {} - per_host['stats'] = result_stats() - per_host['thread'][invk.tid] = per_thread - per_host['stats'].add_to(per_thread) + per_host["thread"] = {} + per_host["stats"] = result_stats() + per_host["thread"][invk.tid] = per_thread + per_host["stats"].add_to(per_thread) cluster.add_to(per_thread) # now counters are all added up, generate JSON @@ -125,82 +138,88 @@ def output_results(invoke_list, test_params): for invk in invoke_list: # for each parallel SmallfileWorkload per_host = stats_by_host[invk.onhost] try: - per_host_json = rslt['host'][invk.onhost] + per_host_json = rslt["host"][invk.onhost] except KeyError: - rslt['host'][invk.onhost] = per_host_json = {} - per_host['stats'].add_to_dict(per_host_json) - per_host_json['thread'] = {} - per_host_json['thread'][invk.tid] = per_thread_json = {} - per_thread = per_host['thread'][invk.tid] + rslt["host"][invk.onhost] = per_host_json = {} + per_host["stats"].add_to_dict(per_host_json) + per_host_json["thread"] = {} + per_host_json["thread"][invk.tid] = per_thread_json = {} + per_thread = per_host["thread"][invk.tid] per_thread.add_to_dict(per_thread_json) cluster.add_to_dict(rslt) - # if there is only 1 host in results, - # and no host was specified, + # if there is only 1 host in results, + # and no host was specified, # then remove that level from # result hierarchy, not needed - if len(rslt['host'].keys()) == 1 and test_params.host_set == None: - hostkey = list(rslt['host'].keys())[0] - threads_in_host = rslt['host'][hostkey]['thread'] - rslt['thread'] = threads_in_host - del rslt['host'] + if len(rslt["host"].keys()) == 1 and test_params.host_set == None: + hostkey = list(rslt["host"].keys())[0] + threads_in_host = rslt["host"][hostkey]["thread"] + rslt["thread"] = threads_in_host + del rslt["host"] - print('total threads = %d' % len(invoke_list)) - rslt['totalhreads'] = len(invoke_list) + print("total threads = %d" % len(invoke_list)) + rslt["totalhreads"] = len(invoke_list) - print('total files = %d' % cluster.files) + print("total files = %d" % cluster.files) if cluster.records > 0: - print('total IOPS = %d' % cluster.IOPS) + print("total IOPS = %d" % cluster.IOPS) total_data_gb = cluster.records * rszkb * 1.0 / KB_PER_GB - print('total data = %9.3f GiB' % total_data_gb) - rslt['totalDataGB'] = total_data_gb + print("total data = %9.3f GiB" % total_data_gb) + rslt["totalDataGB"] = total_data_gb if not test_params.host_set: - test_params.host_set = [ 'localhost' ] + test_params.host_set = ["localhost"] json_test_params = deepcopy(test_params) - json_test_params.host_set = ','.join(test_params.host_set) + json_test_params.host_set = ",".join(test_params.host_set) if len(invoke_list) < len(test_params.host_set) * test_params.thread_count: - print('WARNING: failed to get some responses from workload generators') + print("WARNING: failed to get some responses from workload generators") max_files = my_host_invoke.iterations * len(invoke_list) pct_files = 100.0 * cluster.files / max_files - print('%6.2f%% of requested files processed, warning threshold is %6.2f%%' % - (pct_files, smallfile.pct_files_min)) - rslt['pctFilesDone'] = pct_files + print( + "%6.2f%% of requested files processed, warning threshold is %6.2f%%" + % (pct_files, smallfile.pct_files_min) + ) + rslt["pctFilesDone"] = pct_files - print('elapsed time = %9.3f' % cluster.elapsed) - rslt['startTime'] = 
test_params.test_start_time - rslt['status'] = os.strerror(cluster.status) + print("elapsed time = %9.3f" % cluster.elapsed) + rslt["startTime"] = test_params.test_start_time + rslt["status"] = os.strerror(cluster.status) # output start time in elasticsearch-friendly format - rslt['date'] = time.strftime('%Y-%m-%dT%H:%M:%S.000Z', time.gmtime(test_params.test_start_time)) + rslt["date"] = time.strftime( + "%Y-%m-%dT%H:%M:%S.000Z", time.gmtime(test_params.test_start_time) + ) # don't output meaningless fields if cluster.elapsed < 0.001: # can't compute rates if it ended too quickly - print('WARNING: test must run longer than a millisecond') + print("WARNING: test must run longer than a millisecond") else: - print('files/sec = %f' % cluster.files_per_sec) + print("files/sec = %f" % cluster.files_per_sec) if cluster.records > 0: - print('IOPS = %f' % cluster.IOPS) - print('MiB/sec = %f' % cluster.MiBps) + print("IOPS = %f" % cluster.IOPS) + print("MiB/sec = %f" % cluster.MiBps) # if JSON output requested, generate it here if test_params.output_json: json_obj = json_test_params.to_json() - json_obj['results'] = rslt - with open(test_params.output_json, 'w') as jsonf: + json_obj["results"] = rslt + with open(test_params.output_json, "w") as jsonf: json.dump(json_obj, jsonf, indent=4) # finally, throw exceptions if something bad happened # wait until here to do it so we can see test results if cluster.status != OK: - print('WARNING: at least one thread encountered error, test may be incomplete') + print("WARNING: at least one thread encountered error, test may be incomplete") elif pct_files < smallfile.pct_files_min: - print('WARNING: not enough total files processed before 1st thread finished, change test parameters') + print( + "WARNING: not enough total files processed before 1st thread finished, change test parameters" + ) diff --git a/parse.py b/parse.py index f381b07..bbc0949 100644 --- a/parse.py +++ b/parse.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- -''' +""" parse.py -- parses CLI commands for smallfile_cli.py Copyright 2012 -- Ben England Licensed under the Apache License at http://www.apache.org/licenses/LICENSE-2.0 See Appendix on this page for instructions pertaining to license. -''' +""" import sys import os @@ -14,10 +14,12 @@ import smf_test_params from smf_test_params import bool2YN import argparse + yaml_parser_installed = False try: import yaml_parser from yaml_parser import parse_yaml + yaml_parser_installed = True except ImportError as e: pass @@ -32,6 +34,7 @@ # defining all test parameters. 
# default does short test in /var/tmp so you can see the program run + def parse(): # store as much as you can in SmallfileWorkload object # so per-thread invocations inherit @@ -39,125 +42,211 @@ def parse(): test_params = smf_test_params.smf_test_params() inv = test_params.master_invoke # for convenience - parser = argparse.ArgumentParser( - description='parse smallfile CLI parameters') + parser = argparse.ArgumentParser(description="parse smallfile CLI parameters") add = parser.add_argument - add('--yaml-input-file', - help='input YAML file containing all parameters below') - add('--output-json', - default=test_params.output_json, - help='if true then output JSON-format version of results') - add('--response-times', - type=boolean, default=inv.measure_rsptimes, - help='if true then record response time of each file op') - add('--network-sync-dir', - help='if --top not shared filesystem, provide shared filesystem directory') - add('--operation', - default='cleanup', choices=SmallfileWorkload.all_op_names, - help='type of operation to perform on each file') - add('--top', - type=directory_list, default=inv.top_dirs, - help='top directory or directories used by smallfile') - add('--host-set', - type=host_set, default=test_params.host_set, - help='list of workload generator hosts (or file containing it) ') - add('--launch-by-daemon', - type=boolean, default=test_params.launch_by_daemon, - help='use non-ssh launcher to get test running') - add('--files', - type=positive_integer, default=inv.iterations, - help='files processed per thread') - add('--threads', - type=positive_integer, default=test_params.thread_count, - help='threads per client') - add('--files-per-dir', - type=positive_integer, default=inv.files_per_dir, - help='files per (sub)directory') - add('--dirs-per-dir', - type=positive_integer, default=inv.dirs_per_dir, - help='subdirectories per directory') - add('--record-size', - type=positive_integer, default=inv.record_sz_kb, - help='record size (KB)') - add('--file-size', - type=non_negative_integer, default=inv.total_sz_kb, - help='subdirectories per directory') - add('--file-size-distribution', - type=file_size_distrib, default=inv.filesize_distr, - help='file size can be constant ("fixed") or random ("exponential")') - add('--fsync', - type=boolean, default=inv.fsync, - help='call fsync() after each file is written/modified') - add('--xattr-size', - type=non_negative_integer, default=inv.xattr_size, - help='extended attribute size (bytes)') - add('--xattr-count', - type=non_negative_integer, default=inv.xattr_count, - help='number of extended attributes per file') - add('--pause', - type=non_negative_integer, default=inv.pause_between_files, - help='pause between each file (microsec)') - add('--auto-pause', - type=boolean, default=inv.auto_pause, - help='adjust pause between files automatically based on response times') - add('--cleanup-delay-usec-per-file', - type=non_negative_integer, default=inv.cleanup_delay_usec_per_file, - help='time to delay after cleanup per file (microsec)') - add('--stonewall', - type=boolean, default=inv.stonewall, - help='stop measuring as soon as first thread is done') - add('--finish', - type=boolean, default=inv.finish_all_rq, - help='stop processing files as soon as first thread is done') - add('--prefix', - default=inv.prefix, - help='filename prefix') - add('--suffix', - default=inv.suffix, - help='filename suffix') - add('--hash-into-dirs', - type=boolean, default=inv.hash_to_dir, - help='if true then pseudo-randomly place files into 
directories') - add('--same-dir', - type=boolean, default=inv.is_shared_dir, - help='if true then all threads share the same directories') - add('--verbose', - type=boolean, default=inv.verbose, - help='if true then log extra messages about test') - add('--permute-host-dirs', - type=boolean, default=test_params.permute_host_dirs, - help='if true then shift clients to different host directories') - add('--record-ctime-size', - type=boolean, default=inv.record_ctime_size, - help='if true then update file xattr with ctime+size') - add('--verify-read', - type=boolean, default=inv.verify_read, - help='if true then check that data read = data written') - add('--incompressible', - type=boolean, default=inv.incompressible, - help='if true then non-compressible data written') + add("--yaml-input-file", help="input YAML file containing all parameters below") + add( + "--output-json", + default=test_params.output_json, + help="if true then output JSON-format version of results", + ) + add( + "--response-times", + type=boolean, + default=inv.measure_rsptimes, + help="if true then record response time of each file op", + ) + add( + "--network-sync-dir", + help="if --top not shared filesystem, provide shared filesystem directory", + ) + add( + "--operation", + default="cleanup", + choices=SmallfileWorkload.all_op_names, + help="type of operation to perform on each file", + ) + add( + "--top", + type=directory_list, + default=inv.top_dirs, + help="top directory or directories used by smallfile", + ) + add( + "--host-set", + type=host_set, + default=test_params.host_set, + help="list of workload generator hosts (or file containing it) ", + ) + add( + "--launch-by-daemon", + type=boolean, + default=test_params.launch_by_daemon, + help="use non-ssh launcher to get test running", + ) + add( + "--files", + type=positive_integer, + default=inv.iterations, + help="files processed per thread", + ) + add( + "--threads", + type=positive_integer, + default=test_params.thread_count, + help="threads per client", + ) + add( + "--files-per-dir", + type=positive_integer, + default=inv.files_per_dir, + help="files per (sub)directory", + ) + add( + "--dirs-per-dir", + type=positive_integer, + default=inv.dirs_per_dir, + help="subdirectories per directory", + ) + add( + "--record-size", + type=positive_integer, + default=inv.record_sz_kb, + help="record size (KB)", + ) + add( + "--file-size", + type=non_negative_integer, + default=inv.total_sz_kb, + help="subdirectories per directory", + ) + add( + "--file-size-distribution", + type=file_size_distrib, + default=inv.filesize_distr, + help='file size can be constant ("fixed") or random ("exponential")', + ) + add( + "--fsync", + type=boolean, + default=inv.fsync, + help="call fsync() after each file is written/modified", + ) + add( + "--xattr-size", + type=non_negative_integer, + default=inv.xattr_size, + help="extended attribute size (bytes)", + ) + add( + "--xattr-count", + type=non_negative_integer, + default=inv.xattr_count, + help="number of extended attributes per file", + ) + add( + "--pause", + type=non_negative_integer, + default=inv.pause_between_files, + help="pause between each file (microsec)", + ) + add( + "--auto-pause", + type=boolean, + default=inv.auto_pause, + help="adjust pause between files automatically based on response times", + ) + add( + "--cleanup-delay-usec-per-file", + type=non_negative_integer, + default=inv.cleanup_delay_usec_per_file, + help="time to delay after cleanup per file (microsec)", + ) + add( + "--stonewall", + type=boolean, + 
default=inv.stonewall, + help="stop measuring as soon as first thread is done", + ) + add( + "--finish", + type=boolean, + default=inv.finish_all_rq, + help="stop processing files as soon as first thread is done", + ) + add("--prefix", default=inv.prefix, help="filename prefix") + add("--suffix", default=inv.suffix, help="filename suffix") + add( + "--hash-into-dirs", + type=boolean, + default=inv.hash_to_dir, + help="if true then pseudo-randomly place files into directories", + ) + add( + "--same-dir", + type=boolean, + default=inv.is_shared_dir, + help="if true then all threads share the same directories", + ) + add( + "--verbose", + type=boolean, + default=inv.verbose, + help="if true then log extra messages about test", + ) + add( + "--permute-host-dirs", + type=boolean, + default=test_params.permute_host_dirs, + help="if true then shift clients to different host directories", + ) + add( + "--record-ctime-size", + type=boolean, + default=inv.record_ctime_size, + help="if true then update file xattr with ctime+size", + ) + add( + "--verify-read", + type=boolean, + default=inv.verify_read, + help="if true then check that data read = data written", + ) + add( + "--incompressible", + type=boolean, + default=inv.incompressible, + help="if true then non-compressible data written", + ) # these parameters shouldn't be used by mere mortals - add('--min-dirs-per-sec', - type=positive_integer, default=test_params.min_directories_per_sec, - help=argparse.SUPPRESS) - add('--log-to-stderr', type=boolean, default=inv.log_to_stderr, - help=argparse.SUPPRESS) - add('--remote-pgm-dir', default=test_params.remote_pgm_dir, - help=argparse.SUPPRESS) - add('--slave', - help=argparse.SUPPRESS) - add('--as-host', - help=argparse.SUPPRESS) - add('--host-count', - type=positive_integer, default=0, - help='total number of hosts/pods participating in smallfile test') + add( + "--min-dirs-per-sec", + type=positive_integer, + default=test_params.min_directories_per_sec, + help=argparse.SUPPRESS, + ) + add( + "--log-to-stderr", + type=boolean, + default=inv.log_to_stderr, + help=argparse.SUPPRESS, + ) + add("--remote-pgm-dir", default=test_params.remote_pgm_dir, help=argparse.SUPPRESS) + add("--slave", help=argparse.SUPPRESS) + add("--as-host", help=argparse.SUPPRESS) + add( + "--host-count", + type=positive_integer, + default=0, + help="total number of hosts/pods participating in smallfile test", + ) args = parser.parse_args() inv.opname = args.operation - test_params.top_dirs = [ os.path.abspath(p) for p in args.top ] + test_params.top_dirs = [os.path.abspath(p) for p in args.top] test_params.launch_by_daemon = args.launch_by_daemon inv.iterations = args.files test_params.thread_count = inv.threads = args.threads @@ -165,9 +254,7 @@ def parse(): inv.dirs_per_dir = args.dirs_per_dir inv.record_sz_kb = args.record_size inv.total_sz_kb = args.file_size - test_params.size_distribution = \ - inv.filesize_distr = \ - args.file_size_distribution + test_params.size_distribution = inv.filesize_distr = args.file_size_distribution inv.xattr_size = args.xattr_size inv.xattr_count = args.xattr_count inv.prefix = args.prefix @@ -175,9 +262,9 @@ def parse(): inv.hash_to_dir = args.hash_into_dirs inv.pause_between_files = args.pause inv.auto_pause = args.auto_pause - test_params.cleanup_delay_usec_per_file = \ - inv.cleanup_delay_usec_per_file = \ - args.cleanup_delay_usec_per_file + test_params.cleanup_delay_usec_per_file = ( + inv.cleanup_delay_usec_per_file + ) = args.cleanup_delay_usec_per_file inv.stonewall = args.stonewall 
inv.finish_all_rq = args.finish inv.measure_rsptimes = args.response_times @@ -203,7 +290,7 @@ def parse(): if args.yaml_input_file: if not yaml_parser_installed: - raise SmfParseException('python yaml module not available - is this PyPy?') + raise SmfParseException("python yaml module not available - is this PyPy?") yaml_parser.parse_yaml(test_params, args.yaml_input_file) # total_hosts is a parameter that allows pod workloads to know @@ -215,37 +302,46 @@ def parse(): else: inv.total_hosts = 1 - # network_sync_dir is where python processes share state + # network_sync_dir is where python processes share state if not test_params.network_sync_dir: - test_params.network_sync_dir = os.path.join(test_params.top_dirs[0], 'network_shared') + test_params.network_sync_dir = os.path.join( + test_params.top_dirs[0], "network_shared" + ) # validate parameters further now that we know what they all are - sdmsg = 'directory %s containing network sync dir. must exist on all hosts (including this one)' + sdmsg = "directory %s containing network sync dir. must exist on all hosts (including this one)" parentdir = os.path.dirname(test_params.network_sync_dir) if not os.path.isdir(parentdir) and args.host_set != None: raise SmfParseException(sdmsg % parentdir) if inv.record_sz_kb > inv.total_sz_kb and inv.total_sz_kb != 0: - raise SmfParseException('record size cannot exceed file size') + raise SmfParseException("record size cannot exceed file size") if inv.record_sz_kb == 0 and inv.verbose: - print(('record size not specified, ' + - 'large files will default to record size %d KB') % - (SmallfileWorkload.biggest_buf_size / inv.BYTES_PER_KB)) + print( + ( + "record size not specified, " + + "large files will default to record size %d KB" + ) + % (SmallfileWorkload.biggest_buf_size / inv.BYTES_PER_KB) + ) if test_params.top_dirs: for d in test_params.top_dirs: if len(d) < 6: raise SmfParseException( - 'directory less than 6 characters, ' + - 'cannot use top of filesystem, too dangerous') + "directory less than 6 characters, " + + "cannot use top of filesystem, too dangerous" + ) if not os.path.isdir(d) and test_params.network_sync_dir != None: raise SmfParseException( - 'you must ensure that shared directory ' + d + - ' is accessible ' + - 'from this host and every remote host in test') + "you must ensure that shared directory " + + d + + " is accessible " + + "from this host and every remote host in test" + ) if test_params.top_dirs: inv.set_top(test_params.top_dirs) else: @@ -255,34 +351,36 @@ def parse(): inv.network_dir = test_params.network_sync_dir else: test_params.network_sync_dir = inv.network_dir - inv.starting_gate = os.path.join(inv.network_dir, 'starting_gate.tmp') + inv.starting_gate = os.path.join(inv.network_dir, "starting_gate.tmp") if inv.iterations < 10: inv.stonewall = False - if inv.opname == 'cleanup' and (inv.auto_pause or (inv.pause_between_files > 0)): + if inv.opname == "cleanup" and (inv.auto_pause or (inv.pause_between_files > 0)): inv.auto_pause = False inv.pause_between_files = 0 - print('do not need pause between files during cleanup') + print("do not need pause between files during cleanup") if inv.total_hosts * inv.threads == 1: inv.auto_pause = False inv.pause_between_files = 0 - print('do not need pause between files for single-threaded workload') + print("do not need pause between files for single-threaded workload") if inv.auto_pause and inv.pause_between_files > 0: inv.pause_between_files = 0 - print('pause parameter not needed with auto-pause Y, setting pause to 0') + 
print("pause parameter not needed with auto-pause Y, setting pause to 0") - # create must finish all files so that subsequent ops have the files they need + # create must finish all files so that subsequent ops have the files they need # cleanup must finish all files so that all remnants of last test are removed - if (['cleanup', 'create', 'mkdir'].__contains__(inv.opname)) and not inv.finish_all_rq: - print('changing --finish to true for op type %s' % inv.opname) + if ( + ["cleanup", "create", "mkdir"].__contains__(inv.opname) + ) and not inv.finish_all_rq: + print("changing --finish to true for op type %s" % inv.opname) inv.finish_all_rq = True if not test_params.is_slave: prm_list = test_params.human_readable() for (prm_name, prm_value) in prm_list: - print('%40s : %s' % (prm_name, prm_value)) + print("%40s : %s" % (prm_name, prm_value)) inv.reset() test_params.recalculate_timeouts() diff --git a/parse_slave.py b/parse_slave.py index f0aef15..5bfa049 100644 --- a/parse_slave.py +++ b/parse_slave.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- -''' +""" parse_slave.py -- parses SSH cmd for invocation of smallfile_remote.py Copyright 2012 -- Ben England Licensed under the Apache License at http://www.apache.org/licenses/LICENSE-2.0 See Appendix on this page for instructions pertaining to license. -''' +""" import sys import os @@ -19,22 +19,25 @@ # pass via --network-sync-dir option # optionally pass host identity of this remote invocation + def parse(): - parser = argparse.ArgumentParser( - description='parse remote smallfile parameters') - parser.add_argument( '--network-sync-dir', - help='directory used to synchronize with test driver') - parser.add_argument( '--as-host', - default=smallfile.get_hostname(None), - help='directory used to synchronize with test driver') + parser = argparse.ArgumentParser(description="parse remote smallfile parameters") + parser.add_argument( + "--network-sync-dir", help="directory used to synchronize with test driver" + ) + parser.add_argument( + "--as-host", + default=smallfile.get_hostname(None), + help="directory used to synchronize with test driver", + ) args = parser.parse_args() - param_pickle_fname = os.path.join(args.network_sync_dir, 'param.pickle') + param_pickle_fname = os.path.join(args.network_sync_dir, "param.pickle") if not os.path.exists(param_pickle_fname): time.sleep(1.1) params = None - with open(param_pickle_fname, 'rb') as pickled_params: + with open(param_pickle_fname, "rb") as pickled_params: params = pickle.load(pickled_params) params.is_slave = True params.as_host = args.as_host diff --git a/parser_data_types.py b/parser_data_types.py index 95d14e9..6fe20f4 100644 --- a/parser_data_types.py +++ b/parser_data_types.py @@ -8,66 +8,72 @@ # if we throw exceptions, do it with this # so caller can specifically catch them + class SmfParseException(Exception): pass + # the next few routines implement data types # of smallfile parameters + def boolean(boolstr): if boolstr == True: return True elif boolstr == False: return False b = boolstr.lower() - if b == 'y' or b == 'yes' or b == 't' or b == 'true': + if b == "y" or b == "yes" or b == "t" or b == "true": bval = True - elif b == 'n' or b == 'no' or b == 'f' or b == 'false': + elif b == "n" or b == "no" or b == "f" or b == "false": bval = False else: - raise TypeExc('boolean value must be y|yes|t|true|n|no|f|false') + raise TypeExc("boolean value must be y|yes|t|true|n|no|f|false") return bval + def positive_integer(posint_str): intval = int(posint_str) if intval <= 0: - raise TypeExc( 
'integer value greater than zero expected') + raise TypeExc("integer value greater than zero expected") return intval + def non_negative_integer(nonneg_str): intval = int(nonneg_str) if intval < 0: - raise TypeExc( 'non-negative integer value expected') + raise TypeExc("non-negative integer value expected") return intval + def host_set(hostname_list_str): if os.path.isfile(hostname_list_str): - with open(hostname_list_str, 'r') as f: - hostname_list = [ record.strip() for record in f.readlines() ] + with open(hostname_list_str, "r") as f: + hostname_list = [record.strip() for record in f.readlines()] else: - hostname_list = hostname_list_str.strip().split(',') + hostname_list = hostname_list_str.strip().split(",") if len(hostname_list) < 2: hostname_list = hostname_list_str.strip().split() if len(hostname_list) == 0: - raise TypeExc('host list must be non-empty') + raise TypeExc("host list must be non-empty") return hostname_list + def directory_list(directory_list_str): - directory_list = directory_list_str.strip().split(',') + directory_list = directory_list_str.strip().split(",") if len(directory_list) == 1: directory_list = directory_list_str.strip().split() if len(directory_list) == 0: - raise TypeExc('directory list must be non-empty') + raise TypeExc("directory list must be non-empty") return directory_list + def file_size_distrib(fsdistrib_str): # FIXME: should be a data type - if fsdistrib_str == 'exponential': + if fsdistrib_str == "exponential": return SmallfileWorkload.fsdistr_random_exponential - elif fsdistrib_str == 'fixed': + elif fsdistrib_str == "fixed": return SmallfileWorkload.fsdistr_fixed else: # should never get here - raise TypeExc( - 'file size distribution must be either "exponential" or "fixed"') - + raise TypeExc('file size distribution must be either "exponential" or "fixed"') diff --git a/profile_workload.py b/profile_workload.py index 8074178..a7590a0 100644 --- a/profile_workload.py +++ b/profile_workload.py @@ -6,21 +6,38 @@ import os import socket import smallfile -top = os.getenv('TOP') -count = int(os.getenv('COUNT')) + +top = os.getenv("TOP") +count = int(os.getenv("COUNT")) invk = smallfile.SmallfileWorkload() -invk.tid = '00' -invk.src_dirs = [top + os.sep + 'file_srcdir' + os.sep - + socket.gethostname() + os.sep + 'thrd_' + invk.tid] -invk.dest_dirs = [top + os.sep + 'file_dstdir' + os.sep - + socket.gethostname() + os.sep + 'thrd_' + invk.tid] -invk.network_dir = top + os.sep + 'network_shared' +invk.tid = "00" +invk.src_dirs = [ + top + + os.sep + + "file_srcdir" + + os.sep + + socket.gethostname() + + os.sep + + "thrd_" + + invk.tid +] +invk.dest_dirs = [ + top + + os.sep + + "file_dstdir" + + os.sep + + socket.gethostname() + + os.sep + + "thrd_" + + invk.tid +] +invk.network_dir = top + os.sep + "network_shared" invk.record_sz_kb = 0 invk.total_sz_kb = 1 -invk.starting_gate = os.path.join(invk.network_dir, 'starting_gate') +invk.starting_gate = os.path.join(invk.network_dir, "starting_gate") invk.stonewall = True invk.finish_all_rq = True -invk.opname = os.getenv('OPNAME') +invk.opname = os.getenv("OPNAME") invk.iterations = count print(invk) invk.do_workload() diff --git a/smallfile.py b/smallfile.py index 3c4c966..2b8502a 100644 --- a/smallfile.py +++ b/smallfile.py @@ -1,13 +1,13 @@ # -*- coding: utf-8 -*- -''' +""" smallfile.py -- SmallfileWorkload class used in each workload thread Copyright 2012 -- Ben England Licensed under the Apache License at http://www.apache.org/licenses/LICENSE-2.0 See Appendix on this page for instructions 
pertaining to license. Created on Apr 22, 2009 -''' +""" # repeat a file operation N times @@ -61,6 +61,7 @@ xattr_installed = False try: import xattr + xattr_installed = True except ImportError as e: pass @@ -68,6 +69,7 @@ fadvise_installed = False try: import drop_buffer_cache + fadvise_installed = True except ImportError as e: pass @@ -75,6 +77,7 @@ fallocate_installed = False try: import fallocate # not yet in python os module + fallocate_installed = True except ImportError as e: pass @@ -82,12 +85,14 @@ unittest_module = None try: import unittest2 + unittest_module = unittest2 except ImportError as e: pass try: import unittest + unittest_module = unittest except ImportError as e: pass @@ -98,20 +103,22 @@ # c # b Test.test_whatever + def run_unit_tests(): if unittest_module: unittest_module.main() else: - raise SMFRunException('no python unittest module available') + raise SMFRunException("no python unittest module available") + # python threading module method name isAlive changed to is_alive in python3 -use_isAlive = (sys.version_info[0] < 3) +use_isAlive = sys.version_info[0] < 3 # Windows 2008 server seemed to have this environment variable # didn't check if it's universal -is_windows_os = (os.getenv('HOMEDRIVE') is not None) +is_windows_os = os.getenv("HOMEDRIVE") is not None # O_BINARY variable means we don't need to special-case windows # in every open statement @@ -122,12 +129,12 @@ def run_unit_tests(): # for timeout debugging -debug_timeout = os.getenv('DEBUG_TIMEOUT') +debug_timeout = os.getenv("DEBUG_TIMEOUT") # FIXME: pass in file pathname instead of file number -class MFRdWrExc(Exception): +class MFRdWrExc(Exception): def __init__(self, opname_in, filenum_in, rqnum_in, bytesrtnd_in): self.opname = opname_in self.filenum = filenum_in @@ -135,9 +142,16 @@ def __init__(self, opname_in, filenum_in, rqnum_in, bytesrtnd_in): self.bytesrtnd = bytesrtnd_in def __str__(self): - return 'file ' + str(self.filenum) + ' request ' \ - + str(self.rqnum) + ' byte count ' + str(self.bytesrtnd) \ - + ' ' + self.opname + return ( + "file " + + str(self.filenum) + + " request " + + str(self.rqnum) + + " byte count " + + str(self.bytesrtnd) + + " " + + self.opname + ) class SMFResultException(Exception): @@ -147,12 +161,15 @@ class SMFResultException(Exception): class SMFRunException(Exception): pass + def myassert(bool_expr): - if (not bool_expr): - raise SMFRunException('assertion failed!') + if not bool_expr: + raise SMFRunException("assertion failed!") + # abort routine just cleans up threads + def abort_test(abort_fn, thread_list): if not os.path.exists(abort_fn): touch(abort_fn) @@ -163,9 +180,10 @@ def abort_test(abort_fn, thread_list): # hide difference between python2 and python3 # python threading module method name isAlive changed to is_alive in python3 + def thrd_is_alive(thrd): - use_isAlive = (sys.version_info[0] < 3) - return (thrd.isAlive() if use_isAlive else thrd.is_alive()) + use_isAlive = sys.version_info[0] < 3 + return thrd.isAlive() if use_isAlive else thrd.is_alive() # next two routines are for asynchronous replication @@ -174,21 +192,25 @@ def thrd_is_alive(thrd): # then we read xattr in do_await_create operation # and compute latencies from that + def remember_ctime_size_xattr(filedesc): nowtime = str(time.time()) st = os.fstat(filedesc) - xattr.setxattr(filedesc, 'user.smallfile-ctime-size', nowtime + ',' - + str(st.st_size / SmallfileWorkload.BYTES_PER_KB)) + xattr.setxattr( + filedesc, + "user.smallfile-ctime-size", + nowtime + "," + str(st.st_size / 
SmallfileWorkload.BYTES_PER_KB), + ) def recall_ctime_size_xattr(pathname): (ctime, size_kb) = (None, None) try: - with open(pathname, 'r') as fd: - xattr_str = xattr.getxattr(fd, 'user.smallfile-ctime-size') - token_pair = str(xattr_str).split(',') + with open(pathname, "r") as fd: + xattr_str = xattr.getxattr(fd, "user.smallfile-ctime-size") + token_pair = str(xattr_str).split(",") ctime = float(token_pair[0][2:]) - size_kb = int(token_pair[1].split('.')[0]) + size_kb = int(token_pair[1].split(".")[0]) except IOError as e: eno = e.errno if eno != errno.ENODATA: @@ -202,7 +224,7 @@ def get_hostname(h): return h -def hostaddr(h): # return the IP address of a hostname +def hostaddr(h): # return the IP address of a hostname if h is None: a = socket.gethostbyname(socket.gethostname()) else: @@ -211,48 +233,48 @@ def hostaddr(h): # return the IP address of a hostname def hexdump(b): - s = '' + s = "" for j in range(0, len(b)): - s += '%02x' % b[j] + s += "%02x" % b[j] return s -def binary_buf_str(b): # display a binary buffer as a text string - if sys.version < '3': +def binary_buf_str(b): # display a binary buffer as a text string + if sys.version < "3": return codecs.unicode_escape_decode(b)[0] else: if isinstance(b, str): - return bytes(b).decode('UTF-8', 'backslashreplace') + return bytes(b).decode("UTF-8", "backslashreplace") else: - return b.decode('UTF-8', 'backslashreplace') + return b.decode("UTF-8", "backslashreplace") class SmallfileWorkload: - rename_suffix = '.rnm' + rename_suffix = ".rnm" all_op_names = [ - 'create', - 'delete', - 'append', - 'overwrite', - 'read', - 'readdir', - 'rename', - 'delete-renamed', - 'cleanup', - 'symlink', - 'mkdir', - 'rmdir', - 'stat', - 'chmod', - 'setxattr', - 'getxattr', - 'swift-get', - 'swift-put', - 'ls-l', - 'await-create', - 'truncate-overwrite', - ] + "create", + "delete", + "append", + "overwrite", + "read", + "readdir", + "rename", + "delete-renamed", + "cleanup", + "symlink", + "mkdir", + "rmdir", + "stat", + "chmod", + "setxattr", + "getxattr", + "swift-get", + "swift-put", + "ls-l", + "await-create", + "truncate-overwrite", + ] OK = 0 NOTOK = 1 BYTES_PER_KB = 1024 @@ -262,11 +284,11 @@ class SmallfileWorkload: max_files_between_checks = 100 # default for UNIX - tmp_dir = os.getenv('TMPDIR') + tmp_dir = os.getenv("TMPDIR") if tmp_dir is None: # windows case - tmp_dir = os.getenv('TEMP') + tmp_dir = os.getenv("TEMP") if tmp_dir is None: # assume POSIX-like - tmp_dir = '/var/tmp' + tmp_dir = "/var/tmp" # constant file size fsdistr_fixed = -1 @@ -305,13 +327,13 @@ def __init__(self): self.is_shared_dir = False # file operation type, default idempotent - self.opname = 'cleanup' + self.opname = "cleanup" # how many files accessed, default = quick test self.iterations = 200 # top of directory tree, default always exists on local fs - top = join(self.tmp_dir, 'smf') + top = join(self.tmp_dir, "smf") # file that tells thread when to start running self.starting_gate = None @@ -341,10 +363,10 @@ def __init__(self): self.files_between_checks = 20 # prepend this to file name - self.prefix = '' + self.prefix = "" # append this to file name - self.suffix = '' + self.suffix = "" # directories are accessed randomly self.hash_to_dir = False @@ -370,7 +392,7 @@ def __init__(self): # , compare read data to what was written self.verify_read = True - # should we attempt to adjust pause between files + # should we attempt to adjust pause between files self.auto_pause = False # sleep this long between each file op @@ -386,7 +408,7 @@ def __init__(self): 
self.onhost = get_hostname(None) # thread ID - self.tid = '' + self.tid = "" # debug to screen self.log_to_stderr = False @@ -431,52 +453,52 @@ def __init__(self): # convert object to string for logging, etc. def __str__(self): - s = ' opname=' + self.opname - s += ' iterations=' + str(self.iterations) - s += ' top_dirs=' + str(self.top_dirs) - s += ' src_dirs=' + str(self.src_dirs) - s += ' dest_dirs=' + str(self.dest_dirs) - s += ' network_dir=' + str(self.network_dir) - s += ' shared=' + str(self.is_shared_dir) - s += ' record_sz_kb=' + str(self.record_sz_kb) - s += ' total_sz_kb=' + str(self.total_sz_kb) - s += ' filesize_distr=' + str(self.filesize_distr) - s += ' files_per_dir=%d' % self.files_per_dir - s += ' dirs_per_dir=%d' % self.dirs_per_dir - s += ' dirs_on_demand=' + str(self.dirs_on_demand) - s += ' xattr_size=%d' % self.xattr_size - s += ' xattr_count=%d' % self.xattr_count - s += ' starting_gate=' + str(self.starting_gate) - s += ' prefix=' + self.prefix - s += ' suffix=' + self.suffix - s += ' hash_to_dir=' + str(self.hash_to_dir) - s += ' fsync=' + str(self.fsync) - s += ' stonewall=' + str(self.stonewall) - s += ' cleanup_delay_usec_per_file=' + str(self.cleanup_delay_usec_per_file) - s += ' files_between_checks=' + str(self.files_between_checks) - s += ' pause=' + str(self.pause_between_files) - s += ' pause_sec=' + str(self.pause_sec) - s += ' auto_pause=' + str(self.auto_pause) - s += ' verify_read=' + str(self.verify_read) - s += ' incompressible=' + str(self.incompressible) - s += ' finish_all_rq=' + str(self.finish_all_rq) - s += ' rsp_times=' + str(self.measure_rsptimes) - s += ' tid=' + self.tid - s += ' loglevel=' + str(self.log_level) - s += ' filenum=' + str(self.filenum) - s += ' filenum_final=' + str(self.filenum_final) - s += ' rq=' + str(self.rq) - s += ' rq_final=' + str(self.rq_final) - s += ' total_hosts=' + str(self.total_hosts) - s += ' threads=' + str(self.threads) - s += ' start=' + str(self.start_time) - s += ' end=' + str(self.end_time) - s += ' elapsed=' + str(self.elapsed_time) - s += ' host=' + str(self.onhost) - s += ' status=' + str(self.status) - s += ' abort=' + str(self.abort) - s += ' log_to_stderr=' + str(self.log_to_stderr) - s += ' verbose=' + str(self.verbose) + s = " opname=" + self.opname + s += " iterations=" + str(self.iterations) + s += " top_dirs=" + str(self.top_dirs) + s += " src_dirs=" + str(self.src_dirs) + s += " dest_dirs=" + str(self.dest_dirs) + s += " network_dir=" + str(self.network_dir) + s += " shared=" + str(self.is_shared_dir) + s += " record_sz_kb=" + str(self.record_sz_kb) + s += " total_sz_kb=" + str(self.total_sz_kb) + s += " filesize_distr=" + str(self.filesize_distr) + s += " files_per_dir=%d" % self.files_per_dir + s += " dirs_per_dir=%d" % self.dirs_per_dir + s += " dirs_on_demand=" + str(self.dirs_on_demand) + s += " xattr_size=%d" % self.xattr_size + s += " xattr_count=%d" % self.xattr_count + s += " starting_gate=" + str(self.starting_gate) + s += " prefix=" + self.prefix + s += " suffix=" + self.suffix + s += " hash_to_dir=" + str(self.hash_to_dir) + s += " fsync=" + str(self.fsync) + s += " stonewall=" + str(self.stonewall) + s += " cleanup_delay_usec_per_file=" + str(self.cleanup_delay_usec_per_file) + s += " files_between_checks=" + str(self.files_between_checks) + s += " pause=" + str(self.pause_between_files) + s += " pause_sec=" + str(self.pause_sec) + s += " auto_pause=" + str(self.auto_pause) + s += " verify_read=" + str(self.verify_read) + s += " incompressible=" + str(self.incompressible) + s 
+= " finish_all_rq=" + str(self.finish_all_rq) + s += " rsp_times=" + str(self.measure_rsptimes) + s += " tid=" + self.tid + s += " loglevel=" + str(self.log_level) + s += " filenum=" + str(self.filenum) + s += " filenum_final=" + str(self.filenum_final) + s += " rq=" + str(self.rq) + s += " rq_final=" + str(self.rq_final) + s += " total_hosts=" + str(self.total_hosts) + s += " threads=" + str(self.threads) + s += " start=" + str(self.start_time) + s += " end=" + str(self.end_time) + s += " elapsed=" + str(self.elapsed_time) + s += " host=" + str(self.onhost) + s += " status=" + str(self.status) + s += " abort=" + str(self.abort) + s += " log_to_stderr=" + str(self.log_to_stderr) + s += " verbose=" + str(self.verbose) return s # if you want to use the same instance for multiple tests @@ -525,16 +547,15 @@ def reset(self): def set_top(self, top_dirs, network_dir=None): self.top_dirs = top_dirs # create/read files here - self.src_dirs = [join(d, 'file_srcdir') for d in top_dirs] + self.src_dirs = [join(d, "file_srcdir") for d in top_dirs] # rename files to here - self.dest_dirs = [join(d, 'file_dstdir') for d in top_dirs] + self.dest_dirs = [join(d, "file_dstdir") for d in top_dirs] # directory for synchronization files shared across hosts - self.network_dir = join(top_dirs[0], 'network_shared') + self.network_dir = join(top_dirs[0], "network_shared") if network_dir: self.network_dir = network_dir - def create_top_dirs(self, is_multi_host): if os.path.exists(self.network_dir): rmtree(self.network_dir) @@ -550,7 +571,6 @@ def create_top_dirs(self, is_multi_host): time.sleep(1.1) # lets NFS mount option actimeo=1 take effect os.listdir(self.network_dir) - # create per-thread log file # we have to avoid getting the logger for self.tid more than once, # or else we'll add a handler more than once to this logger @@ -566,8 +586,7 @@ def start_log(self): h = logging.StreamHandler() else: h = logging.FileHandler(self.log_fn()) - log_format = (self.tid + - ' %(asctime)s - %(levelname)s - %(message)s') + log_format = self.tid + " %(asctime)s - %(levelname)s - %(message)s" formatter = logging.Formatter(log_format) h.setFormatter(formatter) self.log.addHandler(h) @@ -584,7 +603,6 @@ def op_starttime(self, starttime=None): else: self.op_start_time = starttime - # indicate end of an operation, # this appends the elapsed time of the operation to .rsptimes array @@ -597,21 +615,30 @@ def op_endtime(self, opname): if self.auto_pause: self.adjust_pause_time(end_time, rsp_time) - # save response times seen by this thread def save_rsptimes(self): - fname = 'rsptimes_' + str(self.tid) + '_' + get_hostname(None) \ - + '_' + self.opname + '_' + str(self.start_time) + '.csv' + fname = ( + "rsptimes_" + + str(self.tid) + + "_" + + get_hostname(None) + + "_" + + self.opname + + "_" + + str(self.start_time) + + ".csv" + ) rsptime_fname = join(self.network_dir, fname) - with open(rsptime_fname, 'w') as f: + with open(rsptime_fname, "w") as f: for (opname, start_time, rsp_time) in self.rsptimes: # time granularity is microseconds, accuracy is less - f.write('%8s, %9.6f, %9.6f\n' % - (opname, start_time - self.start_time, rsp_time)) + f.write( + "%8s, %9.6f, %9.6f\n" + % (opname, start_time - self.start_time, rsp_time) + ) os.fsync(f.fileno()) # particularly for NFS this is needed - # compute pause time based on available response time samples, # assuming all threads converge to roughly the same average response time # we treat the whole system as one big queueing center and apply @@ -620,7 +647,7 @@ def 
save_rsptimes(self): def calculate_pause_time(self, end_time): # there are samples to process - mean_rsptime = sum(self.pause_rsptime_history)/self.pause_rsptime_count + mean_rsptime = sum(self.pause_rsptime_history) / self.pause_rsptime_count time_so_far = end_time - self.pause_history_start_time # estimate system throughput assuming all threads are same # per-thread throughput is measured by number of rsptime samples @@ -630,28 +657,43 @@ def calculate_pause_time(self, end_time): mean_utilization = mean_rsptime * est_throughput old_pause = self.pause_sec new_pause = mean_utilization * mean_rsptime * self.throttling_factor - self.pause_sec = (old_pause + 2*new_pause) / 3.0 - self.log.debug('time_so_far %f samples %d index %d mean_rsptime %f throttle %f est_throughput %f mean_util %f' % - (time_so_far, self.pause_sample_count, self.pause_rsptime_index, mean_rsptime, self.throttling_factor, - est_throughput, mean_utilization)) - self.log.info('per-thread pause changed from %9.6f to %9.6f' % (old_pause, self.pause_sec)) + self.pause_sec = (old_pause + 2 * new_pause) / 3.0 + self.log.debug( + "time_so_far %f samples %d index %d mean_rsptime %f throttle %f est_throughput %f mean_util %f" + % ( + time_so_far, + self.pause_sample_count, + self.pause_rsptime_index, + mean_rsptime, + self.throttling_factor, + est_throughput, + mean_utilization, + ) + ) + self.log.info( + "per-thread pause changed from %9.6f to %9.6f" % (old_pause, self.pause_sec) + ) # adjust pause time based on whether response time was significantly bigger than pause time - # we lower the pause time until + # we lower the pause time until def adjust_pause_time(self, end_time, rsp_time): - self.log.debug('adjust_pause_time %f %f %f %f' % - (end_time, rsp_time, self.pause_sec, self.pause_history_start_time)) + self.log.debug( + "adjust_pause_time %f %f %f %f" + % (end_time, rsp_time, self.pause_sec, self.pause_history_start_time) + ) if self.pause_rsptime_index == self.pause_rsptime_unmeasured: self.pause_sec = 0.00001 self.pause_history_start_time = end_time - rsp_time # try to get the right order of magnitude for response time estimate immediately - self.pause_rsptime_history = [ rsp_time for k in range(0, self.pause_rsptime_count) ] + self.pause_rsptime_history = [ + rsp_time for k in range(0, self.pause_rsptime_count) + ] self.pause_rsptime_index = 1 self.pause_sample_count = 1 self.pause_sec = self.throttling_factor * rsp_time - #self.calculate_pause_time(end_time) - self.log.info('per-thread pause initialized to %9.6f' % self.pause_sec) + # self.calculate_pause_time(end_time) + self.log.info("per-thread pause initialized to %9.6f" % self.pause_sec) else: # insert response time into ring buffer of most recent response times self.pause_rsptime_history[self.pause_rsptime_index] = rsp_time @@ -661,8 +703,10 @@ def adjust_pause_time(self, end_time, rsp_time): self.pause_sample_count += 1 # if it's time to adjust pause_sec... - if self.pause_history_start_time + self.pause_history_duration < end_time or \ - self.pause_sample_count > self.pause_rsptime_count / 2: + if ( + self.pause_history_start_time + self.pause_history_duration < end_time + or self.pause_sample_count > self.pause_rsptime_count / 2 + ): self.calculate_pause_time(end_time) self.pause_history_start_time = end_time self.pause_sample_count = 0 @@ -673,7 +717,7 @@ def adjust_pause_time(self, end_time, rsp_time): # (i.e. 
it is ready to immediately begin generating workload) def gen_thread_ready_fname(self, tid, hostname=None): - return join(self.tmp_dir, 'thread_ready.' + tid + '.tmp') + return join(self.tmp_dir, "thread_ready." + tid + ".tmp") # each host uses this to signal that it is # ready to immediately begin generating workload @@ -683,31 +727,31 @@ def gen_thread_ready_fname(self, tid, hostname=None): def gen_host_ready_fname(self, hostname=None): if not hostname: hostname = self.onhost - return join(self.network_dir, 'host_ready.' + hostname + '.tmp') + return join(self.network_dir, "host_ready." + hostname + ".tmp") # abort file tells other threads not to start test # because something has already gone wrong def abort_fn(self): - return join(self.network_dir, 'abort.tmp') + return join(self.network_dir, "abort.tmp") # stonewall file stops test measurement # (does not stop worker thread unless --finish N is used) def stonewall_fn(self): - return join(self.network_dir, 'stonewall.tmp') + return join(self.network_dir, "stonewall.tmp") # log file for this worker thread goes here def log_fn(self): - return join(self.tmp_dir, 'invoke_logs-%s.log' % self.tid) + return join(self.tmp_dir, "invoke_logs-%s.log" % self.tid) # file for result stored as pickled python object def host_result_filename(self, result_host=None): if result_host is None: result_host = self.onhost - return join(self.network_dir, result_host + '_result.pickle') + return join(self.network_dir, result_host + "_result.pickle") # we use the seed function to control per-thread random sequence # we want seed to be saved @@ -715,37 +759,47 @@ def host_result_filename(self, result_host=None): # what file size is for thread T's file j without having to stat the file def init_random_seed(self): - fn = self.gen_thread_ready_fname(self.tid, - hostname=self.onhost) + '.seed' + fn = self.gen_thread_ready_fname(self.tid, hostname=self.onhost) + ".seed" thread_seed = str(time.time()) - self.log.debug('seed opname: ' + self.opname) - if self.opname == 'create' or self.opname == 'swift-put': - thread_seed = str(time.time()) + ' ' + self.tid + self.log.debug("seed opname: " + self.opname) + if self.opname == "create" or self.opname == "swift-put": + thread_seed = str(time.time()) + " " + self.tid ensure_deleted(fn) - with open(fn, 'w') as seedfile: + with open(fn, "w") as seedfile: seedfile.write(str(thread_seed)) - self.log.debug('write seed %s ' % thread_seed) - #elif ['append', 'read', 'swift-get'].__contains__(self.opname): + self.log.debug("write seed %s " % thread_seed) + # elif ['append', 'read', 'swift-get'].__contains__(self.opname): else: try: - with open(fn, 'r') as seedfile: + with open(fn, "r") as seedfile: thread_seed = seedfile.readlines()[0].strip() - self.log.debug('read seed %s ' % thread_seed) + self.log.debug("read seed %s " % thread_seed) except OSError as e: - if e.errno == errno.ENOENT and self.opname in ['cleanup', 'rmdir', 'delete']: - self.log.info('no saved random seed found in %s but it does not matter for deletes' % fn) + if e.errno == errno.ENOENT and self.opname in [ + "cleanup", + "rmdir", + "delete", + ]: + self.log.info( + "no saved random seed found in %s but it does not matter for deletes" + % fn + ) self.randstate.seed(thread_seed) def get_next_file_size(self): next_size = self.total_sz_kb if self.filesize_distr == self.fsdistr_random_exponential: - next_size = max(1, min(int(self.randstate.expovariate(1.0 - / self.total_sz_kb)), self.total_sz_kb - * self.random_size_limit)) + next_size = max( + 1, + min( + 
int(self.randstate.expovariate(1.0 / self.total_sz_kb)), + self.total_sz_kb * self.random_size_limit, + ), + ) if self.log_level == logging.DEBUG: - self.log.debug('rnd expn file size %d KB' % next_size) + self.log.debug("rnd expn file size %d KB" % next_size) else: - self.log.debug('fixed file size %d KB' % next_size) + self.log.debug("fixed file size %d KB" % next_size) return next_size # tell test driver that we're at the starting gate @@ -763,23 +817,24 @@ def wait_for_gate(self): delay_time = 0.1 while not os.path.exists(self.starting_gate): if os.path.exists(self.abort_fn()): - raise SMFRunException('thread ' + str(self.tid) - + ' saw abort flag') + raise SMFRunException("thread " + str(self.tid) + " saw abort flag") # wait a little longer so that # other clients have time to see that gate exists delay_time = delay_time * 1.5 - if delay_time > 2.0: delay_time = 2.0 + if delay_time > 2.0: + delay_time = 2.0 time.sleep(delay_time) gateinfo = os.stat(self.starting_gate) synch_time = gateinfo.st_mtime + 3.0 - time.time() if synch_time > 0.0: time.sleep(synch_time) if synch_time < 0.0: - self.log.warn('other threads may have already started') + self.log.warn("other threads may have already started") if self.verbose: - self.log.debug('started test at %f sec after waiting %f sec' % - (time.time(), synch_time)) - + self.log.debug( + "started test at %f sec after waiting %f sec" + % (time.time(), synch_time) + ) # record info needed to compute test statistics @@ -789,29 +844,32 @@ def end_test(self): # during do_workload() if self.test_ended(): return - myassert(self.end_time is None and - self.rq_final is None and - self.filenum_final is None) + myassert( + self.end_time is None + and self.rq_final is None + and self.filenum_final is None + ) self.rq_final = self.rq self.filenum_final = self.filenum self.end_time = time.time() self.elapsed_time = self.end_time - self.start_time stonewall_path = self.stonewall_fn() - if self.filenum >= self.iterations \ - and not os.path.exists(stonewall_path): + if self.filenum >= self.iterations and not os.path.exists(stonewall_path): try: touch(stonewall_path) - self.log.info('stonewall file %s written' % stonewall_path) + self.log.info("stonewall file %s written" % stonewall_path) except IOError as e: err = e.errno if err != errno.EEXIST: # workaround for possible bug in Gluster if err != errno.EINVAL: - self.log.error('unable to write stonewall file %s' % stonewall_path) + self.log.error( + "unable to write stonewall file %s" % stonewall_path + ) self.log.exception(e) self.status = err else: - self.log.info('saw EINVAL on stonewall, ignoring it') + self.log.info("saw EINVAL on stonewall, ignoring it") def test_ended(self): return (self.end_time is not None) and (self.end_time > self.start_time) @@ -823,11 +881,15 @@ def do_another_file(self): if self.stonewall and (((self.filenum + 1) % self.files_between_checks) == 0): stonewall_path = self.stonewall_fn() if self.verbose: - self.log.debug('checking for stonewall file %s after %s iterations' % - (stonewall_path, self.filenum)) + self.log.debug( + "checking for stonewall file %s after %s iterations" + % (stonewall_path, self.filenum) + ) if os.path.exists(stonewall_path): - self.log.info('stonewall file %s seen after %d iterations' % - (stonewall_path, self.filenum)) + self.log.info( + "stonewall file %s seen after %d iterations" + % (stonewall_path, self.filenum) + ) self.end_test() # if user doesn't want to finish all requests and test has ended, stop @@ -841,8 +903,7 @@ def do_another_file(self): 
self.end_test() return False if self.abort: - raise SMFRunException('thread ' + str(self.tid) - + ' saw abort flag') + raise SMFRunException("thread " + str(self.tid) + " saw abort flag") self.filenum += 1 if self.pause_sec > 0.0 and self.iterations % self.files_between_pause == 0: time.sleep(self.pause_sec * self.files_between_pause) @@ -875,10 +936,10 @@ def mk_seq_dir_name(self, file_num): dirs_in_level = level_dirs[level] quotient = dir_in // dirs_in_level dir_in = dir_in - quotient * dirs_in_level - dirnm = 'd_' + str(quotient).zfill(3) + dirnm = "d_" + str(quotient).zfill(3) pathlist.append(dirnm) level -= 1 - pathlist.append('d_' + str(dir_in).zfill(3)) + pathlist.append("d_" + str(dir_in).zfill(3)) return os.sep.join(pathlist) def mk_hashed_dir_name(self, file_num): @@ -887,7 +948,7 @@ def mk_hashed_dir_name(self, file_num): dir_num = random_hash // self.files_per_dir while dir_num > 1: dir_num_hash = dir_num * self.some_prime % self.dirs_per_dir - pathlist.insert(0, 'h_' + str(dir_num_hash).zfill(3)) + pathlist.insert(0, "h_" + str(dir_num_hash).zfill(3)) dir_num //= self.dirs_per_dir return os.sep.join(pathlist) @@ -920,16 +981,16 @@ def mk_file_nm(self, base_dirs, filenum=-1): self.file_dirs[filenum], os.sep, self.prefix, - '_', + "_", self.onhost, - '_', + "_", self.tid, - '_', + "_", str(filenum), - '_', + "_", self.suffix, - ] - return ''.join(components) + ] + return "".join(components) # generate buffer contents, use these on writes and # compare against them for reads where random data is used, @@ -946,21 +1007,24 @@ def create_biggest_buf(self, contents_random): # until we get to size 2^biggest_buf_size_bits in length if contents_random: - biggest_buf = bytearray([self.randstate.randrange(0, 127) - for k in - range(0, random_segment_size)]) + biggest_buf = bytearray( + [ + self.randstate.randrange(0, 127) + for k in range(0, random_segment_size) + ] + ) else: - biggest_buf = bytearray([k % 128 for k in - range(0, random_segment_size)]) + biggest_buf = bytearray( + [k % 128 for k in range(0, random_segment_size)] + ) # to prevent confusion in python when printing out buffer contents # WARNING: this line breaks PythonTidy utility - biggest_buf = biggest_buf.replace(b'\\', b'!') + biggest_buf = biggest_buf.replace(b"\\", b"!") # keep doubling buffer size until it is big enough - next_power_2 = (self.biggest_buf_size_bits - - self.random_seg_size_bits) + next_power_2 = self.biggest_buf_size_bits - self.random_seg_size_bits for j in range(0, next_power_2): biggest_buf.extend(biggest_buf[:]) @@ -981,18 +1045,18 @@ def create_biggest_buf(self, contents_random): powersum += powerof2 # biggest_buf length is now 2^j - 1 biggest_buf.extend( - bytearray([self.randstate.randrange(0, 255) - for k in range(0, powerof2)])) - biggest_buf.extend( - bytearray([self.randstate.randrange(0, 255)])) + bytearray( + [self.randstate.randrange(0, 255) for k in range(0, powerof2)] + ) + ) + biggest_buf.extend(bytearray([self.randstate.randrange(0, 255)])) # add extra space at end # so that we can get different buffer contents # by just using different offset into biggest_buf - biggest_buf.extend(biggest_buf[0:self.buf_offset_range]) - myassert( - len(biggest_buf) == self.biggest_buf_size + self.buf_offset_range) + biggest_buf.extend(biggest_buf[0 : self.buf_offset_range]) + myassert(len(biggest_buf) == self.biggest_buf_size + self.buf_offset_range) return biggest_buf # allocate buffer of correct size with offset based on filenum, tid, etc. 
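The create_biggest_buf()/prepare_buf() pair above builds one large master buffer by repeatedly doubling a small (optionally random) segment until it reaches 2**biggest_buf_size_bits bytes, then appends buf_offset_range extra bytes so each file can use a slice that starts at a slightly different offset. A minimal sketch of that construction follows, using small illustrative constants (assumptions for readability, not SmallfileWorkload's actual class values):

import random

# illustrative sizes only -- assumptions, not the real class constants
random_seg_size_bits = 4        # 16-byte starting segment
biggest_buf_size_bits = 10      # grow to a 1 KiB buffer
buf_offset_range = 1 << 6       # slack so slices can start at different offsets

rng = random.Random(0)
seg = bytearray(rng.randrange(0, 127) for _ in range(1 << random_seg_size_bits))
seg = seg.replace(b"\\", b"!")  # same trick as above: keep backslashes out of printed buffers

buf = bytearray(seg)
for _ in range(biggest_buf_size_bits - random_seg_size_bits):
    buf.extend(buf[:])          # double until len(buf) == 2**biggest_buf_size_bits
buf.extend(buf[:buf_offset_range])
assert len(buf) == (1 << biggest_buf_size_bits) + buf_offset_range

# prepare_buf() then takes a per-file slice; unique_offset here is a stand-in
# for the ((tid + 1) * filenum) % 1024 offset computed in the real code
unique_offset = 37
record_bytes = 256
per_file_buf = buf[unique_offset:unique_offset + record_bytes]

Because the slice offset differs per file, a read that returns data from the wrong position fails the verify_read comparison even though every file was written from the same master buffer.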
@@ -1023,22 +1087,22 @@ def prepare_buf(self): # create a buffer with somewhat unique contents for this file, # so we'll know if there is a read error # unique_offset has to have same value across smallfile runs - # so that we can write data and then + # so that we can write data and then # know what to expect in written data later on # NOTE: this means self.biggest_buf must be # 1K larger than SmallfileWorkload.biggest_buf_size max_buffer_offset = 1 << 10 try: - unique_offset = ((int(self.tid)+1) * self.filenum) % max_buffer_offset + unique_offset = ((int(self.tid) + 1) * self.filenum) % max_buffer_offset except ValueError: unique_offset = self.filenum % max_buffer_offset myassert(total_space + unique_offset < len(self.biggest_buf)) - #if self.verbose: + # if self.verbose: # self.log.debug('unique_offset: %d' % unique_offset) - self.buf = self.biggest_buf[unique_offset:total_space + unique_offset] - #if self.verbose: + self.buf = self.biggest_buf[unique_offset : total_space + unique_offset] + # if self.verbose: # self.log.debug('start of prepared buf: %s' % self.buf.hex()[0:40]) # determine record size to use in test @@ -1059,9 +1123,9 @@ def get_record_size_to_use(self): # use set to avoid duplicating operations on directories def make_all_subdirs(self): - self.log.debug('making all subdirs') + self.log.debug("making all subdirs") abort_filename = self.abort_fn() - if self.tid != '00' and self.is_shared_dir: + if self.tid != "00" and self.is_shared_dir: return dirset = set() @@ -1082,9 +1146,9 @@ def make_all_subdirs(self): # we put files_per_dir files into each directory, so # we only need to check every files_per_dir filenames # for a new directory name - dir_range = range(0, - self.iterations + self.files_per_dir, - self.files_per_dir) + dir_range = range( + 0, self.iterations + self.files_per_dir, self.files_per_dir + ) # we need this range because # we need to create directories in each top dir @@ -1102,18 +1166,18 @@ def make_all_subdirs(self): if not exists(unique_dpath): try: os.makedirs(unique_dpath, 0o777) - if debug_timeout: time.sleep(1) + if debug_timeout: + time.sleep(1) except OSError as e: - if not (e.errno == errno.EEXIST - and self.is_shared_dir): + if not (e.errno == errno.EEXIST and self.is_shared_dir): raise e # clean up all subdirectories # algorithm same as make_all_subdirs def clean_all_subdirs(self): - self.log.debug('cleaning all subdirs') - if self.tid != '00' and self.is_shared_dir: + self.log.debug("cleaning all subdirs") + if self.tid != "00" and self.is_shared_dir: return for tree in [self.src_dirs, self.dest_dirs]: @@ -1123,9 +1187,9 @@ def clean_all_subdirs(self): if self.hash_to_dir: dir_range = range(0, self.iterations + 1) else: - dir_range = range(0, - self.iterations + self.files_per_dir, - self.files_per_dir) + dir_range = range( + 0, self.iterations + self.files_per_dir, self.files_per_dir + ) # construct set of directories @@ -1149,9 +1213,13 @@ def clean_all_subdirs(self): topdir = t break if not topdir: - raise SMFRunException(('directory %s is not part of ' + - 'any top-level directory in %s') - % (unique_dpath, str(tree))) + raise SMFRunException( + ( + "directory %s is not part of " + + "any top-level directory in %s" + ) + % (unique_dpath, str(tree)) + ) # delete this directory and # parent directories if empty and below top @@ -1171,7 +1239,7 @@ def clean_all_subdirs(self): break if err == errno.EBUSY: # might be mountpoint break - self.log.error('deleting directory dpath: %s' % e) + self.log.error("deleting directory dpath: %s" % e) if err 
!= errno.ENOENT and not self.is_shared_dir: raise e unique_dpath = os.path.dirname(unique_dpath) @@ -1185,16 +1253,16 @@ def clean_all_subdirs(self): def do_create(self): if self.record_ctime_size and not xattr_installed: raise SMFRunException( - 'no python xattr module, cannot record create time + size') + "no python xattr module, cannot record create time + size" + ) while self.do_another_file(): fn = self.mk_file_nm(self.src_dirs) self.op_starttime() fd = -1 try: - fd = os.open(fn, - os.O_CREAT | os.O_EXCL | os.O_WRONLY | O_BINARY) + fd = os.open(fn, os.O_CREAT | os.O_EXCL | os.O_WRONLY | O_BINARY) if fd < 0: - self.log.error('failed to open file %s' % fn) + self.log.error("failed to open file %s" % fn) raise MFRdWrExc(self.opname, self.filenum, 0, 0) remaining_kb = self.get_next_file_size() self.prepare_buf() @@ -1204,8 +1272,7 @@ def do_create(self): rszbytes = next_kb * self.BYTES_PER_KB written = os.write(fd, self.buf[0:rszbytes]) if written != rszbytes: - raise MFRdWrExc(self.opname, self.filenum, - self.rq, written) + raise MFRdWrExc(self.opname, self.filenum, self.rq, written) self.rq += 1 remaining_kb -= next_kb if self.record_ctime_size: @@ -1226,7 +1293,7 @@ def do_create(self): def do_mkdir(self): while self.do_another_file(): - dir = self.mk_file_nm(self.src_dirs) + '.d' + dir = self.mk_file_nm(self.src_dirs) + ".d" self.op_starttime() try: os.mkdir(dir) @@ -1241,7 +1308,7 @@ def do_mkdir(self): def do_rmdir(self): while self.do_another_file(): - dir = self.mk_file_nm(self.src_dirs) + '.d' + dir = self.mk_file_nm(self.src_dirs) + ".d" self.op_starttime() os.rmdir(dir) self.op_endtime(self.opname) @@ -1249,7 +1316,7 @@ def do_rmdir(self): def do_symlink(self): while self.do_another_file(): fn = self.mk_file_nm(self.src_dirs) - fn2 = self.mk_file_nm(self.dest_dirs) + '.s' + fn2 = self.mk_file_nm(self.dest_dirs) + ".s" self.op_starttime() os.symlink(fn, fn2) self.op_endtime(self.opname) @@ -1274,24 +1341,29 @@ def do_chmod(self): def do_getxattr(self): if not xattr_installed: - raise SMFRunException('xattr module not present, ' + - 'getxattr and setxattr operations will not work') + raise SMFRunException( + "xattr module not present, " + + "getxattr and setxattr operations will not work" + ) while self.do_another_file(): fn = self.mk_file_nm(self.src_dirs) self.op_starttime() self.prepare_buf() for j in range(0, self.xattr_count): - v = xattr.getxattr(fn, 'user.smallfile-%d' % j) - if self.buf[j:self.xattr_size + j] != v: - raise MFRdWrExc('getxattr: value contents wrong', - self.filenum, j, len(v)) + v = xattr.getxattr(fn, "user.smallfile-%d" % j) + if self.buf[j : self.xattr_size + j] != v: + raise MFRdWrExc( + "getxattr: value contents wrong", self.filenum, j, len(v) + ) self.op_endtime(self.opname) def do_setxattr(self): if not xattr_installed: - raise SMFRunException('xattr module not present, ' + - 'getxattr and setxattr operations will not work') + raise SMFRunException( + "xattr module not present, " + + "getxattr and setxattr operations will not work" + ) while self.do_another_file(): fn = self.mk_file_nm(self.src_dirs) @@ -1300,8 +1372,11 @@ def do_setxattr(self): fd = os.open(fn, os.O_WRONLY | O_BINARY) for j in range(0, self.xattr_count): # make sure each xattr has a unique value - xattr.setxattr(fd, 'user.smallfile-%d' % j, - binary_buf_str(self.buf[j:self.xattr_size + j])) + xattr.setxattr( + fd, + "user.smallfile-%d" % j, + binary_buf_str(self.buf[j : self.xattr_size + j]), + ) if self.fsync: # fsync also flushes xattr values and metadata os.fsync(fd) 
os.close(fd) @@ -1318,10 +1393,11 @@ def do_truncate_overwrite(self): def do_write(self, append=False, truncate=False): if self.record_ctime_size and not xattr_installed: - raise SMFRunException('xattr module not present ' + - 'but record-ctime-size specified') + raise SMFRunException( + "xattr module not present " + "but record-ctime-size specified" + ) if append and truncate: - raise SMFRunException('can not append and truncate at the same time') + raise SMFRunException("can not append and truncate at the same time") while self.do_another_file(): fn = self.mk_file_nm(self.src_dirs) @@ -1333,7 +1409,8 @@ def do_write(self, append=False, truncate=False): if truncate: open_mode |= os.O_TRUNC fd = os.open(fn, open_mode) - if append: os.lseek(fd, 0, os.SEEK_END) + if append: + os.lseek(fd, 0, os.SEEK_END) remaining_kb = self.get_next_file_size() self.prepare_buf() rszkb = self.get_record_size_to_use() @@ -1343,8 +1420,7 @@ def do_write(self, append=False, truncate=False): written = os.write(fd, self.buf[0:rszbytes]) self.rq += 1 if written != rszbytes: - raise MFRdWrExc(self.opname, self.filenum, - self.rq, written) + raise MFRdWrExc(self.opname, self.filenum, self.rq, written) remaining_kb -= next_kb if self.record_ctime_size: remember_ctime_size_xattr(fd) @@ -1372,27 +1448,34 @@ def do_read(self): bytesread = os.read(fd, rszbytes) self.rq += 1 if len(bytesread) != rszbytes: - raise MFRdWrExc(self.opname, self.filenum, - self.rq, len(bytesread)) + raise MFRdWrExc( + self.opname, self.filenum, self.rq, len(bytesread) + ) if self.verify_read: # this is in fast path so avoid evaluating self.log.debug # unless people really want to see it if self.verbose: - self.log.debug(('read fn %s next_fsz %u remain %u ' + - 'rszbytes %u bytesread %u') - % (fn, next_fsz, remaining_kb, - rszbytes, len(bytesread))) + self.log.debug( + ( + "read fn %s next_fsz %u remain %u " + + "rszbytes %u bytesread %u" + ) + % (fn, next_fsz, remaining_kb, rszbytes, len(bytesread)) + ) if self.buf[0:rszbytes] != bytesread: bytes_matched = len(bytesread) for k in range(0, rszbytes): if self.buf[k] != bytesread[k]: bytes_matched = k break - #self.log.debug('front of read buffer: %s' % bytesread.hex()[0:40]) - raise MFRdWrExc('read: buffer contents matched up through byte %d' % bytes_matched, - self.filenum, - self.rq, - len(bytesread)) + # self.log.debug('front of read buffer: %s' % bytesread.hex()[0:40]) + raise MFRdWrExc( + "read: buffer contents matched up through byte %d" + % bytes_matched, + self.filenum, + self.rq, + len(bytesread), + ) remaining_kb -= next_kb finally: if fd > -1: @@ -1401,9 +1484,10 @@ def do_read(self): def do_readdir(self): if self.hash_to_dir: - raise SMFRunException('cannot do readdir test with ' + - '--hash-into-dirs option') - prev_dir = '' + raise SMFRunException( + "cannot do readdir test with " + "--hash-into-dirs option" + ) + prev_dir = "" dir_map = {} file_count = 0 while self.do_another_file(): @@ -1412,17 +1496,22 @@ def do_readdir(self): common_dir = None for d in self.top_dirs: if dir.startswith(d): - common_dir = dir[len(self.top_dirs[0]):] + common_dir = dir[len(self.top_dirs[0]) :] break if not common_dir: - raise SMFRunException(('readdir: filename %s is not ' + - 'in any top dir in %s') - % (fn, str(self.top_dirs))) + raise SMFRunException( + ("readdir: filename %s is not " + "in any top dir in %s") + % (fn, str(self.top_dirs)) + ) if common_dir != prev_dir: if file_count != len(dir_map): - raise MFRdWrExc(('readdir: not all files in ' + - 'directory %s were found') - % prev_dir, 
self.filenum, self.rq, 0) + raise MFRdWrExc( + ("readdir: not all files in " + "directory %s were found") + % prev_dir, + self.filenum, + self.rq, + 0, + ) self.op_starttime() dir_contents = [] for t in self.top_dirs: @@ -1432,14 +1521,18 @@ def do_readdir(self): prev_dir = common_dir dir_map = {} for listdir_filename in dir_contents: - if not listdir_filename[0] == 'd': + if not listdir_filename[0] == "d": dir_map[listdir_filename] = True # only include files file_count = 0 - if not fn.startswith('d'): + if not fn.startswith("d"): file_count += 1 # only count files, not directories if os.path.basename(fn) not in dir_map: - raise MFRdWrExc('readdir: file missing from directory %s' - % prev_dir, self.filenum, self.rq, 0) + raise MFRdWrExc( + "readdir: file missing from directory %s" % prev_dir, + self.filenum, + self.rq, + 0, + ) # this operation simulates a user doing "ls -lR" on a big directory tree # eventually we'll be able to use readdirplus() system call @@ -1447,9 +1540,10 @@ def do_readdir(self): def do_ls_l(self): if self.hash_to_dir: - raise SMFRunException('cannot do readdir test with ' + - '--hash-into-dirs option') - prev_dir = '' + raise SMFRunException( + "cannot do readdir test with " + "--hash-into-dirs option" + ) + prev_dir = "" dir_map = {} file_count = 0 while self.do_another_file(): @@ -1458,33 +1552,39 @@ def do_ls_l(self): common_dir = None for d in self.top_dirs: if dir.startswith(d): - common_dir = dir[len(self.top_dirs[0]):] + common_dir = dir[len(self.top_dirs[0]) :] break if not common_dir: - raise SMFRunException('ls-l: filename %s is not in any top dir in %s' - % (fn, str(self.top_dirs))) + raise SMFRunException( + "ls-l: filename %s is not in any top dir in %s" + % (fn, str(self.top_dirs)) + ) if common_dir != prev_dir: self.op_starttime() dir_contents = [] for t in self.top_dirs: next_dir = t + common_dir dir_contents.extend(os.listdir(next_dir)) - self.op_endtime(self.opname + '-readdir') + self.op_endtime(self.opname + "-readdir") prev_dir = common_dir dir_map = {} for listdir_filename in dir_contents: - if not listdir_filename[0] == 'd': + if not listdir_filename[0] == "d": dir_map[listdir_filename] = True # only include files file_count = 0 # per-file stat timing separate readdir timing self.op_starttime() os.stat(fn) - self.op_endtime(self.opname + '-stat') - if not fn.startswith('d'): + self.op_endtime(self.opname + "-stat") + if not fn.startswith("d"): file_count += 1 # only count files, not directories if os.path.basename(fn) not in dir_map: - raise MFRdWrExc('readdir: file missing from directory %s' - % prev_dir, self.filenum, self.rq, 0) + raise MFRdWrExc( + "readdir: file missing from directory %s" % prev_dir, + self.filenum, + self.rq, + 0, + ) # await-create is used for Gluster (async) geo-replication testing # instead of creating the files, we wait for them to appear @@ -1493,30 +1593,33 @@ def do_ls_l(self): def do_await_create(self): if not xattr_installed: - raise SMFRunException( - 'no python xattr module, so cannot read xattrs') + raise SMFRunException("no python xattr module, so cannot read xattrs") while self.do_another_file(): fn = self.mk_file_nm(self.src_dirs) - self.log.debug('awaiting file %s' % fn) + self.log.debug("awaiting file %s" % fn) while not os.path.exists(fn): time.sleep(1.0) - self.log.debug('awaiting original ctime-size xattr for file %s' - % fn) + self.log.debug("awaiting original ctime-size xattr for file %s" % fn) while True: (original_ctime, original_sz_kb) = recall_ctime_size_xattr(fn) if original_ctime is not None: 
break time.sleep(1.0) - self.log.debug(('waiting for file %s created ' + - 'at %f to grow to original size %u') - % (fn, original_ctime, original_sz_kb)) + self.log.debug( + ("waiting for file %s created " + "at %f to grow to original size %u") + % (fn, original_ctime, original_sz_kb) + ) while True: st = os.stat(fn) if st.st_size > original_sz_kb * self.BYTES_PER_KB: - raise SMFRunException(('asynchronously created replica ' + - 'in %s is %u bytes, ' + - 'larger than original %u KB') - % (fn, st.st_size, original_sz_kb)) + raise SMFRunException( + ( + "asynchronously created replica " + + "in %s is %u bytes, " + + "larger than original %u KB" + ) + % (fn, st.st_size, original_sz_kb) + ) elif st.st_size == original_sz_kb * self.BYTES_PER_KB: break self.op_starttime(starttime=original_ctime) @@ -1556,12 +1659,14 @@ def do_delete_renamed(self): def do_swift_get(self): if not xattr_installed: - raise SMFRunException('xattr module not present, ' + - 'getxattr and setxattr operations will not work') + raise SMFRunException( + "xattr module not present, " + + "getxattr and setxattr operations will not work" + ) l = self.log while self.do_another_file(): fn = self.mk_file_nm(self.src_dirs) - l.debug('swift_get fn %s ' % fn) + l.debug("swift_get fn %s " % fn) next_fsz = self.get_next_file_size() self.op_starttime() fd = os.open(fn, os.O_RDONLY | O_BINARY) @@ -1572,33 +1677,36 @@ def do_swift_get(self): while remaining_kb > 0: next_kb = min(rszkb, remaining_kb) rszbytes = next_kb * self.BYTES_PER_KB - l.debug('swift_get fd ' + - '%d next_fsz %u remain %u rszbytes %u ' % - (fd, next_fsz, remaining_kb, rszbytes)) + l.debug( + "swift_get fd " + + "%d next_fsz %u remain %u rszbytes %u " + % (fd, next_fsz, remaining_kb, rszbytes) + ) bytesread = os.read(fd, rszbytes) if len(bytesread) != rszbytes: - raise MFRdWrExc(self.opname, - self.filenum, - self.rq, - len(bytesread)) + raise MFRdWrExc( + self.opname, self.filenum, self.rq, len(bytesread) + ) if self.verify_read: if self.verbose: - l.debug('swift_get bytesread %u' % len(bytesread)) + l.debug("swift_get bytesread %u" % len(bytesread)) if self.buf[0:rszbytes] != bytesread: xpct_buf = self.buf[0:rszbytes] - l.debug('expect buf: ' + binary_buf_str(xpct_buf)) - l.debug('saw buf: ' + binary_buf_str(bytesread)) - raise MFRdWrExc('read: buffer contents wrong', - self.filenum, - self.rq, - len(bytesread)) + l.debug("expect buf: " + binary_buf_str(xpct_buf)) + l.debug("saw buf: " + binary_buf_str(bytesread)) + raise MFRdWrExc( + "read: buffer contents wrong", + self.filenum, + self.rq, + len(bytesread), + ) remaining_kb -= next_kb self.rq += 1 for j in range(0, self.xattr_count): try: - v = xattr.getxattr(fd, 'user.smallfile-all-%d' % j) + v = xattr.getxattr(fd, "user.smallfile-all-%d" % j) if self.verbose: - l.debug('xattr[%d] = %s' % (j, v)) + l.debug("xattr[%d] = %s" % (j, v)) except IOError as e: if e.errno != errno.ENODATA: raise e @@ -1609,14 +1717,12 @@ def do_swift_get(self): # this operation type tries to emulate what a Swift PUT request does def do_swift_put(self): - if not xattr_installed or \ - not fallocate_installed or \ - not fadvise_installed: - raise SMFRunException('one of necessary modules not available') + if not xattr_installed or not fallocate_installed or not fadvise_installed: + raise SMFRunException("one of necessary modules not available") l = self.log while self.do_another_file(): - fn = self.mk_file_nm(self.src_dirs) + '.tmp' + fn = self.mk_file_nm(self.src_dirs) + ".tmp" next_fsz = self.get_next_file_size() self.prepare_buf() 
self.op_starttime() @@ -1628,39 +1734,43 @@ def do_swift_put(self): # os.ftruncate(fd, fszbytes) ret = fallocate.fallocate(fd, 0, 0, fszbytes) if ret != OK: - raise SMFRunException('fallocate call returned %d' % ret) + raise SMFRunException("fallocate call returned %d" % ret) rszkb = self.get_record_size_to_use() remaining_kb = next_fsz while remaining_kb > 0: next_kb = min(rszkb, remaining_kb) rszbytes = next_kb * self.BYTES_PER_KB - l.debug('reading %d bytes' % rszbytes) + l.debug("reading %d bytes" % rszbytes) if rszbytes != len(self.buf): - l.debug('swift put self.buf: ' + - binary_buf_str(self.buf[0:rszbytes])) + l.debug( + "swift put self.buf: " + + binary_buf_str(self.buf[0:rszbytes]) + ) written = os.write(fd, self.buf[0:rszbytes]) else: - l.debug('swift put entire self.buf: ' + - binary_buf_str(self.buf[0:rszbytes])) + l.debug( + "swift put entire self.buf: " + + binary_buf_str(self.buf[0:rszbytes]) + ) written = os.write(fd, self.buf[:]) if written != rszbytes: - l.error('written byte count ' + - '%u not correct byte count %u' % - (written, rszbytes)) - raise MFRdWrExc(self.opname, self.filenum, - self.rq, written) + l.error( + "written byte count " + + "%u not correct byte count %u" % (written, rszbytes) + ) + raise MFRdWrExc(self.opname, self.filenum, self.rq, written) remaining_kb -= next_kb for j in range(0, self.xattr_count): - xattr_nm = 'user.smallfile-all-%d' % j + xattr_nm = "user.smallfile-all-%d" % j try: v = xattr.getxattr(fd, xattr_nm) except IOError as e: if e.errno != errno.ENODATA: raise e - l.error('xattr %s does not exist' % xattr_nm) + l.error("xattr %s does not exist" % xattr_nm) for j in range(0, self.xattr_count): - xattr_nm = 'user.smallfile-all-%d' % j - v = binary_buf_str(self.buf[j:self.xattr_size + j]) + xattr_nm = "user.smallfile-all-%d" % j + v = binary_buf_str(self.buf[j : self.xattr_size + j]) xattr.setxattr(fd, xattr_nm, v) # l.debug('xattr ' + xattr_nm + ' set to ' + v) @@ -1687,12 +1797,12 @@ def do_swift_put(self): except Exception as e: ensure_deleted(fn) if self.verbose: - print('exception on %s' % fn) + print("exception on %s" % fn) raise e finally: if fd > -1: os.close(fd) - self.op_endtime('swift-put') + self.op_endtime("swift-put") # unlike other ops, cleanup must always finish regardless of other threads @@ -1702,7 +1812,7 @@ def do_cleanup(self): save_finish = self.finish_all_rq self.finish_all_rq = True while self.do_another_file(): - sym = self.mk_file_nm(self.dest_dirs) + '.s' + sym = self.mk_file_nm(self.dest_dirs) + ".s" ensure_deleted(sym) basenm = self.mk_file_nm(self.src_dirs) fn = basenm @@ -1713,7 +1823,7 @@ def do_cleanup(self): ensure_deleted(fn) fn = basenm + self.rename_suffix ensure_deleted(fn) - dir = basenm + '.d' + dir = basenm + ".d" if os.path.exists(dir): os.rmdir(dir) self.clean_all_subdirs() @@ -1721,8 +1831,16 @@ def do_cleanup(self): self.finish_all_rq = save_finish if self.cleanup_delay_usec_per_file > 0: total_threads = self.threads * self.total_hosts - total_sleep_time = self.cleanup_delay_usec_per_file * self.iterations * total_threads / USEC_PER_SEC - self.log.info('waiting %f sec to give storage time to recycle deleted files' % total_sleep_time) + total_sleep_time = ( + self.cleanup_delay_usec_per_file + * self.iterations + * total_threads + / USEC_PER_SEC + ) + self.log.info( + "waiting %f sec to give storage time to recycle deleted files" + % total_sleep_time + ) time.sleep(total_sleep_time) def do_workload(self): @@ -1730,16 +1848,17 @@ def do_workload(self): for j in range(0, self.iterations + 
self.files_per_dir): self.file_dirs.append(self.mk_dir_name(j)) self.start_log() - self.log.info('do_workload: ' + str(self)) + self.log.info("do_workload: " + str(self)) ensure_dir_exists(self.network_dir) - if ['create', 'mkdir', 'swift-put'].__contains__(self.opname): + if ["create", "mkdir", "swift-put"].__contains__(self.opname): self.make_all_subdirs() # create_biggest_buf() depends on init_random_seed() self.init_random_seed() self.biggest_buf = self.create_biggest_buf(False) if self.total_sz_kb > 0: - self.files_between_checks = \ - max(10, int(self.max_files_between_checks - self.total_sz_kb / 100)) + self.files_between_checks = max( + 10, int(self.max_files_between_checks - self.total_sz_kb / 100) + ) try: self.wait_for_gate() self.start_time = time.time() @@ -1747,27 +1866,26 @@ def do_workload(self): func = SmallfileWorkload.workloads[o] func(self) # call the do_ function for that workload type except KeyError as e: - self.log.error('invalid workload type ' + o) + self.log.error("invalid workload type " + o) self.status = e.ENOKEY except KeyboardInterrupt as e: - self.log.error('control-C (SIGINT) signal received, ending test') + self.log.error("control-C (SIGINT) signal received, ending test") self.status = e.EINTR except OSError as e: self.status = e.errno - self.log.error('OSError status %d seen' % e.errno) + self.log.error("OSError status %d seen" % e.errno) self.log.exception(e) except MFRdWrExc as e: self.status = errno.EIO - self.log.error('MFRdWrExc seen') + self.log.error("MFRdWrExc seen") self.log.exception(e) if self.measure_rsptimes: self.save_rsptimes() if self.status != ok: - self.log.error('invocation did not complete cleanly') + self.log.error("invocation did not complete cleanly") if self.filenum != self.iterations: - self.log.info('recorded throughput after ' - + str(self.filenum) + ' files') - self.log.info('finished %s' % self.opname) + self.log.info("recorded throughput after " + str(self.filenum) + " files") + self.log.info("finished %s" % self.opname) # this next call works fine with python 2.7 # but not with python 2.6, why? do we need it? 
# logging.shutdown() @@ -1778,41 +1896,42 @@ def do_workload(self): # by workload name in this dictionary (hash table) workloads = { - 'create': do_create, - 'delete': do_delete, - 'symlink': do_symlink, - 'mkdir': do_mkdir, - 'rmdir': do_rmdir, - 'readdir': do_readdir, - 'ls-l': do_ls_l, - 'stat': do_stat, - 'getxattr': do_getxattr, - 'setxattr': do_setxattr, - 'chmod': do_chmod, - 'append': do_append, - 'overwrite': do_overwrite, - 'truncate-overwrite': do_truncate_overwrite, - 'read': do_read, - 'rename': do_rename, - 'delete-renamed': do_delete_renamed, - 'cleanup': do_cleanup, - 'swift-put': do_swift_put, - 'swift-get': do_swift_get, - 'await-create': do_await_create, - } + "create": do_create, + "delete": do_delete, + "symlink": do_symlink, + "mkdir": do_mkdir, + "rmdir": do_rmdir, + "readdir": do_readdir, + "ls-l": do_ls_l, + "stat": do_stat, + "getxattr": do_getxattr, + "setxattr": do_setxattr, + "chmod": do_chmod, + "append": do_append, + "overwrite": do_overwrite, + "truncate-overwrite": do_truncate_overwrite, + "read": do_read, + "rename": do_rename, + "delete-renamed": do_delete_renamed, + "cleanup": do_cleanup, + "swift-put": do_swift_put, + "swift-get": do_swift_get, + "await-create": do_await_create, + } # threads used to do multi-threaded unit testing -class TestThread(threading.Thread): +class TestThread(threading.Thread): def __init__(self, my_invocation, my_name): threading.Thread.__init__(self, name=my_name) self.invocation = my_invocation def __str__(self): - return 'TestThread ' + str(self.invocation) + ' ' + \ - threading.Thread.__str__(self) + return ( + "TestThread " + str(self.invocation) + " " + threading.Thread.__str__(self) + ) def run(self): try: @@ -1830,464 +1949,471 @@ def run(self): ok = 0 if unittest_module: - class Test(unittest_module.TestCase): - - - # run before every test - def setUp(self): - self.invok = SmallfileWorkload() - self.invok.opname = 'create' - self.invok.iterations = 50 - self.invok.files_per_dir = 5 - self.invok.dirs_per_dir = 2 - self.invok.verbose = True - self.invok.prefix = 'p' - self.invok.suffix = 's' - self.invok.tid = 'regtest' - self.invok.finish_all_rq = True - self.deltree(self.invok.network_dir) - ensure_dir_exists(self.invok.network_dir) - - def deltree(self, topdir): - if not os.path.exists(topdir): - return - if not os.path.isdir(topdir): - return - for (dir, subdirs, files) in os.walk(topdir, topdown=False): - for f in files: - os.unlink(join(dir, f)) - for d in subdirs: - os.rmdir(join(dir, d)) - os.rmdir(topdir) - - def chk_status(self): - if self.invok.status != ok: - raise SMFRunException('test failed, check log file %s' - % self.invok.log_fn()) - - def runTest(self, opName): - ensure_deleted(self.invok.stonewall_fn()) - self.invok.opname = opName - self.invok.do_workload() - self.chk_status() - - def file_size(self, fn): - st = os.stat(fn) - return st.st_size - - def checkDirEmpty(self, emptyDir): - self.assertTrue(os.listdir(emptyDir) == []) - - def lastFileNameInTest(self, tree): - return self.invok.mk_file_nm(tree, self.invok.filenum - 1) - - def checkDirListEmpty(self, emptyDirList): - for d in emptyDirList: - if exists(d): - assert os.listdir(d) == [] - - def cleanup_files(self): - self.runTest('cleanup') - - def mk_files(self): - self.cleanup_files() - self.runTest('create') - lastfn = self.lastFileNameInTest(self.invok.src_dirs) - self.assertTrue(exists(lastfn)) - assert os.path.getsize(lastfn) == \ - self.invok.total_sz_kb * self.invok.BYTES_PER_KB - - def test1_recreate_src_dest_dirs(self): - for s in 
self.invok.src_dirs: - self.deltree(s) - os.mkdir(s) - for s in self.invok.dest_dirs: - self.deltree(s) - os.mkdir(s) - - def test_a_MkFn(self): - self.mk_files() - ivk = self.invok - fn = ivk.mk_file_nm(ivk.src_dirs, 1) - lastfn = ivk.mk_file_nm(ivk.src_dirs, ivk.iterations) - - expectedFn = join(join(self.invok.src_dirs[0], 'd_000'), - ivk.prefix + '_' + - ivk.onhost + '_' + - ivk.tid + '_1_' + - ivk.suffix) - self.assertTrue(fn == expectedFn) - self.assertTrue(exists(fn)) - self.assertTrue(exists(lastfn)) - self.assertTrue(ivk.filenum == ivk.iterations) - os.unlink(fn) - self.assertTrue(not exists(fn)) - - def test_b_Cleanup(self): - self.cleanup_files() - - def test_c_Create(self): - self.mk_files() # depends on cleanup_files - fn = self.lastFileNameInTest(self.invok.src_dirs) - assert exists(fn) - self.cleanup_files() - - def test_c1_Mkdir(self): - self.cleanup_files() - self.runTest('mkdir') - last_dir = self.lastFileNameInTest(self.invok.src_dirs) + '.d' - self.assertTrue(exists(last_dir)) - self.cleanup_files() - - def test_c2_Rmdir(self): - self.cleanup_files() - self.runTest('mkdir') - last_dir = self.lastFileNameInTest(self.invok.src_dirs) + '.d' - self.assertTrue(exists(last_dir)) - self.runTest('rmdir') - self.assertTrue(not exists(last_dir)) - self.cleanup_files() - - def test_c3_Symlink(self): - if is_windows_os: - return - self.mk_files() - self.runTest('symlink') - lastSymlinkFile = self.lastFileNameInTest(self.invok.dest_dirs) - lastSymlinkFile += '.s' - self.assertTrue(exists(lastSymlinkFile)) - self.cleanup_files() - - def test_c4_Stat(self): - self.mk_files() - self.runTest('stat') - self.cleanup_files() - - def test_c44_Readdir(self): - self.invok.iterations = 50 - self.invok.files_per_dir = 5 - self.invok.dirs_per_dir = 2 - self.mk_files() - self.runTest('readdir') - self.cleanup_files() - - def test_c44a_Readdir_bigdir(self): - self.invok.iterations = 5000 - self.invok.files_per_dir = 1000 - self.invok.dirs_per_dir = 2 - self.mk_files() - self.runTest('readdir') - self.cleanup_files() - - def test_c45_Ls_l(self): - self.mk_files() - self.runTest('ls-l') - self.cleanup_files() - - def test_c5_Chmod(self): - self.mk_files() - self.runTest('chmod') - self.cleanup_files() - - def test_c6_xattr(self): - if xattr_installed: + + class Test(unittest_module.TestCase): + + # run before every test + def setUp(self): + self.invok = SmallfileWorkload() + self.invok.opname = "create" + self.invok.iterations = 50 + self.invok.files_per_dir = 5 + self.invok.dirs_per_dir = 2 + self.invok.verbose = True + self.invok.prefix = "p" + self.invok.suffix = "s" + self.invok.tid = "regtest" + self.invok.finish_all_rq = True + self.deltree(self.invok.network_dir) + ensure_dir_exists(self.invok.network_dir) + + def deltree(self, topdir): + if not os.path.exists(topdir): + return + if not os.path.isdir(topdir): + return + for (dir, subdirs, files) in os.walk(topdir, topdown=False): + for f in files: + os.unlink(join(dir, f)) + for d in subdirs: + os.rmdir(join(dir, d)) + os.rmdir(topdir) + + def chk_status(self): + if self.invok.status != ok: + raise SMFRunException( + "test failed, check log file %s" % self.invok.log_fn() + ) + + def runTest(self, opName): + ensure_deleted(self.invok.stonewall_fn()) + self.invok.opname = opName + self.invok.do_workload() + self.chk_status() + + def file_size(self, fn): + st = os.stat(fn) + return st.st_size + + def checkDirEmpty(self, emptyDir): + self.assertTrue(os.listdir(emptyDir) == []) + + def lastFileNameInTest(self, tree): + return 
self.invok.mk_file_nm(tree, self.invok.filenum - 1) + + def checkDirListEmpty(self, emptyDirList): + for d in emptyDirList: + if exists(d): + assert os.listdir(d) == [] + + def cleanup_files(self): + self.runTest("cleanup") + + def mk_files(self): + self.cleanup_files() + self.runTest("create") + lastfn = self.lastFileNameInTest(self.invok.src_dirs) + self.assertTrue(exists(lastfn)) + assert ( + os.path.getsize(lastfn) + == self.invok.total_sz_kb * self.invok.BYTES_PER_KB + ) + + def test1_recreate_src_dest_dirs(self): + for s in self.invok.src_dirs: + self.deltree(s) + os.mkdir(s) + for s in self.invok.dest_dirs: + self.deltree(s) + os.mkdir(s) + + def test_a_MkFn(self): self.mk_files() - self.fsync = True - self.xattr_size = 256 - self.xattr_count = 10 - self.runTest('setxattr') - self.runTest('getxattr') + ivk = self.invok + fn = ivk.mk_file_nm(ivk.src_dirs, 1) + lastfn = ivk.mk_file_nm(ivk.src_dirs, ivk.iterations) + + expectedFn = join( + join(self.invok.src_dirs[0], "d_000"), + ivk.prefix + "_" + ivk.onhost + "_" + ivk.tid + "_1_" + ivk.suffix, + ) + self.assertTrue(fn == expectedFn) + self.assertTrue(exists(fn)) + self.assertTrue(exists(lastfn)) + self.assertTrue(ivk.filenum == ivk.iterations) + os.unlink(fn) + self.assertTrue(not exists(fn)) + + def test_b_Cleanup(self): self.cleanup_files() - def test_d_Delete(self): - self.invok.measure_rsptimes = True - self.mk_files() - lastFn = self.lastFileNameInTest(self.invok.src_dirs) - self.runTest('delete') - self.assertTrue(not exists(lastFn)) - self.cleanup_files() - - def test_e_Rename(self): - self.invok.measure_rsptimes = False - self.mk_files() - self.runTest('rename') - fn = self.invok.mk_file_nm(self.invok.dest_dirs) - self.assertTrue(exists(fn)) - self.cleanup_files() - - def test_f_DeleteRenamed(self): - self.mk_files() - self.runTest('rename') - self.runTest('delete-renamed') - lastfn = self.invok.mk_file_nm(self.invok.dest_dirs) - # won't delete any files or directories that contain them - self.assertTrue(not exists(lastfn)) - self.cleanup_files() - - def test_g0_Overwrite(self): - self.mk_files() - orig_kb = self.invok.total_sz_kb - self.runTest('overwrite') - fn = self.lastFileNameInTest(self.invok.src_dirs) - self.assertTrue(self.file_size(fn) == orig_kb - * self.invok.BYTES_PER_KB) - self.cleanup_files() - - def test_g1_Append(self): - self.mk_files() - orig_kb = self.invok.total_sz_kb - self.invok.total_sz_kb *= 2 - self.runTest('append') - fn = self.lastFileNameInTest(self.invok.src_dirs) - self.assertTrue(self.file_size(fn) == 3 * orig_kb - * self.invok.BYTES_PER_KB) - self.cleanup_files() - - def test_g2_Append_Rsz_0_big_file(self): - self.mk_files() - orig_kb = self.invok.total_sz_kb - self.invok.total_sz_kb = 2048 - # boundary condition where we want record size < max buffer space - self.invok.record_sz_kb = 0 - self.runTest('append') - fn = self.lastFileNameInTest(self.invok.src_dirs) - self.assertTrue(self.file_size(fn) == (orig_kb + 2048) - * self.invok.BYTES_PER_KB) - self.cleanup_files() - - def test_h00_read(self): - if not xattr_installed: - return - self.invok.record_ctime_size = True - self.mk_files() - self.invok.verify_read = True - self.runTest('read') + def test_c_Create(self): + self.mk_files() # depends on cleanup_files + fn = self.lastFileNameInTest(self.invok.src_dirs) + assert exists(fn) + self.cleanup_files() - # this test inherits files from preceding test + def test_c1_Mkdir(self): + self.cleanup_files() + self.runTest("mkdir") + last_dir = self.lastFileNameInTest(self.invok.src_dirs) + ".d" + 
self.assertTrue(exists(last_dir)) + self.cleanup_files() - def test_h0_await_create(self): - if not xattr_installed: - return - self.runTest('await-create') - - def test_h1_Read_Rsz_0_big_file(self): - self.test_g2_Append_Rsz_0_big_file() - ivk = self.invok - ivk.total_sz_kb = 2048 - ivk.iterations = 5 - # boundary condition where we want record size < max buffer space - ivk.record_sz_kb = 0 - self.mk_files() - self.verify_read = True - self.runTest('read') - self.assertTrue(ivk.total_sz_kb * ivk.BYTES_PER_KB - > ivk.biggest_buf_size) - expected_reads_per_file = ivk.total_sz_kb * ivk.BYTES_PER_KB \ - // ivk.biggest_buf_size - self.assertTrue(ivk.rq == ivk.iterations - * expected_reads_per_file) - self.cleanup_files() - - def test_h2_read_bad_data(self): - self.mk_files() - self.invok.verify_read = True - fn = self.lastFileNameInTest(self.invok.src_dirs) - fd = os.open(fn, os.O_WRONLY | O_BINARY) - os.lseek(fd, 5, os.SEEK_SET) - - os.write(fd, b'!') - - os.close(fd) - try: - self.runTest('read') - except MFRdWrExc: - pass - except SMFRunException: - pass - self.assertTrue(self.invok.status != ok) - self.cleanup_files() - - def common_z_params(self): - self.invok.filesize_distr = self.invok.fsdistr_random_exponential - self.invok.incompressible = True - self.invok.verify_read = True - self.invok.pause_between_files = 50 - self.invok.iterations = 300 - self.invok.record_sz_kb = 1 - self.invok.total_sz_kb = 4 - - def test_z1_create(self): - self.common_z_params() - self.cleanup_files() - self.runTest('create') - - # test_z2_read inherits files from the z1_create test - # to inherit files, you must establish same test parameters as before - - def test_z2_read(self): - self.common_z_params() - self.runTest('read') - - # inherits files from the z1_create test - - def test_z3_append(self): - self.common_z_params() - self.runTest('append') - self.cleanup_files() - - # test read verification without incompressible true - - def test_y_read_verify_incompressible_false(self): - self.invok.incompressible = False - self.invok.verify_read = True - self.invok.finish_all_rq = True - self.invok.iterations = 300 - self.invok.record_sz_kb = 1 - self.invok.total_sz_kb = 4 - self.mk_files() - self.runTest('read') - - def test_y2_cleanup(self): - self.invok.incompressible = False - self.invok.verify_read = True - self.invok.finish_all_rq = True - self.invok.iterations = 300 - self.invok.record_sz_kb = 1 - self.invok.total_sz_kb = 4 - self.cleanup_files() - - def common_swift_params(self): - self.invok.invocations = 10 - self.invok.record_sz_kb = 5 - self.invok.total_sz_kb = 64 - self.invok.xattr_size = 128 - self.invok.xattr_count = 2 - self.invok.fsync = True - self.invok.filesize_distr = self.invok.fsdistr_random_exponential - - def test_i1_do_swift_put(self): - if not xattr_installed: - return - self.common_swift_params() - self.cleanup_files() - self.runTest('swift-put') + def test_c2_Rmdir(self): + self.cleanup_files() + self.runTest("mkdir") + last_dir = self.lastFileNameInTest(self.invok.src_dirs) + ".d" + self.assertTrue(exists(last_dir)) + self.runTest("rmdir") + self.assertTrue(not exists(last_dir)) + self.cleanup_files() - # swift_get inherits files from the i1_do_swift_put test + def test_c3_Symlink(self): + if is_windows_os: + return + self.mk_files() + self.runTest("symlink") + lastSymlinkFile = self.lastFileNameInTest(self.invok.dest_dirs) + lastSymlinkFile += ".s" + self.assertTrue(exists(lastSymlinkFile)) + self.cleanup_files() - def test_i2_do_swift_get(self): - if not xattr_installed: - return 
- self.common_swift_params() - self.cleanup_files() - - def test_j0_dir_name(self): - self.invok.files_per_dir = 20 - self.invok.dirs_per_dir = 3 - d = self.invok.mk_dir_name(29 * self.invok.files_per_dir) - expected = join('d_001', join('d_000', join('d_000', 'd_002'))) - self.assertTrue(d == expected) - self.invok.dirs_per_dir = 7 - d = self.invok.mk_dir_name(320 * self.invok.files_per_dir) - expected = join(join('d_006', 'd_003'), 'd_005') - self.assertTrue(d == expected) - - def test_j1_deep_tree(self): - self.invok.total_sz_kb = 0 - self.invok.record_sz_kb = 0 - self.invok.files_per_dir = 10 - self.invok.dirs_per_dir = 3 - self.invok.iterations = 200 - self.invok.prefix = '' - self.invok.suffix = 'deep' - self.mk_files() - self.assertTrue(exists(self.lastFileNameInTest(self.invok.src_dirs))) - self.cleanup_files() - - def test_j1a_pause(self): - self.invok.iterations = 2000 - self.invok.pause_between_files = 0 - self.invok.total_hosts = 10 - self.invok.auto_pause = True - self.mk_files() - self.cleanup_files() - - def test_j2_deep_hashed_tree(self): - self.invok.suffix = 'deep_hashed' - self.invok.total_sz_kb = 0 - self.invok.record_sz_kb = 0 - self.invok.files_per_dir = 5 - self.invok.dirs_per_dir = 4 - self.invok.iterations = 500 - self.invok.hash_to_dir = True - self.mk_files() - fn = self.lastFileNameInTest(self.invok.src_dirs) - expectedFn = os.sep.join([self.invok.src_dirs[0], 'h_001', - 'h_000', 'h_001', - 'p_%s_regtest_499_deep_hashed' % self.invok.onhost]) - self.assertTrue(fn == expectedFn) - self.assertTrue(exists(fn)) - self.cleanup_files() - - def test_z_multithr_stonewall(self): - self.invok.verbose = True - self.invok.stonewall = True - self.invok.finish = True - self.invok.prefix = 'thr_' - self.invok.suffix = 'foo' - self.invok.iterations = 400 - self.invok.files_per_dir = 10 - self.invok.dirs_per_dir = 3 - sgate_file = join(self.invok.network_dir, 'starting_gate.tmp') - self.invok.starting_gate = sgate_file - thread_ready_timeout = 4 - thread_count = 4 - self.test1_recreate_src_dest_dirs() - self.checkDirListEmpty(self.invok.src_dirs) - self.checkDirListEmpty(self.invok.dest_dirs) - self.checkDirEmpty(self.invok.network_dir) - invokeList = [] - for j in range(0, thread_count): - s = copy.copy(self.invok) # test copy constructor - s.tid = str(j) - s.src_dirs = [join(d, 'thrd_' + s.tid) for d in s.src_dirs] - s.dest_dirs = [join(d, 'thrd_' + s.tid) for d in - s.dest_dirs] - invokeList.append(s) - threadList = [] - for s in invokeList: - ensure_deleted(s.gen_thread_ready_fname(s.tid)) - threadList.append(TestThread(s, s.prefix + s.tid)) - for t in threadList: - t.start() - time.sleep(0.3) - threads_ready = True # define scope outside loop - for i in range(0, thread_ready_timeout): - threads_ready = True + def test_c4_Stat(self): + self.mk_files() + self.runTest("stat") + self.cleanup_files() + + def test_c44_Readdir(self): + self.invok.iterations = 50 + self.invok.files_per_dir = 5 + self.invok.dirs_per_dir = 2 + self.mk_files() + self.runTest("readdir") + self.cleanup_files() + + def test_c44a_Readdir_bigdir(self): + self.invok.iterations = 5000 + self.invok.files_per_dir = 1000 + self.invok.dirs_per_dir = 2 + self.mk_files() + self.runTest("readdir") + self.cleanup_files() + + def test_c45_Ls_l(self): + self.mk_files() + self.runTest("ls-l") + self.cleanup_files() + + def test_c5_Chmod(self): + self.mk_files() + self.runTest("chmod") + self.cleanup_files() + + def test_c6_xattr(self): + if xattr_installed: + self.mk_files() + self.fsync = True + self.xattr_size = 256 
+ self.xattr_count = 10 + self.runTest("setxattr") + self.runTest("getxattr") + self.cleanup_files() + + def test_d_Delete(self): + self.invok.measure_rsptimes = True + self.mk_files() + lastFn = self.lastFileNameInTest(self.invok.src_dirs) + self.runTest("delete") + self.assertTrue(not exists(lastFn)) + self.cleanup_files() + + def test_e_Rename(self): + self.invok.measure_rsptimes = False + self.mk_files() + self.runTest("rename") + fn = self.invok.mk_file_nm(self.invok.dest_dirs) + self.assertTrue(exists(fn)) + self.cleanup_files() + + def test_f_DeleteRenamed(self): + self.mk_files() + self.runTest("rename") + self.runTest("delete-renamed") + lastfn = self.invok.mk_file_nm(self.invok.dest_dirs) + # won't delete any files or directories that contain them + self.assertTrue(not exists(lastfn)) + self.cleanup_files() + + def test_g0_Overwrite(self): + self.mk_files() + orig_kb = self.invok.total_sz_kb + self.runTest("overwrite") + fn = self.lastFileNameInTest(self.invok.src_dirs) + self.assertTrue(self.file_size(fn) == orig_kb * self.invok.BYTES_PER_KB) + self.cleanup_files() + + def test_g1_Append(self): + self.mk_files() + orig_kb = self.invok.total_sz_kb + self.invok.total_sz_kb *= 2 + self.runTest("append") + fn = self.lastFileNameInTest(self.invok.src_dirs) + self.assertTrue(self.file_size(fn) == 3 * orig_kb * self.invok.BYTES_PER_KB) + self.cleanup_files() + + def test_g2_Append_Rsz_0_big_file(self): + self.mk_files() + orig_kb = self.invok.total_sz_kb + self.invok.total_sz_kb = 2048 + # boundary condition where we want record size < max buffer space + self.invok.record_sz_kb = 0 + self.runTest("append") + fn = self.lastFileNameInTest(self.invok.src_dirs) + self.assertTrue( + self.file_size(fn) == (orig_kb + 2048) * self.invok.BYTES_PER_KB + ) + self.cleanup_files() + + def test_h00_read(self): + if not xattr_installed: + return + self.invok.record_ctime_size = True + self.mk_files() + self.invok.verify_read = True + self.runTest("read") + + # this test inherits files from preceding test + + def test_h0_await_create(self): + if not xattr_installed: + return + self.runTest("await-create") + + def test_h1_Read_Rsz_0_big_file(self): + self.test_g2_Append_Rsz_0_big_file() + ivk = self.invok + ivk.total_sz_kb = 2048 + ivk.iterations = 5 + # boundary condition where we want record size < max buffer space + ivk.record_sz_kb = 0 + self.mk_files() + self.verify_read = True + self.runTest("read") + self.assertTrue(ivk.total_sz_kb * ivk.BYTES_PER_KB > ivk.biggest_buf_size) + expected_reads_per_file = ( + ivk.total_sz_kb * ivk.BYTES_PER_KB // ivk.biggest_buf_size + ) + self.assertTrue(ivk.rq == ivk.iterations * expected_reads_per_file) + self.cleanup_files() + + def test_h2_read_bad_data(self): + self.mk_files() + self.invok.verify_read = True + fn = self.lastFileNameInTest(self.invok.src_dirs) + fd = os.open(fn, os.O_WRONLY | O_BINARY) + os.lseek(fd, 5, os.SEEK_SET) + + os.write(fd, b"!") + + os.close(fd) + try: + self.runTest("read") + except MFRdWrExc: + pass + except SMFRunException: + pass + self.assertTrue(self.invok.status != ok) + self.cleanup_files() + + def common_z_params(self): + self.invok.filesize_distr = self.invok.fsdistr_random_exponential + self.invok.incompressible = True + self.invok.verify_read = True + self.invok.pause_between_files = 50 + self.invok.iterations = 300 + self.invok.record_sz_kb = 1 + self.invok.total_sz_kb = 4 + + def test_z1_create(self): + self.common_z_params() + self.cleanup_files() + self.runTest("create") + + # test_z2_read inherits files from the 
z1_create test + # to inherit files, you must establish same test parameters as before + + def test_z2_read(self): + self.common_z_params() + self.runTest("read") + + # inherits files from the z1_create test + + def test_z3_append(self): + self.common_z_params() + self.runTest("append") + self.cleanup_files() + + # test read verification without incompressible true + + def test_y_read_verify_incompressible_false(self): + self.invok.incompressible = False + self.invok.verify_read = True + self.invok.finish_all_rq = True + self.invok.iterations = 300 + self.invok.record_sz_kb = 1 + self.invok.total_sz_kb = 4 + self.mk_files() + self.runTest("read") + + def test_y2_cleanup(self): + self.invok.incompressible = False + self.invok.verify_read = True + self.invok.finish_all_rq = True + self.invok.iterations = 300 + self.invok.record_sz_kb = 1 + self.invok.total_sz_kb = 4 + self.cleanup_files() + + def common_swift_params(self): + self.invok.invocations = 10 + self.invok.record_sz_kb = 5 + self.invok.total_sz_kb = 64 + self.invok.xattr_size = 128 + self.invok.xattr_count = 2 + self.invok.fsync = True + self.invok.filesize_distr = self.invok.fsdistr_random_exponential + + def test_i1_do_swift_put(self): + if not xattr_installed: + return + self.common_swift_params() + self.cleanup_files() + self.runTest("swift-put") + + # swift_get inherits files from the i1_do_swift_put test + + def test_i2_do_swift_get(self): + if not xattr_installed: + return + self.common_swift_params() + self.cleanup_files() + + def test_j0_dir_name(self): + self.invok.files_per_dir = 20 + self.invok.dirs_per_dir = 3 + d = self.invok.mk_dir_name(29 * self.invok.files_per_dir) + expected = join("d_001", join("d_000", join("d_000", "d_002"))) + self.assertTrue(d == expected) + self.invok.dirs_per_dir = 7 + d = self.invok.mk_dir_name(320 * self.invok.files_per_dir) + expected = join(join("d_006", "d_003"), "d_005") + self.assertTrue(d == expected) + + def test_j1_deep_tree(self): + self.invok.total_sz_kb = 0 + self.invok.record_sz_kb = 0 + self.invok.files_per_dir = 10 + self.invok.dirs_per_dir = 3 + self.invok.iterations = 200 + self.invok.prefix = "" + self.invok.suffix = "deep" + self.mk_files() + self.assertTrue(exists(self.lastFileNameInTest(self.invok.src_dirs))) + self.cleanup_files() + + def test_j1a_pause(self): + self.invok.iterations = 2000 + self.invok.pause_between_files = 0 + self.invok.total_hosts = 10 + self.invok.auto_pause = True + self.mk_files() + self.cleanup_files() + + def test_j2_deep_hashed_tree(self): + self.invok.suffix = "deep_hashed" + self.invok.total_sz_kb = 0 + self.invok.record_sz_kb = 0 + self.invok.files_per_dir = 5 + self.invok.dirs_per_dir = 4 + self.invok.iterations = 500 + self.invok.hash_to_dir = True + self.mk_files() + fn = self.lastFileNameInTest(self.invok.src_dirs) + expectedFn = os.sep.join( + [ + self.invok.src_dirs[0], + "h_001", + "h_000", + "h_001", + "p_%s_regtest_499_deep_hashed" % self.invok.onhost, + ] + ) + self.assertTrue(fn == expectedFn) + self.assertTrue(exists(fn)) + self.cleanup_files() + + def test_z_multithr_stonewall(self): + self.invok.verbose = True + self.invok.stonewall = True + self.invok.finish = True + self.invok.prefix = "thr_" + self.invok.suffix = "foo" + self.invok.iterations = 400 + self.invok.files_per_dir = 10 + self.invok.dirs_per_dir = 3 + sgate_file = join(self.invok.network_dir, "starting_gate.tmp") + self.invok.starting_gate = sgate_file + thread_ready_timeout = 4 + thread_count = 4 + self.test1_recreate_src_dest_dirs() + 
self.checkDirListEmpty(self.invok.src_dirs) + self.checkDirListEmpty(self.invok.dest_dirs) + self.checkDirEmpty(self.invok.network_dir) + invokeList = [] + for j in range(0, thread_count): + s = copy.copy(self.invok) # test copy constructor + s.tid = str(j) + s.src_dirs = [join(d, "thrd_" + s.tid) for d in s.src_dirs] + s.dest_dirs = [join(d, "thrd_" + s.tid) for d in s.dest_dirs] + invokeList.append(s) + threadList = [] for s in invokeList: - thread_ready_file = s.gen_thread_ready_fname(s.tid) - if not os.path.exists(thread_ready_file): - threads_ready = False + ensure_deleted(s.gen_thread_ready_fname(s.tid)) + threadList.append(TestThread(s, s.prefix + s.tid)) + for t in threadList: + t.start() + time.sleep(0.3) + threads_ready = True # define scope outside loop + for i in range(0, thread_ready_timeout): + threads_ready = True + for s in invokeList: + thread_ready_file = s.gen_thread_ready_fname(s.tid) + if not os.path.exists(thread_ready_file): + threads_ready = False + break + if threads_ready: break - if threads_ready: - break - time.sleep(1.1) - if not threads_ready: - abort_test(self.invok.abort_fn(), threadList) + time.sleep(1.1) + if not threads_ready: + abort_test(self.invok.abort_fn(), threadList) + for t in threadList: + t.join(1.1) + raise SMFRunException( + "threads did not show up within %d seconds" % thread_ready_timeout + ) + touch(sgate_file) for t in threadList: - t.join(1.1) - raise SMFRunException('threads did not show up within %d seconds' - % thread_ready_timeout) - touch(sgate_file) - for t in threadList: - t.join() - if thrd_is_alive(t): - raise SMFRunException('thread join timeout:' + str(t)) - if t.invocation.status != ok: - raise SMFRunException('thread did not complete iterations: ' - + str(t)) + t.join() + if thrd_is_alive(t): + raise SMFRunException("thread join timeout:" + str(t)) + if t.invocation.status != ok: + raise SMFRunException( + "thread did not complete iterations: " + str(t) + ) # so you can just do "python smallfile.py" to test it -if __name__ == '__main__': +if __name__ == "__main__": run_unit_tests() diff --git a/smallfile_cli.py b/smallfile_cli.py index ba326dc..e094611 100755 --- a/smallfile_cli.py +++ b/smallfile_cli.py @@ -12,13 +12,13 @@ # ./smallfile_cli.py # -''' +""" smallfile_cli.py CLI user interface for generating metadata-intensive workloads Copyright 2012 -- Ben England Licensed under the Apache License at http://www.apache.org/licenses/LICENSE-2.0 See Appendix on this page for instructions pertaining to license. 
-''' +""" import sys import os @@ -49,6 +49,7 @@ # run a multi-host test + def run_multi_host_workload(prm): prm_host_set = prm.host_set @@ -58,19 +59,19 @@ def run_multi_host_workload(prm): starting_gate = master_invoke.starting_gate verbose = master_invoke.verbose - if os.getenv('PYPY'): - python_prog = os.getenv('PYPY') - elif sys.version.startswith('2'): - python_prog = 'python' - elif sys.version.startswith('3'): - python_prog = 'python3' + if os.getenv("PYPY"): + python_prog = os.getenv("PYPY") + elif sys.version.startswith("2"): + python_prog = "python" + elif sys.version.startswith("3"): + python_prog = "python3" else: - raise SMFRunException('unrecognized python version %s' % sys.version) + raise SMFRunException("unrecognized python version %s" % sys.version) # construct list of ssh threads to invoke in parallel master_invoke.create_top_dirs(True) - pickle_fn = os.path.join(prm.master_invoke.network_dir, 'param.pickle') + pickle_fn = os.path.join(prm.master_invoke.network_dir, "param.pickle") # if verbose: print('writing ' + pickle_fn)) @@ -82,28 +83,29 @@ def run_multi_host_workload(prm): host_ct = len(prm_host_set) for j in range(0, len(prm_host_set)): remote_host = prm_host_set[j] - smf_remote_pgm = os.path.join(prm.remote_pgm_dir, - 'smallfile_remote.py') - this_remote_cmd = '%s %s --network-sync-dir %s ' \ - % (python_prog, smf_remote_pgm, prm.master_invoke.network_dir) + smf_remote_pgm = os.path.join(prm.remote_pgm_dir, "smallfile_remote.py") + this_remote_cmd = "%s %s --network-sync-dir %s " % ( + python_prog, + smf_remote_pgm, + prm.master_invoke.network_dir, + ) # this_remote_cmd = remote_cmd if prm_permute_host_dirs: - this_remote_cmd += \ - ' --as-host %s' % prm_host_set[(j + 1) % host_ct] + this_remote_cmd += " --as-host %s" % prm_host_set[(j + 1) % host_ct] else: - this_remote_cmd += ' --as-host %s' % remote_host + this_remote_cmd += " --as-host %s" % remote_host if verbose: print(this_remote_cmd) if smallfile.is_windows_os or prm.launch_by_daemon: remote_thread_list.append( - launcher_thread.launcher_thread(prm, - remote_host, - this_remote_cmd)) + launcher_thread.launcher_thread(prm, remote_host, this_remote_cmd) + ) else: - remote_thread_list.append(ssh_thread.ssh_thread(remote_host, - this_remote_cmd)) + remote_thread_list.append( + ssh_thread.ssh_thread(remote_host, this_remote_cmd) + ) # start them @@ -136,15 +138,15 @@ def run_multi_host_workload(prm): # HACK to force directory entry coherency for Gluster ndirlist = os.listdir(master_invoke.network_dir) if master_invoke.verbose: - print('shared dir list: ' + str(ndirlist)) + print("shared dir list: " + str(ndirlist)) hosts_ready = True if os.path.exists(abortfn): - raise SMFRunException('worker host signaled abort') + raise SMFRunException("worker host signaled abort") for j in range(last_host_seen + 1, len(prm_host_set)): h = prm_host_set[j] fn = master_invoke.gen_host_ready_fname(h.strip()) if verbose: - print('checking for host filename ' + fn) + print("checking for host filename " + fn) if not os.path.exists(fn): hosts_ready = False break @@ -159,7 +161,7 @@ def run_multi_host_workload(prm): kill_remaining_threads = False for t in remote_thread_list: if not smallfile.thrd_is_alive(t): - print('thread %s on host %s has died' % (t, str(h))) + print("thread %s on host %s has died" % (t, str(h))) kill_remaining_threads = True break if kill_remaining_threads: @@ -173,26 +175,28 @@ def run_multi_host_workload(prm): sec += sec_delta sec_delta += 1 if verbose: - print('last_host_seen=%d sec=%d' % (last_host_seen, 
sec)) + print("last_host_seen=%d sec=%d" % (last_host_seen, sec)) except KeyboardInterrupt as e: - print('saw SIGINT signal, aborting test') + print("saw SIGINT signal, aborting test") exception_seen = e hosts_ready = False except Exception as e: exception_seen = e hosts_ready = False - print('saw exception %s, aborting test' % str(e)) + print("saw exception %s, aborting test" % str(e)) if not hosts_ready: smallfile.abort_test(abortfn, []) if h != None: - print('ERROR: host %s did not reach starting gate' % h) + print("ERROR: host %s did not reach starting gate" % h) else: - print('no host reached starting gate') + print("no host reached starting gate") if not exception_seen: - raise SMFRunException('hosts did not reach starting gate ' + - 'within %d seconds' % host_timeout) + raise SMFRunException( + "hosts did not reach starting gate " + + "within %d seconds" % host_timeout + ) else: - print('saw exception %s, aborting test' % str(exception_seen)) + print("saw exception %s, aborting test" % str(exception_seen)) sys.exit(NOTOK) else: @@ -200,20 +204,23 @@ def run_multi_host_workload(prm): # this is like firing the gun at the track meet try: - sync_files.write_sync_file(starting_gate, 'hi') + sync_files.write_sync_file(starting_gate, "hi") prm.test_start_time = time.time() - print('starting all threads by creating starting gate file %s' % - starting_gate) + print( + "starting all threads by creating starting gate file %s" % starting_gate + ) except IOError as e: - print('error writing starting gate: %s' % os.strerror(e.errno)) + print("error writing starting gate: %s" % os.strerror(e.errno)) # wait for them to finish for t in remote_thread_list: t.join() if t.status != OK: - print('ERROR: ssh thread for host %s completed with status %d' % - (t.remote_host, t.status)) + print( + "ERROR: ssh thread for host %s completed with status %d" + % (t.remote_host, t.status) + ) # attempt to aggregate results by reading pickle files # containing SmallfileWorkload instances @@ -230,7 +237,7 @@ def run_multi_host_workload(prm): pickle_fn = master_invoke.host_result_filename(h) if verbose: - print('reading pickle file: %s' % pickle_fn) + print("reading pickle file: %s" % pickle_fn) host_invoke_list = [] try: if one_shot_delay and not os.path.exists(pickle_fn): @@ -241,29 +248,30 @@ def run_multi_host_workload(prm): time.sleep(1.2) one_shot_delay = False - with open(pickle_fn, 'rb') as pickle_file: + with open(pickle_fn, "rb") as pickle_file: host_invoke_list = pickle.load(pickle_file) if verbose: - print(' read %d invoke objects' % len(host_invoke_list)) + print(" read %d invoke objects" % len(host_invoke_list)) invoke_list.extend(host_invoke_list) ensure_deleted(pickle_fn) except IOError as e: if e.errno != errno.ENOENT: raise e - print(' pickle file %s not found' % pickle_fn) + print(" pickle file %s not found" % pickle_fn) output_results.output_results(invoke_list, prm) all_ok = OK except IOError as e: - print('host %s filename %s: %s' % (h, pickle_fn, str(e))) + print("host %s filename %s: %s" % (h, pickle_fn, str(e))) except KeyboardInterrupt as e: - print('control-C signal seen (SIGINT)') + print("control-C signal seen (SIGINT)") sys.exit(all_ok) # main routine that does everything for this workload + def run_workload(): # if a --host-set parameter was passed, @@ -274,8 +282,8 @@ def run_workload(): try: params = parse.parse() except parse.SmfParseException as e: - print('ERROR: ' + str(e)) - print('use --help option to get CLI syntax') + print("ERROR: " + str(e)) + print("use --help option to get 
CLI syntax") sys.exit(NOTOK) # for multi-host test @@ -290,5 +298,5 @@ def run_workload(): # must be moved to within a routine unless it's trivial (like constants) # because windows doesn't support fork(). -if __name__ == '__main__': +if __name__ == "__main__": run_workload() diff --git a/smallfile_remote.py b/smallfile_remote.py index fed2d12..5b89d33 100755 --- a/smallfile_remote.py +++ b/smallfile_remote.py @@ -13,13 +13,13 @@ # smallfile modules -''' +""" smallfile_cli.py CLI user interface for generating metadata-intensive workloads Copyright 2012 -- Ben England Licensed under the Apache License at http://www.apache.org/licenses/LICENSE-2.0 See Appendix on this page for instructions pertaining to license. -''' +""" import multi_thread_workload import parse_slave @@ -34,7 +34,7 @@ def run_workload(): params = parse_slave.parse() if params.master_invoke.verbose: - print('slave params: %s' % str(params)) + print("slave params: %s" % str(params)) return multi_thread_workload.run_multi_thread_workload(params) @@ -43,5 +43,5 @@ def run_workload(): # must be moved to within a routine unless it's trivial (like constants) # because windows doesn't support fork(). -if __name__ == '__main__': +if __name__ == "__main__": run_workload() diff --git a/smallfile_rsptimes_stats.py b/smallfile_rsptimes_stats.py index 19e4988..0bab184 100755 --- a/smallfile_rsptimes_stats.py +++ b/smallfile_rsptimes_stats.py @@ -1,15 +1,15 @@ #!/usr/bin/env python3 # # smallfile_rsptimes_stats.py -- python program to reduce response time sample data from smallfile benchmark to -# statistics. +# statistics. # # in addition to stats for individual thread, it shows per-client and cluster-wide stats -# smallfile at present produces response time data in the /var/tmp/ directory +# smallfile at present produces response time data in the /var/tmp/ directory # within each workload generator # it is the user's responsibility to copy the data back # to a directory (on the test driver perhaps). -# this means that the files from each workload generator have to have -# the workload generator hostname embedded in them +# this means that the files from each workload generator have to have +# the workload generator hostname embedded in them # so that they can all be co-located in a single directory. # since there is no standard method for this yet, # this program has to be adjusted to parse the filenames @@ -23,8 +23,8 @@ # time since start of test (like before). The start time as # seconds since the epoch (1970) can be obtained from the JSON # output in the 'start-time' field. 
-# -# +# +# import sys from sys import argv import os @@ -36,56 +36,62 @@ from scipy.stats import tmean, tstd import bisect -time_infinity = 1<<62 +time_infinity = 1 << 62 # edit this list if you want additional percentiles -percentiles = [ 50, 90, 95, 99 ] +percentiles = [50, 90, 95, 99] min_rsptime_samples = 5 start_time = 0.0 -def usage( msg ): - print('ERROR: %s' % msg) - print('usage: python smallfile_rsptimes_stats.py ') - print(' [ --common-hostname-suffix my.suffix ] ') - print(' [ --time-interval positive-integer-seconds ] ') - print(' [ --start-time seconds-since-1970 ] ') - print(' directory' ) - sys.exit(1) + +def usage(msg): + print("ERROR: %s" % msg) + print("usage: python smallfile_rsptimes_stats.py ") + print(" [ --common-hostname-suffix my.suffix ] ") + print(" [ --time-interval positive-integer-seconds ] ") + print(" [ --start-time seconds-since-1970 ] ") + print(" directory") + sys.exit(1) + # parse files once, we assume here that we can hold them in RAM # so we don't have to keep reading them # by keeping them in RAM we allow binary search for starting # time since we want to isolate set of samples in a time interval -def parse_rsptime_file( result_dir, csv_pathname ): + +def parse_rsptime_file(result_dir, csv_pathname): samples = [] - with open(os.path.join(result_dir, csv_pathname), 'r') as f: - records = [ l.strip() for l in f.readlines() ] + with open(os.path.join(result_dir, csv_pathname), "r") as f: + records = [l.strip() for l in f.readlines()] for sample in records: - components = sample.split(',') + components = sample.split(",") op = components[0] at_time = float(components[1]) if start_time > 0: at_time += start_time rsp_time = float(components[2]) - samples.append( (op, at_time, rsp_time) ) + samples.append((op, at_time, rsp_time)) return samples # to be used for sorting based on tuple components -def get_at_time( rsptime_tuple ): + +def get_at_time(rsptime_tuple): (_, at_time, _) = rsptime_tuple return at_time -def get_rsp_time( rsptime_tuple ): + +def get_rsp_time(rsptime_tuple): (_, _, rsp_time) = rsptime_tuple return rsp_time # this function avoids duplication of sorting + def do_sorting(sample_set, already_sorted=False): if not already_sorted: sorted_samples = sorted(sample_set, key=get_at_time) @@ -99,18 +105,20 @@ def do_sorting(sample_set, already_sorted=False): # leverage python binary search module "bisect" # obtained from https://docs.python.org/2/library/bisect.html#searching-sorted-lists + def find_le(a, x): # find highest index with value < x i = bisect.bisect_right(a, x) return i + def find_gt(a, x): # find lowest index with value >= x i = bisect.bisect_left(a, x) if i < len(a): return i # since the only thing we are doing with this result - # is to extract a slice of an array, + # is to extract a slice of an array, # returning len(a) is a valid thing # raise ValueError @@ -118,8 +126,9 @@ def find_gt(a, x): # if you want this to calculate stats for a time_interval # t specify from_time and to_time -def reduce_thread_set( sorted_samples_tuple, from_time=0, to_time=time_infinity ): - # FIXME: need binary search to + +def reduce_thread_set(sorted_samples_tuple, from_time=0, to_time=time_infinity): + # FIXME: need binary search to # efficiently find beginning of time interval (sorted_samples, sorted_keys, sorted_times) = sorted_samples_tuple if to_time < time_infinity: @@ -134,7 +143,7 @@ def reduce_thread_set( sorted_samples_tuple, from_time=0, to_time=time_infinity maxtime = sorted_times[-1] mean = scipy.stats.tmean(sorted_times) stdev = 
scipy.stats.tstd(sorted_times) - pctdev = 100.0*stdev/mean + pctdev = 100.0 * stdev / mean pctiles = [] for p in percentiles: pctiles.append(numpy.percentile(sorted_times, float(p), overwrite_input=True)) @@ -143,22 +152,28 @@ def reduce_thread_set( sorted_samples_tuple, from_time=0, to_time=time_infinity # format the stats for output to a csv file + def format_stats(all_stats): if all_stats == None: - return ' 0,,,,,' + ',,,,,,,,,,,,,,,,'[0:len(percentiles)-1] + return " 0,,,,," + ",,,,,,,,,,,,,,,,"[0 : len(percentiles) - 1] (sample_count, mintime, maxtime, mean, pctdev, pctiles) = all_stats - partial_record = '%d, %f, %f, %f, %f, ' % ( - sample_count, mintime, maxtime, mean, pctdev) + partial_record = "%d, %f, %f, %f, %f, " % ( + sample_count, + mintime, + maxtime, + mean, + pctdev, + ) for p in pctiles: - partial_record += '%f, ' % p + partial_record += "%f, " % p return partial_record -#FIXME: convert to argparse module, more compact and standard +# FIXME: convert to argparse module, more compact and standard # define default parameter values hosts = {} -suffix = '' +suffix = "" argindex = 1 argcount = len(argv) time_interval = 10 @@ -166,36 +181,35 @@ def format_stats(all_stats): # parse any optional parameters while argindex < argcount: - pname = argv[argindex] - if not pname.startswith('--'): - break - if argindex == argcount - 1: - usage('every parameter consists of a --name and a value') - pval = argv[argindex + 1] - argindex += 2 - pname = pname[2:] - if pname == 'common-hostname-suffix': - suffix = pval - if not suffix.startswith('.'): - suffix = '.' + pval - elif pname == 'time-interval': - time_interval = int(pval) - elif pname == 'start-time': - start_time = float(pval) - else: - usage('--%s: no such optional parameter defined' % pname) - -if suffix != '': - print('filtering out suffix %s from hostnames' % suffix) -print('time interval is %d seconds' % time_interval) + pname = argv[argindex] + if not pname.startswith("--"): + break + if argindex == argcount - 1: + usage("every parameter consists of a --name and a value") + pval = argv[argindex + 1] + argindex += 2 + pname = pname[2:] + if pname == "common-hostname-suffix": + suffix = pval + if not suffix.startswith("."): + suffix = "." 
+ pval + elif pname == "time-interval": + time_interval = int(pval) + elif pname == "start-time": + start_time = float(pval) + else: + usage("--%s: no such optional parameter defined" % pname) + +if suffix != "": + print("filtering out suffix %s from hostnames" % suffix) +print("time interval is %d seconds" % time_interval) # this regex plucks out a tuple of 2 values: # ## thread number ## hostname -regex = \ - 'rsptimes_([0-9]{2})_([0-9,a-z,\-,\.]*)%s_[-,a-z]*_[.,0-9]*.csv' +regex = "rsptimes_([0-9]{2})_([0-9,a-z,\-,\.]*)%s_[-,a-z]*_[.,0-9]*.csv" # filter out redundant suffix, if any, in hostname @@ -204,18 +218,18 @@ def format_stats(all_stats): # now parse hostnames and files if argindex != argcount - 1: - usage('need directory where response time files are') + usage("need directory where response time files are") directory = argv[argindex] if not os.path.isdir(directory): - usage('%s: directory containing result csv files was not provided' % directory) + usage("%s: directory containing result csv files was not provided" % directory) # process the results # we show individual threads, per-host groupings and all threads together samples_by_thread = {} hosts = {} -pathname_matcher = lambda path : path.startswith('rsptimes') and path.endswith('.csv') +pathname_matcher = lambda path: path.startswith("rsptimes") and path.endswith(".csv") pathnames = filter(pathname_matcher, os.listdir(directory)) max_thread = 0 for p in pathnames: @@ -223,9 +237,10 @@ def format_stats(all_stats): if not m: sys.stderr.write("warning: pathname could not be matched by regex: %s\n" % p) continue - (threadstr, host) = m.group(1,2) + (threadstr, host) = m.group(1, 2) thread = int(threadstr) - if max_thread < thread: max_thread = thread + if max_thread < thread: + max_thread = thread try: perhost_dict = hosts[host] except KeyError: @@ -233,35 +248,35 @@ def format_stats(all_stats): hosts[host] = perhost_dict # load response times for this file into memory # save what file it came from too - samples = parse_rsptime_file( directory, p ) + samples = parse_rsptime_file(directory, p) perhost_dict[threadstr] = (p, samples) - + hostcount = len(hosts.keys()) if hostcount == 0: - usage('%s: no .csv response time log files were found' % directory) + usage("%s: no .csv response time log files were found" % directory) -summary_pathname = os.path.join(directory, 'stats-rsptimes.csv') -header = 'host:thread, samples, min, max, mean, %dev, ' +summary_pathname = os.path.join(directory, "stats-rsptimes.csv") +header = "host:thread, samples, min, max, mean, %dev, " for p in percentiles: - header += '%d%%ile, ' % p + header += "%d%%ile, " % p -with open(summary_pathname, 'w') as outf: - outf.write(header + '\n') +with open(summary_pathname, "w") as outf: + outf.write(header + "\n") # aggregate response times across all threads and whole test duration # if there is only 1 host, no need for cluster-wide stats cluster_sample_set = None if len(hosts.keys()) > 1: - outf.write('cluster-wide stats:\n') + outf.write("cluster-wide stats:\n") cluster_sample_set = [] for per_host_dict in hosts.values(): for (_, samples) in per_host_dict.values(): cluster_sample_set.extend(samples) sorted_cluster_tuple = do_sorting(cluster_sample_set) cluster_results = reduce_thread_set(sorted_cluster_tuple) - outf.write('all-hosts:all-thrd,' + format_stats(cluster_results) + '\n') - outf.write('\n') + outf.write("all-hosts:all-thrd," + format_stats(cluster_results) + "\n") + outf.write("\n") # show them if there is variation amongst clients (could be network) # 
if there is only 1 thread per host, no need for per-host stats @@ -270,31 +285,31 @@ def format_stats(all_stats): host_keys = list(hosts.keys()) first_host = host_keys[0] if len(first_host) > 1: - outf.write('per-host stats:\n') + outf.write("per-host stats:\n") for h in sorted(hosts.keys()): sample_set = [] for (_, samples) in hosts[h].values(): sample_set.extend(samples) sorted_host_tuple = do_sorting(sample_set) host_results = reduce_thread_set(sorted_host_tuple) - outf.write(h + ':' + 'all-thrd' + ',' + format_stats(host_results) + '\n') - outf.write('\n') + outf.write(h + ":" + "all-thrd" + "," + format_stats(host_results) + "\n") + outf.write("\n") # show per-thread results so we can see if client Cephfs mountpoint is fair - outf.write('per-thread stats:\n') + outf.write("per-thread stats:\n") for h in sorted(hosts.keys()): threadset = hosts[h] for t in sorted(threadset.keys()): (_, samples) = threadset[t] - sorted_thrd_tuple = do_sorting(samples, already_sorted = True) + sorted_thrd_tuple = do_sorting(samples, already_sorted=True) thrd_results = reduce_thread_set(sorted_thrd_tuple) - outf.write(h + ':' + t + ',' + format_stats(thrd_results) + '\n') - outf.write('\n') + outf.write(h + ":" + t + "," + format_stats(thrd_results) + "\n") + outf.write("\n") # generate cluster-wide percentiles over time # to show if latency spikes occur - # first get max end time of any request, + # first get max end time of any request, # round that down to quantized time interval end_time = -1 @@ -303,7 +318,7 @@ def format_stats(all_stats): for t in threadset.keys(): (_, samples) = threadset[t] if len(samples) > 0: - (_, max_at_time,max_rsp_time) = samples[-1] + (_, max_at_time, max_rsp_time) = samples[-1] else: max_at_time = 0.0 max_rsp_time = 0.0 @@ -315,10 +330,10 @@ def format_stats(all_stats): # in that time interval if quantized_end_time > 0: - outf.write('cluster-wide response time stats over time:\n') - outf.write('time-since-start(sec), ' + header + '\n') + outf.write("cluster-wide response time stats over time:\n") + outf.write("time-since-start(sec), " + header + "\n") - # avoid re-sorting all response time samples + # avoid re-sorting all response time samples # if possible (and it often is) if cluster_sample_set == None: @@ -327,16 +342,14 @@ def format_stats(all_stats): for (_, samples) in per_host_dict.values(): cluster_sample_set.extend(samples) sorted_cluster_tuple = do_sorting(cluster_sample_set) - for from_t in range(int(start_time),quantized_end_time,time_interval): + for from_t in range(int(start_time), quantized_end_time, time_interval): to_t = from_t + time_interval - results_in_interval = reduce_thread_set(sorted_cluster_tuple, - from_time=from_t, - to_time=to_t) - outf.write('%-8d, all-hosts:all-thrd, ' % from_t) - outf.write(format_stats(results_in_interval) + '\n') - outf.write('\n') - - -print('rsp. time result summary at: %s' % summary_pathname) + results_in_interval = reduce_thread_set( + sorted_cluster_tuple, from_time=from_t, to_time=to_t + ) + outf.write("%-8d, all-hosts:all-thrd, " % from_t) + outf.write(format_stats(results_in_interval) + "\n") + outf.write("\n") +print("rsp. 
time result summary at: %s" % summary_pathname) diff --git a/smf_test_params.py b/smf_test_params.py index dcb9f1a..26593b2 100644 --- a/smf_test_params.py +++ b/smf_test_params.py @@ -8,28 +8,31 @@ # convert boolean value into 'Y' or 'N' + def bool2YN(boolval): if boolval: - return 'Y' - return 'N' + return "Y" + return "N" -class smf_test_params: - def __init__(self, - host_set = None, - thread_count = 2, - remote_pgm_dir = os.path.abspath(os.path.dirname(sys.argv[0])), - top_dirs = None, - network_sync_dir = None, - slave = False, - permute_host_dirs = False, - output_json = None): +class smf_test_params: + def __init__( + self, + host_set=None, + thread_count=2, + remote_pgm_dir=os.path.abspath(os.path.dirname(sys.argv[0])), + top_dirs=None, + network_sync_dir=None, + slave=False, + permute_host_dirs=False, + output_json=None, + ): # this field used to calculate timeouts self.min_directories_per_sec = 50 self.cleanup_delay_usec_per_sec = 0 self.output_json = output_json - self.version = '3.2' + self.version = "3.2" self.as_host = None self.host_set = host_set self.thread_count = thread_count @@ -59,25 +62,27 @@ def recalculate_timeouts(self): # we have to create both src_dir and dst_dir trees so times 2 # allow some time for thread synchronization - dir_creation_overhead = (self.thread_count // 30) + ((dirs * 2) // self.min_directories_per_sec) + dir_creation_overhead = (self.thread_count // 30) + ( + (dirs * 2) // self.min_directories_per_sec + ) # allow for creating list of pathnames if millions of files per dir file_creation_overhead = max(1, self.master_invoke.files_per_dir // 300000) # allow no less than 2 seconds to account for NTP inaccuracy self.startup_timeout = 2 + file_creation_overhead + dir_creation_overhead - + self.host_startup_timeout = self.startup_timeout if self.host_set is not None: # allow extra time for inter-host synchronization self.host_startup_timeout += 5 + (len(self.host_set) // 2) def __str__(self): - fmt = 'smf_test_params: version=%s json=%s as_host=%s host_set=%s ' - fmt += 'launch_by_daemon=%s ' - fmt += 'thread_count=%d remote_pgm_dir=%s ' - fmt += 'slave=%s permute_host_dirs=%s startup_timeout=%d ' - fmt += 'host_timeout=%d smf_invoke=%s ' + fmt = "smf_test_params: version=%s json=%s as_host=%s host_set=%s " + fmt += "launch_by_daemon=%s " + fmt += "thread_count=%d remote_pgm_dir=%s " + fmt += "slave=%s permute_host_dirs=%s startup_timeout=%d " + fmt += "host_timeout=%d smf_invoke=%s " return fmt % ( str(self.version), str(self.output_json), @@ -91,7 +96,7 @@ def __str__(self): self.startup_timeout, self.host_startup_timeout, str(self.master_invoke), - ) + ) # display results of parse so user knows what default values are # most important parameters come first @@ -102,56 +107,61 @@ def __str__(self): def human_readable(self): inv = self.master_invoke if inv.filesize_distr == smallfile.SmallfileWorkload.fsdistr_fixed: - fsdistr_str = 'fixed' + fsdistr_str = "fixed" else: - fsdistr_str = 'random exponential' + fsdistr_str = "random exponential" prm_list = [ - ('version', self.version), - ('hosts in test', '%s' % self.host_set), - ('launch by daemon', '%s' % str(self.launch_by_daemon)), - ('top test directory(s)', str(self.top_dirs)), - ('operation', inv.opname), - ('files/thread', '%d' % inv.iterations), - ('threads', '%d' % self.thread_count), - ('record size (KB, 0 = maximum)', '%d' % inv.record_sz_kb), - ('file size (KB)', '%d' % inv.total_sz_kb), - ('file size distribution', fsdistr_str), - ('files per dir', '%d' % inv.files_per_dir), - ('dirs per 
dir', '%d' % inv.dirs_per_dir), - ('threads share directories?', '%s' % bool2YN(inv.is_shared_dir)), - ('filename prefix', inv.prefix), - ('filename suffix', inv.suffix), - ('hash file number into dir.?', bool2YN(inv.hash_to_dir)), - ('fsync after modify?', bool2YN(inv.fsync)), - ('incompressible?', bool2YN(inv.incompressible)), - ('pause between files (microsec)', '%d' % inv.pause_between_files), - ('auto-pause?', bool2YN(inv.auto_pause)), - ('delay after cleanup per file (microsec)', '%d' % inv.cleanup_delay_usec_per_file), - ('minimum directories per sec', '%d' - % int(self.min_directories_per_sec)), - ('total hosts', '%d' % inv.total_hosts), - ('finish all requests?', '%s' % bool2YN(inv.finish_all_rq)), - ('stonewall?', '%s' % bool2YN(inv.stonewall)), - ('measure response times?', '%s' % bool2YN(inv.measure_rsptimes)), - ('verify read?', '%s' % bool2YN(inv.verify_read)), - ('verbose?', bool2YN(inv.verbose)), - ('log to stderr?', bool2YN(inv.log_to_stderr)), - ] + ("version", self.version), + ("hosts in test", "%s" % self.host_set), + ("launch by daemon", "%s" % str(self.launch_by_daemon)), + ("top test directory(s)", str(self.top_dirs)), + ("operation", inv.opname), + ("files/thread", "%d" % inv.iterations), + ("threads", "%d" % self.thread_count), + ("record size (KB, 0 = maximum)", "%d" % inv.record_sz_kb), + ("file size (KB)", "%d" % inv.total_sz_kb), + ("file size distribution", fsdistr_str), + ("files per dir", "%d" % inv.files_per_dir), + ("dirs per dir", "%d" % inv.dirs_per_dir), + ("threads share directories?", "%s" % bool2YN(inv.is_shared_dir)), + ("filename prefix", inv.prefix), + ("filename suffix", inv.suffix), + ("hash file number into dir.?", bool2YN(inv.hash_to_dir)), + ("fsync after modify?", bool2YN(inv.fsync)), + ("incompressible?", bool2YN(inv.incompressible)), + ("pause between files (microsec)", "%d" % inv.pause_between_files), + ("auto-pause?", bool2YN(inv.auto_pause)), + ( + "delay after cleanup per file (microsec)", + "%d" % inv.cleanup_delay_usec_per_file, + ), + ("minimum directories per sec", "%d" % int(self.min_directories_per_sec)), + ("total hosts", "%d" % inv.total_hosts), + ("finish all requests?", "%s" % bool2YN(inv.finish_all_rq)), + ("stonewall?", "%s" % bool2YN(inv.stonewall)), + ("measure response times?", "%s" % bool2YN(inv.measure_rsptimes)), + ("verify read?", "%s" % bool2YN(inv.verify_read)), + ("verbose?", bool2YN(inv.verbose)), + ("log to stderr?", bool2YN(inv.log_to_stderr)), + ] if smallfile.xattr_installed: - prm_list.extend([('ext.attr.size', '%d' % inv.xattr_size), - ('ext.attr.count', '%d' % inv.xattr_count)]) + prm_list.extend( + [ + ("ext.attr.size", "%d" % inv.xattr_size), + ("ext.attr.count", "%d" % inv.xattr_count), + ] + ) if self.host_set: - prm_list.extend([('permute host directories?', '%s' - % bool2YN(self.permute_host_dirs))]) + prm_list.extend( + [("permute host directories?", "%s" % bool2YN(self.permute_host_dirs))] + ) if self.remote_pgm_dir: - prm_list.append(('remote program directory', - self.remote_pgm_dir)) + prm_list.append(("remote program directory", self.remote_pgm_dir)) if self.network_sync_dir: - prm_list.append(('network thread sync. dir.', - self.network_sync_dir)) + prm_list.append(("network thread sync. 
dir.", self.network_sync_dir)) return prm_list - # add any parameters that might be relevant to + # add any parameters that might be relevant to # data analysis here, can skip parameters that # don't affect test results # don't convert to JSON here, so that caller @@ -159,51 +169,51 @@ def human_readable(self): def to_json(self): - # put params a level down so results can be + # put params a level down so results can be # inserted at same level json_dictionary = {} p = {} - json_dictionary['params'] = p + json_dictionary["params"] = p inv = self.master_invoke # put host-set at top because it can be very long # and we want rest of parameters to be grouped together - p['host_set'] = self.host_set - p['launch_by_daemon'] = self.launch_by_daemon - p['version'] = self.version - p['top'] = ','.join(self.top_dirs) - p['operation'] = inv.opname - p['files_per_thread'] = inv.iterations - p['threads'] = self.thread_count - p['file_size'] = inv.total_sz_kb - p['file_size_distr'] = self.size_distribution - p['files_per_dir'] = inv.files_per_dir - p['share_dir'] = bool2YN(inv.is_shared_dir) - p['fname_prefix'] = inv.prefix - p['fname_suffix'] = inv.suffix - p['hash_to_dir'] = bool2YN(inv.hash_to_dir) - p['fsync_after_modify'] = bool2YN(inv.fsync) - p['pause_between_files'] = str(inv.pause_between_files) - p['auto_pause'] = str(inv.auto_pause) - p['cleanup_delay_usec_per_file'] = str(inv.cleanup_delay_usec_per_file) - p['finish_all_requests'] = bool2YN(inv.finish_all_rq) - p['stonewall'] = bool2YN(inv.stonewall) - p['verify_read'] = bool2YN(inv.verify_read) - p['xattr_size'] = str(inv.xattr_size) - p['xattr_count'] = str(inv.xattr_count) - p['permute_host_dirs'] = bool2YN(self.permute_host_dirs) - p['network_sync_dir'] = self.network_sync_dir - p['min_directories_per_sec'] = self.min_directories_per_sec - p['total_hosts'] = inv.total_hosts + p["host_set"] = self.host_set + p["launch_by_daemon"] = self.launch_by_daemon + p["version"] = self.version + p["top"] = ",".join(self.top_dirs) + p["operation"] = inv.opname + p["files_per_thread"] = inv.iterations + p["threads"] = self.thread_count + p["file_size"] = inv.total_sz_kb + p["file_size_distr"] = self.size_distribution + p["files_per_dir"] = inv.files_per_dir + p["share_dir"] = bool2YN(inv.is_shared_dir) + p["fname_prefix"] = inv.prefix + p["fname_suffix"] = inv.suffix + p["hash_to_dir"] = bool2YN(inv.hash_to_dir) + p["fsync_after_modify"] = bool2YN(inv.fsync) + p["pause_between_files"] = str(inv.pause_between_files) + p["auto_pause"] = str(inv.auto_pause) + p["cleanup_delay_usec_per_file"] = str(inv.cleanup_delay_usec_per_file) + p["finish_all_requests"] = bool2YN(inv.finish_all_rq) + p["stonewall"] = bool2YN(inv.stonewall) + p["verify_read"] = bool2YN(inv.verify_read) + p["xattr_size"] = str(inv.xattr_size) + p["xattr_count"] = str(inv.xattr_count) + p["permute_host_dirs"] = bool2YN(self.permute_host_dirs) + p["network_sync_dir"] = self.network_sync_dir + p["min_directories_per_sec"] = self.min_directories_per_sec + p["total_hosts"] = inv.total_hosts # include startup-timeout and host-timeout to make possible - # diagnosis of timeout problems, but we don't normally need them + # diagnosis of timeout problems, but we don't normally need them # so don't include in human-readable output - p['startup_timeout'] = self.startup_timeout - p['host_timeout'] = self.host_startup_timeout + p["startup_timeout"] = self.startup_timeout + p["host_timeout"] = self.host_startup_timeout return json_dictionary diff --git a/ssh_thread.py b/ssh_thread.py index 
7c55f47..e3fca13 100644 --- a/ssh_thread.py +++ b/ssh_thread.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- -''' +""" ssh_thread.py -- manages parallel execution of shell commands on remote hosts Copyright 2012 -- Ben England Licensed under the Apache License at http://www.apache.org/licenses/LICENSE-2.0 See Appendix on this page for instructions pertaining to license. -''' +""" import threading import os @@ -15,19 +15,26 @@ # for each remote host that we want to use as a workload generator # the thread just executes an ssh command to run this program on a remote host + class ssh_thread(threading.Thread): - ssh_prefix = 'ssh -x -o StrictHostKeyChecking=no ' + ssh_prefix = "ssh -x -o StrictHostKeyChecking=no " def __str__(self): - return 'ssh-thread:%s:%s:%s' % \ - (self.remote_host, str(self.status), self.remote_cmd) + return "ssh-thread:%s:%s:%s" % ( + self.remote_host, + str(self.status), + self.remote_cmd, + ) def __init__(self, remote_host, remote_cmd_in): threading.Thread.__init__(self) self.remote_host = remote_host - self.remote_cmd = '%s %s "%s"' % \ - (self.ssh_prefix, self.remote_host, remote_cmd_in) + self.remote_cmd = '%s %s "%s"' % ( + self.ssh_prefix, + self.remote_host, + remote_cmd_in, + ) # print('thread cmd %s'%self.remote_cmd) self.status = None diff --git a/sync_files.py b/sync_files.py index bb8c775..360f7b3 100644 --- a/sync_files.py +++ b/sync_files.py @@ -4,48 +4,54 @@ import shutil import time + class SyncFileException(Exception): pass -notyet = '.notyet' +notyet = ".notyet" + def touch(fpath): try: - with open(fpath, 'w') as sgf: - sgf.write('hi') + with open(fpath, "w") as sgf: + sgf.write("hi") sgf.flush() os.fsync(sgf.fileno()) except OSError as e: if e.errno != EEXIST: raise e + def write_sync_file(fpath, contents): - with open(fpath+notyet, 'w') as sgf: + with open(fpath + notyet, "w") as sgf: sgf.write(contents) sgf.flush() os.fsync(sgf.fileno()) # file should close when you exit block - os.rename(fpath+notyet, fpath) + os.rename(fpath + notyet, fpath) def write_pickle(fpath, obj): - with open(fpath+notyet, 'wb') as result_file: + with open(fpath + notyet, "wb") as result_file: pickle.dump(obj, result_file) result_file.flush() os.fsync(result_file.fileno()) # or else reader may not see data - os.rename(fpath+notyet, fpath) + os.rename(fpath + notyet, fpath) # create directory if it's not already there + def ensure_dir_exists(dirpath): if not os.path.exists(dirpath): parent_path = os.path.dirname(dirpath) if parent_path == dirpath: - raise SMFSyncFileException('ensure_dir_exists: ' + - 'cannot obtain parent path ' + - 'of non-existent path: ' + - dirpath) + raise SMFSyncFileException( + "ensure_dir_exists: " + + "cannot obtain parent path " + + "of non-existent path: " + + dirpath + ) ensure_dir_exists(parent_path) try: os.mkdir(dirpath) @@ -54,12 +60,14 @@ def ensure_dir_exists(dirpath): raise e else: if not os.path.isdir(dirpath): - raise SMFSyncFileException('%s already exists and is not a directory!' - % dirpath) + raise SMFSyncFileException( + "%s already exists and is not a directory!" 
% dirpath + ) # avoid exception if file we wish to delete is not there + def ensure_deleted(fn): try: if os.path.lexists(fn): @@ -68,7 +76,6 @@ def ensure_deleted(fn): # could be race condition with other client processes/hosts # if was race condition, file will no longer be there if os.path.exists(fn): - raise SMFSyncFileException('exception while ensuring %s deleted: %s' - % (fn, str(e))) - - + raise SMFSyncFileException( + "exception while ensuring %s deleted: %s" % (fn, str(e)) + ) diff --git a/yaml_parser.py b/yaml_parser.py index efb77f6..8b125f7 100644 --- a/yaml_parser.py +++ b/yaml_parser.py @@ -14,155 +14,168 @@ # except that the leading "--" is removed # modifies test_params object with contents of YAML file + def parse_yaml(test_params, input_yaml_file): inv = test_params.master_invoke y = {} - with open(input_yaml_file, 'r') as f: + with open(input_yaml_file, "r") as f: try: y = yaml.safe_load(f) if y == None: y = {} if type(y) is not dict: - raise SmfParseException('yaml.safe_load did not return dictionary - check input file format') + raise SmfParseException( + "yaml.safe_load did not return dictionary - check input file format" + ) except yaml.YAMLError as e: raise SmfParseException("YAML parse error: %s" % e) - + try: for k in y.keys(): v = y[k] - if k == 'yaml-input-file': - raise SmfParseException('cannot specify YAML input file from within itself!') - elif k == 'output-json': + if k == "yaml-input-file": + raise SmfParseException( + "cannot specify YAML input file from within itself!" + ) + elif k == "output-json": test_params.output_json = v - elif k == 'response-times': + elif k == "response-times": inv.measure_rsptimes = boolean(v) - elif k == 'network-sync-dir': + elif k == "network-sync-dir": inv.network_dir = boolean(v) - elif k == 'operation': + elif k == "operation": if not smallfile.SmallfileWorkload.all_op_names.__contains__(v): raise SmfParseException('operation "%s" not recognized') inv.opname = v - elif k == 'top': - test_params.top_dirs = [ os.path.abspath(p) for p in y['top'].split(',') ] - elif k == 'host-set': + elif k == "top": + test_params.top_dirs = [os.path.abspath(p) for p in y["top"].split(",")] + elif k == "host-set": test_params.host_set = host_set(v) - elif k == 'total-hosts': + elif k == "total-hosts": inv.total_hosts = positive_integer(v) - elif k == 'files': + elif k == "files": inv.iterations = positive_integer(v) - elif k == 'threads': + elif k == "threads": test_params.thread_count = positive_integer(v) - elif k == 'files-per-dir': + elif k == "files-per-dir": inv.files_per_dir = positive_integer(v) - elif k == 'dirs-per-dir': + elif k == "dirs-per-dir": inv.dirs_per_dir = positive_integer(v) - elif k == 'record-size': + elif k == "record-size": inv.record_sz_kb = positive_integer(v) - elif k == 'file-size': + elif k == "file-size": inv.total_sz_kb = non_negative_integer(v) - elif k == 'file-size-distribution': - test_params.size_distribution = inv.filesize_distr = file_size_distrib(v) - elif k == 'fsync': + elif k == "file-size-distribution": + test_params.size_distribution = inv.filesize_distr = file_size_distrib( + v + ) + elif k == "fsync": inv.fsync = boolean(v) - elif k == 'xattr-size': + elif k == "xattr-size": inv.xattr_size = positive_integer(v) - elif k == 'xattr-count': + elif k == "xattr-count": inv.xattr_count = positive_integer(v) - elif k == 'pause': + elif k == "pause": inv.pause_between_files = non_negative_integer(v) - elif k == 'auto-pause': + elif k == "auto-pause": inv.auto_pause = boolean(v) - elif k == 
'cleanup-delay-usec-per-file': - inv.cleanup_delay_usec_per_file = test_params.cleanup_delay_usec_per_file = non_negative_integer(v) - elif k == 'stonewall': + elif k == "cleanup-delay-usec-per-file": + inv.cleanup_delay_usec_per_file = ( + test_params.cleanup_delay_usec_per_file + ) = non_negative_integer(v) + elif k == "stonewall": inv.stonewall = boolean(v) - elif k == 'finish': + elif k == "finish": inv.finish_all_rq = boolean(v) - elif k == 'prefix': + elif k == "prefix": inv.prefix = v - elif k == 'suffix': + elif k == "suffix": inv.suffix = v - elif k == 'hash-into-dirs': + elif k == "hash-into-dirs": inv.hash_to_dir = boolean(v) - elif k == 'same-dir': + elif k == "same-dir": inv.is_shared_dir = boolean(v) - elif k == 'verbose': + elif k == "verbose": inv.verbose = boolean(v) - elif k == 'permute-host-dirs': + elif k == "permute-host-dirs": test_params.permute_host_dirs = boolean(v) - elif k == 'record-time-size': + elif k == "record-time-size": inv.record_ctime_size = boolean(v) - elif k == 'verify-read': + elif k == "verify-read": inv.verify_read = boolean(v) - elif k == 'incompressible': + elif k == "incompressible": inv.incompressible = boolean(v) - elif k == 'min-dirs-per-sec': + elif k == "min-dirs-per-sec": test_params.min_directories_per_sec = positive_integer(v) - elif k == 'log-to-stderr': - raise SmfParseException('%s: not allowed in YAML input' % k) - elif k == 'remote-pgm-dir': - raise SmfParseException('%s: not allowed in YAML input' % k) + elif k == "log-to-stderr": + raise SmfParseException("%s: not allowed in YAML input" % k) + elif k == "remote-pgm-dir": + raise SmfParseException("%s: not allowed in YAML input" % k) else: - raise SmfParseException('%s: unrecognized input parameter name' % k) + raise SmfParseException("%s: unrecognized input parameter name" % k) except TypeExc as e: emsg = 'YAML parse error for key "%s" : %s' % (k, str(e)) raise SmfParseException(emsg) class TestYamlParse(unittest_module.TestCase): - def setUp(self): - self.params = smf_test_params.smf_test_params() + def setUp(self): + self.params = smf_test_params.smf_test_params() - def tearDown(self): - self.params = None + def tearDown(self): + self.params = None - def test_parse_empty(self): - fn = '/tmp/sample_parse_empty.yaml' - with open(fn, 'w') as f: - f.write('\n') - parse_yaml(self.params, fn) - # just looking for no exception here + def test_parse_empty(self): + fn = "/tmp/sample_parse_empty.yaml" + with open(fn, "w") as f: + f.write("\n") + parse_yaml(self.params, fn) + # just looking for no exception here + + def test_parse_all(self): + fn = "/tmp/sample_parse.yaml" + with open(fn, "w") as f: + f.write("operation: create\n") + parse_yaml(self.params, fn) + assert self.params.master_invoke.opname == "create" - def test_parse_all(self): - fn = '/tmp/sample_parse.yaml' - with open(fn, 'w') as f: - f.write('operation: create\n') + def test_parse_negint(self): + fn = "/tmp/sample_parse_negint.yaml" + with open(fn, "w") as f: + f.write("files: -3\n") + try: parse_yaml(self.params, fn) - assert(self.params.master_invoke.opname == 'create') + except SmfParseException as e: + msg = str(e) + if not msg.__contains__("greater than zero"): + raise e - def test_parse_negint(self): - fn = '/tmp/sample_parse_negint.yaml' - with open(fn, 'w') as f: - f.write('files: -3\n') - try: - parse_yaml(self.params, fn) - except SmfParseException as e: - msg = str(e) - if not msg.__contains__('greater than zero'): - raise e + def test_parse_hostset(self): + fn = "/tmp/sample_parse_hostset.yaml" + with open(fn, 
"w") as f: + f.write("host-set: host-foo,host-bar\n") + parse_yaml(self.params, fn) + assert self.params.host_set == ["host-foo", "host-bar"] - def test_parse_hostset(self): - fn = '/tmp/sample_parse_hostset.yaml' - with open(fn, 'w') as f: - f.write('host-set: host-foo,host-bar\n') - parse_yaml(self.params, fn) - assert(self.params.host_set == [ 'host-foo', 'host-bar' ]) + def test_parse_fsdistr_exponential(self): + fn = "/tmp/sample_parse_fsdistr_exponential.yaml" + with open(fn, "w") as f: + f.write("file-size-distribution: exponential\n") + parse_yaml(self.params, fn) + assert ( + self.params.master_invoke.filesize_distr + == smallfile.SmallfileWorkload.fsdistr_random_exponential + ) - def test_parse_fsdistr_exponential(self): - fn = '/tmp/sample_parse_fsdistr_exponential.yaml' - with open(fn, 'w') as f: - f.write('file-size-distribution: exponential\n') - parse_yaml(self.params, fn) - assert(self.params.master_invoke.filesize_distr == smallfile.SmallfileWorkload.fsdistr_random_exponential) + def test_parse_dir_list(self): + fn = "/tmp/sample_parse_dirlist.yaml" + with open(fn, "w") as f: + f.write("top: foo,bar \n") + parse_yaml(self.params, fn) + mydir = os.getcwd() + topdirs = [os.path.join(mydir, d) for d in ["foo", "bar"]] + assert self.params.top_dirs == topdirs - def test_parse_dir_list(self): - fn = '/tmp/sample_parse_dirlist.yaml' - with open(fn, 'w') as f: - f.write('top: foo,bar \n') - parse_yaml(self.params, fn) - mydir=os.getcwd() - topdirs = [ os.path.join(mydir, d) for d in [ 'foo', 'bar' ] ] - assert(self.params.top_dirs == topdirs) -if __name__ == '__main__': +if __name__ == "__main__": unittest_module.main() From 85ef3f332660333a97b3297d16b392a5125224d2 Mon Sep 17 00:00:00 2001 From: Peter Portante Date: Mon, 24 Oct 2022 10:41:18 -0400 Subject: [PATCH 2/4] Apply `isort --profile=black` --- invoke_process.py | 5 ++--- launch_smf_host.py | 9 +++++---- launcher_thread.py | 3 ++- multi_thread_workload.py | 16 ++++++++-------- output_results.py | 7 ++++--- parse.py | 19 +++++++++++++------ parse_slave.py | 9 +++++---- parser_data_types.py | 1 + profile_workload.py | 1 + smallfile.py | 21 +++++++++++---------- smallfile_cli.py | 30 ++++++++++++++++++------------ smallfile_rsptimes_stats.py | 9 +++++---- smf_test_params.py | 5 ++++- ssh_thread.py | 3 +-- yaml_parser.py | 23 ++++++++++++++++------- 15 files changed, 96 insertions(+), 65 deletions(-) diff --git a/invoke_process.py b/invoke_process.py index 9536068..2dba2e5 100644 --- a/invoke_process.py +++ b/invoke_process.py @@ -9,15 +9,14 @@ """ import multiprocessing -import shutil import os +import shutil import time import smallfile -from smallfile import unittest_module, SMFRunException +from smallfile import SMFRunException, unittest_module from sync_files import touch - # this class launches multiple threads with SmallfileWorkload instances # we do this because we can use > 1 core this way, with python threading, # it doesn't really use > 1 core because of the GIL (global lock) diff --git a/launch_smf_host.py b/launch_smf_host.py index 2230720..dd51417 100644 --- a/launch_smf_host.py +++ b/launch_smf_host.py @@ -29,13 +29,14 @@ # --substitute_top z:\smf # # -import sys -import os -import time import errno -import smallfile import logging +import os import socket +import sys +import time + +import smallfile OK = 0 NOTOK = 1 diff --git a/launcher_thread.py b/launcher_thread.py index 9802cab..927a85d 100644 --- a/launcher_thread.py +++ b/launcher_thread.py @@ -13,9 +13,10 @@ See Appendix on this page for instructions 
pertaining to license. """ -import threading import os +import threading import time + import smallfile from smallfile import ensure_deleted from sync_files import write_sync_file diff --git a/multi_thread_workload.py b/multi_thread_workload.py index b28e07c..cfd3e87 100644 --- a/multi_thread_workload.py +++ b/multi_thread_workload.py @@ -1,21 +1,21 @@ # -*- coding: utf-8 -*- +import copy import os +import random import sys import time -import random -import copy -import smallfile -from smallfile import OK, NOTOK, SMFResultException, SMFRunException, abort_test import invoke_process +import output_results +import smallfile +from smallfile import NOTOK, OK, SMFResultException, SMFRunException, abort_test from sync_files import ( + ensure_deleted, + ensure_dir_exists, touch, - write_sync_file, write_pickle, - ensure_dir_exists, - ensure_deleted, + write_sync_file, ) -import output_results def create_worker_list(prm): diff --git a/output_results.py b/output_results.py index 951e3aa..b006e2d 100644 --- a/output_results.py +++ b/output_results.py @@ -1,11 +1,12 @@ # -*- coding: utf-8 -*- -from copy import deepcopy -import os import json +import os import time +from copy import deepcopy + import smallfile -from smallfile import SMFResultException, KB_PER_GB, OK +from smallfile import KB_PER_GB, OK, SMFResultException BYTES_PER_KiB = 1024.0 KiB_PER_MiB = 1024.0 diff --git a/parse.py b/parse.py index bbc0949..9a8c8dd 100644 --- a/parse.py +++ b/parse.py @@ -7,13 +7,14 @@ See Appendix on this page for instructions pertaining to license. """ -import sys +import argparse import os +import sys + import smallfile -from smallfile import SmallfileWorkload, NOTOK import smf_test_params +from smallfile import NOTOK, SmallfileWorkload from smf_test_params import bool2YN -import argparse yaml_parser_installed = False try: @@ -25,9 +26,15 @@ pass import parser_data_types -from parser_data_types import SmfParseException -from parser_data_types import boolean, positive_integer, non_negative_integer -from parser_data_types import host_set, directory_list, file_size_distrib +from parser_data_types import ( + SmfParseException, + boolean, + directory_list, + file_size_distrib, + host_set, + non_negative_integer, + positive_integer, +) # parse command line # return smf_test_params.smf_test_params instance diff --git a/parse_slave.py b/parse_slave.py index 5bfa049..633bd4d 100644 --- a/parse_slave.py +++ b/parse_slave.py @@ -7,13 +7,14 @@ See Appendix on this page for instructions pertaining to license. 
""" -import sys -import os +import argparse import errno -import time +import os import pickle +import sys +import time + import smallfile -import argparse # parse command line and return unpickled test params # pass via --network-sync-dir option diff --git a/parser_data_types.py b/parser_data_types.py index 6fe20f4..32cd29a 100644 --- a/parser_data_types.py +++ b/parser_data_types.py @@ -1,5 +1,6 @@ import argparse import os + import smallfile from smallfile import SmallfileWorkload diff --git a/profile_workload.py b/profile_workload.py index a7590a0..1c120a4 100644 --- a/profile_workload.py +++ b/profile_workload.py @@ -5,6 +5,7 @@ import os import socket + import smallfile top = os.getenv("TOP") diff --git a/smallfile.py b/smallfile.py index 2b8502a..1de7ff2 100644 --- a/smallfile.py +++ b/smallfile.py @@ -29,21 +29,22 @@ # on Fedora 33 with python 3.9.2, unittest is built in and no package is needed +import codecs +import copy +import errno +import logging +import math import os import os.path -from os.path import exists, join -import sys -import time -import copy import random -import logging -import threading import socket -import errno -import codecs +import sys +import threading +import time +from os.path import exists, join from shutil import rmtree -import math -from sync_files import ensure_dir_exists, ensure_deleted, write_sync_file, touch + +from sync_files import ensure_deleted, ensure_dir_exists, touch, write_sync_file OK = 0 # system call return code for success NOTOK = 1 diff --git a/smallfile_cli.py b/smallfile_cli.py index e094611..f62f153 100755 --- a/smallfile_cli.py +++ b/smallfile_cli.py @@ -20,26 +20,32 @@ See Appendix on this page for instructions pertaining to license. """ -import sys +import errno import os import os.path -import errno +import pickle +import sys import time + +import launcher_thread +import multi_thread_workload +import output_results import parse -import pickle +import smallfile +import ssh_thread +import sync_files +from smallfile import ( + NOTOK, + OK, + SMFResultException, + SMFRunException, + ensure_deleted, + use_isAlive, +) # smallfile modules -import smallfile -from smallfile import ensure_deleted, SMFResultException, SMFRunException -from smallfile import OK, NOTOK -from smallfile import use_isAlive -import sync_files -import output_results -import multi_thread_workload -import launcher_thread -import ssh_thread # FIXME: should be monitoring progress, not total elapsed time diff --git a/smallfile_rsptimes_stats.py b/smallfile_rsptimes_stats.py index 0bab184..06ef116 100755 --- a/smallfile_rsptimes_stats.py +++ b/smallfile_rsptimes_stats.py @@ -25,16 +25,17 @@ # output in the 'start-time' field. # # -import sys -from sys import argv +import bisect import os -import string import re +import string +import sys +from sys import argv + import numpy import scipy import scipy.stats from scipy.stats import tmean, tstd -import bisect time_infinity = 1 << 62 diff --git a/smf_test_params.py b/smf_test_params.py index 26593b2..da71b68 100644 --- a/smf_test_params.py +++ b/smf_test_params.py @@ -4,7 +4,10 @@ # calculate timeouts to allow for initialization delays # while directory tree is created -import sys, os, smallfile +import os +import sys + +import smallfile # convert boolean value into 'Y' or 'N' diff --git a/ssh_thread.py b/ssh_thread.py index e3fca13..54b6e01 100644 --- a/ssh_thread.py +++ b/ssh_thread.py @@ -7,9 +7,8 @@ See Appendix on this page for instructions pertaining to license. 
""" -import threading import os - +import threading # this class is just used to create a python thread # for each remote host that we want to use as a workload generator diff --git a/yaml_parser.py b/yaml_parser.py index 8b125f7..9bbd57a 100644 --- a/yaml_parser.py +++ b/yaml_parser.py @@ -1,13 +1,22 @@ -import yaml -import smallfile -from smallfile import unittest_module import argparse +import os + +import yaml + import parser_data_types -from parser_data_types import SmfParseException, TypeExc -from parser_data_types import boolean, positive_integer, non_negative_integer -from parser_data_types import host_set, directory_list, file_size_distrib +import smallfile import smf_test_params -import os +from parser_data_types import ( + SmfParseException, + TypeExc, + boolean, + directory_list, + file_size_distrib, + host_set, + non_negative_integer, + positive_integer, +) +from smallfile import unittest_module # module to parse YAML input file containing smallfile parameters # YAML parameter names are identical to CLI parameter names From 42e9055ebadc72cbd62158118da6c45597f6d0b2 Mon Sep 17 00:00:00 2001 From: Peter Portante Date: Mon, 24 Oct 2022 10:42:17 -0400 Subject: [PATCH 3/4] Apply `black` again to fix `isort` change --- smallfile_cli.py | 1 - 1 file changed, 1 deletion(-) diff --git a/smallfile_cli.py b/smallfile_cli.py index f62f153..7ddd26e 100755 --- a/smallfile_cli.py +++ b/smallfile_cli.py @@ -46,7 +46,6 @@ # smallfile modules - # FIXME: should be monitoring progress, not total elapsed time min_files_per_sec = 15 From c55e984d57aca03301f9d7b134b69eb2b1d22f73 Mon Sep 17 00:00:00 2001 From: Peter Portante Date: Mon, 24 Oct 2022 10:58:35 -0400 Subject: [PATCH 4/4] Add a pre-commit hook --- .pre-commit-config.yaml | 16 ++++++++++++++++ requirements.txt | 1 + 2 files changed, 17 insertions(+) create mode 100644 .pre-commit-config.yaml create mode 100644 requirements.txt diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..cd19e60 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,16 @@ +--- +repos: + - repo: https://github.com/python/black.git + rev: 22.3.0 + hooks: + - id: black + name: black (python3) + language_version: python3 + args: ["--check"] + - repo: https://github.com/pycqa/isort + rev: 5.10.1 + hooks: + - id: isort + name: isort (python3) + language_version: python3 + args: ["--check"] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..416634f --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +pre-commit