diff --git a/ci/unit/test_main_node.sh b/ci/unit/test_main_node.sh index 1ebbfb43e2a..bdda1ea0e3d 100755 --- a/ci/unit/test_main_node.sh +++ b/ci/unit/test_main_node.sh @@ -11,7 +11,6 @@ if grep /mnt/daos\ /proc/mounts; then fi sudo mkdir -p /mnt/daos -sudo mount -t tmpfs -o size=16G tmpfs /mnt/daos sudo mkdir -p "$DAOS_BASE" sudo mount -t nfs "$HOSTNAME":"$HOSTPWD" "$DAOS_BASE" sudo cp "$DAOS_BASE/install/bin/daos_admin" /usr/bin/daos_admin @@ -46,8 +45,10 @@ fi cd "$DAOS_BASE" if ${NLT:-false}; then mkdir -p vm_test + # NLT will mount /mnt/daos itself. ./utils/node_local_test.py --output-file=vm_test/nlt-errors.json all else + sudo mount -t tmpfs -o size=16G tmpfs /mnt/daos IS_CI=true OLD_CI=false RUN_TEST_VALGRIND="$WITH_VALGRIND" utils/run_test.sh if [ "$WITH_VALGRIND" == 'memcheck' ]; then diff --git a/utils/nlt_server.yaml b/utils/nlt_server.yaml index 37e6fbc8804..14e56a54da3 100644 --- a/utils/nlt_server.yaml +++ b/utils/nlt_server.yaml @@ -23,4 +23,4 @@ servers: - FI_SOCKETS_CONN_TIMEOUT=2000 scm_mount: /mnt/daos scm_class: ram - scm_size: 4 + scm_size: 32 diff --git a/utils/node_local_test.py b/utils/node_local_test.py index 0f0e103f86c..f9680d730cd 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -358,8 +358,7 @@ def start(self): server_env['PATH']) cmd = [daos_server, '--config={}'.format(self._yaml_file.name), - 'start', '-t' '4', '--insecure', '-d', self.agent_dir, - '--recreate-superblocks'] + 'start', '-t' '4', '--insecure', '-d', self.agent_dir] server_env['DAOS_DISABLE_REQ_FWD'] = '1' self._sp = subprocess.Popen(cmd, env=server_env) @@ -384,6 +383,23 @@ def start(self): # Use dmg to block until the server is ready to respond to requests. start = time.time() + + while True: + time.sleep(0.5) + rc = self.run_dmg(['storage', 'format']) + ready = False + if rc.returncode == 1: + for line in rc.stdout.decode('utf-8').splitlines(): + if 'format storage of running instance' in line: + ready = True + + if ready: + break + if time.time() - start > 20: + raise Exception("Failed to format") + + print('Format completion in {:.2f} seconds'.format(time.time() - start)) + while True: time.sleep(0.5) rc = self.run_dmg(['system', 'query']) @@ -391,7 +407,7 @@ def start(self): if rc.returncode == 0: for line in rc.stdout.decode('utf-8').splitlines(): if line.startswith('status'): - if 'Ready' in line or 'Joined' in line: + if 'Joined' in line: ready = True if ready: @@ -410,13 +426,12 @@ def stop(self): if not self._sp: return rc = self.run_dmg(['system', 'stop']) - print(rc) + assert rc.returncode == 0 start = time.time() while True: time.sleep(0.5) rc = self.run_dmg(['system', 'query']) - print(rc) ready = False if rc.returncode == 0: for line in rc.stdout.decode('utf-8').splitlines(): @@ -496,7 +511,9 @@ def run_dmg(self, cmd): exe_cmd.append('--insecure') exe_cmd.extend(cmd) - return subprocess.run(exe_cmd, stdout=subprocess.PIPE) + return subprocess.run(exe_cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) def il_cmd(dfuse, cmd, check_read=True, check_write=True): """Run a command under the interception library