diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3ad0ec21bb..b4eac5fa1a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,7 +32,7 @@ env: DOCKER_PACKAGE: ghcr.io/ansys/mapdl DOCKER_IMAGE_VERSION_DOCS_BUILD: v24.1-ubuntu-student ON_CI: True - PYTEST_ARGUMENTS: '-vv --durations=10 --maxfail=2 --reruns 2 --reruns-delay 1 --cov=ansys.mapdl.core --cov-report=html' + PYTEST_ARGUMENTS: '-vv --durations=10 --maxfail=3 --reruns 3 --reruns-delay 4 --cov=ansys.mapdl.core --cov-report=html' # Following env vars when changed will "reset" the mentioned cache, # by changing the cache file name. It is rendered as ...-v%RESET_XXX%-... @@ -496,6 +496,7 @@ jobs: P_SCHEMA: "/ansys_inc/v241/ansys/ac4/schema" PYTEST_TIMEOUT: 120 # seconds. Limit the duration for each unit test + steps: - name: "Install Git and checkout project" uses: actions/checkout@v4 diff --git a/pyproject.toml b/pyproject.toml index 7ed372c2da..9fca70e0dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,7 +74,6 @@ tests = [ "vtk==9.3.0", "pytest-rerunfailures==13.0", "pytest-pyvista==0.1.9", - "pytest-timeout==2.2.0", ] doc = [ "sphinx==7.2.6", diff --git a/src/ansys/mapdl/core/misc.py b/src/ansys/mapdl/core/misc.py index a0b1cf52a1..b92359b5bc 100644 --- a/src/ansys/mapdl/core/misc.py +++ b/src/ansys/mapdl/core/misc.py @@ -447,7 +447,9 @@ def threaded_daemon(func): @wraps(func) def wrapper(*args, **kwargs): - name = kwargs.get("name", f"Threaded (with Daemon) `{func.__name__}` function") + name = kwargs.pop( + "thread_name", f"Threaded (with Daemon) `{func.__name__}` function" + ) thread = Thread(target=func, name=name, args=args, kwargs=kwargs) thread.daemon = True thread.start() diff --git a/src/ansys/mapdl/core/pool.py b/src/ansys/mapdl/core/pool.py index 91196d72d2..e33ffb5fb9 100755 --- a/src/ansys/mapdl/core/pool.py +++ b/src/ansys/mapdl/core/pool.py @@ -240,7 +240,9 @@ def __init__( # threaded spawn threads = [ - self._spawn_mapdl(i, ports[i], pbar, 
name=self._names(i)) + self._spawn_mapdl( + i, ports[i], pbar, name=self._names(i), thread_name=self._names(i) + ) for i in range(n_instances) ] if wait: @@ -258,7 +260,10 @@ def __init__( # monitor pool if requested if restart_failed: - self._pool_monitor_thread = self._monitor_pool(name="Monitoring_Thread") + # `thread_name` is consumed by the `threaded_daemon` wrapper to name the thread + self._pool_monitor_thread = self._monitor_pool( + thread_name="Monitoring_Thread" + ) self._verify_unique_ports() @@ -383,12 +388,12 @@ def map( pbar = tqdm(total=n, desc="MAPDL Running") @threaded_daemon - def func_wrapper(obj, func, timeout, args=None, name=""): + def func_wrapper(obj, func, timeout, args=None): """Expect obj to be an instance of Mapdl""" complete = [False] @threaded_daemon - def run(name=""): + def run(): if args is not None: if isinstance(args, (tuple, list)): results.append(func(obj, *args)) @@ -398,7 +403,7 @@ def run(name=""): results.append(func(obj)) complete[0] = True - run_thread = run(name="map.run") + run_thread = run(thread_name="map.run") if timeout: tstart = time.time() while not complete[0]: @@ -443,7 +448,9 @@ def run(name=""): instance = self.next_available() instance.locked = True threads.append( - func_wrapper(instance, func, timeout, args, name="Map_Thread") + func_wrapper( + instance, func, timeout, args, thread_name="Map_Thread" + ) ) if close_when_finished: @@ -692,7 +699,7 @@ def _spawn_mapdl( self._instances[index] = launch_mapdl( run_location=run_location, port=port, - override=self._override, + override=True, **self._spawn_kwargs, ) @@ -701,6 +708,9 @@ def _spawn_mapdl( while self._instances[index] is None: time.sleep(0.1) + assert not self._instances[index].exited + self._instances[index].prep7() + # LOG.debug("Spawned instance %d. 
Name '%s'", index, name) if pbar is not None: pbar.update(1) @@ -708,22 +718,30 @@ def _spawn_mapdl( self._spawning_i -= 1 @threaded_daemon - def _monitor_pool(self, refresh=1.0, name=""): + def _monitor_pool(self, refresh=1.0): """Checks if instances within a pool have exited (failed) and restarts them. + + """ while self._active: for index, instance in enumerate(self._instances): + name = self._names[index] if not instance: # encountered placeholder continue + if instance._exited: try: # use the next port after the current available port self._spawning_i += 1 port = max(self._ports) + 1 self._spawn_mapdl( - index, port=port, name=f"Instance {index}" + index, + port=port, + name=name, + thread_name=name, ).join() + except Exception as e: LOG.error(e, exc_info=True) self._spawning_i -= 1 diff --git a/tests/conftest.py b/tests/conftest.py index 88e43660a8..e5adb1d7e7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -114,6 +114,8 @@ def has_dependency(requirement): try: + if os.name == "nt": + requirement = requirement.replace("-", ".") import_module(requirement) return True except ModuleNotFoundError: diff --git a/tests/test_pool.py b/tests/test_pool.py index c5ce8f0f76..bbbaff50c6 100644 --- a/tests/test_pool.py +++ b/tests/test_pool.py @@ -59,7 +59,7 @@ not os.path.isfile(MAPDL194PATH), reason="Requires MAPDL 194" ) -TWAIT = 90 +TWAIT = 100 NPROC = 1 @@ -88,7 +88,7 @@ def pool(tmpdir_factory): while len(mapdl_pool) != 0: time.sleep(0.1) if time.time() > timeout: - raise TimeoutError(f"Failed to restart instance in {TWAIT} seconds") + raise TimeoutError(f"Failed to kill instance in {TWAIT} seconds") assert len(mapdl_pool) == 0 @@ -110,13 +110,14 @@ def test_invalid_exec(): ) -@pytest.mark.xfail(strict=False, reason="Flaky test. See #2435") +# @pytest.mark.xfail(strict=False, reason="Flaky test. 
See #2435") @requires("local") def test_heal(pool): pool_sz = len(pool) + pool_names = pool._names # copy pool names + + # Killing one instance pool[0].exit() - pool[1].exit() - pool[2].exit() time.sleep(1) # wait for shutdown timeout = time.time() + TWAIT @@ -125,6 +126,7 @@ def test_heal(pool): if time.time() > timeout: raise TimeoutError(f"Failed to restart instance in {TWAIT} seconds") + assert pool._names == pool_names assert len(pool) == pool_sz pool._verify_unique_ports() @@ -151,10 +153,12 @@ def func(mapdl, tsleep): timeout = 2 times = np.array([0, 1, 3, 4]) - output = pool.map(func, times, timeout=timeout) + output = pool.map(func, times, timeout=timeout, wait=True) + assert len(output) == (times < timeout).sum() - # wait for the pool to heal before continuing + # the timeout option kills the MAPDL instance when we reach the timeout. + # Let's wait for the pool to heal before continuing timeout = time.time() + TWAIT while len(pool) < pool_sz: time.sleep(0.1)