Skip to content

Commit

Permalink
Merge branch 'main' into conda_executor
Browse files Browse the repository at this point in the history
  • Loading branch information
jan-janssen authored Feb 1, 2024
2 parents 44a1e3d + aa1e87e commit c6f065e
Show file tree
Hide file tree
Showing 20 changed files with 163 additions and 100 deletions.
3 changes: 2 additions & 1 deletion .ci_support/environment-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@ channels:
dependencies:
- nbsphinx
- sphinx
- sphinx_rtd_theme
- myst-parser
- numpy
- openmpi
- cloudpickle =3.0.0
- mpi4py =3.1.5
- tqdm =4.66.1
- pyzmq =25.1.2
- flux-core
- flux-core
35 changes: 0 additions & 35 deletions .github/workflows/check-macos-latest.yml

This file was deleted.

2 changes: 1 addition & 1 deletion .github/workflows/unittest-mpich.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
strategy:
matrix:
include:
- operating-system: macos-11
- operating-system: macos-latest
python-version: '3.12'
label: osx-64-py-3-12-mpich
prefix: /Users/runner/miniconda3/envs/my-env
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/unittest-openmpi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
strategy:
matrix:
include:
- operating-system: macos-11
- operating-system: macos-latest
python-version: '3.12'
label: osx-64-py-3-12-openmpi
prefix: /Users/runner/miniconda3/envs/my-env
Expand Down
Binary file added docs/_static/pyiron-logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/_static/pyiron_logo.ico
Binary file not shown.
9 changes: 8 additions & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,14 @@
# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = 'alabaster'
try:
import sphinx_rtd_theme
html_theme = 'sphinx_rtd_theme'
html_logo = "../_static/pyiron-logo.png"
html_favicon = "../_static/pyiron_logo.ico"
except ImportError:
html_theme = 'alabaster'

html_static_path = ['_static']


Expand Down
27 changes: 20 additions & 7 deletions pympipool/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,13 @@ class Executor:
oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI only) - default False
init_function (None): optional function to preset arguments for functions which are submitted later
cwd (str/None): current working directory where the parallel python task is executed
sleep_interval (float): synchronization interval - default 0.1
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
context of an HPC cluster this essential to be able to communicate to an
Executor running on a different compute node within the same allocation. And
in principle any computer should be able to resolve that their own hostname
points to the same address as localhost. Still MacOS >= 12 seems to disable
this look up for security reasons. So on MacOS it is required to set this
option to true
Examples:
```
Expand Down Expand Up @@ -70,8 +76,8 @@ def __init__(
oversubscribe=False,
init_function=None,
cwd=None,
sleep_interval=0.1,
executor=None,
hostname_localhost=False,
):
# Use __new__() instead of __init__(). This function is only implemented to enable auto-completion.
pass
Expand All @@ -85,8 +91,8 @@ def __new__(
oversubscribe=False,
init_function=None,
cwd=None,
sleep_interval=0.1,
executor=None,
hostname_localhost=False,
):
"""
Instead of returning a pympipool.Executor object this function returns either a pympipool.mpi.PyMPIExecutor,
Expand All @@ -104,7 +110,14 @@ def __new__(
oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI only) - default False
init_function (None): optional function to preset arguments for functions which are submitted later
cwd (str/None): current working directory where the parallel python task is executed
sleep_interval (float): synchronization interval - default 0.1
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
context of an HPC cluster this essential to be able to communicate to an
Executor running on a different compute node within the same allocation. And
in principle any computer should be able to resolve that their own hostname
points to the same address as localhost. Still MacOS >= 12 seems to disable
this look up for security reasons. So on MacOS it is required to set this
option to true
"""
if flux_installed:
if oversubscribe:
Expand All @@ -119,15 +132,15 @@ def __new__(
gpus_per_worker=gpus_per_worker,
init_function=init_function,
cwd=cwd,
sleep_interval=sleep_interval,
hostname_localhost=hostname_localhost,
)
elif slurm_installed:
return PySlurmExecutor(
max_workers=max_workers,
cores_per_worker=cores_per_worker,
init_function=init_function,
cwd=cwd,
sleep_interval=sleep_interval,
hostname_localhost=hostname_localhost,
)
else:
if threads_per_core != 1:
Expand All @@ -149,5 +162,5 @@ def __new__(
cores_per_worker=cores_per_worker,
init_function=init_function,
cwd=cwd,
sleep_interval=sleep_interval,
hostname_localhost=hostname_localhost,
)
22 changes: 19 additions & 3 deletions pympipool/flux/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,14 @@ class PyFluxExecutor(ExecutorBase):
gpus_per_worker (int): number of GPUs per worker - defaults to 0
init_function (None): optional function to preset arguments for functions which are submitted later
cwd (str/None): current working directory where the parallel python task is executed
sleep_interval (float): synchronization interval - default 0.1
executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
context of an HPC cluster this essential to be able to communicate to an
Executor running on a different compute node within the same allocation. And
in principle any computer should be able to resolve that their own hostname
points to the same address as localhost. Still MacOS >= 12 seems to disable
this look up for security reasons. So on MacOS it is required to set this
option to true
Examples:
Expand Down Expand Up @@ -58,8 +64,8 @@ def __init__(
gpus_per_worker=0,
init_function=None,
cwd=None,
sleep_interval=0.1,
executor=None,
hostname_localhost=False,
):
super().__init__()
self._process = RaisingThread(
Expand All @@ -68,7 +74,7 @@ def __init__(
# Broker Arguments
"future_queue": self._future_queue,
"max_workers": max_workers,
"sleep_interval": sleep_interval,
"hostname_localhost": hostname_localhost,
"executor_class": PyFluxSingleTaskExecutor,
# Executor Arguments
"cores": cores_per_worker,
Expand All @@ -93,6 +99,14 @@ class PyFluxSingleTaskExecutor(ExecutorBase):
init_function (None): optional function to preset arguments for functions which are submitted later
cwd (str/None): current working directory where the parallel python task is executed
executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
context of an HPC cluster this essential to be able to communicate to an
Executor running on a different compute node within the same allocation. And
in principle any computer should be able to resolve that their own hostname
points to the same address as localhost. Still MacOS >= 12 seems to disable
this look up for security reasons. So on MacOS it is required to set this
option to true
"""

def __init__(
Expand All @@ -103,6 +117,7 @@ def __init__(
init_function=None,
cwd=None,
executor=None,
hostname_localhost=False,
):
super().__init__()
self._process = RaisingThread(
Expand All @@ -112,6 +127,7 @@ def __init__(
"future_queue": self._future_queue,
"cores": cores,
"interface_class": FluxPythonInterface,
"hostname_localhost": hostname_localhost,
# Interface Arguments
"threads_per_core": threads_per_core,
"gpus_per_core": gpus_per_task,
Expand Down
21 changes: 18 additions & 3 deletions pympipool/mpi/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,13 @@ class PyMPIExecutor(ExecutorBase):
oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI only) - default False
init_function (None): optional function to preset arguments for functions which are submitted later
cwd (str/None): current working directory where the parallel python task is executed
sleep_interval (float): synchronization interval - default 0.1
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
context of an HPC cluster this essential to be able to communicate to an
Executor running on a different compute node within the same allocation. And
in principle any computer should be able to resolve that their own hostname
points to the same address as localhost. Still MacOS >= 12 seems to disable
this look up for security reasons. So on MacOS it is required to set this
option to true
Examples:
Expand Down Expand Up @@ -52,7 +58,7 @@ def __init__(
oversubscribe=False,
init_function=None,
cwd=None,
sleep_interval=0.1,
hostname_localhost=False,
):
super().__init__()
self._process = RaisingThread(
Expand All @@ -61,8 +67,8 @@ def __init__(
# Broker Arguments
"future_queue": self._future_queue,
"max_workers": max_workers,
"sleep_interval": sleep_interval,
"executor_class": PyMPISingleTaskExecutor,
"hostname_localhost": hostname_localhost,
# Executor Arguments
"cores": cores_per_worker,
"oversubscribe": oversubscribe,
Expand All @@ -82,6 +88,13 @@ class PyMPISingleTaskExecutor(ExecutorBase):
oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI only) - default False
init_function (None): optional function to preset arguments for functions which are submitted later
cwd (str/None): current working directory where the parallel python task is executed
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
context of an HPC cluster this essential to be able to communicate to an
Executor running on a different compute node within the same allocation. And
in principle any computer should be able to resolve that their own hostname
points to the same address as localhost. Still MacOS >= 12 seems to disable
this look up for security reasons. So on MacOS it is required to set this
option to true
"""

Expand All @@ -91,6 +104,7 @@ def __init__(
oversubscribe=False,
init_function=None,
cwd=None,
hostname_localhost=False,
):
super().__init__()
self._process = RaisingThread(
Expand All @@ -103,6 +117,7 @@ def __init__(
# Interface Arguments
"cwd": cwd,
"oversubscribe": oversubscribe,
"hostname_localhost": hostname_localhost,
},
)
self._process.start()
Expand Down
28 changes: 24 additions & 4 deletions pympipool/shared/communication.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,31 @@ def __del__(self):
def interface_bootup(
command_lst,
connections,
hostname_localhost=False,
):
command_lst += [
"--host",
gethostname(),
]
"""
Start interface for ZMQ communication
Args:
command_lst (list): List of commands as strings
connections (pympipool.shared.interface.BaseInterface): Interface to start parallel process, like MPI, SLURM or
Flux
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
context of an HPC cluster this essential to be able to communicate to an
Executor running on a different compute node within the same allocation. And
in principle any computer should be able to resolve that their own hostname
points to the same address as localhost. Still MacOS >= 12 seems to disable
this look up for security reasons. So on MacOS it is required to set this
option to true
Returns:
pympipool.shared.communication.SocketInterface: socket interface for zmq communication
"""
if not hostname_localhost:
command_lst += [
"--host",
gethostname(),
]
interface = SocketInterface(interface=connections)
command_lst += [
"--zmqport",
Expand Down
28 changes: 16 additions & 12 deletions pympipool/shared/executorbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import os
import queue
import sys
from time import sleep

import cloudpickle

Expand Down Expand Up @@ -119,6 +118,7 @@ def execute_parallel_tasks(
future_queue,
cores,
interface_class,
hostname_localhost=False,
**kwargs,
):
"""
Expand All @@ -128,11 +128,19 @@ def execute_parallel_tasks(
future_queue (queue.Queue): task queue of dictionary objects which are submitted to the parallel process
cores (int): defines the total number of MPI ranks to use
interface_class:
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
context of an HPC cluster this essential to be able to communicate to an
Executor running on a different compute node within the same allocation. And
in principle any computer should be able to resolve that their own hostname
points to the same address as localhost. Still MacOS >= 12 seems to disable
this look up for security reasons. So on MacOS it is required to set this
option to true
"""
execute_parallel_tasks_loop(
interface=interface_bootup(
command_lst=_get_backend_path(cores=cores),
connections=interface_class(cores=cores, **kwargs),
hostname_localhost=hostname_localhost,
),
future_queue=future_queue,
)
Expand Down Expand Up @@ -167,7 +175,6 @@ def executor_broker(
future_queue,
max_workers,
executor_class,
sleep_interval=0.1,
**kwargs,
):
meta_future_lst = _get_executor_dict(
Expand All @@ -176,17 +183,14 @@ def executor_broker(
**kwargs,
)
while True:
try:
task_dict = future_queue.get_nowait()
except queue.Empty:
sleep(sleep_interval)
if execute_task_dict(
task_dict=future_queue.get(), meta_future_lst=meta_future_lst
):
future_queue.task_done()
else:
if execute_task_dict(task_dict=task_dict, meta_future_lst=meta_future_lst):
future_queue.task_done()
else:
future_queue.task_done()
future_queue.join()
break
future_queue.task_done()
future_queue.join()
break


def execute_task_dict(task_dict, meta_future_lst):
Expand Down
Loading

0 comments on commit c6f065e

Please sign in to comment.