From 82b5b7cb8caa8e3b5b5e3c0e565d007832823a01 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 4 Aug 2023 19:06:41 -0600 Subject: [PATCH 001/134] Use test discover --- .github/workflows/unittest-mpich.yml | 2 +- .github/workflows/unittest-openmpi.yml | 2 +- .github/workflows/unittest-win.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/unittest-mpich.yml b/.github/workflows/unittest-mpich.yml index 5268df86..0189ead1 100644 --- a/.github/workflows/unittest-mpich.yml +++ b/.github/workflows/unittest-mpich.yml @@ -57,4 +57,4 @@ jobs: - name: Test shell: bash -l {0} timeout-minutes: 5 - run: for f in $(ls tests/test_*.py); do echo $f; python -m unittest $f; done + run: python -m unittest discover tests diff --git a/.github/workflows/unittest-openmpi.yml b/.github/workflows/unittest-openmpi.yml index 7a99f59d..178f7443 100644 --- a/.github/workflows/unittest-openmpi.yml +++ b/.github/workflows/unittest-openmpi.yml @@ -57,7 +57,7 @@ jobs: - name: Test shell: bash -l {0} timeout-minutes: 5 - run: for f in $(ls tests/test_*.py); do echo $f; python -m unittest $f; done + run: python -m unittest discover tests env: OMPI_MCA_plm: 'isolated' OMPI_MCA_rmaps_base_oversubscribe: 'yes' diff --git a/.github/workflows/unittest-win.yml b/.github/workflows/unittest-win.yml index 6d196e9c..5467292f 100644 --- a/.github/workflows/unittest-win.yml +++ b/.github/workflows/unittest-win.yml @@ -34,4 +34,4 @@ jobs: - name: Test shell: bash -l {0} timeout-minutes: 5 - run: for f in $(ls tests/test_*.py); do echo $f; python -m unittest $f; done + run: python -m unittest discover tests From 2a86aebebf74be0aba3b77672e5790718e1a2726 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 4 Oct 2023 13:01:37 +0000 Subject: [PATCH 002/134] Bump mpi4py from 3.1.4 to 3.1.5 Bumps [mpi4py](https://github.com/mpi4py/mpi4py) from 3.1.4 to 3.1.5. 
- [Release notes](https://github.com/mpi4py/mpi4py/releases) - [Changelog](https://github.com/mpi4py/mpi4py/blob/master/CHANGES.rst) - [Commits](https://github.com/mpi4py/mpi4py/compare/3.1.4...3.1.5) --- updated-dependencies: - dependency-name: mpi4py dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7f02c491..9331b686 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ packages=find_packages(exclude=["*tests*", "*.ci_support*"]), install_requires=[ 'cloudpickle==2.2.1', - 'mpi4py==3.1.4', + 'mpi4py==3.1.5', 'tqdm==4.66.1', 'pyzmq==24.0.1', ], From 595902ff541d67f505ce3ca6e117453fe40dd8b7 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Wed, 4 Oct 2023 15:49:59 +0200 Subject: [PATCH 003/134] Update environment-mpich.yml --- .ci_support/environment-mpich.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci_support/environment-mpich.yml b/.ci_support/environment-mpich.yml index 46e3b286..e5f40b21 100644 --- a/.ci_support/environment-mpich.yml +++ b/.ci_support/environment-mpich.yml @@ -5,6 +5,6 @@ dependencies: - numpy - mpich - cloudpickle =2.2.1 -- mpi4py =3.1.4 +- mpi4py =3.1.5 - tqdm =4.66.1 - pyzmq =24.0.1 From af70ceb88beffda7eacfd98c05f06a904c110fd3 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Wed, 4 Oct 2023 15:50:10 +0200 Subject: [PATCH 004/134] Update environment-openmpi.yml --- .ci_support/environment-openmpi.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci_support/environment-openmpi.yml b/.ci_support/environment-openmpi.yml index e4a2f862..7cb87df5 100644 --- a/.ci_support/environment-openmpi.yml +++ b/.ci_support/environment-openmpi.yml @@ -5,6 +5,6 @@ dependencies: - numpy - openmpi - cloudpickle =2.2.1 -- mpi4py =3.1.4 +- mpi4py =3.1.5 - tqdm =4.66.1 - pyzmq =24.0.1 From 16a3e724f8eed9e68348277c5ab82b9ab1b87631 Mon Sep 17 00:00:00 
2001 From: Jan Janssen Date: Wed, 4 Oct 2023 15:50:21 +0200 Subject: [PATCH 005/134] Update environment-win.yml --- .ci_support/environment-win.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci_support/environment-win.yml b/.ci_support/environment-win.yml index a7356492..6b93f026 100644 --- a/.ci_support/environment-win.yml +++ b/.ci_support/environment-win.yml @@ -5,6 +5,6 @@ dependencies: - numpy - msmpi - cloudpickle =2.2.1 -- mpi4py =3.1.4 +- mpi4py =3.1.5 - tqdm =4.66.1 - pyzmq =24.0.1 From de2fad24fabc836f0f581e01458a2ab49dae4347 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Thu, 12 Oct 2023 09:17:29 -0700 Subject: [PATCH 006/134] Add gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..e3649bcf --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.pyc +.DS_Store +.coverage +.idea/ From d0c6676c361ec3b5846ed4fb44f9d4dfcb07d495 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Thu, 12 Oct 2023 09:53:05 -0700 Subject: [PATCH 007/134] Reproduce and split up pyiron_workflow executor tests --- tests/unpicklable_elements/__init__.py | 21 ++++++ tests/unpicklable_elements/test_args.py | 69 ++++++++++++++++++ tests/unpicklable_elements/test_callable.py | 74 ++++++++++++++++++++ tests/unpicklable_elements/test_exception.py | 55 +++++++++++++++ tests/unpicklable_elements/test_return.py | 74 ++++++++++++++++++++ tests/unpicklable_elements/test_timeout.py | 67 ++++++++++++++++++ 6 files changed, 360 insertions(+) create mode 100644 tests/unpicklable_elements/__init__.py create mode 100644 tests/unpicklable_elements/test_args.py create mode 100644 tests/unpicklable_elements/test_callable.py create mode 100644 tests/unpicklable_elements/test_exception.py create mode 100644 tests/unpicklable_elements/test_return.py create mode 100644 tests/unpicklable_elements/test_timeout.py diff --git a/tests/unpicklable_elements/__init__.py 
b/tests/unpicklable_elements/__init__.py new file mode 100644 index 00000000..7411989d --- /dev/null +++ b/tests/unpicklable_elements/__init__.py @@ -0,0 +1,21 @@ +""" +`pympipool` should be able to handle the case where _no_ elements of the execution can +be pickled with the traditional `pickle` module but rather require `cloudpickle`. + +This is particularly important for compatibility with `pyiron_workflow`, which +dynamically defines (unpickleable) all sorts of objects. + +Currently, `pyiron_workflow` defines its own executor, +`pyiron_workflow.executors.CloudPickleProcessPool`, which can handle these unpickleable + things, but is otherwise very primitive compared to + `pympipool.mpi.executor.PyMPISingleTaskExecutor`. + +Simply replacing `CloudPickleProcessPool` with `PyMPISingleTaskExecutor` in the +`pyiron_atomistics` tests mostly works OK, and work perfectly when the tests are ported +to a notebook, but some tests hang indefinitely on CI and running unittests locally. + +To debug this, we break the tests up into their individual components (so hanging +doesn't stop us from seeing the results of other tests). Once everything is running, +these can be re-condensed into a single test file and this entire subdirectory can be +deleted. +""" diff --git a/tests/unpicklable_elements/test_args.py b/tests/unpicklable_elements/test_args.py new file mode 100644 index 00000000..241c4e53 --- /dev/null +++ b/tests/unpicklable_elements/test_args.py @@ -0,0 +1,69 @@ +from functools import partialmethod +from time import sleep +import unittest + +from pympipool.mpi.executor import PyMPISingleTaskExecutor + + +class Foo: + """ + A base class to be dynamically modified for testing CloudpickleProcessPoolExecutor. 
+ """ + def __init__(self, fnc: callable): + self.fnc = fnc + self.result = None + + @property + def run(self): + return self.fnc + + def process_result(self, future): + self.result = future.result() + + +def dynamic_foo(): + """ + A decorator for dynamically modifying the Foo class to test + CloudpickleProcessPoolExecutor. + + Overrides the `fnc` input of `Foo` with the decorated function. + """ + def as_dynamic_foo(fnc: callable): + return type( + "DynamicFoo", + (Foo,), # Define parentage + { + "__init__": partialmethod( + Foo.__init__, + fnc + ) + }, + ) + + return as_dynamic_foo + +class TestUnpickleableElements(unittest.TestCase): + def test_unpickleable_args(self): + """ + We should be able to use an unpickleable return value -- in this case, a + method of a dynamically defined class. + """ + + @dynamic_foo() + def does_nothing(): + return + + @dynamic_foo() + def slowly_returns_unpickleable(unpickleable_arg): + """ + Returns a complex, dynamically defined variable + """ + sleep(0.1) + unpickleable_arg.result = "input updated" + return unpickleable_arg + + dynamic_dynamic = slowly_returns_unpickleable() + executor = PyMPISingleTaskExecutor() + unpicklable_object = does_nothing() + fs = executor.submit(dynamic_dynamic.run, unpicklable_object) + self.assertEqual(fs.result().result, "input updated") diff --git a/tests/unpicklable_elements/test_callable.py b/tests/unpicklable_elements/test_callable.py new file mode 100644 index 00000000..3bbb2ed8 --- /dev/null +++ b/tests/unpicklable_elements/test_callable.py @@ -0,0 +1,74 @@ +from functools import partialmethod +from time import sleep +import unittest + +from pympipool.mpi.executor import PyMPISingleTaskExecutor + + +class Foo: + """ + A base class to be dynamically modified for testing CloudpickleProcessPoolExecutor. 
+ """ + def __init__(self, fnc: callable): + self.fnc = fnc + self.result = None + + @property + def run(self): + return self.fnc + + def process_result(self, future): + self.result = future.result() + + +def dynamic_foo(): + """ + A decorator for dynamically modifying the Foo class to test + CloudpickleProcessPoolExecutor. + + Overrides the `fnc` input of `Foo` with the decorated function. + """ + def as_dynamic_foo(fnc: callable): + return type( + "DynamicFoo", + (Foo,), # Define parentage + { + "__init__": partialmethod( + Foo.__init__, + fnc + ) + }, + ) + + return as_dynamic_foo + + +class TestUnpickleableElements(unittest.TestCase): + def test_unpickleable_callable(self): + """ + We should be able to use an unpickleable callable -- in this case, a method of + a dynamically defined class. + """ + fortytwo = 42 # No magic numbers; we use it in a couple places so give it a var + + @dynamic_foo() + def slowly_returns_42(): + sleep(0.1) + return fortytwo + + dynamic_42 = slowly_returns_42() # Instantiate the dynamically defined class + self.assertIsInstance( + dynamic_42, + Foo, + msg="Just a sanity check that the test is set up right" + ) + self.assertIsNone( + dynamic_42.result, + msg="Just a sanity check that the test is set up right" + ) + executor = PyMPISingleTaskExecutor() + fs = executor.submit(dynamic_42.run) + fs.add_done_callback(dynamic_42.process_result) + self.assertFalse(fs.done(), msg="Should be running on the executor") + self.assertEqual(fortytwo, fs.result(), msg="Future must complete") + self.assertEqual(fortytwo, dynamic_42.result, msg="Callback must get called") \ No newline at end of file diff --git a/tests/unpicklable_elements/test_exception.py b/tests/unpicklable_elements/test_exception.py new file mode 100644 index 00000000..fe0a33f6 --- /dev/null +++ b/tests/unpicklable_elements/test_exception.py @@ -0,0 +1,55 @@ +from functools import partialmethod +import unittest + +from pympipool.mpi.executor import PyMPISingleTaskExecutor + + +class 
Foo: + """ + A base class to be dynamically modified for testing CloudpickleProcessPoolExecutor. + """ + def __init__(self, fnc: callable): + self.fnc = fnc + self.result = None + + @property + def run(self): + return self.fnc + + def process_result(self, future): + self.result = future.result() + + +def dynamic_foo(): + """ + A decorator for dynamically modifying the Foo class to test + CloudpickleProcessPoolExecutor. + + Overrides the `fnc` input of `Foo` with the decorated function. + """ + def as_dynamic_foo(fnc: callable): + return type( + "DynamicFoo", + (Foo,), # Define parentage + { + "__init__": partialmethod( + Foo.__init__, + fnc + ) + }, + ) + + return as_dynamic_foo + + +class TestUnpickleableElements(unittest.TestCase): + def test_exception(self): + @dynamic_foo() + def raise_error(): + raise RuntimeError + + re = raise_error() + executor = PyMPISingleTaskExecutor() + fs = executor.submit(re.run) + with self.assertRaises(RuntimeError): + fs.result() diff --git a/tests/unpicklable_elements/test_return.py b/tests/unpicklable_elements/test_return.py new file mode 100644 index 00000000..b54dd066 --- /dev/null +++ b/tests/unpicklable_elements/test_return.py @@ -0,0 +1,74 @@ +from functools import partialmethod +from time import sleep +import unittest + +from pympipool.mpi.executor import PyMPISingleTaskExecutor + + +class Foo: + """ + A base class to be dynamically modified for testing CloudpickleProcessPoolExecutor. + """ + def __init__(self, fnc: callable): + self.fnc = fnc + self.result = None + + @property + def run(self): + return self.fnc + + def process_result(self, future): + self.result = future.result() + + +def dynamic_foo(): + """ + A decorator for dynamically modifying the Foo class to test + CloudpickleProcessPoolExecutor. + + Overrides the `fnc` input of `Foo` with the decorated function. 
+ """ + def as_dynamic_foo(fnc: callable): + return type( + "DynamicFoo", + (Foo,), # Define parentage + { + "__init__": partialmethod( + Foo.__init__, + fnc + ) + }, + ) + + return as_dynamic_foo + + +class TestUnpickleableElements(unittest.TestCase): + def test_unpickleable_return(self): + """ + We should be able to use an unpickleable return value -- in this case, a + method of a dynamically defined class. + """ + + @dynamic_foo() + def does_nothing(): + return + + @dynamic_foo() + def slowly_returns_unpickleable(): + """ + Returns a complex, dynamically defined variable + """ + sleep(0.1) + inside_variable = does_nothing() + inside_variable.result = "it was an inside job!" + return inside_variable + + dynamic_dynamic = slowly_returns_unpickleable() + executor = PyMPISingleTaskExecutor() + fs = executor.submit(dynamic_dynamic.run) + self.assertIsInstance( + fs.result(), + Foo, + ) + self.assertEqual(fs.result().result, "it was an inside job!") \ No newline at end of file diff --git a/tests/unpicklable_elements/test_timeout.py b/tests/unpicklable_elements/test_timeout.py new file mode 100644 index 00000000..3a0c2e29 --- /dev/null +++ b/tests/unpicklable_elements/test_timeout.py @@ -0,0 +1,67 @@ +from functools import partialmethod +from concurrent.futures import TimeoutError +from time import sleep +import unittest + +from pympipool.mpi.executor import PyMPISingleTaskExecutor + + +class Foo: + """ + A base class to be dynamically modified for testing CloudpickleProcessPoolExecutor. + """ + def __init__(self, fnc: callable): + self.fnc = fnc + self.result = None + + @property + def run(self): + return self.fnc + + def process_result(self, future): + self.result = future.result() + + +def dynamic_foo(): + """ + A decorator for dynamically modifying the Foo class to test + CloudpickleProcessPoolExecutor. + + Overrides the `fnc` input of `Foo` with the decorated function. 
+ """ + def as_dynamic_foo(fnc: callable): + return type( + "DynamicFoo", + (Foo,), # Define parentage + { + "__init__": partialmethod( + Foo.__init__, + fnc + ) + }, + ) + + return as_dynamic_foo + + +class TestUnpickleableElements(unittest.TestCase): + def test_timeout(self): + fortytwo = 42 + + @dynamic_foo() + def slow(): + sleep(0.1) + return fortytwo + + f = slow() + executor = PyMPISingleTaskExecutor() + fs = executor.submit(f.run) + self.assertEqual( + fs.result(timeout=30), + fortytwo, + msg="waiting long enough should get the result" + ) + + with self.assertRaises(TimeoutError): + fs = executor.submit(f.run) + fs.result(timeout=0.0001) From 17285246976f7dafa02d3eab9dee2a15a3587f64 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Thu, 12 Oct 2023 10:47:20 -0700 Subject: [PATCH 008/134] Flatten the tests so the CI command can actually see them --- tests/{unpicklable_elements => }/test_args.py | 0 .../test_callable.py | 0 .../test_exception.py | 0 .../{unpicklable_elements => }/test_return.py | 0 .../test_timeout.py | 0 tests/unpicklable_elements/__init__.py | 21 ------------------- 6 files changed, 21 deletions(-) rename tests/{unpicklable_elements => }/test_args.py (100%) rename tests/{unpicklable_elements => }/test_callable.py (100%) rename tests/{unpicklable_elements => }/test_exception.py (100%) rename tests/{unpicklable_elements => }/test_return.py (100%) rename tests/{unpicklable_elements => }/test_timeout.py (100%) delete mode 100644 tests/unpicklable_elements/__init__.py diff --git a/tests/unpicklable_elements/test_args.py b/tests/test_args.py similarity index 100% rename from tests/unpicklable_elements/test_args.py rename to tests/test_args.py diff --git a/tests/unpicklable_elements/test_callable.py b/tests/test_callable.py similarity index 100% rename from tests/unpicklable_elements/test_callable.py rename to tests/test_callable.py diff --git a/tests/unpicklable_elements/test_exception.py b/tests/test_exception.py similarity index 100% rename from 
tests/unpicklable_elements/test_exception.py rename to tests/test_exception.py diff --git a/tests/unpicklable_elements/test_return.py b/tests/test_return.py similarity index 100% rename from tests/unpicklable_elements/test_return.py rename to tests/test_return.py diff --git a/tests/unpicklable_elements/test_timeout.py b/tests/test_timeout.py similarity index 100% rename from tests/unpicklable_elements/test_timeout.py rename to tests/test_timeout.py diff --git a/tests/unpicklable_elements/__init__.py b/tests/unpicklable_elements/__init__.py deleted file mode 100644 index 7411989d..00000000 --- a/tests/unpicklable_elements/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -""" -`pympipool` should be able to handle the case where _no_ elements of the execution can -be pickled with the traditional `pickle` module but rather require `cloudpickle`. - -This is particularly important for compatibility with `pyiron_workflow`, which -dynamically defines (unpickleable) all sorts of objects. - -Currently, `pyiron_workflow` defines its own executor, -`pyiron_workflow.executors.CloudPickleProcessPool`, which can handle these unpickleable - things, but is otherwise very primitive compared to - `pympipool.mpi.executor.PyMPISingleTaskExecutor`. - -Simply replacing `CloudPickleProcessPool` with `PyMPISingleTaskExecutor` in the -`pyiron_atomistics` tests mostly works OK, and work perfectly when the tests are ported -to a notebook, but some tests hang indefinitely on CI and running unittests locally. - -To debug this, we break the tests up into their individual components (so hanging -doesn't stop us from seeing the results of other tests). Once everything is running, -these can be re-condensed into a single test file and this entire subdirectory can be -deleted. 
-""" From b4d60c6639cd273981bec0c48320966b7c9fc65b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Oct 2023 13:00:38 +0000 Subject: [PATCH 009/134] Bump cloudpickle from 2.2.1 to 3.0.0 Bumps [cloudpickle](https://github.com/cloudpipe/cloudpickle) from 2.2.1 to 3.0.0. - [Release notes](https://github.com/cloudpipe/cloudpickle/releases) - [Changelog](https://github.com/cloudpipe/cloudpickle/blob/master/CHANGES.md) - [Commits](https://github.com/cloudpipe/cloudpickle/compare/v2.2.1...3.0.0) --- updated-dependencies: - dependency-name: cloudpickle dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9ee0f78d..fd740c24 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ ], packages=find_packages(exclude=["*tests*", "*.ci_support*"]), install_requires=[ - 'cloudpickle==2.2.1', + 'cloudpickle==3.0.0', 'mpi4py==3.1.4', 'tqdm==4.66.1', 'pyzmq==25.1.1', From 77a424a338ea7f512fe1abf69c00865cbc660b29 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Mon, 16 Oct 2023 15:15:22 +0200 Subject: [PATCH 010/134] Update environment-mpich.yml --- .ci_support/environment-mpich.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci_support/environment-mpich.yml b/.ci_support/environment-mpich.yml index cba47606..e4d97ded 100644 --- a/.ci_support/environment-mpich.yml +++ b/.ci_support/environment-mpich.yml @@ -4,7 +4,7 @@ dependencies: - python - numpy - mpich -- cloudpickle =2.2.1 +- cloudpickle =3.0.0 - mpi4py =3.1.4 - tqdm =4.66.1 - pyzmq =25.1.1 From f96c33b245f04eadad95e7a9e005b9bb55f7082d Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Mon, 16 Oct 2023 15:15:38 +0200 Subject: [PATCH 011/134] Update environment-openmpi.yml --- .ci_support/environment-openmpi.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.ci_support/environment-openmpi.yml b/.ci_support/environment-openmpi.yml index 3dcad16c..996a6d95 100644 --- a/.ci_support/environment-openmpi.yml +++ b/.ci_support/environment-openmpi.yml @@ -4,7 +4,7 @@ dependencies: - python - numpy - openmpi -- cloudpickle =2.2.1 +- cloudpickle =3.0.0 - mpi4py =3.1.4 - tqdm =4.66.1 - pyzmq =25.1.1 From 8bf2c5583713c58c7e1522d9e687866ad1d13ef3 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Mon, 16 Oct 2023 15:16:22 +0200 Subject: [PATCH 012/134] Update environment-win.yml --- .ci_support/environment-win.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci_support/environment-win.yml b/.ci_support/environment-win.yml index d4ac426c..9e84be7f 100644 --- a/.ci_support/environment-win.yml +++ b/.ci_support/environment-win.yml @@ -4,7 +4,7 @@ dependencies: - python - numpy - msmpi -- cloudpickle =2.2.1 +- cloudpickle =3.0.0 - mpi4py =3.1.4 - tqdm =4.66.1 - pyzmq =25.1.1 From 367b427d0e5ba9bcef91a276bba952db32b51486 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 17 Oct 2023 15:40:57 +0200 Subject: [PATCH 013/134] Update unittest-openmpi.yml --- .github/workflows/unittest-openmpi.yml | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/.github/workflows/unittest-openmpi.yml b/.github/workflows/unittest-openmpi.yml index 7a99f59d..5b1899ff 100644 --- a/.github/workflows/unittest-openmpi.yml +++ b/.github/workflows/unittest-openmpi.yml @@ -24,21 +24,6 @@ jobs: python-version: '3.11' label: linux-64-py-3-11-openmpi prefix: /usr/share/miniconda3/envs/my-env - - - operating-system: ubuntu-latest - python-version: '3.10' - label: linux-64-py-3-10-openmpi - prefix: /usr/share/miniconda3/envs/my-env - - - operating-system: ubuntu-latest - python-version: 3.9 - label: linux-64-py-3-9-openmpi - prefix: /usr/share/miniconda3/envs/my-env - - - operating-system: ubuntu-latest - python-version: 3.8 - label: linux-64-py-3-8-openmpi - prefix: /usr/share/miniconda3/envs/my-env steps: - uses: 
actions/checkout@v2 From ab86f2b571ebd9576f7885733a69e7282ed78dee Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Thu, 19 Oct 2023 14:15:06 +0200 Subject: [PATCH 014/134] Update setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index fd740c24..d8f33265 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ setup( name='pympipool', version=versioneer.get_version(), - description='pympipool - scale python functions over multiple compute nodes', + description='pympipool - Scale serial and MPI-parallel python functions over hundreds of compute nodes all from within a jupyter notebook or serial python process.', long_description=Path("README.md").read_text(), long_description_content_type='text/markdown', url='https://github.com/jan-janssen/pympipool', From 6c3a1c4293126232e4f9a21a981490007d410b75 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Thu, 19 Oct 2023 14:31:29 +0200 Subject: [PATCH 015/134] Update unittest-openmpi.yml --- .github/workflows/unittest-openmpi.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.github/workflows/unittest-openmpi.yml b/.github/workflows/unittest-openmpi.yml index 5b1899ff..616e4d74 100644 --- a/.github/workflows/unittest-openmpi.yml +++ b/.github/workflows/unittest-openmpi.yml @@ -25,6 +25,21 @@ jobs: label: linux-64-py-3-11-openmpi prefix: /usr/share/miniconda3/envs/my-env + - operating-system: ubuntu-latest + python-version: '3.10' + label: linux-64-py-3-10-openmpi + prefix: /usr/share/miniconda3/envs/my-env + + - operating-system: ubuntu-latest + python-version: 3.9 + label: linux-64-py-3-9-openmpi + prefix: /usr/share/miniconda3/envs/my-env + + - operating-system: ubuntu-latest + python-version: 3.8 + label: linux-64-py-3-8-openmpi + prefix: /usr/share/miniconda3/envs/my-env + steps: - uses: actions/checkout@v2 - uses: conda-incubator/setup-miniconda@v2.2.0 From 6d265452f076706001e937bb56e4ac6d2a1f4eea Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: 
Tue, 31 Oct 2023 11:26:48 +0100 Subject: [PATCH 016/134] Split Slurm and MPI interface --- pympipool/mpi/executor.py | 59 +---------------- pympipool/shared/interface.py | 31 +++++---- pympipool/slurm/__init__.py | 1 + pympipool/slurm/executor.py | 119 ++++++++++++++++++++++++++++++++++ 4 files changed, 141 insertions(+), 69 deletions(-) create mode 100644 pympipool/slurm/__init__.py create mode 100644 pympipool/slurm/executor.py diff --git a/pympipool/mpi/executor.py b/pympipool/mpi/executor.py index 9aac7289..5d137789 100644 --- a/pympipool/mpi/executor.py +++ b/pympipool/mpi/executor.py @@ -4,7 +4,7 @@ ExecutorBase, executor_broker, ) -from pympipool.shared.interface import MpiExecInterface, SlurmSubprocessInterface +from pympipool.shared.interface import MpiExecInterface from pympipool.shared.thread import RaisingThread @@ -13,39 +13,22 @@ class PyMPIExecutor(ExecutorBase): Args: max_workers (int): defines the number workers which can execute functions in parallel cores_per_worker (int): number of MPI cores to be used for each function call - threads_per_core (int): number of OpenMP threads to be used for each function call - gpus_per_worker (int): number of GPUs per worker - defaults to 0 oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI only) - default False init_function (None): optional function to preset arguments for functions which are submitted later cwd (str/None): current working directory where the parallel python task is executed sleep_interval (float): synchronization interval - default 0.1 - enable_slurm_backend (bool): enable the SLURM queueing system as backend - defaults to False """ def __init__( self, max_workers, cores_per_worker=1, - threads_per_core=1, - gpus_per_worker=0, oversubscribe=False, init_function=None, cwd=None, sleep_interval=0.1, - enable_slurm_backend=False, ): super().__init__() - if not enable_slurm_backend: - if threads_per_core != 1: - raise ValueError( - "The MPI backend only supports 
threads_per_core=1, " - + "to manage threads use the SLURM queuing system enable_slurm_backend=True ." - ) - elif gpus_per_worker != 0: - raise ValueError( - "The MPI backend only supports gpus_per_core=0, " - + "to manage GPUs use the SLURM queuing system enable_slurm_backend=True ." - ) self._process = RaisingThread( target=executor_broker, kwargs={ @@ -56,12 +39,9 @@ def __init__( "executor_class": PyMPISingleTaskExecutor, # Executor Arguments "cores": cores_per_worker, - "threads_per_core": threads_per_core, - "gpus_per_task": int(gpus_per_worker / cores_per_worker), "oversubscribe": oversubscribe, "init_function": init_function, "cwd": cwd, - "enable_slurm_backend": enable_slurm_backend, }, ) self._process.start() @@ -77,12 +57,9 @@ class PyMPISingleTaskExecutor(ExecutorBase): Args: cores (int): defines the number of MPI ranks to use for each function call - threads_per_core (int): number of OpenMP threads to be used for each function call - gpus_per_task (int): number of GPUs per MPI rank - defaults to 0 oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI only) - default False init_function (None): optional function to preset arguments for functions which are submitted later cwd (str/None): current working directory where the parallel python task is executed - enable_slurm_backend (bool): enable the SLURM queueing system as backend - defaults to False Examples: ``` @@ -108,12 +85,9 @@ class PyMPISingleTaskExecutor(ExecutorBase): def __init__( self, cores=1, - threads_per_core=1, - gpus_per_task=0, oversubscribe=False, init_function=None, cwd=None, - enable_slurm_backend=False, ): super().__init__() self._process = RaisingThread( @@ -122,41 +96,12 @@ def __init__( # Executor Arguments "future_queue": self._future_queue, "cores": cores, - "interface_class": get_interface, + "interface_class": MpiExecInterface, # Interface Arguments - "threads_per_core": threads_per_core, - "gpus_per_core": gpus_per_task, "cwd": cwd, "oversubscribe": 
oversubscribe, - "enable_slurm_backend": enable_slurm_backend, }, ) self._process.start() self._set_init_function(init_function=init_function) cloudpickle_register(ind=3) - - -def get_interface( - cores=1, - threads_per_core=1, - gpus_per_core=0, - cwd=None, - oversubscribe=False, - enable_slurm_backend=False, -): - if not enable_slurm_backend: - return MpiExecInterface( - cwd=cwd, - cores=cores, - threads_per_core=threads_per_core, - gpus_per_core=gpus_per_core, - oversubscribe=oversubscribe, - ) - else: - return SlurmSubprocessInterface( - cwd=cwd, - cores=cores, - threads_per_core=threads_per_core, - gpus_per_core=gpus_per_core, - oversubscribe=oversubscribe, - ) diff --git a/pympipool/shared/interface.py b/pympipool/shared/interface.py index 8885cb36..147ca578 100644 --- a/pympipool/shared/interface.py +++ b/pympipool/shared/interface.py @@ -4,12 +4,10 @@ class BaseInterface(ABC): def __init__( - self, cwd, cores=1, threads_per_core=1, gpus_per_core=0, oversubscribe=False + self, cwd, cores=1, oversubscribe=False ): self._cwd = cwd self._cores = cores - self._threads_per_core = threads_per_core - self._gpus_per_core = gpus_per_core self._oversubscribe = oversubscribe def bootup(self, command_lst): @@ -27,15 +25,11 @@ def __init__( self, cwd=None, cores=1, - threads_per_core=1, - gpus_per_core=0, oversubscribe=False, ): super().__init__( cwd=cwd, cores=cores, - threads_per_core=threads_per_core, - gpus_per_core=gpus_per_core, oversubscribe=oversubscribe, ) self._process = None @@ -63,7 +57,6 @@ class MpiExecInterface(SubprocessInterface): def generate_command(self, command_lst): command_prepend_lst = generate_mpiexec_command( cores=self._cores, - gpus_per_core=self._gpus_per_core, oversubscribe=self._oversubscribe, ) return super().generate_command( @@ -71,7 +64,23 @@ def generate_command(self, command_lst): ) -class SlurmSubprocessInterface(SubprocessInterface): +class SrunInterface(SubprocessInterface): + def __init__( + self, + cwd=None, + cores=1, + 
threads_per_core=1, + gpus_per_core=0, + oversubscribe=False, + ): + super().__init__( + cwd=cwd, + cores=cores, + oversubscribe=oversubscribe, + ) + self._threads_per_core = threads_per_core + self._gpus_per_core = gpus_per_core + def generate_command(self, command_lst): command_prepend_lst = generate_slurm_command( cores=self._cores, @@ -85,12 +94,10 @@ def generate_command(self, command_lst): ) -def generate_mpiexec_command(cores, gpus_per_core=0, oversubscribe=False): +def generate_mpiexec_command(cores, oversubscribe=False): command_prepend_lst = ["mpiexec", "-n", str(cores)] if oversubscribe: command_prepend_lst += ["--oversubscribe"] - if gpus_per_core > 0: - raise ValueError() return command_prepend_lst diff --git a/pympipool/slurm/__init__.py b/pympipool/slurm/__init__.py new file mode 100644 index 00000000..d9534d99 --- /dev/null +++ b/pympipool/slurm/__init__.py @@ -0,0 +1 @@ +from pympipool.slurm.executor import PySlurmExecutor \ No newline at end of file diff --git a/pympipool/slurm/executor.py b/pympipool/slurm/executor.py new file mode 100644 index 00000000..2adf0264 --- /dev/null +++ b/pympipool/slurm/executor.py @@ -0,0 +1,119 @@ +from pympipool.shared.executorbase import ( + cloudpickle_register, + execute_parallel_tasks, + ExecutorBase, + executor_broker, +) +from pympipool.shared.interface import SrunInterface +from pympipool.shared.thread import RaisingThread + + +class PySlurmExecutor(ExecutorBase): + """ + Args: + max_workers (int): defines the number workers which can execute functions in parallel + cores_per_worker (int): number of MPI cores to be used for each function call + threads_per_core (int): number of OpenMP threads to be used for each function call + gpus_per_worker (int): number of GPUs per worker - defaults to 0 + oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI only) - default False + init_function (None): optional function to preset arguments for functions which are submitted later + cwd (str/None): 
current working directory where the parallel python task is executed + sleep_interval (float): synchronization interval - default 0.1 + """ + + def __init__( + self, + max_workers, + cores_per_worker=1, + threads_per_core=1, + gpus_per_worker=0, + oversubscribe=False, + init_function=None, + cwd=None, + sleep_interval=0.1, + ): + super().__init__() + self._process = RaisingThread( + target=executor_broker, + kwargs={ + # Broker Arguments + "future_queue": self._future_queue, + "max_workers": max_workers, + "sleep_interval": sleep_interval, + "executor_class": PySlurmSingleTaskExecutor, + # Executor Arguments + "cores": cores_per_worker, + "threads_per_core": threads_per_core, + "gpus_per_task": int(gpus_per_worker / cores_per_worker), + "oversubscribe": oversubscribe, + "init_function": init_function, + "cwd": cwd, + }, + ) + self._process.start() + + +class PySlurmSingleTaskExecutor(ExecutorBase): + """ + The pympipool.Executor behaves like the concurrent.futures.Executor but it uses mpi4py to execute parallel tasks. + In contrast to the mpi4py.futures.MPIPoolExecutor the pympipool.Executor can be executed in a serial python process + and does not require the python script to be executed with MPI. Still internally the pympipool.Executor uses the + mpi4py.futures.MPIPoolExecutor, consequently it is primarily an abstraction of its functionality to improve the + usability in particular when used in combination with Jupyter notebooks. 
+ + Args: + cores (int): defines the number of MPI ranks to use for each function call + threads_per_core (int): number of OpenMP threads to be used for each function call + gpus_per_task (int): number of GPUs per MPI rank - defaults to 0 + oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI only) - default False + init_function (None): optional function to preset arguments for functions which are submitted later + cwd (str/None): current working directory where the parallel python task is executed + + Examples: + ``` + >>> import numpy as np + >>> from pympipool.mpi.executor import PyMPISingleTaskExecutor + >>> + >>> def calc(i, j, k): + >>> from mpi4py import MPI + >>> size = MPI.COMM_WORLD.Get_size() + >>> rank = MPI.COMM_WORLD.Get_rank() + >>> return np.array([i, j, k]), size, rank + >>> + >>> def init_k(): + >>> return {"k": 3} + >>> + >>> with PyMPISingleTaskExecutor(cores=2, init_function=init_k) as p: + >>> fs = p.submit(calc, 2, j=4) + >>> print(fs.result()) + [(array([2, 4, 3]), 2, 0), (array([2, 4, 3]), 2, 1)] + ``` + """ + + def __init__( + self, + cores=1, + threads_per_core=1, + gpus_per_task=0, + oversubscribe=False, + init_function=None, + cwd=None, + ): + super().__init__() + self._process = RaisingThread( + target=execute_parallel_tasks, + kwargs={ + # Executor Arguments + "future_queue": self._future_queue, + "cores": cores, + "interface_class": SrunInterface, + # Interface Arguments + "threads_per_core": threads_per_core, + "gpus_per_core": gpus_per_task, + "cwd": cwd, + "oversubscribe": oversubscribe, + }, + ) + self._process.start() + self._set_init_function(init_function=init_function) + cloudpickle_register(ind=3) From 8189887381b5b5b51e7c41ec072ec96f03a50ec6 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 31 Oct 2023 11:30:37 +0100 Subject: [PATCH 017/134] update tests --- tests/test_parse.py | 2 +- tests/test_worker.py | 10 +++++----- tests/test_worker_memory.py | 4 ++-- 3 files changed, 8 insertions(+), 8 
deletions(-) diff --git a/tests/test_parse.py b/tests/test_parse.py index 32123162..a94c048d 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -2,7 +2,7 @@ import sys import unittest from pympipool.shared.backend import parse_arguments -from pympipool.shared.interface import SlurmSubprocessInterface, MpiExecInterface +from pympipool.shared.interface import SrunInterface, MpiExecInterface class TestParser(unittest.TestCase): diff --git a/tests/test_worker.py b/tests/test_worker.py index 5bc26747..386fb16f 100644 --- a/tests/test_worker.py +++ b/tests/test_worker.py @@ -3,7 +3,7 @@ from queue import Queue from time import sleep from concurrent.futures import CancelledError -from pympipool.mpi.executor import PyMPISingleTaskExecutor, get_interface +from pympipool.mpi.executor import PyMPISingleTaskExecutor, MpiExecInterface from pympipool.shared.executorbase import cloudpickle_register, execute_parallel_tasks from concurrent.futures import Future @@ -107,7 +107,7 @@ def test_execute_task_failed_no_argument(self): future_queue=q, cores=1, oversubscribe=False, - interface_class=get_interface, + interface_class=MpiExecInterface, ) q.join() @@ -121,7 +121,7 @@ def test_execute_task_failed_wrong_argument(self): future_queue=q, cores=1, oversubscribe=False, - interface_class=get_interface, + interface_class=MpiExecInterface, ) q.join() @@ -135,7 +135,7 @@ def test_execute_task(self): future_queue=q, cores=1, oversubscribe=False, - interface_class=get_interface, + interface_class=MpiExecInterface, ) self.assertEqual(f.result(), np.array(4)) q.join() @@ -150,7 +150,7 @@ def test_execute_task_parallel(self): future_queue=q, cores=2, oversubscribe=False, - interface_class=get_interface, + interface_class=MpiExecInterface, ) self.assertEqual(f.result(), [np.array(4), np.array(4)]) q.join() diff --git a/tests/test_worker_memory.py b/tests/test_worker_memory.py index f8aaa7cb..2d4da738 100644 --- a/tests/test_worker_memory.py +++ b/tests/test_worker_memory.py @@ -3,7 
+3,7 @@ from queue import Queue from pympipool.shared.backend import call_funct from pympipool.shared.executorbase import cloudpickle_register, execute_parallel_tasks -from pympipool.mpi.executor import PyMPISingleTaskExecutor, get_interface +from pympipool.mpi.executor import PyMPISingleTaskExecutor, MpiExecInterface from concurrent.futures import Future @@ -43,7 +43,7 @@ def test_execute_task(self): future_queue=q, cores=1, oversubscribe=False, - interface_class=get_interface, + interface_class=MpiExecInterface, ) self.assertEqual(f.result(), np.array([5])) q.join() From c6bf57659e40791435b41c9b73558135de6dd177 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 31 Oct 2023 11:31:09 +0100 Subject: [PATCH 018/134] black formatting --- pympipool/shared/interface.py | 4 +--- pympipool/slurm/__init__.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pympipool/shared/interface.py b/pympipool/shared/interface.py index 147ca578..54818c79 100644 --- a/pympipool/shared/interface.py +++ b/pympipool/shared/interface.py @@ -3,9 +3,7 @@ class BaseInterface(ABC): - def __init__( - self, cwd, cores=1, oversubscribe=False - ): + def __init__(self, cwd, cores=1, oversubscribe=False): self._cwd = cwd self._cores = cores self._oversubscribe = oversubscribe diff --git a/pympipool/slurm/__init__.py b/pympipool/slurm/__init__.py index d9534d99..ed4aeab3 100644 --- a/pympipool/slurm/__init__.py +++ b/pympipool/slurm/__init__.py @@ -1 +1 @@ -from pympipool.slurm.executor import PySlurmExecutor \ No newline at end of file +from pympipool.slurm.executor import PySlurmExecutor From cf737272571a3e37eb241ed0360cf5f79ced9aa1 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 31 Oct 2023 11:34:13 +0100 Subject: [PATCH 019/134] Fix import --- pympipool/shared/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pympipool/shared/__init__.py b/pympipool/shared/__init__.py index 73fadf43..2daf2f44 100644 --- a/pympipool/shared/__init__.py +++ 
b/pympipool/shared/__init__.py @@ -8,4 +8,4 @@ ) from pympipool.shared.executorbase import cancel_items_in_queue from pympipool.shared.thread import RaisingThread -from pympipool.shared.interface import MpiExecInterface, SlurmSubprocessInterface +from pympipool.shared.interface import MpiExecInterface, SrunInterface From faa6643e92edba3f990d755710adead07a85885e Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 31 Oct 2023 11:36:56 +0100 Subject: [PATCH 020/134] no more GPU support for MPI --- tests/test_interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_interface.py b/tests/test_interface.py index 149e5f03..b8cac32e 100644 --- a/tests/test_interface.py +++ b/tests/test_interface.py @@ -18,7 +18,7 @@ def test_interface(self): task_dict = {"fn": calc, "args": (), "kwargs": {"i": 2}} interface = SocketInterface( interface=MpiExecInterface( - cwd=None, cores=1, gpus_per_core=0, oversubscribe=False + cwd=None, cores=1, oversubscribe=False ) ) interface.bootup( From cf7f52250456abf685b2eb1854207a4e931a054d Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 31 Oct 2023 11:40:17 +0100 Subject: [PATCH 021/134] Error type changes --- tests/test_meta.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_meta.py b/tests/test_meta.py index 643883c8..1bba9a07 100644 --- a/tests/test_meta.py +++ b/tests/test_meta.py @@ -110,7 +110,7 @@ def test_meta_executor_parallel(self): self.assertTrue(fs_1.done()) def test_errors(self): - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): PyMPIExecutor(max_workers=1, cores_per_worker=1, threads_per_core=2) - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): PyMPIExecutor(max_workers=1, cores_per_worker=1, gpus_per_worker=1) From 29ae9103d264a573dc3d3c3e4177c18e5c35a1e2 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 31 Oct 2023 11:44:11 +0100 Subject: [PATCH 022/134] more fixes --- tests/test_parse.py | 11 
++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/tests/test_parse.py b/tests/test_parse.py index a94c048d..eaa953d1 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -22,7 +22,7 @@ def test_command_local(self): result_dict["zmqport"], ] interface = MpiExecInterface( - cwd=None, cores=2, gpus_per_core=0, oversubscribe=True + cwd=None, cores=2, oversubscribe=True ) self.assertEqual( command_lst, @@ -32,13 +32,6 @@ def test_command_local(self): ) self.assertEqual(result_dict, parse_arguments(command_lst)) - def test_mpiexec_gpu(self): - interface = MpiExecInterface( - cwd=os.path.abspath("."), cores=2, gpus_per_core=1, oversubscribe=True - ) - with self.assertRaises(ValueError): - interface.bootup(command_lst=[]) - def test_command_slurm(self): result_dict = { "host": "127.0.0.1", @@ -59,7 +52,7 @@ def test_command_slurm(self): "--zmqport", result_dict["zmqport"], ] - interface = SlurmSubprocessInterface( + interface = SrunInterface( cwd=os.path.abspath("."), cores=2, gpus_per_core=1, oversubscribe=True ) self.assertEqual( From 95206700a4351a4396b4937eebd71eff0919fe48 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 31 Oct 2023 11:58:05 +0100 Subject: [PATCH 023/134] fixes for flux --- pympipool/flux/executor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pympipool/flux/executor.py b/pympipool/flux/executor.py index 4c8930da..95425118 100644 --- a/pympipool/flux/executor.py +++ b/pympipool/flux/executor.py @@ -136,10 +136,10 @@ def __init__( super().__init__( cwd=cwd, cores=cores, - gpus_per_core=gpus_per_core, - threads_per_core=threads_per_core, oversubscribe=oversubscribe, ) + self._threads_per_core = threads_per_core + self._gpus_per_core = gpus_per_core self._executor = executor self._future = None From eba13b5701069f30547cbc3556a9e081e86c2244 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 31 Oct 2023 12:17:03 +0100 Subject: [PATCH 024/134] Fix interface shutdown --- 
pympipool/shared/executorbase.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pympipool/shared/executorbase.py b/pympipool/shared/executorbase.py index c950eae1..cf1a27c5 100644 --- a/pympipool/shared/executorbase.py +++ b/pympipool/shared/executorbase.py @@ -53,6 +53,7 @@ def shutdown(self, wait=True, *, cancel_futures=False): if cancel_futures: cancel_items_in_queue(que=self._future_queue) self._future_queue.put({"shutdown": True, "wait": wait}) + self._future_queue.join() self._process.join() def __len__(self): From 7d7bd99d7113e740adffad5e7b08b18edd05e893 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 31 Oct 2023 12:26:38 +0100 Subject: [PATCH 025/134] Fix order --- pympipool/shared/executorbase.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pympipool/shared/executorbase.py b/pympipool/shared/executorbase.py index cf1a27c5..232367fa 100644 --- a/pympipool/shared/executorbase.py +++ b/pympipool/shared/executorbase.py @@ -53,8 +53,8 @@ def shutdown(self, wait=True, *, cancel_futures=False): if cancel_futures: cancel_items_in_queue(que=self._future_queue) self._future_queue.put({"shutdown": True, "wait": wait}) - self._future_queue.join() self._process.join() + self._future_queue.join() def __len__(self): return self._future_queue.qsize() From 0fd6e42f5d18555c8b1ae5d5158455a52757dc44 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Thu, 2 Nov 2023 12:36:43 +0100 Subject: [PATCH 026/134] Update unittest-openmpi.yml --- .github/workflows/unittest-openmpi.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unittest-openmpi.yml b/.github/workflows/unittest-openmpi.yml index d5597caf..3d81e857 100644 --- a/.github/workflows/unittest-openmpi.yml +++ b/.github/workflows/unittest-openmpi.yml @@ -57,7 +57,7 @@ jobs: - name: Test shell: bash -l {0} timeout-minutes: 5 - run: python -m unittest discover tests + run: cd tests; python -m unittest discover . 
env: OMPI_MCA_plm: 'isolated' OMPI_MCA_rmaps_base_oversubscribe: 'yes' From 78dc4c54d634e0a89667cbf406dd7c704992b65b Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Thu, 2 Nov 2023 12:37:03 +0100 Subject: [PATCH 027/134] Update unittest-mpich.yml --- .github/workflows/unittest-mpich.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unittest-mpich.yml b/.github/workflows/unittest-mpich.yml index 0189ead1..2a5b988b 100644 --- a/.github/workflows/unittest-mpich.yml +++ b/.github/workflows/unittest-mpich.yml @@ -57,4 +57,4 @@ jobs: - name: Test shell: bash -l {0} timeout-minutes: 5 - run: python -m unittest discover tests + run: cd tests; python -m unittest discover . From 81feb8509a9b1948f5f61dd05341b8dc3c1212ae Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Thu, 2 Nov 2023 12:37:14 +0100 Subject: [PATCH 028/134] Update unittest-win.yml --- .github/workflows/unittest-win.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unittest-win.yml b/.github/workflows/unittest-win.yml index 5467292f..809d4f53 100644 --- a/.github/workflows/unittest-win.yml +++ b/.github/workflows/unittest-win.yml @@ -34,4 +34,4 @@ jobs: - name: Test shell: bash -l {0} timeout-minutes: 5 - run: python -m unittest discover tests + run: cd tests; python -m unittest discover . From 59ae8c302b0048903c1b407865dd1b3c15cfb250 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 7 Nov 2023 08:41:04 +0100 Subject: [PATCH 029/134] Fix initialization --- pympipool/__init__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pympipool/__init__.py b/pympipool/__init__.py index 80edaf05..e344cbe0 100644 --- a/pympipool/__init__.py +++ b/pympipool/__init__.py @@ -1,4 +1,11 @@ from ._version import get_versions +from pympipool.mpi.executor import PyMPIExecutor +from pympipool.slurm.executor import PySlurmExecutor + +try: # The PyFluxExecutor requires flux-core to be installed. 
+ from pympipool.flux.executor import PyFluxExecutor +except ImportError: + pass __version__ = get_versions()["version"] del get_versions From 5f88f503db6eaf3ea7b909854b4676fb251f79e6 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 7 Nov 2023 09:06:07 +0100 Subject: [PATCH 030/134] Join Queues --- pympipool/shared/executorbase.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pympipool/shared/executorbase.py b/pympipool/shared/executorbase.py index 232367fa..9e75f31a 100644 --- a/pympipool/shared/executorbase.py +++ b/pympipool/shared/executorbase.py @@ -135,6 +135,7 @@ def execute_parallel_tasks_loop(interface, future_queue): if "shutdown" in task_dict.keys() and task_dict["shutdown"]: interface.shutdown(wait=task_dict["wait"]) future_queue.task_done() + future_queue.join() break elif "fn" in task_dict.keys() and "future" in task_dict.keys(): f = task_dict.pop("future") @@ -175,6 +176,7 @@ def executor_broker( future_queue.task_done() else: future_queue.task_done() + future_queue.join() break From 1aef6b0bb18f951a73f599f8e99d5e4ff3717c8f Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 7 Nov 2023 09:44:55 +0100 Subject: [PATCH 031/134] Fix working directory in SLURM --- pympipool/shared/interface.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pympipool/shared/interface.py b/pympipool/shared/interface.py index 54818c79..ce52fda9 100644 --- a/pympipool/shared/interface.py +++ b/pympipool/shared/interface.py @@ -102,7 +102,9 @@ def generate_mpiexec_command(cores, oversubscribe=False): def generate_slurm_command( cores, cwd, threads_per_core=1, gpus_per_core=0, oversubscribe=False ): - command_prepend_lst = ["srun", "-n", str(cores), "-D", cwd] + command_prepend_lst = ["srun", "-n", str(cores)] + if cwd is not None: + command_prepend_lst += ["-D", cwd] if threads_per_core > 1: command_prepend_lst += ["--cpus-per-task" + str(threads_per_core)] if gpus_per_core > 0: From c37530e9c9095a95f625ac801d066a6986a4d280 Mon Sep 17 
00:00:00 2001 From: Jan Janssen Date: Tue, 7 Nov 2023 10:15:23 +0100 Subject: [PATCH 032/134] Commmunicate before termination --- pympipool/shared/interface.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pympipool/shared/interface.py b/pympipool/shared/interface.py index 54818c79..1d585824 100644 --- a/pympipool/shared/interface.py +++ b/pympipool/shared/interface.py @@ -42,6 +42,7 @@ def generate_command(self, command_lst): return command_lst def shutdown(self, wait=True): + self._process.communicate() self._process.terminate() if wait: self._process.wait() From 9d67dad3eb282247f6ca0d50f959d611cc940853 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 7 Nov 2023 16:06:15 +0100 Subject: [PATCH 033/134] Raise error when srun is not available --- pympipool/shared/interface.py | 8 ++++++-- pympipool/slurm/executor.py | 10 +++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/pympipool/shared/interface.py b/pympipool/shared/interface.py index e64493c3..0fee6905 100644 --- a/pympipool/shared/interface.py +++ b/pympipool/shared/interface.py @@ -2,6 +2,10 @@ import subprocess +MPI_COMMAND = "mpiexec" +SLURM_COMMAND = "srun" + + class BaseInterface(ABC): def __init__(self, cwd, cores=1, oversubscribe=False): self._cwd = cwd @@ -94,7 +98,7 @@ def generate_command(self, command_lst): def generate_mpiexec_command(cores, oversubscribe=False): - command_prepend_lst = ["mpiexec", "-n", str(cores)] + command_prepend_lst = [MPI_COMMAND, "-n", str(cores)] if oversubscribe: command_prepend_lst += ["--oversubscribe"] return command_prepend_lst @@ -103,7 +107,7 @@ def generate_mpiexec_command(cores, oversubscribe=False): def generate_slurm_command( cores, cwd, threads_per_core=1, gpus_per_core=0, oversubscribe=False ): - command_prepend_lst = ["srun", "-n", str(cores)] + command_prepend_lst = [SLURM_COMMAND, "-n", str(cores)] if cwd is not None: command_prepend_lst += ["-D", cwd] if threads_per_core > 1: diff --git a/pympipool/slurm/executor.py 
b/pympipool/slurm/executor.py index 2adf0264..19fdb920 100644 --- a/pympipool/slurm/executor.py +++ b/pympipool/slurm/executor.py @@ -1,13 +1,21 @@ +import shutil +import subprocess + + from pympipool.shared.executorbase import ( cloudpickle_register, execute_parallel_tasks, ExecutorBase, executor_broker, ) -from pympipool.shared.interface import SrunInterface +from pympipool.shared.interface import SrunInterface, SLURM_COMMAND from pympipool.shared.thread import RaisingThread +if shutil.which(SLURM_COMMAND) is None: + raise subprocess.SubprocessError("SLURM command " + SLURM_COMMAND + " not found.") + + class PySlurmExecutor(ExecutorBase): """ Args: From 67e220cd29e647cf34a368e6df2b2787e5747bc5 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 7 Nov 2023 16:15:24 +0100 Subject: [PATCH 034/134] Update executor.py --- pympipool/slurm/executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pympipool/slurm/executor.py b/pympipool/slurm/executor.py index 19fdb920..33688506 100644 --- a/pympipool/slurm/executor.py +++ b/pympipool/slurm/executor.py @@ -13,7 +13,7 @@ if shutil.which(SLURM_COMMAND) is None: - raise subprocess.SubprocessError("SLURM command " + SLURM_COMMAND + " not found.") + raise ImportError("SLURM command " + SLURM_COMMAND + " not found.") class PySlurmExecutor(ExecutorBase): From be6d22b68b875e6f9d2478a32a6227b3c374a271 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 7 Nov 2023 16:16:45 +0100 Subject: [PATCH 035/134] Update __init__.py --- pympipool/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pympipool/__init__.py b/pympipool/__init__.py index e344cbe0..b0dc5e64 100644 --- a/pympipool/__init__.py +++ b/pympipool/__init__.py @@ -1,11 +1,15 @@ from ._version import get_versions from pympipool.mpi.executor import PyMPIExecutor -from pympipool.slurm.executor import PySlurmExecutor try: # The PyFluxExecutor requires flux-core to be installed. 
from pympipool.flux.executor import PyFluxExecutor except ImportError: pass +try: # The PySlurmExecutor requires the srun command to be available. + from pympipool.slurm.executor import PySlurmExecutor +except ImportError: + pass + __version__ = get_versions()["version"] del get_versions From c0d9e103d8955a79d4883abf13575ddf05b143d6 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 7 Nov 2023 16:25:18 +0100 Subject: [PATCH 036/134] Implement universal executor class --- pympipool/__init__.py | 45 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/pympipool/__init__.py b/pympipool/__init__.py index e344cbe0..6979129a 100644 --- a/pympipool/__init__.py +++ b/pympipool/__init__.py @@ -4,8 +4,53 @@ try: # The PyFluxExecutor requires flux-core to be installed. from pympipool.flux.executor import PyFluxExecutor + flux_installed = True except ImportError: + flux_installed = False pass +try: # The PySlurmExecutor requires the srun command to be available. 
+ from pympipool.slurm.executor import PySlurmExecutor + slurm_installed = True +except ImportError: + slurm_installed = False + pass + + __version__ = get_versions()["version"] del get_versions + + +class Executor: + def __new__( + cls, + max_workers, + cores_per_worker=1, + init_function=None, + cwd=None, + sleep_interval=0.1 + ): + if flux_installed: + return PyFluxExecutor( + max_workers=max_workers, + cores_per_worker=cores_per_worker, + init_function=init_function, + cwd=cwd, + sleep_interval=sleep_interval, + ) + elif slurm_installed: + return PySlurmExecutor( + max_workers=max_workers, + cores_per_worker=cores_per_worker, + init_function=init_function, + cwd=cwd, + sleep_interval=sleep_interval, + ) + else: + return PyMPIExecutor( + max_workers=max_workers, + cores_per_worker=cores_per_worker, + init_function=init_function, + cwd=cwd, + sleep_interval=sleep_interval, + ) From f8b2846b2e5882c0fa19f696e4c24e127fe26ac0 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 7 Nov 2023 16:26:21 +0100 Subject: [PATCH 037/134] black formatting --- pympipool/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pympipool/__init__.py b/pympipool/__init__.py index fa95fae3..46c2ed85 100644 --- a/pympipool/__init__.py +++ b/pympipool/__init__.py @@ -3,6 +3,7 @@ try: # The PyFluxExecutor requires flux-core to be installed. from pympipool.flux.executor import PyFluxExecutor + flux_installed = True except ImportError: flux_installed = False @@ -10,6 +11,7 @@ try: # The PySlurmExecutor requires the srun command to be available. 
from pympipool.slurm.executor import PySlurmExecutor + slurm_installed = True except ImportError: slurm_installed = False @@ -27,7 +29,7 @@ def __new__( cores_per_worker=1, init_function=None, cwd=None, - sleep_interval=0.1 + sleep_interval=0.1, ): if flux_installed: return PyFluxExecutor( From 64e8c392d332eb96abec8f8d108c9e23eccf755b Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 7 Nov 2023 16:32:59 +0100 Subject: [PATCH 038/134] only enable flux when flux was started --- pympipool/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pympipool/__init__.py b/pympipool/__init__.py index 46c2ed85..3fdd9a5e 100644 --- a/pympipool/__init__.py +++ b/pympipool/__init__.py @@ -1,10 +1,11 @@ +import os from ._version import get_versions from pympipool.mpi.executor import PyMPIExecutor try: # The PyFluxExecutor requires flux-core to be installed. from pympipool.flux.executor import PyFluxExecutor - flux_installed = True + flux_installed = "FLUX_URI" in os.environ except ImportError: flux_installed = False pass From db3bfabc0c1b98378ae4a457e1754e51d0aa9d6e Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 7 Nov 2023 18:32:21 +0100 Subject: [PATCH 039/134] Update DocStrings --- pympipool/flux/executor.py | 54 +++++++++++++++++++------------------ pympipool/mpi/executor.py | 51 ++++++++++++++++++----------------- pympipool/slurm/executor.py | 51 ++++++++++++++++++----------------- 3 files changed, 82 insertions(+), 74 deletions(-) diff --git a/pympipool/flux/executor.py b/pympipool/flux/executor.py index 95425118..db604c9e 100644 --- a/pympipool/flux/executor.py +++ b/pympipool/flux/executor.py @@ -14,6 +14,11 @@ class PyFluxExecutor(ExecutorBase): """ + The pympipool.flux.PyFluxExecutor leverages the flux framework to distribute python tasks within a queuing system + allocation. 
In analogy to the pympipool.slurm.PySlurmExecutur it provides the option to specify the number of + threads per worker as well as the number of GPUs per worker in addition to specifying the number of cores per + worker. + Args: max_workers (int): defines the number workers which can execute functions in parallel cores_per_worker (int): number of MPI cores to be used for each function call @@ -23,6 +28,27 @@ class PyFluxExecutor(ExecutorBase): cwd (str/None): current working directory where the parallel python task is executed sleep_interval (float): synchronization interval - default 0.1 executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux + + Examples: + ``` + >>> import numpy as np + >>> from pympipool.flux import PyFluxExecutor + >>> + >>> def calc(i, j, k): + >>> from mpi4py import MPI + >>> size = MPI.COMM_WORLD.Get_size() + >>> rank = MPI.COMM_WORLD.Get_rank() + >>> return np.array([i, j, k]), size, rank + >>> + >>> def init_k(): + >>> return {"k": 3} + >>> + >>> with PyFluxExecutor(cores=2, init_function=init_k) as p: + >>> fs = p.submit(calc, 2, j=4) + >>> print(fs.result()) + + [(array([2, 4, 3]), 2, 0), (array([2, 4, 3]), 2, 1)] + ``` """ def __init__( @@ -59,11 +85,7 @@ def __init__( class PyFluxSingleTaskExecutor(ExecutorBase): """ - The pympipool.Executor behaves like the concurrent.futures.Executor but it uses mpi4py to execute parallel tasks. - In contrast to the mpi4py.futures.MPIPoolExecutor the pympipool.Executor can be executed in a serial python process - and does not require the python script to be executed with MPI. Still internally the pympipool.Executor uses the - mpi4py.futures.MPIPoolExecutor, consequently it is primarily an abstraction of its functionality to improve the - usability in particular when used in combination with Jupyter notebooks. + The pympipool.flux.PyFluxSingleTaskExecutor is the internal worker for the pympipool.flux.PyFluxExecutor. 
Args: cores (int): defines the number of MPI ranks to use for each function call @@ -71,27 +93,7 @@ class PyFluxSingleTaskExecutor(ExecutorBase): gpus_per_task (int): number of GPUs per MPI rank - defaults to 0 init_function (None): optional function to preset arguments for functions which are submitted later cwd (str/None): current working directory where the parallel python task is executed - - Examples: - ``` - >>> import numpy as np - >>> from pympipool.flux.executor import PyFluxSingleTaskExecutor - >>> - >>> def calc(i, j, k): - >>> from mpi4py import MPI - >>> size = MPI.COMM_WORLD.Get_size() - >>> rank = MPI.COMM_WORLD.Get_rank() - >>> return np.array([i, j, k]), size, rank - >>> - >>> def init_k(): - >>> return {"k": 3} - >>> - >>> with PyFluxSingleTaskExecutor(cores=2, init_function=init_k) as p: - >>> fs = p.submit(calc, 2, j=4) - >>> print(fs.result()) - - [(array([2, 4, 3]), 2, 0), (array([2, 4, 3]), 2, 1)] - ``` + executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux """ def __init__( diff --git a/pympipool/mpi/executor.py b/pympipool/mpi/executor.py index 5d137789..1c5fa2e6 100644 --- a/pympipool/mpi/executor.py +++ b/pympipool/mpi/executor.py @@ -10,6 +10,12 @@ class PyMPIExecutor(ExecutorBase): """ + The pympipool.mpi.PyMPIExecutor leverages the message passing interface MPI to distribute python tasks within an + MPI allocation. In contrast to the mpi4py.futures.MPIPoolExecutor the pympipool.mpi.PyMPIExecutor can be executed + in a serial python process and does not require the python script to be executed with MPI. Consequently, it is + primarily an abstraction of its functionality to improve the usability in particular when used in combination with \ + Jupyter notebooks. 
+ Args: max_workers (int): defines the number workers which can execute functions in parallel cores_per_worker (int): number of MPI cores to be used for each function call @@ -17,6 +23,26 @@ class PyMPIExecutor(ExecutorBase): init_function (None): optional function to preset arguments for functions which are submitted later cwd (str/None): current working directory where the parallel python task is executed sleep_interval (float): synchronization interval - default 0.1 + + Examples: + ``` + >>> import numpy as np + >>> from pympipool.mpi import PyMPIExecutor + >>> + >>> def calc(i, j, k): + >>> from mpi4py import MPI + >>> size = MPI.COMM_WORLD.Get_size() + >>> rank = MPI.COMM_WORLD.Get_rank() + >>> return np.array([i, j, k]), size, rank + >>> + >>> def init_k(): + >>> return {"k": 3} + >>> + >>> with PyMPIExecutor(cores=2, init_function=init_k) as p: + >>> fs = p.submit(calc, 2, j=4) + >>> print(fs.result()) + [(array([2, 4, 3]), 2, 0), (array([2, 4, 3]), 2, 1)] + ``` """ def __init__( @@ -49,11 +75,7 @@ def __init__( class PyMPISingleTaskExecutor(ExecutorBase): """ - The pympipool.Executor behaves like the concurrent.futures.Executor but it uses mpi4py to execute parallel tasks. - In contrast to the mpi4py.futures.MPIPoolExecutor the pympipool.Executor can be executed in a serial python process - and does not require the python script to be executed with MPI. Still internally the pympipool.Executor uses the - mpi4py.futures.MPIPoolExecutor, consequently it is primarily an abstraction of its functionality to improve the - usability in particular when used in combination with Jupyter notebooks. + The pympipool.mpi.PyMPISingleTaskExecutor is the internal worker for the pympipool.mpi.PyMPIExecutor. 
Args: cores (int): defines the number of MPI ranks to use for each function call @@ -61,25 +83,6 @@ class PyMPISingleTaskExecutor(ExecutorBase): init_function (None): optional function to preset arguments for functions which are submitted later cwd (str/None): current working directory where the parallel python task is executed - Examples: - ``` - >>> import numpy as np - >>> from pympipool.mpi.executor import PyMPISingleTaskExecutor - >>> - >>> def calc(i, j, k): - >>> from mpi4py import MPI - >>> size = MPI.COMM_WORLD.Get_size() - >>> rank = MPI.COMM_WORLD.Get_rank() - >>> return np.array([i, j, k]), size, rank - >>> - >>> def init_k(): - >>> return {"k": 3} - >>> - >>> with PyMPISingleTaskExecutor(cores=2, init_function=init_k) as p: - >>> fs = p.submit(calc, 2, j=4) - >>> print(fs.result()) - [(array([2, 4, 3]), 2, 0), (array([2, 4, 3]), 2, 1)] - ``` """ def __init__( diff --git a/pympipool/slurm/executor.py b/pympipool/slurm/executor.py index 33688506..f31ad713 100644 --- a/pympipool/slurm/executor.py +++ b/pympipool/slurm/executor.py @@ -18,6 +18,11 @@ class PySlurmExecutor(ExecutorBase): """ + The pympipool.slurm.PySlurmExecutor leverages the srun command to distribute python tasks within a SLURM queuing + system allocation. In analogy to the pympipool.flux.PyFluxExecutor it provides the option to specify the number of + threads per worker as well as the number of GPUs per worker in addition to specifying the number of cores per + worker. 
+ Args: max_workers (int): defines the number workers which can execute functions in parallel cores_per_worker (int): number of MPI cores to be used for each function call @@ -27,6 +32,27 @@ class PySlurmExecutor(ExecutorBase): init_function (None): optional function to preset arguments for functions which are submitted later cwd (str/None): current working directory where the parallel python task is executed sleep_interval (float): synchronization interval - default 0.1 + + Examples: + ``` + >>> import numpy as np + >>> from pympipool.slurm import PySlurmExecutor + >>> + >>> def calc(i, j, k): + >>> from mpi4py import MPI + >>> size = MPI.COMM_WORLD.Get_size() + >>> rank = MPI.COMM_WORLD.Get_rank() + >>> return np.array([i, j, k]), size, rank + >>> + >>> def init_k(): + >>> return {"k": 3} + >>> + >>> with PySlurmExecutor(cores=2, init_function=init_k) as p: + >>> fs = p.submit(calc, 2, j=4) + >>> print(fs.result()) + + [(array([2, 4, 3]), 2, 0), (array([2, 4, 3]), 2, 1)] + ``` """ def __init__( @@ -63,11 +89,7 @@ def __init__( class PySlurmSingleTaskExecutor(ExecutorBase): """ - The pympipool.Executor behaves like the concurrent.futures.Executor but it uses mpi4py to execute parallel tasks. - In contrast to the mpi4py.futures.MPIPoolExecutor the pympipool.Executor can be executed in a serial python process - and does not require the python script to be executed with MPI. Still internally the pympipool.Executor uses the - mpi4py.futures.MPIPoolExecutor, consequently it is primarily an abstraction of its functionality to improve the - usability in particular when used in combination with Jupyter notebooks. + The pympipool.slurm.PySlurmSingleTaskExecutor is the internal worker for the pympipool.slurm.PySlurmExecutor. 
Args: cores (int): defines the number of MPI ranks to use for each function call @@ -77,25 +99,6 @@ class PySlurmSingleTaskExecutor(ExecutorBase): init_function (None): optional function to preset arguments for functions which are submitted later cwd (str/None): current working directory where the parallel python task is executed - Examples: - ``` - >>> import numpy as np - >>> from pympipool.mpi.executor import PyMPISingleTaskExecutor - >>> - >>> def calc(i, j, k): - >>> from mpi4py import MPI - >>> size = MPI.COMM_WORLD.Get_size() - >>> rank = MPI.COMM_WORLD.Get_rank() - >>> return np.array([i, j, k]), size, rank - >>> - >>> def init_k(): - >>> return {"k": 3} - >>> - >>> with PyMPISingleTaskExecutor(cores=2, init_function=init_k) as p: - >>> fs = p.submit(calc, 2, j=4) - >>> print(fs.result()) - [(array([2, 4, 3]), 2, 0), (array([2, 4, 3]), 2, 1)] - ``` """ def __init__( From 98f2c0bfdeb55dcdf313bc359263913461241b99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Tue, 7 Nov 2023 19:10:22 +0100 Subject: [PATCH 040/134] Add max_workers=1 as default parameter for all Executors --- pympipool/flux/executor.py | 2 +- pympipool/mpi/executor.py | 2 +- pympipool/slurm/executor.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pympipool/flux/executor.py b/pympipool/flux/executor.py index 95425118..eea42163 100644 --- a/pympipool/flux/executor.py +++ b/pympipool/flux/executor.py @@ -27,7 +27,7 @@ class PyFluxExecutor(ExecutorBase): def __init__( self, - max_workers, + max_workers=1, cores_per_worker=1, threads_per_core=1, gpus_per_worker=0, diff --git a/pympipool/mpi/executor.py b/pympipool/mpi/executor.py index 5d137789..8d49b464 100644 --- a/pympipool/mpi/executor.py +++ b/pympipool/mpi/executor.py @@ -21,7 +21,7 @@ class PyMPIExecutor(ExecutorBase): def __init__( self, - max_workers, + max_workers=1, cores_per_worker=1, oversubscribe=False, init_function=None, diff --git a/pympipool/slurm/executor.py b/pympipool/slurm/executor.py 
index 33688506..09316ca6 100644 --- a/pympipool/slurm/executor.py +++ b/pympipool/slurm/executor.py @@ -31,7 +31,7 @@ class PySlurmExecutor(ExecutorBase): def __init__( self, - max_workers, + max_workers=1, cores_per_worker=1, threads_per_core=1, gpus_per_worker=0, From e401b414560efacf077575b2d2744d4cfa57be6a Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 7 Nov 2023 19:16:03 +0100 Subject: [PATCH 041/134] Update pympipool/__init__.py --- pympipool/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pympipool/__init__.py b/pympipool/__init__.py index 3fdd9a5e..77561022 100644 --- a/pympipool/__init__.py +++ b/pympipool/__init__.py @@ -26,7 +26,7 @@ class Executor: def __new__( cls, - max_workers, + max_workers=1, cores_per_worker=1, init_function=None, cwd=None, From 1eff542a2437ff6c2ec664fcff668124ff0d4660 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Tue, 7 Nov 2023 21:03:24 +0100 Subject: [PATCH 042/134] Add __del__() for Executors --- pympipool/shared/executorbase.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pympipool/shared/executorbase.py b/pympipool/shared/executorbase.py index 9e75f31a..810ecc37 100644 --- a/pympipool/shared/executorbase.py +++ b/pympipool/shared/executorbase.py @@ -55,10 +55,15 @@ def shutdown(self, wait=True, *, cancel_futures=False): self._future_queue.put({"shutdown": True, "wait": wait}) self._process.join() self._future_queue.join() + self._process = None def __len__(self): return self._future_queue.qsize() + def __del__(self): + if self._process is not None: + self.shutdown(wait=True) + def _set_init_function(self, init_function): if init_function is not None: self._future_queue.put( From 0163c4bfd75f993053c79b8f83e7dc1b5ee46c8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Tue, 7 Nov 2023 21:13:47 +0100 Subject: [PATCH 043/134] check if attribute is available --- pympipool/shared/executorbase.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/pympipool/shared/executorbase.py b/pympipool/shared/executorbase.py index 810ecc37..2f2e81e7 100644 --- a/pympipool/shared/executorbase.py +++ b/pympipool/shared/executorbase.py @@ -61,7 +61,7 @@ def __len__(self): return self._future_queue.qsize() def __del__(self): - if self._process is not None: + if hasattr(self, "_process") and self._process is not None: self.shutdown(wait=True) def _set_init_function(self, init_function): From 49e9a173dfe2d27566fdcf6a91c08acd00d73f59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Tue, 7 Nov 2023 21:25:00 +0100 Subject: [PATCH 044/134] if no process is defined do not try to join it --- pympipool/shared/executorbase.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pympipool/shared/executorbase.py b/pympipool/shared/executorbase.py index 2f2e81e7..2c672c2c 100644 --- a/pympipool/shared/executorbase.py +++ b/pympipool/shared/executorbase.py @@ -53,7 +53,8 @@ def shutdown(self, wait=True, *, cancel_futures=False): if cancel_futures: cancel_items_in_queue(que=self._future_queue) self._future_queue.put({"shutdown": True, "wait": wait}) - self._process.join() + if self._process is not None: + self._process.join() self._future_queue.join() self._process = None From 7ac050135458c1dcd7dfd22454158890fbad1afa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Tue, 7 Nov 2023 21:30:06 +0100 Subject: [PATCH 045/134] handle exceptions --- pympipool/shared/executorbase.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pympipool/shared/executorbase.py b/pympipool/shared/executorbase.py index 2c672c2c..8e64096a 100644 --- a/pympipool/shared/executorbase.py +++ b/pympipool/shared/executorbase.py @@ -53,8 +53,7 @@ def shutdown(self, wait=True, *, cancel_futures=False): if cancel_futures: cancel_items_in_queue(que=self._future_queue) self._future_queue.put({"shutdown": True, "wait": wait}) - if self._process is not None: - 
self._process.join() + self._process.join() self._future_queue.join() self._process = None @@ -62,8 +61,10 @@ def __len__(self): return self._future_queue.qsize() def __del__(self): - if hasattr(self, "_process") and self._process is not None: + try: self.shutdown(wait=True) + except (AttributeError, RuntimeError): + pass def _set_init_function(self, init_function): if init_function is not None: From 2270484fa1e263cb623e804c24508de8fb3f6515 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 7 Nov 2023 13:35:37 -0800 Subject: [PATCH 046/134] Recombine all similar tests into a single module --- tests/test_args.py | 69 ------------ tests/test_callable.py | 74 ------------- tests/test_exception.py | 55 ---------- tests/test_return.py | 74 ------------- tests/test_timeout.py | 67 ------------ tests/test_with_dynamic_objects.py | 164 +++++++++++++++++++++++++++++ 6 files changed, 164 insertions(+), 339 deletions(-) delete mode 100644 tests/test_args.py delete mode 100644 tests/test_callable.py delete mode 100644 tests/test_exception.py delete mode 100644 tests/test_return.py delete mode 100644 tests/test_timeout.py create mode 100644 tests/test_with_dynamic_objects.py diff --git a/tests/test_args.py b/tests/test_args.py deleted file mode 100644 index 241c4e53..00000000 --- a/tests/test_args.py +++ /dev/null @@ -1,69 +0,0 @@ -from functools import partialmethod -from time import sleep -import unittest - -from pympipool.mpi.executor import PyMPISingleTaskExecutor - - -class Foo: - """ - A base class to be dynamically modified for testing CloudpickleProcessPoolExecutor. - """ - def __init__(self, fnc: callable): - self.fnc = fnc - self.result = None - - @property - def run(self): - return self.fnc - - def process_result(self, future): - self.result = future.result() - - -def dynamic_foo(): - """ - A decorator for dynamically modifying the Foo class to test - CloudpickleProcessPoolExecutor. - - Overrides the `fnc` input of `Foo` with the decorated function. 
- """ - def as_dynamic_foo(fnc: callable): - return type( - "DynamicFoo", - (Foo,), # Define parentage - { - "__init__": partialmethod( - Foo.__init__, - fnc - ) - }, - ) - - return as_dynamic_foo - -class TestUnpickleableElements(unittest.TestCase): - def test_unpickleable_args(self): - """ - We should be able to use an unpickleable return value -- in this case, a - method of a dynamically defined class. - """ - - @dynamic_foo() - def does_nothing(): - return - - @dynamic_foo() - def slowly_returns_unpickleable(unpickleable_arg): - """ - Returns a complex, dynamically defined variable - """ - sleep(0.1) - unpickleable_arg.result = "input updated" - return unpickleable_arg - - dynamic_dynamic = slowly_returns_unpickleable() - executor = PyMPISingleTaskExecutor() - unpicklable_object = does_nothing() - fs = executor.submit(dynamic_dynamic.run, unpicklable_object) - self.assertEqual(fs.result().result, "input updated") diff --git a/tests/test_callable.py b/tests/test_callable.py deleted file mode 100644 index 3bbb2ed8..00000000 --- a/tests/test_callable.py +++ /dev/null @@ -1,74 +0,0 @@ -from functools import partialmethod -from time import sleep -import unittest - -from pympipool.mpi.executor import PyMPISingleTaskExecutor - - -class Foo: - """ - A base class to be dynamically modified for testing CloudpickleProcessPoolExecutor. - """ - def __init__(self, fnc: callable): - self.fnc = fnc - self.result = None - - @property - def run(self): - return self.fnc - - def process_result(self, future): - self.result = future.result() - - -def dynamic_foo(): - """ - A decorator for dynamically modifying the Foo class to test - CloudpickleProcessPoolExecutor. - - Overrides the `fnc` input of `Foo` with the decorated function. 
- """ - def as_dynamic_foo(fnc: callable): - return type( - "DynamicFoo", - (Foo,), # Define parentage - { - "__init__": partialmethod( - Foo.__init__, - fnc - ) - }, - ) - - return as_dynamic_foo - - -class TestUnpickleableElements(unittest.TestCase): - def test_unpickleable_callable(self): - """ - We should be able to use an unpickleable callable -- in this case, a method of - a dynamically defined class. - """ - fortytwo = 42 # No magic numbers; we use it in a couple places so give it a var - - @dynamic_foo() - def slowly_returns_42(): - sleep(0.1) - return fortytwo - - dynamic_42 = slowly_returns_42() # Instantiate the dynamically defined class - self.assertIsInstance( - dynamic_42, - Foo, - msg="Just a sanity check that the test is set up right" - ) - self.assertIsNone( - dynamic_42.result, - msg="Just a sanity check that the test is set up right" - ) - executor = PyMPISingleTaskExecutor() - fs = executor.submit(dynamic_42.run) - fs.add_done_callback(dynamic_42.process_result) - self.assertFalse(fs.done(), msg="Should be running on the executor") - self.assertEqual(fortytwo, fs.result(), msg="Future must complete") - self.assertEqual(fortytwo, dynamic_42.result, msg="Callback must get called") \ No newline at end of file diff --git a/tests/test_exception.py b/tests/test_exception.py deleted file mode 100644 index fe0a33f6..00000000 --- a/tests/test_exception.py +++ /dev/null @@ -1,55 +0,0 @@ -from functools import partialmethod -import unittest - -from pympipool.mpi.executor import PyMPISingleTaskExecutor - - -class Foo: - """ - A base class to be dynamically modified for testing CloudpickleProcessPoolExecutor. - """ - def __init__(self, fnc: callable): - self.fnc = fnc - self.result = None - - @property - def run(self): - return self.fnc - - def process_result(self, future): - self.result = future.result() - - -def dynamic_foo(): - """ - A decorator for dynamically modifying the Foo class to test - CloudpickleProcessPoolExecutor. 
- - Overrides the `fnc` input of `Foo` with the decorated function. - """ - def as_dynamic_foo(fnc: callable): - return type( - "DynamicFoo", - (Foo,), # Define parentage - { - "__init__": partialmethod( - Foo.__init__, - fnc - ) - }, - ) - - return as_dynamic_foo - - -class TestUnpickleableElements(unittest.TestCase): - def test_exception(self): - @dynamic_foo() - def raise_error(): - raise RuntimeError - - re = raise_error() - executor = PyMPISingleTaskExecutor() - fs = executor.submit(re.run) - with self.assertRaises(RuntimeError): - fs.result() diff --git a/tests/test_return.py b/tests/test_return.py deleted file mode 100644 index b54dd066..00000000 --- a/tests/test_return.py +++ /dev/null @@ -1,74 +0,0 @@ -from functools import partialmethod -from time import sleep -import unittest - -from pympipool.mpi.executor import PyMPISingleTaskExecutor - - -class Foo: - """ - A base class to be dynamically modified for testing CloudpickleProcessPoolExecutor. - """ - def __init__(self, fnc: callable): - self.fnc = fnc - self.result = None - - @property - def run(self): - return self.fnc - - def process_result(self, future): - self.result = future.result() - - -def dynamic_foo(): - """ - A decorator for dynamically modifying the Foo class to test - CloudpickleProcessPoolExecutor. - - Overrides the `fnc` input of `Foo` with the decorated function. - """ - def as_dynamic_foo(fnc: callable): - return type( - "DynamicFoo", - (Foo,), # Define parentage - { - "__init__": partialmethod( - Foo.__init__, - fnc - ) - }, - ) - - return as_dynamic_foo - - -class TestUnpickleableElements(unittest.TestCase): - def test_unpickleable_return(self): - """ - We should be able to use an unpickleable return value -- in this case, a - method of a dynamically defined class. 
- """ - - @dynamic_foo() - def does_nothing(): - return - - @dynamic_foo() - def slowly_returns_unpickleable(): - """ - Returns a complex, dynamically defined variable - """ - sleep(0.1) - inside_variable = does_nothing() - inside_variable.result = "it was an inside job!" - return inside_variable - - dynamic_dynamic = slowly_returns_unpickleable() - executor = PyMPISingleTaskExecutor() - fs = executor.submit(dynamic_dynamic.run) - self.assertIsInstance( - fs.result(), - Foo, - ) - self.assertEqual(fs.result().result, "it was an inside job!") \ No newline at end of file diff --git a/tests/test_timeout.py b/tests/test_timeout.py deleted file mode 100644 index 3a0c2e29..00000000 --- a/tests/test_timeout.py +++ /dev/null @@ -1,67 +0,0 @@ -from functools import partialmethod -from concurrent.futures import TimeoutError -from time import sleep -import unittest - -from pympipool.mpi.executor import PyMPISingleTaskExecutor - - -class Foo: - """ - A base class to be dynamically modified for testing CloudpickleProcessPoolExecutor. - """ - def __init__(self, fnc: callable): - self.fnc = fnc - self.result = None - - @property - def run(self): - return self.fnc - - def process_result(self, future): - self.result = future.result() - - -def dynamic_foo(): - """ - A decorator for dynamically modifying the Foo class to test - CloudpickleProcessPoolExecutor. - - Overrides the `fnc` input of `Foo` with the decorated function. 
- """ - def as_dynamic_foo(fnc: callable): - return type( - "DynamicFoo", - (Foo,), # Define parentage - { - "__init__": partialmethod( - Foo.__init__, - fnc - ) - }, - ) - - return as_dynamic_foo - - -class TestUnpickleableElements(unittest.TestCase): - def test_timeout(self): - fortytwo = 42 - - @dynamic_foo() - def slow(): - sleep(0.1) - return fortytwo - - f = slow() - executor = PyMPISingleTaskExecutor() - fs = executor.submit(f.run) - self.assertEqual( - fs.result(timeout=30), - fortytwo, - msg="waiting long enough should get the result" - ) - - with self.assertRaises(TimeoutError): - fs = executor.submit(f.run) - fs.result(timeout=0.0001) diff --git a/tests/test_with_dynamic_objects.py b/tests/test_with_dynamic_objects.py new file mode 100644 index 00000000..6c6b6b3d --- /dev/null +++ b/tests/test_with_dynamic_objects.py @@ -0,0 +1,164 @@ +""" +The purpose of these tests is the check executor behaviour when the python objects +are dynamically generated -- a case that is typically quite tricky for serializers. +""" +from functools import partialmethod +from time import sleep +import unittest + +from pympipool.mpi.executor import PyMPISingleTaskExecutor + + +class Foo: + """ + A base class to be dynamically modified for putting an executor/serializer through + its paces. + """ + def __init__(self, fnc: callable): + self.fnc = fnc + self.result = None + + @property + def run(self): + return self.fnc + + def process_result(self, future): + self.result = future.result() + + +def dynamic_foo(): + """ + A decorator for dynamically modifying the Foo class to test + CloudpickleProcessPoolExecutor. + + Overrides the `fnc` input of `Foo` with the decorated function. 
+ """ + def as_dynamic_foo(fnc: callable): + return type( + "DynamicFoo", + (Foo,), # Define parentage + { + "__init__": partialmethod( + Foo.__init__, + fnc + ) + }, + ) + + return as_dynamic_foo + + +class TestDynamicallyDefinedObjects(unittest.TestCase): + def test_unpickleable_args(self): + """ + We should be able to use an dynamically defined return value. + """ + + @dynamic_foo() + def does_nothing(): + return + + @dynamic_foo() + def slowly_returns_unpickleable(unpickleable_arg): + """ + Returns a complex, dynamically defined variable + """ + sleep(0.1) + unpickleable_arg.result = "input updated" + return unpickleable_arg + + dynamic_dynamic = slowly_returns_unpickleable() + executor = PyMPISingleTaskExecutor() + unpicklable_object = does_nothing() + fs = executor.submit(dynamic_dynamic.run, unpicklable_object) + self.assertEqual(fs.result().result, "input updated") + + def test_unpickleable_callable(self): + """ + We should be able to use an unpickleable callable -- in this case, a method of + a dynamically defined class. 
+ """ + fortytwo = 42 # No magic numbers; we use it in a couple places so give it a var + + @dynamic_foo() + def slowly_returns_42(): + sleep(0.1) + return fortytwo + + dynamic_42 = slowly_returns_42() # Instantiate the dynamically defined class + self.assertIsInstance( + dynamic_42, + Foo, + msg="Just a sanity check that the test is set up right" + ) + self.assertIsNone( + dynamic_42.result, + msg="Just a sanity check that the test is set up right" + ) + executor = PyMPISingleTaskExecutor() + fs = executor.submit(dynamic_42.run) + fs.add_done_callback(dynamic_42.process_result) + self.assertFalse(fs.done(), msg="Should be running on the executor") + self.assertEqual(fortytwo, fs.result(), msg="Future must complete") + self.assertEqual(fortytwo, dynamic_42.result, msg="Callback must get called") + + def test_exception(self): + @dynamic_foo() + def raise_error(): + raise RuntimeError + + re = raise_error() + executor = PyMPISingleTaskExecutor() + fs = executor.submit(re.run) + with self.assertRaises(RuntimeError): + fs.result() + + def test_unpickleable_return(self): + """ + We should be able to use an unpickleable return value -- in this case, a + method of a dynamically defined class. + """ + + @dynamic_foo() + def does_nothing(): + return + + @dynamic_foo() + def slowly_returns_unpickleable(): + """ + Returns a complex, dynamically defined variable + """ + sleep(0.1) + inside_variable = does_nothing() + inside_variable.result = "it was an inside job!" 
+ return inside_variable + + dynamic_dynamic = slowly_returns_unpickleable() + executor = PyMPISingleTaskExecutor() + fs = executor.submit(dynamic_dynamic.run) + self.assertIsInstance( + fs.result(), + Foo, + ) + self.assertEqual(fs.result().result, "it was an inside job!") + + def test_timeout(self): + fortytwo = 42 + + @dynamic_foo() + def slow(): + sleep(0.1) + return fortytwo + + f = slow() + executor = PyMPISingleTaskExecutor() + fs = executor.submit(f.run) + self.assertEqual( + fs.result(timeout=30), + fortytwo, + msg="waiting long enough should get the result" + ) + + with self.assertRaises(TimeoutError): + fs = executor.submit(f.run) + fs.result(timeout=0.0001) \ No newline at end of file From 4a2af62a50119806194e6bd93ebd34f6357394cc Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 7 Nov 2023 13:36:40 -0800 Subject: [PATCH 047/134] Directly test the user-facing executor --- tests/test_with_dynamic_objects.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_with_dynamic_objects.py b/tests/test_with_dynamic_objects.py index 6c6b6b3d..e952753a 100644 --- a/tests/test_with_dynamic_objects.py +++ b/tests/test_with_dynamic_objects.py @@ -6,7 +6,7 @@ from time import sleep import unittest -from pympipool.mpi.executor import PyMPISingleTaskExecutor +from pympipool.mpi.executor import PyMPIExecutor as Executor class Foo: @@ -68,7 +68,7 @@ def slowly_returns_unpickleable(unpickleable_arg): return unpickleable_arg dynamic_dynamic = slowly_returns_unpickleable() - executor = PyMPISingleTaskExecutor() + executor = Executor() unpicklable_object = does_nothing() fs = executor.submit(dynamic_dynamic.run, unpicklable_object) self.assertEqual(fs.result().result, "input updated") @@ -95,7 +95,7 @@ def slowly_returns_42(): dynamic_42.result, msg="Just a sanity check that the test is set up right" ) - executor = PyMPISingleTaskExecutor() + executor = Executor() fs = executor.submit(dynamic_42.run) 
fs.add_done_callback(dynamic_42.process_result) self.assertFalse(fs.done(), msg="Should be running on the executor") @@ -108,7 +108,7 @@ def raise_error(): raise RuntimeError re = raise_error() - executor = PyMPISingleTaskExecutor() + executor = Executor() fs = executor.submit(re.run) with self.assertRaises(RuntimeError): fs.result() @@ -134,7 +134,7 @@ def slowly_returns_unpickleable(): return inside_variable dynamic_dynamic = slowly_returns_unpickleable() - executor = PyMPISingleTaskExecutor() + executor = Executor() fs = executor.submit(dynamic_dynamic.run) self.assertIsInstance( fs.result(), @@ -151,7 +151,7 @@ def slow(): return fortytwo f = slow() - executor = PyMPISingleTaskExecutor() + executor = Executor() fs = executor.submit(f.run) self.assertEqual( fs.result(timeout=30), From 820e14e31a93c7e8132ba4101b5a059b71166dfc Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 7 Nov 2023 13:39:18 -0800 Subject: [PATCH 048/134] Replace "unpickleable" with "dynamic" --- tests/test_with_dynamic_objects.py | 31 ++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/tests/test_with_dynamic_objects.py b/tests/test_with_dynamic_objects.py index e952753a..5cbc0766 100644 --- a/tests/test_with_dynamic_objects.py +++ b/tests/test_with_dynamic_objects.py @@ -1,6 +1,9 @@ """ The purpose of these tests is the check executor behaviour when the python objects -are dynamically generated -- a case that is typically quite tricky for serializers. +are dynamically generated. +This is a special (and rather difficult) case for serializing objects which cannot +be pickled using the standard pickle module, and thus poses a relatively thorough test +for the general un-pickle-able case. 
""" from functools import partialmethod from time import sleep @@ -49,7 +52,7 @@ def as_dynamic_foo(fnc: callable): class TestDynamicallyDefinedObjects(unittest.TestCase): - def test_unpickleable_args(self): + def test_dynamic_args(self): """ We should be able to use an dynamically defined return value. """ @@ -59,23 +62,23 @@ def does_nothing(): return @dynamic_foo() - def slowly_returns_unpickleable(unpickleable_arg): + def slowly_returns_dynamic(dynamic_arg): """ Returns a complex, dynamically defined variable """ sleep(0.1) - unpickleable_arg.result = "input updated" - return unpickleable_arg + dynamic_arg.result = "input updated" + return dynamic_arg - dynamic_dynamic = slowly_returns_unpickleable() + dynamic_dynamic = slowly_returns_dynamic() executor = Executor() - unpicklable_object = does_nothing() - fs = executor.submit(dynamic_dynamic.run, unpicklable_object) + dynamic_object = does_nothing() + fs = executor.submit(dynamic_dynamic.run, dynamic_object) self.assertEqual(fs.result().result, "input updated") - def test_unpickleable_callable(self): + def test_dynamic_callable(self): """ - We should be able to use an unpickleable callable -- in this case, a method of + We should be able to use an dynamic callable -- in this case, a method of a dynamically defined class. """ fortytwo = 42 # No magic numbers; we use it in a couple places so give it a var @@ -113,9 +116,9 @@ def raise_error(): with self.assertRaises(RuntimeError): fs.result() - def test_unpickleable_return(self): + def test_dynamic_return(self): """ - We should be able to use an unpickleable return value -- in this case, a + We should be able to use an dynamic return value -- in this case, a method of a dynamically defined class. 
""" @@ -124,7 +127,7 @@ def does_nothing(): return @dynamic_foo() - def slowly_returns_unpickleable(): + def slowly_returns_dynamic(): """ Returns a complex, dynamically defined variable """ @@ -133,7 +136,7 @@ def slowly_returns_unpickleable(): inside_variable.result = "it was an inside job!" return inside_variable - dynamic_dynamic = slowly_returns_unpickleable() + dynamic_dynamic = slowly_returns_dynamic() executor = Executor() fs = executor.submit(dynamic_dynamic.run) self.assertIsInstance( From de207164872be7637f13253a70ba3070c16b7174 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 7 Nov 2023 13:41:35 -0800 Subject: [PATCH 049/134] Fix grammar for indefinite articles after consonant-vowel switch --- tests/test_with_dynamic_objects.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_with_dynamic_objects.py b/tests/test_with_dynamic_objects.py index 5cbc0766..9609145e 100644 --- a/tests/test_with_dynamic_objects.py +++ b/tests/test_with_dynamic_objects.py @@ -54,7 +54,7 @@ def as_dynamic_foo(fnc: callable): class TestDynamicallyDefinedObjects(unittest.TestCase): def test_dynamic_args(self): """ - We should be able to use an dynamically defined return value. + We should be able to use a dynamically defined return value. """ @dynamic_foo() @@ -78,7 +78,7 @@ def slowly_returns_dynamic(dynamic_arg): def test_dynamic_callable(self): """ - We should be able to use an dynamic callable -- in this case, a method of + We should be able to use a dynamic callable -- in this case, a method of a dynamically defined class. """ fortytwo = 42 # No magic numbers; we use it in a couple places so give it a var @@ -118,7 +118,7 @@ def raise_error(): def test_dynamic_return(self): """ - We should be able to use an dynamic return value -- in this case, a + We should be able to use a dynamic return value -- in this case, a method of a dynamically defined class. 
""" From f5cfdab2022e2e45c69bc43328341056a3c5859f Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 7 Nov 2023 13:42:11 -0800 Subject: [PATCH 050/134] Complete docstrings for the individual tests --- tests/test_with_dynamic_objects.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_with_dynamic_objects.py b/tests/test_with_dynamic_objects.py index 9609145e..7032542b 100644 --- a/tests/test_with_dynamic_objects.py +++ b/tests/test_with_dynamic_objects.py @@ -106,6 +106,10 @@ def slowly_returns_42(): self.assertEqual(fortytwo, dynamic_42.result, msg="Callback must get called") def test_exception(self): + """ + Exceptions from dynamically defined callables should get cleanly raised. + """ + @dynamic_foo() def raise_error(): raise RuntimeError @@ -146,6 +150,10 @@ def slowly_returns_dynamic(): self.assertEqual(fs.result().result, "it was an inside job!") def test_timeout(self): + """ + Timeouts for dynamically defined callables should be handled ok. + """ + fortytwo = 42 @dynamic_foo() From 7cef19b5c9adfba4f5a07c0039799c966706c5f4 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 7 Nov 2023 13:44:09 -0800 Subject: [PATCH 051/134] Use better variable name Don't double down on the word "result" --- tests/test_with_dynamic_objects.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_with_dynamic_objects.py b/tests/test_with_dynamic_objects.py index 7032542b..55fe47f4 100644 --- a/tests/test_with_dynamic_objects.py +++ b/tests/test_with_dynamic_objects.py @@ -67,14 +67,14 @@ def slowly_returns_dynamic(dynamic_arg): Returns a complex, dynamically defined variable """ sleep(0.1) - dynamic_arg.result = "input updated" + dynamic_arg.attribute_on_dynamic = "attribute updated" return dynamic_arg dynamic_dynamic = slowly_returns_dynamic() executor = Executor() dynamic_object = does_nothing() fs = executor.submit(dynamic_dynamic.run, dynamic_object) - self.assertEqual(fs.result().result, "input updated") + 
self.assertEqual(fs.result().attribute_on_dynamic, "attribute updated") def test_dynamic_callable(self): """ @@ -172,4 +172,4 @@ def slow(): with self.assertRaises(TimeoutError): fs = executor.submit(f.run) - fs.result(timeout=0.0001) \ No newline at end of file + fs.result(timeout=0.0001) From f0ec5b62c62dc378918cf39f01e658d4170500ac Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 7 Nov 2023 13:55:21 -0800 Subject: [PATCH 052/134] Complete messages for asserts --- tests/test_with_dynamic_objects.py | 46 +++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/tests/test_with_dynamic_objects.py b/tests/test_with_dynamic_objects.py index 55fe47f4..82ddc0f4 100644 --- a/tests/test_with_dynamic_objects.py +++ b/tests/test_with_dynamic_objects.py @@ -74,7 +74,12 @@ def slowly_returns_dynamic(dynamic_arg): executor = Executor() dynamic_object = does_nothing() fs = executor.submit(dynamic_dynamic.run, dynamic_object) - self.assertEqual(fs.result().attribute_on_dynamic, "attribute updated") + self.assertEqual( + fs.result().attribute_on_dynamic, + "attribute updated", + msg="The submit callable should have modified the mutable, dynamically " + "defined object with a new attribute." + ) def test_dynamic_callable(self): """ @@ -101,9 +106,22 @@ def slowly_returns_42(): executor = Executor() fs = executor.submit(dynamic_42.run) fs.add_done_callback(dynamic_42.process_result) - self.assertFalse(fs.done(), msg="Should be running on the executor") - self.assertEqual(fortytwo, fs.result(), msg="Future must complete") - self.assertEqual(fortytwo, dynamic_42.result, msg="Callback must get called") + self.assertFalse( + fs.done(), + msg="The submit callable sleeps long enough that we expect to still be " + "running here -- did something fail to get submit to an executor??" 
+ ) + self.assertEqual( + fortytwo, + fs.result(), + msg="The future is expected to behave as usual" + ) + self.assertEqual( + fortytwo, + dynamic_42.result, + msg="The callback modifies its object and should run by the time the result" + "is available -- did it fail to get called?" + ) def test_exception(self): """ @@ -117,7 +135,10 @@ def raise_error(): re = raise_error() executor = Executor() fs = executor.submit(re.run) - with self.assertRaises(RuntimeError): + with self.assertRaises( + RuntimeError, + msg="The callable just raises an error -- this should get shown to the user" + ): fs.result() def test_dynamic_return(self): @@ -146,8 +167,15 @@ def slowly_returns_dynamic(): self.assertIsInstance( fs.result(), Foo, + msg="Just a sanity check that we're getting the right type of dynamically " + "defined type of object" + ) + self.assertEqual( + fs.result().result, + "it was an inside job!", + msg="The submit callable modifies the object that owns it, and this should" + "be reflected in the main process after deserialziation" ) - self.assertEqual(fs.result().result, "it was an inside job!") def test_timeout(self): """ @@ -170,6 +198,10 @@ def slow(): msg="waiting long enough should get the result" ) - with self.assertRaises(TimeoutError): + with self.assertRaises( + TimeoutError, + msg="With a timeout time smaller than our submit callable's sleep time, " + "we had better get an exception!" 
+ ): fs = executor.submit(f.run) fs.result(timeout=0.0001) From c407511dbfdf3d290a7e9cf8693c505658d074aa Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 7 Nov 2023 14:36:37 -0800 Subject: [PATCH 053/134] Naively try including the error that's breaking things --- tests/test_with_dynamic_objects.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_with_dynamic_objects.py b/tests/test_with_dynamic_objects.py index 82ddc0f4..020cbd4d 100644 --- a/tests/test_with_dynamic_objects.py +++ b/tests/test_with_dynamic_objects.py @@ -5,6 +5,7 @@ be pickled using the standard pickle module, and thus poses a relatively thorough test for the general un-pickle-able case. """ +from concurrent.futures._base import TimeoutError as cfbTimeoutError from functools import partialmethod from time import sleep import unittest @@ -199,7 +200,7 @@ def slow(): ) with self.assertRaises( - TimeoutError, + (TimeoutError, cfbTimeoutError), msg="With a timeout time smaller than our submit callable's sleep time, " "we had better get an exception!" ): From 847a0a5e5bb8177c1bdeb7aef3958151247a534e Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 8 Nov 2023 08:43:53 -0800 Subject: [PATCH 054/134] Simplify test names --- tests/test_with_dynamic_objects.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_with_dynamic_objects.py b/tests/test_with_dynamic_objects.py index 020cbd4d..032f07b8 100644 --- a/tests/test_with_dynamic_objects.py +++ b/tests/test_with_dynamic_objects.py @@ -53,7 +53,7 @@ def as_dynamic_foo(fnc: callable): class TestDynamicallyDefinedObjects(unittest.TestCase): - def test_dynamic_args(self): + def test_args(self): """ We should be able to use a dynamically defined return value. """ @@ -82,7 +82,7 @@ def slowly_returns_dynamic(dynamic_arg): "defined object with a new attribute." 
) - def test_dynamic_callable(self): + def test_callable(self): """ We should be able to use a dynamic callable -- in this case, a method of a dynamically defined class. From 54bfa60eef167fdcf9948055ab4a89c78471c55f Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 8 Nov 2023 08:47:23 -0800 Subject: [PATCH 055/134] Ensure that callback _methods_ can modify their owners Unsurprisingly, this passed totally fine on my local machine. --- tests/test_with_dynamic_objects.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/test_with_dynamic_objects.py b/tests/test_with_dynamic_objects.py index 032f07b8..5c8923ae 100644 --- a/tests/test_with_dynamic_objects.py +++ b/tests/test_with_dynamic_objects.py @@ -21,6 +21,7 @@ class Foo: def __init__(self, fnc: callable): self.fnc = fnc self.result = None + self.flag = False @property def run(self): @@ -28,6 +29,7 @@ def run(self): def process_result(self, future): self.result = future.result() + self.flag = True def dynamic_foo(): @@ -124,6 +126,27 @@ def slowly_returns_42(): "is available -- did it fail to get called?" ) + def test_callback(self): + """Make sure the callback methods can modify their owners""" + + @dynamic_foo() + def returns_42(): + return 42 + + dynamic_42 = returns_42() + self.assertFalse( + dynamic_42.flag, + msg="Sanity check that the test starts in the expected condition" + ) + executor = Executor() + fs = executor.submit(dynamic_42.run) + fs.add_done_callback(dynamic_42.process_result) + fs.result() # Wait for the process to finish + self.assertTrue( + dynamic_42.flag, + msg="Callback methods need to be able to modify their owners" + ) + def test_exception(self): """ Exceptions from dynamically defined callables should get cleanly raised. 
From f670367442a8a0a2d5729c9f1c9fcc72b748c6f0 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Wed, 8 Nov 2023 18:54:54 +0100 Subject: [PATCH 056/134] Update Docstring for __init__.py --- pympipool/__init__.py | 91 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/pympipool/__init__.py b/pympipool/__init__.py index 77561022..d696a7fa 100644 --- a/pympipool/__init__.py +++ b/pympipool/__init__.py @@ -24,18 +24,99 @@ class Executor: + """ + The pympipool.Executor leverages either the message passing interface (MPI), the SLURM workload manager or preferable + the flux framework for distributing python functions within a given resource allocation. In contrast to the + mpi4py.futures.MPIPoolExecutor the pympipool.Executor can be executed in a serial python process and does not + require the python script to be executed with MPI. It is even possible to execute the pympipool.Executor directly in + an interactive Jupyter notebook. + + Args: + max_workers (int): defines the number workers which can execute functions in parallel + cores_per_worker (int): number of MPI cores to be used for each function call + threads_per_core (int): number of OpenMP threads to be used for each function call + gpus_per_worker (int): number of GPUs per worker - defaults to 0 + oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI only) - default False + init_function (None): optional function to preset arguments for functions which are submitted later + cwd (str/None): current working directory where the parallel python task is executed + sleep_interval (float): synchronization interval - default 0.1 + + Examples: + ``` + >>> import numpy as np + >>> from pympipool import Executor + >>> + >>> def calc(i, j, k): + >>> from mpi4py import MPI + >>> size = MPI.COMM_WORLD.Get_size() + >>> rank = MPI.COMM_WORLD.Get_rank() + >>> return np.array([i, j, k]), size, rank + >>> + >>> def init_k(): + >>> return {"k": 3} + >>> + >>> with 
Executor(cores=2, init_function=init_k) as p: + >>> fs = p.submit(calc, 2, j=4) + >>> print(fs.result()) + [(array([2, 4, 3]), 2, 0), (array([2, 4, 3]), 2, 1)] + ``` + """ + def __init__( + self, + max_workers=1, + cores_per_worker=1, + threads_per_core=1, + gpus_per_worker=0, + oversubscribe=False, + init_function=None, + cwd=None, + sleep_interval=0.1, + executor=None, + ): + # Use __new__() instead of __init__(). This function is only implemented to enable auto-completion. + pass + def __new__( cls, max_workers=1, cores_per_worker=1, + threads_per_core=1, + gpus_per_worker=0, + oversubscribe=False, init_function=None, cwd=None, sleep_interval=0.1, + executor=None, ): + """ + Instead of returning a pympipool.Executor object this function returns either a pympipool.mpi.PyMPIExecutor, + pympipool.slurm.PySlurmExecutor or pympipool.flux.PyFluxExecutor depending on which backend is available. The + pympipool.flux.PyFluxExecutor is the preferred choice while the pympipool.mpi.PyMPIExecutor is primarily used + for development and testing. The pympipool.flux.PyFluxExecutor requires flux-core from the flux-framework to be + installed and in addition flux-sched to enable GPU scheduling. Finally, the pympipool.slurm.PySlurmExecutor + requires the SLURM workload manager to be installed on the system. 
+ + Args: + max_workers (int): defines the number workers which can execute functions in parallel + cores_per_worker (int): number of MPI cores to be used for each function call + threads_per_core (int): number of OpenMP threads to be used for each function call + gpus_per_worker (int): number of GPUs per worker - defaults to 0 + oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI only) - default False + init_function (None): optional function to preset arguments for functions which are submitted later + cwd (str/None): current working directory where the parallel python task is executed + sleep_interval (float): synchronization interval - default 0.1 + """ if flux_installed: + if oversubscribe: + raise ValueError( + "Oversubscribing is not supported for the pympipool.flux.PyFLuxExecutor backend." + "Please use oversubscribe=False instead of oversubscribe=True." + ) return PyFluxExecutor( max_workers=max_workers, cores_per_worker=cores_per_worker, + threads_per_core=threads_per_core, + gpus_per_worker=gpus_per_worker, init_function=init_function, cwd=cwd, sleep_interval=sleep_interval, @@ -49,6 +130,16 @@ def __new__( sleep_interval=sleep_interval, ) else: + if threads_per_core != 1: + raise ValueError( + "Thread based parallelism is not supported for the pympipool.mpi.PyMPIExecutor backend." + "Please use threads_per_core=1 instead of threads_per_core=" + str(threads_per_core) + "." + ) + if gpus_per_worker != 0: + raise ValueError( + "GPU assignment is not supported for the pympipool.mpi.PyMPIExecutor backend." + "Please use gpus_per_worker=0 instead of gpus_per_worker=" + str(gpus_per_worker) + "." 
+ ) return PyMPIExecutor( max_workers=max_workers, cores_per_worker=cores_per_worker, From bc42e91cc49cef8f40c31637cda2d7447718bbce Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Wed, 8 Nov 2023 18:55:08 +0100 Subject: [PATCH 057/134] Update Documentation index --- docs/source/index.rst | 138 +++++++++++++++++++++++++++++++++++------- 1 file changed, 117 insertions(+), 21 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index da66dfac..3070080c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,32 +1,128 @@ -.. pympipool documentation master file, created by - sphinx-quickstart on Sat Jun 10 11:15:31 2023. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. +==================================================================== +pympipool - up-scale python functions for high performance computing +==================================================================== -pympipool - scale python functions over multiple compute nodes -============================================================== +:Author: Jan Janssen +:Contact: janssen@lanl.gov -Up-scaling python functions for high performance computing (HPC) can be challenging. While the python standard library provides interfaces for multiprocessing and asynchronous task execution, namely `multiprocessing `_ and `concurrent.futures `_ both are limited to the execution on a single compute node. So a series of python libraries have been developed to address the up-scaling of python functions for HPC. Starting in the datascience and machine learning community with solutions like `dask `_ over more HPC focused solutions like `parsl `_ up to Python bindings for the message passing interface (MPI) named `mpi4py `_. Each of these solutions has their advantages and disadvantages, in particular the mixing of MPI parallel python functions and serial python functions in combined workflows remains challenging. 
+Up-scaling python functions for high performance computing (HPC) can be challenging. While the python standard library +provides interfaces for multiprocessing and asynchronous task execution, namely +`multiprocessing `_ and +`concurrent.futures `_ both are +limited to the execution on a single compute node. So a series of python libraries have been developed to address the +up-scaling of python functions for HPC. Starting in the datascience and machine learning community with solutions +like `dask `_ over more HPC focused solutions like +`fireworks `_ and `parsl `_ up to Python +bindings for the message passing interface (MPI) named `mpi4py `_. Each of these +solutions has their advantages and disadvantages, in particular scaling beyond serial python functions, including thread +based parallelism, MPI parallel python application or assignment of GPUs to individual python function remains +challenging. To address these challenges :code:`pympipool` is developed with three goals in mind: -* Reimplement the standard python library interfaces namely `multiprocessing.pool.Pool `_ and `concurrent.futures.Executor `_ as closely as possible, to minimize the barrier of up-scaling an existing workflow to be used on HPC resources. -* Integrate MPI parallel python functions based on `mpi4py `_ on the same level as serial python functions, so both can be combined in a single workflow. This allows the users to parallelize their workflows one function at a time. Internally this is achieved by coupling a serial python process to a MPI parallel python process. +* Extend the standard python library `concurrent.futures.Executor `_ interface, to minimize the barrier of up-scaling an existing workflow to be used on HPC resources. +* Integrate thread based parallelism, MPI parallel python functions based on `mpi4py `_ and GPU assignment. This allows the users to accelerate their workflows one function at a time. 
* Embrace `Jupyter `_ notebooks for the interactive development of HPC workflows, as they allow the users to document their though process right next to the python code and their results all within one document. -Features --------- -As different users and different workflows have different requirements in terms of the level of parallelization, the -:code:`pympipool` implements a series of five different interfaces: +HPC Context +----------- +In contrast to frameworks like `dask `_, `fireworks `_ +and `parsl `_ which can be used to submit a number of worker processes directly to the HPC +queuing system and then transfer tasks from either the login node or an interactive allocation to these worker processes +to accelerate the execution, `mpi4py `_ and :code:`pympipool` follow a different +approach. Here the user creates their HPC allocation first and then `mpi4py `_ or +:code:`pympipool` can be used to distribute the tasks within this allocation. The advantage of this approach is that +no central data storage is required as the workers and the scheduling task can communicate directly. -* `pympipool.Pool `_: Following the `multiprocessing.pool.Pool `_ the :code:`pympipool.Pool` class implements the `map()` and `starmap()` functions. Internally these connect to an MPI parallel subprocess running the `mpi4py.futures.MPIPoolExecutor `_. So by increasing the number of workers, by setting the :code:`max_workers` parameter the :code:`pympipool.Pool` can scale the execution of serial python functions beyond a single compute node. For MPI parallel python functions the :code:`pympipool.MPISpawnPool` is derived from the :code:`pympipool.Pool` and uses :code:`MPI_Spawn()` to execute those. For more details see below. -* `pympipool.Executor `_: The easiest way to execute MPI parallel python functions right next to serial python functions is the :code:`pympipool.Executor`. It implements the executor interface defined by the `concurrent.futures.Executor `_.
So functions are submitted to the :code:`pympipool.Executor` using the :code:`submit()` function, which returns an `concurrent.futures.Future `_ object. With these `concurrent.futures.Future `_ objects asynchronous workflows can be constructed which periodically check if the computation is completed `done()` and then query the results using the :code:`result()` function. The limitation of the :code:`pympipool.Executor` is lack of load balancing, each :code:`pympipool.Executor` acts as a serial first in first out (FIFO) queue. So it is the task of the user to balance the load of many different tasks over multiple :code:`pympipool.Executor` instances. -* `pympipool.HPCExecutor `_: To address the limitation of the :code:`pympipool.Executor` that only a single task is executed at any time, the :code:`pympipool.HPCExecutor` provides a wrapper around multiple :code:`pympipool.Executor` objects. It balances the queues of the individual :code:`pympipool.Executor` objects to maximize the throughput for the given resources. This functionality comes with an additional overhead of another thread, acting as a broker between the task queue of the :code:`pympipool.HPCExecutor` and the individual :code:`pympipool.Executor` objects. -* `pympipool.PoolExecutor `_: To combine the functionality of the :code:`pympipool.Pool` and the :code:`pympipool.Executor` the :code:`pympipool.PoolExecutor` again connects to the `mpi4py.futures.MPIPoolExecutor `_. Still in contrast to the :code:`pympipool.Pool` it does not implement the :code:`map()` and :code:`starmap()` functions but rather the :code:`submit()` function based on the `concurrent.futures.Executor `_ interface. In this case the load balancing happens internally and the maximum number of workers :code:`max_workers` defines the maximum number of parallel tasks. But only serial python tasks can be executed in contrast to the :code:`pympipool.Executor` which can also execute MPI parallel python tasks. 
-* `pympipool.MPISpawnPool `_: An alternative way to support MPI parallel functions in addition to the :code:`pympipool.Executor` is the :code:`pympipool.MPISpawnPool`. Just like the :code:`pympipool.Pool` it supports the :code:`map()` and :code:`starmap()` functions. The additional :code:`ranks_per_task` parameter defines how many MPI ranks are used per task. All functions are executed with the same number of MPI ranks. The limitation of this approach is that it uses :code:`MPI_Spawn()` to create new MPI ranks for the execution of the individual tasks. Consequently, this approach is not as scalable as the :code:`pympipool.Executor` but it offers load balancing for a large number of similar MPI parallel tasks. -* `pympipool.SocketInterface `_: The key functionality of the :code:`pympipool` package is the coupling of a serial python process with an MPI parallel python process. This happens in the background using a combination of the `zero message queue `_ and `cloudpickle `_ to communicate binary python objects. The :code:`pympipool.SocketInterface` is an abstraction of this interface, which is used in the other classes inside :code:`pympipool` and might also be helpful for other projects. +Example +------- +The following examples illustrates how :code:`pympipool` can be used to distribute a series of MPI parallel function +calls within a queuing system allocation. :code:`example.py`:: -In addition to using MPI to start a number of processes on different HPC computing resources, :code:`pympipool` also supports the `flux-framework `_ as additional backend. By setting the optional :code:`enable_flux_backend` parameter to :code:`True` the flux backend can be enabled for the :code:`pympipool.Pool`, :code:`pympipool.Executor` and :code:`pympipool.PoolExecutor`. 
Other optional parameters include the selection of the working directory where the python function should be executed :code:`cwd` and the option to oversubscribe MPI tasks which is an `OpenMPI `_ specific feature which can be enabled by setting :code:`oversubscribe` to :code:`True`. For more details on the :code:`pympipool` classes and their application, the extended documentation is linked below. + from pympipool import Executor + + def calc(i): + from mpi4py import MPI + size = MPI.COMM_WORLD.Get_size() + rank = MPI.COMM_WORLD.Get_rank() + return i, size, rank + + with Executor(max_workers=2, cores_per_worker=2) as exe: + fs_0 = exe.submit(calc, 0) + fs_1 = exe.submit(calc, 1) + print(fs_0.result(), fs_1.result()) + +This example can be executed using:: + + python example.py + +Which returns:: + + [(0, 2, 0), (0, 2, 1)], [(1, 2, 0), (1, 2, 1)] + +The important part in this example is that `mpi4py `_ is only used in the :code:`calc()` +function, not in the python script, consequently it is not necessary to call the script with :code:`mpiexec` but instead +a call with the regular python interpreter is sufficient. This highlights how :code:`pympipool` allows the users to +parallelize one function at a time and not having to convert their whole workflow to use `mpi4py `_. +The same code can also be executed inside a jupyter notebook directly which enables an interactive development process. + +The standard `concurrent.futures.Executor `_ +interface is extended by adding the option :code:`cores_per_worker=2` to assign multiple MPI ranks to each function call. +To create two workers :code:`max_workers=2` each with two cores each requires a total of four CPU cores to be available. +After submitting the function :code:`calc()` with the corresponding parameter to the executor :code:`exe.submit(calc, 0)` +a python `concurrent.futures.Future `_ is +returned. 
Consequently, the :code:`pympipool.Executor` can be used as a drop-in replacement for the +`concurrent.futures.Executor `_ +which allows the user to add parallelism to their workflow one function at a time. + +Backend +------- +Depending on the availability of different resource schedulers in your HPC environment the :code:`pympipool.Executor` +uses a different backend, with the :code:`pympipool.flux.PyFluxExecutor` being the preferred backend: + +* `pympipool.mpi.PyMPIExecutor `_: The simplest executor of the three uses `mpi4py `_ as a backend. This simplifies the installation on all operating systems including Windows. Still at the same time it limits the up-scaling to a single compute node and serial or MPI parallel python functions. There is no support for thread based parallelism or GPU assignment. This interface is primarily used for testing and developing or as a fall-back solution. It is not recommended to use this interface in production. +* `pympipool.slurm.PySlurmExecutor `_: The `SLURM workload manager `_ is commonly used on HPC systems to schedule and distribute tasks. :code:`pympipool` provides a python interface for scheduling the execution of python functions as SLURM job steps which are typically created using the :code:`srun` command. This executor supports serial python functions, thread based parallelism, MPI based parallelism and the assignment of GPUs to individual python functions. When the `SLURM workload manager `_ is installed on your HPC cluster this interface can be a reasonable choice, still depending on the SLURM configuration it can be limited in terms of the fine-grained scheduling or the responsiveness when working with hundreds of compute nodes in an individual allocation. +* `pympipool.flux.PyFluxExecutor `_: The `flux `_ is the preferred backend for :code:`pympipool`.
Just like the :code:`pympipool.slurm.PySlurmExecutor` it supports serial python functions, thread based parallelism, MPI based parallelism and the assignment of GPUs to individual python functions. Still the advantages of using the `flux `_ as a backend are the easy installation, the faster allocation of resources as the resources are managed within the allocation and no central database is used and the superior level of fine-grained resource assignment which is typically not available on HPC resource schedulers. + +Each of these backends consists of two parts: a broker and a worker. When a new task is submitted from the user it is +received by the broker and the broker identifies the first available worker. The worker then executes the task and returns +the result to the broker, who returns it to the user. While there is only one broker per :code:`pympipool.Executor` the number +of workers can be specified with the :code:`max_workers` parameter. + +Disclaimer +---------- +While we try to develop a stable and reliable software library, the development remains an open-source project under the +BSD 3-Clause License without any warranties:: + + BSD 3-Clause License + + Copyright (c) 2022, Jan Janssen + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission.
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Documentation ------------- @@ -35,7 +131,7 @@ Documentation :maxdepth: 2 installation - interfaces + examples development * :ref:`modindex` \ No newline at end of file From 71ed46d42c32bfe1bb836cc6946940562db1a2fc Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Wed, 8 Nov 2023 18:55:15 +0100 Subject: [PATCH 058/134] Update Installation --- docs/source/installation.md | 119 +++++++++++++++++++++++++++++++++--- 1 file changed, 111 insertions(+), 8 deletions(-) diff --git a/docs/source/installation.md b/docs/source/installation.md index e3d3f457..7461aa89 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -1,18 +1,121 @@ # Installation -The `pympipool` package can be installed either via `pip` or `conda`. While most HPC systems use Linux these days, the `pympipool` package can be installed on all major operation systems. +For up-scaling python functions beyond a single compute node `pympipool` requires the integration with a high +performance computing (HPC) resource manager. These HPC resource managers are currently only supported for Linux. Still +for testing and development purposes the `pympipool` package can be installed on all major operating systems including +Windows.
This basic installation is based on the `pympipool.mpi.PyMPIExecutor` interface and allows up-scaling serial +and parallel python functions which use the message passing interface (MPI) for python [`mpi4py`](https://mpi4py.readthedocs.io) +on a single compute node. In addition, the integration with an HPC resource manager provides scaling beyond one compute +node, thread based parallelism and the assignment of GPUs. -## pypi-based installation -`pympipool` can be installed from the python package index (pypi) using the following command: +## Basic Installation +For testing and development purposes the `pympipool` package can be installed on all major operating systems including +Windows. It is recommended to use the [conda package manager](https://anaconda.org/conda-forge/pympipool) for the +installation of the `pympipool` package. Still for advanced users who aim at maximizing their performance by compiling +their own version of `mpi` and `mpi4py` the `pympipool` package is also provided via the +[python package index (pypi)](https://pypi.org/project/pympipool/). + +### conda-based installation +`pympipool` can be installed with the [conda package manager](https://anaconda.org/conda-forge/pympipool): +``` +conda install -c conda-forge pympipool +``` +When resolving the dependencies with `conda` gets slow it is recommended to use `mamba` instead of `conda`. So you can +also install `pympipool` using: +``` +mamba install -c conda-forge pympipool +``` + +### pypi-based installation +`pympipool` can be installed from the [python package index (pypi)](https://pypi.org/project/pympipool/) using the +following command: ``` pip install pympipool ``` -## conda-based installation -The `conda` package combines all dependencies in one package: +## Integration with high performance computing +`pympipool` currently provides interfaces to the [SLURM workload manager](https://www.schedmd.com) and the +[flux framework](https://flux-framework.org).
The [flux framework](https://flux-framework.org) is the +recommended solution, as it can be installed without root user rights and it can be integrated in existing resource +managers like the [SLURM workload manager](https://www.schedmd.com). The advantage of using `pympipool` in combination +with these resource schedulers is the fine-grained resource allocation. In addition to scaling beyond a single compute +node, they add the ability to assign GPUs and thread based parallelism. + +### Flux Framework +For Linux users without a pre-installed resource scheduler in their high performance computing (HPC) environment, the +[flux framework](https://flux-framework.org) can be installed with the `conda` package manager: ``` -conda install -c conda-forge pympipool +conda install -c conda-forge flux-core +``` +For alternative ways to install the [flux framework](https://flux-framework.org) please refer to their official +[documentation](https://flux-framework.readthedocs.io/en/latest/quickstart.html). + +#### Nvidia +For adding GPU support in the [flux framework](https://flux-framework.org) you want to install `flux-sched` in addition +to `flux-core`. For Nvidia GPUs you need: ``` -When resolving the dependencies with `conda` gets slow it is recommended to use `mamba` instead of `conda`. So you can also install `pympipool` using: +conda install -c conda-forge flux-core flux-sched libhwloc=*=cuda* ``` -mamba install -c conda-forge pympipool +In case this fails because there is no GPU on the login node and the `cudatoolkit` cannot be installed you can use the +`CONDA_OVERRIDE_CUDA` environment variable to pretend a local cuda version is installed `conda` can link to using: +``` +CONDA_OVERRIDE_CUDA="11.6" conda install -c conda-forge flux-core flux-sched libhwloc=*=cuda* +``` + +#### AMD +For adding GPU support in the [flux framework](https://flux-framework.org) you want to install `flux-sched` in addition +to `flux-core`.
For AMD GPUs you need: +``` +conda install -c conda-forge flux-core flux-sched ``` + +#### Test Flux +To test the [flux framework](https://flux-framework.org) and validate the GPUs are correctly recognized you can start +a flux instance using: +``` +flux start +``` +Afterwards, you can list the resources accessible to flux using: +``` +flux resource list +``` +This should contain a column for the GPUs if you installed the required dependencies. Here is an example output for a +workstation with a six core CPU and a single GPU: +``` + STATE NNODES NCORES NGPUS NODELIST + free 1 6 1 ljubi + allocated 0 0 0 + down 0 0 0 +``` +As the [flux framework](https://flux-framework.org) only lists physical cores rather than virtual cores enabled by +hyper-threading the total number of CPU cores might be half the number of cores you expect. + +When the [flux framework](https://flux-framework.org) is used inside an existing queuing system, then you have to +communicate these resources to it. For the [SLURM workload manager](https://www.schedmd.com) this is achieved by calling +`flux start` with `srun`. For an interactive session use: +``` +srun --pty flux start +``` +Alternatively, to execute a python script which uses `pympipool` you can call it with: +``` +srun flux start python +``` +In the same way to start a Jupyter Notebook in an interactive allocation you can use: +``` +srun --pty flux start jupyter notebook +``` +Then each jupyter notebook you execute on this jupyter notebook server has access to the resources of the interactive +allocation. + +### SLURM +The installation of the [SLURM workload manager](https://www.schedmd.com) is explained in the corresponding +[documentation](https://slurm.schedmd.com/quickstart_admin.html) . As it requires root access, it is not explained here. +Rather we assume you have access to an HPC cluster which already has SLURM installed. 
+ +While the [SLURM workload manager](https://www.schedmd.com) and the [flux framework](https://flux-framework.org) are +both resource schedulers, the [flux framework](https://flux-framework.org) can also be installed on an HPC system which +uses the [SLURM workload manager](https://www.schedmd.com) as primary resource scheduler. This enables more fine-grained +scheduling like independent GPU access on HPC systems where [SLURM workload manager](https://www.schedmd.com) is +configured to allow only one job step per node. Furthermore, the [flux framework](https://flux-framework.org) provides +superior performance in large allocation with several hundred compute nodes or in the case when many `pympipool.Executor` +objects are created frequently, as each creation of an `pympipool.Executor` results in an `srun` call which is +communicated to the central database of the [SLURM workload manager](https://www.schedmd.com). From e4216935605c0be4b326f3ffe921cae118bd8cce Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Wed, 8 Nov 2023 18:55:29 +0100 Subject: [PATCH 059/134] Restructure Examples --- docs/source/{interfaces.md => examples.md} | 53 +++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) rename docs/source/{interfaces.md => examples.md} (87%) diff --git a/docs/source/interfaces.md b/docs/source/examples.md similarity index 87% rename from docs/source/interfaces.md rename to docs/source/examples.md index b07cccfe..eda38d44 100644 --- a/docs/source/interfaces.md +++ b/docs/source/examples.md @@ -1,4 +1,55 @@ -# Interfaces +# Examples +## Background +### Backends +The availability of certain features depends on the backend `pympipool` is installed with. In particular the thread +based parallelism and the GPU assignment is only available with the `pympipool.slurm.PySlurmExecutor` or the +`pympipool.flux.PyFluxExecutor` backend. 
The latter is recommended based on the easy installation, the faster allocation +of resources as the resources are managed within the allocation and no central databases is used and the superior level +of fine-grained resource assignment which is typically not available on other HPC resource schedulers including the +[SLURM workload manager](https://www.schedmd.com). The `pympipool.flux.PyFluxExecutor` requires +[flux framework](https://flux-framework.org) to be installed in addition to the `pympipool` package. The features are +summarized in the table below: + +| Feature \ Backend | `PyMpiExecutor` | `PySlurmExecutor` | `PyFluxExecutor` | +|:--------------------------:|:---------------:|:-----------------:|:----------------:| +| Thread based parallelism | no | yes | yes | +| MPI based parallelism | yes | yes | yes | +| GPU assignment | no | yes | yes | +| Resource over-subscription | yes | yes | no | +| Scalability | 1 node | ~100 nodes | no limit | + +### Up-Scaling +The `pympipool.Executor` extends the interface of the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) +to simplify the up-scaling of individual functions in a given workflow. In addition, to specifying the maximum number +of workers `max_workers` the user can also specify the number of cores per worker `cores_per_worker` for MPI based +parallelism, the number of threads per core `threads_per_core` for thread based parallelism and the number of GPUs per +worker `gpus_per_worker`. Finally, for those backends which support over-subscribing this can also be enabled using the +`oversubscribe` parameter. 
All these parameters are optional, so the `pympipool.Executor` can be used as a drop-in +replacement for the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures): + +``` +from pympipool import Executor + +with Executor( + max_workers=1, + cores_per_worker=1, + threads_per_core=1, + gpus_per_worker=0, + oversubscribe=False +) as exe: + fs = exe.submit() +``` + +## Serial Python Function + +## Data Loading + +## MPI Parallel Python Function + +## GPU Assignment + +## Backwards compatibility + The `pympipool` class provides five different interfaces to scale python functions over multiple compute nodes. They are briefly summarized here and explained in more detail below. From 02d5e711922abc628fb21c800d745782509250f3 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 8 Nov 2023 13:21:38 -0800 Subject: [PATCH 060/134] Refactor: rename variable --- tests/test_with_dynamic_objects.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/test_with_dynamic_objects.py b/tests/test_with_dynamic_objects.py index 5c8923ae..9ea7e600 100644 --- a/tests/test_with_dynamic_objects.py +++ b/tests/test_with_dynamic_objects.py @@ -21,15 +21,16 @@ class Foo: def __init__(self, fnc: callable): self.fnc = fnc self.result = None - self.flag = False + self.running = False @property def run(self): + self.running = True return self.fnc def process_result(self, future): self.result = future.result() - self.flag = True + self.running = False def dynamic_foo(): @@ -135,15 +136,19 @@ def returns_42(): dynamic_42 = returns_42() self.assertFalse( - dynamic_42.flag, + dynamic_42.running, msg="Sanity check that the test starts in the expected condition" ) executor = Executor() fs = executor.submit(dynamic_42.run) fs.add_done_callback(dynamic_42.process_result) - fs.result() # Wait for the process to finish self.assertTrue( - dynamic_42.flag, + dynamic_42.running, + msg="Submit method need to be able to 
modify their owners" + ) + fs.result() # Wait for the process to finish + self.assertFalse( + dynamic_42.running, msg="Callback methods need to be able to modify their owners" ) From 109f612f824de34e25b185c025da8d9798e190f1 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 8 Nov 2023 13:21:59 -0800 Subject: [PATCH 061/134] Refactor: rename function --- tests/test_with_dynamic_objects.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_with_dynamic_objects.py b/tests/test_with_dynamic_objects.py index 9ea7e600..9dfd8d13 100644 --- a/tests/test_with_dynamic_objects.py +++ b/tests/test_with_dynamic_objects.py @@ -170,7 +170,7 @@ def raise_error(): ): fs.result() - def test_dynamic_return(self): + def test_return(self): """ We should be able to use a dynamic return value -- in this case, a method of a dynamically defined class. From 7f8dcf0e6da2857908e97f6813a3ead4b364092f Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 8 Nov 2023 13:24:24 -0800 Subject: [PATCH 062/134] Use universal executor --- tests/test_with_dynamic_objects.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_with_dynamic_objects.py b/tests/test_with_dynamic_objects.py index 9dfd8d13..29494857 100644 --- a/tests/test_with_dynamic_objects.py +++ b/tests/test_with_dynamic_objects.py @@ -10,7 +10,7 @@ from time import sleep import unittest -from pympipool.mpi.executor import PyMPIExecutor as Executor +from pympipool import Executor class Foo: From 4a229d4fb75c45ebb1c5b8c57842e9ffd2cd6a07 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 8 Nov 2023 13:43:32 -0800 Subject: [PATCH 063/134] Don't wait on deletion --- pympipool/shared/executorbase.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pympipool/shared/executorbase.py b/pympipool/shared/executorbase.py index 8e64096a..39536877 100644 --- a/pympipool/shared/executorbase.py +++ b/pympipool/shared/executorbase.py @@ -62,7 +62,7 @@ def __len__(self): def __del__(self): 
try: - self.shutdown(wait=True) + self.shutdown(wait=False) except (AttributeError, RuntimeError): pass From c934f45e5311c1ac12643b563011ba21ae9f3ae8 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 8 Nov 2023 14:25:36 -0800 Subject: [PATCH 064/134] Only join the queues on shutdown if you were told to wait --- pympipool/shared/executorbase.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pympipool/shared/executorbase.py b/pympipool/shared/executorbase.py index 39536877..1e8d27dc 100644 --- a/pympipool/shared/executorbase.py +++ b/pympipool/shared/executorbase.py @@ -53,8 +53,9 @@ def shutdown(self, wait=True, *, cancel_futures=False): if cancel_futures: cancel_items_in_queue(que=self._future_queue) self._future_queue.put({"shutdown": True, "wait": wait}) - self._process.join() - self._future_queue.join() + if wait: + self._process.join() + self._future_queue.join() self._process = None def __len__(self): From c0f519d8dbd1cc0ca4d30e7ad8ae214501de14a2 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 8 Nov 2023 14:27:01 -0800 Subject: [PATCH 065/134] Remove the reference to the future queue on shutdown It has no impact on the tests here, and without this change the pyiron_workflow tests hang --- pympipool/shared/executorbase.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pympipool/shared/executorbase.py b/pympipool/shared/executorbase.py index 1e8d27dc..f56a349e 100644 --- a/pympipool/shared/executorbase.py +++ b/pympipool/shared/executorbase.py @@ -57,6 +57,7 @@ def shutdown(self, wait=True, *, cancel_futures=False): self._process.join() self._future_queue.join() self._process = None + self._future_queue = None def __len__(self): return self._future_queue.qsize() From 81f326c8871af592946eb114d3e8e5aa9db844bc Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Thu, 9 Nov 2023 16:07:42 +0100 Subject: [PATCH 066/134] fix black formatting --- pympipool/__init__.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git 
a/pympipool/__init__.py b/pympipool/__init__.py index d696a7fa..fff44455 100644 --- a/pympipool/__init__.py +++ b/pympipool/__init__.py @@ -61,6 +61,7 @@ class Executor: [(array([2, 4, 3]), 2, 0), (array([2, 4, 3]), 2, 1)] ``` """ + def __init__( self, max_workers=1, @@ -133,12 +134,16 @@ def __new__( if threads_per_core != 1: raise ValueError( "Thread based parallelism is not supported for the pympipool.mpi.PyMPIExecutor backend." - "Please use threads_per_core=1 instead of threads_per_core=" + str(threads_per_core) + "." + "Please use threads_per_core=1 instead of threads_per_core=" + + str(threads_per_core) + + "." ) if gpus_per_worker != 0: raise ValueError( "GPU assignment is not supported for the pympipool.mpi.PyMPIExecutor backend." - "Please use gpus_per_worker=0 instead of gpus_per_worker=" + str(gpus_per_worker) + "." + "Please use gpus_per_worker=0 instead of gpus_per_worker=" + + str(gpus_per_worker) + + "." ) return PyMPIExecutor( max_workers=max_workers, From 979744032ef96c377d2f50eb2b20443ba568a8f9 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Thu, 9 Nov 2023 17:45:14 +0100 Subject: [PATCH 067/134] Update documentation --- docs/source/development.md | 29 +- docs/source/examples.md | 538 +++++++++++++++++-------------------- docs/source/index.rst | 6 +- 3 files changed, 272 insertions(+), 301 deletions(-) diff --git a/docs/source/development.md b/docs/source/development.md index 871be717..ae2a38de 100644 --- a/docs/source/development.md +++ b/docs/source/development.md @@ -1,10 +1,27 @@ # Development The `pympipool` package is developed based on the need to simplify the up-scaling of python functions over multiple compute nodes. The project is under active development, so the difference between the individual interfaces might not -always be clearly defined. The `pympipool.Pool` interface is the oldest and consequently currently most stable but at -the same time also most limited interface. 
The `pympipool.Executor` is the recommended interface for most workflows but -it can be computationally less efficient than the `pympipool.PoolExecutor` interface for large number of serial python -functions. Finally, the `pympipool.MPISpawnPool` is primarily a prototype of an alternative interface, which is available -for testing but typically not recommended, based on the limitations of initiating new communicators. +always be clearly defined. -Any feedback and contributions are welcome. \ No newline at end of file +## Contributions +Any feedback and contributions are welcome. + +## Integration +The key functionality of the `pympipool` package is the up-scaling of python functions with thread based parallelism, +MPI based parallelism or by assigning GPUs to individual python functions. In the background this is realized using a +combination of the [zero message queue](https://zeromq.org) and [cloudpickle](https://github.com/cloudpipe/cloudpickle) +to communicate binary python objects. The `pympipool.communication.SocketInterface` is an abstraction of this interface, +which is used in the other classes inside `pympipool` and might also be helpful for other projects. It comes with a +series of utility functions: + +* `pympipool.communication.interface_bootup()`: To initialize the interface +* `pympipool.communication.interface_connect()`: To connect the interface to another instance +* `pympipool.communication.interface_send()`: To send messages via this interface +* `pympipool.communication.interface_receive()`: To receive messages via this interface +* `pympipool.communication.interface_shutdown()`: To shutdown the interface + +## Alternative Projects +[dask](https://www.dask.org), [fireworks](https://materialsproject.github.io/fireworks/) and [parsl](http://parsl-project.org) +address similar challenges. 
On the one hand they are more restrictive when it comes to the assignment of resource to +a given worker for execution, on the other hand they provide support beyond the high performance computing (HPC) +environment. \ No newline at end of file diff --git a/docs/source/examples.md b/docs/source/examples.md index eda38d44..0dc2fdf8 100644 --- a/docs/source/examples.md +++ b/docs/source/examples.md @@ -1,32 +1,41 @@ # Examples -## Background -### Backends -The availability of certain features depends on the backend `pympipool` is installed with. In particular the thread -based parallelism and the GPU assignment is only available with the `pympipool.slurm.PySlurmExecutor` or the -`pympipool.flux.PyFluxExecutor` backend. The latter is recommended based on the easy installation, the faster allocation -of resources as the resources are managed within the allocation and no central databases is used and the superior level -of fine-grained resource assignment which is typically not available on other HPC resource schedulers including the -[SLURM workload manager](https://www.schedmd.com). The `pympipool.flux.PyFluxExecutor` requires -[flux framework](https://flux-framework.org) to be installed in addition to the `pympipool` package. The features are -summarized in the table below: +The `pympipool.Executor` extends the interface of the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) +to simplify the up-scaling of individual functions in a given workflow. -| Feature \ Backend | `PyMpiExecutor` | `PySlurmExecutor` | `PyFluxExecutor` | -|:--------------------------:|:---------------:|:-----------------:|:----------------:| -| Thread based parallelism | no | yes | yes | -| MPI based parallelism | yes | yes | yes | -| GPU assignment | no | yes | yes | -| Resource over-subscription | yes | yes | no | -| Scalability | 1 node | ~100 nodes | no limit | +## Compatibility +Starting with the basic example of `1+1=2`. 
With the `ThreadPoolExecutor` from the [`concurrent.futures`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) +standard library this can be written as - `test_thread.py`: +``` +from concurrent.futures import ThreadPoolExecutor -### Up-Scaling -The `pympipool.Executor` extends the interface of the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) -to simplify the up-scaling of individual functions in a given workflow. In addition, to specifying the maximum number +with ThreadPoolExecutor( + max_workers=1, +) as exe: + future = exe.submit(sum, [1, 1]) + print(future.result()) +``` +In this case `max_workers=1` limits the number of threads used by the `ThreadPoolExecutor` to one. Then the `sum()` +function is submitted to the executor with a list with two ones `[1, 1]` as input. A [`concurrent.futures.Future`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) +object is returned. The `Future` object allows checking the status of the execution with the `done()` method which +returns `True` or `False` depending on the state of the execution. Or the main process can wait until the execution is +completed by calling `result()`. + +This example stored in a python file named `test_thread.py` can be executed using the python interpreter: +``` +python test_thread.py +>>> 2 +``` +The result of the calculation is `1+1=2`. + +The `pympipool.Executor` class extends the interface of the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) +class by providing more parameters to specify the level of parallelism.
In addition, to specifying the maximum number of workers `max_workers` the user can also specify the number of cores per worker `cores_per_worker` for MPI based -parallelism, the number of threads per core `threads_per_core` for thread based parallelism and the number of GPUs per -worker `gpus_per_worker`. Finally, for those backends which support over-subscribing this can also be enabled using the +parallelism, the number of threads per core `threads_per_core` for thread based parallelism and the number of GPUs per +worker `gpus_per_worker`. Finally, for those backends which support over-subscribing this can also be enabled using the `oversubscribe` parameter. All these parameters are optional, so the `pympipool.Executor` can be used as a drop-in -replacement for the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures): +replacement for the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures). +The previous example is rewritten for the `pympipool.Executor` in - `test_sum.py`: ``` from pympipool import Executor @@ -37,118 +46,175 @@ with Executor( gpus_per_worker=0, oversubscribe=False ) as exe: - fs = exe.submit() + future = exe.submit(sum, [1,1]) + print(future.result()) ``` +Again this example can be executed with the python interpreter: +``` +python test_sum.py +>>> 2 +``` +The result of the calculation is again `1+1=2`. + +Beyond pre-defined functions like the `sum()` function, the same functionality can be used to submit user-defined +functions. 
In the `test_serial.py` example a custom summation function is defined: +``` +from pympipool import Executor + +def calc(*args): + return sum(*args) + +with Executor(max_workers=2) as exe: + fs_1 = exe.submit(calc, [2, 1]) + fs_2 = exe.submit(calc, [2, 2]) + fs_3 = exe.submit(calc, [2, 3]) + fs_4 = exe.submit(calc, [2, 4]) + print([ + fs_1.result(), + fs_2.result(), + fs_3.result(), + fs_4.result(), + ]) +``` +In contrast to the previous example where just a single function was submitted to a single worker, in this case a total +of four functions is submitted to a group of two workers `max_workers=2`. Consequently, the functions are executed as a +set of two pairs. -## Serial Python Function +The script can be executed with any python interpreter: +``` +python test_serial.py +>>> [3, 4, 5, 6] +``` +It returns the corresponding sums as expected. The same can be achieved with the built-in [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) +classes. Still one advantage of using the `pympipool.Executor` rather than the built-in ones, is the ability to execute +the same commands in interactive environments like [Jupyter notebooks](https://jupyter.org). This is achieved by using +[cloudpickle](https://github.com/cloudpipe/cloudpickle) to serialize the python function and its parameters rather than +the regular pickle package. + +For backwards compatibility with the [`multiprocessing.Pool`](https://docs.python.org/3/library/multiprocessing.html) +class the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) +also implements the `map()` function to map a series of inputs to a function. 
The same `map()` function is also +available in the `pympipool.Executor` - `test_map.py`: +``` +from pympipool import Executor -## Data Loading +def calc(*args): + return sum(*args) -## MPI Parallel Python Function +with Executor(max_workers=2) as exe: + print(list(exe.map(calc, [[2, 1], [2, 2], [2, 3], [2, 4]]))) +``` +Again the script can be executed with any python interpreter: +``` +python test_map.py +>>> [3, 4, 5, 6] +``` +The results remain the same. + +## Data Handling +A limitation of many parallel approaches is the overhead in communication when working with large datasets. Instead of +reading the same dataset repetitively, the `pympipool.Executor` loads the dataset only once per worker and afterwards +each function submitted to this worker has access to the dataset, as it is already loaded in memory. To achieve this +the user defines an initialization function `init_function` which returns a dictionary with one key per dataset. The +keys of the dictionary can then be used as additional input parameters in each function submitted to the `pympipool.Executor`. +This functionality is illustrated in the `test_data.py` example: +``` +from pympipool import Executor -## GPU Assignment +def calc(i, j, k): + return i + j + k -## Backwards compatibility +def init_function(): + return {"j": 4, "k": 3, "l": 2} -The `pympipool` class provides five different interfaces to scale python functions over multiple compute nodes. They are -briefly summarized here and explained in more detail below. +with Executor(cores=1, init_function=init_function) as exe: + fs = exe.submit(calc, 2, j=5) + print(fs.result()) +``` +The function `calc()` requires three inputs `i`, `j` and `k`. But when the function is submitted to the executor only +two inputs are provided `fs = exe.submit(calc, 2, j=5)`. In this case the first input parameter is mapped to `i=2`, the +second input parameter is specified explicitly `j=5` but the third input parameter `k` is not provided. 
So the +`pympipool.Executor` automatically checks the keys set in the `init_function()` function. In this case the returned +dictionary `{"j": 4, "k": 3, "l": 2}` defines `j=4`, `k=3` and `l=2`. For this specific call of the `calc()` function, +`i` and `j` are already provided so `j` is not required, but `k=3` is used from the `init_function()` and as the `calc()` +function does not define the `l` parameter this one is also ignored. + +Again the script can be executed with any python interpreter: +``` +python test_data.py +>>> 10 +``` +The result is `2+5+3=10` as `i=2` and `j=5` are provided during the submission and `k=3` is defined in the `init_function()` +function. -| Feature | Pool | Executor | HPCExecutor | PoolExecutor | MPISpawnPool | -|:----------------------:|:----:|:--------:|:-----------:|:------------:|:------------:| -| `map()` | yes | yes | yes | yes | yes | -| `starmap()` | yes | no | no | no | yes | -| `submit()` | no | yes | yes | yes | no | -| parallel execution | yes | no | yes | yes | yes | -| MPI parallel functions | no | yes | yes | no | yes | -| flux framework support | yes | yes | yes | yes | no | -| internal storage | no | yes | yes | no | no | +## Up-Scaling +The availability of certain features depends on the backend `pympipool` is installed with. In particular the thread +based parallelism and the GPU assignment is only available with the `pympipool.slurm.PySlurmExecutor` or the +`pympipool.flux.PyFluxExecutor` backend. The latter is recommended based on the easy installation, the faster allocation +of resources as the resources are managed within the allocation and no central databases is used and the superior level +of fine-grained resource assignment which is typically not available on other HPC resource schedulers including the +[SLURM workload manager](https://www.schedmd.com). The `pympipool.flux.PyFluxExecutor` requires +[flux framework](https://flux-framework.org) to be installed in addition to the `pympipool` package. 
The features are +summarized in the table below: -While all four interfaces implement the `map()` function, only two of them implement the `starmap()` function while the -rest implements the asynchronous `sumbit()` function which returns [`concurrent.futures.Future`](https://docs.python.org/3/library/concurrent.futures.html#future-objects). -In terms of the execution it is important to differentiate between parallel execution, meaning multiple individual -functions are executed in parallel and MPI parallel functions, which each require multiple MPI ranks to be executed. -Furthermore, most interfaces are integrated with the [flux-framework](https://flux-framework.org) and the -[SLURM queuing system](https://slurm.schedmd.com) so rather than using MPI ranks to distribute functions over multiple -compute nodes, they can also use the flux-framework or the SLURM queuing system for this purpose. Finally, the -`pympipool.Executor` and `pympipool.HPCExecutor` are currently the only interfaces which implements an internal storage, -so data can remain in the executor process while applying multiple functions which interact with this data. +| Feature \ Backend | `PyMpiExecutor` | `PySlurmExecutor` | `PyFluxExecutor` | +|:--------------------------:|:---------------:|:-----------------:|:----------------:| +| Thread based parallelism | no | yes | yes | +| MPI based parallelism | yes | yes | yes | +| GPU assignment | no | yes | yes | +| Resource over-subscription | yes | yes | no | +| Scalability | 1 node | ~100 nodes | no limit | -The sixth interface is the `SocketInterface`. This interface connects two python processes to transfer python objects -between them. It is used for all the above interfaces to connect the serial python process of the user interacts, with -the MPI parallel python process, which executes the python functions over multiple compute nodes. 
+### Thread based Parallelism +The number of threads per core can be controlled with the `threads_per_core` parameter during the initialization of the +`pympipool.Executor`. Unfortunately, there is no uniform way to control the number of cores a given underlying library +uses for thread based parallelism, so it might be necessary to set certain environment variables manually: -## Pool -Following the [`multiprocessing.pool.Pool`](https://docs.python.org/3/library/multiprocessing.html) -the `pympipool.Pool` class implements the `map()` and `starmap()` functions. Internally these connect to an MPI parallel -subprocess running the [`mpi4py.futures.MPIPoolExecutor`](https://mpi4py.readthedocs.io/en/stable/mpi4py.futures.html#mpipoolexecutor). -So by increasing the number of workers, by setting the `max_workers` parameter the `pympipool.Pool` can scale the -execution of serial python functions beyond a single compute node. For MPI parallel python functions the `pympipool.MPISpawnPool` -is derived from the `pympipool.Pool` and uses `MPI_Spawn()` to execute those. For more details see below. +* `OMP_NUM_THREADS`: for openmp +* `OPENBLAS_NUM_THREADS`: for openblas +* `MKL_NUM_THREADS`: for mkl +* `VECLIB_MAXIMUM_THREADS`: for accelerate on Mac Os X +* `NUMEXPR_NUM_THREADS`: for numexpr -Example how to use the `pympipool.Pool` class. This can be executed inside a jupyter notebook, interactive python shell -or as a python script. For the example a python script is used. Write a python test script named `test_pool_map.py`: -```python -import numpy as np -from pympipool import Pool +At the current stage `pympipool.Executor` does not set these parameters itself, so you have to add them in the function +you submit before importing the corresponding library: +``` def calc(i): - return np.array(i ** 2) - -with Pool(max_workers=2) as p: - print(p.map(func=calc, iterable=[1, 2, 3, 4])) -``` -The function `calc()` is applied on the list of arguments `iterable`. 
The script is executed as serial python process, -while internally it uses MPI to execute two sets of two parameters at a time. As you see the `numpy` library is -dynamically included when the function is transferred to the MPI parallel subprocess for execution. To execute the -python file `test_pool.py` in a serial python process use: -``` -python test_pool_map.py ->>> [array(1), array(4), array(9), array(16)] -``` -Beyond the number of workers defined by `max_workers`, the additional parameters are `oversubscribe` to enable -[OpenMPI](https://www.open-mpi.org) over-subscription, `enable_flux_backend` and `enable_slurm_backend` to switch from -MPI as backend to flux or SLURM as alternative backend. In addition, the parameters `queue_adapter` and -`queue_adapter_kwargs` provide an interface to [pysqa](https://pysqa.readthedocs.org) the simple queue adapter for -python. The `queue_adapter` can be set as `pysqa.queueadapter.QueueAdapter` object and the `queue_adapter_kwargs` -parameter represents a dictionary of input arguments for the `submit_job()` function of the queue adapter. Finally, the -`cwd` parameter specifies the current working directory where the python functions are executed. - -In addition to the `map()` function, the `pympipool.Pool` interface implements the `starmap()` function. The example is -very similar to the one above. Just this time the `calc()` function accepts two arguments rather than one: -```python -from pympipool import Pool - -def calc(i, j): - return i + j - -with Pool(max_workers=2) as p: - print(p.starmap(func=calc, iterable=[[1, 2], [3, 4], [5, 6], [7, 8]])) -``` -The script named `test_pool_starmap.py` is executed and the sum of the input parameters is returned: -``` -python test_pool_starmap.py ->>> [3, 7, 11, 15] -``` -In summary the `pympipool.Pool` class implements both the `map()` function and the `starmap()` function to scale serial -python functions over multiple compute nodes. 
It internally handles the load distribution over multiple compute nodes. - -## Executor -The easiest way to execute MPI parallel python functions right next to serial python functions is the `pympipool.Executor`. -It implements the executor interface defined by the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures). -So functions are submitted to the `pympipool.Executor` using the `submit()` function, which returns an -[`concurrent.futures.Future`](https://docs.python.org/3/library/concurrent.futures.html#future-objects) object. With -these [`concurrent.futures.Future`](https://docs.python.org/3/library/concurrent.futures.html#future-objects) objects -asynchronous workflows can be constructed which periodically check if the computation is completed `done()` and then query -the results using the `result()` function. The limitation of the `pympipool.Executor` is lack of load balancing, each -`pympipool.Executor` acts as a serial first in first out (FIFO) queue. So it is the task of the user to balance the load -of many different tasks over multiple `pympipool.Executor` instances. - -In comparison to the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) -in the standard python library the `pympipool.Executor` can execute MPI parallel python functions which internally use -the `mpi4py` library. In this example the `calc()` function returns the total number of MPI ranks and the index of the -individual MPI ranks. By setting the `cores` parameter of the `pympipool.Executor` to `2` the `calc()` function is -executed with two MPI ranks. 
-```python + import os + os.environ["OMP_NUM_THREADS"] = "2" + os.environ["OPENBLAS_NUM_THREADS"] = "2" + os.environ["MKL_NUM_THREADS"] = "2" + os.environ["VECLIB_MAXIMUM_THREADS"] = "2" + os.environ["NUMEXPR_NUM_THREADS"] = "2" + import numpy as np + return i +``` + +Most modern CPUs use hyper-threading to present the operating system with double the number of virtual cores compared to +the number of physical cores available. So unless this functionality is disabled `threads_per_core=2` is a reasonable +default. Just be careful: if the number of threads is not specified, it is possible that all workers try to access all +cores at the same time which can lead to poor performance. So it is typically a good idea to monitor the CPU utilization +with increasing number of workers. + +Specific manycore CPU models like the Intel Xeon Phi processors provide a much higher hyper-threading ratio and require +a higher number of threads per core for optimal performance. + +### MPI Parallel Python Function +Beyond thread based parallelism, the message passing interface (MPI) is the de facto standard for parallel execution in +scientific computing and the [`mpi4py`](https://mpi4py.readthedocs.io) bindings to the MPI libraries are commonly used +to parallelize existing workflows. The limitation of this approach is that it requires the whole code to adopt the MPI +communication standards to coordinate the way information is distributed. Just like the `pympipool.Executor` the +[`mpi4py.futures.MPIPoolExecutor`](https://mpi4py.readthedocs.io/en/stable/mpi4py.futures.html#mpipoolexecutor) +implements the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) +interface. Still, in this case each python function submitted to the executor is limited to serial execution. The +novel approach of the `pympipool.Executor` is mixing these two types of parallelism.
Individual functions can use +the [`mpi4py`](https://mpi4py.readthedocs.io) library to handle the parallel execution within the context of this +function while these functions can still be submitted to the `pympipool.Executor` just like any other function. The +advantage of this approach is that the users can parallelize their workflows one function at a time. + +The example in `test_mpi.py` illustrates the submission of a simple MPI parallel python function: +``` from pympipool import Executor def calc(i): @@ -157,72 +223,32 @@ def calc(i): rank = MPI.COMM_WORLD.Get_rank() return i, size, rank -with Executor(cores=2) as p: - fs = p.submit(calc, 3) +with Executor(cores_per_worker=2) as exe: + fs = exe.submit(calc, 3) print(fs.result()) ``` -The important part is that in contrast to the `mpi4py` library the scripts which use the `pympipool.Executor` class can -be executed as serial python scripts, without the need to invoke external MPI calls. -``` -python test_executor_mpi.py ->>> [(3, 2, 0), (3, 2, 1)] -``` -The responses of the individual MPI ranks are returned as a combined python list. So in this case each MPI rank returns -a triple of the parameter `i=3`, the total number of MPI ranks `2` and the index of the selected MPI rank. - -In addition to the ability to execute MPI parallel functions the `pympipool.Executor` class also implements an internal -data storage, which can be utilized for serial and MPI parallel python functions. By adding an initialization function -`init_function` as additional parameter to the initialization of the `pympipool.Executor` class which returns a dictionary -of python variables, these variables are added to the internal storage. Each function which is submitted to this -`pympipool.Executor` class can use these variables as input parameters, interact with them or modify them.
-```python -from pympipool import Executor - -def calc(i, j, k): - return i + j + k - -def init_function(): - return {"j": 4, "k": 3, "l": 2} +The `calc()` function initializes the [`mpi4py`](https://mpi4py.readthedocs.io) library and gathers the size of the +allocation and the rank of the current process within the MPI allocation. This function is then submitted to an +`pympipool.Executor` which is initialized with a single worker with two cores `cores_per_worker=2`. So each function +call is going to have access to two cores. -with Executor(cores=1, init_function=init_function) as p: - fs = p.submit(calc, 2, j=5) - print(fs.result()) -``` -In this example the `calc()` function takes three arguments `i`,`j` and `k`. While the arguments `j`, `k` and `l` are -set by the `init_function()` function. When the `calc()` function is submitted only the `i` parameter is required, while -the parameters `j` and `k` can be accessed from the internal storage. At the same time the `l` parameter which is not -used by the `calc()` function, does not interact with it. So not all functions have to use all parameters. Finally, -when the parameter is provided during the submission `submit()`, like in this case the `j` parameter, then the submitted -parameter is used rather than the parameter from internal memory. +Just like before the script can be called with any python interpreter even though it is using the [`mpi4py`](https://mpi4py.readthedocs.io) +library in the background it is not necessary to execute the script with `mpiexec` or `mpirun`: ``` -python test_executor_init.py ->>> 10 +python test_mpi.py +>>> [(3, 2, 0), (3, 2, 1)] ``` -So the sum of `i`,`j` and `k` results in `10` rather than `9`. 
Beyond the number of cores defined by `cores`, the number -of GPUs defined by `gpus_per_task` and the initialization function defined by `init_function` the additional parameters -are `oversubscribe` to enable [OpenMPI](https://www.open-mpi.org) over-subscription, `enable_flux_backend` and -`enable_slurm_backend` to switch from MPI as backend to flux or SLURM as alternative backend. In addition, the -parameters `queue_adapter` and `queue_adapter_kwargs` provide an interface to [pysqa](https://pysqa.readthedocs.org) the -simple queue adapter for python. The `queue_adapter` can be set as `pysqa.queueadapter.QueueAdapter` object and the -`queue_adapter_kwargs` parameter represents a dictionary of input arguments for the `submit_job()` function of the queue -adapter. Finally, the `cwd` parameter specifies the current working directory where the python functions are executed. - -When multiple functions are submitted to the `pympipool.Executor` class then they are executed following the first in -first out principle. The `len()` function applied on the `pympipool.Executor` object can be used to list how many items -are still waiting to be executed. - -## HPCExecutor -To address the limitation of the `pympipool.Executor` that only a single task is executed at any time, the -`pympipool.HPCExecutor` provides a wrapper around multiple `pympipool.Executor` objects. It balances the queues of the -individual `pympipool.Executor` objects to maximize the throughput for the given resources. This functionality comes -with an additional overhead of another thread, acting as a broker between the task queue of the `pympipool.HPCExecutor` -and the individual `pympipool.Executor` objects. - -Example how to use the `pympipool.HPCExecutor` class. This can be executed inside a jupyter notebook, interactive python -shell or as a python script. For the example a python script is used. 
Write a python test script named `test_hpc_gpu.py`: +The response consists of a list of two tuples, one for each MPI parallel process, with the first entry of the tuple +being the parameter `i=3`, followed by the number of MPI parallel processes assigned to the function call `cores_per_worker=2` +and finally the index of the specific process `0` or `1`. + +### GPU Assignment +With the rise of machine learning applications, the use of GPUs for scientific application becomes more and more popular. +Consequently, it is essential to have full control over the assignment of GPUs to specific python functions. In the +`test_gpu.py` example the `tensorflow` library is used to identify the GPUs and return their configuration: ``` import socket -from pympipool import HPCExecutor +from pympipool import Executor from tensorflow.python.client import device_lib def get_available_gpus(): @@ -232,121 +258,49 @@ def get_available_gpus(): for x in local_device_protos if x.device_type == 'GPU' ] -with HPCExecutor( +with Executor( max_workers=2, - cores_per_worker=1, gpus_per_worker=1, - enable_flux_backend=True, ) as exe: fs_1 = exe.submit(get_available_gpus) fs_2 = exe.submit(get_available_gpus) -print(fs_1.result()) -print(fs_2.result()) -``` -The example demonstrates how one GPU is assigned to each of the two tasks which are executed in parallel. To access the -GPUs the `tensorflow` package is used in the `get_available_gpus()` function to return a brief summary of the available -GPU. The initialization of the `pympipool.HPCExecutor` then follows the same scheme as the initialization of the -`pympipool.Executor`. The `max_workers` argument defines the number of `pympipool.Executor` objects the -`pympipool.HPCExecutor` is managing internally. Then for each of these `pympipool.Executor` objects the number of cores -is defined by `cores_per_worker` and the number of GPUs is defined by `gpus_per_worker`. 
By default the number of GPUs -is set to zero, as assigning GPUs to tasks requires an advanced scheduling backend like the flux-framework enabled by -the `enable_flux_backend` option or the SLURM queuing system backend enabled by the `enable_slurm_backend` option. In -addition, the parameters `queue_adapter` and `queue_adapter_kwargs` provide an interface to -[pysqa](https://pysqa.readthedocs.org) the simple queue adapter for python. The `queue_adapter` can be set as -`pysqa.queueadapter.QueueAdapter` object and the `queue_adapter_kwargs` parameter represents a dictionary of input -arguments for the `submit_job()` function of the queue adapter. Finally, the `cwd` parameter specifies the current -working directory where the python functions are executed. - -The submission of the individual tasks follows the definition of the `pympipool.HPCExecutor` in analogy to the example -above for the `pympipool.Executor`. Finally, the results are printed to the standard output: -``` -python test_hpc_gpu.py ->>> [('/device:GPU:0', 'device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:84:00.0, compute capability: 7.0', 'cn138')] ->>> [('/device:GPU:0', 'device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:84:00.0, compute capability: 7.0', 'cn139')] -``` -The output highlights that for each of the two workers there is only a single GPU visible. This applies to the example -case where only one GPU is available on each of the two hosts, as well as to hosts with multiple GPUs. Consequently, -the `pympipool.HPCExecutor` drastically simplifies the scheduling of `GPU` and their assignment for python tasks. - -## PoolExecutor -To combine the functionality of the `pympipool.Pool` and the `pympipool.Executor` the `pympipool.PoolExecutor` again -connects to the [`mpi4py.futures.MPIPoolExecutor`](https://mpi4py.readthedocs.io/en/stable/mpi4py.futures.html#mpipoolexecutor). 
-Still in contrast to the `pympipool.Pool` it does not implement the `map()` and `starmap()` functions but rather the -`submit()` function based on the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) -interface. In this case the load balancing happens internally and the maximum number of workers `max_workers` defines -the maximum number of parallel tasks. But only serial python tasks can be executed in contrast to the `pympipool.Executor` -which can also execute MPI parallel python tasks. - -In the example a simple `calc()` function which calculates the sum of two parameters `i` and `j` is submitted with four -different parameter combinations to an `pympipool.PoolExecutor` with a total of two workers specified by the `max_workers` -parameter. -```python -from pympipool import PoolExecutor - -def calc(i, j): - return i + j - -with PoolExecutor(max_workers=2) as p: - fs1 = p.submit(calc, 1, 2) - fs2 = p.submit(calc, 3, 4) - fs3 = p.submit(calc, 5, 6) - fs4 = p.submit(calc, 7, 8) - print(fs1.result(), fs2.result(), fs3.result(), fs4.result()) -``` -The functions are executed in two sets of two function and the result is returned when all functions are executed as the -`result()` call waits until the future object completed. -``` -python test_pool_executor.py ->>> 3 7 11 15 -``` -Beyond the number of workers defined by `max_workers`, the additional parameters are `oversubscribe` to enable -[OpenMPI](https://www.open-mpi.org) over-subscription, `enable_flux_backend` and `enable_slurm_backend` to switch from -MPI as backend to flux or SLURM as alternative backend. In addition, the parameters `queue_adapter` and -`queue_adapter_kwargs` provide an interface to [pysqa](https://pysqa.readthedocs.org) the simple queue adapter for -python. 
The `queue_adapter` can be set as `pysqa.queueadapter.QueueAdapter` object and the `queue_adapter_kwargs` -parameter represents a dictionary of input arguments for the `submit_job()` function of the queue adapter. Finally, the -`cwd` parameter specifies the current working directory where the python functions are executed. - -## MPISpawnPool -An alternative way to support MPI parallel functions in addition to the `pympipool.Executor` is the `pympipool.MPISpawnPool`. -Just like the `pympipool.Pool` it supports the `map()` and `starmap()` functions. The additional `ranks_per_task` -parameter defines how many MPI ranks are used per task. All functions are executed with the same number of MPI ranks. -The limitation of this approach is that it uses `MPI_Spawn()` to create new MPI ranks for the execution of the -individual tasks. Consequently, this approach is not as scalable as the `pympipool.Executor` but it offers load -balancing for a large number of similar MPI parallel tasks. - -In the example the maximum number of workers is defined by the maximum number of MPI ranks `max_ranks` devided by the -number of ranks per task `ranks_per_tasks`. So in the case of a total of four ranks and two ranks per task only two -workers are created. -```python -from pympipool import MPISpawnPool - -def calc(i, comm): - return i, comm.Get_size(), comm.Get_rank() - -with MPISpawnPool(max_ranks=4, ranks_per_task=2) as p: - print(p.map(func=calc, iterable=[1, 2, 3, 4])) -``` -In contrast to the `pympipool.Executor` which returns the results of each individual MPI rank, the `pympipool.MPISpawnPool` -only returns the results of one MPI rank per function call, so it is the users task to synchronize the response of the -MPI parallel functions. 
-``` -python test_mpispawnpool.py ->>> [[1, 2, 0], [2, 2, 0], [3, 2, 0], [4, 2, 0]] -``` -Beyond the maximum number of ranks defined by `max_ranks` and the ranks per task defined by `ranks_per_task` the -additional parameters are `oversubscribe` to enable [OpenMPI](https://www.open-mpi.org) over-subscription. In addition, -the parameters `queue_adapter` and `queue_adapter_kwargs` provide an interface to [pysqa](https://pysqa.readthedocs.org) -the simple queue adapter for python. The `queue_adapter` can be set as `pysqa.queueadapter.QueueAdapter` object and the -`queue_adapter_kwargs` parameter represents a dictionary of input arguments for the `submit_job()` function of the queue -adapter. Finally, the `cwd` parameter specifies the current working directory where the MPI parallel python functions -are executed. The flux backend as well as the SLURM backend are not supported for the `pympipool.MPISpawnPool` as the -`MPI_Spawn()` command is incompatible to the internal management of ranks inside flux and SLURM. - -## SocketInterface -`pympipool.SocketInterface`: The key functionality of the `pympipool` package is the coupling of a serial python process -with an MPI parallel python process. This happens in the background using a combination of the [zero message queue](https://zeromq.org) -and [cloudpickle](https://github.com/cloudpipe/cloudpickle) to communicate binary python objects. The `pympipool.SocketInterface` -is an abstraction of this interface, which is used in the other classes inside `pympipool` and might also be helpful for -other projects. \ No newline at end of file +print(fs_1.result(), fs_2.result()) +``` +The additional parameter `gpus_per_worker=1` specifies that one GPU is assigned to each worker. This functionality +requires `pympipool` to be connected to a resource manager like the [SLURM workload manager](https://www.schedmd.com) +or preferably the [flux framework](https://flux-framework.org). 
The rest of the script follows the previous examples, +as two functions are submitted and the results are printed. + +To clarify the execution of such an example on a high performance computing (HPC) cluster using the [SLURM workload manager](https://www.schedmd.com) +the submission script is given below: +``` +#!/bin/bash +#SBATCH --nodes=2 +#SBATCH --gpus-per-node=1 +#SBATCH --get-user-env=L + +python test_gpu.py +``` +The important part is that for using the `pympipool.slurm.PySlurmExecutor` backend the script `test_gpu.py` does not +need to be executed with `srun` but rather it is sufficient to just execute it with the python interpreter. `pympipool` +internally calls `srun` to assign the individual resources to a given worker. + +For the more complex setup of running the [flux framework](https://flux-framework.org) as a secondary resource scheduler +within the [SLURM workload manager](https://www.schedmd.com) it is essential that the resources are passed from the +[SLURM workload manager](https://www.schedmd.com) to the [flux framework](https://flux-framework.org). This is achieved +by calling `srun flux start` in the submission script: +``` +#!/bin/bash +#SBATCH --nodes=2 +#SBATCH --gpus-per-node=1 +#SBATCH --get-user-env=L + +srun flux start python test_gpu.py +``` +As a result the GPUs available on the two compute nodes are reported: +``` +>>> [('/device:GPU:0', 'device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:84:00.0, compute capability: 7.0', 'cn138'), +>>> ('/device:GPU:0', 'device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:84:00.0, compute capability: 7.0', 'cn139')] +``` +In this case each compute node `cn138` and `cn139` is equipped with one `Tesla V100S-PCIE-32GB`. 
diff --git a/docs/source/index.rst b/docs/source/index.rst index 3070080c..bc18ecc3 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -80,9 +80,9 @@ Backend Depending on the availability of different resource schedulers in your HPC environment the :code:`pympipool.Executor` uses a different backend, with the :code:`pympipool.flux.PyFluxExecutor` being the preferred backend: -* `pympipool.mpi.PyMpiExecutor `_: The simplest executor of the three uses `mpi4py `_ as a backend. This simplifies the installation on all operation systems including Windows. Still at the same time it limits the up-scaling to a single compute node and serial or MPI parallel python functions. There is no support for thread based parallelism or GPU assignment. This interface is primarily used for testing and developing or as a fall-back solution. It is not recommended to use this interface in production. -* `pympipool.slurm.PySlurmExecutor `_: The `SLURM workload manager `_ is commonly used on HPC systems to schedule and distribute tasks. :code:`pympipool` provides a python interface for scheduling the execution of python functions as SLURM job steps which are typically created using the :code:`srun` command. This executor supports serial python functions, thread based parallelism, MPI based parallelism and the assignment of GPUs to individual python functions. When the `SLURM workload manager `_ is installed on your HPC cluster this interface can be a reasonable choice, still depending on the SLURM configuration in can be limited in terms of the fine-grained scheduling or the responsiveness when working with hundreds of compute nodes in an individual allocation. -* `pympipool.flux.PyFluxExecutor `_: The `flux `_ is the preferred backend for :code:`pympipool`. Just like the :code:`pympipool.slurm.PySlurmExecutor` it supports serial python functions, thread based parallelism, MPI based parallelism and the assignment of GPUs to individual python functions. 
Still the advantages of using the `flux `_ as a backend are the easy installation, the faster allocation of resources as the resources are managed within the allocation and no central databases is used and the superior level of fine-grained resource assignment which is typically not available on HPC resource schedulers. +* :code:`pympipool.mpi.PyMpiExecutor`: The simplest executor of the three uses `mpi4py `_ as a backend. This simplifies the installation on all operation systems including Windows. Still at the same time it limits the up-scaling to a single compute node and serial or MPI parallel python functions. There is no support for thread based parallelism or GPU assignment. This interface is primarily used for testing and developing or as a fall-back solution. It is not recommended to use this interface in production. +* :code:`pympipool.slurm.PySlurmExecutor`: The `SLURM workload manager `_ is commonly used on HPC systems to schedule and distribute tasks. :code:`pympipool` provides a python interface for scheduling the execution of python functions as SLURM job steps which are typically created using the :code:`srun` command. This executor supports serial python functions, thread based parallelism, MPI based parallelism and the assignment of GPUs to individual python functions. When the `SLURM workload manager `_ is installed on your HPC cluster this interface can be a reasonable choice, still depending on the SLURM configuration in can be limited in terms of the fine-grained scheduling or the responsiveness when working with hundreds of compute nodes in an individual allocation. +* :code:`pympipool.flux.PyFluxExecutor`: The `flux `_ is the preferred backend for :code:`pympipool`. Just like the :code:`pympipool.slurm.PySlurmExecutor` it supports serial python functions, thread based parallelism, MPI based parallelism and the assignment of GPUs to individual python functions. 
Still the advantages of using the `flux `_ as a backend are the easy installation, the faster allocation of resources as the resources are managed within the allocation and no central databases is used and the superior level of fine-grained resource assignment which is typically not available on HPC resource schedulers. Each of these backends consists of two parts a broker and a worker. When a new tasks is submitted from the user it is received by the broker and the broker identifies the first available worker. The worker then executes a task and returns From ff33cae89b58dfb54ca4ed422afd5743895be49d Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Thu, 9 Nov 2023 17:51:18 +0100 Subject: [PATCH 068/134] Minor corrections --- docs/source/examples.md | 4 ++-- docs/source/index.rst | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/source/examples.md b/docs/source/examples.md index 0dc2fdf8..3fd12121 100644 --- a/docs/source/examples.md +++ b/docs/source/examples.md @@ -165,7 +165,7 @@ summarized in the table below: | Resource over-subscription | yes | yes | no | | Scalability | 1 node | ~100 nodes | no limit | -### Thread based Parallelism +### Thread-based Parallelism The number of threads per core can be controlled with the `threads_per_core` parameter during the initialization of the `pympipool.Executor`. Unfortunately, there is no uniform way to control the number of cores a given underlying library uses for thread based parallelism, so it might be necessary to set certain environment variables manually: @@ -200,7 +200,7 @@ with increasing number of workers. Specific manycore CPU models like the Intel Xeon Phi processors provide a much higher hyper-threading ration and require a higher number of threads per core for optimal performance. 
-### MPI Parallel Python Function +### MPI Parallel Python Functions Beyond thread based parallelism, the message passing interface (MPI) is the de facto standard parallel execution in scientific computing and the [`mpi4py`](https://mpi4py.readthedocs.io) bindings to the MPI libraries are commonly used to parallelize existing workflows. The limitation of this approach is that it requires the whole code to adopt the MPI diff --git a/docs/source/index.rst b/docs/source/index.rst index bc18ecc3..492d7721 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -34,8 +34,8 @@ approach. Here the user creates their HPC allocation first and then `mpi4py `_ which allows the user to add parallelism to their workflow one function at a time. -Backend -------- +Backends +-------- Depending on the availability of different resource schedulers in your HPC environment the :code:`pympipool.Executor` uses a different backend, with the :code:`pympipool.flux.PyFluxExecutor` being the preferred backend: From 0079808006f28a770810340905f9754e40455a54 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Thu, 9 Nov 2023 10:44:04 -0800 Subject: [PATCH 069/134] Test for future persisting past the lifetime of the executor Motivated the `wait=False` in executor __del__ --- tests/test_future.py | 48 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/test_future.py b/tests/test_future.py index 1d59d4ee..f7f5d4d0 100644 --- a/tests/test_future.py +++ b/tests/test_future.py @@ -27,3 +27,51 @@ def test_pool_serial_multi_core(self): sleep(1) self.assertTrue(output.done()) self.assertEqual(output.result(), [np.array(4), np.array(4)]) + + def test_independence_from_executor(self): + """ + Ensure that futures are able to live on after the executor gets garbage + collected. 
+ """ + + with self.subTest("From the main process"): + mutable = [] + + def slow_callable(): + from time import sleep + sleep(1) + return True + + def callback(future): + mutable.append("Called back") + + def submit(): + # Executor only exists in this scope and can get garbage collected after + # this function is exits + future = PyMPISingleTaskExecutor().submit(slow_callable) + future.add_done_callback(callback) + return future + + self.assertListEqual( + [], + mutable, + msg="Sanity check that test is starting in the expected condition" + ) + future = submit() + + self.assertFalse( + future.done(), + msg="The submit function is slow, it should be running still" + ) + self.assertListEqual( + [], + mutable, + msg="While running, the mutable should not have been impacted by the " + "callback" + ) + future.result() # Wait for the calculation to finish + self.assertListEqual( + ["Called back"], + mutable, + msg="After completion, the callback should modify the mutable data" + ) From 8b6e8a670134a9941f44e5056af68730b5a449ef Mon Sep 17 00:00:00 2001 From: liamhuber Date: Thu, 9 Nov 2023 10:44:44 -0800 Subject: [PATCH 070/134] Test for persistence of future when submitting from a class Motivated the changes to `join` behaviour during `shutdown` --- tests/test_future.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/test_future.py b/tests/test_future.py index f7f5d4d0..426d0156 100644 --- a/tests/test_future.py +++ b/tests/test_future.py @@ -75,3 +75,40 @@ def submit(): mutable, msg="After completion, the callback should modify the mutable data" ) + + with self.subTest("From inside a class"): + class Foo: + def __init__(self): + self.running = False + + def run(self): + self.running = True + + future = PyMPISingleTaskExecutor().submit(self.return_42) + future.add_done_callback(self.finished) + + return future + + def return_42(self): + from time import sleep + sleep(1) + return 42 + + def finished(self, future): + 
self.running = False + + foo = Foo() + self.assertFalse( + foo.running, + msg="Sanity check that the test starts in the expected condition" + ) + fs = foo.run() + self.assertTrue( + foo.running, + msg="We should be able to exit the run method before the task completes" + ) + fs.result() # Wait for completion + self.assertFalse( + foo.running, + msg="After task completion, we expect the callback to modify the class" + ) \ No newline at end of file From e06dadd8dc15971bb6a22c4813e7a1ec7282ff7d Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 10 Nov 2023 09:58:07 +0100 Subject: [PATCH 071/134] Update README.md --- README.md | 252 +++++++++++++++++++++--------------- docs/source/installation.md | 2 +- 2 files changed, 148 insertions(+), 106 deletions(-) diff --git a/README.md b/README.md index 1206b284..a1516702 100644 --- a/README.md +++ b/README.md @@ -1,110 +1,152 @@ -# pympipool - scale python functions over multiple compute nodes +# pympipool - up-scale python functions for high performance computing [![Unittests](https://github.com/pyiron/pympipool/actions/workflows/unittest-openmpi.yml/badge.svg)](https://github.com/pyiron/pympipool/actions/workflows/unittest-openmpi.yml) [![Coverage Status](https://coveralls.io/repos/github/pyiron/pympipool/badge.svg?branch=main)](https://coveralls.io/github/pyiron/pympipool?branch=main) -Up-scaling python functions for high performance computing (HPC) can be challenging. While the python standard library -provides interfaces for multiprocessing and asynchronous task execution, namely [`multiprocessing`](https://docs.python.org/3/library/multiprocessing.html) -and [`concurrent.futures`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) both are -limited to the execution on a single compute node. So a series of python libraries have been developed to address the -up-scaling of python functions for HPC. 
Starting in the datascience and machine learning community with solutions like -[dask](https://www.dask.org) over more HPC focused solutions like [parsl](http://parsl-project.org) up to Python bindings -for the message passing interface (MPI) named [mpi4py](https://mpi4py.readthedocs.io). Each of these solutions has their -advantages and disadvantages, in particular the mixing of MPI parallel python functions and serial python functions in -combined workflows remains challenging. - -To address these challenges `pympipool` is developed with three goals in mind: -* Reimplement the standard python library interfaces namely [`multiprocessing.pool.Pool`](https://docs.python.org/3/library/multiprocessing.html) -and [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) -as closely as possible, to minimize the barrier of up-scaling an existing workflow to be used on HPC resources. -* Integrate MPI parallel python functions based on [mpi4py](https://mpi4py.readthedocs.io) on the same level as serial -python functions, so both can be combined in a single workflow. This allows the users to parallelize their workflows -one function at a time. Internally this is achieved by coupling a serial python process to a MPI parallel python process. -* Embrace [Jupyter](https://jupyter.org) notebooks for the interactive development of HPC workflows, as they allow the -users to document their though process right next to the python code and their results all within one document. 
- -# Features -As different users and different workflows have different requirements in terms of the level of parallelization, the -`pympipool` implements a series of five different interfaces: -* [`pympipool.Pool`](https://pympipool.readthedocs.io/en/latest/interfaces.html#pool): Following the -[`multiprocessing.pool.Pool`](https://docs.python.org/3/library/multiprocessing.html) the `pympipool.Pool` class -implements the `map()` and `starmap()` functions. Internally these connect to an MPI parallel subprocess running the -[`mpi4py.futures.MPIPoolExecutor`](https://mpi4py.readthedocs.io/en/stable/mpi4py.futures.html#mpipoolexecutor). -So by increasing the number of workers, by setting the `max_workers` parameter the `pympipool.Pool` can scale the -execution of serial python functions beyond a single compute node. For MPI parallel python functions the `pympipool.MPISpawnPool` -is derived from the `pympipool.Pool` and uses `MPI_Spawn()` to execute those. For more details see below. -* [`pympipool.Executor`](https://pympipool.readthedocs.io/en/latest/interfaces.html#executor): The easiest way to -execute MPI parallel python functions right next to serial python functions is the `pympipool.Executor`. It implements -the executor interface defined by the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures). -So functions are submitted to the `pympipool.Executor` using the `submit()` function, which returns an -[`concurrent.futures.Future`](https://docs.python.org/3/library/concurrent.futures.html#future-objects) object. With -these [`concurrent.futures.Future`](https://docs.python.org/3/library/concurrent.futures.html#future-objects) objects -asynchronous workflows can be constructed which periodically check if the computation is completed `done()` and then -query the results using the `result()` function. 
The limitation of the `pympipool.Executor` is lack of load balancing, -each `pympipool.Executor` acts as a serial first in first out (FIFO) queue. So it is the task of the user to balance the -load of many different tasks over multiple `pympipool.Executor` instances. -* [`pympipool.HPCExecutor`](https://pympipool.readthedocs.io/en/latest/interfaces.html#hpcexecutor>): To address the -limitation of the `pympipool.Executor` that only a single task is executed at any time, the `pympipool.HPCExecutor` -provides a wrapper around multiple `pympipool.Executor` objects. It balances the queues of the individual -`pympipool.Executor` objects to maximize the throughput for the given resources. This functionality comes with an -additional overhead of another thread, acting as a broker between the task queue of the `pympipool.HPCExecutor` and the -individual `pympipool.Executor` objects. -* [`pympipool.PoolExecutor`](https://pympipool.readthedocs.io/en/latest/interfaces.html#poolexecutor): To combine the -functionality of the `pympipool.Pool` and the `pympipool.Executor` the `pympipool.PoolExecutor` again connects to the -[`mpi4py.futures.MPIPoolExecutor`](https://mpi4py.readthedocs.io/en/stable/mpi4py.futures.html#mpipoolexecutor). -Still in contrast to the `pympipool.Pool` it does not implement the `map()` and `starmap()` functions but rather the -`submit()` function based on the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) -interface. In this case the load balancing happens internally and the maximum number of workers `max_workers` defines -the maximum number of parallel tasks. But only serial python tasks can be executed in contrast to the `pympipool.Executor` -which can also execute MPI parallel python tasks. 
-* [`pympipool.MPISpawnPool`](https://pympipool.readthedocs.io/en/latest/interfaces.html#mpispawnpool): An alternative -way to support MPI parallel functions in addition to the `pympipool.Executor` is the `pympipool.MPISpawnPool`. Just like -the `pympipool.Pool` it supports the `map()` and `starmap()` functions. The additional `ranks_per_task` parameter -defines how many MPI ranks are used per task. All functions are executed with the same number of MPI ranks. The -limitation of this approach is that it uses `MPI_Spawn()` to create new MPI ranks for the execution of the individual -tasks. Consequently, this approach is not as scalable as the `pympipool.Executor` but it offers load balancing for a -large number of similar MPI parallel tasks. -* [`pympipool.SocketInterface`](https://pympipool.readthedocs.io/en/latest/interfaces.html#socketinterface): The key -functionality of the `pympipool` package is the coupling of a serial python process with an MPI parallel python process. -This happens in the background using a combination of the [zero message queue](https://zeromq.org) and -[cloudpickle](https://github.com/cloudpipe/cloudpickle) to communicate binary python objects. The `pympipool.SocketInterface` -is an abstraction of this interface, which is used in the other classes inside `pympipool` and might also be helpful for -other projects. - -In addition to using MPI to start a number of processes on different HPC computing resources, `pympipool` also supports -the [flux-framework](https://flux-framework.org) as additional backend. By setting the optional `enable_flux_backend` -parameter to `True` the flux backend can be enabled for the `pympipool.Pool`, `pympipool.Executor` and `pympipool.PoolExecutor`. 
-Other optional parameters include the selection of the working directory where the python function should be executed `cwd` -and the option to oversubscribe MPI tasks which is an [OpenMPI](https://www.open-mpi.org) specific feature which can be -enabled by setting `oversubscribe` to `True`. For more details on the `pympipool` classes and their application, the -extended documentation is linked below. +Up-scaling python functions for high performance computing (HPC) can be challenging. While the python standard library +provides interfaces for multiprocessing and asynchronous task execution, namely +[multiprocessing](https://docs.python.org/3/library/multiprocessing.html) and +[concurrent.futures](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) both are +limited to the execution on a single compute node. So a series of python libraries have been developed to address the +up-scaling of python functions for HPC. Starting in the datascience and machine learning community with solutions +like [dask](https://www.dask.org) over more HPC focused solutions like +[fireworks](https://materialsproject.github.io/fireworks/) and [parsl](http://parsl-project.org) up to Python +bindings for the message passing interface (MPI) named [mpi4py](https://mpi4py.readthedocs.io). Each of these +solutions has their advantages and disadvantages, in particular scaling beyond serial python functions, including thread +based parallelism, MPI parallel python application or assignment of GPUs to individual python function remains +challenging. + +To address these challenges `pympipool` is developed with three goals in mind: + +* Extend the standard python library [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) + interface, to minimize the barrier of up-scaling an existing workflow to be used on HPC resources. 
+* Integrate thread based parallelism, MPI parallel python functions based on [mpi4py](https://mpi4py.readthedocs.io) and + GPU assignment. This allows the users to accelerate their workflows one function at a time. +* Embrace [Jupyter](https://jupyter.org) notebooks for the interactive development of HPC workflows, as they allow the + users to document their thought process right next to the python code and their results all within one document. + +## HPC Context +In contrast to frameworks like [dask](https://www.dask.org), [fireworks](https://materialsproject.github.io/fireworks/) +and [parsl](http://parsl-project.org) which can be used to submit a number of worker processes directly to the HPC +queuing system and then transfer tasks from either the login node or an interactive allocation to these worker processes +to accelerate the execution, [mpi4py](https://mpi4py.readthedocs.io) and `pympipool` follow a different +approach. Here the user creates their HPC allocation first and then [mpi4py](https://mpi4py.readthedocs.io) or +`pympipool` can be used to distribute the tasks within this allocation. The advantage of this approach is that +no central data storage is required as the workers and the scheduling task can communicate directly. + +## Examples +The following example illustrates how `pympipool` can be used to distribute a series of MPI parallel function calls +within a queuing system allocation.
`example.py`: +``` +from pympipool import Executor + +def calc(i): + from mpi4py import MPI + size = MPI.COMM_WORLD.Get_size() + rank = MPI.COMM_WORLD.Get_rank() + return i, size, rank + +with Executor(max_workers=2, cores_per_worker=2) as exe: + fs_0 = exe.submit(calc, 0) + fs_1 = exe.submit(calc, 1) + print(fs_0.result(), fs_1.result()) +``` +This example can be executed using:: +``` +python example.py +``` +Which returns:: +``` +>>> [(0, 2, 0), (0, 2, 1)], [(1, 2, 0), (1, 2, 1)] +``` +The important part in this example is that [mpi4py](https://mpi4py.readthedocs.io) is only used in the `calc()` +function, not in the python script, consequently it is not necessary to call the script with `mpiexec` but instead +a call with the regular python interpreter is sufficient. This highlights how `pympipool` allows the users to +parallelize one function at a time and not having to convert their whole workflow to use [mpi4py](https://mpi4py.readthedocs.io). +The same code can also be executed inside a jupyter notebook directly which enables an interactive development process. + +The standard [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) +interface is extended by adding the option `cores_per_worker=2` to assign multiple MPI ranks to each function call. +To create two workers `max_workers=2` each with two cores each requires a total of four CPU cores to be available. +After submitting the function `calc()` with the corresponding parameter to the executor `exe.submit(calc, 0)` +a python [`concurrent.futures.Future`](https://docs.python.org/3/library/concurrent.futures.html#future-objects) is +returned. Consequently, the `pympipool.Executor` can be used as a drop-in replacement for the +[`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) +which allows the user to add parallelism to their workflow one function at a time. 
+ +## Backends +Depending on the availability of different resource schedulers in your HPC environment the `pympipool.Executor` +uses a different backend, with the `pympipool.flux.PyFluxExecutor` being the preferred backend: + +* `pympipool.mpi.PyMpiExecutor`: The simplest executor of the three uses [mpi4py](https://mpi4py.readthedocs.io) as a + backend. This simplifies the installation on all operating systems including Windows. Still at the same time it limits + the up-scaling to a single compute node and serial or MPI parallel python functions. There is no support for thread + based parallelism or GPU assignment. This interface is primarily used for testing and developing or as a fall-back + solution. It is not recommended to use this interface in production. +* `pympipool.slurm.PySlurmExecutor`: The [SLURM workload manager](https://www.schedmd.com) is commonly used on HPC + systems to schedule and distribute tasks. `pympipool` provides a python interface for scheduling the execution of + python functions as SLURM job steps which are typically created using the `srun` command. This executor supports + serial python functions, thread based parallelism, MPI based parallelism and the assignment of GPUs to individual + python functions. When the [SLURM workload manager](https://www.schedmd.com) is installed on your HPC cluster this + interface can be a reasonable choice, still depending on the [SLURM workload manager](https://www.schedmd.com) + configuration it can be limited in terms of the fine-grained scheduling or the responsiveness when working with + hundreds of compute nodes in an individual allocation. +* `pympipool.flux.PyFluxExecutor`: The [flux framework](https://flux-framework.org) is the preferred backend for + `pympipool`. Just like the `pympipool.slurm.PySlurmExecutor` it supports serial python functions, thread based + parallelism, MPI based parallelism and the assignment of GPUs to individual python functions.
Still the advantages of + using the [flux framework](https://flux-framework.org) as a backend are the easy installation, the faster allocation + of resources as the resources are managed within the allocation and no central database is used and the superior + level of fine-grained resource assignment which is typically not available on HPC resource schedulers. + +Each of these backends consists of two parts: a broker and a worker. When a new task is submitted by the user it is +received by the broker and the broker identifies the first available worker. The worker then executes the task and returns +the result to the broker, which returns it to the user. While there is only one broker per `pympipool.Executor` the number +of workers can be specified with the `max_workers` parameter. + +## Disclaimer +While we try to develop a stable and reliable software library, the development remains an open-source project under the +BSD 3-Clause License without any warranties:: +``` +BSD 3-Clause License + +Copyright (c) 2022, Jan Janssen +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +``` # Documentation -* [Installation](https://pympipool.readthedocs.io/en/latest/installation.html) - * [pypi-based installation](https://pympipool.readthedocs.io/en/latest/installation.html#pypi-based-installation) - * [conda-based installation](https://pympipool.readthedocs.io/en/latest/installation.html#conda-based-installation) -* [Interfaces](https://pympipool.readthedocs.io/en/latest/interfaces.html) - * [Pool](https://pympipool.readthedocs.io/en/latest/interfaces.html#pool) - * [Executor](https://pympipool.readthedocs.io/en/latest/interfaces.html#executor) - * [HPCExecutor](https://pympipool.readthedocs.io/en/latest/interfaces.html#hpcexecutor>) - * [ParallelExecutor](https://pympipool.readthedocs.io/en/latest/interfaces.html#poolexecutor) - * [MPISpawnPool](https://pympipool.readthedocs.io/en/latest/interfaces.html#mpispawnpool) - * [SocketInterface](https://pympipool.readthedocs.io/en/latest/interfaces.html#socketinterface) -* [Development](https://pympipool.readthedocs.io/en/latest/development.html) - -# License -`pympipool` is released under the BSD license https://github.com/pyiron/pympipool/blob/main/LICENSE . 
It is a spin-off -of the `pyiron` project https://github.com/pyiron/pyiron therefore if you use `pympipool` for calculation which result -in a scientific publication, please cite: - - @article{pyiron-paper, - title = {pyiron: An integrated development environment for computational materials science}, - journal = {Computational Materials Science}, - volume = {163}, - pages = {24 - 36}, - year = {2019}, - issn = {0927-0256}, - doi = {https://doi.org/10.1016/j.commatsci.2018.07.043}, - url = {http://www.sciencedirect.com/science/article/pii/S0927025618304786}, - author = {Jan Janssen and Sudarsan Surendralal and Yury Lysogorskiy and Mira Todorova and Tilmann Hickel and Ralf Drautz and Jörg Neugebauer}, - keywords = {Modelling workflow, Integrated development environment, Complex simulation protocols}, - } +* [Installation](https://pympipool.readthedocs.io/en/latest/installation.html) + * [Basic Installation](https://pympipool.readthedocs.io/en/latest/installation.html#basic-installation) + * [High Performance Computing](https://pympipool.readthedocs.io/en/latest/installation.html#high-performance-computing) +* [Examples](https://pympipool.readthedocs.io/en/latest/examples.html) + * [Compatibility](https://pympipool.readthedocs.io/en/latest/examples.html#compatibility) + * [Data Handling](https://pympipool.readthedocs.io/en/latest/examples.html#data-handling) + * [Up-Scaling](https://pympipool.readthedocs.io/en/latest/examples.html#up-scaling) +* [Development](https://pympipool.readthedocs.io/en/latest/development.html) + * [Contributions](https://pympipool.readthedocs.io/en/latest/development.html#contributions) + * [Integration](https://pympipool.readthedocs.io/en/latest/development.html#integration) + * [Alternative Projects](https://pympipool.readthedocs.io/en/latest/development.html#alternative-projects) +* [Module Index](https://pympipool.readthedocs.io/en/latest/py-modindex.html) \ No newline at end of file diff --git a/docs/source/installation.md 
b/docs/source/installation.md index 7461aa89..ccbb3069 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -32,7 +32,7 @@ following command: pip install pympipool ``` -## Integration with high performance computing +## High Performance Computing `pympipool` currently provides interfaces to the [SLURM workload manager](https://www.schedmd.com) and the [flux framework](https://flux-framework.org). With the [flux framework](https://flux-framework.org) being the recommended solution as it can be installed without root user rights and it can be integrated in existing resource From c75b0f48e5281fb57f96c4abf235c145d64a200d Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 10 Nov 2023 10:09:23 +0100 Subject: [PATCH 072/134] Update test_flux.py --- tests/test_flux.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_flux.py b/tests/test_flux.py index 4d84511d..6359dc98 100644 --- a/tests/test_flux.py +++ b/tests/test_flux.py @@ -15,7 +15,7 @@ FluxPythonInterface, ) - skip_flux_test = False + skip_flux_test = "FLUX_URI" not in os.environ except ImportError: skip_flux_test = True From e08af4e309fba0c15371e5c372a73a3be8dbfa73 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 10 Nov 2023 10:13:01 +0100 Subject: [PATCH 073/134] Update unittest-flux.yml --- .github/workflows/unittest-flux.yml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/unittest-flux.yml b/.github/workflows/unittest-flux.yml index 4b990797..c5c3273b 100644 --- a/.github/workflows/unittest-flux.yml +++ b/.github/workflows/unittest-flux.yml @@ -47,13 +47,7 @@ jobs: - name: Test shell: bash -l {0} timeout-minutes: 5 - run: > - for f in $(ls tests/test_*.py); do - echo $f; - if [ $f != "tests/test_flux.py" ]; then - coverage run --omit pympipool/_version.py -m unittest $f; - fi - done + run: cd tests; coverage run --omit pympipool/_version.py -m unittest discover . 
env: OMPI_MCA_plm: 'isolated' OMPI_MCA_rmaps_base_oversubscribe: 'yes' From 4023a912aaf6fd08d13785f23efd9047ffd3a8c2 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 10 Nov 2023 10:17:07 +0100 Subject: [PATCH 074/134] Update test_flux.py --- tests/test_flux.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_flux.py b/tests/test_flux.py index 6359dc98..c5313e0e 100644 --- a/tests/test_flux.py +++ b/tests/test_flux.py @@ -1,4 +1,5 @@ from concurrent.futures import Future +import os from queue import Queue import numpy as np From bcd255972313612bb3f0e2ef83f6f89ddb85ed69 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 10 Nov 2023 11:52:06 +0100 Subject: [PATCH 075/134] Small corrections to the documentation --- docs/source/examples.md | 2 +- docs/source/installation.md | 19 ++++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/docs/source/examples.md b/docs/source/examples.md index 3fd12121..b63f8e18 100644 --- a/docs/source/examples.md +++ b/docs/source/examples.md @@ -127,7 +127,7 @@ def calc(i, j, k): def init_function(): return {"j": 4, "k": 3, "l": 2} -with Executor(cores=1, init_function=init_function) as exe: +with Executor(max_workers=1, init_function=init_function) as exe: fs = exe.submit(calc, 2, j=5) print(fs.result()) ``` diff --git a/docs/source/installation.md b/docs/source/installation.md index ccbb3069..83abc2f0 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -2,10 +2,13 @@ For up-scaling python functions beyond a single compute node `pympipool` requires the integration with a high performance computing (HPC) resource manager. These HPC resource manager are currently only supported for Linux. Still for testing and development purposes the `pympipool` package can installed on all major operating systems including -Windows. This basic installation is based on the `pympipool.mpi.PyMPIExecutor` interface and allows up-scaling serial +Windows. 
+ +This basic installation is based on the `pympipool.mpi.PyMPIExecutor` interface and allows up-scaling serial and parallel python functions which use the message passing interface (MPI) for python [`mpi4py`](https://mpi4py.readthedocs.io) on a single compute node. In addition, the integration with an HPC resource manager provides scaling beyond one compute -node, thread based parallelism and the assignment of GPUs. +node, thread based parallelism and the assignment of GPUs. Still the user would not call the interface directly, but +rather use it through the `pympipool.Executor`. ## Basic Installation For testing and development purposes the `pympipool` package can installed on all major operating systems including @@ -38,7 +41,13 @@ pip install pympipool recommended solution as it can be installed without root user rights and it can be integrated in existing resource managers like the [SLURM workload manager](https://www.schedmd.com). The advantages of using `pympipool` in combination with these resource schedulers is the fine-grained resource allocation. In addition, to scaling beyond a single compute -node they add the ability to assign GPUs and thread based parallelism. +node they add the ability to assign GPUs and thread based parallelism. The two resource manager are internally linked to +two interfaces: + +* `pympipool.slurm.PySlurmExecutor`: The interface for the [SLURM workload manager](https://www.schedmd.com). +* `pympipool.flux.PyFluxExecutor`: The interface for the [flux framework](https://flux-framework.org). + +Still the user would not call these interfaces directly, but rather use it through the `pympipool.Executor`. ### Flux Framework For Linux users without a pre-installed resource scheduler in their high performance computing (HPC) environment, the @@ -116,6 +125,6 @@ both resource schedulers, the [flux framework](https://flux-framework.org) can a uses the [SLURM workload manager](https://www.schedmd.com) as primary resource scheduler. 
This enables more fine-grained scheduling like independent GPU access on HPC systems where [SLURM workload manager](https://www.schedmd.com) is configured to allow only one job step per node. Furthermore, the [flux framework](https://flux-framework.org) provides -superior performance in large allocation with several hundred compute nodes or in the case when many `pympipool.Executor` -objects are created frequently, as each creation of an `pympipool.Executor` results in an `srun` call which is +superior performance in large allocation with several hundred compute nodes or in the case when many `pympipool.slurm.PySlurmExecutor` +objects are created frequently, as each creation of an `pympipool.slurm.PySlurmExecutor` results in an `srun` call which is communicated to the central database of the [SLURM workload manager](https://www.schedmd.com). From 8b0d94a4c62deeb0636583f09744e02112cf6794 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 10 Nov 2023 11:59:40 +0100 Subject: [PATCH 076/134] Add mybinder --- binder/environment.yml | 11 +++++++++++ binder/kernel.json | 16 ++++++++++++++++ binder/postBuild | 6 ++++++ 3 files changed, 33 insertions(+) create mode 100644 binder/environment.yml create mode 100644 binder/kernel.json create mode 100644 binder/postBuild diff --git a/binder/environment.yml b/binder/environment.yml new file mode 100644 index 00000000..34f38cda --- /dev/null +++ b/binder/environment.yml @@ -0,0 +1,11 @@ +channels: +- conda-forge +dependencies: +- python +- numpy +- openmpi +- cloudpickle =3.0.0 +- flux-core +- mpi4py =3.1.5 +- tqdm =4.66.1 +- pyzmq =25.1.1 diff --git a/binder/kernel.json b/binder/kernel.json new file mode 100644 index 00000000..16c69cf5 --- /dev/null +++ b/binder/kernel.json @@ -0,0 +1,16 @@ +{ + "argv": [ + "flux", + "start", + "/srv/conda/envs/notebook/bin/python", + "-m", + "ipykernel_launcher", + "-f", + "{connection_file}" + ], + "display_name": "Flux", + "language": "python", + "metadata": { + "debugger": true + } +} \ 
No newline at end of file diff --git a/binder/postBuild b/binder/postBuild new file mode 100644 index 00000000..f5cee2b7 --- /dev/null +++ b/binder/postBuild @@ -0,0 +1,6 @@ +# jupyter kernel +mkdir -p /home/jovyan/.local/share/jupyter/kernels/flux +cp binder/kernel.json /home/jovyan/.local/share/jupyter/kernels/flux + +# install pympipool +pip install . \ No newline at end of file From f707a1336fc80e4bb0c2fdacbf795fc3e30766d1 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 10 Nov 2023 12:20:56 +0100 Subject: [PATCH 077/134] Add example notebook --- README.md | 1 + notebooks/examples.ipynb | 1 + 2 files changed, 2 insertions(+) create mode 100644 notebooks/examples.ipynb diff --git a/README.md b/README.md index a1516702..54287344 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # pympipool - up-scale python functions for high performance computing [![Unittests](https://github.com/pyiron/pympipool/actions/workflows/unittest-openmpi.yml/badge.svg)](https://github.com/pyiron/pympipool/actions/workflows/unittest-openmpi.yml) [![Coverage Status](https://coveralls.io/repos/github/pyiron/pympipool/badge.svg?branch=main)](https://coveralls.io/github/pyiron/pympipool?branch=main) +[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/pyiron/pympipool/HEAD?labpath=notebooks%2Fexamples.ipynb) Up-scaling python functions for high performance computing (HPC) can be challenging. 
While the python standard library provides interfaces for multiprocessing and asynchronous task execution, namely diff --git a/notebooks/examples.ipynb b/notebooks/examples.ipynb new file mode 100644 index 00000000..90596a0b --- /dev/null +++ b/notebooks/examples.ipynb @@ -0,0 +1 @@ +{"metadata":{"kernelspec":{"name":"flux","display_name":"Flux","language":"python"},"language_info":{"name":"python","version":"3.11.6","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":5,"nbformat":4,"cells":[{"cell_type":"markdown","source":"# Examples\nThe `pympipool.Executor` extends the interface of the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures)\nto simplify the up-scaling of individual functions in a given workflow.","metadata":{},"id":"19bad499-5a97-425c-beec-dcd88d693d4c"},{"cell_type":"markdown","source":"## Compatibility\nStarting with the basic example of `1+1=2`. With the `ThreadPoolExecutor` from the [`concurrent.futures`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures)\nstandard library this can be written as: ","metadata":{},"id":"f752ec8d-50b8-46fb-86f2-08a9126f1a39"},{"cell_type":"code","source":"from concurrent.futures import ThreadPoolExecutor","metadata":{"trusted":true},"execution_count":1,"outputs":[],"id":"584cd590-acaf-48d7-a5b5-e4049a9626b7"},{"cell_type":"code","source":"with ThreadPoolExecutor(\n max_workers=1,\n) as exe:\n future = exe.submit(sum, [1, 1])\n print(future.result())","metadata":{"trusted":true},"execution_count":2,"outputs":[{"name":"stdout","text":"2\n","output_type":"stream"}],"id":"73673e42-2c68-4b91-b6ff-db1ecb2c0587"},{"cell_type":"markdown","source":"In this case `max_workers=1` limits the number of threads uses by the `ThreadPoolExecutor` to one. 
Then the `sum()` \nfunction is submitted to the executor with a list with two ones `[1, 1]` as input. A [`concurrent.futures.Future`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures)\nobject is returned. The `Future` object allows to check the status of the execution with the `done()` method which \nreturns `True` or `False` depending on the state of the execution. Or the main process can wait until the execution is \ncompleted by calling `result()`. ","metadata":{},"id":"32156314-02a9-4210-8a8c-94afe09b64f6"},{"cell_type":"markdown","source":"The result of the calculation is `1+1=2`. ","metadata":{},"id":"b750d830-bd0d-4474-9f70-913d0b9d6b8a"},{"cell_type":"markdown","source":"The `pympipool.Executor` class extends the interface of the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) \nclass by providing more parameters to specify the level of parallelism. In addition, to specifying the maximum number \nof workers `max_workers` the user can also specify the number of cores per worker `cores_per_worker` for MPI based \nparallelism, the number of threads per core `threads_per_core` for thread based parallelism and the number of GPUs per\nworker `gpus_per_worker`. Finally, for those backends which support over-subscribing this can also be enabled using the \n`oversubscribe` parameter. 
All these parameters are optional, so the `pympipool.Executor` can be used as a drop-in \nreplacement for the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures).","metadata":{},"id":"4fbf72a2-0e0e-43ce-be8f-db3489c4eafe"},{"cell_type":"markdown","source":"The previous example is rewritten for the `pympipool.Executor`:","metadata":{},"id":"9b5a26e2-3d18-4778-ba10-e3e213b70433"},{"cell_type":"code","source":"from pympipool import Executor ","metadata":{"trusted":true},"execution_count":3,"outputs":[],"id":"60373c38-63f8-48dc-be0f-ddb71ebf88f8"},{"cell_type":"code","source":"with Executor(\n max_workers=1, \n cores_per_worker=1, \n threads_per_core=1, \n gpus_per_worker=0, \n oversubscribe=False\n) as exe:\n future = exe.submit(sum, [1,1])\n print(future.result())","metadata":{"trusted":true},"execution_count":4,"outputs":[{"name":"stdout","text":"2\n","output_type":"stream"}],"id":"fd755b28-ff01-4530-9099-001cac151e31"},{"cell_type":"markdown","source":"The result of the calculation is again `1+1=2`.","metadata":{},"id":"44c4bc4b-cf97-461e-98e7-62bcdb8caff2"},{"cell_type":"markdown","source":"Beyond pre-defined functions like the `sum()` function, the same functionality can be used to submit user-defined \nfunctions. In the following example a custom summation function is defined: ","metadata":{},"id":"331aed93-806a-4057-ab9c-19479190f472"},{"cell_type":"code","source":"def calc(*args):\n return sum(*args)","metadata":{"trusted":true},"execution_count":5,"outputs":[],"id":"cdeb8710-b328-463d-a436-82d6756e76b3"},{"cell_type":"markdown","source":"In contrast to the previous example where just a single function was submitted to a single worker, in this case a total\nof four functions is submitted to a group of two workers `max_workers=2`. Consequently, the functions are executed as a\nset of two pairs. 
","metadata":{},"id":"d5efa995-d4d4-4f9c-a7e6-38dd66143535"},{"cell_type":"code","source":"with Executor(max_workers=2) as exe:\n fs_1 = exe.submit(calc, [2, 1])\n fs_2 = exe.submit(calc, [2, 2])\n fs_3 = exe.submit(calc, [2, 3])\n fs_4 = exe.submit(calc, [2, 4])\n print([\n fs_1.result(), \n fs_2.result(), \n fs_3.result(), \n fs_4.result(),\n ])","metadata":{"trusted":true},"execution_count":6,"outputs":[{"name":"stdout","text":"[3, 4, 5, 6]\n","output_type":"stream"}],"id":"82033832-7ccd-4c67-a1fb-57f55710b77c"},{"cell_type":"markdown","source":"The snippet can be executed with any python interpreter. It returns the corresponding sums as expected. The same can be achieved with the built-in [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures)\nclasses. Still one advantage of using the `pympipool.Executor` rather than the built-in ones, is the ability to execute \nthe same commands in interactive environments like [Jupyter notebooks](https://jupyter.org). This is achieved by using \n[cloudpickle](https://github.com/cloudpipe/cloudpickle) to serialize the python function and its parameters rather than\nthe regular pickle package. ","metadata":{},"id":"86838528-312e-46cc-b022-0c946bf95037"},{"cell_type":"markdown","source":"For backwards compatibility with the [`multiprocessing.Pool`](https://docs.python.org/3/library/multiprocessing.html) \nclass the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures)\nalso implements the `map()` function to map a series of inputs to a function. 
The same `map()` function is also \navailable in the `pympipool.Executor`: ","metadata":{},"id":"4de690ed-661c-4e6e-a97e-c478393d0dc6"},{"cell_type":"code","source":"with Executor(max_workers=2) as exe:\n print(list(exe.map(calc, [[2, 1], [2, 2], [2, 3], [2, 4]])))","metadata":{"trusted":true},"execution_count":7,"outputs":[{"name":"stdout","text":"[3, 4, 5, 6]\n","output_type":"stream"}],"id":"3f06b0c1-5ee1-40c5-82ab-31d77cfcdb46"},{"cell_type":"markdown","source":"The results remain the same. ","metadata":{},"id":"a5d0f249-23bb-4727-8b09-87320ecb98eb"},{"cell_type":"markdown","source":"## Data Handling\nA limitation of many parallel approaches is the overhead in communication when working with large datasets. Instead of\nreading the same dataset repetitively, the `pympipool.Executor` loads the dataset only once per worker and afterwards \neach function submitted to this worker has access to the dataset, as it is already loaded in memory. To achieve this\nthe user defines an initialization function `init_function` which returns a dictionary with one key per dataset. 
The \nkeys of the dictionary can then be used as additional input parameters in each function submitted to the `pympipool.Executor`.\nThis functionality is illustrated in the following example: ","metadata":{},"id":"580b00ee-6d5b-4ca9-ba36-ff70128c0b6b"},{"cell_type":"code","source":"def calc(i, j, k):\n return i + j + k","metadata":{"trusted":true},"execution_count":8,"outputs":[],"id":"8fe8c750-4dc5-4b26-ad8d-9f755bff3494"},{"cell_type":"code","source":"def init_function():\n return {\"j\": 4, \"k\": 3, \"l\": 2}","metadata":{"trusted":true},"execution_count":9,"outputs":[],"id":"5f943266-1bee-421e-a1b4-583d222b1c99"},{"cell_type":"code","source":"with Executor(max_workers=1, init_function=init_function) as exe:\n fs = exe.submit(calc, 2, j=5)\n print(fs.result())","metadata":{"trusted":true},"execution_count":10,"outputs":[{"name":"stdout","text":"10\n","output_type":"stream"}],"id":"0debe907-b646-4fd5-bae7-46b16645d2f3"},{"cell_type":"markdown","source":"The function `calc()` requires three inputs `i`, `j` and `k`. But when the function is submitted to the executor only \ntwo inputs are provided `fs = exe.submit(calc, 2, j=5)`. In this case the first input parameter is mapped to `i=2`, the\nsecond input parameter is specified explicitly `j=5` but the third input parameter `k` is not provided. So the \n`pympipool.Executor` automatically checks the keys set in the `init_function()` function. In this case the returned \ndictionary `{\"j\": 4, \"k\": 3, \"l\": 2}` defines `j=4`, `k=3` and `l=2`. For this specific call of the `calc()` function,\n`i` and `j` are already provided so `j` is not required, but `k=3` is used from the `init_function()` and as the `calc()`\nfunction does not define the `l` parameter this one is also ignored. 
","metadata":{},"id":"72fa803a-ace0-41ea-8090-d64dfd0797cc"},{"cell_type":"markdown","source":"The result is `2+5+3=10` as `i=2` and `j=5` are provided during the submission and `k=3` is defined in the `init_function()`\nfunction.","metadata":{},"id":"1443d216-1add-445a-a662-5b16af6c1443"},{"cell_type":"markdown","source":"## Up-Scaling \nThe availability of certain features depends on the backend `pympipool` is installed with. In particular the thread \nbased parallelism and the GPU assignment is only available with the `pympipool.slurm.PySlurmExecutor` or the \n`pympipool.flux.PyFluxExecutor` backend. The latter is recommended based on the easy installation, the faster allocation \nof resources as the resources are managed within the allocation and no central databases is used and the superior level \nof fine-grained resource assignment which is typically not available on other HPC resource schedulers including the\n[SLURM workload manager](https://www.schedmd.com). The `pympipool.flux.PyFluxExecutor` requires \n[flux framework](https://flux-framework.org) to be installed in addition to the `pympipool` package. The features are \nsummarized in the table below: \n\n| Feature \\ Backend | `PyMpiExecutor` | `PySlurmExecutor` | `PyFluxExecutor` |\n|:--------------------------:|:---------------:|:-----------------:|:----------------:|\n| Thread based parallelism | no | yes | yes | \n| MPI based parallelism | yes | yes | yes |\n| GPU assignment | no | yes | yes |\n| Resource over-subscription | yes | yes | no |\n| Scalability | 1 node | ~100 nodes | no limit |","metadata":{},"id":"8d1e21ec-0b8d-45bf-bfb1-62b3df8e242a"},{"cell_type":"markdown","source":"### Thread-based Parallelism\nThe number of threads per core can be controlled with the `threads_per_core` parameter during the initialization of the \n`pympipool.Executor`. 
Unfortunately, there is no uniform way to control the number of cores a given underlying library \nuses for thread based parallelism, so it might be necessary to set certain environment variables manually: \n\n* `OMP_NUM_THREADS`: for openmp\n* `OPENBLAS_NUM_THREADS`: for openblas\n* `MKL_NUM_THREADS`: for mkl\n* `VECLIB_MAXIMUM_THREADS`: for accelerate on Mac Os X\n* `NUMEXPR_NUM_THREADS`: for numexpr\n\nAt the current stage `pympipool.Executor` does not set these parameters itself, so you have to add them in the function\nyou submit before importing the corresponding library: ","metadata":{},"id":"f3b9cc80-70ed-4bc8-abf9-62ecbd70b960"},{"cell_type":"code","source":"def calc(i):\n import os\n os.environ[\"OMP_NUM_THREADS\"] = \"2\"\n os.environ[\"OPENBLAS_NUM_THREADS\"] = \"2\"\n os.environ[\"MKL_NUM_THREADS\"] = \"2\"\n os.environ[\"VECLIB_MAXIMUM_THREADS\"] = \"2\"\n os.environ[\"NUMEXPR_NUM_THREADS\"] = \"2\"\n import numpy as np\n return i","metadata":{"trusted":true},"execution_count":11,"outputs":[],"id":"fbf5f7b2-eb3e-4a81-bae8-e429747300a0"},{"cell_type":"markdown","source":"Most modern CPUs use hyper-threading to present the operating system with double the number of virtual cores compared to\nthe number of physical cores available. So unless this functionality is disabled `threads_per_core=2` is a reasonable \ndefault. Just be careful: if the number of threads is not specified, it is possible that all workers try to access all \ncores at the same time which can lead to poor performance. So it is typically a good idea to monitor the CPU utilization\nwith increasing number of workers. ","metadata":{},"id":"334619d0-8d95-419e-885c-e5bc05747584"},{"cell_type":"markdown","source":"Specific manycore CPU models like the Intel Xeon Phi processors provide a much higher hyper-threading ratio and require\na higher number of threads per core for optimal performance. 
","metadata":{},"id":"7c3146c1-8722-4b67-ab21-c250b8e7c9dd"},{"cell_type":"markdown","source":"### MPI Parallel Python Functions\nBeyond thread based parallelism, the message passing interface (MPI) is the de facto standard for parallel execution in \nscientific computing and the [`mpi4py`](https://mpi4py.readthedocs.io) bindings to the MPI libraries are commonly used\nto parallelize existing workflows. The limitation of this approach is that it requires the whole code to adopt the MPI\ncommunication standards to coordinate the way how information is distributed. Just like the `pympipool.Executor` the \n[`mpi4py.futures.MPIPoolExecutor`](https://mpi4py.readthedocs.io/en/stable/mpi4py.futures.html#mpipoolexecutor) \nimplements the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures)\ninterface. Still in this case each python function submitted to the executor is still limited to serial execution. The\nnovel approach of the `pympipool.Executor` is mixing these two types of parallelism. Individual functions can use\nthe [`mpi4py`](https://mpi4py.readthedocs.io) library to handle the parallel execution within the context of this \nfunction while these functions can still be submitted to the `pympipool.Executor` just like any other function. The\nadvantage of this approach is that the users can parallelize their workflows one function at a time. 
\n\nThe following example illustrates the submission of a simple MPI parallel python function: ","metadata":{},"id":"b4976d45-0f4e-496c-8173-9631f512135b"},{"cell_type":"code","source":"def calc(i):\n from mpi4py import MPI\n size = MPI.COMM_WORLD.Get_size()\n rank = MPI.COMM_WORLD.Get_rank()\n return i, size, rank","metadata":{"trusted":true},"execution_count":12,"outputs":[],"id":"cfa072a4-f88f-45b0-be94-a78f0edad513"},{"cell_type":"code","source":"with Executor(cores_per_worker=2) as exe:\n fs = exe.submit(calc, 3)\n print(fs.result())","metadata":{"trusted":true},"execution_count":13,"outputs":[{"name":"stdout","text":"[(3, 2, 0), (3, 2, 1)]\n","output_type":"stream"}],"id":"fd036b03-085d-4850-b11e-537c8fd476d5"},{"cell_type":"markdown","source":"The `calc()` function initializes the [`mpi4py`](https://mpi4py.readthedocs.io) library and gathers the size of the \nallocation and the rank of the current process within the MPI allocation. This function is then submitted to an \n`pympipool.Executor` which is initialized with a single worker with two cores `cores_per_worker=2`. So each function\ncall is going to have access to two cores. \n\nJust like before the script can be called with any python interpreter even though it is using the [`mpi4py`](https://mpi4py.readthedocs.io)\nlibrary in the background it is not necessary to execute the script with `mpiexec` or `mpirun`.","metadata":{},"id":"b4f97426-d8fb-42ef-98ca-135054bd39a7"},{"cell_type":"markdown","source":"The response consists of a list of two tuples, one for each MPI parallel process, with the first entry of the tuple \nbeing the parameter `i=3`, followed by the number of MPI parallel processes assigned to the function call `cores_per_worker=2`\nand finally the index of the specific process `0` or `1`. 
","metadata":{},"id":"69dcfdcb-41db-4c3b-a1c5-07ff3be0c9a0"},{"cell_type":"markdown","source":"### GPU Assignment\nWith the rise of machine learning applications, the use of GPUs for scientific application becomes more and more popular.\nConsequently, it is essential to have full control over the assignment of GPUs to specific python functions. In the \nfollowing example the `tensorflow` library is used to identify the GPUs and return their configuration: ","metadata":{},"id":"dc41f241-663c-474e-ae1e-b2365389bc90"},{"cell_type":"raw","source":"import socket\nfrom tensorflow.python.client import device_lib","metadata":{},"id":"6ac9630b-4ab5-4f7f-bf55-812e8189da4f"},{"cell_type":"code","source":"def get_available_gpus():\n local_device_protos = device_lib.list_local_devices()\n return [\n (x.name, x.physical_device_desc, socket.gethostname()) \n for x in local_device_protos if x.device_type == 'GPU'\n ]","metadata":{"trusted":true},"execution_count":14,"outputs":[],"id":"998138f5-f0cb-47a7-ba36-7594b8ec41fc"},{"cell_type":"raw","source":"with Executor(\n max_workers=2, \n gpus_per_worker=1, \n) as exe:\n fs_1 = exe.submit(get_available_gpus)\n fs_2 = exe.submit(get_available_gpus)\n print(fs_1.result(), fs_2.result())","metadata":{},"id":"9d33af22-7b90-4ff7-a434-9c4cd9a930d5"},{"cell_type":"markdown","source":"The additional parameter `gpus_per_worker=1` specifies that one GPU is assigned to each worker. This functionality \nrequires `pympipool` to be connected to a resource manager like the [SLURM workload manager](https://www.schedmd.com)\nor preferably the [flux framework](https://flux-framework.org). The rest of the script follows the previous examples, \nas two functions are submitted and the results are printed. 
","metadata":{},"id":"8dc7a989-908a-48a6-8d06-ac1e24173f5c"},{"cell_type":"markdown","source":"To clarify the execution of such an example on a high performance computing (HPC) cluster using the [SLURM workload manager](https://www.schedmd.com)\nthe submission script is given below: ","metadata":{},"id":"d1a17c6c-41ee-4595-913e-4af7272010a5"},{"cell_type":"raw","source":"#!/bin/bash\n#SBATCH --nodes=2\n#SBATCH --gpus-per-node=1\n#SBATCH --get-user-env=L\n\npython test_gpu.py","metadata":{},"id":"11ce332b-d2c1-4434-84c4-1e523e430848"},{"cell_type":"markdown","source":"The important part is that for using the `pympipool.slurm.PySlurmExecutor` backend the script `test_gpu.py` does not \nneed to be executed with `srun` but rather it is sufficient to just execute it with the python interpreter. `pympipool`\ninternally calls `srun` to assign the individual resources to a given worker. ","metadata":{},"id":"14bf6228-db64-406b-b04f-4d23daaa836d"},{"cell_type":"markdown","source":"For the more complex setup of running the [flux framework](https://flux-framework.org) as a secondary resource scheduler\nwithin the [SLURM workload manager](https://www.schedmd.com) it is essential that the resources are passed from the \n[SLURM workload manager](https://www.schedmd.com) to the [flux framework](https://flux-framework.org). 
This is achieved\nby calling `srun flux start` in the submission script: ","metadata":{},"id":"66e3be02-c11c-4053-9600-6bcfefefb127"},{"cell_type":"raw","source":"#!/bin/bash\n#SBATCH --nodes=2\n#SBATCH --gpus-per-node=1\n#SBATCH --get-user-env=L\n\nsrun flux start python test_gpu.py","metadata":{},"id":"aa0e2abf-7ab2-464f-a341-b93f91fbdd99"},{"cell_type":"markdown","source":"As a result the GPUs available on the two compute nodes are reported: ","metadata":{},"id":"6c84fb7d-4285-4d73-8f1e-7cb88050eb85"},{"cell_type":"raw","source":">>> [('/device:GPU:0', 'device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:84:00.0, compute capability: 7.0', 'cn138'),\n>>> ('/device:GPU:0', 'device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:84:00.0, compute capability: 7.0', 'cn139')]","metadata":{},"id":"a431e015-a309-49ac-9f10-756bda0177fc"},{"cell_type":"markdown","source":"In this case each compute node `cn138` and `cn139` is equipped with one `Tesla V100S-PCIE-32GB`.","metadata":{},"id":"70eb9a19-325e-4179-a196-4417e3f30e19"}]} \ No newline at end of file From 36cad8653c41c81ddfdd6b9a7b98a499ddc846b0 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 10 Nov 2023 13:38:59 +0100 Subject: [PATCH 078/134] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 54287344..d357e529 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,7 @@ uses a different backend, with the `pympipool.flux.PyFluxExecutor` being the pre interface can be a reasonable choice, still depending on the [SLURM workload manager](https://www.schedmd.com) configuration in can be limited in terms of the fine-grained scheduling or the responsiveness when working with hundreds of compute nodes in an individual allocation. 
-* `pympipool.flux.PyFluxExecutor`: The [flux framework]](https://flux-framework.org) is the preferred backend for +* `pympipool.flux.PyFluxExecutor`: The [flux framework](https://flux-framework.org) is the preferred backend for `pympipool`. Just like the `pympipool.slurm.PySlurmExecutor` it supports serial python functions, thread based parallelism, MPI based parallelism and the assignment of GPUs to individual python functions. Still the advantages of using the [flux framework](https://flux-framework.org) as a backend are the easy installation, the faster allocation @@ -150,4 +150,4 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * [Contributions](https://pympipool.readthedocs.io/en/latest/development.html#contributions) * [Integration](https://pympipool.readthedocs.io/en/latest/development.html#integration) * [Alternative Projects](https://pympipool.readthedocs.io/en/latest/development.html#alternative-projects) -* [Module Index](https://pympipool.readthedocs.io/en/latest/py-modindex.html) \ No newline at end of file +* [Module Index](https://pympipool.readthedocs.io/en/latest/py-modindex.html) From 771a86333d8fe9e36164515c67d96a8bb5d27ae3 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 10 Nov 2023 13:40:13 +0100 Subject: [PATCH 079/134] Update index.rst --- docs/source/index.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 492d7721..fb03a647 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -82,7 +82,7 @@ uses a different backend, with the :code:`pympipool.flux.PyFluxExecutor` being t * :code:`pympipool.mpi.PyMpiExecutor`: The simplest executor of the three uses `mpi4py `_ as a backend. This simplifies the installation on all operation systems including Windows. Still at the same time it limits the up-scaling to a single compute node and serial or MPI parallel python functions. 
There is no support for thread based parallelism or GPU assignment. This interface is primarily used for testing and developing or as a fall-back solution. It is not recommended to use this interface in production. * :code:`pympipool.slurm.PySlurmExecutor`: The `SLURM workload manager `_ is commonly used on HPC systems to schedule and distribute tasks. :code:`pympipool` provides a python interface for scheduling the execution of python functions as SLURM job steps which are typically created using the :code:`srun` command. This executor supports serial python functions, thread based parallelism, MPI based parallelism and the assignment of GPUs to individual python functions. When the `SLURM workload manager `_ is installed on your HPC cluster this interface can be a reasonable choice, still depending on the SLURM configuration in can be limited in terms of the fine-grained scheduling or the responsiveness when working with hundreds of compute nodes in an individual allocation. -* :code:`pympipool.flux.PyFluxExecutor`: The `flux `_ is the preferred backend for :code:`pympipool`. Just like the :code:`pympipool.slurm.PySlurmExecutor` it supports serial python functions, thread based parallelism, MPI based parallelism and the assignment of GPUs to individual python functions. Still the advantages of using the `flux `_ as a backend are the easy installation, the faster allocation of resources as the resources are managed within the allocation and no central databases is used and the superior level of fine-grained resource assignment which is typically not available on HPC resource schedulers. +* :code:`pympipool.flux.PyFluxExecutor`: The `flux framework `_ is the preferred backend for :code:`pympipool`. Just like the :code:`pympipool.slurm.PySlurmExecutor` it supports serial python functions, thread based parallelism, MPI based parallelism and the assignment of GPUs to individual python functions. 
Still the advantages of using the `flux `_ as a backend are the easy installation, the faster allocation of resources as the resources are managed within the allocation and no central databases is used and the superior level of fine-grained resource assignment which is typically not available on HPC resource schedulers. Each of these backends consists of two parts a broker and a worker. When a new tasks is submitted from the user it is received by the broker and the broker identifies the first available worker. The worker then executes a task and returns @@ -134,4 +134,4 @@ Documentation examples development -* :ref:`modindex` \ No newline at end of file +* :ref:`modindex` From bee8c91d7bc3cb6f1de55ca7a2735188ba0ce897 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 10 Nov 2023 13:40:54 +0100 Subject: [PATCH 080/134] Update index.rst --- docs/source/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index fb03a647..d11a4f5a 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -82,7 +82,7 @@ uses a different backend, with the :code:`pympipool.flux.PyFluxExecutor` being t * :code:`pympipool.mpi.PyMpiExecutor`: The simplest executor of the three uses `mpi4py `_ as a backend. This simplifies the installation on all operation systems including Windows. Still at the same time it limits the up-scaling to a single compute node and serial or MPI parallel python functions. There is no support for thread based parallelism or GPU assignment. This interface is primarily used for testing and developing or as a fall-back solution. It is not recommended to use this interface in production. * :code:`pympipool.slurm.PySlurmExecutor`: The `SLURM workload manager `_ is commonly used on HPC systems to schedule and distribute tasks. 
:code:`pympipool` provides a python interface for scheduling the execution of python functions as SLURM job steps which are typically created using the :code:`srun` command. This executor supports serial python functions, thread based parallelism, MPI based parallelism and the assignment of GPUs to individual python functions. When the `SLURM workload manager `_ is installed on your HPC cluster this interface can be a reasonable choice, still depending on the SLURM configuration in can be limited in terms of the fine-grained scheduling or the responsiveness when working with hundreds of compute nodes in an individual allocation. -* :code:`pympipool.flux.PyFluxExecutor`: The `flux framework `_ is the preferred backend for :code:`pympipool`. Just like the :code:`pympipool.slurm.PySlurmExecutor` it supports serial python functions, thread based parallelism, MPI based parallelism and the assignment of GPUs to individual python functions. Still the advantages of using the `flux `_ as a backend are the easy installation, the faster allocation of resources as the resources are managed within the allocation and no central databases is used and the superior level of fine-grained resource assignment which is typically not available on HPC resource schedulers. +* :code:`pympipool.flux.PyFluxExecutor`: The `flux framework `_ is the preferred backend for :code:`pympipool`. Just like the :code:`pympipool.slurm.PySlurmExecutor` it supports serial python functions, thread based parallelism, MPI based parallelism and the assignment of GPUs to individual python functions. Still the advantages of using the `flux framework `_ as a backend are the easy installation, the faster allocation of resources as the resources are managed within the allocation and no central databases is used and the superior level of fine-grained resource assignment which is typically not available on HPC resource schedulers. Each of these backends consists of two parts a broker and a worker. 
When a new tasks is submitted from the user it is received by the broker and the broker identifies the first available worker. The worker then executes a task and returns From f55a078ec2c3699e7f7ccc9dc6bdb7bcef38b7b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Sat, 11 Nov 2023 09:56:27 +0100 Subject: [PATCH 081/134] Fix module index --- .ci_support/environment-docs.yml | 3 ++- pympipool/__init__.py | 12 +++++------- pympipool/slurm/executor.py | 10 +--------- 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/.ci_support/environment-docs.yml b/.ci_support/environment-docs.yml index a6ab251e..1a112178 100644 --- a/.ci_support/environment-docs.yml +++ b/.ci_support/environment-docs.yml @@ -9,4 +9,5 @@ dependencies: - cloudpickle - mpi4py - tqdm - - pyzmq \ No newline at end of file + - pyzmq + - flux-core \ No newline at end of file diff --git a/pympipool/__init__.py b/pympipool/__init__.py index fff44455..9753846b 100644 --- a/pympipool/__init__.py +++ b/pympipool/__init__.py @@ -1,6 +1,9 @@ import os +import shutil from ._version import get_versions from pympipool.mpi.executor import PyMPIExecutor +from pympipool.shared.interface import SLURM_COMMAND +from pympipool.slurm.executor import PySlurmExecutor try: # The PyFluxExecutor requires flux-core to be installed. from pympipool.flux.executor import PyFluxExecutor @@ -10,13 +13,8 @@ flux_installed = False pass -try: # The PySlurmExecutor requires the srun command to be available. - from pympipool.slurm.executor import PySlurmExecutor - - slurm_installed = True -except ImportError: - slurm_installed = False - pass +# The PySlurmExecutor requires the srun command to be available. 
+slurm_installed = shutil.which(SLURM_COMMAND) is not None __version__ = get_versions()["version"] diff --git a/pympipool/slurm/executor.py b/pympipool/slurm/executor.py index 2e4e5765..96b349a3 100644 --- a/pympipool/slurm/executor.py +++ b/pympipool/slurm/executor.py @@ -1,21 +1,13 @@ -import shutil -import subprocess - - from pympipool.shared.executorbase import ( cloudpickle_register, execute_parallel_tasks, ExecutorBase, executor_broker, ) -from pympipool.shared.interface import SrunInterface, SLURM_COMMAND +from pympipool.shared.interface import SrunInterface from pympipool.shared.thread import RaisingThread -if shutil.which(SLURM_COMMAND) is None: - raise ImportError("SLURM command " + SLURM_COMMAND + " not found.") - - class PySlurmExecutor(ExecutorBase): """ The pympipool.slurm.PySlurmExecutor leverages the srun command to distribute python tasks within a SLURM queuing From 3704f91a96672ce6a58cbef16dfe0309ae6daef1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Sat, 11 Nov 2023 10:09:35 +0100 Subject: [PATCH 082/134] Fix example docstrings --- pympipool/flux/executor.py | 5 ++--- pympipool/mpi/executor.py | 4 ++-- pympipool/slurm/executor.py | 3 +-- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/pympipool/flux/executor.py b/pympipool/flux/executor.py index 40308ff3..5f0067dc 100644 --- a/pympipool/flux/executor.py +++ b/pympipool/flux/executor.py @@ -30,7 +30,7 @@ class PyFluxExecutor(ExecutorBase): executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux Examples: - ``` + >>> import numpy as np >>> from pympipool.flux import PyFluxExecutor >>> @@ -46,9 +46,8 @@ class PyFluxExecutor(ExecutorBase): >>> with PyFluxExecutor(cores=2, init_function=init_k) as p: >>> fs = p.submit(calc, 2, j=4) >>> print(fs.result()) - [(array([2, 4, 3]), 2, 0), (array([2, 4, 3]), 2, 1)] - ``` + """ def __init__( diff --git a/pympipool/mpi/executor.py b/pympipool/mpi/executor.py index 39dc53f8..8f6c2183 100644 --- 
a/pympipool/mpi/executor.py +++ b/pympipool/mpi/executor.py @@ -25,7 +25,7 @@ class PyMPIExecutor(ExecutorBase): sleep_interval (float): synchronization interval - default 0.1 Examples: - ``` + >>> import numpy as np >>> from pympipool.mpi import PyMPIExecutor >>> @@ -42,7 +42,7 @@ class PyMPIExecutor(ExecutorBase): >>> fs = p.submit(calc, 2, j=4) >>> print(fs.result()) [(array([2, 4, 3]), 2, 0), (array([2, 4, 3]), 2, 1)] - ``` + """ def __init__( diff --git a/pympipool/slurm/executor.py b/pympipool/slurm/executor.py index 96b349a3..315860e2 100644 --- a/pympipool/slurm/executor.py +++ b/pympipool/slurm/executor.py @@ -26,7 +26,7 @@ class PySlurmExecutor(ExecutorBase): sleep_interval (float): synchronization interval - default 0.1 Examples: - ``` + >>> import numpy as np >>> from pympipool.slurm import PySlurmExecutor >>> @@ -44,7 +44,6 @@ class PySlurmExecutor(ExecutorBase): >>> print(fs.result()) [(array([2, 4, 3]), 2, 0), (array([2, 4, 3]), 2, 1)] - ``` """ def __init__( From 7e6f40c63faa82e6151e3106089888aacce2a081 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Sat, 11 Nov 2023 10:10:15 +0100 Subject: [PATCH 083/134] Ignore VS code configuration --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index e3649bcf..912537cd 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ .DS_Store .coverage .idea/ +.vscode/ From c1683f3b3476746ba553e87f0c56d2845ea035fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Sat, 11 Nov 2023 10:13:42 +0100 Subject: [PATCH 084/134] docstring replace cores with max_workers --- pympipool/flux/executor.py | 2 +- pympipool/mpi/executor.py | 2 +- pympipool/slurm/executor.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pympipool/flux/executor.py b/pympipool/flux/executor.py index 5f0067dc..9ab1e52d 100644 --- a/pympipool/flux/executor.py +++ b/pympipool/flux/executor.py @@ -43,7 +43,7 @@ class PyFluxExecutor(ExecutorBase): >>> def 
init_k(): >>> return {"k": 3} >>> - >>> with PyFluxExecutor(cores=2, init_function=init_k) as p: + >>> with PyFluxExecutor(max_workers=2, init_function=init_k) as p: >>> fs = p.submit(calc, 2, j=4) >>> print(fs.result()) [(array([2, 4, 3]), 2, 0), (array([2, 4, 3]), 2, 1)] diff --git a/pympipool/mpi/executor.py b/pympipool/mpi/executor.py index 8f6c2183..33b53f68 100644 --- a/pympipool/mpi/executor.py +++ b/pympipool/mpi/executor.py @@ -38,7 +38,7 @@ class PyMPIExecutor(ExecutorBase): >>> def init_k(): >>> return {"k": 3} >>> - >>> with PyMPIExecutor(cores=2, init_function=init_k) as p: + >>> with PyMPIExecutor(max_workers=2, init_function=init_k) as p: >>> fs = p.submit(calc, 2, j=4) >>> print(fs.result()) [(array([2, 4, 3]), 2, 0), (array([2, 4, 3]), 2, 1)] diff --git a/pympipool/slurm/executor.py b/pympipool/slurm/executor.py index 315860e2..1414493e 100644 --- a/pympipool/slurm/executor.py +++ b/pympipool/slurm/executor.py @@ -39,7 +39,7 @@ class PySlurmExecutor(ExecutorBase): >>> def init_k(): >>> return {"k": 3} >>> - >>> with PySlurmExecutor(cores=2, init_function=init_k) as p: + >>> with PySlurmExecutor(max_workers=2, init_function=init_k) as p: >>> fs = p.submit(calc, 2, j=4) >>> print(fs.result()) From 509e16c4dd815b79f5f83ffb6e244ab2e521f4d3 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Mon, 13 Nov 2023 08:36:16 +0100 Subject: [PATCH 085/134] Update setup.py --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 6a0c27b0..70140b13 100644 --- a/setup.py +++ b/setup.py @@ -12,8 +12,8 @@ description='pympipool - Scale serial and MPI-parallel python functions over hundreds of compute nodes all from within a jupyter notebook or serial python process.', long_description=Path("README.md").read_text(), long_description_content_type='text/markdown', - url='https://github.com/jan-janssen/pympipool', - author_email='jan.janssen@outlook.com', + url='https://github.com/pyiron/pympipool', + 
author_email='janssen@lanl.gov', license='BSD', classifiers=[ From 76b5b7304926f17b58bca25e9a78bfae34cd2b10 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Thu, 16 Nov 2023 11:28:24 +0100 Subject: [PATCH 086/134] Use trusted publisher action --- .github/workflows/deploy.yml | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index b32f614b..85d02b34 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -1,19 +1,27 @@ +# This workflow is used to upload and deploy a new release to PyPi +# Based on https://github.com/pypa/gh-action-pypi-publish + name: PyPi Release on: push: pull_request: + workflow_dispatch: -# based on https://github.com/pypa/gh-action-pypi-publish jobs: build: + if: startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' runs-on: ubuntu-latest - + environment: + name: pypi + url: https://pypi.org/p/pympipool + permissions: + id-token: write steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 with: - python-version: '3.11' + python-version: "3.11" - name: Install dependencies run: >- @@ -25,8 +33,4 @@ jobs: run: >- python setup.py sdist bdist_wheel - name: Publish distribution 📦 to PyPI - if: startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' - uses: pypa/gh-action-pypi-publish@master - with: - user: __token__ - password: ${{ secrets.pypi_password }} \ No newline at end of file + uses: pypa/gh-action-pypi-publish@release/v1 From 2c7e8c3b64cc010b5866d7316a58cfd016f85662 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 21 Nov 2023 10:34:21 +0100 Subject: [PATCH 087/134] Implement a Shell based Executor --- pympipool/shared/executorbase.py | 2 +- pympipool/shell/__init__.py | 0 pympipool/shell/executor.py | 75 ++++++++++++++++++++++++++++++++ tests/test_shell.py | 35 +++++++++++++++ 4 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 
pympipool/shell/__init__.py create mode 100644 pympipool/shell/executor.py create mode 100644 tests/test_shell.py diff --git a/pympipool/shared/executorbase.py b/pympipool/shared/executorbase.py index f56a349e..7c8918cd 100644 --- a/pympipool/shared/executorbase.py +++ b/pympipool/shared/executorbase.py @@ -190,7 +190,7 @@ def executor_broker( def execute_task_dict(task_dict, meta_future_lst): - if "fn" in task_dict.keys(): + if "fn" in task_dict.keys() or "future" in task_dict.keys(): meta_future = next(as_completed(meta_future_lst.keys())) executor = meta_future_lst.pop(meta_future) executor.future_queue.put(task_dict) diff --git a/pympipool/shell/__init__.py b/pympipool/shell/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pympipool/shell/executor.py b/pympipool/shell/executor.py new file mode 100644 index 00000000..fe37ac09 --- /dev/null +++ b/pympipool/shell/executor.py @@ -0,0 +1,75 @@ +from concurrent.futures import Future +import subprocess + +from pympipool.shared.executorbase import ( + executor_broker, ExecutorBase +) +from pympipool.shared.thread import RaisingThread + + +def execute_single_task(future_queue): + while True: + task_dict = future_queue.get() + if "shutdown" in task_dict.keys() and task_dict["shutdown"]: + future_queue.task_done() + future_queue.join() + break + elif "future" in task_dict.keys(): + f = task_dict.pop("future") + if f.set_running_or_notify_cancel(): + try: + f.set_result( + subprocess.check_output( + *task_dict["args"], **task_dict["kwargs"] + ) + ) + except Exception as thread_exception: + future_queue.task_done() + f.set_exception(exception=thread_exception) + raise thread_exception + else: + future_queue.task_done() + else: + raise KeyError(task_dict) + + +class ShellStaticExecutor(ExecutorBase): + def __init__(self): + super().__init__() + self._process = RaisingThread( + target=execute_single_task, + kwargs={ + "future_queue": self._future_queue, + }, + ) + self._process.start() + + def 
submit(self, *args, **kwargs): + f = Future() + self._future_queue.put({"future": f, "args": args, "kwargs": kwargs}) + return f + + +class ShellExecutor(ExecutorBase): + def __init__( + self, + max_workers=1, + sleep_interval=0.1, + ): + super().__init__() + self._process = RaisingThread( + target=executor_broker, + kwargs={ + # Broker Arguments + "future_queue": self._future_queue, + "max_workers": max_workers, + "sleep_interval": sleep_interval, + "executor_class": ShellStaticExecutor, + }, + ) + self._process.start() + + def submit(self, *args, **kwargs): + f = Future() + self._future_queue.put({"future": f, "args": args, "kwargs": kwargs}) + return f diff --git a/tests/test_shell.py b/tests/test_shell.py new file mode 100644 index 00000000..76113b7e --- /dev/null +++ b/tests/test_shell.py @@ -0,0 +1,35 @@ +from unittest import TestCase + +from pympipool.shell.executor import ShellStaticExecutor, ShellExecutor + + +class StaticExecutorTest(TestCase): + def test_shell_static(self): + with ShellStaticExecutor() as exe: + future = exe.submit(["echo", "test"], universal_newlines=True) + self.assertFalse(future.done()) + self.assertEqual("test\n", future.result()) + self.assertTrue(future.done()) + + def test_shell(self): + with ShellExecutor(max_workers=2) as exe: + f_1 = exe.submit(["echo", "test_1"], universal_newlines=True) + f_2 = exe.submit(["echo", "test_2"], universal_newlines=True) + f_3 = exe.submit(["echo", "test_3"], universal_newlines=True) + f_4 = exe.submit(["echo", "test_4"], universal_newlines=True) + self.assertFalse(f_1.done()) + self.assertFalse(f_2.done()) + self.assertFalse(f_3.done()) + self.assertFalse(f_4.done()) + self.assertEqual("test_1\n", f_1.result()) + self.assertEqual("test_2\n", f_2.result()) + self.assertTrue(f_1.done()) + self.assertTrue(f_2.done()) + self.assertFalse(f_3.done()) + self.assertFalse(f_4.done()) + self.assertEqual("test_3\n", f_3.result()) + self.assertEqual("test_4\n", f_4.result()) + self.assertTrue(f_1.done()) + 
self.assertTrue(f_2.done()) + self.assertTrue(f_3.done()) + self.assertTrue(f_4.done()) From 1927a06a56d53f7775cac13d031b2d84c070cf35 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 21 Nov 2023 10:35:36 +0100 Subject: [PATCH 088/134] black formatting --- pympipool/shell/executor.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pympipool/shell/executor.py b/pympipool/shell/executor.py index fe37ac09..4667e195 100644 --- a/pympipool/shell/executor.py +++ b/pympipool/shell/executor.py @@ -1,9 +1,7 @@ from concurrent.futures import Future import subprocess -from pympipool.shared.executorbase import ( - executor_broker, ExecutorBase -) +from pympipool.shared.executorbase import executor_broker, ExecutorBase from pympipool.shared.thread import RaisingThread From 5b0cfc8bc4220143fbb60e74152505f3d873bc5b Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 21 Nov 2023 10:41:44 +0100 Subject: [PATCH 089/134] Add support for interactive executables --- pympipool/shell/interactive.py | 111 ++++++++++++++++++++++++++++++++ tests/executables/count.py | 13 ++++ tests/test_shell_interactive.py | 19 ++++++ 3 files changed, 143 insertions(+) create mode 100644 pympipool/shell/interactive.py create mode 100644 tests/executables/count.py create mode 100644 tests/test_shell_interactive.py diff --git a/pympipool/shell/interactive.py b/pympipool/shell/interactive.py new file mode 100644 index 00000000..acf7e295 --- /dev/null +++ b/pympipool/shell/interactive.py @@ -0,0 +1,111 @@ +from concurrent.futures import Future +import subprocess +from time import sleep + +from pympipool.shared.executorbase import cancel_items_in_queue, ExecutorBase +from pympipool.shared.thread import RaisingThread + + +def wait_for_process_to_stop(process, sleep_interval=10e-10): + while process.poll() is None: + sleep(sleep_interval) + + +def execute_single_task(future_queue): + process = None + while True: + task_dict = future_queue.get() + if "shutdown" in task_dict.keys() and 
task_dict["shutdown"]: + if process is not None and process.poll() is None: + process.stdin.flush() + process.stdin.close() + process.stdout.close() + process.stderr.close() + process.terminate() + wait_for_process_to_stop(process=process) + future_queue.task_done() + # future_queue.join() + break + elif "init" in task_dict.keys() and task_dict["init"]: + process = subprocess.Popen( + *task_dict["args"], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + **task_dict["kwargs"], + ) + elif "future" in task_dict.keys(): + if process is None: + raise ValueError("process not initialized") + elif process.poll() is None: + f = task_dict.pop("future") + if f.set_running_or_notify_cancel(): + try: + process.stdin.write(task_dict["input"]) + process.stdin.flush() + lines_count = 0 + output = "" + while True: + output_current = process.stdout.readline() + output += output_current + lines_count += 1 + if ( + task_dict["stop_read_pattern"] is not None + and task_dict["stop_read_pattern"] in output_current + ): + break + elif ( + task_dict["lines_to_read"] is not None + and task_dict["lines_to_read"] == lines_count + ): + break + f.set_result(output) + except Exception as thread_exception: + future_queue.task_done() + f.set_exception(exception=thread_exception) + raise thread_exception + else: + future_queue.task_done() + else: + raise ValueError("process exited") + + +class ShellInteractiveExecutor(ExecutorBase): + def __init__(self, *args, **kwargs): + super().__init__() + self._process = RaisingThread( + target=execute_single_task, + kwargs={ + "future_queue": self._future_queue, + }, + ) + self._process.start() + self._future_queue.put({"init": True, "args": args, "kwargs": kwargs}) + + def submit(self, string_input, lines_to_read=None, stop_read_pattern=None): + if lines_to_read is None and stop_read_pattern is None: + raise ValueError( + "Either the number of lines_to_read (int) or the stop_read_pattern (str) has to be defined." 
+ ) + if string_input[-1:] != "\n": + string_input += "\n" + f = Future() + self._future_queue.put( + { + "future": f, + "input": string_input, + "lines_to_read": lines_to_read, + "stop_read_pattern": stop_read_pattern, + } + ) + return f + + def shutdown(self, wait=True, *, cancel_futures=False): + if cancel_futures: + cancel_items_in_queue(que=self._future_queue) + self._future_queue.put({"shutdown": True, "wait": wait}) + if wait: + self._process.join() + # self._future_queue.join() + self._process = None + self._future_queue = None diff --git a/tests/executables/count.py b/tests/executables/count.py new file mode 100644 index 00000000..3e65d51e --- /dev/null +++ b/tests/executables/count.py @@ -0,0 +1,13 @@ +def count(iterations): + for i in range(int(iterations)): + print(i) + print("done") + + +if __name__ == "__main__": + while True: + user_input = input() + if "shutdown" in user_input: + break + else: + count(iterations=int(user_input)) \ No newline at end of file diff --git a/tests/test_shell_interactive.py b/tests/test_shell_interactive.py new file mode 100644 index 00000000..84c997ea --- /dev/null +++ b/tests/test_shell_interactive.py @@ -0,0 +1,19 @@ +import os + +from unittest import TestCase + +from pympipool.shell.interactive import ShellInteractiveExecutor + + +class InteractiveExecutorTest(TestCase): + def test_shell_interactive(self): + executable_path = os.path.join(os.path.dirname(__file__), "executables", "count.py") + with ShellInteractiveExecutor(["python", executable_path], universal_newlines=True) as exe: + future_lines = exe.submit(string_input="4", lines_to_read=5, stop_read_pattern=None) + future_pattern = exe.submit(string_input="4", lines_to_read=None, stop_read_pattern="done") + self.assertFalse(future_lines.done()) + self.assertFalse(future_pattern.done()) + self.assertEqual("0\n1\n2\n3\ndone\n", future_lines.result()) + self.assertEqual("0\n1\n2\n3\ndone\n", future_pattern.result()) + self.assertTrue(future_lines.done()) + 
self.assertTrue(future_pattern.done()) From c2ada02bc59789c1dc6993087b35e4343e79653b Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 21 Nov 2023 10:43:41 +0100 Subject: [PATCH 090/134] Make shell scripts available through init --- pympipool/__init__.py | 2 ++ pympipool/shell/__init__.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/pympipool/__init__.py b/pympipool/__init__.py index 9753846b..dfd18e95 100644 --- a/pympipool/__init__.py +++ b/pympipool/__init__.py @@ -3,6 +3,8 @@ from ._version import get_versions from pympipool.mpi.executor import PyMPIExecutor from pympipool.shared.interface import SLURM_COMMAND +from pympipool.shell.executor import ShellExecutor +from pympipool.shell.interactive import ShellInteractiveExecutor from pympipool.slurm.executor import PySlurmExecutor try: # The PyFluxExecutor requires flux-core to be installed. diff --git a/pympipool/shell/__init__.py b/pympipool/shell/__init__.py index e69de29b..09f4b1cc 100644 --- a/pympipool/shell/__init__.py +++ b/pympipool/shell/__init__.py @@ -0,0 +1,2 @@ +from pympipool.shell.executor import ShellExecutor +from pympipool.shell.interactive import ShellInteractiveExecutor From a704d20a8b1d85fb8461b4c84ae9f768c904bb1c Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 21 Nov 2023 11:05:00 +0100 Subject: [PATCH 091/134] Add tests for internal functionality --- tests/test_shell.py | 21 +++++++++++++++++---- tests/test_shell_interactive.py | 30 +++++++++++++++++++++++++----- 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/tests/test_shell.py b/tests/test_shell.py index 76113b7e..ed9658d9 100644 --- a/tests/test_shell.py +++ b/tests/test_shell.py @@ -1,17 +1,30 @@ +from concurrent.futures import Future +import queue + from unittest import TestCase -from pympipool.shell.executor import ShellStaticExecutor, ShellExecutor +from pympipool.shell.executor import ShellStaticExecutor, ShellExecutor, execute_single_task + +class ShellExecutorTest(TestCase): + def 
test_execute_single_task(self): + test_queue = queue.Queue() + f = Future() + test_queue.put({"future": f, "args": [["echo", "test"]], "kwargs": {"universal_newlines": True}}) + test_queue.put({"shutdown": True}) + self.assertFalse(f.done()) + execute_single_task(future_queue=test_queue) + self.assertTrue(f.done()) + self.assertEqual("test\n", f.result()) -class StaticExecutorTest(TestCase): - def test_shell_static(self): + def test_shell_static_executor(self): with ShellStaticExecutor() as exe: future = exe.submit(["echo", "test"], universal_newlines=True) self.assertFalse(future.done()) self.assertEqual("test\n", future.result()) self.assertTrue(future.done()) - def test_shell(self): + def test_shell_executor(self): with ShellExecutor(max_workers=2) as exe: f_1 = exe.submit(["echo", "test_1"], universal_newlines=True) f_2 = exe.submit(["echo", "test_2"], universal_newlines=True) diff --git a/tests/test_shell_interactive.py b/tests/test_shell_interactive.py index 84c997ea..2f0406fe 100644 --- a/tests/test_shell_interactive.py +++ b/tests/test_shell_interactive.py @@ -1,14 +1,34 @@ +from concurrent.futures import Future import os +import queue from unittest import TestCase -from pympipool.shell.interactive import ShellInteractiveExecutor +from pympipool.shell.interactive import ShellInteractiveExecutor, execute_single_task -class InteractiveExecutorTest(TestCase): - def test_shell_interactive(self): - executable_path = os.path.join(os.path.dirname(__file__), "executables", "count.py") - with ShellInteractiveExecutor(["python", executable_path], universal_newlines=True) as exe: +class ShellInteractiveExecutorTest(TestCase): + def setUp(self): + self.executable_path = os.path.join(os.path.dirname(__file__), "executables", "count.py") + + def test_execute_single_task(self): + test_queue = queue.Queue() + future_lines = Future() + future_pattern = Future() + test_queue.put({"init": True, "args": [["python", self.executable_path]], "kwargs": {"universal_newlines": 
True}}) + test_queue.put({"future": future_lines, "input": "4\n", "lines_to_read": 5, "stop_read_pattern": None}) + test_queue.put({"future": future_pattern, "input": "4\n", "lines_to_read": None, "stop_read_pattern": "done"}) + test_queue.put({"shutdown": True}) + self.assertFalse(future_lines.done()) + self.assertFalse(future_pattern.done()) + execute_single_task(future_queue=test_queue) + self.assertTrue(future_lines.done()) + self.assertTrue(future_pattern.done()) + self.assertEqual("0\n1\n2\n3\ndone\n", future_lines.result()) + self.assertEqual("0\n1\n2\n3\ndone\n", future_pattern.result()) + + def test_shell_interactive_executor(self): + with ShellInteractiveExecutor(["python", self.executable_path], universal_newlines=True) as exe: future_lines = exe.submit(string_input="4", lines_to_read=5, stop_read_pattern=None) future_pattern = exe.submit(string_input="4", lines_to_read=None, stop_read_pattern="done") self.assertFalse(future_lines.done()) From 7c796e7ac9a95e75e4498a06e54b838098a0dfde Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 21 Nov 2023 13:04:13 +0100 Subject: [PATCH 092/134] Refactoring and adding documentation --- docs/source/examples.md | 87 +++++++++++++++++++ pympipool/__init__.py | 4 +- pympipool/shell/__init__.py | 4 +- pympipool/shell/executor.py | 41 ++++++++- pympipool/shell/interactive.py | 62 ++++++++++++- tests/test_shell_interactive.py | 4 +- ...t_shell.py => test_subprocess_executor.py} | 26 ++++-- 7 files changed, 212 insertions(+), 16 deletions(-) rename tests/{test_shell.py => test_subprocess_executor.py} (64%) diff --git a/docs/source/examples.md b/docs/source/examples.md index b63f8e18..3e69e749 100644 --- a/docs/source/examples.md +++ b/docs/source/examples.md @@ -304,3 +304,90 @@ As a result the GPUs available on the two compute nodes are reported: >>> ('/device:GPU:0', 'device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:84:00.0, compute capability: 7.0', 'cn139')] ``` In this case each compute node `cn138` and 
`cn139` is equipped with one `Tesla V100S-PCIE-32GB`. + +## External Executables +While `pympipool` was initially designed for up-scaling python functions for HPC, the same functionality can be leveraged +to up-scale any executable independent of the programming language it is developed in. This approach follows the design +of the `flux.job.FluxExecutor` included in the [flux framework](https://flux-framework.org). In `pympipool` this approach +is extended to support any kind of subprocess, so it is no longer limited to the [flux framework](https://flux-framework.org). + +### Subprocess +Following the [`subprocess.check_output()`](https://docs.python.org/3/library/subprocess.html) interface of the standard +python libraries, any kind of command can be submitted to the `pympipool.SubprocessExecutor`. The command can either be +specified as a list `["echo", "test"]` in which the first entry is typically the executable followed by the corresponding +parameters or the command can be specified as a string `"echo test"` with the additional parameter `shell=True`. +``` +from pympipool import SubprocessExecutor + +with SubprocessExecutor(max_workers=2) as exe: + future = exe.submit(["echo", "test"], universal_newlines=True) + print(future.done(), future.result(), future.done()) +>>> (False, "test", True) +``` +In analogy to the previous examples the `SubprocessExecutor` class is directly imported from the `pympipool` module and +the maximum number of workers is set to two `max_workers=2`. In contrast to the `pympipool.Executor` class no other +settings to assign specific hardware to the command via the python interface are available in the `SubprocessExecutor` +class. To specify the hardware requirements for the individual commands, the user has to manually assign the resources +using the commands of the resource schedulers like `srun`, `flux run` or `mpiexec`. 
+ +The `concurrent.futures.Future` object returned after submitting a command to the `pympipool.SubprocessExecutor` behaves +just like any other future object. It provides a `done()` function to check if the execution completed as well as a +`result()` function to return the output of the submitted command. + +In comparison to the `flux.job.FluxExecutor` included in the [flux framework](https://flux-framework.org) the +`pympipool.SubprocessExecutor` differs in two ways. One, the `pympipool.SubprocessExecutor` does not provide any option for +resource assignment and two, the `pympipool.SubprocessExecutor` returns the output of the command rather than just +returning the exit status when calling `result()`. + +### Interactive Shell +Beyond external executables which are called once with a set of input parameters and/or input files and return one set +of outputs, there are some executables which allow the user to interact with the executable during the execution. The +challenge of interfacing a python process with such an interactive executable is to identify when the executable is ready +to receive the next input. A very basic example for an interactive executable is a script which counts to the number +input by the user. This can be written in python as `count.py`: +``` +def count(iterations): + for i in range(int(iterations)): + print(i) + print("done") + + +if __name__ == "__main__": + while True: + user_input = input() + if "shutdown" in user_input: + break + else: + count(iterations=int(user_input)) +``` +This example is challenging in terms of interfacing it with a python process as the length of the output changes depending +on the input. The first option that the `pympipool.ShellExecutor` provides is specifying the number of lines to read for +each call submitted to the executable using the `lines_to_read` parameter.
In comparison to the `SubprocessExecutor` +defined above the `ShellExecutor` only supports the execution of a single executable at a time, correspondingly the input +parameters for calling the executable are provided at the time of initialization of the `ShellExecutor` and the inputs +are submitted using the `submit()` function: +``` +from pympipool import ShellExecutor + +with ShellExecutor(["python", "count.py"], universal_newlines=True) as exe: + future_lines = exe.submit(string_input="4", lines_to_read=5) + print(future_lines.done(), future_lines.result(), future_lines.done()) +>>> (False, "0\n1\n2\n3\ndone\n", True) +``` +The response for a given set of input is again returned as a `concurrent.futures.Future` object, this allows the user to +execute other steps on the python side while waiting for the completion of the external executable. In this case the +example counts the numbers from `0` to `3` and prints each of them in one line followed by `done` to notify the user it is +waiting for new inputs. This results in `n+1` lines of output for the input of `n`. Still predicting the number of lines +for a given input can be challenging, so the `pympipool.ShellExecutor` class also provides the option to wait until a +specific pattern is found in the output using the `stop_read_pattern`: +``` +from pympipool import ShellExecutor + +with ShellExecutor(["python", "count.py"], universal_newlines=True) as exe: + future_pattern = exe.submit(string_input="4", stop_read_pattern="done") + print(future_pattern.done(), future_pattern.result(), future_pattern.done()) +>>> (False, "0\n1\n2\n3\ndone\n", True) +``` +In this example the pattern simply searches for the string `done` in the output of the program and returns all the output +gathered from the executable since the last input as the result of the `concurrent.futures.Future` object returned after +the submission of the interactive command.
\ No newline at end of file diff --git a/pympipool/__init__.py b/pympipool/__init__.py index dfd18e95..dc0bbf68 100644 --- a/pympipool/__init__.py +++ b/pympipool/__init__.py @@ -3,8 +3,8 @@ from ._version import get_versions from pympipool.mpi.executor import PyMPIExecutor from pympipool.shared.interface import SLURM_COMMAND -from pympipool.shell.executor import ShellExecutor -from pympipool.shell.interactive import ShellInteractiveExecutor +from pympipool.shell.executor import SubprocessExecutor +from pympipool.shell.interactive import ShellExecutor from pympipool.slurm.executor import PySlurmExecutor try: # The PyFluxExecutor requires flux-core to be installed. diff --git a/pympipool/shell/__init__.py b/pympipool/shell/__init__.py index 09f4b1cc..3086c26f 100644 --- a/pympipool/shell/__init__.py +++ b/pympipool/shell/__init__.py @@ -1,2 +1,2 @@ -from pympipool.shell.executor import ShellExecutor -from pympipool.shell.interactive import ShellInteractiveExecutor +from pympipool.shell.executor import SubprocessExecutor +from pympipool.shell.interactive import ShellExecutor diff --git a/pympipool/shell/executor.py b/pympipool/shell/executor.py index 4667e195..5b72d1ed 100644 --- a/pympipool/shell/executor.py +++ b/pympipool/shell/executor.py @@ -6,6 +6,12 @@ def execute_single_task(future_queue): + """ + Process items received via the queue. + + Args: + future_queue (queue.Queue): + """ while True: task_dict = future_queue.get() if "shutdown" in task_dict.keys() and task_dict["shutdown"]: @@ -31,7 +37,10 @@ def execute_single_task(future_queue): raise KeyError(task_dict) -class ShellStaticExecutor(ExecutorBase): +class SubprocessSingleExecutor(ExecutorBase): + """ + The pympipool.shell.SubprocessSingleExecutor is the internal worker for the pympipool.shell.SubprocessExecutor. 
+ """ def __init__(self): super().__init__() self._process = RaisingThread( @@ -48,7 +57,26 @@ def submit(self, *args, **kwargs): return f -class ShellExecutor(ExecutorBase): +class SubprocessExecutor(ExecutorBase): + """ + The pympipool.shell.SubprocessExecutor enables the submission of command line calls via the subprocess.check_output() + interface of the python standard library. It is based on the concurrent.futures.Executor class and returns a + concurrent.futures.Future object for every submitted command line call. Still it does not provide any option to + interact with the external executable during the execution. + + Args: + max_workers (int): defines the number workers which can execute functions in parallel + sleep_interval (float): synchronization interval - default 0.1 + + Examples: + + >>> from pympipool import SubprocessExecutor + >>> with SubprocessExecutor(max_workers=2) as exe: + >>> future = exe.submit(["echo", "test"], universal_newlines=True) + >>> print(future.done(), future.result(), future.done()) + (False, "test", True) + + """ def __init__( self, max_workers=1, @@ -62,12 +90,19 @@ def __init__( "future_queue": self._future_queue, "max_workers": max_workers, "sleep_interval": sleep_interval, - "executor_class": ShellStaticExecutor, + "executor_class": SubprocessSingleExecutor, }, ) self._process.start() def submit(self, *args, **kwargs): + """ + Submit a command line call to be executed. The given arguments are provided to subprocess.Popen() as additional + inputs to control the execution. + + Returns: + A Future representing the given call. 
+ """ f = Future() self._future_queue.put({"future": f, "args": args, "kwargs": kwargs}) return f diff --git a/pympipool/shell/interactive.py b/pympipool/shell/interactive.py index acf7e295..f990c8e3 100644 --- a/pympipool/shell/interactive.py +++ b/pympipool/shell/interactive.py @@ -7,11 +7,24 @@ def wait_for_process_to_stop(process, sleep_interval=10e-10): + """ + Wait for the subprocess.Popen() process to stop executing + + Args: + process (subprocess.Popen): process object + sleep_interval (float): interval to sleep during poll() calls + """ while process.poll() is None: sleep(sleep_interval) def execute_single_task(future_queue): + """ + Process items received via the queue. + + Args: + future_queue (queue.Queue): + """ process = None while True: task_dict = future_queue.get() @@ -70,7 +83,27 @@ def execute_single_task(future_queue): raise ValueError("process exited") -class ShellInteractiveExecutor(ExecutorBase): +class ShellExecutor(ExecutorBase): + """ + In contrast to the other pympipool.shell.SubprocessExecutor and the pympipool.Executor the pympipool.shell.ShellExecutor + can only execute a single process at a given time. Still it adds the capability to interact with this process during + its execution. The initialization of the pympipool.shell.ShellExecutor takes the same input arguments as the + subprocess.Popen() call for the standard library to start a subprocess. 
+ + Examples + + >>> from pympipool import ShellExecutor + >>> with ShellExecutor(["python", "count.py"], universal_newlines=True) as exe: + >>> future_lines = exe.submit(string_input="4", lines_to_read=5) + >>> print(future_lines.done(), future_lines.result(), future_lines.done()) + (False, "0\n1\n2\n3\ndone\n", True) + + >>> from pympipool import ShellExecutor + >>> with ShellExecutor(["python", "count.py"], universal_newlines=True) as exe: + >>> future_pattern = exe.submit(string_input="4", stop_read_pattern="done") + >>> print(future_pattern.done(), future_pattern.result(), future_pattern.done()) + (False, "0\n1\n2\n3\ndone\n", True) + """ def __init__(self, *args, **kwargs): super().__init__() self._process = RaisingThread( @@ -83,6 +116,20 @@ def __init__(self, *args, **kwargs): self._future_queue.put({"init": True, "args": args, "kwargs": kwargs}) def submit(self, string_input, lines_to_read=None, stop_read_pattern=None): + """ + Submit the input as a string to the executable. In addition to the input the ShellExecutor also needs a measure + to identify the completion of the execution. This can either be provided based on the number of lines to read + using the `lines_to_read` parameter or by providing a string pattern using the `stop_read_pattern` to stop + reading new lines. One of these two stopping criteria has to be defined. + + Args: + string_input (str): Input to be communicated to the underlying executable + lines_to_read (None/int): integer number of lines to read from the command line (optional) + stop_read_pattern (None/str): string pattern to indicate the command line output is completed (optional) + + Returns: + A Future representing the given call. + """ if lines_to_read is None and stop_read_pattern is None: raise ValueError( "Either the number of lines_to_read (int) or the stop_read_pattern (str) has to be defined." 
@@ -101,6 +148,19 @@ def submit(self, string_input, lines_to_read=None, stop_read_pattern=None): return f def shutdown(self, wait=True, *, cancel_futures=False): + """Clean-up the resources associated with the Executor. + + It is safe to call this method several times. Otherwise, no other + methods can be called after this one. + + Args: + wait: If True then shutdown will not return until all running + futures have finished executing and the resources used by the + parallel_executors have been reclaimed. + cancel_futures: If True then shutdown will cancel all pending + futures. Futures that are completed or running will not be + cancelled. + """ if cancel_futures: cancel_items_in_queue(que=self._future_queue) self._future_queue.put({"shutdown": True, "wait": wait}) diff --git a/tests/test_shell_interactive.py b/tests/test_shell_interactive.py index 2f0406fe..3c98cd6a 100644 --- a/tests/test_shell_interactive.py +++ b/tests/test_shell_interactive.py @@ -4,7 +4,7 @@ from unittest import TestCase -from pympipool.shell.interactive import ShellInteractiveExecutor, execute_single_task +from pympipool.shell.interactive import ShellExecutor, execute_single_task class ShellInteractiveExecutorTest(TestCase): @@ -28,7 +28,7 @@ def test_execute_single_task(self): self.assertEqual("0\n1\n2\n3\ndone\n", future_pattern.result()) def test_shell_interactive_executor(self): - with ShellInteractiveExecutor(["python", self.executable_path], universal_newlines=True) as exe: + with ShellExecutor(["python", self.executable_path], universal_newlines=True) as exe: future_lines = exe.submit(string_input="4", lines_to_read=5, stop_read_pattern=None) future_pattern = exe.submit(string_input="4", lines_to_read=None, stop_read_pattern="done") self.assertFalse(future_lines.done()) diff --git a/tests/test_shell.py b/tests/test_subprocess_executor.py similarity index 64% rename from tests/test_shell.py rename to tests/test_subprocess_executor.py index ed9658d9..209dffb2 100644 --- 
a/tests/test_shell.py +++ b/tests/test_subprocess_executor.py @@ -3,10 +3,10 @@ from unittest import TestCase -from pympipool.shell.executor import ShellStaticExecutor, ShellExecutor, execute_single_task +from pympipool.shell.executor import SubprocessSingleExecutor, SubprocessExecutor, execute_single_task -class ShellExecutorTest(TestCase): +class SubprocessExecutorTest(TestCase): def test_execute_single_task(self): test_queue = queue.Queue() f = Future() @@ -17,15 +17,29 @@ def test_execute_single_task(self): self.assertTrue(f.done()) self.assertEqual("test\n", f.result()) - def test_shell_static_executor(self): - with ShellStaticExecutor() as exe: - future = exe.submit(["echo", "test"], universal_newlines=True) + def test_shell_static_executor_args(self): + with SubprocessSingleExecutor() as exe: + future = exe.submit(["echo", "test"], universal_newlines=True, shell=False) + self.assertFalse(future.done()) + self.assertEqual("test\n", future.result()) + self.assertTrue(future.done()) + + def test_shell_static_executor_binary(self): + with SubprocessSingleExecutor() as exe: + future = exe.submit(["echo", "test"], universal_newlines=False, shell=False) + self.assertFalse(future.done()) + self.assertEqual(b"test\n", future.result()) + self.assertTrue(future.done()) + + def test_shell_static_executor_shell(self): + with SubprocessSingleExecutor() as exe: + future = exe.submit("echo test", universal_newlines=True, shell=True) self.assertFalse(future.done()) self.assertEqual("test\n", future.result()) self.assertTrue(future.done()) def test_shell_executor(self): - with ShellExecutor(max_workers=2) as exe: + with SubprocessExecutor(max_workers=2) as exe: f_1 = exe.submit(["echo", "test_1"], universal_newlines=True) f_2 = exe.submit(["echo", "test_2"], universal_newlines=True) f_3 = exe.submit(["echo", "test_3"], universal_newlines=True) From c9ae294a9eca5d8af89735083abdf000cfb6901d Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 21 Nov 2023 13:11:51 +0100 
Subject: [PATCH 093/134] update example notebook --- notebooks/examples.ipynb | 838 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 837 insertions(+), 1 deletion(-) diff --git a/notebooks/examples.ipynb b/notebooks/examples.ipynb index 90596a0b..f5d61df8 100644 --- a/notebooks/examples.ipynb +++ b/notebooks/examples.ipynb @@ -1 +1,837 @@ -{"metadata":{"kernelspec":{"name":"flux","display_name":"Flux","language":"python"},"language_info":{"name":"python","version":"3.11.6","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":5,"nbformat":4,"cells":[{"cell_type":"markdown","source":"# Examples\nThe `pympipool.Executor` extends the interface of the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures)\nto simplify the up-scaling of individual functions in a given workflow.","metadata":{},"id":"19bad499-5a97-425c-beec-dcd88d693d4c"},{"cell_type":"markdown","source":"## Compatibility\nStarting with the basic example of `1+1=2`. 
With the `ThreadPoolExecutor` from the [`concurrent.futures`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures)\nstandard library this can be written as: ","metadata":{},"id":"f752ec8d-50b8-46fb-86f2-08a9126f1a39"},{"cell_type":"code","source":"from concurrent.futures import ThreadPoolExecutor","metadata":{"trusted":true},"execution_count":1,"outputs":[],"id":"584cd590-acaf-48d7-a5b5-e4049a9626b7"},{"cell_type":"code","source":"with ThreadPoolExecutor(\n max_workers=1,\n) as exe:\n future = exe.submit(sum, [1, 1])\n print(future.result())","metadata":{"trusted":true},"execution_count":2,"outputs":[{"name":"stdout","text":"2\n","output_type":"stream"}],"id":"73673e42-2c68-4b91-b6ff-db1ecb2c0587"},{"cell_type":"markdown","source":"In this case `max_workers=1` limits the number of threads uses by the `ThreadPoolExecutor` to one. Then the `sum()` \nfunction is submitted to the executor with a list with two ones `[1, 1]` as input. A [`concurrent.futures.Future`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures)\nobject is returned. The `Future` object allows to check the status of the execution with the `done()` method which \nreturns `True` or `False` depending on the state of the execution. Or the main process can wait until the execution is \ncompleted by calling `result()`. ","metadata":{},"id":"32156314-02a9-4210-8a8c-94afe09b64f6"},{"cell_type":"markdown","source":"The result of the calculation is `1+1=2`. ","metadata":{},"id":"b750d830-bd0d-4474-9f70-913d0b9d6b8a"},{"cell_type":"markdown","source":"The `pympipool.Executor` class extends the interface of the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) \nclass by providing more parameters to specify the level of parallelism. 
In addition, to specifying the maximum number \nof workers `max_workers` the user can also specify the number of cores per worker `cores_per_worker` for MPI based \nparallelism, the number of threads per core `threads_per_core` for thread based parallelism and the number of GPUs per\nworker `gpus_per_worker`. Finally, for those backends which support over-subscribing this can also be enabled using the \n`oversubscribe` parameter. All these parameters are optional, so the `pympipool.Executor` can be used as a drop-in \nreplacement for the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures).","metadata":{},"id":"4fbf72a2-0e0e-43ce-be8f-db3489c4eafe"},{"cell_type":"markdown","source":"The previous example is rewritten for the `pympipool.Executor`:","metadata":{},"id":"9b5a26e2-3d18-4778-ba10-e3e213b70433"},{"cell_type":"code","source":"from pympipool import Executor ","metadata":{"trusted":true},"execution_count":3,"outputs":[],"id":"60373c38-63f8-48dc-be0f-ddb71ebf88f8"},{"cell_type":"code","source":"with Executor(\n max_workers=1, \n cores_per_worker=1, \n threads_per_core=1, \n gpus_per_worker=0, \n oversubscribe=False\n) as exe:\n future = exe.submit(sum, [1,1])\n print(future.result())","metadata":{"trusted":true},"execution_count":4,"outputs":[{"name":"stdout","text":"2\n","output_type":"stream"}],"id":"fd755b28-ff01-4530-9099-001cac151e31"},{"cell_type":"markdown","source":"The result of the calculation is again `1+1=2`.","metadata":{},"id":"44c4bc4b-cf97-461e-98e7-62bcdb8caff2"},{"cell_type":"markdown","source":"Beyond pre-defined functions like the `sum()` function, the same functionality can be used to submit user-defined \nfunctions. 
In the following example a custom summation function is defined: ","metadata":{},"id":"331aed93-806a-4057-ab9c-19479190f472"},{"cell_type":"code","source":"def calc(*args):\n return sum(*args)","metadata":{"trusted":true},"execution_count":5,"outputs":[],"id":"cdeb8710-b328-463d-a436-82d6756e76b3"},{"cell_type":"markdown","source":"In contrast to the previous example where just a single function was submitted to a single worker, in this case a total\nof four functions is submitted to a group of two workers `max_workers=2`. Consequently, the functions are executed as a\nset of two pairs. ","metadata":{},"id":"d5efa995-d4d4-4f9c-a7e6-38dd66143535"},{"cell_type":"code","source":"with Executor(max_workers=2) as exe:\n fs_1 = exe.submit(calc, [2, 1])\n fs_2 = exe.submit(calc, [2, 2])\n fs_3 = exe.submit(calc, [2, 3])\n fs_4 = exe.submit(calc, [2, 4])\n print([\n fs_1.result(), \n fs_2.result(), \n fs_3.result(), \n fs_4.result(),\n ])","metadata":{"trusted":true},"execution_count":6,"outputs":[{"name":"stdout","text":"[3, 4, 5, 6]\n","output_type":"stream"}],"id":"82033832-7ccd-4c67-a1fb-57f55710b77c"},{"cell_type":"markdown","source":"The snippet can be executed with any python interpreter. It returns the corresponding sums as expected. The same can be achieved with the built-in [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures)\nclasses. Still one advantage of using the `pympipool.Executor` rather than the built-in ones, is the ability to execute \nthe same commands in interactive environments like [Jupyter notebooks](https://jupyter.org). This is achieved by using \n[cloudpickle](https://github.com/cloudpipe/cloudpickle) to serialize the python function and its parameters rather than\nthe regular pickle package. 
","metadata":{},"id":"86838528-312e-46cc-b022-0c946bf95037"},{"cell_type":"markdown","source":"For backwards compatibility with the [`multiprocessing.Pool`](https://docs.python.org/3/library/multiprocessing.html) \nclass the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures)\nalso implements the `map()` function to map a series of inputs to a function. The same `map()` function is also \navailable in the `pympipool.Executor`: ","metadata":{},"id":"4de690ed-661c-4e6e-a97e-c478393d0dc6"},{"cell_type":"code","source":"with Executor(max_workers=2) as exe:\n print(list(exe.map(calc, [[2, 1], [2, 2], [2, 3], [2, 4]])))","metadata":{"trusted":true},"execution_count":7,"outputs":[{"name":"stdout","text":"[3, 4, 5, 6]\n","output_type":"stream"}],"id":"3f06b0c1-5ee1-40c5-82ab-31d77cfcdb46"},{"cell_type":"markdown","source":"The results remain the same. ","metadata":{},"id":"a5d0f249-23bb-4727-8b09-87320ecb98eb"},{"cell_type":"markdown","source":"## Data Handling\nA limitation of many parallel approaches is the overhead in communication when working with large datasets. Instead of\nreading the same dataset repetitively, the `pympipool.Executor` loads the dataset only once per worker and afterwards \neach function submitted to this worker has access to the dataset, as it is already loaded in memory. To achieve this\nthe user defines an initialization function `init_function` which returns a dictionary with one key per dataset. 
The \nkeys of the dictionary can then be used as additional input parameters in each function submitted to the `pympipool.Executor`.\nThis functionality is illustrated in the following example: ","metadata":{},"id":"580b00ee-6d5b-4ca9-ba36-ff70128c0b6b"},{"cell_type":"code","source":"def calc(i, j, k):\n return i + j + k","metadata":{"trusted":true},"execution_count":8,"outputs":[],"id":"8fe8c750-4dc5-4b26-ad8d-9f755bff3494"},{"cell_type":"code","source":"def init_function():\n return {\"j\": 4, \"k\": 3, \"l\": 2}","metadata":{"trusted":true},"execution_count":9,"outputs":[],"id":"5f943266-1bee-421e-a1b4-583d222b1c99"},{"cell_type":"code","source":"with Executor(max_workers=1, init_function=init_function) as exe:\n fs = exe.submit(calc, 2, j=5)\n print(fs.result())","metadata":{"trusted":true},"execution_count":10,"outputs":[{"name":"stdout","text":"10\n","output_type":"stream"}],"id":"0debe907-b646-4fd5-bae7-46b16645d2f3"},{"cell_type":"markdown","source":"The function `calc()` requires three inputs `i`, `j` and `k`. But when the function is submitted to the executor only \ntwo inputs are provided `fs = exe.submit(calc, 2, j=5)`. In this case the first input parameter is mapped to `i=2`, the\nsecond input parameter is specified explicitly `j=5` but the third input parameter `k` is not provided. So the \n`pympipool.Executor` automatically checks the keys set in the `init_function()` function. In this case the returned \ndictionary `{\"j\": 4, \"k\": 3, \"l\": 2}` defines `j=4`, `k=3` and `l=2`. For this specific call of the `calc()` function,\n`i` and `j` are already provided so `j` is not required, but `k=3` is used from the `init_function()` and as the `calc()`\nfunction does not define the `l` parameter this one is also ignored. 
","metadata":{},"id":"72fa803a-ace0-41ea-8090-d64dfd0797cc"},{"cell_type":"markdown","source":"The result is `2+5+3=10` as `i=2` and `j=5` are provided during the submission and `k=3` is defined in the `init_function()`\nfunction.","metadata":{},"id":"1443d216-1add-445a-a662-5b16af6c1443"},{"cell_type":"markdown","source":"## Up-Scaling \nThe availability of certain features depends on the backend `pympipool` is installed with. In particular the thread \nbased parallelism and the GPU assignment is only available with the `pympipool.slurm.PySlurmExecutor` or the \n`pympipool.flux.PyFluxExecutor` backend. The latter is recommended based on the easy installation, the faster allocation \nof resources as the resources are managed within the allocation and no central databases is used and the superior level \nof fine-grained resource assignment which is typically not available on other HPC resource schedulers including the\n[SLURM workload manager](https://www.schedmd.com). The `pympipool.flux.PyFluxExecutor` requires \n[flux framework](https://flux-framework.org) to be installed in addition to the `pympipool` package. The features are \nsummarized in the table below: \n\n| Feature \\ Backend | `PyMpiExecutor` | `PySlurmExecutor` | `PyFluxExecutor` |\n|:--------------------------:|:---------------:|:-----------------:|:----------------:|\n| Thread based parallelism | no | yes | yes | \n| MPI based parallelism | yes | yes | yes |\n| GPU assignment | no | yes | yes |\n| Resource over-subscription | yes | yes | no |\n| Scalability | 1 node | ~100 nodes | no limit |","metadata":{},"id":"8d1e21ec-0b8d-45bf-bfb1-62b3df8e242a"},{"cell_type":"markdown","source":"### Thread-based Parallelism\nThe number of threads per core can be controlled with the `threads_per_core` parameter during the initialization of the \n`pympipool.Executor`. 
Unfortunately, there is no uniform way to control the number of cores a given underlying library \nuses for thread based parallelism, so it might be necessary to set certain environment variables manually: \n\n* `OMP_NUM_THREADS`: for openmp\n* `OPENBLAS_NUM_THREADS`: for openblas\n* `MKL_NUM_THREADS`: for mkl\n* `VECLIB_MAXIMUM_THREADS`: for accelerate on Mac Os X\n* `NUMEXPR_NUM_THREADS`: for numexpr\n\nAt the current stage `pympipool.Executor` does not set these parameters itself, so you have to add them in the function\nyou submit before importing the corresponding library: ","metadata":{},"id":"f3b9cc80-70ed-4bc8-abf9-62ecbd70b960"},{"cell_type":"code","source":"def calc(i):\n import os\n os.environ[\"OMP_NUM_THREADS\"] = \"2\"\n os.environ[\"OPENBLAS_NUM_THREADS\"] = \"2\"\n os.environ[\"MKL_NUM_THREADS\"] = \"2\"\n os.environ[\"VECLIB_MAXIMUM_THREADS\"] = \"2\"\n os.environ[\"NUMEXPR_NUM_THREADS\"] = \"2\"\n import numpy as np\n return i","metadata":{"trusted":true},"execution_count":11,"outputs":[],"id":"fbf5f7b2-eb3e-4a81-bae8-e429747300a0"},{"cell_type":"markdown","source":"Most modern CPUs use hyper-threading to present the operating system with double the number of virtual cores compared to\nthe number of physical cores available. So unless this functionality is disabled `threads_per_core=2` is a reasonable \ndefault. Just be careful if the number of threads is not specified it is possible that all workers try to access all \ncores at the same time which can lead to poor performance. So it is typically a good idea to monitor the CPU utilization\nwith increasing number of workers. ","metadata":{},"id":"334619d0-8d95-419e-885c-e5bc05747584"},{"cell_type":"markdown","source":"Specific manycore CPU models like the Intel Xeon Phi processors provide a much higher hyper-threading ration and require\na higher number of threads per core for optimal performance. 
","metadata":{},"id":"7c3146c1-8722-4b67-ab21-c250b8e7c9dd"},{"cell_type":"markdown","source":"### MPI Parallel Python Functions\nBeyond thread based parallelism, the message passing interface (MPI) is the de facto standard parallel execution in \nscientific computing and the [`mpi4py`](https://mpi4py.readthedocs.io) bindings to the MPI libraries are commonly used\nto parallelize existing workflows. The limitation of this approach is that it requires the whole code to adopt the MPI\ncommunication standards to coordinate the way how information is distributed. Just like the `pympipool.Executor` the \n[`mpi4py.futures.MPIPoolExecutor`](https://mpi4py.readthedocs.io/en/stable/mpi4py.futures.html#mpipoolexecutor) \nimplements the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures)\ninterface. Still in this case eah python function submitted to the executor is still limited to serial execution. The\nnovel approach of the `pympipool.Executor` is mixing these two types of parallelism. Individual functions can use\nthe [`mpi4py`](https://mpi4py.readthedocs.io) library to handle the parallel execution within the context of this \nfunction while these functions can still me submitted to the `pympipool.Executor` just like any other function. The\nadvantage of this approach is that the users can parallelize their workflows one function at the time. 
\n\nThe following example illustrates the submission of a simple MPI parallel python function: ","metadata":{},"id":"b4976d45-0f4e-496c-8173-9631f512135b"},{"cell_type":"code","source":"def calc(i):\n from mpi4py import MPI\n size = MPI.COMM_WORLD.Get_size()\n rank = MPI.COMM_WORLD.Get_rank()\n return i, size, rank","metadata":{"trusted":true},"execution_count":12,"outputs":[],"id":"cfa072a4-f88f-45b0-be94-a78f0edad513"},{"cell_type":"code","source":"with Executor(cores_per_worker=2) as exe:\n fs = exe.submit(calc, 3)\n print(fs.result())","metadata":{"trusted":true},"execution_count":13,"outputs":[{"name":"stdout","text":"[(3, 2, 0), (3, 2, 1)]\n","output_type":"stream"}],"id":"fd036b03-085d-4850-b11e-537c8fd476d5"},{"cell_type":"markdown","source":"The `calc()` function initializes the [`mpi4py`](https://mpi4py.readthedocs.io) library and gathers the size of the \nallocation and the rank of the current process within the MPI allocation. This function is then submitted to an \n`pympipool.Executor` which is initialized with a single worker with two cores `cores_per_worker=2`. So each function\ncall is going to have access to two cores. \n\nJust like before the script can be called with any python interpreter even though it is using the [`mpi4py`](https://mpi4py.readthedocs.io)\nlibrary in the background it is not necessary to execute the script with `mpiexec` or `mpirun`.","metadata":{},"id":"b4f97426-d8fb-42ef-98ca-135054bd39a7"},{"cell_type":"markdown","source":"The response consists of a list of two tuples, one for each MPI parallel process, with the first entry of the tuple \nbeing the parameter `i=3`, followed by the number of MPI parallel processes assigned to the function call `cores_per_worker=2`\nand finally the index of the specific process `0` or `1`. 
","metadata":{},"id":"69dcfdcb-41db-4c3b-a1c5-07ff3be0c9a0"},{"cell_type":"markdown","source":"### GPU Assignment\nWith the rise of machine learning applications, the use of GPUs for scientific application becomes more and more popular.\nConsequently, it is essential to have full control over the assignment of GPUs to specific python functions. In the \nfollowing example the `tensorflow` library is used to identify the GPUs and return their configuration: ","metadata":{},"id":"dc41f241-663c-474e-ae1e-b2365389bc90"},{"cell_type":"raw","source":"import socket\nfrom tensorflow.python.client import device_lib","metadata":{},"id":"6ac9630b-4ab5-4f7f-bf55-812e8189da4f"},{"cell_type":"code","source":"def get_available_gpus():\n local_device_protos = device_lib.list_local_devices()\n return [\n (x.name, x.physical_device_desc, socket.gethostname()) \n for x in local_device_protos if x.device_type == 'GPU'\n ]","metadata":{"trusted":true},"execution_count":14,"outputs":[],"id":"998138f5-f0cb-47a7-ba36-7594b8ec41fc"},{"cell_type":"raw","source":"with Executor(\n max_workers=2, \n gpus_per_worker=1, \n) as exe:\n fs_1 = exe.submit(get_available_gpus)\n fs_2 = exe.submit(get_available_gpus)\n print(fs_1.result(), fs_2.result())","metadata":{},"id":"9d33af22-7b90-4ff7-a434-9c4cd9a930d5"},{"cell_type":"markdown","source":"The additional parameter `gpus_per_worker=1` specifies that one GPU is assigned to each worker. This functionality \nrequires `pympipool` to be connected to a resource manager like the [SLURM workload manager](https://www.schedmd.com)\nor preferably the [flux framework](https://flux-framework.org). The rest of the script follows the previous examples, \nas two functions are submitted and the results are printed. 
","metadata":{},"id":"8dc7a989-908a-48a6-8d06-ac1e24173f5c"},{"cell_type":"markdown","source":"To clarify the execution of such an example on a high performance computing (HPC) cluster using the [SLURM workload manager](https://www.schedmd.com)\nthe submission script is given below: ","metadata":{},"id":"d1a17c6c-41ee-4595-913e-4af7272010a5"},{"cell_type":"raw","source":"#!/bin/bash\n#SBATCH --nodes=2\n#SBATCH --gpus-per-node=1\n#SBATCH --get-user-env=L\n\npython test_gpu.py","metadata":{},"id":"11ce332b-d2c1-4434-84c4-1e523e430848"},{"cell_type":"markdown","source":"The important part is that for using the `pympipool.slurm.PySlurmExecutor` backend the script `test_gpu.py` does not \nneed to be executed with `srun` but rather it is sufficient to just execute it with the python interpreter. `pympipool`\ninternally calls `srun` to assign the individual resources to a given worker. ","metadata":{},"id":"14bf6228-db64-406b-b04f-4d23daaa836d"},{"cell_type":"markdown","source":"For the more complex setup of running the [flux framework](https://flux-framework.org) as a secondary resource scheduler\nwithin the [SLURM workload manager](https://www.schedmd.com) it is essential that the resources are passed from the \n[SLURM workload manager](https://www.schedmd.com) to the [flux framework](https://flux-framework.org). 
This is achieved\nby calling `srun flux start` in the submission script: ","metadata":{},"id":"66e3be02-c11c-4053-9600-6bcfefefb127"},{"cell_type":"raw","source":"#!/bin/bash\n#SBATCH --nodes=2\n#SBATCH --gpus-per-node=1\n#SBATCH --get-user-env=L\n\nsrun flux start python test_gpu.py","metadata":{},"id":"aa0e2abf-7ab2-464f-a341-b93f91fbdd99"},{"cell_type":"markdown","source":"As a result the GPUs available on the two compute nodes are reported: ","metadata":{},"id":"6c84fb7d-4285-4d73-8f1e-7cb88050eb85"},{"cell_type":"raw","source":">>> [('/device:GPU:0', 'device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:84:00.0, compute capability: 7.0', 'cn138'),\n>>> ('/device:GPU:0', 'device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:84:00.0, compute capability: 7.0', 'cn139')]","metadata":{},"id":"a431e015-a309-49ac-9f10-756bda0177fc"},{"cell_type":"markdown","source":"In this case each compute node `cn138` and `cn139` is equipped with one `Tesla V100S-PCIE-32GB`.","metadata":{},"id":"70eb9a19-325e-4179-a196-4417e3f30e19"}]} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "markdown", + "id": "19bad499-5a97-425c-beec-dcd88d693d4c", + "metadata": {}, + "source": [ + "# Examples\n", + "The `pympipool.Executor` extends the interface of the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures)\n", + "to simplify the up-scaling of individual functions in a given workflow." + ] + }, + { + "cell_type": "markdown", + "id": "f752ec8d-50b8-46fb-86f2-08a9126f1a39", + "metadata": {}, + "source": [ + "## Compatibility\n", + "Starting with the basic example of `1+1=2`. 
With the `ThreadPoolExecutor` from the [`concurrent.futures`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures)\n", + "standard library this can be written as: " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "584cd590-acaf-48d7-a5b5-e4049a9626b7", + "metadata": {}, + "outputs": [], + "source": [ + "from concurrent.futures import ThreadPoolExecutor" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "73673e42-2c68-4b91-b6ff-db1ecb2c0587", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n" + ] + } + ], + "source": [ + "with ThreadPoolExecutor(\n", + " max_workers=1,\n", + ") as exe:\n", + " future = exe.submit(sum, [1, 1])\n", + " print(future.result())" + ] + }, + { + "cell_type": "markdown", + "id": "32156314-02a9-4210-8a8c-94afe09b64f6", + "metadata": {}, + "source": [ + "In this case `max_workers=1` limits the number of threads used by the `ThreadPoolExecutor` to one. Then the `sum()` \n", + "function is submitted to the executor with a list with two ones `[1, 1]` as input. A [`concurrent.futures.Future`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures)\n", + "object is returned. The `Future` object allows to check the status of the execution with the `done()` method which \n", + "returns `True` or `False` depending on the state of the execution. Or the main process can wait until the execution is \n", + "completed by calling `result()`. " + ] + }, + { + "cell_type": "markdown", + "id": "b750d830-bd0d-4474-9f70-913d0b9d6b8a", + "metadata": {}, + "source": [ + "The result of the calculation is `1+1=2`. 
" + ] + }, + { + "cell_type": "markdown", + "id": "4fbf72a2-0e0e-43ce-be8f-db3489c4eafe", + "metadata": {}, + "source": [ + "The `pympipool.Executor` class extends the interface of the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) \n", + "class by providing more parameters to specify the level of parallelism. In addition to specifying the maximum number \n", + "of workers `max_workers` the user can also specify the number of cores per worker `cores_per_worker` for MPI based \n", + "parallelism, the number of threads per core `threads_per_core` for thread based parallelism and the number of GPUs per\n", + "worker `gpus_per_worker`. Finally, for those backends which support over-subscribing this can also be enabled using the \n", + "`oversubscribe` parameter. All these parameters are optional, so the `pympipool.Executor` can be used as a drop-in \n", + "replacement for the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures)." 
+ ] + }, + { + "cell_type": "markdown", + "id": "9b5a26e2-3d18-4778-ba10-e3e213b70433", + "metadata": {}, + "source": [ + "The previous example is rewritten for the `pympipool.Executor`:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "60373c38-63f8-48dc-be0f-ddb71ebf88f8", + "metadata": {}, + "outputs": [], + "source": [ + "from pympipool import Executor " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "fd755b28-ff01-4530-9099-001cac151e31", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n" + ] + } + ], + "source": [ + "with Executor(\n", + " max_workers=1, \n", + " cores_per_worker=1, \n", + " threads_per_core=1, \n", + " gpus_per_worker=0, \n", + " oversubscribe=False\n", + ") as exe:\n", + " future = exe.submit(sum, [1,1])\n", + " print(future.result())" + ] + }, + { + "cell_type": "markdown", + "id": "44c4bc4b-cf97-461e-98e7-62bcdb8caff2", + "metadata": {}, + "source": [ + "The result of the calculation is again `1+1=2`." + ] + }, + { + "cell_type": "markdown", + "id": "331aed93-806a-4057-ab9c-19479190f472", + "metadata": {}, + "source": [ + "Beyond pre-defined functions like the `sum()` function, the same functionality can be used to submit user-defined \n", + "functions. In the following example a custom summation function is defined: " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "cdeb8710-b328-463d-a436-82d6756e76b3", + "metadata": {}, + "outputs": [], + "source": [ + "def calc(*args):\n", + " return sum(*args)" + ] + }, + { + "cell_type": "markdown", + "id": "d5efa995-d4d4-4f9c-a7e6-38dd66143535", + "metadata": {}, + "source": [ + "In contrast to the previous example where just a single function was submitted to a single worker, in this case a total\n", + "of four functions is submitted to a group of two workers `max_workers=2`. Consequently, the functions are executed as a\n", + "set of two pairs. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "82033832-7ccd-4c67-a1fb-57f55710b77c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[3, 4, 5, 6]\n" + ] + } + ], + "source": [ + "with Executor(max_workers=2) as exe:\n", + " fs_1 = exe.submit(calc, [2, 1])\n", + " fs_2 = exe.submit(calc, [2, 2])\n", + " fs_3 = exe.submit(calc, [2, 3])\n", + " fs_4 = exe.submit(calc, [2, 4])\n", + " print([\n", + " fs_1.result(), \n", + " fs_2.result(), \n", + " fs_3.result(), \n", + " fs_4.result(),\n", + " ])" + ] + }, + { + "cell_type": "markdown", + "id": "86838528-312e-46cc-b022-0c946bf95037", + "metadata": {}, + "source": [ + "The snippet can be executed with any python interpreter. It returns the corresponding sums as expected. The same can be achieved with the built-in [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures)\n", + "classes. Still one advantage of using the `pympipool.Executor` rather than the built-in ones, is the ability to execute \n", + "the same commands in interactive environments like [Jupyter notebooks](https://jupyter.org). This is achieved by using \n", + "[cloudpickle](https://github.com/cloudpipe/cloudpickle) to serialize the python function and its parameters rather than\n", + "the regular pickle package. " + ] + }, + { + "cell_type": "markdown", + "id": "4de690ed-661c-4e6e-a97e-c478393d0dc6", + "metadata": {}, + "source": [ + "For backwards compatibility with the [`multiprocessing.Pool`](https://docs.python.org/3/library/multiprocessing.html) \n", + "class the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures)\n", + "also implements the `map()` function to map a series of inputs to a function. 
The same `map()` function is also \n", + "available in the `pympipool.Executor`: " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "3f06b0c1-5ee1-40c5-82ab-31d77cfcdb46", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[3, 4, 5, 6]\n" + ] + } + ], + "source": [ + "with Executor(max_workers=2) as exe:\n", + " print(list(exe.map(calc, [[2, 1], [2, 2], [2, 3], [2, 4]])))" + ] + }, + { + "cell_type": "markdown", + "id": "a5d0f249-23bb-4727-8b09-87320ecb98eb", + "metadata": {}, + "source": [ + "The results remain the same. " + ] + }, + { + "cell_type": "markdown", + "id": "580b00ee-6d5b-4ca9-ba36-ff70128c0b6b", + "metadata": {}, + "source": [ + "## Data Handling\n", + "A limitation of many parallel approaches is the overhead in communication when working with large datasets. Instead of\n", + "reading the same dataset repetitively, the `pympipool.Executor` loads the dataset only once per worker and afterwards \n", + "each function submitted to this worker has access to the dataset, as it is already loaded in memory. To achieve this\n", + "the user defines an initialization function `init_function` which returns a dictionary with one key per dataset. 
The \n", + "keys of the dictionary can then be used as additional input parameters in each function submitted to the `pympipool.Executor`.\n", + "This functionality is illustrated in the following example: " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "8fe8c750-4dc5-4b26-ad8d-9f755bff3494", + "metadata": {}, + "outputs": [], + "source": [ + "def calc(i, j, k):\n", + " return i + j + k" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "5f943266-1bee-421e-a1b4-583d222b1c99", + "metadata": {}, + "outputs": [], + "source": [ + "def init_function():\n", + " return {\"j\": 4, \"k\": 3, \"l\": 2}" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0debe907-b646-4fd5-bae7-46b16645d2f3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10\n" + ] + } + ], + "source": [ + "with Executor(max_workers=1, init_function=init_function) as exe:\n", + " fs = exe.submit(calc, 2, j=5)\n", + " print(fs.result())" + ] + }, + { + "cell_type": "markdown", + "id": "72fa803a-ace0-41ea-8090-d64dfd0797cc", + "metadata": {}, + "source": [ + "The function `calc()` requires three inputs `i`, `j` and `k`. But when the function is submitted to the executor only \n", + "two inputs are provided `fs = exe.submit(calc, 2, j=5)`. In this case the first input parameter is mapped to `i=2`, the\n", + "second input parameter is specified explicitly `j=5` but the third input parameter `k` is not provided. So the \n", + "`pympipool.Executor` automatically checks the keys set in the `init_function()` function. In this case the returned \n", + "dictionary `{\"j\": 4, \"k\": 3, \"l\": 2}` defines `j=4`, `k=3` and `l=2`. For this specific call of the `calc()` function,\n", + "`i` and `j` are already provided so `j` is not required, but `k=3` is used from the `init_function()` and as the `calc()`\n", + "function does not define the `l` parameter this one is also ignored. 
" + ] + }, + { + "cell_type": "markdown", + "id": "1443d216-1add-445a-a662-5b16af6c1443", + "metadata": {}, + "source": [ + "The result is `2+5+3=10` as `i=2` and `j=5` are provided during the submission and `k=3` is defined in the `init_function()`\n", + "function." + ] + }, + { + "cell_type": "markdown", + "id": "8d1e21ec-0b8d-45bf-bfb1-62b3df8e242a", + "metadata": {}, + "source": [ + "## Up-Scaling \n", + "The availability of certain features depends on the backend `pympipool` is installed with. In particular the thread \n", + "based parallelism and the GPU assignment are only available with the `pympipool.slurm.PySlurmExecutor` or the \n", + "`pympipool.flux.PyFluxExecutor` backend. The latter is recommended based on the easy installation, the faster allocation \n", + "of resources as the resources are managed within the allocation and no central database is used and the superior level \n", + "of fine-grained resource assignment which is typically not available on other HPC resource schedulers including the\n", + "[SLURM workload manager](https://www.schedmd.com). The `pympipool.flux.PyFluxExecutor` requires \n", + "[flux framework](https://flux-framework.org) to be installed in addition to the `pympipool` package. 
The features are \n", + "summarized in the table below: \n", + "\n", + "| Feature \\ Backend | `PyMpiExecutor` | `PySlurmExecutor` | `PyFluxExecutor` |\n", + "|:--------------------------:|:---------------:|:-----------------:|:----------------:|\n", + "| Thread based parallelism | no | yes | yes | \n", + "| MPI based parallelism | yes | yes | yes |\n", + "| GPU assignment | no | yes | yes |\n", + "| Resource over-subscription | yes | yes | no |\n", + "| Scalability | 1 node | ~100 nodes | no limit |" + ] + }, + { + "cell_type": "markdown", + "id": "f3b9cc80-70ed-4bc8-abf9-62ecbd70b960", + "metadata": {}, + "source": [ + "### Thread-based Parallelism\n", + "The number of threads per core can be controlled with the `threads_per_core` parameter during the initialization of the \n", + "`pympipool.Executor`. Unfortunately, there is no uniform way to control the number of cores a given underlying library \n", + "uses for thread based parallelism, so it might be necessary to set certain environment variables manually: \n", + "\n", + "* `OMP_NUM_THREADS`: for openmp\n", + "* `OPENBLAS_NUM_THREADS`: for openblas\n", + "* `MKL_NUM_THREADS`: for mkl\n", + "* `VECLIB_MAXIMUM_THREADS`: for accelerate on Mac Os X\n", + "* `NUMEXPR_NUM_THREADS`: for numexpr\n", + "\n", + "At the current stage `pympipool.Executor` does not set these parameters itself, so you have to add them in the function\n", + "you submit before importing the corresponding library: " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "fbf5f7b2-eb3e-4a81-bae8-e429747300a0", + "metadata": {}, + "outputs": [], + "source": [ + "def calc(i):\n", + " import os\n", + " os.environ[\"OMP_NUM_THREADS\"] = \"2\"\n", + " os.environ[\"OPENBLAS_NUM_THREADS\"] = \"2\"\n", + " os.environ[\"MKL_NUM_THREADS\"] = \"2\"\n", + " os.environ[\"VECLIB_MAXIMUM_THREADS\"] = \"2\"\n", + " os.environ[\"NUMEXPR_NUM_THREADS\"] = \"2\"\n", + " import numpy as np\n", + " return i" + ] + }, + { + "cell_type": "markdown", + 
"id": "334619d0-8d95-419e-885c-e5bc05747584", + "metadata": {}, + "source": [ + "Most modern CPUs use hyper-threading to present the operating system with double the number of virtual cores compared to\n", + "the number of physical cores available. So unless this functionality is disabled `threads_per_core=2` is a reasonable \n", + "default. Just be careful if the number of threads is not specified it is possible that all workers try to access all \n", + "cores at the same time which can lead to poor performance. So it is typically a good idea to monitor the CPU utilization\n", + "with increasing number of workers. " + ] + }, + { + "cell_type": "markdown", + "id": "7c3146c1-8722-4b67-ab21-c250b8e7c9dd", + "metadata": {}, + "source": [ + "Specific manycore CPU models like the Intel Xeon Phi processors provide a much higher hyper-threading ratio and require\n", + "a higher number of threads per core for optimal performance. " + ] + }, + { + "cell_type": "markdown", + "id": "b4976d45-0f4e-496c-8173-9631f512135b", + "metadata": {}, + "source": [ + "### MPI Parallel Python Functions\n", + "Beyond thread based parallelism, the message passing interface (MPI) is the de facto standard for parallel execution in \n", + "scientific computing and the [`mpi4py`](https://mpi4py.readthedocs.io) bindings to the MPI libraries are commonly used\n", + "to parallelize existing workflows. The limitation of this approach is that it requires the whole code to adopt the MPI\n", + "communication standards to coordinate the way how information is distributed. Just like the `pympipool.Executor` the \n", + "[`mpi4py.futures.MPIPoolExecutor`](https://mpi4py.readthedocs.io/en/stable/mpi4py.futures.html#mpipoolexecutor) \n", + "implements the [`concurrent.futures.Executor`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures)\n", + "interface. Still, in this case each python function submitted to the executor is limited to serial execution. 
The\n", + "novel approach of the `pympipool.Executor` is mixing these two types of parallelism. Individual functions can use\n", + "the [`mpi4py`](https://mpi4py.readthedocs.io) library to handle the parallel execution within the context of this \n", + "function while these functions can still be submitted to the `pympipool.Executor` just like any other function. The\n", + "advantage of this approach is that the users can parallelize their workflows one function at a time. \n", + "\n", + "The following example illustrates the submission of a simple MPI parallel python function: " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "cfa072a4-f88f-45b0-be94-a78f0edad513", + "metadata": {}, + "outputs": [], + "source": [ + "def calc(i):\n", + " from mpi4py import MPI\n", + " size = MPI.COMM_WORLD.Get_size()\n", + " rank = MPI.COMM_WORLD.Get_rank()\n", + " return i, size, rank" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "fd036b03-085d-4850-b11e-537c8fd476d5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[(3, 2, 0), (3, 2, 1)]\n" + ] + } + ], + "source": [ + "with Executor(cores_per_worker=2) as exe:\n", + " fs = exe.submit(calc, 3)\n", + " print(fs.result())" + ] + }, + { + "cell_type": "markdown", + "id": "b4f97426-d8fb-42ef-98ca-135054bd39a7", + "metadata": {}, + "source": [ + "The `calc()` function initializes the [`mpi4py`](https://mpi4py.readthedocs.io) library and gathers the size of the \n", + "allocation and the rank of the current process within the MPI allocation. This function is then submitted to a \n", + "`pympipool.Executor` which is initialized with a single worker with two cores `cores_per_worker=2`. So each function\n", + "call is going to have access to two cores. 
\n", + "\n", + "Just like before, the script can be called with any python interpreter. Even though it is using the [`mpi4py`](https://mpi4py.readthedocs.io)\n", + "library in the background, it is not necessary to execute the script with `mpiexec` or `mpirun`." + ] + }, + { + "cell_type": "markdown", + "id": "69dcfdcb-41db-4c3b-a1c5-07ff3be0c9a0", + "metadata": {}, + "source": [ + "The response consists of a list of two tuples, one for each MPI parallel process, with the first entry of the tuple \n", + "being the parameter `i=3`, followed by the number of MPI parallel processes assigned to the function call `cores_per_worker=2`\n", + "and finally the index of the specific process `0` or `1`. " + ] + }, + { + "cell_type": "markdown", + "id": "dc41f241-663c-474e-ae1e-b2365389bc90", + "metadata": {}, + "source": [ + "### GPU Assignment\n", + "With the rise of machine learning applications, the use of GPUs for scientific applications becomes more and more popular.\n", + "Consequently, it is essential to have full control over the assignment of GPUs to specific python functions. 
In the \n", + "following example the `tensorflow` library is used to identify the GPUs and return their configuration: " + ] + }, + { + "cell_type": "raw", + "id": "6ac9630b-4ab5-4f7f-bf55-812e8189da4f", + "metadata": {}, + "source": [ + "import socket\n", + "from tensorflow.python.client import device_lib" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "998138f5-f0cb-47a7-ba36-7594b8ec41fc", + "metadata": {}, + "outputs": [], + "source": [ + "def get_available_gpus():\n", + " local_device_protos = device_lib.list_local_devices()\n", + " return [\n", + " (x.name, x.physical_device_desc, socket.gethostname()) \n", + " for x in local_device_protos if x.device_type == 'GPU'\n", + " ]" + ] + }, + { + "cell_type": "raw", + "id": "9d33af22-7b90-4ff7-a434-9c4cd9a930d5", + "metadata": {}, + "source": [ + "with Executor(\n", + " max_workers=2, \n", + " gpus_per_worker=1, \n", + ") as exe:\n", + " fs_1 = exe.submit(get_available_gpus)\n", + " fs_2 = exe.submit(get_available_gpus)\n", + " print(fs_1.result(), fs_2.result())" + ] + }, + { + "cell_type": "markdown", + "id": "8dc7a989-908a-48a6-8d06-ac1e24173f5c", + "metadata": {}, + "source": [ + "The additional parameter `gpus_per_worker=1` specifies that one GPU is assigned to each worker. This functionality \n", + "requires `pympipool` to be connected to a resource manager like the [SLURM workload manager](https://www.schedmd.com)\n", + "or preferably the [flux framework](https://flux-framework.org). The rest of the script follows the previous examples, \n", + "as two functions are submitted and the results are printed. 
" + ] + }, + { + "cell_type": "markdown", + "id": "d1a17c6c-41ee-4595-913e-4af7272010a5", + "metadata": {}, + "source": [ + "To clarify the execution of such an example on a high performance computing (HPC) cluster using the [SLURM workload manager](https://www.schedmd.com)\n", + "the submission script is given below: " + ] + }, + { + "cell_type": "raw", + "id": "11ce332b-d2c1-4434-84c4-1e523e430848", + "metadata": {}, + "source": [ + "#!/bin/bash\n", + "#SBATCH --nodes=2\n", + "#SBATCH --gpus-per-node=1\n", + "#SBATCH --get-user-env=L\n", + "\n", + "python test_gpu.py" + ] + }, + { + "cell_type": "markdown", + "id": "14bf6228-db64-406b-b04f-4d23daaa836d", + "metadata": {}, + "source": [ + "The important part is that for using the `pympipool.slurm.PySlurmExecutor` backend the script `test_gpu.py` does not \n", + "need to be executed with `srun` but rather it is sufficient to just execute it with the python interpreter. `pympipool`\n", + "internally calls `srun` to assign the individual resources to a given worker. " + ] + }, + { + "cell_type": "markdown", + "id": "66e3be02-c11c-4053-9600-6bcfefefb127", + "metadata": {}, + "source": [ + "For the more complex setup of running the [flux framework](https://flux-framework.org) as a secondary resource scheduler\n", + "within the [SLURM workload manager](https://www.schedmd.com) it is essential that the resources are passed from the \n", + "[SLURM workload manager](https://www.schedmd.com) to the [flux framework](https://flux-framework.org). 
This is achieved\n", + "by calling `srun flux start` in the submission script: " + ] + }, + { + "cell_type": "raw", + "id": "aa0e2abf-7ab2-464f-a341-b93f91fbdd99", + "metadata": {}, + "source": [ + "#!/bin/bash\n", + "#SBATCH --nodes=2\n", + "#SBATCH --gpus-per-node=1\n", + "#SBATCH --get-user-env=L\n", + "\n", + "srun flux start python test_gpu.py" + ] + }, + { + "cell_type": "markdown", + "id": "6c84fb7d-4285-4d73-8f1e-7cb88050eb85", + "metadata": {}, + "source": [ + "As a result the GPUs available on the two compute nodes are reported: " + ] + }, + { + "cell_type": "raw", + "id": "a431e015-a309-49ac-9f10-756bda0177fc", + "metadata": {}, + "source": [ + ">>> [('/device:GPU:0', 'device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:84:00.0, compute capability: 7.0', 'cn138'),\n", + ">>> ('/device:GPU:0', 'device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:84:00.0, compute capability: 7.0', 'cn139')]" + ] + }, + { + "cell_type": "markdown", + "id": "70eb9a19-325e-4179-a196-4417e3f30e19", + "metadata": {}, + "source": [ + "In this case each compute node `cn138` and `cn139` is equipped with one `Tesla V100S-PCIE-32GB`." + ] + }, + { + "cell_type": "markdown", + "id": "35e426c3-2e61-42f6-8c85-31dd288b7f51", + "metadata": {}, + "source": [ + "### Subprocess\n", + "Following the [`subprocess.check_output()`](https://docs.python.org/3/library/subprocess.html) interface of the standard\n", + "python libraries, any kind of command can be submitted to the `pympipool.SubprocessExecutor`. The command can either be \n", + "specified as a list `[\"echo\", \"test\"]` in which the first entry is typically the executable followed by the corresponding\n", + "parameters or the command can be specified as a string `\"echo test\"` with the additional parameter `shell=True`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "91463388-789a-4749-b02f-71a6d76f9b96", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "False test\n", + " True\n" + ] + } + ], + "source": [ + "from pympipool import SubprocessExecutor\n", + "\n", + "with SubprocessExecutor(max_workers=2) as exe:\n", + " future = exe.submit([\"echo\", \"test\"], universal_newlines=True)\n", + " print(future.done(), future.result(), future.done())" + ] + }, + { + "cell_type": "markdown", + "id": "a8c436db-fc60-45a8-8603-ff9a7e8439ca", + "metadata": {}, + "source": [ + "In analogy to the previous examples the `SubprocessExecutor` class is directly imported from the `pympipool` module and \n", + "the maximum number of workers is set to two `max_workers=2`. In contrast to the `pympipool.Executor` class no other\n", + "settings to assign specific hardware to the command via the python interface are available in the `SubprocessExecutor` \n", + "class. To specify the hardware requirements for the individual commands, the user has to manually assign the resources\n", + "using the commands of the resource schedulers like `srun`, `flux run` or `mpiexec`.\n", + "\n", + "The `concurrent.futures.Future` object returned after submitting a command to the `pympipool.SubprocessExecutor` behaves\n", + "just like any other future object. It provides a `done()` function to check if the execution completed as well as a \n", + "`result()` function to return the output of the submitted command. \n", + "\n", + "In comparison to the `flux.job.FluxExecutor` included in the [flux framework](https://flux-framework.org) the \n", + "`pympipool.SubprocessExecutor` differs in two ways. One, the `pympipool.SubprocessExecutor` does not provide any option for\n", + "resource assignment and two, the `pympipool.SubprocessExecutor` returns the output of the command rather than just \n", + "returning the exit status when calling `result()`. 
" + ] + }, + { + "cell_type": "markdown", + "id": "212cc7ed-b8df-447f-98df-e5390c839660", + "metadata": {}, + "source": [ + "### Interactive Shell\n", + "Beyond external executables which are called once with a set of input parameters and/or input files and return one set\n", + "of outputs, there are some executables which allow the user to interact with the executable during the execution. The \n", + "challenge of interfacing a python process with such an interactive executable is to identify when the executable is ready\n", + "to receive the next input. A very basic example for an interactive executable is a script which counts to the number \n", + "input by the user. This can be written in python as `count.py`:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "094f9afc-6fca-43c4-9794-51408ff5fc5f", + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"count.py\", \"w\") as f:\n", + " f.writelines(\"\"\"\\\n", + "def count(iterations):\n", + " for i in range(int(iterations)):\n", + " print(i)\n", + " print(\"done\")\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " while True:\n", + " user_input = input()\n", + " if \"shutdown\" in user_input:\n", + " break\n", + " else:\n", + " count(iterations=int(user_input))\n", + "\"\"\")" + ] + }, + { + "cell_type": "markdown", + "id": "9eb17afc-ada6-4b75-b4e5-aba5a15e2512", + "metadata": {}, + "source": [ + "This example is challenging in terms of interfacing it with a python process as the length of the output changes depending\n", + "on the input. The first option that the `pympipool.ShellExecutor` provides is specifying the number of lines to read for\n", + "each call submitted to the executable using the `lines_to_read` parameter. 
In comparison to the `SubprocessExecutor` \n", + "defined above the `ShellExecutor` only supports the execution of a single executable at a time, correspondingly the input\n", + "parameters for calling the executable are provided at the time of initialization of the `ShellExecutor` and the inputs \n", + "are submitted using the `submit()` function:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "8ec6f955-8d40-42b0-a116-c5b538cc8e87", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "False 0\n", + "1\n", + "2\n", + "3\n", + "done\n", + " True\n" + ] + } + ], + "source": [ + "from pympipool import ShellExecutor\n", + "\n", + "with ShellExecutor([\"python\", \"count.py\"], universal_newlines=True) as exe:\n", + " future_lines = exe.submit(string_input=\"4\", lines_to_read=5)\n", + " print(future_lines.done(), future_lines.result(), future_lines.done())" + ] + }, + { + "cell_type": "markdown", + "id": "fcb6fdfc-d5c2-4008-91c1-3f935920cb7e", + "metadata": {}, + "source": [ + "The response for a given set of input is again returned as a `concurrent.futures.Future` object, this allows the user to\n", + "execute other steps on the python side while waiting for the completion of the external executable. In this case the \n", + "example counts the numbers from `0` to `3` and prints each of them in one line followed by `done` to notify the user it is\n", + "waiting for new inputs. This results in `n+1` lines of output for the input of `n`. 
Still predicting the number of lines\n", + "for a given input can be challenging, so the `pympipool.ShellExecutor` class also provides the option to wait until a \n", + "specific pattern is found in the output using the `stop_read_pattern`:" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "b048e987-2472-4481-87af-131ade1b1ce1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "False 0\n", + "1\n", + "2\n", + "3\n", + "done\n", + " True\n" + ] + } + ], + "source": [ + "from pympipool import ShellExecutor\n", + "\n", + "with ShellExecutor([\"python\", \"count.py\"], universal_newlines=True) as exe:\n", + " future_pattern = exe.submit(string_input=\"4\", stop_read_pattern=\"done\")\n", + " print(future_pattern.done(), future_pattern.result(), future_pattern.done())" + ] + }, + { + "cell_type": "markdown", + "id": "7e59baad-fdd1-4895-8eed-efefedc91806", + "metadata": {}, + "source": [ + "In this example the pattern simply searches for the string `done` in the output of the program and returns all the output\n", + "gathered from the executable since the last input as the result of the `concurrent.futures.Future` object returned after\n", + "the submission of the interactive command. 
" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 6188145f4945aa91745379f893462f1db653e451 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 21 Nov 2023 13:12:04 +0100 Subject: [PATCH 094/134] black formatting --- pympipool/shell/executor.py | 2 ++ pympipool/shell/interactive.py | 1 + 2 files changed, 3 insertions(+) diff --git a/pympipool/shell/executor.py b/pympipool/shell/executor.py index 5b72d1ed..d9eda6fa 100644 --- a/pympipool/shell/executor.py +++ b/pympipool/shell/executor.py @@ -41,6 +41,7 @@ class SubprocessSingleExecutor(ExecutorBase): """ The pympipool.shell.SubprocessSingleExecutor is the internal worker for the pympipool.shell.SubprocessExecutor. 
""" + def __init__(self): super().__init__() self._process = RaisingThread( @@ -77,6 +78,7 @@ class SubprocessExecutor(ExecutorBase): (False, "test", True) """ + def __init__( self, max_workers=1, diff --git a/pympipool/shell/interactive.py b/pympipool/shell/interactive.py index f990c8e3..b155577b 100644 --- a/pympipool/shell/interactive.py +++ b/pympipool/shell/interactive.py @@ -104,6 +104,7 @@ class ShellExecutor(ExecutorBase): >>> print(future_pattern.done(), future_pattern.result(), future_pattern.done()) (False, "0\n1\n2\n3\ndone\n", True) """ + def __init__(self, *args, **kwargs): super().__init__() self._process = RaisingThread( From e312b7a05796b5d670d6b9efb4def86b04a32040 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 21 Nov 2023 13:14:45 +0100 Subject: [PATCH 095/134] Add missing section to notebook --- notebooks/examples.ipynb | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/notebooks/examples.ipynb b/notebooks/examples.ipynb index f5d61df8..c91b48a2 100644 --- a/notebooks/examples.ipynb +++ b/notebooks/examples.ipynb @@ -628,6 +628,18 @@ "In this case each compute node `cn138` and `cn139` is equipped with one `Tesla V100S-PCIE-32GB`." ] }, + { + "cell_type": "markdown", + "id": "5b554291-41ba-484d-b3da-a764bb286c4c", + "metadata": {}, + "source": [ + "## External Executables\n", + "While `pympipool` was initially designed for up-scaling python functions for HPC, the same functionality can be leveraged\n", + "to up-scale any executable independent of the programming language it is developed in. This approach follows the design \n", + "of the `flux.job.FluxExecutor` included in the [flux framework](https://flux-framework.org). In `pympipool` this approach\n", + "is extended to support any kind of subprocess, so it is no longer limited to the [flux framework](https://flux-framework.org)." 
+ ] + }, { "cell_type": "markdown", "id": "35e426c3-2e61-42f6-8c85-31dd288b7f51", From a233b64bedc3af92420167a9d5acf04e764a58aa Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 24 Nov 2023 09:08:44 +0100 Subject: [PATCH 096/134] Test lower limits of dependencies --- .ci_support/environment-old.yml | 10 ++++++++ .github/workflows/unittests-old.yml | 36 +++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 .ci_support/environment-old.yml create mode 100644 .github/workflows/unittests-old.yml diff --git a/.ci_support/environment-old.yml b/.ci_support/environment-old.yml new file mode 100644 index 00000000..9e1996db --- /dev/null +++ b/.ci_support/environment-old.yml @@ -0,0 +1,10 @@ +channels: +- conda-forge +dependencies: +- python +- numpy +- openmpi =4.0.3 +- cloudpickle =1.4.0 +- mpi4py =3.1.2 +- tqdm =4.44.0 +- pyzmq =18.1.1 \ No newline at end of file diff --git a/.github/workflows/unittests-old.yml b/.github/workflows/unittests-old.yml new file mode 100644 index 00000000..3074bd74 --- /dev/null +++ b/.github/workflows/unittests-old.yml @@ -0,0 +1,36 @@ +# This workflow is used to run the unittest of pyiron + +name: Unittest Lower Bound + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build: + + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: conda-incubator/setup-miniconda@v2.2.0 + with: + python-version: '3.9' + mamba-version: "*" + channels: conda-forge + miniforge-variant: Mambaforge + channel-priority: strict + auto-update-conda: true + environment-file: .ci_support/environment-old.yml + - name: Setup + shell: bash -l {0} + run: pip install --no-deps . + - name: Test + shell: bash -l {0} + timeout-minutes: 5 + run: cd tests; python -m unittest discover . 
+ env: + OMPI_MCA_plm: 'isolated' + OMPI_MCA_rmaps_base_oversubscribe: 'yes' + OMPI_MCA_btl_vader_single_copy_mechanism: 'none' From 480d27fcb583c2914d120331d7af41ab35e3c7bc Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 24 Nov 2023 09:11:59 +0100 Subject: [PATCH 097/134] mpi4py restricts the openmpi version --- .ci_support/environment-old.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci_support/environment-old.yml b/.ci_support/environment-old.yml index 9e1996db..56d12433 100644 --- a/.ci_support/environment-old.yml +++ b/.ci_support/environment-old.yml @@ -3,7 +3,7 @@ channels: dependencies: - python - numpy -- openmpi =4.0.3 +- openmpi =4.1.0 - cloudpickle =1.4.0 - mpi4py =3.1.2 - tqdm =4.44.0 From 21671e5b1e46b2e270ce3dabbb46bde4d57813c5 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 24 Nov 2023 09:17:17 +0100 Subject: [PATCH 098/134] update communication interface --- .ci_support/environment-old.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci_support/environment-old.yml b/.ci_support/environment-old.yml index 56d12433..857795c2 100644 --- a/.ci_support/environment-old.yml +++ b/.ci_support/environment-old.yml @@ -7,4 +7,4 @@ dependencies: - cloudpickle =1.4.0 - mpi4py =3.1.2 - tqdm =4.44.0 -- pyzmq =18.1.1 \ No newline at end of file +- pyzmq =25.0.0 \ No newline at end of file From ea03c37ec4e88cbffec268342f55f88e3eb4ef3e Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 24 Nov 2023 09:27:24 +0100 Subject: [PATCH 099/134] more recent versions --- .ci_support/environment-old.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.ci_support/environment-old.yml b/.ci_support/environment-old.yml index 857795c2..4ce39086 100644 --- a/.ci_support/environment-old.yml +++ b/.ci_support/environment-old.yml @@ -4,7 +4,7 @@ dependencies: - python - numpy - openmpi =4.1.0 -- cloudpickle =1.4.0 -- mpi4py =3.1.2 -- tqdm =4.44.0 +- cloudpickle =2.2.1 +- mpi4py =3.1.4 +- tqdm =4.64.1 - 
pyzmq =25.0.0 \ No newline at end of file From 8b1352d8664e907a22f53846dd50b4761702875a Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 24 Nov 2023 09:29:06 +0100 Subject: [PATCH 100/134] fix openmpi version --- .ci_support/environment-old.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci_support/environment-old.yml b/.ci_support/environment-old.yml index 4ce39086..51c78b43 100644 --- a/.ci_support/environment-old.yml +++ b/.ci_support/environment-old.yml @@ -3,7 +3,7 @@ channels: dependencies: - python - numpy -- openmpi =4.1.0 +- openmpi =4.1.4 - cloudpickle =2.2.1 - mpi4py =3.1.4 - tqdm =4.64.1 From b55e58579b013641067cb93678c36de5c5e079d6 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 24 Nov 2023 09:31:50 +0100 Subject: [PATCH 101/134] test old tqdm version --- .ci_support/environment-old.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci_support/environment-old.yml b/.ci_support/environment-old.yml index 51c78b43..49d490e8 100644 --- a/.ci_support/environment-old.yml +++ b/.ci_support/environment-old.yml @@ -6,5 +6,5 @@ dependencies: - openmpi =4.1.4 - cloudpickle =2.2.1 - mpi4py =3.1.4 -- tqdm =4.64.1 +- tqdm =4.44.0 - pyzmq =25.0.0 \ No newline at end of file From c7f3c640268a2db6e89e55c777871b2f5d1283f3 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 24 Nov 2023 09:34:28 +0100 Subject: [PATCH 102/134] downgrade cloudpickle --- .ci_support/environment-old.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci_support/environment-old.yml b/.ci_support/environment-old.yml index 49d490e8..161f7ae1 100644 --- a/.ci_support/environment-old.yml +++ b/.ci_support/environment-old.yml @@ -4,7 +4,7 @@ dependencies: - python - numpy - openmpi =4.1.4 -- cloudpickle =2.2.1 +- cloudpickle =2.0.0 - mpi4py =3.1.4 - tqdm =4.44.0 - pyzmq =25.0.0 \ No newline at end of file From a03dffdb009fb4e5f9525bb58a34d1a82430efdc Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 24 Nov 2023 13:04:36 
+0100 Subject: [PATCH 103/134] Update black action --- .github/workflows/black.yml | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml index 363ba27f..9f5d698e 100644 --- a/.github/workflows/black.yml +++ b/.github/workflows/black.yml @@ -1,5 +1,4 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions +# This workflow is used to check the differences and find conflict markers or whitespace errors name: Black @@ -14,19 +13,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - uses: conda-incubator/setup-miniconda@v2.2.0 + - uses: psf/black@stable with: - python-version: '3.11' - mamba-version: "*" - channels: conda-forge - miniforge-variant: Mambaforge - channel-priority: strict - auto-update-conda: true - environment-file: .ci_support/environment-openmpi.yml - - name: Setup - shell: bash -l {0} - run: | - conda install -c conda-forge black - - name: Test - shell: bash -l {0} - run: black --check pympipool \ No newline at end of file + options: "--check --diff" + src: ./${{ github.event.repository.name }} From 7f4c55f582a9b08dd438f824a707fb11c4798d60 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 24 Nov 2023 13:45:49 +0100 Subject: [PATCH 104/134] Create UpdateDependabotPR.yml --- .github/workflows/UpdateDependabotPR.yml | 37 ++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 .github/workflows/UpdateDependabotPR.yml diff --git a/.github/workflows/UpdateDependabotPR.yml b/.github/workflows/UpdateDependabotPR.yml new file mode 100644 index 00000000..c2c71134 --- /dev/null +++ b/.github/workflows/UpdateDependabotPR.yml @@ -0,0 +1,37 @@ +name: UpdateDependabotPR + +on: + pull_request_target: + branches: [ main ] + +jobs: + build: + runs-on: 
ubuntu-latest + if: (github.actor == 'dependabot[bot]') + steps: + - uses: actions/checkout@v2 + with: + ref: ${{ github.event.pull_request.head.ref }} # Check out the head of the actual branch, not the PR + fetch-depth: 0 # otherwise, you will fail to push refs to dest repo + token: ${{ secrets.DEPENDABOT_WORKFLOW_TOKEN }} + - name: UpdateEnvironmentFile + env: + PR_TITLE: ${{ github.event.pull_request.title }} + shell: bash -l {0} + run: | + package=$(echo "$PR_TITLE" | awk '{print $2}') + from=$(echo "$PR_TITLE" | awk '{print $4}') + to=$(echo "$PR_TITLE" | awk '{print $6}') + sed -i "/${package}/s/${from}/${to}/g" .ci_support/environment-mpich.yml + sed -i "/${package}/s/${from}/${to}/g" .ci_support/environment-openmpi.yml + sed -i "/${package}/s/${from}/${to}/g" .ci_support/environment-win.yml + - name: UpdateDependabotPR commit + run: | + git config --local user.email "pyiron@mpie.de" + git config --local user.name "pyironrunner" + git commit -m "[dependabot skip] Update environment" -a + - name: UpdateDependabotPR push + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.DEPENDABOT_WORKFLOW_TOKEN }} + branch: ${{ github.event.pull_request.head.ref }} From 3412f9d07348e491fdcdd19edd874ce7b4b405de Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 24 Nov 2023 13:51:08 +0100 Subject: [PATCH 105/134] Update and rename UpdateDependabotPR.yml to dependabot.yml --- .github/workflows/{UpdateDependabotPR.yml => dependabot.yml} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename .github/workflows/{UpdateDependabotPR.yml => dependabot.yml} (98%) diff --git a/.github/workflows/UpdateDependabotPR.yml b/.github/workflows/dependabot.yml similarity index 98% rename from .github/workflows/UpdateDependabotPR.yml rename to .github/workflows/dependabot.yml index c2c71134..85028d85 100644 --- a/.github/workflows/UpdateDependabotPR.yml +++ b/.github/workflows/dependabot.yml @@ -1,4 +1,4 @@ -name: UpdateDependabotPR +name: Update Dependabot on: 
pull_request_target: From 60dfae0cad19f98cb97b7541a3b1bb290e8b14d2 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Fri, 24 Nov 2023 15:09:10 +0100 Subject: [PATCH 106/134] Create format_black.yml --- .github/workflows/format_black.yml | 33 ++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 .github/workflows/format_black.yml diff --git a/.github/workflows/format_black.yml b/.github/workflows/format_black.yml new file mode 100644 index 00000000..f7a62e5d --- /dev/null +++ b/.github/workflows/format_black.yml @@ -0,0 +1,33 @@ +# This workflow will checkout the branch of the PR, apply black formatting and commit the result to the PR. Does not work for forks. + +name: Format black + +on: + pull_request: + types: [labeled] + +jobs: + build: + if: contains(github.event.pull_request.labels.*.name, 'format_black' ) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + token: ${{ secrets.DEPENDABOT_WORKFLOW_TOKEN }} + ref: ${{ github.event.pull_request.head.ref }} # Check out the head of the actual branch, not the PR + fetch-depth: 0 # otherwise, you will fail to push refs to dest repo + - name: format black + uses: psf/black@stable + with: + options: "" + src: "./${{ github.event.repository.name }}" + - name: commit + run: | + git config --local user.email "pyiron@mpie.de" + git config --local user.name "pyiron-runner" + git commit -m "Format black" -a + - name: push + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.DEPENDABOT_WORKFLOW_TOKEN }} + branch: ${{ github.event.pull_request.head.ref }} From c2ff257b0f18fa0845d33eaeef108575e2808667 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Mon, 27 Nov 2023 07:18:16 +0100 Subject: [PATCH 107/134] Switch to pyproject.toml --- .ci_support/release.py | 61 + .github/workflows/deploy.yml | 12 +- .github/workflows/unittest-flux.yml | 12 +- .github/workflows/unittest-mpich.yml | 8 +- .github/workflows/unittest-openmpi.yml | 8 +- 
.github/workflows/unittest-win.yml | 8 +- .github/workflows/unittests-old.yml | 8 +- MANIFEST.in | 2 - pympipool/__init__.py | 1 - pyproject.toml | 50 + setup.cfg | 12 - setup.py | 35 +- versioneer.py | 2277 ------------------------ 13 files changed, 140 insertions(+), 2354 deletions(-) create mode 100644 .ci_support/release.py create mode 100644 pyproject.toml delete mode 100644 setup.cfg delete mode 100644 versioneer.py diff --git a/.ci_support/release.py b/.ci_support/release.py new file mode 100644 index 00000000..a3edb012 --- /dev/null +++ b/.ci_support/release.py @@ -0,0 +1,61 @@ +def get_setup_version_and_pattern(setup_content): + depend_lst, version_lst = [], [] + for l in setup_content: + if '==' in l: + lst = l.split('[')[-1].split(']')[0].replace(' ', '').replace('"', '').replace("'", '').split(',') + for dep in lst: + if dep != '\n': + version_lst.append(dep.split('==')[1]) + depend_lst.append(dep.split('==')[0]) + + version_high_dict = {d: v for d, v in zip(depend_lst, version_lst)} + return version_high_dict + + +def get_env_version(env_content): + read_flag = False + depend_lst, version_lst = [], [] + for l in env_content: + if 'dependencies:' in l: + read_flag = True + elif read_flag: + lst = l.replace('-', '').replace(' ', '').replace('\n', '').split("=") + if len(lst) == 2: + depend_lst.append(lst[0]) + version_lst.append(lst[1]) + return {d:v for d, v in zip(depend_lst, version_lst)} + + +def update_dependencies(setup_content, version_low_dict, version_high_dict): + version_combo_dict = {} + for dep, ver in version_high_dict.items(): + if dep in version_low_dict.keys() and version_low_dict[dep] != ver: + version_combo_dict[dep] = dep + ">=" + version_low_dict[dep] + ",<=" + ver + else: + version_combo_dict[dep] = dep + "==" + ver + + setup_content_new = "" + pattern_dict = {d:d + "==" + v for d, v in version_high_dict.items()} + for l in setup_content: + for k, v in pattern_dict.items(): + if v in l: + l = l.replace(v, version_combo_dict[k]) + 
setup_content_new +=l + return setup_content_new + + +if __name__ == "__main__": + with open('pyproject.toml', "r") as f: + setup_content = f.readlines() + + with open('environment.yml', "r") as f: + env_content = f.readlines() + + setup_content_new = update_dependencies( + setup_content=setup_content[2:], + version_low_dict=get_env_version(env_content=env_content), + version_high_dict=get_setup_version_and_pattern(setup_content=setup_content[2:]), + ) + + with open('pyproject.toml', "w") as f: + f.writelines("".join(setup_content[:2]) + setup_content_new) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 85d02b34..2cfd6b2a 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -22,15 +22,13 @@ jobs: - uses: actions/setup-python@v2 with: python-version: "3.11" - - name: Install dependencies - run: >- - python -m pip install --user --upgrade setuptools wheel + run: python -m pip install --user --upgrade cloudpickle mpi4py pyzmq setuptools tqdm versioneer wheel - name: Convert dependencies - run: >- - sed -i 's/==/>=/g' setup.py; cat setup.py + run: | + cp .ci_support/environment-old.yml environment.yml + python .ci_support/release.py; cat pyproject.toml - name: Build - run: >- - python setup.py sdist bdist_wheel + run: python setup.py sdist bdist_wheel - name: Publish distribution 📦 to PyPI uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/unittest-flux.yml b/.github/workflows/unittest-flux.yml index c5c3273b..8056b3e3 100644 --- a/.github/workflows/unittest-flux.yml +++ b/.github/workflows/unittest-flux.yml @@ -38,16 +38,14 @@ jobs: channel-priority: strict auto-update-conda: true environment-file: ${{ matrix.environment-file }} - - name: Install flux - shell: bash -l {0} - run: mamba install -y flux-core coverage - - name: Setup - shell: bash -l {0} - run: pip install --no-deps . 
- name: Test shell: bash -l {0} timeout-minutes: 5 - run: cd tests; coverage run --omit pympipool/_version.py -m unittest discover . + run: | + mamba install -y flux-core coverage + pip install --no-deps . + cd tests + coverage run --omit pympipool/_version.py -m unittest discover . env: OMPI_MCA_plm: 'isolated' OMPI_MCA_rmaps_base_oversubscribe: 'yes' diff --git a/.github/workflows/unittest-mpich.yml b/.github/workflows/unittest-mpich.yml index 2a5b988b..d192b304 100644 --- a/.github/workflows/unittest-mpich.yml +++ b/.github/workflows/unittest-mpich.yml @@ -51,10 +51,10 @@ jobs: channel-priority: strict auto-update-conda: true environment-file: .ci_support/environment-mpich.yml - - name: Setup - shell: bash -l {0} - run: pip install --no-deps . - name: Test shell: bash -l {0} timeout-minutes: 5 - run: cd tests; python -m unittest discover . + run: | + pip install --no-deps . + cd tests + python -m unittest discover . diff --git a/.github/workflows/unittest-openmpi.yml b/.github/workflows/unittest-openmpi.yml index 3d81e857..ba8cbef2 100644 --- a/.github/workflows/unittest-openmpi.yml +++ b/.github/workflows/unittest-openmpi.yml @@ -51,13 +51,13 @@ jobs: channel-priority: strict auto-update-conda: true environment-file: .ci_support/environment-openmpi.yml - - name: Setup - shell: bash -l {0} - run: pip install --no-deps . - name: Test shell: bash -l {0} timeout-minutes: 5 - run: cd tests; python -m unittest discover . + run: | + pip install --no-deps . + cd tests + python -m unittest discover . env: OMPI_MCA_plm: 'isolated' OMPI_MCA_rmaps_base_oversubscribe: 'yes' diff --git a/.github/workflows/unittest-win.yml b/.github/workflows/unittest-win.yml index 809d4f53..2e548ff0 100644 --- a/.github/workflows/unittest-win.yml +++ b/.github/workflows/unittest-win.yml @@ -28,10 +28,10 @@ jobs: channel-priority: strict auto-update-conda: true environment-file: .ci_support/environment-win.yml - - name: Setup - shell: bash -l {0} - run: pip install --no-deps . 
- name: Test shell: bash -l {0} timeout-minutes: 5 - run: cd tests; python -m unittest discover . + run: | + pip install --no-deps . + cd tests + python -m unittest discover . diff --git a/.github/workflows/unittests-old.yml b/.github/workflows/unittests-old.yml index 3074bd74..28439812 100644 --- a/.github/workflows/unittests-old.yml +++ b/.github/workflows/unittests-old.yml @@ -23,13 +23,13 @@ jobs: channel-priority: strict auto-update-conda: true environment-file: .ci_support/environment-old.yml - - name: Setup - shell: bash -l {0} - run: pip install --no-deps . - name: Test shell: bash -l {0} timeout-minutes: 5 - run: cd tests; python -m unittest discover . + run: | + pip install --no-deps . + cd tests + python -m unittest discover . env: OMPI_MCA_plm: 'isolated' OMPI_MCA_rmaps_base_oversubscribe: 'yes' diff --git a/MANIFEST.in b/MANIFEST.in index 3078faee..cc0d1164 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1 @@ -include versioneer.py -include pympipool/_version.py include LICENSE \ No newline at end of file diff --git a/pympipool/__init__.py b/pympipool/__init__.py index dc0bbf68..1567c721 100644 --- a/pympipool/__init__.py +++ b/pympipool/__init__.py @@ -20,7 +20,6 @@ __version__ = get_versions()["version"] -del get_versions class Executor: diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..66929cb9 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,50 @@ +[build-system] +requires = ["cloudpickle", "mpi4py", "pyzmq", "setuptools", "tqdm", "versioneer[toml]==0.29"] +build-backend = "setuptools.build_meta" + +[project] +name = "pympipool" +description = "Scale serial and MPI-parallel python functions over hundreds of compute nodes all from within a jupyter notebook or serial python process." 
+authors = [ + { name = "Jan Janssen", email = "janssen@lanl.gov" }, +] +readme = "README.md" +license = { file = "LICENSE" } +keywords = ["pyiron"] +requires-python = ">=3.8" +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Topic :: Scientific/Engineering :: Physics", + "License :: OSI Approved :: BSD License", + "Intended Audience :: Science/Research", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", +] +dependencies = [ + "cloudpickle==3.0.0", + "mpi4py==3.1.5", + "pyzmq==25.1.1", + "tqdm==4.66.1", +] +dynamic = ["version"] + +[project.urls] +Homepage = "https://github.com/pyiron/pympipool" +Documentation = "https://pympipool.readthedocs.io" +Repository = "https://github.com/pyiron/pympipool" + +[tool.setuptools.packages.find] +include = ["pympipool*"] + +[tool.setuptools.dynamic] +version = {attr = "pympipool.__version__"} + +[tool.versioneer] +VCS = "git" +style = "pep440-pre" +versionfile_source = "pympipool/_version.py" +parentdir_prefix = "pympipool" +tag_prefix = "pympipool-" diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index ea01e101..00000000 --- a/setup.cfg +++ /dev/null @@ -1,12 +0,0 @@ - -# See the docstring in versioneer.py for instructions. Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the -# resulting files. 
- -[versioneer] -VCS = git -style = pep440-pre -versionfile_source = pympipool/_version.py -#versionfile_build = -tag_prefix = pympipool- -parentdir_prefix = pympipool diff --git a/setup.py b/setup.py index 70140b13..edb08a34 100644 --- a/setup.py +++ b/setup.py @@ -1,37 +1,8 @@ -""" -Setuptools based setup module -""" -from setuptools import setup, find_packages -from pathlib import Path -import versioneer +from setuptools import setup +import versioneer setup( - name='pympipool', version=versioneer.get_version(), - description='pympipool - Scale serial and MPI-parallel python functions over hundreds of compute nodes all from within a jupyter notebook or serial python process.', - long_description=Path("README.md").read_text(), - long_description_content_type='text/markdown', - url='https://github.com/pyiron/pympipool', - author_email='janssen@lanl.gov', - license='BSD', - - classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'License :: OSI Approved :: BSD License', - 'Intended Audience :: Science/Research', - 'Operating System :: OS Independent', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11' - ], - packages=find_packages(exclude=["*tests*", "*.ci_support*"]), - install_requires=[ - 'cloudpickle==3.0.0', - 'mpi4py==3.1.5', - 'tqdm==4.66.1', - 'pyzmq==25.1.1', - ], cmdclass=versioneer.get_cmdclass(), -) +) \ No newline at end of file diff --git a/versioneer.py b/versioneer.py deleted file mode 100644 index 1e3753e6..00000000 --- a/versioneer.py +++ /dev/null @@ -1,2277 +0,0 @@ - -# Version: 0.29 - -"""The Versioneer - like a rocketeer, but for versions. - -The Versioneer -============== - -* like a rocketeer, but for versions! 
-* https://github.com/python-versioneer/python-versioneer -* Brian Warner -* License: Public Domain (Unlicense) -* Compatible with: Python 3.7, 3.8, 3.9, 3.10, 3.11 and pypy3 -* [![Latest Version][pypi-image]][pypi-url] -* [![Build Status][travis-image]][travis-url] - -This is a tool for managing a recorded version number in setuptools-based -python projects. The goal is to remove the tedious and error-prone "update -the embedded version string" step from your release process. Making a new -release should be as easy as recording a new tag in your version-control -system, and maybe making new tarballs. - - -## Quick Install - -Versioneer provides two installation modes. The "classic" vendored mode installs -a copy of versioneer into your repository. The experimental build-time dependency mode -is intended to allow you to skip this step and simplify the process of upgrading. - -### Vendored mode - -* `pip install versioneer` to somewhere in your $PATH - * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is - available, so you can also use `conda install -c conda-forge versioneer` -* add a `[tool.versioneer]` section to your `pyproject.toml` or a - `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) - * Note that you will need to add `tomli; python_version < "3.11"` to your - build-time dependencies if you use `pyproject.toml` -* run `versioneer install --vendor` in your source tree, commit the results -* verify version information with `python setup.py version` - -### Build-time dependency mode - -* `pip install versioneer` to somewhere in your $PATH - * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is - available, so you can also use `conda install -c conda-forge versioneer` -* add a `[tool.versioneer]` section to your `pyproject.toml` or a - `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) -* add `versioneer` (with `[toml]` extra, if configuring in `pyproject.toml`) - 
to the `requires` key of the `build-system` table in `pyproject.toml`: - ```toml - [build-system] - requires = ["setuptools", "versioneer[toml]"] - build-backend = "setuptools.build_meta" - ``` -* run `versioneer install --no-vendor` in your source tree, commit the results -* verify version information with `python setup.py version` - -## Version Identifiers - -Source trees come from a variety of places: - -* a version-control system checkout (mostly used by developers) -* a nightly tarball, produced by build automation -* a snapshot tarball, produced by a web-based VCS browser, like github's - "tarball from tag" feature -* a release tarball, produced by "setup.py sdist", distributed through PyPI - -Within each source tree, the version identifier (either a string or a number, -this tool is format-agnostic) can come from a variety of places: - -* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows - about recent "tags" and an absolute revision-id -* the name of the directory into which the tarball was unpacked -* an expanded VCS keyword ($Id$, etc) -* a `_version.py` created by some earlier build step - -For released software, the version identifier is closely related to a VCS -tag. Some projects use tag names that include more than just the version -string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool -needs to strip the tag prefix to extract the version identifier. For -unreleased software (between tags), the version identifier should provide -enough information to help developers recreate the same tree, while also -giving them an idea of roughly how old the tree is (after version 1.2, before -version 1.3). Many VCS systems can report a description that captures this, -for example `git describe --tags --dirty --always` reports things like -"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the -0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes). 
- -The version identifier is used for multiple purposes: - -* to allow the module to self-identify its version: `myproject.__version__` -* to choose a name and prefix for a 'setup.py sdist' tarball - -## Theory of Operation - -Versioneer works by adding a special `_version.py` file into your source -tree, where your `__init__.py` can import it. This `_version.py` knows how to -dynamically ask the VCS tool for version information at import time. - -`_version.py` also contains `$Revision$` markers, and the installation -process marks `_version.py` to have this marker rewritten with a tag name -during the `git archive` command. As a result, generated tarballs will -contain enough information to get the proper version. - -To allow `setup.py` to compute a version too, a `versioneer.py` is added to -the top level of your source tree, next to `setup.py` and the `setup.cfg` -that configures it. This overrides several distutils/setuptools commands to -compute the version when invoked, and changes `setup.py build` and `setup.py -sdist` to replace `_version.py` with a small static file that contains just -the generated version data. - -## Installation - -See [INSTALL.md](./INSTALL.md) for detailed installation instructions. - -## Version-String Flavors - -Code which uses Versioneer can learn about its version string at runtime by -importing `_version` from your main `__init__.py` file and running the -`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can -import the top-level `versioneer.py` and run `get_versions()`. - -Both functions return a dictionary with different flavors of version -information: - -* `['version']`: A condensed version string, rendered using the selected - style. This is the most commonly used value for the project's version - string. The default "pep440" style yields strings like `0.11`, - `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section - below for alternative styles. 
- -* `['full-revisionid']`: detailed revision identifier. For Git, this is the - full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". - -* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the - commit date in ISO 8601 format. This will be None if the date is not - available. - -* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that - this is only accurate if run in a VCS checkout, otherwise it is likely to - be False or None - -* `['error']`: if the version string could not be computed, this will be set - to a string describing the problem, otherwise it will be None. It may be - useful to throw an exception in setup.py if this is set, to avoid e.g. - creating tarballs with a version string of "unknown". - -Some variants are more useful than others. Including `full-revisionid` in a -bug report should allow developers to reconstruct the exact code being tested -(or indicate the presence of local changes that should be shared with the -developers). `version` is suitable for display in an "about" box or a CLI -`--version` output: it can be easily compared against release notes and lists -of bugs fixed in various releases. - -The installer adds the following text to your `__init__.py` to place a basic -version in `YOURPROJECT.__version__`: - - from ._version import get_versions - __version__ = get_versions()['version'] - del get_versions - -## Styles - -The setup.cfg `style=` configuration controls how the VCS information is -rendered into a version string. - -The default style, "pep440", produces a PEP440-compliant string, equal to the -un-prefixed tag name for actual releases, and containing an additional "local -version" section with more detail for in-between builds. For Git, this is -TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags ---dirty --always`. 
For example "0.11+2.g1076c97.dirty" indicates that the -tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and -that this commit is two revisions ("+2") beyond the "0.11" tag. For released -software (exactly equal to a known tag), the identifier will only contain the -stripped tag, e.g. "0.11". - -Other styles are available. See [details.md](details.md) in the Versioneer -source tree for descriptions. - -## Debugging - -Versioneer tries to avoid fatal errors: if something goes wrong, it will tend -to return a version of "0+unknown". To investigate the problem, run `setup.py -version`, which will run the version-lookup code in a verbose mode, and will -display the full contents of `get_versions()` (including the `error` string, -which may help identify what went wrong). - -## Known Limitations - -Some situations are known to cause problems for Versioneer. This details the -most significant ones. More can be found on Github -[issues page](https://github.com/python-versioneer/python-versioneer/issues). - -### Subprojects - -Versioneer has limited support for source trees in which `setup.py` is not in -the root directory (e.g. `setup.py` and `.git/` are *not* siblings). The are -two common reasons why `setup.py` might not be in the root: - -* Source trees which contain multiple subprojects, such as - [Buildbot](https://github.com/buildbot/buildbot), which contains both - "master" and "slave" subprojects, each with their own `setup.py`, - `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI - distributions (and upload multiple independently-installable tarballs). -* Source trees whose main purpose is to contain a C library, but which also - provide bindings to Python (and perhaps other languages) in subdirectories. - -Versioneer will look for `.git` in parent directories, and most operations -should get the right version string. 
However `pip` and `setuptools` have bugs -and implementation details which frequently cause `pip install .` from a -subproject directory to fail to find a correct version string (so it usually -defaults to `0+unknown`). - -`pip install --editable .` should work correctly. `setup.py install` might -work too. - -Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in -some later version. - -[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking -this issue. The discussion in -[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the -issue from the Versioneer side in more detail. -[pip PR#3176](https://github.com/pypa/pip/pull/3176) and -[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve -pip to let Versioneer work correctly. - -Versioneer-0.16 and earlier only looked for a `.git` directory next to the -`setup.cfg`, so subprojects were completely unsupported with those releases. - -### Editable installs with setuptools <= 18.5 - -`setup.py develop` and `pip install --editable .` allow you to install a -project into a virtualenv once, then continue editing the source code (and -test) without re-installing after every change. - -"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a -convenient way to specify executable scripts that should be installed along -with the python package. - -These both work as expected when using modern setuptools. When using -setuptools-18.5 or earlier, however, certain operations will cause -`pkg_resources.DistributionNotFound` errors when running the entrypoint -script, which must be resolved by re-installing the package. This happens -when the install happens with one version, then the egg_info data is -regenerated while a different version is checked out. Many setup.py commands -cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into -a different virtualenv), so this can be surprising. 
- -[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes -this one, but upgrading to a newer version of setuptools should probably -resolve it. - - -## Updating Versioneer - -To upgrade your project to a new release of Versioneer, do the following: - -* install the new Versioneer (`pip install -U versioneer` or equivalent) -* edit `setup.cfg` and `pyproject.toml`, if necessary, - to include any new configuration settings indicated by the release notes. - See [UPGRADING](./UPGRADING.md) for details. -* re-run `versioneer install --[no-]vendor` in your source tree, to replace - `SRC/_version.py` -* commit any changed files - -## Future Directions - -This tool is designed to make it easily extended to other version-control -systems: all VCS-specific components are in separate directories like -src/git/ . The top-level `versioneer.py` script is assembled from these -components by running make-versioneer.py . In the future, make-versioneer.py -will take a VCS name as an argument, and will construct a version of -`versioneer.py` that is specific to the given VCS. It might also take the -configuration arguments that are currently provided manually during -installation by editing setup.py . Alternatively, it might go the other -direction and include code from all supported VCS systems, reducing the -number of intermediate scripts. - -## Similar projects - -* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time - dependency -* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of - versioneer -* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools - plugin - -## License - -To make Versioneer easier to embed, all its code is dedicated to the public -domain. The `_version.py` that it creates is also in the public domain. -Specifically, both are released under the "Unlicense", as described in -https://unlicense.org/. 
- -[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg -[pypi-url]: https://pypi.python.org/pypi/versioneer/ -[travis-image]: -https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg -[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer - -""" -# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring -# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements -# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error -# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with -# pylint:disable=attribute-defined-outside-init,too-many-arguments - -import configparser -import errno -import json -import os -import re -import subprocess -import sys -from pathlib import Path -from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Union -from typing import NoReturn -import functools - -have_tomllib = True -if sys.version_info >= (3, 11): - import tomllib -else: - try: - import tomli as tomllib - except ImportError: - have_tomllib = False - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - VCS: str - style: str - tag_prefix: str - versionfile_source: str - versionfile_build: Optional[str] - parentdir_prefix: Optional[str] - verbose: Optional[bool] - - -def get_root() -> str: - """Get the project root directory. - - We require that all commands are run from the project root, i.e. the - directory that contains setup.py, setup.cfg, and versioneer.py . 
- """ - root = os.path.realpath(os.path.abspath(os.getcwd())) - setup_py = os.path.join(root, "setup.py") - pyproject_toml = os.path.join(root, "pyproject.toml") - versioneer_py = os.path.join(root, "versioneer.py") - if not ( - os.path.exists(setup_py) - or os.path.exists(pyproject_toml) - or os.path.exists(versioneer_py) - ): - # allow 'python path/to/setup.py COMMAND' - root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) - setup_py = os.path.join(root, "setup.py") - pyproject_toml = os.path.join(root, "pyproject.toml") - versioneer_py = os.path.join(root, "versioneer.py") - if not ( - os.path.exists(setup_py) - or os.path.exists(pyproject_toml) - or os.path.exists(versioneer_py) - ): - err = ("Versioneer was unable to run the project root directory. " - "Versioneer requires setup.py to be executed from " - "its immediate directory (like 'python setup.py COMMAND'), " - "or in a way that lets it use sys.argv[0] to find the root " - "(like 'python path/to/setup.py COMMAND').") - raise VersioneerBadRootError(err) - try: - # Certain runtime workflows (setup.py install/develop in a setuptools - # tree) execute all dependencies in a single python process, so - # "versioneer" may be imported multiple times, and python's shared - # module-import table will cache the first one. So we can't use - # os.path.dirname(__file__), as that will find whichever - # versioneer.py was first imported, even in later projects. 
- my_path = os.path.realpath(os.path.abspath(__file__)) - me_dir = os.path.normcase(os.path.splitext(my_path)[0]) - vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) - if me_dir != vsr_dir and "VERSIONEER_PEP518" not in globals(): - print("Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(my_path), versioneer_py)) - except NameError: - pass - return root - - -def get_config_from_root(root: str) -> VersioneerConfig: - """Read the project setup.cfg file to determine Versioneer config.""" - # This might raise OSError (if setup.cfg is missing), or - # configparser.NoSectionError (if it lacks a [versioneer] section), or - # configparser.NoOptionError (if it lacks "VCS="). See the docstring at - # the top of versioneer.py for instructions on writing your setup.cfg . - root_pth = Path(root) - pyproject_toml = root_pth / "pyproject.toml" - setup_cfg = root_pth / "setup.cfg" - section: Union[Dict[str, Any], configparser.SectionProxy, None] = None - if pyproject_toml.exists() and have_tomllib: - try: - with open(pyproject_toml, 'rb') as fobj: - pp = tomllib.load(fobj) - section = pp['tool']['versioneer'] - except (tomllib.TOMLDecodeError, KeyError) as e: - print(f"Failed to load config from {pyproject_toml}: {e}") - print("Try to load it from setup.cfg") - if not section: - parser = configparser.ConfigParser() - with open(setup_cfg) as cfg_file: - parser.read_file(cfg_file) - parser.get("versioneer", "VCS") # raise error if missing - - section = parser["versioneer"] - - # `cast`` really shouldn't be used, but its simplest for the - # common VersioneerConfig users at the moment. 
We verify against - # `None` values elsewhere where it matters - - cfg = VersioneerConfig() - cfg.VCS = section['VCS'] - cfg.style = section.get("style", "") - cfg.versionfile_source = cast(str, section.get("versionfile_source")) - cfg.versionfile_build = section.get("versionfile_build") - cfg.tag_prefix = cast(str, section.get("tag_prefix")) - if cfg.tag_prefix in ("''", '""', None): - cfg.tag_prefix = "" - cfg.parentdir_prefix = section.get("parentdir_prefix") - if isinstance(section, configparser.SectionProxy): - # Make sure configparser translates to bool - cfg.verbose = section.getboolean("verbose") - else: - cfg.verbose = section.get("verbose") - - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -# these dictionaries contain VCS-specific tools -LONG_VERSION_PY: Dict[str, str] = {} -HANDLERS: Dict[str, Dict[str, Callable]] = {} - - -def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f: Callable) -> Callable: - """Store f in HANDLERS[vcs][method].""" - HANDLERS.setdefault(vcs, {})[method] = f - return f - return decorate - - -def run_command( - commands: List[str], - args: List[str], - cwd: Optional[str] = None, - verbose: bool = False, - hide_stderr: bool = False, - env: Optional[Dict[str, str]] = None, -) -> Tuple[Optional[str], Optional[int]]: - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - - popen_kwargs: Dict[str, Any] = {} - if sys.platform == "win32": - # This hides the console window if pythonw.exe is used - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - popen_kwargs["startupinfo"] = startupinfo - - for command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, 
cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None), **popen_kwargs) - break - except OSError as e: - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, process.returncode - return stdout, process.returncode - - -LONG_VERSION_PY['git'] = r''' -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. -# Generated by versioneer-0.29 -# https://github.com/python-versioneer/python-versioneer - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys -from typing import Any, Callable, Dict, List, Optional, Tuple -import functools - - -def get_keywords() -> Dict[str, str]: - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). 
- git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" - git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" - git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - VCS: str - style: str - tag_prefix: str - parentdir_prefix: str - versionfile_source: str - verbose: bool - - -def get_config() -> VersioneerConfig: - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "%(STYLE)s" - cfg.tag_prefix = "%(TAG_PREFIX)s" - cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" - cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY: Dict[str, str] = {} -HANDLERS: Dict[str, Dict[str, Callable]] = {} - - -def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f: Callable) -> Callable: - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command( - commands: List[str], - args: List[str], - cwd: Optional[str] = None, - verbose: bool = False, - hide_stderr: bool = False, - env: Optional[Dict[str, str]] = None, -) -> Tuple[Optional[str], Optional[int]]: - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - - popen_kwargs: Dict[str, Any] = {} - if sys.platform == "win32": - # This hides the console window if pythonw.exe is used - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - popen_kwargs["startupinfo"] = startupinfo - - for 
command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None), **popen_kwargs) - break - except OSError as e: - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %%s" %% dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %%s" %% (commands,)) - return None, None - stdout = process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %%s (error)" %% dispcmd) - print("stdout was %%s" %% stdout) - return None, process.returncode - return stdout, process.returncode - - -def versions_from_parentdir( - parentdir_prefix: str, - root: str, - verbose: bool, -) -> Dict[str, Any]: - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %%s but none started with prefix %%s" %% - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. 
When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords: Dict[str, str] = {} - try: - with open(versionfile_abs, "r") as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords( - keywords: Dict[str, str], - tag_prefix: str, - verbose: bool, -) -> Dict[str, Any]: - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. 
- TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %%d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} - if verbose: - print("discarding '%%s', no digits" %% ",".join(refs - tags)) - if verbose: - print("likely tags: %%s" %% ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r'\d', r): - continue - if verbose: - print("picking %%s" %% r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs( - tag_prefix: str, - root: str, - verbose: bool, - runner: Callable = run_command -) -> Dict[str, Any]: - """Get version from 'git describe' in the root of the source tree. 
- - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - # GIT_DIR can interfere with correct operation of Versioneer. - # It may be intended to be passed to the Versioneer-versioned project, - # but that should not change where we get our version from. - env = os.environ.copy() - env.pop("GIT_DIR", None) - runner = functools.partial(runner, env=env) - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=not verbose) - if rc != 0: - if verbose: - print("Directory %%s not under git control" %% root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, [ - "describe", "--tags", "--dirty", "--always", "--long", - "--match", f"{tag_prefix}[[:digit:]]*" - ], cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces: Dict[str, Any] = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. 
If all else fails, we are on a branchless - # commit. - branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. - branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. Good or bad. - branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparsable. Maybe git-describe is misbehaving? 
- pieces["error"] = ("unable to parse git-describe output: '%%s'" - %% describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%%s' doesn't start with prefix '%%s'" - print(fmt %% (full_tag, tag_prefix)) - pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" - %% (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) - pieces["distance"] = len(out.split()) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces: Dict[str, Any]) -> str: - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces: Dict[str, Any]) -> str: - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces: Dict[str, Any]) -> str: - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: - """Split pep440 version string at the post-release segment. - - Returns the release segments before the post-release and the - post-release version number (or -1 if no post-release segment is present). - """ - vc = str.split(ver, ".post") - return vc[0], int(vc[1] or 0) if len(vc) == 2 else None - - -def render_pep440_pre(pieces: Dict[str, Any]) -> str: - """TAG[.postN.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 
0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - if pieces["distance"]: - # update the post release segment - tag_version, post_version = pep440_split_post(pieces["closest-tag"]) - rendered = tag_version - if post_version is not None: - rendered += ".post%%d.dev%%d" %% (post_version + 1, pieces["distance"]) - else: - rendered += ".post0.dev%%d" %% (pieces["distance"]) - else: - # no commits, use the tag as the version - rendered = pieces["closest-tag"] - else: - # exception #1 - rendered = "0.post0.dev%%d" %% pieces["distance"] - return rendered - - -def render_pep440_post(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces: Dict[str, Any]) -> str: - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces: Dict[str, Any]) -> str: - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%%s'" %% style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions() -> Dict[str, Any]: - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. 
- - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. - for _ in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} -''' - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords: Dict[str, str] = {} - try: - with open(versionfile_abs, "r") as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords( - keywords: Dict[str, str], - tag_prefix: str, - verbose: bool, -) -> Dict[str, Any]: - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. 
We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r'\d', r): - continue - if verbose: - print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs( - tag_prefix: str, - root: str, - verbose: bool, - runner: Callable = run_command -) -> Dict[str, Any]: - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - # GIT_DIR can interfere with correct operation of Versioneer. 
- # It may be intended to be passed to the Versioneer-versioned project, - # but that should not change where we get our version from. - env = os.environ.copy() - env.pop("GIT_DIR", None) - runner = functools.partial(runner, env=env) - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=not verbose) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, [ - "describe", "--tags", "--dirty", "--always", "--long", - "--match", f"{tag_prefix}[[:digit:]]*" - ], cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces: Dict[str, Any] = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. If all else fails, we are on a branchless - # commit. 
- branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. - branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. Good or bad. - branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparsable. Maybe git-describe is misbehaving? 
- pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) - pieces["distance"] = len(out.split()) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def do_vcs_install(versionfile_source: str, ipy: Optional[str]) -> None: - """Git-specific installation logic for Versioneer. - - For Git, this means creating/changing .gitattributes to mark _version.py - for export-subst keyword substitution. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - files = [versionfile_source] - if ipy: - files.append(ipy) - if "VERSIONEER_PEP518" not in globals(): - try: - my_path = __file__ - if my_path.endswith((".pyc", ".pyo")): - my_path = os.path.splitext(my_path)[0] + ".py" - versioneer_file = os.path.relpath(my_path) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) - present = False - try: - with open(".gitattributes", "r") as fobj: - for line in fobj: - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - break - except OSError: - pass - if not present: - with open(".gitattributes", "a+") as fobj: - fobj.write(f"{versionfile_source} export-subst\n") - files.append(".gitattributes") - run_command(GITS, ["add", "--"] + files) - - -def versions_from_parentdir( - parentdir_prefix: str, - root: str, - verbose: bool, -) -> Dict[str, Any]: - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.29) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. 
Distribution tarballs contain a pre-generated copy -# of this file. - -import json - -version_json = ''' -%s -''' # END VERSION_JSON - - -def get_versions(): - return json.loads(version_json) -""" - - -def versions_from_file(filename: str) -> Dict[str, Any]: - """Try to determine the version from _version.py if present.""" - try: - with open(filename) as f: - contents = f.read() - except OSError: - raise NotThisMethod("unable to read _version.py") - mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) - if not mo: - mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) - if not mo: - raise NotThisMethod("no version_json in _version.py") - return json.loads(mo.group(1)) - - -def write_to_version_file(filename: str, versions: Dict[str, Any]) -> None: - """Write the given version number to the given _version.py file.""" - contents = json.dumps(versions, sort_keys=True, - indent=1, separators=(",", ": ")) - with open(filename, "w") as f: - f.write(SHORT_VERSION_PY % contents) - - print("set %s to '%s'" % (filename, versions["version"])) - - -def plus_or_dot(pieces: Dict[str, Any]) -> str: - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces: Dict[str, Any]) -> str: - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces: Dict[str, Any]) -> str: - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: - """Split pep440 version string at the post-release segment. - - Returns the release segments before the post-release and the - post-release version number (or -1 if no post-release segment is present). - """ - vc = str.split(ver, ".post") - return vc[0], int(vc[1] or 0) if len(vc) == 2 else None - - -def render_pep440_pre(pieces: Dict[str, Any]) -> str: - """TAG[.postN.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 
0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - if pieces["distance"]: - # update the post release segment - tag_version, post_version = pep440_split_post(pieces["closest-tag"]) - rendered = tag_version - if post_version is not None: - rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) - else: - rendered += ".post0.dev%d" % (pieces["distance"]) - else: - # no commits, use the tag as the version - rendered = pieces["closest-tag"] - else: - # exception #1 - rendered = "0.post0.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces: Dict[str, Any]) -> str: - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces: Dict[str, Any]) -> str: - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -class VersioneerBadRootError(Exception): - """The project root directory is unknown or missing key files.""" - - -def get_versions(verbose: bool = False) -> Dict[str, Any]: - """Get the project version from whatever source is available. - - Returns dict with two keys: 'version' and 'full'. 
- """ - if "versioneer" in sys.modules: - # see the discussion in cmdclass.py:get_cmdclass() - del sys.modules["versioneer"] - - root = get_root() - cfg = get_config_from_root(root) - - assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" - handlers = HANDLERS.get(cfg.VCS) - assert handlers, "unrecognized VCS '%s'" % cfg.VCS - verbose = verbose or bool(cfg.verbose) # `bool()` used to avoid `None` - assert cfg.versionfile_source is not None, \ - "please set versioneer.versionfile_source" - assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" - - versionfile_abs = os.path.join(root, cfg.versionfile_source) - - # extract version from first of: _version.py, VCS command (e.g. 'git - # describe'), parentdir. This is meant to work for developers using a - # source checkout, for users of a tarball created by 'setup.py sdist', - # and for users of a tarball/zipball created by 'git archive' or github's - # download-from-tag feature or the equivalent in other VCSes. 
- - get_keywords_f = handlers.get("get_keywords") - from_keywords_f = handlers.get("keywords") - if get_keywords_f and from_keywords_f: - try: - keywords = get_keywords_f(versionfile_abs) - ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) - if verbose: - print("got version from expanded keyword %s" % ver) - return ver - except NotThisMethod: - pass - - try: - ver = versions_from_file(versionfile_abs) - if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) - return ver - except NotThisMethod: - pass - - from_vcs_f = handlers.get("pieces_from_vcs") - if from_vcs_f: - try: - pieces = from_vcs_f(cfg.tag_prefix, root, verbose) - ver = render(pieces, cfg.style) - if verbose: - print("got version from VCS %s" % ver) - return ver - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - if verbose: - print("got version from parentdir %s" % ver) - return ver - except NotThisMethod: - pass - - if verbose: - print("unable to compute version") - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, "error": "unable to compute version", - "date": None} - - -def get_version() -> str: - """Get the short version string for this project.""" - return get_versions()["version"] - - -def get_cmdclass(cmdclass: Optional[Dict[str, Any]] = None): - """Get the custom setuptools subclasses used by Versioneer. - - If the package uses a different cmdclass (e.g. one from numpy), it - should be provide as an argument. - """ - if "versioneer" in sys.modules: - del sys.modules["versioneer"] - # this fixes the "python setup.py develop" case (also 'install' and - # 'easy_install .'), in which subdependencies of the main project are - # built (using setup.py bdist_egg) in the same python process. Assume - # a main project A and a dependency B, which use different versions - # of Versioneer. 
A's setup.py imports A's Versioneer, leaving it in - # sys.modules by the time B's setup.py is executed, causing B to run - # with the wrong versioneer. Setuptools wraps the sub-dep builds in a - # sandbox that restores sys.modules to it's pre-build state, so the - # parent is protected against the child's "import versioneer". By - # removing ourselves from sys.modules here, before the child build - # happens, we protect the child from the parent's versioneer too. - # Also see https://github.com/python-versioneer/python-versioneer/issues/52 - - cmds = {} if cmdclass is None else cmdclass.copy() - - # we add "version" to setuptools - from setuptools import Command - - class cmd_version(Command): - description = "report generated version string" - user_options: List[Tuple[str, str, str]] = [] - boolean_options: List[str] = [] - - def initialize_options(self) -> None: - pass - - def finalize_options(self) -> None: - pass - - def run(self) -> None: - vers = get_versions(verbose=True) - print("Version: %s" % vers["version"]) - print(" full-revisionid: %s" % vers.get("full-revisionid")) - print(" dirty: %s" % vers.get("dirty")) - print(" date: %s" % vers.get("date")) - if vers["error"]: - print(" error: %s" % vers["error"]) - cmds["version"] = cmd_version - - # we override "build_py" in setuptools - # - # most invocation pathways end up running build_py: - # distutils/build -> build_py - # distutils/install -> distutils/build ->.. - # setuptools/bdist_wheel -> distutils/install ->.. - # setuptools/bdist_egg -> distutils/install_lib -> build_py - # setuptools/install -> bdist_egg ->.. - # setuptools/develop -> ? - # pip install: - # copies source tree to a tempdir before running egg_info/etc - # if .git isn't copied too, 'git describe' will fail - # then does setup.py bdist_wheel, or sometimes setup.py install - # setup.py egg_info -> ? - - # pip install -e . and setuptool/editable_wheel will invoke build_py - # but the build_py command is not expected to copy any files. 
- - # we override different "build_py" commands for both environments - if 'build_py' in cmds: - _build_py: Any = cmds['build_py'] - else: - from setuptools.command.build_py import build_py as _build_py - - class cmd_build_py(_build_py): - def run(self) -> None: - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_py.run(self) - if getattr(self, "editable_mode", False): - # During editable installs `.py` and data files are - # not copied to build_lib - return - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if cfg.versionfile_build: - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - cmds["build_py"] = cmd_build_py - - if 'build_ext' in cmds: - _build_ext: Any = cmds['build_ext'] - else: - from setuptools.command.build_ext import build_ext as _build_ext - - class cmd_build_ext(_build_ext): - def run(self) -> None: - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_ext.run(self) - if self.inplace: - # build_ext --inplace will only build extensions in - # build/lib<..> dir with no _version.py to write to. - # As in place builds will already have a _version.py - # in the module dir, we do not need to write one. - return - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if not cfg.versionfile_build: - return - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) - if not os.path.exists(target_versionfile): - print(f"Warning: {target_versionfile} does not exist, skipping " - "version update. 
This can happen if you are running build_ext " - "without first running build_py.") - return - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - cmds["build_ext"] = cmd_build_ext - - if "cx_Freeze" in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe # type: ignore - # nczeczulin reports that py2exe won't like the pep440-style string - # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. - # setup(console=[{ - # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION - # "product_version": versioneer.get_version(), - # ... - - class cmd_build_exe(_build_exe): - def run(self) -> None: - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _build_exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - cmds["build_exe"] = cmd_build_exe - del cmds["build_py"] - - if 'py2exe' in sys.modules: # py2exe enabled? 
- try: - from py2exe.setuptools_buildexe import py2exe as _py2exe # type: ignore - except ImportError: - from py2exe.distutils_buildexe import py2exe as _py2exe # type: ignore - - class cmd_py2exe(_py2exe): - def run(self) -> None: - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _py2exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - cmds["py2exe"] = cmd_py2exe - - # sdist farms its file list building out to egg_info - if 'egg_info' in cmds: - _egg_info: Any = cmds['egg_info'] - else: - from setuptools.command.egg_info import egg_info as _egg_info - - class cmd_egg_info(_egg_info): - def find_sources(self) -> None: - # egg_info.find_sources builds the manifest list and writes it - # in one shot - super().find_sources() - - # Modify the filelist and normalize it - root = get_root() - cfg = get_config_from_root(root) - self.filelist.append('versioneer.py') - if cfg.versionfile_source: - # There are rare cases where versionfile_source might not be - # included by default, so we must be explicit - self.filelist.append(cfg.versionfile_source) - self.filelist.sort() - self.filelist.remove_duplicates() - - # The write method is hidden in the manifest_maker instance that - # generated the filelist and was thrown away - # We will instead replicate their final normalization (to unicode, - # and POSIX-style paths) - from setuptools import unicode_utils - normalized = [unicode_utils.filesys_decode(f).replace(os.sep, '/') - for f in self.filelist.files] - - manifest_filename = os.path.join(self.egg_info, 'SOURCES.txt') - with 
open(manifest_filename, 'w') as fobj: - fobj.write('\n'.join(normalized)) - - cmds['egg_info'] = cmd_egg_info - - # we override different "sdist" commands for both environments - if 'sdist' in cmds: - _sdist: Any = cmds['sdist'] - else: - from setuptools.command.sdist import sdist as _sdist - - class cmd_sdist(_sdist): - def run(self) -> None: - versions = get_versions() - self._versioneer_generated_versions = versions - # unless we update this, the command will keep using the old - # version - self.distribution.metadata.version = versions["version"] - return _sdist.run(self) - - def make_release_tree(self, base_dir: str, files: List[str]) -> None: - root = get_root() - cfg = get_config_from_root(root) - _sdist.make_release_tree(self, base_dir, files) - # now locate _version.py in the new base_dir directory - # (remembering that it may be a hardlink) and replace it with an - # updated value - target_versionfile = os.path.join(base_dir, cfg.versionfile_source) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, - self._versioneer_generated_versions) - cmds["sdist"] = cmd_sdist - - return cmds - - -CONFIG_ERROR = """ -setup.cfg is missing the necessary Versioneer configuration. You need -a section like: - - [versioneer] - VCS = git - style = pep440 - versionfile_source = src/myproject/_version.py - versionfile_build = myproject/_version.py - tag_prefix = - parentdir_prefix = myproject- - -You will also need to edit your setup.py to use the results: - - import versioneer - setup(version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), ...) - -Please read the docstring in ./versioneer.py for configuration instructions, -edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. -""" - -SAMPLE_CONFIG = """ -# See the docstring in versioneer.py for instructions. Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the -# resulting files. 
- -[versioneer] -#VCS = git -#style = pep440 -#versionfile_source = -#versionfile_build = -#tag_prefix = -#parentdir_prefix = - -""" - -OLD_SNIPPET = """ -from ._version import get_versions -__version__ = get_versions()['version'] -del get_versions -""" - -INIT_PY_SNIPPET = """ -from . import {0} -__version__ = {0}.get_versions()['version'] -""" - - -def do_setup() -> int: - """Do main VCS-independent setup function for installing Versioneer.""" - root = get_root() - try: - cfg = get_config_from_root(root) - except (OSError, configparser.NoSectionError, - configparser.NoOptionError) as e: - if isinstance(e, (OSError, configparser.NoSectionError)): - print("Adding sample versioneer config to setup.cfg", - file=sys.stderr) - with open(os.path.join(root, "setup.cfg"), "a") as f: - f.write(SAMPLE_CONFIG) - print(CONFIG_ERROR, file=sys.stderr) - return 1 - - print(" creating %s" % cfg.versionfile_source) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - - ipy = os.path.join(os.path.dirname(cfg.versionfile_source), - "__init__.py") - maybe_ipy: Optional[str] = ipy - if os.path.exists(ipy): - try: - with open(ipy, "r") as f: - old = f.read() - except OSError: - old = "" - module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] - snippet = INIT_PY_SNIPPET.format(module) - if OLD_SNIPPET in old: - print(" replacing boilerplate in %s" % ipy) - with open(ipy, "w") as f: - f.write(old.replace(OLD_SNIPPET, snippet)) - elif snippet not in old: - print(" appending to %s" % ipy) - with open(ipy, "a") as f: - f.write(snippet) - else: - print(" %s unmodified" % ipy) - else: - print(" %s doesn't exist, ok" % ipy) - maybe_ipy = None - - # Make VCS-specific changes. 
For git, this means creating/changing - # .gitattributes to mark _version.py for export-subst keyword - # substitution. - do_vcs_install(cfg.versionfile_source, maybe_ipy) - return 0 - - -def scan_setup_py() -> int: - """Validate the contents of setup.py against Versioneer's expectations.""" - found = set() - setters = False - errors = 0 - with open("setup.py", "r") as f: - for line in f.readlines(): - if "import versioneer" in line: - found.add("import") - if "versioneer.get_cmdclass()" in line: - found.add("cmdclass") - if "versioneer.get_version()" in line: - found.add("get_version") - if "versioneer.VCS" in line: - setters = True - if "versioneer.versionfile_source" in line: - setters = True - if len(found) != 3: - print("") - print("Your setup.py appears to be missing some important items") - print("(but I might be wrong). Please make sure it has something") - print("roughly like the following:") - print("") - print(" import versioneer") - print(" setup( version=versioneer.get_version(),") - print(" cmdclass=versioneer.get_cmdclass(), ...)") - print("") - errors += 1 - if setters: - print("You should remove lines like 'versioneer.VCS = ' and") - print("'versioneer.versionfile_source = ' . 
This configuration") - print("now lives in setup.cfg, and should be removed from setup.py") - print("") - errors += 1 - return errors - - -def setup_command() -> NoReturn: - """Set up Versioneer and exit with appropriate error code.""" - errors = do_setup() - errors += scan_setup_py() - sys.exit(1 if errors else 0) - - -if __name__ == "__main__": - cmd = sys.argv[1] - if cmd == "setup": - setup_command() From 7f0ca0cbbf121f9993e96b97f1cd84adb80115f5 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Mon, 27 Nov 2023 07:46:51 +0100 Subject: [PATCH 108/134] remove mpi4py from dependencies --- .github/workflows/deploy.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 2cfd6b2a..e9212bd6 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -23,7 +23,7 @@ jobs: with: python-version: "3.11" - name: Install dependencies - run: python -m pip install --user --upgrade cloudpickle mpi4py pyzmq setuptools tqdm versioneer wheel + run: python -m pip install --user --upgrade cloudpickle pyzmq setuptools tqdm versioneer wheel - name: Convert dependencies run: | cp .ci_support/environment-old.yml environment.yml diff --git a/pyproject.toml b/pyproject.toml index 66929cb9..3b61ad72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["cloudpickle", "mpi4py", "pyzmq", "setuptools", "tqdm", "versioneer[toml]==0.29"] +requires = ["cloudpickle", "pyzmq", "setuptools", "tqdm", "versioneer[toml]==0.29"] build-backend = "setuptools.build_meta" [project] From 09dae1060651fb593a076b8bb17ae9661484d031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Mon, 27 Nov 2023 12:17:10 +0100 Subject: [PATCH 109/134] Test pip no-build-isolation option --- .github/workflows/unittest-flux.yml | 2 +- .github/workflows/unittest-mpich.yml | 2 +- .github/workflows/unittest-openmpi.yml | 2 +- 
.github/workflows/unittest-win.yml | 2 +- .github/workflows/unittests-old.yml | 2 +- pyproject.toml | 2 +- versioneer.py | 2277 ++++++++++++++++++++++++ 7 files changed, 2283 insertions(+), 6 deletions(-) create mode 100644 versioneer.py diff --git a/.github/workflows/unittest-flux.yml b/.github/workflows/unittest-flux.yml index 8056b3e3..b4fc60ee 100644 --- a/.github/workflows/unittest-flux.yml +++ b/.github/workflows/unittest-flux.yml @@ -43,7 +43,7 @@ jobs: timeout-minutes: 5 run: | mamba install -y flux-core coverage - pip install --no-deps . + pip install . -vv --no-deps --no-build-isolation cd tests coverage run --omit pympipool/_version.py -m unittest discover . env: diff --git a/.github/workflows/unittest-mpich.yml b/.github/workflows/unittest-mpich.yml index d192b304..bc90737f 100644 --- a/.github/workflows/unittest-mpich.yml +++ b/.github/workflows/unittest-mpich.yml @@ -55,6 +55,6 @@ jobs: shell: bash -l {0} timeout-minutes: 5 run: | - pip install --no-deps . + pip install . -vv --no-deps --no-build-isolation cd tests python -m unittest discover . diff --git a/.github/workflows/unittest-openmpi.yml b/.github/workflows/unittest-openmpi.yml index ba8cbef2..fa4a5764 100644 --- a/.github/workflows/unittest-openmpi.yml +++ b/.github/workflows/unittest-openmpi.yml @@ -55,7 +55,7 @@ jobs: shell: bash -l {0} timeout-minutes: 5 run: | - pip install --no-deps . + pip install . -vv --no-deps --no-build-isolation cd tests python -m unittest discover . env: diff --git a/.github/workflows/unittest-win.yml b/.github/workflows/unittest-win.yml index 2e548ff0..a26a0d84 100644 --- a/.github/workflows/unittest-win.yml +++ b/.github/workflows/unittest-win.yml @@ -32,6 +32,6 @@ jobs: shell: bash -l {0} timeout-minutes: 5 run: | - pip install --no-deps . + pip install . -vv --no-deps --no-build-isolation cd tests python -m unittest discover . 
diff --git a/.github/workflows/unittests-old.yml b/.github/workflows/unittests-old.yml index 28439812..5c2eb8da 100644 --- a/.github/workflows/unittests-old.yml +++ b/.github/workflows/unittests-old.yml @@ -27,7 +27,7 @@ jobs: shell: bash -l {0} timeout-minutes: 5 run: | - pip install --no-deps . + pip install . -vv --no-deps --no-build-isolation cd tests python -m unittest discover . env: diff --git a/pyproject.toml b/pyproject.toml index 3b61ad72..02db79b9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["cloudpickle", "pyzmq", "setuptools", "tqdm", "versioneer[toml]==0.29"] +requires = ["setuptools"] build-backend = "setuptools.build_meta" [project] diff --git a/versioneer.py b/versioneer.py new file mode 100644 index 00000000..1e3753e6 --- /dev/null +++ b/versioneer.py @@ -0,0 +1,2277 @@ + +# Version: 0.29 + +"""The Versioneer - like a rocketeer, but for versions. + +The Versioneer +============== + +* like a rocketeer, but for versions! +* https://github.com/python-versioneer/python-versioneer +* Brian Warner +* License: Public Domain (Unlicense) +* Compatible with: Python 3.7, 3.8, 3.9, 3.10, 3.11 and pypy3 +* [![Latest Version][pypi-image]][pypi-url] +* [![Build Status][travis-image]][travis-url] + +This is a tool for managing a recorded version number in setuptools-based +python projects. The goal is to remove the tedious and error-prone "update +the embedded version string" step from your release process. Making a new +release should be as easy as recording a new tag in your version-control +system, and maybe making new tarballs. + + +## Quick Install + +Versioneer provides two installation modes. The "classic" vendored mode installs +a copy of versioneer into your repository. The experimental build-time dependency mode +is intended to allow you to skip this step and simplify the process of upgrading. 
+ +### Vendored mode + +* `pip install versioneer` to somewhere in your $PATH + * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is + available, so you can also use `conda install -c conda-forge versioneer` +* add a `[tool.versioneer]` section to your `pyproject.toml` or a + `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) + * Note that you will need to add `tomli; python_version < "3.11"` to your + build-time dependencies if you use `pyproject.toml` +* run `versioneer install --vendor` in your source tree, commit the results +* verify version information with `python setup.py version` + +### Build-time dependency mode + +* `pip install versioneer` to somewhere in your $PATH + * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is + available, so you can also use `conda install -c conda-forge versioneer` +* add a `[tool.versioneer]` section to your `pyproject.toml` or a + `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) +* add `versioneer` (with `[toml]` extra, if configuring in `pyproject.toml`) + to the `requires` key of the `build-system` table in `pyproject.toml`: + ```toml + [build-system] + requires = ["setuptools", "versioneer[toml]"] + build-backend = "setuptools.build_meta" + ``` +* run `versioneer install --no-vendor` in your source tree, commit the results +* verify version information with `python setup.py version` + +## Version Identifiers + +Source trees come from a variety of places: + +* a version-control system checkout (mostly used by developers) +* a nightly tarball, produced by build automation +* a snapshot tarball, produced by a web-based VCS browser, like github's + "tarball from tag" feature +* a release tarball, produced by "setup.py sdist", distributed through PyPI + +Within each source tree, the version identifier (either a string or a number, +this tool is format-agnostic) can come from a variety of places: + +* ask the VCS tool itself, 
e.g. "git describe" (for checkouts), which knows + about recent "tags" and an absolute revision-id +* the name of the directory into which the tarball was unpacked +* an expanded VCS keyword ($Id$, etc) +* a `_version.py` created by some earlier build step + +For released software, the version identifier is closely related to a VCS +tag. Some projects use tag names that include more than just the version +string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool +needs to strip the tag prefix to extract the version identifier. For +unreleased software (between tags), the version identifier should provide +enough information to help developers recreate the same tree, while also +giving them an idea of roughly how old the tree is (after version 1.2, before +version 1.3). Many VCS systems can report a description that captures this, +for example `git describe --tags --dirty --always` reports things like +"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the +0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has +uncommitted changes). + +The version identifier is used for multiple purposes: + +* to allow the module to self-identify its version: `myproject.__version__` +* to choose a name and prefix for a 'setup.py sdist' tarball + +## Theory of Operation + +Versioneer works by adding a special `_version.py` file into your source +tree, where your `__init__.py` can import it. This `_version.py` knows how to +dynamically ask the VCS tool for version information at import time. + +`_version.py` also contains `$Revision$` markers, and the installation +process marks `_version.py` to have this marker rewritten with a tag name +during the `git archive` command. As a result, generated tarballs will +contain enough information to get the proper version. 
+ +To allow `setup.py` to compute a version too, a `versioneer.py` is added to +the top level of your source tree, next to `setup.py` and the `setup.cfg` +that configures it. This overrides several distutils/setuptools commands to +compute the version when invoked, and changes `setup.py build` and `setup.py +sdist` to replace `_version.py` with a small static file that contains just +the generated version data. + +## Installation + +See [INSTALL.md](./INSTALL.md) for detailed installation instructions. + +## Version-String Flavors + +Code which uses Versioneer can learn about its version string at runtime by +importing `_version` from your main `__init__.py` file and running the +`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can +import the top-level `versioneer.py` and run `get_versions()`. + +Both functions return a dictionary with different flavors of version +information: + +* `['version']`: A condensed version string, rendered using the selected + style. This is the most commonly used value for the project's version + string. The default "pep440" style yields strings like `0.11`, + `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section + below for alternative styles. + +* `['full-revisionid']`: detailed revision identifier. For Git, this is the + full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". + +* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the + commit date in ISO 8601 format. This will be None if the date is not + available. + +* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that + this is only accurate if run in a VCS checkout, otherwise it is likely to + be False or None + +* `['error']`: if the version string could not be computed, this will be set + to a string describing the problem, otherwise it will be None. It may be + useful to throw an exception in setup.py if this is set, to avoid e.g. 
+ creating tarballs with a version string of "unknown". + +Some variants are more useful than others. Including `full-revisionid` in a +bug report should allow developers to reconstruct the exact code being tested +(or indicate the presence of local changes that should be shared with the +developers). `version` is suitable for display in an "about" box or a CLI +`--version` output: it can be easily compared against release notes and lists +of bugs fixed in various releases. + +The installer adds the following text to your `__init__.py` to place a basic +version in `YOURPROJECT.__version__`: + + from ._version import get_versions + __version__ = get_versions()['version'] + del get_versions + +## Styles + +The setup.cfg `style=` configuration controls how the VCS information is +rendered into a version string. + +The default style, "pep440", produces a PEP440-compliant string, equal to the +un-prefixed tag name for actual releases, and containing an additional "local +version" section with more detail for in-between builds. For Git, this is +TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags +--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the +tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and +that this commit is two revisions ("+2") beyond the "0.11" tag. For released +software (exactly equal to a known tag), the identifier will only contain the +stripped tag, e.g. "0.11". + +Other styles are available. See [details.md](details.md) in the Versioneer +source tree for descriptions. + +## Debugging + +Versioneer tries to avoid fatal errors: if something goes wrong, it will tend +to return a version of "0+unknown". To investigate the problem, run `setup.py +version`, which will run the version-lookup code in a verbose mode, and will +display the full contents of `get_versions()` (including the `error` string, +which may help identify what went wrong). 
+ +## Known Limitations + +Some situations are known to cause problems for Versioneer. This details the +most significant ones. More can be found on Github +[issues page](https://github.com/python-versioneer/python-versioneer/issues). + +### Subprojects + +Versioneer has limited support for source trees in which `setup.py` is not in +the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are +two common reasons why `setup.py` might not be in the root: + +* Source trees which contain multiple subprojects, such as + [Buildbot](https://github.com/buildbot/buildbot), which contains both + "master" and "slave" subprojects, each with their own `setup.py`, + `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI + distributions (and upload multiple independently-installable tarballs). +* Source trees whose main purpose is to contain a C library, but which also + provide bindings to Python (and perhaps other languages) in subdirectories. + +Versioneer will look for `.git` in parent directories, and most operations +should get the right version string. However `pip` and `setuptools` have bugs +and implementation details which frequently cause `pip install .` from a +subproject directory to fail to find a correct version string (so it usually +defaults to `0+unknown`). + +`pip install --editable .` should work correctly. `setup.py install` might +work too. + +Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in +some later version. + +[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking +this issue. The discussion in +[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the +issue from the Versioneer side in more detail. +[pip PR#3176](https://github.com/pypa/pip/pull/3176) and +[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve +pip to let Versioneer work correctly.
+ +Versioneer-0.16 and earlier only looked for a `.git` directory next to the +`setup.cfg`, so subprojects were completely unsupported with those releases. + +### Editable installs with setuptools <= 18.5 + +`setup.py develop` and `pip install --editable .` allow you to install a +project into a virtualenv once, then continue editing the source code (and +test) without re-installing after every change. + +"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a +convenient way to specify executable scripts that should be installed along +with the python package. + +These both work as expected when using modern setuptools. When using +setuptools-18.5 or earlier, however, certain operations will cause +`pkg_resources.DistributionNotFound` errors when running the entrypoint +script, which must be resolved by re-installing the package. This happens +when the install happens with one version, then the egg_info data is +regenerated while a different version is checked out. Many setup.py commands +cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into +a different virtualenv), so this can be surprising. + +[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes +this one, but upgrading to a newer version of setuptools should probably +resolve it. + + +## Updating Versioneer + +To upgrade your project to a new release of Versioneer, do the following: + +* install the new Versioneer (`pip install -U versioneer` or equivalent) +* edit `setup.cfg` and `pyproject.toml`, if necessary, + to include any new configuration settings indicated by the release notes. + See [UPGRADING](./UPGRADING.md) for details. 
+* re-run `versioneer install --[no-]vendor` in your source tree, to replace + `SRC/_version.py` +* commit any changed files + +## Future Directions + +This tool is designed to be easily extended to other version-control +systems: all VCS-specific components are in separate directories like +src/git/ . The top-level `versioneer.py` script is assembled from these +components by running make-versioneer.py . In the future, make-versioneer.py +will take a VCS name as an argument, and will construct a version of +`versioneer.py` that is specific to the given VCS. It might also take the +configuration arguments that are currently provided manually during +installation by editing setup.py . Alternatively, it might go the other +direction and include code from all supported VCS systems, reducing the +number of intermediate scripts. + +## Similar projects + +* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time + dependency +* [miniver](https://github.com/jbweston/miniver) - a lightweight reimplementation of + versioneer +* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools + plugin + +## License + +To make Versioneer easier to embed, all its code is dedicated to the public +domain. The `_version.py` that it creates is also in the public domain. +Specifically, both are released under the "Unlicense", as described in +https://unlicense.org/.
+ +[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg +[pypi-url]: https://pypi.python.org/pypi/versioneer/ +[travis-image]: +https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg +[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer + +""" +# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring +# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements +# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error +# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with +# pylint:disable=attribute-defined-outside-init,too-many-arguments + +import configparser +import errno +import json +import os +import re +import subprocess +import sys +from pathlib import Path +from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Union +from typing import NoReturn +import functools + +have_tomllib = True +if sys.version_info >= (3, 11): + import tomllib +else: + try: + import tomli as tomllib + except ImportError: + have_tomllib = False + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + VCS: str + style: str + tag_prefix: str + versionfile_source: str + versionfile_build: Optional[str] + parentdir_prefix: Optional[str] + verbose: Optional[bool] + + +def get_root() -> str: + """Get the project root directory. + + We require that all commands are run from the project root, i.e. the + directory that contains setup.py, setup.cfg, and versioneer.py . 
+ """ + root = os.path.realpath(os.path.abspath(os.getcwd())) + setup_py = os.path.join(root, "setup.py") + pyproject_toml = os.path.join(root, "pyproject.toml") + versioneer_py = os.path.join(root, "versioneer.py") + if not ( + os.path.exists(setup_py) + or os.path.exists(pyproject_toml) + or os.path.exists(versioneer_py) + ): + # allow 'python path/to/setup.py COMMAND' + root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) + setup_py = os.path.join(root, "setup.py") + pyproject_toml = os.path.join(root, "pyproject.toml") + versioneer_py = os.path.join(root, "versioneer.py") + if not ( + os.path.exists(setup_py) + or os.path.exists(pyproject_toml) + or os.path.exists(versioneer_py) + ): + err = ("Versioneer was unable to run the project root directory. " + "Versioneer requires setup.py to be executed from " + "its immediate directory (like 'python setup.py COMMAND'), " + "or in a way that lets it use sys.argv[0] to find the root " + "(like 'python path/to/setup.py COMMAND').") + raise VersioneerBadRootError(err) + try: + # Certain runtime workflows (setup.py install/develop in a setuptools + # tree) execute all dependencies in a single python process, so + # "versioneer" may be imported multiple times, and python's shared + # module-import table will cache the first one. So we can't use + # os.path.dirname(__file__), as that will find whichever + # versioneer.py was first imported, even in later projects. 
+ my_path = os.path.realpath(os.path.abspath(__file__)) + me_dir = os.path.normcase(os.path.splitext(my_path)[0]) + vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) + if me_dir != vsr_dir and "VERSIONEER_PEP518" not in globals(): + print("Warning: build in %s is using versioneer.py from %s" + % (os.path.dirname(my_path), versioneer_py)) + except NameError: + pass + return root + + +def get_config_from_root(root: str) -> VersioneerConfig: + """Read the project setup.cfg file to determine Versioneer config.""" + # This might raise OSError (if setup.cfg is missing), or + # configparser.NoSectionError (if it lacks a [versioneer] section), or + # configparser.NoOptionError (if it lacks "VCS="). See the docstring at + # the top of versioneer.py for instructions on writing your setup.cfg . + root_pth = Path(root) + pyproject_toml = root_pth / "pyproject.toml" + setup_cfg = root_pth / "setup.cfg" + section: Union[Dict[str, Any], configparser.SectionProxy, None] = None + if pyproject_toml.exists() and have_tomllib: + try: + with open(pyproject_toml, 'rb') as fobj: + pp = tomllib.load(fobj) + section = pp['tool']['versioneer'] + except (tomllib.TOMLDecodeError, KeyError) as e: + print(f"Failed to load config from {pyproject_toml}: {e}") + print("Try to load it from setup.cfg") + if not section: + parser = configparser.ConfigParser() + with open(setup_cfg) as cfg_file: + parser.read_file(cfg_file) + parser.get("versioneer", "VCS") # raise error if missing + + section = parser["versioneer"] + + # `cast`` really shouldn't be used, but its simplest for the + # common VersioneerConfig users at the moment. 
We verify against + # `None` values elsewhere where it matters + + cfg = VersioneerConfig() + cfg.VCS = section['VCS'] + cfg.style = section.get("style", "") + cfg.versionfile_source = cast(str, section.get("versionfile_source")) + cfg.versionfile_build = section.get("versionfile_build") + cfg.tag_prefix = cast(str, section.get("tag_prefix")) + if cfg.tag_prefix in ("''", '""', None): + cfg.tag_prefix = "" + cfg.parentdir_prefix = section.get("parentdir_prefix") + if isinstance(section, configparser.SectionProxy): + # Make sure configparser translates to bool + cfg.verbose = section.getboolean("verbose") + else: + cfg.verbose = section.get("verbose") + + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +# these dictionaries contain VCS-specific tools +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f: Callable) -> Callable: + """Store f in HANDLERS[vcs][method].""" + HANDLERS.setdefault(vcs, {})[method] = f + return f + return decorate + + +def run_command( + commands: List[str], + args: List[str], + cwd: Optional[str] = None, + verbose: bool = False, + hide_stderr: bool = False, + env: Optional[Dict[str, str]] = None, +) -> Tuple[Optional[str], Optional[int]]: + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + + popen_kwargs: Dict[str, Any] = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, 
cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None), **popen_kwargs) + break + except OSError as e: + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +LONG_VERSION_PY['git'] = r''' +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. +# Generated by versioneer-0.29 +# https://github.com/python-versioneer/python-versioneer + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys +from typing import Any, Callable, Dict, List, Optional, Tuple +import functools + + +def get_keywords() -> Dict[str, str]: + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
+ git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" + git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" + git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + VCS: str + style: str + tag_prefix: str + parentdir_prefix: str + versionfile_source: str + verbose: bool + + +def get_config() -> VersioneerConfig: + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "%(STYLE)s" + cfg.tag_prefix = "%(TAG_PREFIX)s" + cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" + cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f: Callable) -> Callable: + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command( + commands: List[str], + args: List[str], + cwd: Optional[str] = None, + verbose: bool = False, + hide_stderr: bool = False, + env: Optional[Dict[str, str]] = None, +) -> Tuple[Optional[str], Optional[int]]: + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + + popen_kwargs: Dict[str, Any] = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for 
command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None), **popen_kwargs) + break + except OSError as e: + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %%s" %% dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %%s" %% (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %%s (error)" %% dispcmd) + print("stdout was %%s" %% stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir( + parentdir_prefix: str, + root: str, + verbose: bool, +) -> Dict[str, Any]: + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %%s but none started with prefix %%s" %% + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. 
When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords: Dict[str, str] = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords( + keywords: Dict[str, str], + tag_prefix: str, + verbose: bool, +) -> Dict[str, Any]: + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. 
+ TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %%d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%%s', no digits" %% ",".join(refs - tags)) + if verbose: + print("likely tags: %%s" %% ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %%s" %% r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs( + tag_prefix: str, + root: str, + verbose: bool, + runner: Callable = run_command +) -> Dict[str, Any]: + """Get version from 'git describe' in the root of the source tree. 
+ + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. + env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=not verbose) + if rc != 0: + if verbose: + print("Directory %%s not under git control" %% root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces: Dict[str, Any] = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. 
If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? 
+ pieces["error"] = ("unable to parse git-describe output: '%%s'" + %% describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%%s' doesn't start with prefix '%%s'" + print(fmt %% (full_tag, tag_prefix)) + pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" + %% (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces: Dict[str, Any]) -> str: + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces: Dict[str, Any]) -> str: + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces: Dict[str, Any]) -> str: + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces: Dict[str, Any]) -> str: + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 
0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%%d.dev%%d" %% (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%%d" %% (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%%d" %% pieces["distance"] + return rendered + + +def render_pep440_post(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces: Dict[str, Any]) -> str: + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces: Dict[str, Any]) -> str: + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%%s'" %% style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions() -> Dict[str, Any]: + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. 
+ + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} +''' + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords: Dict[str, str] = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords( + keywords: Dict[str, str], + tag_prefix: str, + verbose: bool, +) -> Dict[str, Any]: + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. 
We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs( + tag_prefix: str, + root: str, + verbose: bool, + runner: Callable = run_command +) -> Dict[str, Any]: + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + # GIT_DIR can interfere with correct operation of Versioneer. 
+ # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. + env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=not verbose) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces: Dict[str, Any] = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. 
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? 
+ pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def do_vcs_install(versionfile_source: str, ipy: Optional[str]) -> None: + """Git-specific installation logic for Versioneer. + + For Git, this means creating/changing .gitattributes to mark _version.py + for export-subst keyword substitution. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + files = [versionfile_source] + if ipy: + files.append(ipy) + if "VERSIONEER_PEP518" not in globals(): + try: + my_path = __file__ + if my_path.endswith((".pyc", ".pyo")): + my_path = os.path.splitext(my_path)[0] + ".py" + versioneer_file = os.path.relpath(my_path) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) + present = False + try: + with open(".gitattributes", "r") as fobj: + for line in fobj: + if line.strip().startswith(versionfile_source): + if "export-subst" in line.strip().split()[1:]: + present = True + break + except OSError: + pass + if not present: + with open(".gitattributes", "a+") as fobj: + fobj.write(f"{versionfile_source} export-subst\n") + files.append(".gitattributes") + run_command(GITS, ["add", "--"] + files) + + +def versions_from_parentdir( + parentdir_prefix: str, + root: str, + verbose: bool, +) -> Dict[str, Any]: + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +SHORT_VERSION_PY = """ +# This file was generated by 'versioneer.py' (0.29) from +# revision-control system data, or from the parent directory name of an +# unpacked source archive. 
Distribution tarballs contain a pre-generated copy +# of this file. + +import json + +version_json = ''' +%s +''' # END VERSION_JSON + + +def get_versions(): + return json.loads(version_json) +""" + + +def versions_from_file(filename: str) -> Dict[str, Any]: + """Try to determine the version from _version.py if present.""" + try: + with open(filename) as f: + contents = f.read() + except OSError: + raise NotThisMethod("unable to read _version.py") + mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", + contents, re.M | re.S) + if not mo: + mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", + contents, re.M | re.S) + if not mo: + raise NotThisMethod("no version_json in _version.py") + return json.loads(mo.group(1)) + + +def write_to_version_file(filename: str, versions: Dict[str, Any]) -> None: + """Write the given version number to the given _version.py file.""" + contents = json.dumps(versions, sort_keys=True, + indent=1, separators=(",", ": ")) + with open(filename, "w") as f: + f.write(SHORT_VERSION_PY % contents) + + print("set %s to '%s'" % (filename, versions["version"])) + + +def plus_or_dot(pieces: Dict[str, Any]) -> str: + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces: Dict[str, Any]) -> str: + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces: Dict[str, Any]) -> str: + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces: Dict[str, Any]) -> str: + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 
0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces: Dict[str, Any]) -> str: + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces: Dict[str, Any]) -> str: + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +class VersioneerBadRootError(Exception): + """The project root directory is unknown or missing key files.""" + + +def get_versions(verbose: bool = False) -> Dict[str, Any]: + """Get the project version from whatever source is available. + + Returns dict with two keys: 'version' and 'full'. 
+ """ + if "versioneer" in sys.modules: + # see the discussion in cmdclass.py:get_cmdclass() + del sys.modules["versioneer"] + + root = get_root() + cfg = get_config_from_root(root) + + assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" + handlers = HANDLERS.get(cfg.VCS) + assert handlers, "unrecognized VCS '%s'" % cfg.VCS + verbose = verbose or bool(cfg.verbose) # `bool()` used to avoid `None` + assert cfg.versionfile_source is not None, \ + "please set versioneer.versionfile_source" + assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" + + versionfile_abs = os.path.join(root, cfg.versionfile_source) + + # extract version from first of: _version.py, VCS command (e.g. 'git + # describe'), parentdir. This is meant to work for developers using a + # source checkout, for users of a tarball created by 'setup.py sdist', + # and for users of a tarball/zipball created by 'git archive' or github's + # download-from-tag feature or the equivalent in other VCSes. 
+ + get_keywords_f = handlers.get("get_keywords") + from_keywords_f = handlers.get("keywords") + if get_keywords_f and from_keywords_f: + try: + keywords = get_keywords_f(versionfile_abs) + ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) + if verbose: + print("got version from expanded keyword %s" % ver) + return ver + except NotThisMethod: + pass + + try: + ver = versions_from_file(versionfile_abs) + if verbose: + print("got version from file %s %s" % (versionfile_abs, ver)) + return ver + except NotThisMethod: + pass + + from_vcs_f = handlers.get("pieces_from_vcs") + if from_vcs_f: + try: + pieces = from_vcs_f(cfg.tag_prefix, root, verbose) + ver = render(pieces, cfg.style) + if verbose: + print("got version from VCS %s" % ver) + return ver + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + if verbose: + print("got version from parentdir %s" % ver) + return ver + except NotThisMethod: + pass + + if verbose: + print("unable to compute version") + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, "error": "unable to compute version", + "date": None} + + +def get_version() -> str: + """Get the short version string for this project.""" + return get_versions()["version"] + + +def get_cmdclass(cmdclass: Optional[Dict[str, Any]] = None): + """Get the custom setuptools subclasses used by Versioneer. + + If the package uses a different cmdclass (e.g. one from numpy), it + should be provide as an argument. + """ + if "versioneer" in sys.modules: + del sys.modules["versioneer"] + # this fixes the "python setup.py develop" case (also 'install' and + # 'easy_install .'), in which subdependencies of the main project are + # built (using setup.py bdist_egg) in the same python process. Assume + # a main project A and a dependency B, which use different versions + # of Versioneer. 
A's setup.py imports A's Versioneer, leaving it in + # sys.modules by the time B's setup.py is executed, causing B to run + # with the wrong versioneer. Setuptools wraps the sub-dep builds in a + # sandbox that restores sys.modules to it's pre-build state, so the + # parent is protected against the child's "import versioneer". By + # removing ourselves from sys.modules here, before the child build + # happens, we protect the child from the parent's versioneer too. + # Also see https://github.com/python-versioneer/python-versioneer/issues/52 + + cmds = {} if cmdclass is None else cmdclass.copy() + + # we add "version" to setuptools + from setuptools import Command + + class cmd_version(Command): + description = "report generated version string" + user_options: List[Tuple[str, str, str]] = [] + boolean_options: List[str] = [] + + def initialize_options(self) -> None: + pass + + def finalize_options(self) -> None: + pass + + def run(self) -> None: + vers = get_versions(verbose=True) + print("Version: %s" % vers["version"]) + print(" full-revisionid: %s" % vers.get("full-revisionid")) + print(" dirty: %s" % vers.get("dirty")) + print(" date: %s" % vers.get("date")) + if vers["error"]: + print(" error: %s" % vers["error"]) + cmds["version"] = cmd_version + + # we override "build_py" in setuptools + # + # most invocation pathways end up running build_py: + # distutils/build -> build_py + # distutils/install -> distutils/build ->.. + # setuptools/bdist_wheel -> distutils/install ->.. + # setuptools/bdist_egg -> distutils/install_lib -> build_py + # setuptools/install -> bdist_egg ->.. + # setuptools/develop -> ? + # pip install: + # copies source tree to a tempdir before running egg_info/etc + # if .git isn't copied too, 'git describe' will fail + # then does setup.py bdist_wheel, or sometimes setup.py install + # setup.py egg_info -> ? + + # pip install -e . and setuptool/editable_wheel will invoke build_py + # but the build_py command is not expected to copy any files. 
+ + # we override different "build_py" commands for both environments + if 'build_py' in cmds: + _build_py: Any = cmds['build_py'] + else: + from setuptools.command.build_py import build_py as _build_py + + class cmd_build_py(_build_py): + def run(self) -> None: + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_py.run(self) + if getattr(self, "editable_mode", False): + # During editable installs `.py` and data files are + # not copied to build_lib + return + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if cfg.versionfile_build: + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_py"] = cmd_build_py + + if 'build_ext' in cmds: + _build_ext: Any = cmds['build_ext'] + else: + from setuptools.command.build_ext import build_ext as _build_ext + + class cmd_build_ext(_build_ext): + def run(self) -> None: + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_ext.run(self) + if self.inplace: + # build_ext --inplace will only build extensions in + # build/lib<..> dir with no _version.py to write to. + # As in place builds will already have a _version.py + # in the module dir, we do not need to write one. + return + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if not cfg.versionfile_build: + return + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + if not os.path.exists(target_versionfile): + print(f"Warning: {target_versionfile} does not exist, skipping " + "version update. 
This can happen if you are running build_ext " + "without first running build_py.") + return + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_ext"] = cmd_build_ext + + if "cx_Freeze" in sys.modules: # cx_freeze enabled? + from cx_Freeze.dist import build_exe as _build_exe # type: ignore + # nczeczulin reports that py2exe won't like the pep440-style string + # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. + # setup(console=[{ + # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION + # "product_version": versioneer.get_version(), + # ... + + class cmd_build_exe(_build_exe): + def run(self) -> None: + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _build_exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["build_exe"] = cmd_build_exe + del cmds["build_py"] + + if 'py2exe' in sys.modules: # py2exe enabled? 
+ try: + from py2exe.setuptools_buildexe import py2exe as _py2exe # type: ignore + except ImportError: + from py2exe.distutils_buildexe import py2exe as _py2exe # type: ignore + + class cmd_py2exe(_py2exe): + def run(self) -> None: + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _py2exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["py2exe"] = cmd_py2exe + + # sdist farms its file list building out to egg_info + if 'egg_info' in cmds: + _egg_info: Any = cmds['egg_info'] + else: + from setuptools.command.egg_info import egg_info as _egg_info + + class cmd_egg_info(_egg_info): + def find_sources(self) -> None: + # egg_info.find_sources builds the manifest list and writes it + # in one shot + super().find_sources() + + # Modify the filelist and normalize it + root = get_root() + cfg = get_config_from_root(root) + self.filelist.append('versioneer.py') + if cfg.versionfile_source: + # There are rare cases where versionfile_source might not be + # included by default, so we must be explicit + self.filelist.append(cfg.versionfile_source) + self.filelist.sort() + self.filelist.remove_duplicates() + + # The write method is hidden in the manifest_maker instance that + # generated the filelist and was thrown away + # We will instead replicate their final normalization (to unicode, + # and POSIX-style paths) + from setuptools import unicode_utils + normalized = [unicode_utils.filesys_decode(f).replace(os.sep, '/') + for f in self.filelist.files] + + manifest_filename = os.path.join(self.egg_info, 'SOURCES.txt') + with 
open(manifest_filename, 'w') as fobj: + fobj.write('\n'.join(normalized)) + + cmds['egg_info'] = cmd_egg_info + + # we override different "sdist" commands for both environments + if 'sdist' in cmds: + _sdist: Any = cmds['sdist'] + else: + from setuptools.command.sdist import sdist as _sdist + + class cmd_sdist(_sdist): + def run(self) -> None: + versions = get_versions() + self._versioneer_generated_versions = versions + # unless we update this, the command will keep using the old + # version + self.distribution.metadata.version = versions["version"] + return _sdist.run(self) + + def make_release_tree(self, base_dir: str, files: List[str]) -> None: + root = get_root() + cfg = get_config_from_root(root) + _sdist.make_release_tree(self, base_dir, files) + # now locate _version.py in the new base_dir directory + # (remembering that it may be a hardlink) and replace it with an + # updated value + target_versionfile = os.path.join(base_dir, cfg.versionfile_source) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, + self._versioneer_generated_versions) + cmds["sdist"] = cmd_sdist + + return cmds + + +CONFIG_ERROR = """ +setup.cfg is missing the necessary Versioneer configuration. You need +a section like: + + [versioneer] + VCS = git + style = pep440 + versionfile_source = src/myproject/_version.py + versionfile_build = myproject/_version.py + tag_prefix = + parentdir_prefix = myproject- + +You will also need to edit your setup.py to use the results: + + import versioneer + setup(version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), ...) + +Please read the docstring in ./versioneer.py for configuration instructions, +edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. +""" + +SAMPLE_CONFIG = """ +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. 
+ +[versioneer] +#VCS = git +#style = pep440 +#versionfile_source = +#versionfile_build = +#tag_prefix = +#parentdir_prefix = + +""" + +OLD_SNIPPET = """ +from ._version import get_versions +__version__ = get_versions()['version'] +del get_versions +""" + +INIT_PY_SNIPPET = """ +from . import {0} +__version__ = {0}.get_versions()['version'] +""" + + +def do_setup() -> int: + """Do main VCS-independent setup function for installing Versioneer.""" + root = get_root() + try: + cfg = get_config_from_root(root) + except (OSError, configparser.NoSectionError, + configparser.NoOptionError) as e: + if isinstance(e, (OSError, configparser.NoSectionError)): + print("Adding sample versioneer config to setup.cfg", + file=sys.stderr) + with open(os.path.join(root, "setup.cfg"), "a") as f: + f.write(SAMPLE_CONFIG) + print(CONFIG_ERROR, file=sys.stderr) + return 1 + + print(" creating %s" % cfg.versionfile_source) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + + ipy = os.path.join(os.path.dirname(cfg.versionfile_source), + "__init__.py") + maybe_ipy: Optional[str] = ipy + if os.path.exists(ipy): + try: + with open(ipy, "r") as f: + old = f.read() + except OSError: + old = "" + module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] + snippet = INIT_PY_SNIPPET.format(module) + if OLD_SNIPPET in old: + print(" replacing boilerplate in %s" % ipy) + with open(ipy, "w") as f: + f.write(old.replace(OLD_SNIPPET, snippet)) + elif snippet not in old: + print(" appending to %s" % ipy) + with open(ipy, "a") as f: + f.write(snippet) + else: + print(" %s unmodified" % ipy) + else: + print(" %s doesn't exist, ok" % ipy) + maybe_ipy = None + + # Make VCS-specific changes. 
For git, this means creating/changing + # .gitattributes to mark _version.py for export-subst keyword + # substitution. + do_vcs_install(cfg.versionfile_source, maybe_ipy) + return 0 + + +def scan_setup_py() -> int: + """Validate the contents of setup.py against Versioneer's expectations.""" + found = set() + setters = False + errors = 0 + with open("setup.py", "r") as f: + for line in f.readlines(): + if "import versioneer" in line: + found.add("import") + if "versioneer.get_cmdclass()" in line: + found.add("cmdclass") + if "versioneer.get_version()" in line: + found.add("get_version") + if "versioneer.VCS" in line: + setters = True + if "versioneer.versionfile_source" in line: + setters = True + if len(found) != 3: + print("") + print("Your setup.py appears to be missing some important items") + print("(but I might be wrong). Please make sure it has something") + print("roughly like the following:") + print("") + print(" import versioneer") + print(" setup( version=versioneer.get_version(),") + print(" cmdclass=versioneer.get_cmdclass(), ...)") + print("") + errors += 1 + if setters: + print("You should remove lines like 'versioneer.VCS = ' and") + print("'versioneer.versionfile_source = ' . 
This configuration") + print("now lives in setup.cfg, and should be removed from setup.py") + print("") + errors += 1 + return errors + + +def setup_command() -> NoReturn: + """Set up Versioneer and exit with appropriate error code.""" + errors = do_setup() + errors += scan_setup_py() + sys.exit(1 if errors else 0) + + +if __name__ == "__main__": + cmd = sys.argv[1] + if cmd == "setup": + setup_command() From 2141b60b3cd26e39f357bfed45f800d4538cb6d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Mon, 27 Nov 2023 12:49:12 +0100 Subject: [PATCH 110/134] Install versioneer via pip --- .github/workflows/pypicheck.yml | 1 + .github/workflows/unittest-flux.yml | 1 + .github/workflows/unittest-mpich.yml | 1 + .github/workflows/unittest-openmpi.yml | 1 + .github/workflows/unittest-win.yml | 1 + .github/workflows/unittests-old.yml | 1 + versioneer.py | 2277 ------------------------ 7 files changed, 6 insertions(+), 2277 deletions(-) delete mode 100644 versioneer.py diff --git a/.github/workflows/pypicheck.yml b/.github/workflows/pypicheck.yml index 202829a7..32230668 100644 --- a/.github/workflows/pypicheck.yml +++ b/.github/workflows/pypicheck.yml @@ -23,5 +23,6 @@ jobs: - name: Setup shell: bash -l {0} run: | + pip install versioneer[toml]==0.29 pip install --no-deps . pip check diff --git a/.github/workflows/unittest-flux.yml b/.github/workflows/unittest-flux.yml index b4fc60ee..62b8f009 100644 --- a/.github/workflows/unittest-flux.yml +++ b/.github/workflows/unittest-flux.yml @@ -43,6 +43,7 @@ jobs: timeout-minutes: 5 run: | mamba install -y flux-core coverage + pip install versioneer[toml]==0.29 pip install . -vv --no-deps --no-build-isolation cd tests coverage run --omit pympipool/_version.py -m unittest discover . 
diff --git a/.github/workflows/unittest-mpich.yml b/.github/workflows/unittest-mpich.yml index bc90737f..0b560735 100644 --- a/.github/workflows/unittest-mpich.yml +++ b/.github/workflows/unittest-mpich.yml @@ -55,6 +55,7 @@ jobs: shell: bash -l {0} timeout-minutes: 5 run: | + pip install versioneer[toml]==0.29 pip install . -vv --no-deps --no-build-isolation cd tests python -m unittest discover . diff --git a/.github/workflows/unittest-openmpi.yml b/.github/workflows/unittest-openmpi.yml index fa4a5764..d1d5e5bb 100644 --- a/.github/workflows/unittest-openmpi.yml +++ b/.github/workflows/unittest-openmpi.yml @@ -55,6 +55,7 @@ jobs: shell: bash -l {0} timeout-minutes: 5 run: | + pip install versioneer[toml]==0.29 pip install . -vv --no-deps --no-build-isolation cd tests python -m unittest discover . diff --git a/.github/workflows/unittest-win.yml b/.github/workflows/unittest-win.yml index a26a0d84..9a8fbb1f 100644 --- a/.github/workflows/unittest-win.yml +++ b/.github/workflows/unittest-win.yml @@ -32,6 +32,7 @@ jobs: shell: bash -l {0} timeout-minutes: 5 run: | + pip install versioneer[toml]==0.29 pip install . -vv --no-deps --no-build-isolation cd tests python -m unittest discover . diff --git a/.github/workflows/unittests-old.yml b/.github/workflows/unittests-old.yml index 5c2eb8da..b8c486e8 100644 --- a/.github/workflows/unittests-old.yml +++ b/.github/workflows/unittests-old.yml @@ -27,6 +27,7 @@ jobs: shell: bash -l {0} timeout-minutes: 5 run: | + pip install versioneer[toml]==0.29 pip install . -vv --no-deps --no-build-isolation cd tests python -m unittest discover . diff --git a/versioneer.py b/versioneer.py deleted file mode 100644 index 1e3753e6..00000000 --- a/versioneer.py +++ /dev/null @@ -1,2277 +0,0 @@ - -# Version: 0.29 - -"""The Versioneer - like a rocketeer, but for versions. - -The Versioneer -============== - -* like a rocketeer, but for versions! 
-* https://github.com/python-versioneer/python-versioneer -* Brian Warner -* License: Public Domain (Unlicense) -* Compatible with: Python 3.7, 3.8, 3.9, 3.10, 3.11 and pypy3 -* [![Latest Version][pypi-image]][pypi-url] -* [![Build Status][travis-image]][travis-url] - -This is a tool for managing a recorded version number in setuptools-based -python projects. The goal is to remove the tedious and error-prone "update -the embedded version string" step from your release process. Making a new -release should be as easy as recording a new tag in your version-control -system, and maybe making new tarballs. - - -## Quick Install - -Versioneer provides two installation modes. The "classic" vendored mode installs -a copy of versioneer into your repository. The experimental build-time dependency mode -is intended to allow you to skip this step and simplify the process of upgrading. - -### Vendored mode - -* `pip install versioneer` to somewhere in your $PATH - * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is - available, so you can also use `conda install -c conda-forge versioneer` -* add a `[tool.versioneer]` section to your `pyproject.toml` or a - `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) - * Note that you will need to add `tomli; python_version < "3.11"` to your - build-time dependencies if you use `pyproject.toml` -* run `versioneer install --vendor` in your source tree, commit the results -* verify version information with `python setup.py version` - -### Build-time dependency mode - -* `pip install versioneer` to somewhere in your $PATH - * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is - available, so you can also use `conda install -c conda-forge versioneer` -* add a `[tool.versioneer]` section to your `pyproject.toml` or a - `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) -* add `versioneer` (with `[toml]` extra, if configuring in `pyproject.toml`) - 
to the `requires` key of the `build-system` table in `pyproject.toml`: - ```toml - [build-system] - requires = ["setuptools", "versioneer[toml]"] - build-backend = "setuptools.build_meta" - ``` -* run `versioneer install --no-vendor` in your source tree, commit the results -* verify version information with `python setup.py version` - -## Version Identifiers - -Source trees come from a variety of places: - -* a version-control system checkout (mostly used by developers) -* a nightly tarball, produced by build automation -* a snapshot tarball, produced by a web-based VCS browser, like github's - "tarball from tag" feature -* a release tarball, produced by "setup.py sdist", distributed through PyPI - -Within each source tree, the version identifier (either a string or a number, -this tool is format-agnostic) can come from a variety of places: - -* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows - about recent "tags" and an absolute revision-id -* the name of the directory into which the tarball was unpacked -* an expanded VCS keyword ($Id$, etc) -* a `_version.py` created by some earlier build step - -For released software, the version identifier is closely related to a VCS -tag. Some projects use tag names that include more than just the version -string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool -needs to strip the tag prefix to extract the version identifier. For -unreleased software (between tags), the version identifier should provide -enough information to help developers recreate the same tree, while also -giving them an idea of roughly how old the tree is (after version 1.2, before -version 1.3). Many VCS systems can report a description that captures this, -for example `git describe --tags --dirty --always` reports things like -"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the -0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes). 
- -The version identifier is used for multiple purposes: - -* to allow the module to self-identify its version: `myproject.__version__` -* to choose a name and prefix for a 'setup.py sdist' tarball - -## Theory of Operation - -Versioneer works by adding a special `_version.py` file into your source -tree, where your `__init__.py` can import it. This `_version.py` knows how to -dynamically ask the VCS tool for version information at import time. - -`_version.py` also contains `$Revision$` markers, and the installation -process marks `_version.py` to have this marker rewritten with a tag name -during the `git archive` command. As a result, generated tarballs will -contain enough information to get the proper version. - -To allow `setup.py` to compute a version too, a `versioneer.py` is added to -the top level of your source tree, next to `setup.py` and the `setup.cfg` -that configures it. This overrides several distutils/setuptools commands to -compute the version when invoked, and changes `setup.py build` and `setup.py -sdist` to replace `_version.py` with a small static file that contains just -the generated version data. - -## Installation - -See [INSTALL.md](./INSTALL.md) for detailed installation instructions. - -## Version-String Flavors - -Code which uses Versioneer can learn about its version string at runtime by -importing `_version` from your main `__init__.py` file and running the -`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can -import the top-level `versioneer.py` and run `get_versions()`. - -Both functions return a dictionary with different flavors of version -information: - -* `['version']`: A condensed version string, rendered using the selected - style. This is the most commonly used value for the project's version - string. The default "pep440" style yields strings like `0.11`, - `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section - below for alternative styles. 
- -* `['full-revisionid']`: detailed revision identifier. For Git, this is the - full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". - -* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the - commit date in ISO 8601 format. This will be None if the date is not - available. - -* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that - this is only accurate if run in a VCS checkout, otherwise it is likely to - be False or None - -* `['error']`: if the version string could not be computed, this will be set - to a string describing the problem, otherwise it will be None. It may be - useful to throw an exception in setup.py if this is set, to avoid e.g. - creating tarballs with a version string of "unknown". - -Some variants are more useful than others. Including `full-revisionid` in a -bug report should allow developers to reconstruct the exact code being tested -(or indicate the presence of local changes that should be shared with the -developers). `version` is suitable for display in an "about" box or a CLI -`--version` output: it can be easily compared against release notes and lists -of bugs fixed in various releases. - -The installer adds the following text to your `__init__.py` to place a basic -version in `YOURPROJECT.__version__`: - - from ._version import get_versions - __version__ = get_versions()['version'] - del get_versions - -## Styles - -The setup.cfg `style=` configuration controls how the VCS information is -rendered into a version string. - -The default style, "pep440", produces a PEP440-compliant string, equal to the -un-prefixed tag name for actual releases, and containing an additional "local -version" section with more detail for in-between builds. For Git, this is -TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags ---dirty --always`. 
For example "0.11+2.g1076c97.dirty" indicates that the -tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and -that this commit is two revisions ("+2") beyond the "0.11" tag. For released -software (exactly equal to a known tag), the identifier will only contain the -stripped tag, e.g. "0.11". - -Other styles are available. See [details.md](details.md) in the Versioneer -source tree for descriptions. - -## Debugging - -Versioneer tries to avoid fatal errors: if something goes wrong, it will tend -to return a version of "0+unknown". To investigate the problem, run `setup.py -version`, which will run the version-lookup code in a verbose mode, and will -display the full contents of `get_versions()` (including the `error` string, -which may help identify what went wrong). - -## Known Limitations - -Some situations are known to cause problems for Versioneer. This details the -most significant ones. More can be found on Github -[issues page](https://github.com/python-versioneer/python-versioneer/issues). - -### Subprojects - -Versioneer has limited support for source trees in which `setup.py` is not in -the root directory (e.g. `setup.py` and `.git/` are *not* siblings). The are -two common reasons why `setup.py` might not be in the root: - -* Source trees which contain multiple subprojects, such as - [Buildbot](https://github.com/buildbot/buildbot), which contains both - "master" and "slave" subprojects, each with their own `setup.py`, - `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI - distributions (and upload multiple independently-installable tarballs). -* Source trees whose main purpose is to contain a C library, but which also - provide bindings to Python (and perhaps other languages) in subdirectories. - -Versioneer will look for `.git` in parent directories, and most operations -should get the right version string. 
However `pip` and `setuptools` have bugs -and implementation details which frequently cause `pip install .` from a -subproject directory to fail to find a correct version string (so it usually -defaults to `0+unknown`). - -`pip install --editable .` should work correctly. `setup.py install` might -work too. - -Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in -some later version. - -[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking -this issue. The discussion in -[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the -issue from the Versioneer side in more detail. -[pip PR#3176](https://github.com/pypa/pip/pull/3176) and -[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve -pip to let Versioneer work correctly. - -Versioneer-0.16 and earlier only looked for a `.git` directory next to the -`setup.cfg`, so subprojects were completely unsupported with those releases. - -### Editable installs with setuptools <= 18.5 - -`setup.py develop` and `pip install --editable .` allow you to install a -project into a virtualenv once, then continue editing the source code (and -test) without re-installing after every change. - -"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a -convenient way to specify executable scripts that should be installed along -with the python package. - -These both work as expected when using modern setuptools. When using -setuptools-18.5 or earlier, however, certain operations will cause -`pkg_resources.DistributionNotFound` errors when running the entrypoint -script, which must be resolved by re-installing the package. This happens -when the install happens with one version, then the egg_info data is -regenerated while a different version is checked out. Many setup.py commands -cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into -a different virtualenv), so this can be surprising. 
- -[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes -this one, but upgrading to a newer version of setuptools should probably -resolve it. - - -## Updating Versioneer - -To upgrade your project to a new release of Versioneer, do the following: - -* install the new Versioneer (`pip install -U versioneer` or equivalent) -* edit `setup.cfg` and `pyproject.toml`, if necessary, - to include any new configuration settings indicated by the release notes. - See [UPGRADING](./UPGRADING.md) for details. -* re-run `versioneer install --[no-]vendor` in your source tree, to replace - `SRC/_version.py` -* commit any changed files - -## Future Directions - -This tool is designed to make it easily extended to other version-control -systems: all VCS-specific components are in separate directories like -src/git/ . The top-level `versioneer.py` script is assembled from these -components by running make-versioneer.py . In the future, make-versioneer.py -will take a VCS name as an argument, and will construct a version of -`versioneer.py` that is specific to the given VCS. It might also take the -configuration arguments that are currently provided manually during -installation by editing setup.py . Alternatively, it might go the other -direction and include code from all supported VCS systems, reducing the -number of intermediate scripts. - -## Similar projects - -* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time - dependency -* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of - versioneer -* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools - plugin - -## License - -To make Versioneer easier to embed, all its code is dedicated to the public -domain. The `_version.py` that it creates is also in the public domain. -Specifically, both are released under the "Unlicense", as described in -https://unlicense.org/. 
- -[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg -[pypi-url]: https://pypi.python.org/pypi/versioneer/ -[travis-image]: -https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg -[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer - -""" -# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring -# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements -# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error -# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with -# pylint:disable=attribute-defined-outside-init,too-many-arguments - -import configparser -import errno -import json -import os -import re -import subprocess -import sys -from pathlib import Path -from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Union -from typing import NoReturn -import functools - -have_tomllib = True -if sys.version_info >= (3, 11): - import tomllib -else: - try: - import tomli as tomllib - except ImportError: - have_tomllib = False - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - VCS: str - style: str - tag_prefix: str - versionfile_source: str - versionfile_build: Optional[str] - parentdir_prefix: Optional[str] - verbose: Optional[bool] - - -def get_root() -> str: - """Get the project root directory. - - We require that all commands are run from the project root, i.e. the - directory that contains setup.py, setup.cfg, and versioneer.py . 
- """ - root = os.path.realpath(os.path.abspath(os.getcwd())) - setup_py = os.path.join(root, "setup.py") - pyproject_toml = os.path.join(root, "pyproject.toml") - versioneer_py = os.path.join(root, "versioneer.py") - if not ( - os.path.exists(setup_py) - or os.path.exists(pyproject_toml) - or os.path.exists(versioneer_py) - ): - # allow 'python path/to/setup.py COMMAND' - root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) - setup_py = os.path.join(root, "setup.py") - pyproject_toml = os.path.join(root, "pyproject.toml") - versioneer_py = os.path.join(root, "versioneer.py") - if not ( - os.path.exists(setup_py) - or os.path.exists(pyproject_toml) - or os.path.exists(versioneer_py) - ): - err = ("Versioneer was unable to run the project root directory. " - "Versioneer requires setup.py to be executed from " - "its immediate directory (like 'python setup.py COMMAND'), " - "or in a way that lets it use sys.argv[0] to find the root " - "(like 'python path/to/setup.py COMMAND').") - raise VersioneerBadRootError(err) - try: - # Certain runtime workflows (setup.py install/develop in a setuptools - # tree) execute all dependencies in a single python process, so - # "versioneer" may be imported multiple times, and python's shared - # module-import table will cache the first one. So we can't use - # os.path.dirname(__file__), as that will find whichever - # versioneer.py was first imported, even in later projects. 
- my_path = os.path.realpath(os.path.abspath(__file__)) - me_dir = os.path.normcase(os.path.splitext(my_path)[0]) - vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) - if me_dir != vsr_dir and "VERSIONEER_PEP518" not in globals(): - print("Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(my_path), versioneer_py)) - except NameError: - pass - return root - - -def get_config_from_root(root: str) -> VersioneerConfig: - """Read the project setup.cfg file to determine Versioneer config.""" - # This might raise OSError (if setup.cfg is missing), or - # configparser.NoSectionError (if it lacks a [versioneer] section), or - # configparser.NoOptionError (if it lacks "VCS="). See the docstring at - # the top of versioneer.py for instructions on writing your setup.cfg . - root_pth = Path(root) - pyproject_toml = root_pth / "pyproject.toml" - setup_cfg = root_pth / "setup.cfg" - section: Union[Dict[str, Any], configparser.SectionProxy, None] = None - if pyproject_toml.exists() and have_tomllib: - try: - with open(pyproject_toml, 'rb') as fobj: - pp = tomllib.load(fobj) - section = pp['tool']['versioneer'] - except (tomllib.TOMLDecodeError, KeyError) as e: - print(f"Failed to load config from {pyproject_toml}: {e}") - print("Try to load it from setup.cfg") - if not section: - parser = configparser.ConfigParser() - with open(setup_cfg) as cfg_file: - parser.read_file(cfg_file) - parser.get("versioneer", "VCS") # raise error if missing - - section = parser["versioneer"] - - # `cast`` really shouldn't be used, but its simplest for the - # common VersioneerConfig users at the moment. 
We verify against - # `None` values elsewhere where it matters - - cfg = VersioneerConfig() - cfg.VCS = section['VCS'] - cfg.style = section.get("style", "") - cfg.versionfile_source = cast(str, section.get("versionfile_source")) - cfg.versionfile_build = section.get("versionfile_build") - cfg.tag_prefix = cast(str, section.get("tag_prefix")) - if cfg.tag_prefix in ("''", '""', None): - cfg.tag_prefix = "" - cfg.parentdir_prefix = section.get("parentdir_prefix") - if isinstance(section, configparser.SectionProxy): - # Make sure configparser translates to bool - cfg.verbose = section.getboolean("verbose") - else: - cfg.verbose = section.get("verbose") - - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -# these dictionaries contain VCS-specific tools -LONG_VERSION_PY: Dict[str, str] = {} -HANDLERS: Dict[str, Dict[str, Callable]] = {} - - -def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f: Callable) -> Callable: - """Store f in HANDLERS[vcs][method].""" - HANDLERS.setdefault(vcs, {})[method] = f - return f - return decorate - - -def run_command( - commands: List[str], - args: List[str], - cwd: Optional[str] = None, - verbose: bool = False, - hide_stderr: bool = False, - env: Optional[Dict[str, str]] = None, -) -> Tuple[Optional[str], Optional[int]]: - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - - popen_kwargs: Dict[str, Any] = {} - if sys.platform == "win32": - # This hides the console window if pythonw.exe is used - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - popen_kwargs["startupinfo"] = startupinfo - - for command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, 
cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None), **popen_kwargs) - break - except OSError as e: - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, process.returncode - return stdout, process.returncode - - -LONG_VERSION_PY['git'] = r''' -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. -# Generated by versioneer-0.29 -# https://github.com/python-versioneer/python-versioneer - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys -from typing import Any, Callable, Dict, List, Optional, Tuple -import functools - - -def get_keywords() -> Dict[str, str]: - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). 
- git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" - git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" - git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - VCS: str - style: str - tag_prefix: str - parentdir_prefix: str - versionfile_source: str - verbose: bool - - -def get_config() -> VersioneerConfig: - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "%(STYLE)s" - cfg.tag_prefix = "%(TAG_PREFIX)s" - cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" - cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY: Dict[str, str] = {} -HANDLERS: Dict[str, Dict[str, Callable]] = {} - - -def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f: Callable) -> Callable: - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command( - commands: List[str], - args: List[str], - cwd: Optional[str] = None, - verbose: bool = False, - hide_stderr: bool = False, - env: Optional[Dict[str, str]] = None, -) -> Tuple[Optional[str], Optional[int]]: - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - - popen_kwargs: Dict[str, Any] = {} - if sys.platform == "win32": - # This hides the console window if pythonw.exe is used - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - popen_kwargs["startupinfo"] = startupinfo - - for 
command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None), **popen_kwargs) - break - except OSError as e: - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %%s" %% dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %%s" %% (commands,)) - return None, None - stdout = process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %%s (error)" %% dispcmd) - print("stdout was %%s" %% stdout) - return None, process.returncode - return stdout, process.returncode - - -def versions_from_parentdir( - parentdir_prefix: str, - root: str, - verbose: bool, -) -> Dict[str, Any]: - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %%s but none started with prefix %%s" %% - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. 
When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords: Dict[str, str] = {} - try: - with open(versionfile_abs, "r") as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords( - keywords: Dict[str, str], - tag_prefix: str, - verbose: bool, -) -> Dict[str, Any]: - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. 
- TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %%d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} - if verbose: - print("discarding '%%s', no digits" %% ",".join(refs - tags)) - if verbose: - print("likely tags: %%s" %% ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r'\d', r): - continue - if verbose: - print("picking %%s" %% r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs( - tag_prefix: str, - root: str, - verbose: bool, - runner: Callable = run_command -) -> Dict[str, Any]: - """Get version from 'git describe' in the root of the source tree. 
- - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - # GIT_DIR can interfere with correct operation of Versioneer. - # It may be intended to be passed to the Versioneer-versioned project, - # but that should not change where we get our version from. - env = os.environ.copy() - env.pop("GIT_DIR", None) - runner = functools.partial(runner, env=env) - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=not verbose) - if rc != 0: - if verbose: - print("Directory %%s not under git control" %% root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, [ - "describe", "--tags", "--dirty", "--always", "--long", - "--match", f"{tag_prefix}[[:digit:]]*" - ], cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces: Dict[str, Any] = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. 
If all else fails, we are on a branchless - # commit. - branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. - branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. Good or bad. - branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparsable. Maybe git-describe is misbehaving? 
- pieces["error"] = ("unable to parse git-describe output: '%%s'" - %% describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%%s' doesn't start with prefix '%%s'" - print(fmt %% (full_tag, tag_prefix)) - pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" - %% (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) - pieces["distance"] = len(out.split()) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces: Dict[str, Any]) -> str: - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces: Dict[str, Any]) -> str: - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces: Dict[str, Any]) -> str: - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: - """Split pep440 version string at the post-release segment. - - Returns the release segments before the post-release and the - post-release version number (or -1 if no post-release segment is present). - """ - vc = str.split(ver, ".post") - return vc[0], int(vc[1] or 0) if len(vc) == 2 else None - - -def render_pep440_pre(pieces: Dict[str, Any]) -> str: - """TAG[.postN.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 
0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - if pieces["distance"]: - # update the post release segment - tag_version, post_version = pep440_split_post(pieces["closest-tag"]) - rendered = tag_version - if post_version is not None: - rendered += ".post%%d.dev%%d" %% (post_version + 1, pieces["distance"]) - else: - rendered += ".post0.dev%%d" %% (pieces["distance"]) - else: - # no commits, use the tag as the version - rendered = pieces["closest-tag"] - else: - # exception #1 - rendered = "0.post0.dev%%d" %% pieces["distance"] - return rendered - - -def render_pep440_post(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces: Dict[str, Any]) -> str: - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces: Dict[str, Any]) -> str: - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%%s'" %% style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions() -> Dict[str, Any]: - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. 
- - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. - for _ in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} -''' - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords: Dict[str, str] = {} - try: - with open(versionfile_abs, "r") as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords( - keywords: Dict[str, str], - tag_prefix: str, - verbose: bool, -) -> Dict[str, Any]: - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. 
We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r'\d', r): - continue - if verbose: - print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs( - tag_prefix: str, - root: str, - verbose: bool, - runner: Callable = run_command -) -> Dict[str, Any]: - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - # GIT_DIR can interfere with correct operation of Versioneer. 
- # It may be intended to be passed to the Versioneer-versioned project, - # but that should not change where we get our version from. - env = os.environ.copy() - env.pop("GIT_DIR", None) - runner = functools.partial(runner, env=env) - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=not verbose) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, [ - "describe", "--tags", "--dirty", "--always", "--long", - "--match", f"{tag_prefix}[[:digit:]]*" - ], cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces: Dict[str, Any] = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. If all else fails, we are on a branchless - # commit. 
- branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. - branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. Good or bad. - branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparsable. Maybe git-describe is misbehaving? 
- pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) - pieces["distance"] = len(out.split()) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def do_vcs_install(versionfile_source: str, ipy: Optional[str]) -> None: - """Git-specific installation logic for Versioneer. - - For Git, this means creating/changing .gitattributes to mark _version.py - for export-subst keyword substitution. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - files = [versionfile_source] - if ipy: - files.append(ipy) - if "VERSIONEER_PEP518" not in globals(): - try: - my_path = __file__ - if my_path.endswith((".pyc", ".pyo")): - my_path = os.path.splitext(my_path)[0] + ".py" - versioneer_file = os.path.relpath(my_path) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) - present = False - try: - with open(".gitattributes", "r") as fobj: - for line in fobj: - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - break - except OSError: - pass - if not present: - with open(".gitattributes", "a+") as fobj: - fobj.write(f"{versionfile_source} export-subst\n") - files.append(".gitattributes") - run_command(GITS, ["add", "--"] + files) - - -def versions_from_parentdir( - parentdir_prefix: str, - root: str, - verbose: bool, -) -> Dict[str, Any]: - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.29) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. 
Distribution tarballs contain a pre-generated copy -# of this file. - -import json - -version_json = ''' -%s -''' # END VERSION_JSON - - -def get_versions(): - return json.loads(version_json) -""" - - -def versions_from_file(filename: str) -> Dict[str, Any]: - """Try to determine the version from _version.py if present.""" - try: - with open(filename) as f: - contents = f.read() - except OSError: - raise NotThisMethod("unable to read _version.py") - mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) - if not mo: - mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) - if not mo: - raise NotThisMethod("no version_json in _version.py") - return json.loads(mo.group(1)) - - -def write_to_version_file(filename: str, versions: Dict[str, Any]) -> None: - """Write the given version number to the given _version.py file.""" - contents = json.dumps(versions, sort_keys=True, - indent=1, separators=(",", ": ")) - with open(filename, "w") as f: - f.write(SHORT_VERSION_PY % contents) - - print("set %s to '%s'" % (filename, versions["version"])) - - -def plus_or_dot(pieces: Dict[str, Any]) -> str: - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces: Dict[str, Any]) -> str: - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces: Dict[str, Any]) -> str: - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: - """Split pep440 version string at the post-release segment. - - Returns the release segments before the post-release and the - post-release version number (or -1 if no post-release segment is present). - """ - vc = str.split(ver, ".post") - return vc[0], int(vc[1] or 0) if len(vc) == 2 else None - - -def render_pep440_pre(pieces: Dict[str, Any]) -> str: - """TAG[.postN.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 
0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - if pieces["distance"]: - # update the post release segment - tag_version, post_version = pep440_split_post(pieces["closest-tag"]) - rendered = tag_version - if post_version is not None: - rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) - else: - rendered += ".post0.dev%d" % (pieces["distance"]) - else: - # no commits, use the tag as the version - rendered = pieces["closest-tag"] - else: - # exception #1 - rendered = "0.post0.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces: Dict[str, Any]) -> str: - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces: Dict[str, Any]) -> str: - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces: Dict[str, Any]) -> str: - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -class VersioneerBadRootError(Exception): - """The project root directory is unknown or missing key files.""" - - -def get_versions(verbose: bool = False) -> Dict[str, Any]: - """Get the project version from whatever source is available. - - Returns dict with two keys: 'version' and 'full'. 
- """ - if "versioneer" in sys.modules: - # see the discussion in cmdclass.py:get_cmdclass() - del sys.modules["versioneer"] - - root = get_root() - cfg = get_config_from_root(root) - - assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" - handlers = HANDLERS.get(cfg.VCS) - assert handlers, "unrecognized VCS '%s'" % cfg.VCS - verbose = verbose or bool(cfg.verbose) # `bool()` used to avoid `None` - assert cfg.versionfile_source is not None, \ - "please set versioneer.versionfile_source" - assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" - - versionfile_abs = os.path.join(root, cfg.versionfile_source) - - # extract version from first of: _version.py, VCS command (e.g. 'git - # describe'), parentdir. This is meant to work for developers using a - # source checkout, for users of a tarball created by 'setup.py sdist', - # and for users of a tarball/zipball created by 'git archive' or github's - # download-from-tag feature or the equivalent in other VCSes. 
- - get_keywords_f = handlers.get("get_keywords") - from_keywords_f = handlers.get("keywords") - if get_keywords_f and from_keywords_f: - try: - keywords = get_keywords_f(versionfile_abs) - ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) - if verbose: - print("got version from expanded keyword %s" % ver) - return ver - except NotThisMethod: - pass - - try: - ver = versions_from_file(versionfile_abs) - if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) - return ver - except NotThisMethod: - pass - - from_vcs_f = handlers.get("pieces_from_vcs") - if from_vcs_f: - try: - pieces = from_vcs_f(cfg.tag_prefix, root, verbose) - ver = render(pieces, cfg.style) - if verbose: - print("got version from VCS %s" % ver) - return ver - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - if verbose: - print("got version from parentdir %s" % ver) - return ver - except NotThisMethod: - pass - - if verbose: - print("unable to compute version") - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, "error": "unable to compute version", - "date": None} - - -def get_version() -> str: - """Get the short version string for this project.""" - return get_versions()["version"] - - -def get_cmdclass(cmdclass: Optional[Dict[str, Any]] = None): - """Get the custom setuptools subclasses used by Versioneer. - - If the package uses a different cmdclass (e.g. one from numpy), it - should be provide as an argument. - """ - if "versioneer" in sys.modules: - del sys.modules["versioneer"] - # this fixes the "python setup.py develop" case (also 'install' and - # 'easy_install .'), in which subdependencies of the main project are - # built (using setup.py bdist_egg) in the same python process. Assume - # a main project A and a dependency B, which use different versions - # of Versioneer. 
A's setup.py imports A's Versioneer, leaving it in - # sys.modules by the time B's setup.py is executed, causing B to run - # with the wrong versioneer. Setuptools wraps the sub-dep builds in a - # sandbox that restores sys.modules to it's pre-build state, so the - # parent is protected against the child's "import versioneer". By - # removing ourselves from sys.modules here, before the child build - # happens, we protect the child from the parent's versioneer too. - # Also see https://github.com/python-versioneer/python-versioneer/issues/52 - - cmds = {} if cmdclass is None else cmdclass.copy() - - # we add "version" to setuptools - from setuptools import Command - - class cmd_version(Command): - description = "report generated version string" - user_options: List[Tuple[str, str, str]] = [] - boolean_options: List[str] = [] - - def initialize_options(self) -> None: - pass - - def finalize_options(self) -> None: - pass - - def run(self) -> None: - vers = get_versions(verbose=True) - print("Version: %s" % vers["version"]) - print(" full-revisionid: %s" % vers.get("full-revisionid")) - print(" dirty: %s" % vers.get("dirty")) - print(" date: %s" % vers.get("date")) - if vers["error"]: - print(" error: %s" % vers["error"]) - cmds["version"] = cmd_version - - # we override "build_py" in setuptools - # - # most invocation pathways end up running build_py: - # distutils/build -> build_py - # distutils/install -> distutils/build ->.. - # setuptools/bdist_wheel -> distutils/install ->.. - # setuptools/bdist_egg -> distutils/install_lib -> build_py - # setuptools/install -> bdist_egg ->.. - # setuptools/develop -> ? - # pip install: - # copies source tree to a tempdir before running egg_info/etc - # if .git isn't copied too, 'git describe' will fail - # then does setup.py bdist_wheel, or sometimes setup.py install - # setup.py egg_info -> ? - - # pip install -e . and setuptool/editable_wheel will invoke build_py - # but the build_py command is not expected to copy any files. 
- - # we override different "build_py" commands for both environments - if 'build_py' in cmds: - _build_py: Any = cmds['build_py'] - else: - from setuptools.command.build_py import build_py as _build_py - - class cmd_build_py(_build_py): - def run(self) -> None: - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_py.run(self) - if getattr(self, "editable_mode", False): - # During editable installs `.py` and data files are - # not copied to build_lib - return - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if cfg.versionfile_build: - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - cmds["build_py"] = cmd_build_py - - if 'build_ext' in cmds: - _build_ext: Any = cmds['build_ext'] - else: - from setuptools.command.build_ext import build_ext as _build_ext - - class cmd_build_ext(_build_ext): - def run(self) -> None: - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_ext.run(self) - if self.inplace: - # build_ext --inplace will only build extensions in - # build/lib<..> dir with no _version.py to write to. - # As in place builds will already have a _version.py - # in the module dir, we do not need to write one. - return - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if not cfg.versionfile_build: - return - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) - if not os.path.exists(target_versionfile): - print(f"Warning: {target_versionfile} does not exist, skipping " - "version update. 
This can happen if you are running build_ext " - "without first running build_py.") - return - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - cmds["build_ext"] = cmd_build_ext - - if "cx_Freeze" in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe # type: ignore - # nczeczulin reports that py2exe won't like the pep440-style string - # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. - # setup(console=[{ - # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION - # "product_version": versioneer.get_version(), - # ... - - class cmd_build_exe(_build_exe): - def run(self) -> None: - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _build_exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - cmds["build_exe"] = cmd_build_exe - del cmds["build_py"] - - if 'py2exe' in sys.modules: # py2exe enabled? 
- try: - from py2exe.setuptools_buildexe import py2exe as _py2exe # type: ignore - except ImportError: - from py2exe.distutils_buildexe import py2exe as _py2exe # type: ignore - - class cmd_py2exe(_py2exe): - def run(self) -> None: - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _py2exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - cmds["py2exe"] = cmd_py2exe - - # sdist farms its file list building out to egg_info - if 'egg_info' in cmds: - _egg_info: Any = cmds['egg_info'] - else: - from setuptools.command.egg_info import egg_info as _egg_info - - class cmd_egg_info(_egg_info): - def find_sources(self) -> None: - # egg_info.find_sources builds the manifest list and writes it - # in one shot - super().find_sources() - - # Modify the filelist and normalize it - root = get_root() - cfg = get_config_from_root(root) - self.filelist.append('versioneer.py') - if cfg.versionfile_source: - # There are rare cases where versionfile_source might not be - # included by default, so we must be explicit - self.filelist.append(cfg.versionfile_source) - self.filelist.sort() - self.filelist.remove_duplicates() - - # The write method is hidden in the manifest_maker instance that - # generated the filelist and was thrown away - # We will instead replicate their final normalization (to unicode, - # and POSIX-style paths) - from setuptools import unicode_utils - normalized = [unicode_utils.filesys_decode(f).replace(os.sep, '/') - for f in self.filelist.files] - - manifest_filename = os.path.join(self.egg_info, 'SOURCES.txt') - with 
open(manifest_filename, 'w') as fobj: - fobj.write('\n'.join(normalized)) - - cmds['egg_info'] = cmd_egg_info - - # we override different "sdist" commands for both environments - if 'sdist' in cmds: - _sdist: Any = cmds['sdist'] - else: - from setuptools.command.sdist import sdist as _sdist - - class cmd_sdist(_sdist): - def run(self) -> None: - versions = get_versions() - self._versioneer_generated_versions = versions - # unless we update this, the command will keep using the old - # version - self.distribution.metadata.version = versions["version"] - return _sdist.run(self) - - def make_release_tree(self, base_dir: str, files: List[str]) -> None: - root = get_root() - cfg = get_config_from_root(root) - _sdist.make_release_tree(self, base_dir, files) - # now locate _version.py in the new base_dir directory - # (remembering that it may be a hardlink) and replace it with an - # updated value - target_versionfile = os.path.join(base_dir, cfg.versionfile_source) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, - self._versioneer_generated_versions) - cmds["sdist"] = cmd_sdist - - return cmds - - -CONFIG_ERROR = """ -setup.cfg is missing the necessary Versioneer configuration. You need -a section like: - - [versioneer] - VCS = git - style = pep440 - versionfile_source = src/myproject/_version.py - versionfile_build = myproject/_version.py - tag_prefix = - parentdir_prefix = myproject- - -You will also need to edit your setup.py to use the results: - - import versioneer - setup(version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), ...) - -Please read the docstring in ./versioneer.py for configuration instructions, -edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. -""" - -SAMPLE_CONFIG = """ -# See the docstring in versioneer.py for instructions. Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the -# resulting files. 
- -[versioneer] -#VCS = git -#style = pep440 -#versionfile_source = -#versionfile_build = -#tag_prefix = -#parentdir_prefix = - -""" - -OLD_SNIPPET = """ -from ._version import get_versions -__version__ = get_versions()['version'] -del get_versions -""" - -INIT_PY_SNIPPET = """ -from . import {0} -__version__ = {0}.get_versions()['version'] -""" - - -def do_setup() -> int: - """Do main VCS-independent setup function for installing Versioneer.""" - root = get_root() - try: - cfg = get_config_from_root(root) - except (OSError, configparser.NoSectionError, - configparser.NoOptionError) as e: - if isinstance(e, (OSError, configparser.NoSectionError)): - print("Adding sample versioneer config to setup.cfg", - file=sys.stderr) - with open(os.path.join(root, "setup.cfg"), "a") as f: - f.write(SAMPLE_CONFIG) - print(CONFIG_ERROR, file=sys.stderr) - return 1 - - print(" creating %s" % cfg.versionfile_source) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - - ipy = os.path.join(os.path.dirname(cfg.versionfile_source), - "__init__.py") - maybe_ipy: Optional[str] = ipy - if os.path.exists(ipy): - try: - with open(ipy, "r") as f: - old = f.read() - except OSError: - old = "" - module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] - snippet = INIT_PY_SNIPPET.format(module) - if OLD_SNIPPET in old: - print(" replacing boilerplate in %s" % ipy) - with open(ipy, "w") as f: - f.write(old.replace(OLD_SNIPPET, snippet)) - elif snippet not in old: - print(" appending to %s" % ipy) - with open(ipy, "a") as f: - f.write(snippet) - else: - print(" %s unmodified" % ipy) - else: - print(" %s doesn't exist, ok" % ipy) - maybe_ipy = None - - # Make VCS-specific changes. 
For git, this means creating/changing - # .gitattributes to mark _version.py for export-subst keyword - # substitution. - do_vcs_install(cfg.versionfile_source, maybe_ipy) - return 0 - - -def scan_setup_py() -> int: - """Validate the contents of setup.py against Versioneer's expectations.""" - found = set() - setters = False - errors = 0 - with open("setup.py", "r") as f: - for line in f.readlines(): - if "import versioneer" in line: - found.add("import") - if "versioneer.get_cmdclass()" in line: - found.add("cmdclass") - if "versioneer.get_version()" in line: - found.add("get_version") - if "versioneer.VCS" in line: - setters = True - if "versioneer.versionfile_source" in line: - setters = True - if len(found) != 3: - print("") - print("Your setup.py appears to be missing some important items") - print("(but I might be wrong). Please make sure it has something") - print("roughly like the following:") - print("") - print(" import versioneer") - print(" setup( version=versioneer.get_version(),") - print(" cmdclass=versioneer.get_cmdclass(), ...)") - print("") - errors += 1 - if setters: - print("You should remove lines like 'versioneer.VCS = ' and") - print("'versioneer.versionfile_source = ' . 
This configuration") - print("now lives in setup.cfg, and should be removed from setup.py") - print("") - errors += 1 - return errors - - -def setup_command() -> NoReturn: - """Set up Versioneer and exit with appropriate error code.""" - errors = do_setup() - errors += scan_setup_py() - sys.exit(1 if errors else 0) - - -if __name__ == "__main__": - cmd = sys.argv[1] - if cmd == "setup": - setup_command() From cce9ab78aee93955b2a502cca7949ba728687353 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Mon, 27 Nov 2023 13:01:32 +0100 Subject: [PATCH 111/134] fixes --- .github/workflows/pypicheck.yml | 2 +- .github/workflows/unittest-flux.yml | 2 +- .github/workflows/unittest-mpich.yml | 2 +- .github/workflows/unittest-openmpi.yml | 2 +- .github/workflows/unittest-win.yml | 2 +- .github/workflows/unittests-old.yml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pypicheck.yml b/.github/workflows/pypicheck.yml index 32230668..5c8dbbf9 100644 --- a/.github/workflows/pypicheck.yml +++ b/.github/workflows/pypicheck.yml @@ -24,5 +24,5 @@ jobs: shell: bash -l {0} run: | pip install versioneer[toml]==0.29 - pip install --no-deps . + pip install . --no-deps --no-build-isolation pip check diff --git a/.github/workflows/unittest-flux.yml b/.github/workflows/unittest-flux.yml index 62b8f009..14fca90c 100644 --- a/.github/workflows/unittest-flux.yml +++ b/.github/workflows/unittest-flux.yml @@ -44,7 +44,7 @@ jobs: run: | mamba install -y flux-core coverage pip install versioneer[toml]==0.29 - pip install . -vv --no-deps --no-build-isolation + pip install . --no-deps --no-build-isolation cd tests coverage run --omit pympipool/_version.py -m unittest discover . 
env: diff --git a/.github/workflows/unittest-mpich.yml b/.github/workflows/unittest-mpich.yml index 0b560735..d3731c4d 100644 --- a/.github/workflows/unittest-mpich.yml +++ b/.github/workflows/unittest-mpich.yml @@ -56,6 +56,6 @@ jobs: timeout-minutes: 5 run: | pip install versioneer[toml]==0.29 - pip install . -vv --no-deps --no-build-isolation + pip install . --no-deps --no-build-isolation cd tests python -m unittest discover . diff --git a/.github/workflows/unittest-openmpi.yml b/.github/workflows/unittest-openmpi.yml index d1d5e5bb..d375e9ca 100644 --- a/.github/workflows/unittest-openmpi.yml +++ b/.github/workflows/unittest-openmpi.yml @@ -56,7 +56,7 @@ jobs: timeout-minutes: 5 run: | pip install versioneer[toml]==0.29 - pip install . -vv --no-deps --no-build-isolation + pip install . --no-deps --no-build-isolation cd tests python -m unittest discover . env: diff --git a/.github/workflows/unittest-win.yml b/.github/workflows/unittest-win.yml index 9a8fbb1f..89320cb6 100644 --- a/.github/workflows/unittest-win.yml +++ b/.github/workflows/unittest-win.yml @@ -33,6 +33,6 @@ jobs: timeout-minutes: 5 run: | pip install versioneer[toml]==0.29 - pip install . -vv --no-deps --no-build-isolation + pip install . --no-deps --no-build-isolation cd tests python -m unittest discover . diff --git a/.github/workflows/unittests-old.yml b/.github/workflows/unittests-old.yml index b8c486e8..dc49dafd 100644 --- a/.github/workflows/unittests-old.yml +++ b/.github/workflows/unittests-old.yml @@ -28,7 +28,7 @@ jobs: timeout-minutes: 5 run: | pip install versioneer[toml]==0.29 - pip install . -vv --no-deps --no-build-isolation + pip install . --no-deps --no-build-isolation cd tests python -m unittest discover . 
env: From 91b0322bbf4fe768a170892ecc07fd3eb4f67cd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Mon, 27 Nov 2023 13:15:19 +0100 Subject: [PATCH 112/134] Use conda environment to deploy --- .github/workflows/deploy.yml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index e9212bd6..f27c0392 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -14,16 +14,20 @@ jobs: runs-on: ubuntu-latest environment: name: pypi - url: https://pypi.org/p/pympipool + url: https://pypi.org/p/${{ github.event.repository.name }} permissions: id-token: write steps: - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: conda-incubator/setup-miniconda@v2.2.0 with: - python-version: "3.11" - - name: Install dependencies - run: python -m pip install --user --upgrade cloudpickle pyzmq setuptools tqdm versioneer wheel + python-version: 3.11 + mamba-version: "*" + channels: conda-forge + miniforge-variant: Mambaforge + channel-priority: strict + auto-update-conda: true + environment-file: .ci_support/environment-openmpi.yml - name: Convert dependencies run: | cp .ci_support/environment-old.yml environment.yml From 8d80522bc131960e5a868b476f65d598e704d494 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Mon, 27 Nov 2023 13:20:18 +0100 Subject: [PATCH 113/134] install versioneer via pip --- .github/workflows/deploy.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index f27c0392..6646ab71 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -33,6 +33,8 @@ jobs: cp .ci_support/environment-old.yml environment.yml python .ci_support/release.py; cat pyproject.toml - name: Build - run: python setup.py sdist bdist_wheel + run: | + pip install versioneer[toml]==0.29 + python setup.py sdist bdist_wheel - name: Publish 
distribution 📦 to PyPI uses: pypa/gh-action-pypi-publish@release/v1 From 4579516f5e532375db33cb3fce2e8ce3a5cdaf85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Mon, 27 Nov 2023 13:48:36 +0100 Subject: [PATCH 114/134] install python-build --- .github/workflows/deploy.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 6646ab71..95ba7754 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -19,6 +19,10 @@ jobs: id-token: write steps: - uses: actions/checkout@v2 + - name: Merge + run: | + cp .ci_support/environment-openmpi.yml environment.yml + echo "- python-build" >> environment.yml - uses: conda-incubator/setup-miniconda@v2.2.0 with: python-version: 3.11 @@ -27,7 +31,7 @@ jobs: miniforge-variant: Mambaforge channel-priority: strict auto-update-conda: true - environment-file: .ci_support/environment-openmpi.yml + environment-file: environment.yml - name: Convert dependencies run: | cp .ci_support/environment-old.yml environment.yml From 2e554e7674283db825b24171162d91a10112be59 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Mon, 27 Nov 2023 14:12:24 +0100 Subject: [PATCH 115/134] Update deploy.yml --- .github/workflows/deploy.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 95ba7754..b5dbe9e3 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -10,7 +10,7 @@ on: jobs: build: - if: startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' + # if: startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' runs-on: ubuntu-latest environment: name: pypi @@ -39,6 +39,7 @@ jobs: - name: Build run: | pip install versioneer[toml]==0.29 + pip list python setup.py sdist bdist_wheel - - name: Publish distribution 📦 to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + # - name: 
Publish distribution 📦 to PyPI + # uses: pypa/gh-action-pypi-publish@release/v1 From 92461e1cc3d7b641a31c013ab027fe44934aeb91 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Mon, 27 Nov 2023 14:15:27 +0100 Subject: [PATCH 116/134] Update deploy.yml --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index b5dbe9e3..13a28469 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -37,9 +37,9 @@ jobs: cp .ci_support/environment-old.yml environment.yml python .ci_support/release.py; cat pyproject.toml - name: Build + shell: bash -l {0} run: | pip install versioneer[toml]==0.29 - pip list python setup.py sdist bdist_wheel # - name: Publish distribution 📦 to PyPI # uses: pypa/gh-action-pypi-publish@release/v1 From 42e116729d8c7caf9d1bd8179a425ce62b8e635f Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Mon, 27 Nov 2023 14:18:00 +0100 Subject: [PATCH 117/134] Update deploy.yml --- .github/workflows/deploy.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 13a28469..16372519 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -10,7 +10,7 @@ on: jobs: build: - # if: startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' + if: startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' runs-on: ubuntu-latest environment: name: pypi @@ -41,5 +41,5 @@ jobs: run: | pip install versioneer[toml]==0.29 python setup.py sdist bdist_wheel - # - name: Publish distribution 📦 to PyPI - # uses: pypa/gh-action-pypi-publish@release/v1 + - name: Publish distribution 📦 to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 From 36fa2e6afb9956321eda53900894b02314ae9b9d Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Mon, 27 Nov 2023 14:22:52 +0100 Subject: [PATCH 118/134] Update deploy.yml --- 
.github/workflows/deploy.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 16372519..8315c0b5 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -19,10 +19,6 @@ jobs: id-token: write steps: - uses: actions/checkout@v2 - - name: Merge - run: | - cp .ci_support/environment-openmpi.yml environment.yml - echo "- python-build" >> environment.yml - uses: conda-incubator/setup-miniconda@v2.2.0 with: python-version: 3.11 @@ -31,7 +27,7 @@ jobs: miniforge-variant: Mambaforge channel-priority: strict auto-update-conda: true - environment-file: environment.yml + environment-file: .ci_support/environment-openmpi.yml - name: Convert dependencies run: | cp .ci_support/environment-old.yml environment.yml From 56ed7d9bc8e10890bbdd34992f9df565a286542d Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Mon, 27 Nov 2023 14:32:14 +0100 Subject: [PATCH 119/134] Fix install dependencies --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 02db79b9..66929cb9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools"] +requires = ["cloudpickle", "mpi4py", "pyzmq", "setuptools", "tqdm", "versioneer[toml]==0.29"] build-backend = "setuptools.build_meta" [project] From 36dcd0202e1580f232399329efeed02a1a8bc38b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Mon, 27 Nov 2023 16:25:38 +0100 Subject: [PATCH 120/134] Update readthedocs environment --- .readthedocs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index b4de3480..3e4be55b 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -6,9 +6,9 @@ version: 2 build: - os: "ubuntu-20.04" + os: "ubuntu-22.04" tools: - python: "mambaforge-4.10" + python: "mambaforge-22.9" # Build documentation in the docs/ directory with Sphinx sphinx: 
From e47af239e9214396ede1d42ec781f96a4913da8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Mon, 27 Nov 2023 16:38:19 +0100 Subject: [PATCH 121/134] update docs env --- .ci_support/environment-docs.yml | 21 +++++++++++---------- .github/workflows/dependabot.yml | 1 + 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/.ci_support/environment-docs.yml b/.ci_support/environment-docs.yml index 1a112178..1df656c6 100644 --- a/.ci_support/environment-docs.yml +++ b/.ci_support/environment-docs.yml @@ -1,13 +1,14 @@ channels: - conda-forge dependencies: - - nbsphinx - - sphinx - - myst-parser - - numpy - - mpich - - cloudpickle - - mpi4py - - tqdm - - pyzmq - - flux-core \ No newline at end of file +- nbsphinx +- sphinx +- myst-parser +- numpy +- openmpi +- cloudpickle =3.0.0 +- mpi4py =3.1.5 +- tqdm =4.66.1 +- pyzmq =25.1.1 +- flux-core +- versioneer =0.29 \ No newline at end of file diff --git a/.github/workflows/dependabot.yml b/.github/workflows/dependabot.yml index 85028d85..74b47f22 100644 --- a/.github/workflows/dependabot.yml +++ b/.github/workflows/dependabot.yml @@ -25,6 +25,7 @@ jobs: sed -i "/${package}/s/${from}/${to}/g" .ci_support/environment-mpich.yml sed -i "/${package}/s/${from}/${to}/g" .ci_support/environment-openmpi.yml sed -i "/${package}/s/${from}/${to}/g" .ci_support/environment-win.yml + sed -i "/${package}/s/${from}/${to}/g" .ci_support/environment-docs.yml - name: UpdateDependabotPR commit run: | git config --local user.email "pyiron@mpie.de" From bc08eb81692ec5bbfb3daa15fe0361a37de9a883 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Mon, 27 Nov 2023 17:03:06 +0100 Subject: [PATCH 122/134] Overwrite installation --- .readthedocs.yml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 3e4be55b..00d0319d 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -9,6 +9,10 @@ build: os: "ubuntu-22.04" tools: python: 
"mambaforge-22.9" + commands: + - pip install versioneer[toml]==0.29 + - pip install . --no-deps --no-build-isolation + - python -m sphinx -T -E -b html -d _build/doctrees -D language=en . $READTHEDOCS_OUTPUT/html # Build documentation in the docs/ directory with Sphinx sphinx: @@ -19,10 +23,4 @@ formats: [] # Install pyiron from conda conda: - environment: .ci_support/environment-docs.yml - -# Optionally set the version of Python and requirements required to build your docs -python: - install: - - method: pip - path: . \ No newline at end of file + environment: .ci_support/environment-docs.yml \ No newline at end of file From 4744d3f26faf295ced3633f1c1bb293b7bf6fa7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Mon, 27 Nov 2023 17:05:33 +0100 Subject: [PATCH 123/134] continue debugging --- .ci_support/environment-docs.yml | 3 +-- .readthedocs.yml | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.ci_support/environment-docs.yml b/.ci_support/environment-docs.yml index 1df656c6..cb755a1e 100644 --- a/.ci_support/environment-docs.yml +++ b/.ci_support/environment-docs.yml @@ -10,5 +10,4 @@ dependencies: - mpi4py =3.1.5 - tqdm =4.66.1 - pyzmq =25.1.1 -- flux-core -- versioneer =0.29 \ No newline at end of file +- flux-core \ No newline at end of file diff --git a/.readthedocs.yml b/.readthedocs.yml index 00d0319d..fa7d48e9 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -10,6 +10,8 @@ build: tools: python: "mambaforge-22.9" commands: + - cat .ci_support/environment-docs.yml + - mamba env create --quiet --name readthedocs --file .ci_support/environment-docs.yml - pip install versioneer[toml]==0.29 - pip install . --no-deps --no-build-isolation - python -m sphinx -T -E -b html -d _build/doctrees -D language=en . 
$READTHEDOCS_OUTPUT/html From f7c3ca5bd41d954864ec40ee48046444fa387a80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Mon, 27 Nov 2023 17:10:44 +0100 Subject: [PATCH 124/134] try pre-build --- .readthedocs.yml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index fa7d48e9..38c2541e 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -9,12 +9,10 @@ build: os: "ubuntu-22.04" tools: python: "mambaforge-22.9" - commands: - - cat .ci_support/environment-docs.yml - - mamba env create --quiet --name readthedocs --file .ci_support/environment-docs.yml - - pip install versioneer[toml]==0.29 - - pip install . --no-deps --no-build-isolation - - python -m sphinx -T -E -b html -d _build/doctrees -D language=en . $READTHEDOCS_OUTPUT/html + jobs: + pre_build: + - pip install versioneer[toml]==0.29 + - pip install . --no-deps --no-build-isolation # Build documentation in the docs/ directory with Sphinx sphinx: From d98e9245b1c2dfe2a856973af3bdc313e03c4af2 Mon Sep 17 00:00:00 2001 From: James Corbett Date: Mon, 27 Nov 2023 17:30:33 -0800 Subject: [PATCH 125/134] docs: grammar tweaks Problem: some of the grammar in the docs could be improved. Move some commas around and add some dashes. --- docs/source/examples.md | 2 +- docs/source/index.rst | 21 ++++++++++----------- docs/source/installation.md | 6 +++--- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/docs/source/examples.md b/docs/source/examples.md index 3e69e749..978cd4d8 100644 --- a/docs/source/examples.md +++ b/docs/source/examples.md @@ -14,7 +14,7 @@ with ThreadPoolExecutor( future = exe.submit(sum, [1, 1]) print(future.result()) ``` -In this case `max_workers=1` limits the number of threads uses by the `ThreadPoolExecutor` to one. Then the `sum()` +In this case `max_workers=1` limits the number of threads used by the `ThreadPoolExecutor` to one. 
Then the `sum()` function is submitted to the executor with a list with two ones `[1, 1]` as input. A [`concurrent.futures.Future`](https://docs.python.org/3/library/concurrent.futures.html#module-concurrent.futures) object is returned. The `Future` object allows to check the status of the execution with the `done()` method which returns `True` or `False` depending on the state of the execution. Or the main process can wait until the execution is diff --git a/docs/source/index.rst b/docs/source/index.rst index d11a4f5a..d166a2f5 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -8,29 +8,27 @@ pympipool - up-scale python functions for high performance computing Up-scaling python functions for high performance computing (HPC) can be challenging. While the python standard library provides interfaces for multiprocessing and asynchronous task execution, namely `multiprocessing `_ and -`concurrent.futures `_ both are +`concurrent.futures `_, both are limited to the execution on a single compute node. So a series of python libraries have been developed to address the up-scaling of python functions for HPC. Starting in the datascience and machine learning community with solutions -like `dask `_ over more HPC focused solutions like -`fireworks `_ and `parsl `_ up to Python +like `dask `_, over to more HPC-focused solutions like +`fireworks `_ and `parsl `_, up to Python bindings for the message passing interface (MPI) named `mpi4py `_. Each of these -solutions has their advantages and disadvantages, in particular scaling beyond serial python functions, including thread -based parallelism, MPI parallel python application or assignment of GPUs to individual python function remains -challenging. +solutions has its advantages and disadvantages. However, one disadvantage common to all these libraries is the relative difficulty of scaling from serial functions to functions that make use of thread-based, MPI-based, or GPU-based parallelism. 
To address these challenges :code:`pympipool` is developed with three goals in mind: * Extend the standard python library `concurrent.futures.Executor `_ interface, to minimize the barrier of up-scaling an existing workflow to be used on HPC resources. -* Integrate thread based parallelism, MPI parallel python functions based on `mpi4py `_ and GPU assignment. This allows the users to accelerate their workflows one function at a time. +* Integrate thread-based parallelism, MPI-parallel python functions based on `mpi4py `_, and GPU assignment. This allows users to accelerate their workflows one function at a time. * Embrace `Jupyter `_ notebooks for the interactive development of HPC workflows, as they allow the users to document their though process right next to the python code and their results all within one document. HPC Context ----------- -In contrast to frameworks like `dask `_, `fireworks `_ -and `parsl `_ which can be used to submit a number of worker processes directly the the HPC +Frameworks like `dask `_, `fireworks `_ +and `parsl `_ can be used to submit a number of worker processes directly to the HPC queuing system and then transfer tasks from either the login node or an interactive allocation to these worker processes -to accelerate the execution, `mpi4py `_ and :code:`pympipool` follow a different -approach. Here the user creates their HPC allocation first and then `mpi4py `_ or +to accelerate the execution. By contrast, `mpi4py `_ and :code:`pympipool` follow a different +approach, in which the user creates their HPC allocation first and then `mpi4py `_ or :code:`pympipool` can be used to distribute the tasks within this allocation. The advantage of this approach is that no central data storage is required as the workers and the scheduling task can communicate directly. 
@@ -69,6 +67,7 @@ The same code can also be executed inside a jupyter notebook directly which enab The standard `concurrent.futures.Executor `_ interface is extended by adding the option :code:`cores_per_worker=2` to assign multiple MPI ranks to each function call. To create two workers :code:`max_workers=2` each with two cores each requires a total of four CPU cores to be available. + After submitting the function :code:`calc()` with the corresponding parameter to the executor :code:`exe.submit(calc, 0)` a python `concurrent.futures.Future `_ is returned. Consequently, the :code:`pympipool.Executor` can be used as a drop-in replacement for the diff --git a/docs/source/installation.md b/docs/source/installation.md index 83abc2f0..6a4c7680 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -38,10 +38,10 @@ pip install pympipool ## High Performance Computing `pympipool` currently provides interfaces to the [SLURM workload manager](https://www.schedmd.com) and the [flux framework](https://flux-framework.org). With the [flux framework](https://flux-framework.org) being the -recommended solution as it can be installed without root user rights and it can be integrated in existing resource +recommended solution as it can be installed without root permissions and it can be integrated in existing resource managers like the [SLURM workload manager](https://www.schedmd.com). The advantages of using `pympipool` in combination -with these resource schedulers is the fine-grained resource allocation. In addition, to scaling beyond a single compute -node they add the ability to assign GPUs and thread based parallelism. The two resource manager are internally linked to +with these resource schedulers is the fine-grained resource allocation. In addition to scaling beyond a single compute +node, they add the ability to assign GPUs and thread based parallelism. 
The two resource manager are internally linked to two interfaces: * `pympipool.slurm.PySlurmExecutor`: The interface for the [SLURM workload manager](https://www.schedmd.com). From 81c4f1674be8a7d68a12cc398912c9e996d15347 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Mon, 4 Dec 2023 11:15:44 -0800 Subject: [PATCH 126/134] Test on older version of OSX --- .github/workflows/unittest-mpich.yml | 2 +- .github/workflows/unittest-openmpi.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/unittest-mpich.yml b/.github/workflows/unittest-mpich.yml index d3731c4d..a9049d17 100644 --- a/.github/workflows/unittest-mpich.yml +++ b/.github/workflows/unittest-mpich.yml @@ -15,7 +15,7 @@ jobs: strategy: matrix: include: - - operating-system: macos-latest + - operating-system: macos-11 python-version: '3.11' label: osx-64-py-3-11-mpich prefix: /Users/runner/miniconda3/envs/my-env diff --git a/.github/workflows/unittest-openmpi.yml b/.github/workflows/unittest-openmpi.yml index d375e9ca..7f37d0e7 100644 --- a/.github/workflows/unittest-openmpi.yml +++ b/.github/workflows/unittest-openmpi.yml @@ -15,7 +15,7 @@ jobs: strategy: matrix: include: - - operating-system: macos-latest + - operating-system: macos-11 python-version: '3.11' label: osx-64-py-3-11-openmpi prefix: /Users/runner/miniconda3/envs/my-env From 97a52579dff6d4e336142c774c886b30b513d549 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 5 Dec 2023 12:18:11 +0000 Subject: [PATCH 127/134] Bump pyzmq from 25.1.1 to 25.1.2 Bumps [pyzmq](https://github.com/zeromq/pyzmq) from 25.1.1 to 25.1.2. - [Release notes](https://github.com/zeromq/pyzmq/releases) - [Commits](https://github.com/zeromq/pyzmq/compare/v25.1.1...v25.1.2) --- updated-dependencies: - dependency-name: pyzmq dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 66929cb9..98335502 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ dependencies = [ "cloudpickle==3.0.0", "mpi4py==3.1.5", - "pyzmq==25.1.1", + "pyzmq==25.1.2", "tqdm==4.66.1", ] dynamic = ["version"] From 98ccb26b6526641d148e030da2351b492afce9a2 Mon Sep 17 00:00:00 2001 From: pyironrunner Date: Tue, 5 Dec 2023 12:18:28 +0000 Subject: [PATCH 128/134] [dependabot skip] Update environment --- .ci_support/environment-docs.yml | 2 +- .ci_support/environment-mpich.yml | 2 +- .ci_support/environment-openmpi.yml | 2 +- .ci_support/environment-win.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.ci_support/environment-docs.yml b/.ci_support/environment-docs.yml index cb755a1e..98257ad7 100644 --- a/.ci_support/environment-docs.yml +++ b/.ci_support/environment-docs.yml @@ -9,5 +9,5 @@ dependencies: - cloudpickle =3.0.0 - mpi4py =3.1.5 - tqdm =4.66.1 -- pyzmq =25.1.1 +- pyzmq =25.1.2 - flux-core \ No newline at end of file diff --git a/.ci_support/environment-mpich.yml b/.ci_support/environment-mpich.yml index db00917a..510cd94d 100644 --- a/.ci_support/environment-mpich.yml +++ b/.ci_support/environment-mpich.yml @@ -7,4 +7,4 @@ dependencies: - cloudpickle =3.0.0 - mpi4py =3.1.5 - tqdm =4.66.1 -- pyzmq =25.1.1 +- pyzmq =25.1.2 diff --git a/.ci_support/environment-openmpi.yml b/.ci_support/environment-openmpi.yml index d9c93492..4f35bca9 100644 --- a/.ci_support/environment-openmpi.yml +++ b/.ci_support/environment-openmpi.yml @@ -7,4 +7,4 @@ dependencies: - cloudpickle =3.0.0 - mpi4py =3.1.5 - tqdm =4.66.1 -- pyzmq =25.1.1 +- pyzmq =25.1.2 diff --git a/.ci_support/environment-win.yml b/.ci_support/environment-win.yml index 7ca9a6d8..4fb6e616 100644 --- a/.ci_support/environment-win.yml +++ b/.ci_support/environment-win.yml @@ -7,4 +7,4 @@ dependencies: - 
cloudpickle =3.0.0 - mpi4py =3.1.5 - tqdm =4.66.1 -- pyzmq =25.1.1 +- pyzmq =25.1.2 From c317de0c6961a62e15626e8b826ba0dfb052e347 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 6 Dec 2023 13:22:04 -0800 Subject: [PATCH 129/134] Add a cron test to check macos-latest --- .github/workflows/check-macos-latest.yml | 34 ++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/workflows/check-macos-latest.yml diff --git a/.github/workflows/check-macos-latest.yml b/.github/workflows/check-macos-latest.yml new file mode 100644 index 00000000..e55e1688 --- /dev/null +++ b/.github/workflows/check-macos-latest.yml @@ -0,0 +1,34 @@ +# We are waiting on the macos-latest image to play nicely with MPI + +name: Is-macos-latest-working-yet + +on: + schedule: + - cron: '0 23 * * 2' + +jobs: + openmpi-on-macos-latest: + runs-on: macos-latest + steps: + - uses: actions/checkout@v2 + - uses: conda-incubator/setup-miniconda@v2.2.0 + with: + python-version: "3.11" + mamba-version: "*" + channels: conda-forge + miniforge-variant: Mambaforge + channel-priority: strict + auto-update-conda: true + environment-file: .ci_support/environment-openmpi.yml + - name: Test + shell: bash -l {0} + timeout-minutes: 10 + run: | + pip install versioneer[toml]==0.29 + pip install . --no-deps --no-build-isolation + cd tests + python -m unittest discover . 
+ env: + OMPI_MCA_plm: 'isolated' + OMPI_MCA_rmaps_base_oversubscribe: 'yes' + OMPI_MCA_btl_vader_single_copy_mechanism: 'none' From b63a782a6971c43bad7bbec5b9f38afc6a2037e6 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 6 Dec 2023 13:23:09 -0800 Subject: [PATCH 130/134] Make it so we can run it manually --- .github/workflows/check-macos-latest.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/check-macos-latest.yml b/.github/workflows/check-macos-latest.yml index e55e1688..dc28c9d4 100644 --- a/.github/workflows/check-macos-latest.yml +++ b/.github/workflows/check-macos-latest.yml @@ -5,6 +5,7 @@ name: Is-macos-latest-working-yet on: schedule: - cron: '0 23 * * 2' + workflow_dispatch: jobs: openmpi-on-macos-latest: From 9e0af3d67cd763a5dd276a88fca66e758ee369fa Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 6 Dec 2023 13:24:53 -0800 Subject: [PATCH 131/134] Run it on the CI this once I'll remove this once the test is behaving as expected. I just can't do the manual trigger on a workflow that's only on a branch --- .github/workflows/check-macos-latest.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/check-macos-latest.yml b/.github/workflows/check-macos-latest.yml index dc28c9d4..f4020eef 100644 --- a/.github/workflows/check-macos-latest.yml +++ b/.github/workflows/check-macos-latest.yml @@ -6,10 +6,11 @@ on: schedule: - cron: '0 23 * * 2' workflow_dispatch: + pull_request: jobs: openmpi-on-macos-latest: - runs-on: macos-latest + runs-on: macos-11 steps: - uses: actions/checkout@v2 - uses: conda-incubator/setup-miniconda@v2.2.0 From cbc2c2398b56087a025f74448d992778cae19e2a Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 6 Dec 2023 13:34:48 -0800 Subject: [PATCH 132/134] Test latest and not on every PR It worked just fine with the macos-11 tests, so there is nothing wrong with the workflow file. 
--- .github/workflows/check-macos-latest.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/check-macos-latest.yml b/.github/workflows/check-macos-latest.yml index f4020eef..dc28c9d4 100644 --- a/.github/workflows/check-macos-latest.yml +++ b/.github/workflows/check-macos-latest.yml @@ -6,11 +6,10 @@ on: schedule: - cron: '0 23 * * 2' workflow_dispatch: - pull_request: jobs: openmpi-on-macos-latest: - runs-on: macos-11 + runs-on: macos-latest steps: - uses: actions/checkout@v2 - uses: conda-incubator/setup-miniconda@v2.2.0 From 359647ed0f75db2cf48f9ac6b3f8a198c33e483a Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Thu, 11 Jan 2024 12:13:41 +0100 Subject: [PATCH 133/134] Delete .github/delete-merged-branch-config.yml Based on https://github.com/pyiron/infrastructure/issues/146 --- .github/delete-merged-branch-config.yml | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 .github/delete-merged-branch-config.yml diff --git a/.github/delete-merged-branch-config.yml b/.github/delete-merged-branch-config.yml deleted file mode 100644 index da32d1c9..00000000 --- a/.github/delete-merged-branch-config.yml +++ /dev/null @@ -1,3 +0,0 @@ -exclude: - - main -delete_closed_pr: false \ No newline at end of file From 2ad8c71886540d36863d1229de1dcbf596cada3a Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Thu, 18 Jan 2024 15:32:18 +0100 Subject: [PATCH 134/134] Update unittest-flux.yml --- .github/workflows/unittest-flux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unittest-flux.yml b/.github/workflows/unittest-flux.yml index 14fca90c..ec298d9a 100644 --- a/.github/workflows/unittest-flux.yml +++ b/.github/workflows/unittest-flux.yml @@ -42,7 +42,7 @@ jobs: shell: bash -l {0} timeout-minutes: 5 run: | - mamba install -y flux-core coverage + mamba install -y flux-core=0.58.0 coverage pip install versioneer[toml]==0.29 pip install . --no-deps --no-build-isolation cd tests