diff --git a/.ci_support/environment-docs.yml b/.ci_support/environment-docs.yml
index 0b6f4e3cc..4063f5d2a 100644
--- a/.ci_support/environment-docs.yml
+++ b/.ci_support/environment-docs.yml
@@ -19,7 +19,7 @@ dependencies:
 - psutil =6.0.0
 - pyfileindex =0.0.27
 - pyiron_snippets =0.1.4
-- executorlib =0.0.1
+- executorlib =0.0.2
 - pysqa =0.1.21
 - pytables =3.10.1
 - sqlalchemy =2.0.32
diff --git a/.ci_support/environment-mini.yml b/.ci_support/environment-mini.yml
index aafc597ba..65e3cd8f0 100644
--- a/.ci_support/environment-mini.yml
+++ b/.ci_support/environment-mini.yml
@@ -10,7 +10,7 @@ dependencies:
 - psutil =6.0.0
 - pyfileindex =0.0.27
 - pyiron_snippets =0.1.3
-- executorlib =0.0.1
+- executorlib =0.0.2
 - pysqa =0.1.21
 - pytables =3.10.1
 - sqlalchemy =2.0.32
diff --git a/.ci_support/environment.yml b/.ci_support/environment.yml
index de400c55a..d58fc18d9 100644
--- a/.ci_support/environment.yml
+++ b/.ci_support/environment.yml
@@ -17,7 +17,7 @@ dependencies:
 - psutil =6.0.0
 - pyfileindex =0.0.27
 - pyiron_snippets =0.1.4
-- executorlib =0.0.1
+- executorlib =0.0.2
 - pysqa =0.1.21
 - pytables =3.10.1
 - sqlalchemy =2.0.32
diff --git a/.github/workflows/unittests_old.yml b/.github/workflows/unittests_old.yml
index 530e10dd4..c8cb5716c 100644
--- a/.github/workflows/unittests_old.yml
+++ b/.github/workflows/unittests_old.yml
@@ -13,7 +13,7 @@ jobs:
     - name: Setup Mambaforge
       uses: conda-incubator/setup-miniconda@v3
       with:
-        python-version: '3.9'
+        python-version: '3.10'
         miniforge-variant: Mambaforge
         channels: conda-forge
         channel-priority: strict
diff --git a/binder/environment.yml b/binder/environment.yml
index aae94e696..afe8e4750 100644
--- a/binder/environment.yml
+++ b/binder/environment.yml
@@ -16,7 +16,7 @@ dependencies:
 - psutil =6.0.0
 - pyfileindex =0.0.27
 - pyiron_snippets =0.1.4
-- executorlib =0.0.1
+- executorlib =0.0.2
 - pysqa =0.1.21
 - pytables =3.10.1
 - sqlalchemy =2.0.32
diff --git a/pyiron_base/interfaces/has_dict.py b/pyiron_base/interfaces/has_dict.py
index e3eb01634..5e619df9b 100644
--- a/pyiron_base/interfaces/has_dict.py
+++ b/pyiron_base/interfaces/has_dict.py
@@ -16,6 +16,7 @@
 """

 from abc import ABC, abstractmethod
+from collections import defaultdict
 from typing import Any

 from pyiron_base.interfaces.has_hdf import HasHDF
@@ -54,7 +55,7 @@ def create_from_dict(obj_dict):
     type_field = obj_dict["TYPE"]
     module_path, class_name = _extract_module_class_name(type_field)
     class_object = _import_class(module_path, class_name)
-    version = obj_dict.get("VERSION", None)
+    version = obj_dict.get("DICT_VERSION", None)
     obj = class_object.instantiate(obj_dict, version)
     obj.from_dict(obj_dict, version)
     return obj
@@ -122,6 +123,9 @@ def load(inner_dict):
                 return {k: load(v) for k, v in inner_dict.items()}
             return create_from_dict(inner_dict)

+        obj_dict = self._split_children_dict(obj_dict)
+        if version is None:
+            version = obj_dict.get("DICT_VERSION", None)
         self._from_dict({k: load(v) for k, v in obj_dict.items()}, version)

     @abstractmethod
@@ -208,9 +212,28 @@ def _join_children_dict(children: dict[str, dict[str, Any]]) -> dict[str, Any]:
         return {
             "/".join((k1, k2)): v2
             for k1, v1 in children.items()
-            for k2, v2 in v2.items()
+            for k2, v2 in v1.items()
         }

+    @staticmethod
+    def _split_children_dict(
+        obj_dict: dict[str, Any],
+    ) -> dict[str, Any | dict[str, Any]]:
+        """
+        Undoes _join_children_dict.
+        """
+        subs = defaultdict(dict)
+        plain = {}
+        for k, v in obj_dict.items():
+            if "/" not in k:
+                plain[k] = v
+                continue
+            root, k = k.split("/", maxsplit=1)
+            subs[root][k] = v
+        # using update keeps type stability, i.e. we always return a plain dict
+        plain.update(subs)
+        return plain
+

 class HasHDFfromDict(HasHDF, HasDict):
     """
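For reference, a minimal standalone sketch of the round trip these two helpers implement (the helper body mirrors the patch; the demo dict and class name are invented for illustration):

```python
from collections import defaultdict


def split_children_dict(obj_dict):
    # Mirrors HasDict._split_children_dict from the patch: flat "child/key"
    # entries are regrouped under their child name; plain keys pass through.
    subs = defaultdict(dict)
    plain = {}
    for k, v in obj_dict.items():
        if "/" not in k:
            plain[k] = v
            continue
        root, k = k.split("/", maxsplit=1)
        subs[root][k] = v
    # update keeps the return type a plain dict
    plain.update(subs)
    return plain


flat = {"TYPE": "<class 'Demo'>", "server/cores": 4, "server/run_mode": "modal"}
print(split_children_dict(flat))
# {'TYPE': "<class 'Demo'>", 'server': {'cores': 4, 'run_mode': 'modal'}}
```

Because of `maxsplit=1`, only the first path segment is split off; any deeper `/`-joined segments remain inside the child dict and are split again when the child's own `from_dict` runs.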
+ """ + subs = defaultdict(dict) + plain = {} + for k, v in obj_dict.items(): + if "/" not in k: + plain[k] = v + continue + root, k = k.split("/", maxsplit=1) + subs[root][k] = v + # using update keeps type stability, i.e. we always return a plain dict + plain.update(subs) + return plain + class HasHDFfromDict(HasHDF, HasDict): """ diff --git a/pyiron_base/jobs/job/extension/executable.py b/pyiron_base/jobs/job/extension/executable.py index 034d40fa4..8df0492b6 100644 --- a/pyiron_base/jobs/job/extension/executable.py +++ b/pyiron_base/jobs/job/extension/executable.py @@ -3,7 +3,7 @@ # Distributed under the terms of "New BSD License", see the LICENSE file. import os -from dataclasses import asdict +from dataclasses import asdict, fields from pyiron_snippets.resources import ExecutableResolver @@ -208,20 +208,17 @@ def executable_path(self, new_path): @classmethod def instantiate(cls, obj_dict: dict, version: str = None) -> "Self": - return cls(codename=obj_dict["name"]) + try: + codename = obj_dict["name"] + except KeyError: + codename = obj_dict["executable"]["name"] + return cls(codename=codename) def _to_dict(self): return asdict(self.storage) def _from_dict(self, obj_dict, version=None): - data_container_keys = [ - "version", - "name", - "operation_system_nt", - "executable", - "mpi", - "accepted_return_codes", - ] + data_container_keys = tuple(f.name for f in fields(ExecutableDataClass)) executable_class_dict = {} # Backwards compatibility; dict state used to be nested one level deeper if "executable" in obj_dict.keys() and isinstance(obj_dict["executable"], dict): diff --git a/pyiron_base/jobs/job/extension/server/generic.py b/pyiron_base/jobs/job/extension/server/generic.py index a08fc7cc8..df4b4f44d 100644 --- a/pyiron_base/jobs/job/extension/server/generic.py +++ b/pyiron_base/jobs/job/extension/server/generic.py @@ -8,7 +8,7 @@ import numbers import socket from concurrent.futures import Executor, Future -from dataclasses import asdict +from dataclasses import asdict, fields from typing import Union from pyiron_snippets.deprecate import deprecate @@ -564,7 +564,6 @@ def view_queues(): def _to_dict(self): self._data.run_mode = self._run_mode.mode return asdict(self._data) - return server_dict def _from_dict(self, obj_dict, version=None): # backwards compatibility @@ -578,9 +577,11 @@ def _from_dict(self, obj_dict, version=None): if "additional_arguments" not in obj_dict.keys(): obj_dict["additional_arguments"] = {} - # Reload dataclass - for key in ["NAME", "TYPE", "OBJECT", "VERSION", "DICT_VERSION"]: - if key in obj_dict.keys(): + # Reload dataclass and discard unknown keys + server_fields = tuple(f.name for f in fields(ServerDataClass)) + # force tuple otherwise dict complains about changing size + for key in tuple(obj_dict): + if key not in server_fields: del obj_dict[key] self._data = ServerDataClass(**obj_dict) self._run_mode = Runmode(mode=self._data.run_mode) diff --git a/pyiron_base/jobs/job/generic.py b/pyiron_base/jobs/job/generic.py index aa2a85426..a9107f624 100644 --- a/pyiron_base/jobs/job/generic.py +++ b/pyiron_base/jobs/job/generic.py @@ -1178,9 +1178,20 @@ def _from_dict(self, obj_dict, version=None): self._type_from_dict(type_dict=obj_dict) if "import_directory" in obj_dict.keys(): self._import_directory = obj_dict["import_directory"] - self._server = obj_dict["server"] + # Backwards compatibility: Previously server and executable were stored + # as plain dicts, but now they are dicts with additional info so that + # HasDict can load them automatically. 
diff --git a/pyiron_base/jobs/job/generic.py b/pyiron_base/jobs/job/generic.py
index aa2a85426..a9107f624 100644
--- a/pyiron_base/jobs/job/generic.py
+++ b/pyiron_base/jobs/job/generic.py
@@ -1178,9 +1178,20 @@ def _from_dict(self, obj_dict, version=None):
         self._type_from_dict(type_dict=obj_dict)
         if "import_directory" in obj_dict.keys():
             self._import_directory = obj_dict["import_directory"]
-        self._server = obj_dict["server"]
+        # Backwards compatibility: Previously server and executable were stored
+        # as plain dicts, but now they are dicts with additional info so that
+        # HasDict can load them automatically.
+        # We need to check whether that was possible with the instance check
+        # below and if not, call from_dict ourselves.
+        if isinstance(server := obj_dict["server"], Server):
+            self._server = server
+        else:
+            self._server.from_dict(server)
         if "executable" in obj_dict.keys() and obj_dict["executable"] is not None:
-            self._executable = obj_dict["executable"]
+            if isinstance(executable := obj_dict["executable"], Executable):
+                self._executable = executable
+            else:
+                self.executable.from_dict(executable)
         input_dict = obj_dict["input"]
         if "generic_dict" in input_dict.keys():
             generic_dict = input_dict["generic_dict"]
diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py
index bbb66b377..1bc03a995 100644
--- a/pyiron_base/jobs/job/util.py
+++ b/pyiron_base/jobs/job/util.py
@@ -95,12 +95,12 @@ def _get_project_for_copy(job, project, new_job_name):
     ):
         file_project = project.project
         hdf5_project = project.project_hdf5.open(new_job_name)
-    elif isinstance(project, job.project.__class__):
+    elif isinstance(job.project, project.__class__):
         file_project = project
         hdf5_project = job.project_hdf5.__class__(
             project=project, file_name=new_job_name, h5_path="/" + new_job_name
         )
-    elif isinstance(project, job.project_hdf5.__class__):
+    elif isinstance(job.project_hdf5, project.__class__):
         file_project = project.project
         hdf5_project = project.open(new_job_name)
     elif project is None:
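The `util.py` change swaps the `isinstance` arguments because `isinstance(a, B)` also accepts instances of subclasses of `B`: checking the passed-in `project` against the job's (typically more derived) class rejected plain base-class projects. A toy illustration with invented classes:

```python
class Project:                  # stand-in for the generic project class
    pass


class SpecialProject(Project):  # stand-in for job.project's actual class
    pass


job_project = SpecialProject()
passed = Project()

# old check: is the passed object an instance of the job's derived class? No.
print(isinstance(passed, job_project.__class__))  # False
# new check: is the job's project an instance of the passed object's class? Yes.
print(isinstance(job_project, passed.__class__))  # True
```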
""" df = get_dataframe(origin_path=origin_path) - common_path = os.path.commonpath(list(df["project"])) - copytree(os.path.join(origin_path, common_path), project_path, dirs_exist_ok=True) + common_path = posixpath.commonpath(list(df["project"])) + copytree(posixpath.join(origin_path, common_path), project_path, dirs_exist_ok=True) return df, common_path diff --git a/pyiron_base/storage/datacontainer.py b/pyiron_base/storage/datacontainer.py index 46346c063..bba832713 100644 --- a/pyiron_base/storage/datacontainer.py +++ b/pyiron_base/storage/datacontainer.py @@ -40,6 +40,7 @@ "HDF_VERSION", "DICT_VERSION", "READ_ONLY", + "KEY_ORDER", ] @@ -827,6 +828,7 @@ def _on_unlock(self): class DataContainer(DataContainerBase, HasHDF, HasDict): + __dict_version__ = "0.2.0" __doc__ = f"""{DataContainerBase.__doc__} If instantiated with the argument `lazy=True`, data read from HDF5 later via :method:`.from_hdf` are not actually @@ -1027,13 +1029,28 @@ def to(v): return data def _to_dict(self): - return {"data": self.to_builtin(), "READ_ONLY": self.read_only} + # stringify keys in case we are acting like a list + data = {str(k): v for k, v in dict(self).items()} + order = list(data) + data["READ_ONLY"] = self.read_only + data["KEY_ORDER"] = order + return data def _from_dict(self, obj_dict, version=None): + if version == "0.2.0": + order = obj_dict.pop("KEY_ORDER") + else: + order = None + self.read_only = obj_dict.pop("READ_ONLY", False) + for key in _internal_hdf_nodes: + obj_dict.pop(key, None) with self.unlocked(): self.clear() - self.update(obj_dict["data"], wrap=True) - self.read_only = obj_dict.get("READ_ONLY", False) + if order is not None: + for key in order: + self[key] = obj_dict[key] + else: + self.update(obj_dict) HDFStub.register(DataContainer, lambda h, g: h[g].to_object(lazy=True)) diff --git a/pyproject.toml b/pyproject.toml index cd064fd2c..6118082bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,14 +18,13 @@ classifiers = [ "License :: OSI Approved :: BSD License", "Intended Audience :: Science/Research", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", ] dependencies = [ "cloudpickle==3.0.0", - "executorlib==0.0.1", + "executorlib==0.0.2", "h5io_browser==0.1.0", "h5py==3.11.0", "numpy==2.1.0", diff --git a/tests/static/pack/export.csv b/tests/static/pack/export.csv new file mode 100644 index 000000000..2a0997cb5 --- /dev/null +++ b/tests/static/pack/export.csv @@ -0,0 +1,2 @@ +,id,status,chemicalformula,job,subjob,project,timestart,timestop,totalcputime,computer,hamilton,hamversion,parentid,masterid +0,0,finished,,toy,/toy,test_pack/my_project,2024-08-22 16:10:26.556984,,,pyiron@7720454e9ac5#1,ToyJob,0.4,, diff --git a/tests/static/pack/test_pack.tar.gz b/tests/static/pack/test_pack.tar.gz new file mode 100644 index 000000000..5742bc3f6 Binary files /dev/null and b/tests/static/pack/test_pack.tar.gz differ diff --git a/tests/unit/archiving/test_import.py b/tests/unit/archiving/test_import.py index 15d92ae47..4ccc93d0e 100644 --- a/tests/unit/archiving/test_import.py +++ b/tests/unit/archiving/test_import.py @@ -3,7 +3,7 @@ from pyiron_base import Project from pandas._testing import assert_frame_equal from filecmp import dircmp -from shutil import rmtree +from shutil import rmtree, copytree import tarfile from pyiron_base._tests import PyironTestCase, ToyJob @@ -181,5 +181,26 @@ def test_backwards_compatibility(self): 
diff --git a/tests/unit/archiving/test_import.py b/tests/unit/archiving/test_import.py
index 15d92ae47..4ccc93d0e 100644
--- a/tests/unit/archiving/test_import.py
+++ b/tests/unit/archiving/test_import.py
@@ -3,7 +3,7 @@
 from pyiron_base import Project
 from pandas._testing import assert_frame_equal
 from filecmp import dircmp
-from shutil import rmtree
+from shutil import rmtree, copytree
 import tarfile
 from pyiron_base._tests import PyironTestCase, ToyJob

@@ -181,5 +181,26 @@ def test_backwards_compatibility(self):
             self.imp_pr.unpack(origin_path=self.arch_dir_comp, csv_file_name="ahoy.csv")


+class TestUnpackingBackwardsCompatibility(PyironTestCase):
+    def test_import_old_tar(self):
+        copytree(
+            os.path.join(
+                os.path.dirname(os.path.abspath(__file__)),
+                "../../static/pack",
+            ),
+            os.getcwd(),
+            dirs_exist_ok=True,
+        )
+        pr = Project("old_tar")
+        pr.unpack(origin_path="test_pack.tar.gz")
+        job = pr.load("toy")
+        self.assertEqual(job.job_name, "toy")
+        self.assertEqual(job.input.data_in, 100)
+        self.assertEqual(job.output.data_out, 101)
+        pr.remove(enable=True, enforce=True)
+        os.remove("test_pack.tar.gz")
+        os.remove("export.csv")
+
+
 if __name__ == "__main__":
     unittest.main()
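For context, the checked-in `test_pack.tar.gz` fixture exercised above can in principle be regenerated with a pack round trip along these lines (illustrative only; the exact `pack` signature and the ToyJob defaults should be checked against the release that produced the fixture):

```python
from pyiron_base import Project
from pyiron_base._tests import ToyJob

# Illustrative sketch: build the toy job the fixture contains and pack it.
pr = Project("test_pack/my_project")
job = pr.create_job(ToyJob, "toy")
job.run()
# Assumed signature from older pyiron_base releases:
# pack(destination_path, csv_file_name="export.csv", compress=True)
pr.pack(destination_path="test_pack", csv_file_name="export.csv")
```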