From fc16e1f8b1568822db2ff321861be764b41d5cb3 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Fri, 30 Aug 2024 17:12:33 +0200 Subject: [PATCH] Remove automatic recursive to_dict/from_dict again This turned out to have negative interactions when writing and reading objects from GenericJob, which currently employs a mixture of to_hdf and newer to_dict interfaces. When reading the objects again the recursive strategy can get confused about which interface to use. Since it is mostly relevant for the DataContainer anyway, I have extracted this functionality into stand-alone functions that operate on the obj_dicts. Classes that wish to use it and do not confuse the two interfaces can call these functions in their implementations of _to_dict and _from_dict as done by DataContainer now. --- pyiron_base/interfaces/has_dict.py | 129 +++++++++++++++------------ pyiron_base/jobs/job/generic.py | 16 +--- pyiron_base/storage/datacontainer.py | 5 +- 3 files changed, 76 insertions(+), 74 deletions(-) diff --git a/pyiron_base/interfaces/has_dict.py b/pyiron_base/interfaces/has_dict.py index 5e619df9b..84eb6d113 100644 --- a/pyiron_base/interfaces/has_dict.py +++ b/pyiron_base/interfaces/has_dict.py @@ -61,6 +61,73 @@ def create_from_dict(obj_dict): return obj +def _split_children_dict(obj_dict: dict[str, Any]) -> dict[str, Any | dict[str, Any]]: + """ + Undoes _join_children_dict. + """ + subs = defaultdict(dict) + plain = {} + for k, v in obj_dict.items(): + if "/" not in k: + plain[k] = v + continue + root, k = k.split("/", maxsplit=1) + subs[root][k] = v + # using update keeps type stability, i.e. 
we always return a plain dict + plain.update(subs) + return plain + +def _from_dict_children(obj_dict: dict) -> dict: + def load(inner_dict): + # object is a not a dict, so nothing to do + if not isinstance(inner_dict, dict): + return inner_dict + # if object is a dict but doesn't have type information, recurse through it to load any sub dicts that might + if not all( + k in inner_dict for k in ("NAME", "TYPE", "OBJECT", "DICT_VERSION") + ): + return {k: load(v) for k, v in inner_dict.items()} + # object has type info, so just load it + return create_from_dict(inner_dict) + return {k: load(v) for k, v in obj_dict.items()} + +def _join_children_dict(children: dict[str, dict[str, Any]]) -> dict[str, Any]: + """ + Given a nested dictionary, flatten the first level. + + >>> d = {'a': {'a1': 3}, 'b': {'b1': 4, 'b2': {'c': 42}}} + >>> _join_children_dict(d) + {'a/a1': 3, 'b/b1': 4, 'b/b2': {'c': 42}} + + This is intended as a utility function for nested HasDict objects, that + to_dict their children and then want to give a flattened dict for + writing to ProjectHDFio.write_dict_to_hdf + """ + return { + "/".join((k1, k2)): v2 + for k1, v1 in children.items() + for k2, v2 in v1.items() + } + +def _to_dict_children(obj_dict: dict) -> dict: + """ + Call to_dict on any objects that support it. + + Intended as a helper method for recursive object that want to to_dict + their nested objects automatically. + """ + data_dict = {} + child_dict = {} + for k, v in obj_dict.items(): + if isinstance(v, HasDict): + child_dict[k] = v.to_dict() + elif isinstance(v, HasHDF): + child_dict[k] = HasDictfromHDF.to_dict(v) + else: + data_dict[k] = v + return data_dict | _join_children_dict(child_dict) + + class HasDict(ABC): """ Abstract interface to convert objects to dictionaries for storage. 
@@ -114,19 +181,10 @@ def from_dict(self, obj_dict: dict, version: str = None): version (str): version tag written together with the data """ - def load(inner_dict): - if not isinstance(inner_dict, dict): - return inner_dict - if not all( - k in inner_dict for k in ("NAME", "TYPE", "OBJECT", "DICT_VERSION") - ): - return {k: load(v) for k, v in inner_dict.items()} - return create_from_dict(inner_dict) - - obj_dict = self._split_children_dict(obj_dict) + obj_dict = _split_children_dict(obj_dict) if version is None: version = obj_dict.get("DICT_VERSION", None) - self._from_dict({k: load(v) for k, v in obj_dict.items()}, version) + self._from_dict(obj_dict, version) @abstractmethod def _from_dict(self, obj_dict: dict, version: str = None): @@ -150,16 +208,7 @@ def to_dict(self): dict: serialized state of this object """ type_dict = self._type_to_dict() - data_dict = {} - child_dict = {} - for k, v in self._to_dict().items(): - if isinstance(v, HasDict): - child_dict[k] = v.to_dict() - elif isinstance(v, HasHDF): - child_dict[k] = HasDictfromHDF.to_dict(v) - else: - data_dict[k] = v - return data_dict | self._join_children_dict(child_dict) | type_dict + return self._to_dict() | type_dict @abstractmethod def _to_dict(self): @@ -196,44 +245,6 @@ def _type_to_dict(self): type_dict["VERSION"] = self.__version__ return type_dict - @staticmethod - def _join_children_dict(children: dict[str, dict[str, Any]]) -> dict[str, Any]: - """ - Given a nested dictionary, flatten the first level. 
- - >>> d = {'a': {'a1': 3}, 'b': {'b1': 4, 'b2': {'c': 42}}} - >>> _join_children_dict(d) - {'a/a1': 3, 'b/b1': 4, 'b/b2': {'c': 42}} - - This is intended as a utility function for nested HasDict objects, that - to_dict their children and then want to give a flattened dict for - writing to ProjectHDFio.write_dict_to_hdf - """ - return { - "/".join((k1, k2)): v2 - for k1, v1 in children.items() - for k2, v2 in v1.items() - } - - @staticmethod - def _split_children_dict( - obj_dict: dict[str, Any], - ) -> dict[str, Any | dict[str, Any]]: - """ - Undoes _join_children_dict. - """ - subs = defaultdict(dict) - plain = {} - for k, v in obj_dict.items(): - if "/" not in k: - plain[k] = v - continue - root, k = k.split("/", maxsplit=1) - subs[root][k] = v - # using update keeps type stability, i.e. we always return a plain dict - plain.update(subs) - return plain - class HasHDFfromDict(HasHDF, HasDict): """ diff --git a/pyiron_base/jobs/job/generic.py b/pyiron_base/jobs/job/generic.py index a9107f624..1e197fda7 100644 --- a/pyiron_base/jobs/job/generic.py +++ b/pyiron_base/jobs/job/generic.py @@ -1172,26 +1172,16 @@ def _to_dict(self): data_dict["files_to_compress"] = self._files_to_compress if len(self._files_to_remove) > 0: data_dict["files_to_compress"] = self._files_to_remove + data_dict["HDF_VERSION"] = self.__version__ return data_dict def _from_dict(self, obj_dict, version=None): self._type_from_dict(type_dict=obj_dict) if "import_directory" in obj_dict.keys(): self._import_directory = obj_dict["import_directory"] - # Backwards compatibility: Previously server and executable were stored - # as plain dicts, but now they are dicts with additional info so that - # HasDict can load them automatically. - # We need to check whether that was possible with the instance check - # below and if not, call from_dict ourselves. 
- if isinstance(server := obj_dict["server"], Server): - self._server = server - else: - self._server.from_dict(server) + self._server.from_dict(obj_dict["server"]) if "executable" in obj_dict.keys() and obj_dict["executable"] is not None: - if isinstance(executable := obj_dict["executable"], Executable): - self._executable = executable - else: - self.executable.from_dict(executable) + self.executable.from_dict(obj_dict["executable"]) input_dict = obj_dict["input"] if "generic_dict" in input_dict.keys(): generic_dict = input_dict["generic_dict"] diff --git a/pyiron_base/storage/datacontainer.py b/pyiron_base/storage/datacontainer.py index bba832713..1a5ccd64b 100644 --- a/pyiron_base/storage/datacontainer.py +++ b/pyiron_base/storage/datacontainer.py @@ -13,7 +13,7 @@ import numpy as np import pandas -from pyiron_base.interfaces.has_dict import HasDict, HasDictfromHDF +from pyiron_base.interfaces.has_dict import HasDict, HasDictfromHDF, _to_dict_children, _from_dict_children from pyiron_base.interfaces.has_groups import HasGroups from pyiron_base.interfaces.has_hdf import HasHDF from pyiron_base.interfaces.lockable import Lockable, sentinel @@ -1034,9 +1034,10 @@ def _to_dict(self): order = list(data) data["READ_ONLY"] = self.read_only data["KEY_ORDER"] = order - return data + return _to_dict_children(data) def _from_dict(self, obj_dict, version=None): + obj_dict = _from_dict_children(obj_dict) if version == "0.2.0": order = obj_dict.pop("KEY_ORDER") else: