Remove automatic recursive to_dict/from_dict again
This turned out to interact badly with writing and reading objects from GenericJob,
which currently employs a mixture of the to_hdf and the newer to_dict interfaces.
When reading the objects back, the recursive strategy can get confused about which
interface to use.  Since it is mostly relevant for the DataContainer anyway, I have
extracted this functionality into stand-alone functions that operate on the obj_dicts.
Classes that want to use it and do not mix the two interfaces can call these functions
in their implementations of _to_dict and _from_dict, as DataContainer does now.
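
As a sketch of the intended usage pattern (not part of this commit; the class name
and attribute below are made up), a HasDict subclass that wants the old recursive
behavior can call the extracted helpers from its own _to_dict/_from_dict:

from pyiron_base.interfaces.has_dict import (
    HasDict,
    _from_dict_children,
    _to_dict_children,
)

class NestedThing(HasDict):
    """Hypothetical container keeping other HasDict objects in self._store."""

    def __init__(self):
        self._store = {}

    def _to_dict(self):
        # to_dict any HasDict/HasHDF values and flatten them into "child/key" entries
        return _to_dict_children(self._store)

    def _from_dict(self, obj_dict, version=None):
        # entries carrying full type metadata are re-instantiated, everything else
        # (including this object's own type metadata) is passed through unchanged
        self._store = _from_dict_children(obj_dict)
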
pmrv committed Aug 30, 2024
1 parent 77d2bdc commit fc16e1f
Showing 3 changed files with 76 additions and 74 deletions.
129 changes: 70 additions & 59 deletions pyiron_base/interfaces/has_dict.py
@@ -61,6 +61,73 @@ def create_from_dict(obj_dict):
return obj


def _split_children_dict(obj_dict: dict[str, Any]) -> dict[str, Any | dict[str, Any]]:
"""
Undoes _join_children_dict.
"""
subs = defaultdict(dict)
plain = {}
for k, v in obj_dict.items():
if "/" not in k:
plain[k] = v
continue
root, k = k.split("/", maxsplit=1)
subs[root][k] = v
# using update keeps type stability, i.e. we always return a plain dict
plain.update(subs)
return plain

def _from_dict_children(obj_dict: dict) -> dict:
def load(inner_dict):
        # object is not a dict, so nothing to do
if not isinstance(inner_dict, dict):
return inner_dict
        # if object is a dict but doesn't have type information, recurse through it to load any sub dicts it might contain
if not all(
k in inner_dict for k in ("NAME", "TYPE", "OBJECT", "DICT_VERSION")
):
return {k: load(v) for k, v in inner_dict.items()}
# object has type info, so just load it
return create_from_dict(inner_dict)
return {k: load(v) for k, v in obj_dict.items()}

def _join_children_dict(children: dict[str, dict[str, Any]]) -> dict[str, Any]:
"""
Given a nested dictionary, flatten the first level.
>>> d = {'a': {'a1': 3}, 'b': {'b1': 4, 'b2': {'c': 42}}}
>>> _join_children_dict(d)
{'a/a1': 3, 'b/b1': 4, 'b/b2': {'c': 42}}
This is intended as a utility function for nested HasDict objects, that
to_dict their children and then want to give a flattened dict for
writing to ProjectHDFio.write_dict_to_hdf
"""
return {
"/".join((k1, k2)): v2
for k1, v1 in children.items()
for k2, v2 in v1.items()
}

def _to_dict_children(obj_dict: dict) -> dict:
"""
Call to_dict on any objects that support it.
    Intended as a helper method for recursive objects that want to to_dict
their nested objects automatically.
"""
data_dict = {}
child_dict = {}
for k, v in obj_dict.items():
if isinstance(v, HasDict):
child_dict[k] = v.to_dict()
elif isinstance(v, HasHDF):
child_dict[k] = HasDictfromHDF.to_dict(v)
else:
data_dict[k] = v
return data_dict | _join_children_dict(child_dict)


class HasDict(ABC):
"""
Abstract interface to convert objects to dictionaries for storage.
@@ -114,19 +181,10 @@ def from_dict(self, obj_dict: dict, version: str = None):
version (str): version tag written together with the data
"""

def load(inner_dict):
if not isinstance(inner_dict, dict):
return inner_dict
if not all(
k in inner_dict for k in ("NAME", "TYPE", "OBJECT", "DICT_VERSION")
):
return {k: load(v) for k, v in inner_dict.items()}
return create_from_dict(inner_dict)

obj_dict = self._split_children_dict(obj_dict)
obj_dict = _split_children_dict(obj_dict)
if version is None:
version = obj_dict.get("DICT_VERSION", None)
self._from_dict({k: load(v) for k, v in obj_dict.items()}, version)
self._from_dict(obj_dict, version)

@abstractmethod
def _from_dict(self, obj_dict: dict, version: str = None):
@@ -150,16 +208,7 @@ def to_dict(self):
dict: serialized state of this object
"""
type_dict = self._type_to_dict()
data_dict = {}
child_dict = {}
for k, v in self._to_dict().items():
if isinstance(v, HasDict):
child_dict[k] = v.to_dict()
elif isinstance(v, HasHDF):
child_dict[k] = HasDictfromHDF.to_dict(v)
else:
data_dict[k] = v
return data_dict | self._join_children_dict(child_dict) | type_dict
return self._to_dict() | type_dict

@abstractmethod
def _to_dict(self):
@@ -196,44 +245,6 @@ def _type_to_dict(self):
type_dict["VERSION"] = self.__version__
return type_dict

@staticmethod
def _join_children_dict(children: dict[str, dict[str, Any]]) -> dict[str, Any]:
"""
Given a nested dictionary, flatten the first level.
>>> d = {'a': {'a1': 3}, 'b': {'b1': 4, 'b2': {'c': 42}}}
>>> _join_children_dict(d)
{'a/a1': 3, 'b/b1': 4, 'b/b2': {'c': 42}}
This is intended as a utility function for nested HasDict objects, that
to_dict their children and then want to give a flattened dict for
writing to ProjectHDFio.write_dict_to_hdf
"""
return {
"/".join((k1, k2)): v2
for k1, v1 in children.items()
for k2, v2 in v1.items()
}

@staticmethod
def _split_children_dict(
obj_dict: dict[str, Any],
) -> dict[str, Any | dict[str, Any]]:
"""
Undoes _join_children_dict.
"""
subs = defaultdict(dict)
plain = {}
for k, v in obj_dict.items():
if "/" not in k:
plain[k] = v
continue
root, k = k.split("/", maxsplit=1)
subs[root][k] = v
# using update keeps type stability, i.e. we always return a plain dict
plain.update(subs)
return plain


class HasHDFfromDict(HasHDF, HasDict):
"""
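
For reference, a short interpreter-style sketch (not part of the diff) of how the two
flattening helpers round-trip a nested dict:

>>> from pyiron_base.interfaces.has_dict import (
...     _join_children_dict,
...     _split_children_dict,
... )
>>> d = {'a': {'a1': 3}, 'b': {'b1': 4}}
>>> _join_children_dict(d)
{'a/a1': 3, 'b/b1': 4}
>>> _split_children_dict(_join_children_dict(d))
{'a': {'a1': 3}, 'b': {'b1': 4}}
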
16 changes: 3 additions & 13 deletions pyiron_base/jobs/job/generic.py
@@ -1172,26 +1172,16 @@ def _to_dict(self):
data_dict["files_to_compress"] = self._files_to_compress
if len(self._files_to_remove) > 0:
data_dict["files_to_compress"] = self._files_to_remove
data_dict["HDF_VERSION"] = self.__version__
return data_dict

def _from_dict(self, obj_dict, version=None):
self._type_from_dict(type_dict=obj_dict)
if "import_directory" in obj_dict.keys():
self._import_directory = obj_dict["import_directory"]
# Backwards compatibility: Previously server and executable were stored
# as plain dicts, but now they are dicts with additional info so that
# HasDict can load them automatically.
# We need to check whether that was possible with the instance check
# below and if not, call from_dict ourselves.
if isinstance(server := obj_dict["server"], Server):
self._server = server
else:
self._server.from_dict(server)
self._server.from_dict(obj_dict["server"])
if "executable" in obj_dict.keys() and obj_dict["executable"] is not None:
if isinstance(executable := obj_dict["executable"], Executable):
self._executable = executable
else:
self.executable.from_dict(executable)
self.executable.from_dict(obj_dict["executable"])
input_dict = obj_dict["input"]
if "generic_dict" in input_dict.keys():
generic_dict = input_dict["generic_dict"]
5 changes: 3 additions & 2 deletions pyiron_base/storage/datacontainer.py
@@ -13,7 +13,7 @@
import numpy as np
import pandas

from pyiron_base.interfaces.has_dict import HasDict, HasDictfromHDF
from pyiron_base.interfaces.has_dict import HasDict, HasDictfromHDF, _to_dict_children, _from_dict_children
from pyiron_base.interfaces.has_groups import HasGroups
from pyiron_base.interfaces.has_hdf import HasHDF
from pyiron_base.interfaces.lockable import Lockable, sentinel
Expand Down Expand Up @@ -1034,9 +1034,10 @@ def _to_dict(self):
order = list(data)
data["READ_ONLY"] = self.read_only
data["KEY_ORDER"] = order
return data
return _to_dict_children(data)

def _from_dict(self, obj_dict, version=None):
obj_dict = _from_dict_children(obj_dict)
if version == "0.2.0":
order = obj_dict.pop("KEY_ORDER")
else:
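
A rough usage sketch of the net effect for DataContainer; this is not part of the diff
and is untested here, so treat it as illustrative only:

from pyiron_base import DataContainer

outer = DataContainer({"a": 1})
outer["sub"] = DataContainer({"b": 2})

obj_dict = outer.to_dict()
# _to_dict_children flattened the nested container into prefixed child entries

restored = DataContainer()
restored.from_dict(obj_dict)
# _from_dict_children re-created the nested DataContainer, so this is expected
# to give back 2
restored["sub"]["b"]
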
