Remove automatic recursive to_dict/from_dict again
This turned out to interact badly with writing and reading objects from GenericJob,
which currently employs a mixture of the to_hdf and the newer to_dict interfaces.
When reading the objects back, the recursive strategy can get confused about which
interface to use.  Since it is mostly relevant for the DataContainer anyway, I have
extracted this functionality into stand-alone functions that operate on the obj_dicts.
Classes that want to use it and do not mix the two interfaces can call these functions
in their implementations of _to_dict and _from_dict, as DataContainer does now.
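
As a sketch of the intended usage pattern (not part of this commit; the class name
and attribute below are made up), a HasDict subclass that wants the old recursive
behavior can call the extracted helpers from its own _to_dict/_from_dict:

from pyiron_base.interfaces.has_dict import (
    HasDict,
    _from_dict_children,
    _to_dict_children,
)

class NestedThing(HasDict):
    """Hypothetical container keeping other HasDict objects in self._store."""

    def __init__(self):
        self._store = {}

    def _to_dict(self):
        # to_dict any HasDict/HasHDF values and flatten them into "child/key" entries
        return _to_dict_children(self._store)

    def _from_dict(self, obj_dict, version=None):
        # entries carrying full type metadata are re-instantiated, everything else
        # (including this object's own type metadata) is passed through unchanged
        self._store = _from_dict_children(obj_dict)
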
pmrv committed Aug 30, 2024
1 parent 77d2bdc commit fc16e1f
Showing 3 changed files with 76 additions and 74 deletions.
129 changes: 70 additions & 59 deletions pyiron_base/interfaces/has_dict.py
@@ -61,6 +61,73 @@ def create_from_dict(obj_dict):
return obj


def _split_children_dict(obj_dict: dict[str, Any]) -> dict[str, Any | dict[str, Any]]:
"""
Undoes _join_children_dict.
"""
subs = defaultdict(dict)
plain = {}
for k, v in obj_dict.items():
if "/" not in k:
plain[k] = v
continue
root, k = k.split("/", maxsplit=1)
subs[root][k] = v
# using update keeps type stability, i.e. we always return a plain dict
plain.update(subs)
return plain

def _from_dict_children(obj_dict: dict) -> dict:
def load(inner_dict):
        # object is not a dict, so nothing to do
if not isinstance(inner_dict, dict):
return inner_dict
        # if object is a dict but doesn't have type information, recurse through it to load any sub dicts it might contain
if not all(
k in inner_dict for k in ("NAME", "TYPE", "OBJECT", "DICT_VERSION")
):
return {k: load(v) for k, v in inner_dict.items()}
# object has type info, so just load it
return create_from_dict(inner_dict)
return {k: load(v) for k, v in obj_dict.items()}

def _join_children_dict(children: dict[str, dict[str, Any]]) -> dict[str, Any]:
"""
Given a nested dictionary, flatten the first level.
>>> d = {'a': {'a1': 3}, 'b': {'b1': 4, 'b2': {'c': 42}}}
>>> _join_children_dict(d)
{'a/a1': 3, 'b/b1': 4, 'b/b2': {'c': 42}}
This is intended as a utility function for nested HasDict objects, that
to_dict their children and then want to give a flattened dict for
writing to ProjectHDFio.write_dict_to_hdf
"""
return {
"/".join((k1, k2)): v2
for k1, v1 in children.items()
for k2, v2 in v1.items()
}

def _to_dict_children(obj_dict: dict) -> dict:
"""
Call to_dict on any objects that support it.
    Intended as a helper method for recursive objects that want to to_dict
their nested objects automatically.
"""
data_dict = {}
child_dict = {}
for k, v in obj_dict.items():
if isinstance(v, HasDict):
child_dict[k] = v.to_dict()
elif isinstance(v, HasHDF):
child_dict[k] = HasDictfromHDF.to_dict(v)
else:
data_dict[k] = v
return data_dict | _join_children_dict(child_dict)


class HasDict(ABC):
"""
Abstract interface to convert objects to dictionaries for storage.
@@ -114,19 +181,10 @@ def from_dict(self, obj_dict: dict, version: str = None):
version (str): version tag written together with the data
"""

def load(inner_dict):
if not isinstance(inner_dict, dict):
return inner_dict
if not all(
k in inner_dict for k in ("NAME", "TYPE", "OBJECT", "DICT_VERSION")
):
return {k: load(v) for k, v in inner_dict.items()}
return create_from_dict(inner_dict)

obj_dict = self._split_children_dict(obj_dict)
obj_dict = _split_children_dict(obj_dict)
if version is None:
version = obj_dict.get("DICT_VERSION", None)
self._from_dict({k: load(v) for k, v in obj_dict.items()}, version)
self._from_dict(obj_dict, version)

@abstractmethod
def _from_dict(self, obj_dict: dict, version: str = None):
@@ -150,16 +208,7 @@ def to_dict(self):
dict: serialized state of this object
"""
type_dict = self._type_to_dict()
data_dict = {}
child_dict = {}
for k, v in self._to_dict().items():
if isinstance(v, HasDict):
child_dict[k] = v.to_dict()
elif isinstance(v, HasHDF):
child_dict[k] = HasDictfromHDF.to_dict(v)
else:
data_dict[k] = v
return data_dict | self._join_children_dict(child_dict) | type_dict
return self._to_dict() | type_dict

@abstractmethod
def _to_dict(self):
@@ -196,44 +245,6 @@ def _type_to_dict(self):
type_dict["VERSION"] = self.__version__
return type_dict

@staticmethod
def _join_children_dict(children: dict[str, dict[str, Any]]) -> dict[str, Any]:
"""
Given a nested dictionary, flatten the first level.
>>> d = {'a': {'a1': 3}, 'b': {'b1': 4, 'b2': {'c': 42}}}
>>> _join_children_dict(d)
{'a/a1': 3, 'b/b1': 4, 'b/b2': {'c': 42}}
This is intended as a utility function for nested HasDict objects, that
to_dict their children and then want to give a flattened dict for
writing to ProjectHDFio.write_dict_to_hdf
"""
return {
"/".join((k1, k2)): v2
for k1, v1 in children.items()
for k2, v2 in v1.items()
}

@staticmethod
def _split_children_dict(
obj_dict: dict[str, Any],
) -> dict[str, Any | dict[str, Any]]:
"""
Undoes _join_children_dict.
"""
subs = defaultdict(dict)
plain = {}
for k, v in obj_dict.items():
if "/" not in k:
plain[k] = v
continue
root, k = k.split("/", maxsplit=1)
subs[root][k] = v
# using update keeps type stability, i.e. we always return a plain dict
plain.update(subs)
return plain


class HasHDFfromDict(HasHDF, HasDict):
"""
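
For reference, a short interpreter-style sketch (not part of the diff) of how the two
flattening helpers round-trip a nested dict:

>>> from pyiron_base.interfaces.has_dict import (
...     _join_children_dict,
...     _split_children_dict,
... )
>>> d = {'a': {'a1': 3}, 'b': {'b1': 4}}
>>> _join_children_dict(d)
{'a/a1': 3, 'b/b1': 4}
>>> _split_children_dict(_join_children_dict(d))
{'a': {'a1': 3}, 'b': {'b1': 4}}
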
16 changes: 3 additions & 13 deletions pyiron_base/jobs/job/generic.py
@@ -1172,26 +1172,16 @@ def _to_dict(self):
data_dict["files_to_compress"] = self._files_to_compress
if len(self._files_to_remove) > 0:
data_dict["files_to_compress"] = self._files_to_remove
data_dict["HDF_VERSION"] = self.__version__
return data_dict

def _from_dict(self, obj_dict, version=None):
self._type_from_dict(type_dict=obj_dict)
if "import_directory" in obj_dict.keys():
self._import_directory = obj_dict["import_directory"]
# Backwards compatibility: Previously server and executable were stored
# as plain dicts, but now they are dicts with additional info so that
# HasDict can load them automatically.
# We need to check whether that was possible with the instance check
# below and if not, call from_dict ourselves.
if isinstance(server := obj_dict["server"], Server):
self._server = server
else:
self._server.from_dict(server)
self._server.from_dict(obj_dict["server"])
if "executable" in obj_dict.keys() and obj_dict["executable"] is not None:
if isinstance(executable := obj_dict["executable"], Executable):
self._executable = executable
else:
self.executable.from_dict(executable)
self.executable.from_dict(obj_dict["executable"])
input_dict = obj_dict["input"]
if "generic_dict" in input_dict.keys():
generic_dict = input_dict["generic_dict"]
5 changes: 3 additions & 2 deletions pyiron_base/storage/datacontainer.py
@@ -13,7 +13,7 @@
import numpy as np
import pandas

from pyiron_base.interfaces.has_dict import HasDict, HasDictfromHDF
from pyiron_base.interfaces.has_dict import HasDict, HasDictfromHDF, _to_dict_children, _from_dict_children
from pyiron_base.interfaces.has_groups import HasGroups
from pyiron_base.interfaces.has_hdf import HasHDF
from pyiron_base.interfaces.lockable import Lockable, sentinel
Expand Down Expand Up @@ -1034,9 +1034,10 @@ def _to_dict(self):
order = list(data)
data["READ_ONLY"] = self.read_only
data["KEY_ORDER"] = order
return data
return _to_dict_children(data)

def _from_dict(self, obj_dict, version=None):
obj_dict = _from_dict_children(obj_dict)
if version == "0.2.0":
order = obj_dict.pop("KEY_ORDER")
else:
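
A rough usage sketch of the net effect for DataContainer; this is not part of the diff
and is untested here, so treat it as illustrative only:

from pyiron_base import DataContainer

outer = DataContainer({"a": 1})
outer["sub"] = DataContainer({"b": 2})

obj_dict = outer.to_dict()
# _to_dict_children flattened the nested container into prefixed child entries

restored = DataContainer()
restored.from_dict(obj_dict)
# _from_dict_children re-created the nested DataContainer, so this is expected
# to give back 2
restored["sub"]["b"]
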
