Skip to content

Commit

Permalink
Merge pull request #481 from pyiron/flatt_options
Browse files Browse the repository at this point in the history
Add methods to get padded and ragged arrays to FlattenedStorage
  • Loading branch information
pmrv authored Oct 26, 2021
2 parents a71f560 + 89dcb78 commit 28c60c8
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 0 deletions.
61 changes: 61 additions & 0 deletions pyiron_base/generic/flattenedstorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,16 @@ def add_array(self, name, shape=(), dtype=np.float64, fill=None, per="element"):
store[name] = np.empty(shape=shape, dtype=dtype)
else:
store[name] = np.full(shape=shape, fill_value=fill, dtype=dtype)

_default_fill_values = {
np.dtype("int32"): -1,
np.dtype("int64"): -1,
np.dtype("float32"): np.nan,
np.dtype("float64"): np.nan,
}
if fill is None and store[name].dtype in _default_fill_values:
fill = _default_fill_values[store[name].dtype]
if fill is not None:
self._fill_values[name] = fill

def get_array(self, name, frame=None):
Expand Down Expand Up @@ -339,6 +349,57 @@ def get_array(self, name, frame=None):
else:
raise KeyError(f"no array named {name}")

def get_array_ragged(self, name: str) -> np.ndarray:
    """
    Return elements of array `name` in all chunks as a ragged array.

    Every chunk is returned as its own array.  The outer array has dtype=object and always holds exactly one
    entry per chunk, i.e. it has shape `(len(self),)`, regardless of whether the chunks differ in length.
    If `name` specifies a per chunk array, there's nothing to split and this method is equivalent to
    :meth:`.get_array`.

    Args:
        name (str): name of array to fetch

    Returns:
        numpy.ndarray, dtype=object: ragged array of all elements in all chunks
    """
    if name in self._per_chunk_arrays:
        return self.get_array(name)
    # Fill a pre-allocated object array slot by slot.  Passing the list of chunks to
    # np.array(..., dtype=object) only gives a 1d array of arrays when the chunk lengths differ; if all chunks
    # have the same length (or there is only a single chunk) numpy builds an n-dimensional object array
    # instead, and callers would no longer get one entry per chunk.
    n_chunks = len(self)
    ragged = np.empty(n_chunks, dtype=object)
    for i in range(n_chunks):
        ragged[i] = self.get_array(name, i)
    return ragged

def get_array_filled(self, name: str) -> np.ndarray:
    """
    Return elements of array `name` in all chunks, padded to a common length.

    The result has shape `(number of chunks, length of longest chunk) + per element shape` and the dtype of
    the stored array.  Entries past the end of a chunk are set to the fill value registered for the array
    (see the `fill` parameter of :meth:`.add_array`); if none is registered the zero value of the array's
    dtype is used.
    If `name` specifies a per chunk array, there's nothing to pad and this method is equivalent to
    :meth:`.get_array`.

    Args:
        name (str): name of array to fetch

    Returns:
        numpy.ndarray: padded array of all elements in all chunks
    """
    if name in self._per_chunk_arrays:
        return self.get_array(name)

    # Fetch the chunks first so that an unknown name fails inside get_array, as before.
    chunks = [self.get_array(name, i) for i in range(len(self))]
    # Derive the padded length from the chunks that are actually returned; default=0 keeps an empty
    # storage from raising.
    max_len = max((len(chunk) for chunk in chunks), default=0)

    elements = self._per_element_arrays[name]
    per_shape = elements.shape[1:]
    if name in self._fill_values:
        fill = self._fill_values[name]
    else:
        fill = np.zeros(1, dtype=elements.dtype)[0]

    # Allocate the padded result once, already holding the fill value, and copy each chunk into the front
    # of its row.  Building the result with an explicit dtype keeps it numeric even when all chunks have
    # the same length.
    filled = np.full((len(chunks), max_len) + per_shape, fill, dtype=elements.dtype)
    for row, chunk in zip(filled, chunks):
        row[:len(chunk)] = chunk
    return filled

def set_array(self, name, frame, value):
"""
Add array for given structure.
Expand Down
46 changes: 46 additions & 0 deletions tests/generic/test_flattenedstorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,52 @@ def test_get_array_full(self):
self.assertTrue(np.array_equal(chunk, [-1, -2, -3]),
f"get_array return did not return correct flat array, but {chunk}.")

def test_get_array_filled(self):
    """get_array_filled should return a padded array of all elements in the storage."""

    store = FlattenedStorage(elem=[[1], [2, 3], [4, 5, 6]], chunk=[-1, -2, -3])
    # per element array with an explicitly configured fill value
    store.add_array("fill", fill=23.42)
    store.set_array("fill", 0, [-1])
    store.set_array("fill", 1, [-2, -3])
    store.set_array("fill", 2, [-4, -5, -6])
    # per element array with a non-trivial per element shape and the default fill value
    store.add_array("complex", shape=(3,), dtype=np.float64)
    store.set_array("complex", 0, [[1, 1, 1]])
    store.set_array("complex", 1, [[2, 2, 2],
                                   [2, 2, 2]])
    store.set_array("complex", 2, [[3, 3, 3],
                                   [3, 3, 3],
                                   [3, 3, 3]])

    val = store.get_array_filled("elem")
    self.assertEqual(val.shape, (3, 3), "shape not correct!")
    self.assertTrue(np.array_equal(val, [[1, -1, -1], [2, 3, -1], [4, 5, 6]]),
                    "values in returned array not the same as in original array!")

    val = store.get_array_filled("fill")
    self.assertEqual(val[0, 1], 23.42,
                     "incorrect fill value!")
    # check every entry, not only a single padded one
    self.assertTrue(np.array_equal(val, [[-1, 23.42, 23.42],
                                         [-2, -3, 23.42],
                                         [-4, -5, -6]]),
                    "values in returned array not the same as in original array!")

    val = store.get_array_filled("complex")
    self.assertEqual(val.shape, (3, 3, 3), "shape not correct!")
    # the stored rows must come first in every chunk and be unchanged by the padding
    self.assertTrue(np.array_equal(val[0, :1], np.full((1, 3), 1.0)),
                    "values in returned array not the same as in original array!")
    self.assertTrue(np.array_equal(val[1, :2], np.full((2, 3), 2.0)),
                    "values in returned array not the same as in original array!")
    self.assertTrue(np.array_equal(val[2], np.full((3, 3), 3.0)),
                    "values in returned array not the same as in original array!")
    # float64 arrays added without explicit fill are padded with NaN
    self.assertTrue(np.isnan(val[0, 1:]).all() and np.isnan(val[1, 2:]).all(),
                    "incorrect fill value!")

    self.assertTrue(np.array_equal(
        store.get_array("chunk"),
        store.get_array_filled("chunk"),
    ), "get_array_filled does not give same result as get_array for per chunk array")

def test_get_array_ragged(self):
    """get_array_ragged should return a ragged array of all elements in the storage."""

    expected_chunks = [[1], [2, 3], [4, 5, 6]]
    store = FlattenedStorage(elem=[[1], [2, 3], [4, 5, 6]], chunk=[-1, -2, -3])

    val = store.get_array_ragged("elem")
    self.assertEqual(val.shape, (3,), "shape not correct!")

    # every entry must match both the recorded chunk length and the values put in
    lengths = store._per_chunk_arrays["length"]
    for i, (v, expected) in enumerate(zip(val, expected_chunks)):
        self.assertEqual(len(v), lengths[i], f"array {i} has incorrect length!")
        self.assertTrue(np.array_equal(v, expected), f"array {i} has incorrect values, {v}!")

    # per chunk arrays are passed through unchanged
    direct = store.get_array("chunk")
    via_ragged = store.get_array_ragged("chunk")
    self.assertTrue(np.array_equal(direct, via_ragged),
                    "get_array_ragged does not give same result as get_array for per chunk array")

def test_has_array(self):
"""hasarray should return correct information for added array; None otherwise."""

Expand Down

0 comments on commit 28c60c8

Please sign in to comment.