Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add methods to get padded and ragged arrays to FlattenedStorage #481

Merged
merged 4 commits into from
Oct 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions pyiron_base/generic/flattenedstorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,16 @@ def add_array(self, name, shape=(), dtype=np.float64, fill=None, per="element"):
store[name] = np.empty(shape=shape, dtype=dtype)
else:
store[name] = np.full(shape=shape, fill_value=fill, dtype=dtype)

_default_fill_values = {
np.dtype("int32"): -1,
np.dtype("int64"): -1,
np.dtype("float32"): np.nan,
np.dtype("float64"): np.nan,
}
if fill is None and store[name].dtype in _default_fill_values:
fill = _default_fill_values[store[name].dtype]
if fill is not None:
self._fill_values[name] = fill

def get_array(self, name, frame=None):
Expand Down Expand Up @@ -339,6 +349,57 @@ def get_array(self, name, frame=None):
else:
raise KeyError(f"no array named {name}")

def get_array_ragged(self, name: str) -> np.ndarray:
    """
    Return elements of array `name` in all chunks.  Values are returned in a ragged array of dtype=object.

    The result is always one dimensional with exactly one entry per chunk, where each entry is the array returned
    by :method:`.get_array` for that chunk.  This also holds when all chunks happen to have the same length.

    If `name` specifies a per chunk array, there's nothing to pad and this method is equivalent to
    :method:`.get_array`.

    Args:
        name (str): name of array to fetch

    Returns:
        numpy.ndarray, dtype=object: ragged array of all elements in all chunks
    """
    if name in self._per_chunk_arrays:
        return self.get_array(name)

    num_chunks = len(self)
    # Assign chunk by chunk into a pre-allocated object array.  Passing the list of chunks straight to
    # np.array(..., dtype=object) would silently build a dense N-d object array whenever all chunks share the same
    # length, so the shape of the result would depend on the stored data.
    ragged = np.empty(num_chunks, dtype=object)
    for i in range(num_chunks):
        ragged[i] = self.get_array(name, i)
    return ragged

def get_array_filled(self, name: str) -> np.ndarray:
    """
    Return elements of array `name` in all chunks.  Arrays are padded to be all of the same length.

    The padding value depends on the datatype of the array or can be configured via the `fill` parameter of
    :method:`.add_array`.  If no fill value is registered for `name`, the zero value of the array's dtype is used.

    The returned array has the same dtype as the stored per element array and shape
    `(number of chunks, longest chunk length) + per element shape`.  For a storage without any chunks the first two
    dimensions are zero.

    If `name` specifies a per chunk array, there's nothing to pad and this method is equivalent to
    :method:`.get_array`.

    Args:
        name (str): name of array to fetch

    Returns:
        numpy.ndarray: padded array of all elements in all chunks

    Raises:
        KeyError: if `name` is neither a per chunk nor a per element array
    """
    if name in self._per_chunk_arrays:
        return self.get_array(name)

    per_element = self._per_element_arrays[name]
    # trailing dimensions of a single element, e.g. (3,) for an array of 3-vectors
    per_shape = per_element.shape[1:]
    if name in self._fill_values:
        fill = self._fill_values[name]
    else:
        # no registered fill value: fall back to the dtype's zero
        fill = np.zeros(1, dtype=per_element.dtype)[0]

    num_chunks = len(self)
    # Only the first len(self) entries are looked at, matching the chunks that are actually read below.
    # NOTE(review): presumably the length array can be allocated larger than the number of stored chunks -- confirm.
    lengths = self._per_chunk_arrays["length"][:num_chunks]
    # guard against calling max() on an empty array for a storage without chunks
    max_len = int(lengths.max()) if num_chunks > 0 else 0

    # Start from an array that is completely made of padding and copy every chunk into its leading slots.  This
    # keeps the dtype of the stored array, even if all chunks have the same length.
    filled = np.full((num_chunks, max_len) + per_shape, fill, dtype=per_element.dtype)
    for i in range(num_chunks):
        chunk = self.get_array(name, i)
        filled[i, :len(chunk)] = chunk
    return filled

def set_array(self, name, frame, value):
"""
Add array for given structure.
Expand Down
46 changes: 46 additions & 0 deletions tests/generic/test_flattenedstorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,52 @@ def test_get_array_full(self):
self.assertTrue(np.array_equal(chunk, [-1, -2, -3]),
f"get_array return did not return correct flat array, but {chunk}.")

def test_get_array_filled(self):
    """get_array_filled should pad every chunk to a common length and return one rectangular array."""

    storage = FlattenedStorage(elem=[[1], [2, 3], [4, 5, 6]], chunk=[-1, -2, -3])

    # scalar per element array with an explicit fill value
    storage.add_array("fill", fill=23.42)
    for frame, values in enumerate(([-1], [-2, -3], [-4, -5, -6])):
        storage.set_array("fill", frame, values)

    # per element array of 3-vectors; chunk number `frame` holds frame + 1 rows
    storage.add_array("complex", shape=(3,), dtype=np.float64)
    for frame in range(3):
        rows = frame + 1
        storage.set_array("complex", frame, [[rows, rows, rows] for _ in range(rows)])

    padded = storage.get_array_filled("elem")
    self.assertEqual(padded.shape, (3, 3), "shape not correct!")
    expected = [[1, -1, -1], [2, 3, -1], [4, 5, 6]]
    self.assertTrue(np.array_equal(padded, expected),
                    "values in returned array not the same as in original array!")

    first_padded_entry = storage.get_array_filled("fill")[0, 1]
    self.assertEqual(first_padded_entry, 23.42, "incorrect fill value!")

    padded = storage.get_array_filled("complex")
    self.assertEqual(padded.shape, (3, 3, 3), "shape not correct!")

    # per chunk arrays have nothing to pad and must come back unchanged
    plain = storage.get_array("chunk")
    filled = storage.get_array_filled("chunk")
    self.assertTrue(np.array_equal(plain, filled),
                    "get_array_filled does not give same result as get_array for per chunk array")

def test_get_array_ragged(self):
    """get_array_ragged should return a ragged array holding the elements of every chunk in the storage."""

    expected_chunks = [[1], [2, 3], [4, 5, 6]]
    store = FlattenedStorage(elem=[[1], [2, 3], [4, 5, 6]], chunk=[-1, -2, -3])

    ragged = store.get_array_ragged("elem")
    self.assertEqual(ragged.shape, (3,), "shape not correct!")

    # every entry has to match its chunk both in length and in content
    for i, (v, expected) in enumerate(zip(ragged, expected_chunks)):
        stored_length = store._per_chunk_arrays["length"][i]
        self.assertEqual(len(v), stored_length, f"array {i} has incorrect length!")
        self.assertTrue(np.array_equal(v, expected), f"array {i} has incorrect values, {v}!")

    # per chunk arrays are not ragged and must come back unchanged
    plain = store.get_array("chunk")
    ragged_chunk = store.get_array_ragged("chunk")
    self.assertTrue(np.array_equal(plain, ragged_chunk),
                    "get_array_ragged does not give same result as get_array for per chunk array")

def test_has_array(self):
"""hasarray should return correct information for added array; None otherwise."""

Expand Down