Skip to content

Commit

Permalink
Support dtype= and copy= arguments in __array__(), as required by num…
Browse files Browse the repository at this point in the history
…py 2. (#64)
  • Loading branch information
LTLA authored Oct 23, 2024
1 parent 622de03 commit 538d459
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 10 deletions.
27 changes: 24 additions & 3 deletions src/delayedarray/DelayedArray.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,15 +156,36 @@ def __repr__(self) -> str:
return preamble + "\n" + converted

# For NumPy:
def __array__(self, dtype: Optional[numpy.dtype] = None, copy: bool = True) -> ndarray:
    """Convert a ``DelayedArray`` to a NumPy array, to be used by
    :py:meth:`~numpy.array`.

    Args:
        dtype:
            The desired NumPy type of the output array. If None, the
            type of the seed is used.

        copy:
            Currently ignored. The output is never a reference to the
            underlying seed, even if the seed is another NumPy array.

    Returns:
        NumPy array with the same shape as :py:attr:`~shape`. Its type is
        the requested ``dtype``, or :py:attr:`~dtype` if ``dtype`` is
        None. When a type conversion is needed and the seed is masked,
        the output is a NumPy masked array.
    """
    if dtype is None or dtype == self.dtype:
        # No conversion requested, so the regular dense extraction suffices.
        return to_dense_array(self._seed)

    # Filling it chunk by chunk rather than doing a big coercion,
    # to avoid creating an unnecessary intermediate full matrix.
    # Every block is overwritten below, so an uninitialized buffer is fine.
    output = numpy.empty(self.shape, dtype=dtype)
    if is_masked(self._seed):
        # Wrap the buffer so that masks of assigned blocks are retained.
        output = numpy.ma.array(output, mask=False)

    def fill_output(job, part):
        # 'job' holds one (start, end) pair per dimension for this block.
        subsets = tuple(slice(start, end) for start, end in job)
        output[subsets] = part

    apply_over_blocks(self._seed, fill_output)
    return output

def __array_ufunc__(self, ufunc, method, *inputs, **kwargs) -> "DelayedArray":
"""Interface with NumPy array methods. This is used to implement
Expand Down
23 changes: 17 additions & 6 deletions src/delayedarray/SparseNdarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,15 +210,24 @@ def __repr__(self) -> str:


# For NumPy:
def __array__(self, dtype: Optional[numpy.dtype] = None, copy: bool = True) -> numpy.ndarray:
    """Materialize this ``SparseNdarray`` as a dense NumPy array.

    Args:
        dtype:
            NumPy type requested for the output array. If None, the
            extraction falls back to the type of this array.

        copy:
            Accepted for compatibility with NumPy's ``__array__``
            protocol, but not used by this method.

    Returns:
        Dense array obtained by extracting with indices spawned from the
        full :py:attr:`~shape`, of the requested ``dtype`` (or of
        :py:attr:`~dtype` if ``dtype`` is None).
    """
    return _extract_dense_array_from_SparseNdarray(
        self,
        _spawn_indices(self._shape),
        dtype=dtype,
    )

# Assorted dunder methods.
def __add__(self, other) -> Union["SparseNdarray", numpy.ndarray]:
Expand Down Expand Up @@ -1231,18 +1240,20 @@ def _recursive_extract_dense_array(contents: numpy.ndarray, subset: Tuple[Sequen
pos += 1


def _extract_dense_array_from_SparseNdarray(x: SparseNdarray, subset: Tuple[Sequence[int], ...]) -> numpy.ndarray:
def _extract_dense_array_from_SparseNdarray(x: SparseNdarray, subset: Tuple[Sequence[int], ...], dtype: Optional[numpy.dtype] = None) -> numpy.ndarray:
idims = [len(y) for y in subset]
subset_summary = _characterize_indices(subset[0], x._shape[0])

# We reverse the dimensions so that we use F-contiguous storage. This also
# makes it slightly easier to do the recursion as we can just index by
# the first dimension to obtain a subarray at each recursive step.
output = numpy.zeros((*reversed(idims),), dtype=x._dtype)
if dtype is None:
dtype = x._dtype
output = numpy.zeros((*reversed(idims),), dtype=dtype)
if x._is_masked:
output = numpy.ma.MaskedArray(output, mask=False)

if x._contents is not None:
if x._is_masked:
output = numpy.ma.MaskedArray(output, mask=False)
ndim = len(x._shape)
if ndim > 1:
_recursive_extract_dense_array(x._contents, subset, subset_summary=subset_summary, output=output, dim=ndim-1)
Expand Down
18 changes: 17 additions & 1 deletion tests/test_DelayedArray.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ def test_DelayedArray_dense():

dump = numpy.array(x)
assert isinstance(dump, numpy.ndarray)
assert dump.dtype == x.dtype
assert (dump == raw).all()

dump = numpy.array(x, dtype=numpy.float64)
assert isinstance(dump, numpy.ndarray)
assert dump.dtype == numpy.float64
assert (dump == raw).all()


Expand Down Expand Up @@ -69,6 +75,16 @@ def test_DelayedArray_masked():
x = delayedarray.wrap(y)
assert delayedarray.is_masked(x)

dump = numpy.array(x)
assert isinstance(dump, numpy.ndarray)
assert dump.dtype == x.dtype
assert (dump == numpy.array(y)).all()

dump = numpy.array(x, dtype=numpy.float32)
assert isinstance(dump, numpy.ndarray)
assert dump.dtype == numpy.float32
assert (dump == numpy.array(y, dtype=numpy.float32)).all()


#######################################################
#######################################################
Expand Down Expand Up @@ -356,4 +372,4 @@ def test_SparseNdarray_all_sparse(mask_rate, buffer_size):

# Zero-length array is respected.
y = delayedarray.wrap(delayedarray.SparseNdarray((0,), None, dtype=numpy.int32, index_dtype=numpy.int32)) * 50
assert y.all()
assert y.all()
2 changes: 2 additions & 0 deletions tests/test_SparseNdarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ def test_SparseNdarray_extract_dense_array_3d(mask_rate):
# Full extraction.
output = delayedarray.to_dense_array(y)
assert_identical_ndarrays(output, convert_SparseNdarray_to_numpy(y))
assert_identical_ndarrays(numpy.array(output), numpy.array(y))
assert_identical_ndarrays(numpy.array(output, dtype=numpy.int32), numpy.array(y, dtype=numpy.int32))

# Sliced extraction.
slices = (slice(2, 15, 3), slice(0, 20, 2), slice(4, 8))
Expand Down

0 comments on commit 538d459

Please sign in to comment.