Skip to content

Commit

Permalink
Promote NumPy arrays to masked arrays during combining.
Browse files Browse the repository at this point in the history
This ensures that the masking status is properly preserved, otherwise
the regular numpy.concatenate() will just throw it away.
  • Loading branch information
LTLA committed Nov 9, 2023
1 parent 5691e3c commit aa98e25
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 6 deletions.
3 changes: 3 additions & 0 deletions src/biocutils/combine_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ def combine_columns(*x: Any):
def _combine_columns_dense_arrays(*x: numpy.ndarray):
    """Combine dense arrays along axis 1 (i.e., by column).

    Args:
        x: One or more arrays to combine. They are validated with
            ``_check_array_dimensions(x, active=1)`` and passed through
            ``convert_to_dense`` before concatenation.

    Returns:
        A ``numpy.ma.MaskedArray`` if any converted input is a masked array,
        otherwise the result of ``numpy.concatenate`` along axis 1.
    """
    _check_array_dimensions(x, active=1)
    dense = [convert_to_dense(y) for y in x]

    # numpy.concatenate() does not preserve input masks, so any masked input
    # routes the whole call through numpy.ma.concatenate(). The decision is
    # made on the array type rather than with numpy.ma.is_masked(), which
    # inspects the mask values and so makes the code path data-dependent.
    if any(isinstance(y, numpy.ma.MaskedArray) for y in dense):
        return numpy.ma.concatenate(dense, axis=1)
    return numpy.concatenate(dense, axis=1)


Expand Down
3 changes: 3 additions & 0 deletions src/biocutils/combine_rows.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ def combine_rows(*x: Any):
def _combine_rows_dense_arrays(*x: numpy.ndarray):
    """Combine dense arrays along the first axis (i.e., by row).

    Args:
        x: One or more arrays to combine. They are validated with
            ``_check_array_dimensions(x, active=0)`` and passed through
            ``convert_to_dense`` before concatenation.

    Returns:
        A ``numpy.ma.MaskedArray`` if any converted input is a masked array,
        otherwise the result of ``numpy.concatenate`` along its default axis.
    """
    _check_array_dimensions(x, active=0)
    dense = [convert_to_dense(y) for y in x]

    # numpy.concatenate() does not preserve input masks, so any masked input
    # routes the whole call through numpy.ma.concatenate(). The decision is
    # made on the array type rather than with numpy.ma.is_masked(), which
    # inspects the mask values and so makes the code path data-dependent.
    if any(isinstance(y, numpy.ma.MaskedArray) for y in dense):
        return numpy.ma.concatenate(dense)
    return numpy.concatenate(dense)


Expand Down
3 changes: 3 additions & 0 deletions src/biocutils/combine_sequences.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ def _combine_sequences_lists(*x: list):

@combine_sequences.register(numpy.ndarray)
def _combine_sequences_dense_arrays(*x: numpy.ndarray):
    """Combine arrays into a single flattened 1-dimensional array.

    Args:
        x: One or more arrays to combine; each is flattened by the
            ``axis=None`` concatenation.

    Returns:
        A ``numpy.ma.MaskedArray`` if any input is a masked array (so that
        the masks are carried into the result), otherwise a plain
        ``numpy.ndarray``.
    """
    # numpy.concatenate() does not preserve input masks, so any masked input
    # routes the whole call through numpy.ma.concatenate(). The decision is
    # made on the array type rather than with numpy.ma.is_masked(), which
    # inspects the mask values and so makes the code path data-dependent.
    if any(isinstance(y, numpy.ma.MaskedArray) for y in x):
        return numpy.ma.concatenate(x, axis=None)
    return numpy.concatenate(x, axis=None)


Expand Down
12 changes: 12 additions & 0 deletions tests/test_combine_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,18 @@ def test_combine_columns_dense():
assert z.shape == (20, 15)


def test_combine_columns_masked():
    """Column-wise combining keeps the mask of a fully masked input."""
    nrow = 20
    plain = np.ones(shape=(nrow, 10))
    zeros = np.zeros((nrow, 5))
    hidden = np.ma.array(zeros, mask=True)

    combined = combine_columns(plain, hidden)

    # Only the zero-filled block was masked, so the expected mask is exactly
    # where the unmasked concatenation equals zero.
    unmasked = np.concatenate([plain, zeros], axis=1)
    expected_mask = unmasked == 0
    assert (combined.mask == expected_mask).all()
    assert combined.shape == (20, 15)


def test_combine_columns_sparse():
num_rows = 20

Expand Down
12 changes: 12 additions & 0 deletions tests/test_combine_rows.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,18 @@ def test_combine_rows_dense():
assert z.shape == (15, 20)


def test_combine_rows_masked():
    """Row-wise combining keeps the mask of a fully masked input."""
    ncol = 20
    plain = np.ones(shape=(10, ncol))
    zeros = np.zeros((5, ncol))
    hidden = np.ma.array(zeros, mask=True)

    combined = combine_rows(plain, hidden)

    # Only the zero-filled block was masked, so the expected mask is exactly
    # where the unmasked concatenation equals zero.
    unmasked = np.concatenate([plain, zeros])
    expected_mask = unmasked == 0
    assert (combined.mask == expected_mask).all()
    assert combined.shape == (15, 20)


def test_combine_rows_sparse():
num_cols = 20

Expand Down
21 changes: 15 additions & 6 deletions tests/test_combine_sequences.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,26 @@ def test_basic_list():
def test_basic_dense():
    """Combining two plain NumPy arrays matches the combined Python lists."""
    x = [1, 2, 3]
    y = [0.1, 0.2]
    # Build the arrays from the lists above so that inputs and expected
    # values share a single source of truth.
    xd = np.array(x)
    yd = np.array(y)

    zcomb = combine_sequences(xd, yd)

    z = x + y
    zd = np.array(z)
    assert (zcomb == zd).all()


def test_basic_dense_masked():
    """Combining a plain array with a masked array preserves the mask."""
    x = [1, 2, 3]
    y = [0.1, 0.2]
    xd = np.array(x)
    yd = np.ma.array(y, mask=[True] * 2)

    zcomb = combine_sequences(xd, yd)
    z = x + y
    zd = np.ma.array(z, mask=[False] * 3 + [True] * 2)

    # All checks run only after zcomb and zd exist; the result must be a
    # masked array of the combined length with the input mask carried over.
    assert isinstance(zcomb, np.ma.MaskedArray)
    assert len(zcomb) == len(zd)
    assert (zcomb == zd).all()
    assert (zcomb.mask == zd.mask).all()


def test_basic_mixed_dense_list():
Expand Down

0 comments on commit aa98e25

Please sign in to comment.