Skip to content

Commit

Permalink
Throw in some more utilities for downstream use.
Browse files Browse the repository at this point in the history
- Get the 'height' of an object for use as column-likes.
- Check if an object is high-dimensional or not.
- Added subsetting methods for high-dimensional objects.
  • Loading branch information
LTLA committed Nov 8, 2023
1 parent 4eca0a3 commit bd5c6ba
Show file tree
Hide file tree
Showing 11 changed files with 142 additions and 8 deletions.
5 changes: 5 additions & 0 deletions src/biocutils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,12 @@
from .extract_row_names import extract_row_names
from .extract_column_names import extract_column_names

from .subset import subset
from .subset_rows import subset_rows
from .subset_sequence import subset_sequence

from .show_as_cell import show_as_cell
from .convert_to_dense import convert_to_dense

from .get_height import get_height
from .is_high_dimensional import is_high_dimensional
9 changes: 5 additions & 4 deletions src/biocutils/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@

from .combine_rows import combine_rows
from .combine_sequences import combine_sequences
from .is_high_dimensional import is_high_dimensional


def combine(*x: Any):
"""
Generic combine that checks if the objects are n-dimensional for n > 1
(i.e. has a ``shape`` property of length greater than 1); if so, it calls
:py:func:`~biocgenerics.combine_rows.combine_rows` to combine them by
the first dimension, otherwise it assumes that they are vector-like and
calls :py:func:`~biocgenerics.combine_seqs.combine_seqs` instead.
:py:func:`~biocutils.combine_rows.combine_rows` to combine them by the
first dimension, otherwise it assumes that they are vector-like and calls
:py:func:`~biocutils.combine_sequences.combine_sequences` instead.
Args:
x: Objects to combine.
Expand All @@ -21,7 +22,7 @@ def combine(*x: Any):
has_1d = False
has_nd = False
for y in x:
if hasattr(y, "shape") and len(y.shape) > 1:
if is_high_dimensional(y):
has_nd = True
else:
has_1d = True
Expand Down
24 changes: 24 additions & 0 deletions src/biocutils/get_height.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from typing import Any
from functools import singledispatch

from .is_high_dimensional import is_high_dimensional


@singledispatch
def get_height(x: Any) -> int:
    """
    Report the "height" of ``x``, i.e., the number of entries it would
    contribute if it were used as a column of a data frame or a similar
    container.

    Args:
        x:
            Some kind of object.

    Returns:
        The extent of the first dimension (``x.shape[0]``) when
        :py:func:`~biocutils.is_high_dimensional.is_high_dimensional`
        reports ``x`` as high-dimensional, otherwise ``len(x)``.
    """
    return x.shape[0] if is_high_dimensional(x) else len(x)
17 changes: 17 additions & 0 deletions src/biocutils/is_high_dimensional.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from functools import singledispatch


@singledispatch
def is_high_dimensional(x):
    """
    Decide whether an object counts as high-dimensional, which here means
    that it exposes a ``shape`` attribute whose length exceeds 1.

    Args:
        x:
            Some kind of object.

    Returns:
        ``True`` if ``x`` has a ``shape`` of length greater than 1,
        ``False`` otherwise (including when ``x`` has no ``shape`` at all).
    """
    # Objects without a shape (e.g. plain lists) are treated as vector-like.
    if not hasattr(x, "shape"):
        return False
    return len(x.shape) > 1
25 changes: 25 additions & 0 deletions src/biocutils/subset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from typing import Any, Sequence

from .subset_rows import subset_rows
from .subset_sequence import subset_sequence
from .is_high_dimensional import is_high_dimensional


def subset(x: Any, indices: Sequence[int]):
    """
    Generic subsetting entry point. The object is first checked with
    :py:func:`~biocutils.is_high_dimensional.is_high_dimensional` (i.e.
    whether it has a ``shape`` property of length greater than 1). If it is
    high-dimensional, :py:func:`~biocutils.subset_rows.subset_rows` is used
    to subset along the first dimension; otherwise the object is assumed to
    be vector-like and is handed to
    :py:func:`~biocutils.subset_sequence.subset_sequence`.

    Args:
        x:
            Object to be subsetted.

        indices:
            Sequence of integers, passed through unchanged to whichever
            subsetting function is selected.

    Returns:
        The subsetted object, typically the same type as ``x``.
    """
    # Vector-like objects take the sequence path; everything else is
    # subsetted along its first dimension.
    if not is_high_dimensional(x):
        return subset_sequence(x, indices)
    return subset_rows(x, indices)
24 changes: 24 additions & 0 deletions src/biocutils/subset_rows.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from typing import Any, Sequence, Union
from functools import singledispatch


@singledispatch
def subset_rows(x: Any, indices: Sequence[int]) -> Any:
    """
    Subset ``x`` by ``indices`` along its first dimension. The default
    method relies on ``x``'s ``__getitem__`` method, passing it a tuple with
    ``indices`` in the first position and a full slice for every other
    dimension reported by ``x.shape``.

    Args:
        x:
            Any high-dimensional object.

        indices:
            Sequence of non-negative integers specifying the rows of
            interest.

    Returns:
        The result of slicing ``x`` by ``indices``. The exact type
        depends on what ``x``'s ``__getitem__`` method returns.
    """
    # Start with "take everything" on each dimension, then restrict the first.
    selector = [slice(None) for _ in range(len(x.shape))]
    selector[0] = indices
    return x[tuple(selector)]
7 changes: 3 additions & 4 deletions src/biocutils/subset_sequence.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
from typing import Any, Sequence, Union
from functools import singledispatch
import numpy


@singledispatch
def subset_sequence(x: Any, indices: Sequence) -> Any:
def subset_sequence(x: Any, indices: Sequence[int]) -> Any:
"""
Subset ``x`` by ``indices`` to obtain a new object with the desired
subset of elements. This attempts to use ``x``'s ``__getitem__`` method.
Subset ``x`` by ``indices`` to obtain a new object. The default method
attempts to use ``x``'s ``__getitem__`` method.
Args:
x:
Expand Down
8 changes: 8 additions & 0 deletions tests/test_get_height.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from biocutils import get_height
import numpy as np


def test_get_height():
    """Height is ``len`` for 1-d inputs and the first extent for a matrix."""
    cases = [
        ([1, 2, 3], 3),
        (np.array([1, 2, 3]), 3),
        (np.random.rand(10, 20), 10),
    ]
    for obj, expected in cases:
        assert get_height(obj) == expected
8 changes: 8 additions & 0 deletions tests/test_is_high_dimensional.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from biocutils import is_high_dimensional
import numpy


def test_is_high_dimensional():
    """Lists and 1-d arrays are not high-dimensional; a 3-d array is."""
    plain_list = [1, 2, 3]
    vector = numpy.array([1, 2, 3])
    cube = numpy.random.rand(10, 20, 30)

    assert not is_high_dimensional(plain_list)
    assert not is_high_dimensional(vector)
    assert is_high_dimensional(cube)
10 changes: 10 additions & 0 deletions tests/test_subset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from biocutils import subset
import numpy as np


def test_subset_overall():
    """``subset`` handles both a plain list and a 2-d numpy array."""
    # Vector-like input: elements are picked by position.
    values = [1, 2, 3, 4, 5]
    picked = subset(values, [0, 2, 4])
    assert picked == [1, 3, 5]

    # High-dimensional input: subsetting happens on the first dimension.
    matrix = np.random.rand(10, 20)
    top_rows = subset(matrix, range(5))
    assert (top_rows == matrix[0:5, :]).all()
13 changes: 13 additions & 0 deletions tests/test_subset_rows.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from biocutils import subset_rows
import numpy as np


def test_subset_numpy():
    """``subset_rows`` matches first-dimension slicing for 1-d, 2-d and 3-d arrays."""
    for dims in [(10,), (10, 20), (10, 20, 30)]:
        y = np.random.rand(*dims)
        # The ellipsis stands in for a full slice on every trailing dimension.
        expected = y[0:5, ...]
        assert (subset_rows(y, range(5)) == expected).all()

0 comments on commit bd5c6ba

Please sign in to comment.