-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Throw in some more utilities for downstream use.
- Get the 'height' of an object when it is used as a column-like.
- Check whether an object is high-dimensional.
- Add subsetting methods for high-dimensional objects.
- Loading branch information
Showing
11 changed files
with
142 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
from typing import Any | ||
from functools import singledispatch | ||
|
||
from .is_high_dimensional import is_high_dimensional | ||
|
||
|
||
@singledispatch
def get_height(x: Any) -> int:
    """
    Get the "height" of an object, i.e., its extent as if it were a column
    of a data frame or a similar container.

    The default method returns the extent of the first dimension for
    high-dimensional objects (as judged by ``is_high_dimensional``), and
    ``len(x)`` for everything else. As a ``singledispatch`` generic, it can
    be specialized for particular types via ``get_height.register``.

    Args:
        x:
            Some kind of object.

    Returns:
        The height of the object.
    """
    if is_high_dimensional(x):
        # For objects with more than one dimension, the height is the extent
        # of the first dimension.
        return x.shape[0]
    return len(x)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from functools import singledispatch | ||
|
||
|
||
@singledispatch
def is_high_dimensional(x) -> bool:
    """
    Whether an object is high-dimensional, i.e., has a ``shape``
    attribute that is of length greater than 1.

    Objects without a ``shape`` attribute (e.g., plain lists) and objects
    whose ``shape`` has a single entry are both reported as not
    high-dimensional. As a ``singledispatch`` generic, this can be
    specialized for particular types via ``is_high_dimensional.register``.

    Args:
        x:
            Some kind of object.

    Returns:
        Whether ``x`` is high-dimensional.
    """
    # ``hasattr`` short-circuits so that ``x.shape`` is only touched when
    # the attribute actually exists.
    return hasattr(x, "shape") and len(x.shape) > 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
from typing import Any, Sequence | ||
|
||
from .subset_rows import subset_rows | ||
from .subset_sequence import subset_sequence | ||
from .is_high_dimensional import is_high_dimensional | ||
|
||
|
||
def subset(x: Any, indices: Sequence[int]):
    """
    Generic subset that checks if the objects are n-dimensional for n > 1 (i.e.
    has a ``shape`` property of length greater than 1); if so, it calls
    :py:func:`~biocutils.subset_rows.subset_rows` to subset them along the
    first dimension, otherwise it assumes that they are vector-like and calls
    :py:func:`~biocutils.subset_sequence.subset_sequence` instead.

    Args:
        x:
            Object to be subsetted.

        indices:
            Sequence of integer positions to retain. This is passed through
            unchanged to whichever subsetting function is chosen.

    Returns:
        The subsetted object, typically the same type as ``x``.
    """
    if is_high_dimensional(x):
        return subset_rows(x, indices)
    return subset_sequence(x, indices)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
from typing import Any, Sequence, Union | ||
from functools import singledispatch | ||
|
||
|
||
@singledispatch
def subset_rows(x: Any, indices: Sequence[int]) -> Any:
    """
    Subset ``x`` by ``indices`` on the first dimension. The default
    method attempts to use ``x``'s ``__getitem__`` method, passing a tuple
    with ``indices`` in the first position and a full slice for every
    remaining dimension.

    Args:
        x:
            Any high-dimensional object. The default method requires a
            ``shape`` attribute to determine the number of dimensions.

        indices:
            Sequence of non-negative integers specifying the rows
            (i.e., positions along the first dimension) of interest.

    Returns:
        The result of slicing ``x`` by ``indices``. The exact type
        depends on what ``x``'s ``__getitem__`` method returns.
    """
    # Start with "take everything" on every dimension...
    selectors = [slice(None) for _ in range(len(x.shape))]
    # ...then restrict only the first dimension to the requested rows.
    selectors[0] = indices
    return x[tuple(selectors)]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from biocutils import get_height | ||
import numpy as np | ||
|
||
|
||
def test_get_height():
    # Objects that are not high-dimensional fall back to len().
    assert get_height([1, 2, 3]) == 3
    assert get_height(np.array([1, 2, 3])) == 3
    # High-dimensional objects report the extent of their first dimension.
    assert get_height(np.random.rand(10, 20)) == 10
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from biocutils import is_high_dimensional | ||
import numpy | ||
|
||
|
||
def test_is_high_dimensional():
    # No ``shape`` attribute at all.
    assert not is_high_dimensional([1, 2, 3])
    # A ``shape`` of length 1 is still vector-like.
    assert not is_high_dimensional(numpy.array([1, 2, 3]))
    # Two dimensions is the smallest case that counts as high-dimensional.
    assert is_high_dimensional(numpy.random.rand(10, 20))
    assert is_high_dimensional(numpy.random.rand(10, 20, 30))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
from biocutils import subset | ||
import numpy as np | ||
|
||
|
||
def test_subset_overall():
    # Vector-like input is routed to the sequence subsetter.
    x = [1, 2, 3, 4, 5]
    assert subset(x, [0, 2, 4]) == [1, 3, 5]

    # Two-dimensional input is subsetted along the first dimension.
    y = np.random.rand(10, 20)
    assert (subset(y, range(5)) == y[0:5, :]).all()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from biocutils import subset_rows | ||
import numpy as np | ||
|
||
|
||
def test_subset_numpy():
    # The first dimension is subsetted regardless of the number of dimensions.
    y = np.random.rand(10)
    assert (subset_rows(y, range(5)) == y[0:5]).all()

    y = np.random.rand(10, 20)
    assert (subset_rows(y, range(5)) == y[0:5, :]).all()

    y = np.random.rand(10, 20, 30)
    assert (subset_rows(y, range(5)) == y[0:5, :, :]).all()