-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Migrate the combining generics from biocgenerics to biocutils. (#8)
This aims to consolidate all generics into a single package, rather than scattering the various functions between here and biocgenerics.
- Loading branch information
Showing
13 changed files
with
597 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -68,6 +68,7 @@ testing = | |
pytest | ||
pytest-cov | ||
pandas | ||
scipy | ||
|
||
[options.entry_points] | ||
# Add here console scripts like: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
def _check_array_dimensions(x, active: int) -> bool:
    """Check that a collection of arrays can be combined along one dimension.

    Every array must have the same number of dimensions as the first one, and
    the same extent on every dimension except ``active``.

    Args:
        x: Sequence of objects exposing a ``shape`` attribute.
        active: Index of the dimension along which the arrays will be
            combined. Extents are allowed to differ on this dimension only.

    Returns:
        True if all arrays are compatible.

    Raises:
        ValueError: If an array has a different number of dimensions, or a
            different extent on any dimension other than ``active``.
    """
    first = x[0].shape
    for i, arr in enumerate(x[1:], start=1):
        current = arr.shape
        if len(first) != len(current):
            raise ValueError(
                "inconsistent dimensions for combining arrays (expected "
                + str(len(first))
                + ", got "
                + str(len(current))
                + " for array "
                + str(i)
                + ")"
            )
        for j, (expected, observed) in enumerate(zip(first, current)):
            if j != active and expected != observed:
                # Report the dimension that actually mismatches (j), not the
                # active one, whose extents are allowed to differ.
                raise ValueError(
                    "inconsistent dimension extents for combining arrays on dimension "
                    + str(active)
                    + " (expected "
                    + str(expected)
                    + ", got "
                    + str(observed)
                    + " on dimension "
                    + str(j)
                    + " for array "
                    + str(i)
                    + ")"
                )
    return True
|
||
|
||
def _coerce_sparse_matrix(first, combined, module):
    """Convert ``combined`` to the sparse matrix format used by ``first``.

    Args:
        first: Object whose sparse matrix class decides the output format.
        combined: Sparse object offering the ``to<format>()`` converters.
        module: Module providing the ``*_matrix`` classes
            (e.g. ``scipy.sparse``).

    Returns:
        ``combined`` converted to the format of ``first``, or ``combined``
        unchanged when ``first`` matches none of the known matrix classes.
    """
    # Formats are tested in this order; the first matching class wins.
    for fmt in ("csr", "csc", "bsr", "coo", "dia", "lil"):
        matrix_class = getattr(module, fmt + "_matrix")
        if isinstance(first, matrix_class):
            converter = getattr(combined, "to" + fmt)
            return converter()
    return combined
|
||
|
||
def _coerce_sparse_array(first, combined, module):
    """Convert ``combined`` to the sparse array format used by ``first``.

    Args:
        first: Object whose sparse array class decides the output format.
        combined: Sparse object offering the ``to<format>()`` converters.
        module: Module providing the ``*_array`` classes
            (e.g. ``scipy.sparse``).

    Returns:
        ``combined`` converted to the format of ``first``, or ``combined``
        unchanged when ``first`` matches none of the known array classes.
    """
    # Formats are tested in this order; the first matching class wins.
    for fmt in ("csr", "csc", "bsr", "coo", "dia", "lil"):
        array_class = getattr(module, fmt + "_array")
        if isinstance(first, array_class):
            converter = getattr(combined, "to" + fmt)
            return converter()
    return combined
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
from typing import Any | ||
|
||
from .combine_rows import combine_rows | ||
from .combine_sequences import combine_sequences | ||
|
||
|
||
def combine(*x: Any):
    """Combine objects, choosing the strategy from the first element's shape.

    If the first element of ``x`` has a ``shape`` attribute of length greater
    than 1 (i.e. it is n-dimensional for n > 1), all objects are passed to
    :py:func:`~biocutils.combine_rows.combine_rows` and combined along their
    first dimension. Otherwise the objects are assumed to be vector-like and
    are passed to :py:func:`~biocutils.combine_sequences.combine_sequences`.

    Args:
        x: Objects to combine. Only the first element is inspected to pick
            the combining function.

    Returns:
        A combined object, typically the same type as the first element in ``x``.
    """
    first = x[0]
    is_multidimensional = hasattr(first, "shape") and len(first.shape) > 1
    combiner = combine_rows if is_multidimensional else combine_sequences
    return combiner(*x)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
from functools import singledispatch | ||
from typing import Any | ||
from warnings import warn | ||
import numpy | ||
|
||
from ._utils_combine import _check_array_dimensions, _coerce_sparse_matrix, _coerce_sparse_array | ||
from .is_list_of_type import is_list_of_type | ||
from .package_utils import is_package_installed | ||
from .convert_to_dense import convert_to_dense | ||
|
||
__author__ = "jkanche" | ||
__copyright__ = "jkanche" | ||
__license__ = "MIT" | ||
|
||
|
||
@singledispatch
def combine_columns(*x: Any):
    """Combine n-dimensional objects along the second dimension.

    This is a generic function dispatched on the type of the first element
    of ``x``; this fallback is reached only when no method is registered for
    that type.

    If all elements are :py:class:`~numpy.ndarray`, they are combined using
    numpy's :py:func:`~numpy.concatenate`.

    If all elements are either :py:class:`~scipy.sparse.spmatrix` or
    :py:class:`~scipy.sparse.sparray`, these objects are combined using
    scipy's :py:class:`~scipy.sparse.hstack`.

    If all elements are :py:class:`~pandas.DataFrame` objects, they are
    combined using :py:func:`~pandas.concat` along the second axis.

    Args:
        x:
            n-dimensional objects to combine. All elements of x are expected
            to be the same class.

    Returns:
        Combined object, typically the same type as the first entry of ``x``.

    Raises:
        NotImplementedError: If no method is registered for the type of the
            first element of ``x``.
    """
    type_name = type(x[0]).__name__
    raise NotImplementedError(f"no `combine_columns` method implemented for '{type_name}' objects")
|
||
|
||
@combine_columns.register(numpy.ndarray)
def _combine_columns_dense_arrays(*x: numpy.ndarray):
    """Combine arrays along the second dimension when the first is a NumPy array.

    Dimensions are validated first (extents may differ only on dimension 1),
    then every element is passed through ``convert_to_dense`` before the
    arrays are concatenated along axis 1.
    """
    _check_array_dimensions(x, active=1)
    dense = [convert_to_dense(arr) for arr in x]
    return numpy.concatenate(dense, axis=1)
|
||
|
||
if is_package_installed("scipy") is True:
    import scipy.sparse as sp

    def _combine_columns_sparse_matrices(*x):
        """Combine along the second dimension when the first element is a sparse matrix.

        If every element is a ``scipy.sparse.spmatrix``, the result of
        ``scipy.sparse.hstack`` is coerced to the format of the first element.
        Otherwise a warning is issued and all elements are converted to dense
        arrays and concatenated along axis 1.
        """
        _check_array_dimensions(x, 1)
        if is_list_of_type(x, sp.spmatrix):
            combined = sp.hstack(x)
            return _coerce_sparse_matrix(x[0], combined, sp)

        warn("not all elements are scipy sparse matrices")
        x = [convert_to_dense(y) for y in x]
        return numpy.concatenate(x, axis=1)

    try:
        combine_columns.register(sp.spmatrix, _combine_columns_sparse_matrices)
    except AttributeError:
        # Best effort: skip registration if this SciPy build does not expose
        # the class.
        pass

    def _combine_columns_sparse_arrays(*x):
        """Combine along the second dimension when the first element is a sparse array.

        If every element is a ``scipy.sparse.sparray``, the result of
        ``scipy.sparse.hstack`` is coerced to the format of the first element.
        Otherwise a warning is issued and all elements are converted to dense
        arrays and concatenated along axis 1.
        """
        _check_array_dimensions(x, 1)
        if is_list_of_type(x, sp.sparray):
            combined = sp.hstack(x)
            return _coerce_sparse_array(x[0], combined, sp)

        warn("not all elements are scipy sparse arrays")
        x = [convert_to_dense(y) for y in x]
        return numpy.concatenate(x, axis=1)

    # This handler was previously defined but never registered, so sparse
    # arrays fell through to the NotImplementedError fallback.
    try:
        combine_columns.register(sp.sparray, _combine_columns_sparse_arrays)
    except AttributeError:
        # Best effort: skip registration if this SciPy build does not expose
        # the class.
        pass
|
||
|
||
if is_package_installed("pandas") is True:
    from pandas import DataFrame, concat

    def _combine_columns_pandas_dataframe(*x):
        """Combine objects along the second axis using ``pandas.concat``."""
        return concat(x, axis=1)

    # Dispatch to the pandas method when the first element is a DataFrame.
    combine_columns.register(DataFrame, _combine_columns_pandas_dataframe)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
from functools import singledispatch | ||
from typing import Any | ||
from warnings import warn | ||
import numpy | ||
|
||
from ._utils_combine import _check_array_dimensions, _coerce_sparse_matrix, _coerce_sparse_array | ||
from .is_list_of_type import is_list_of_type | ||
from .package_utils import is_package_installed | ||
from .convert_to_dense import convert_to_dense | ||
|
||
__author__ = "jkanche" | ||
__copyright__ = "jkanche" | ||
__license__ = "MIT" | ||
|
||
|
||
@singledispatch
def combine_rows(*x: Any):
    """Combine n-dimensional objects along their first dimension.

    This is a generic function dispatched on the type of the first element
    of ``x``; this fallback is reached only when no method is registered for
    that type.

    If all elements are :py:class:`~numpy.ndarray`, they are combined using
    numpy's :py:func:`~numpy.concatenate`.

    If all elements are either :py:class:`~scipy.sparse.spmatrix` or
    :py:class:`~scipy.sparse.sparray`, these objects are combined using
    scipy's :py:class:`~scipy.sparse.vstack`.

    If all elements are :py:class:`~pandas.DataFrame` objects, they are
    combined using :py:func:`~pandas.concat` along the first axis.

    Args:
        x:
            One or more n-dimensional objects to combine. All elements of x
            are expected to be the same class.

    Returns:
        Combined object, typically the same type as the first entry of ``x``.

    Raises:
        NotImplementedError: If no method is registered for the type of the
            first element of ``x``.
    """
    type_name = type(x[0]).__name__
    raise NotImplementedError(f"no `combine_rows` method implemented for '{type_name}' objects")
|
||
|
||
@combine_rows.register(numpy.ndarray)
def _combine_rows_dense_arrays(*x: numpy.ndarray):
    """Combine arrays along the first dimension when the first is a NumPy array.

    Dimensions are validated first (extents may differ only on dimension 0),
    then every element is passed through ``convert_to_dense`` before the
    arrays are concatenated along the first axis.
    """
    _check_array_dimensions(x, active=0)
    dense = [convert_to_dense(arr) for arr in x]
    # axis=0 is numpy.concatenate's default, spelled out for clarity.
    return numpy.concatenate(dense, axis=0)
|
||
|
||
if is_package_installed("scipy"):
    import scipy.sparse as sp

    def _combine_rows_sparse_matrices(*x):
        """Combine along the first dimension when the first element is a sparse matrix.

        If every element is a ``scipy.sparse.spmatrix``, the result of
        ``scipy.sparse.vstack`` is coerced to the format of the first element.
        Otherwise a warning is issued and all elements are converted to dense
        arrays and concatenated along the first axis.
        """
        _check_array_dimensions(x, 0)
        if is_list_of_type(x, sp.spmatrix):
            combined = sp.vstack(x)
            return _coerce_sparse_matrix(x[0], combined, sp)

        warn("not all elements are SciPy sparse matrices")
        x = [convert_to_dense(y) for y in x]
        return numpy.concatenate(x)

    def _combine_rows_sparse_arrays(*x):
        """Combine along the first dimension when the first element is a sparse array.

        If every element is a ``scipy.sparse.sparray``, the result of
        ``scipy.sparse.vstack`` is coerced to the format of the first element.
        Otherwise a warning is issued and all elements are converted to dense
        arrays and concatenated along the first axis.
        """
        _check_array_dimensions(x, 0)
        if is_list_of_type(x, sp.sparray):
            combined = sp.vstack(x)
            # The original referenced an undefined name `first` here.
            return _coerce_sparse_array(x[0], combined, sp)

        warn("not all elements are SciPy sparse arrays")
        x = [convert_to_dense(y) for y in x]
        return numpy.concatenate(x)

    # Register only after both handlers exist. The sparse-array registration
    # used to run before its handler was defined, and the resulting NameError
    # was silently swallowed, leaving sparse arrays unsupported.
    try:
        combine_rows.register(sp.spmatrix, _combine_rows_sparse_matrices)
    except AttributeError:
        # Best effort: skip registration if this SciPy build does not expose
        # the class.
        pass

    try:
        combine_rows.register(sp.sparray, _combine_rows_sparse_arrays)
    except AttributeError:
        # Best effort: skip registration if this SciPy build does not expose
        # the class.
        pass
|
||
|
||
if is_package_installed("pandas"):
    from pandas import DataFrame, concat

    def _combine_rows_pandas_dataframe(*x):
        """Combine objects along the first axis using ``pandas.concat``."""
        return concat(x, axis=0)

    # Dispatch to the pandas method when the first element is a DataFrame.
    combine_rows.register(DataFrame, _combine_rows_pandas_dataframe)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
from functools import singledispatch | ||
from itertools import chain | ||
from typing import Any | ||
from warnings import warn | ||
import numpy | ||
|
||
from .is_list_of_type import is_list_of_type | ||
from .package_utils import is_package_installed | ||
|
||
__author__ = "jkanche" | ||
__copyright__ = "jkanche" | ||
__license__ = "MIT" | ||
|
||
|
||
@singledispatch
def combine_sequences(*x: Any):
    """Combine vector-like objects (1-dimensional arrays).

    This is a generic function dispatched on the type of the first element
    of ``x``; this fallback is reached only when no method is registered for
    that type.

    If the first element is a :py:class:`~list`, the elements are chained
    into a single list.

    If the first element is a :py:class:`~numpy.ndarray`, the elements are
    combined using numpy's :py:func:`~numpy.concatenate`.

    If the first element is a :py:class:`~pandas.Series`, the elements are
    combined using :py:func:`~pandas.concat`.

    Args:
        x:
            Vector-like objects to combine.
            All elements of ``x`` are expected to be the same class or
            at least compatible with each other.

    Returns:
        A combined object, ideally of the same type as the first element in ``x``.

    Raises:
        NotImplementedError: If no method is registered for the type of the
            first element of ``x``.
    """
    type_name = type(x[0]).__name__
    raise NotImplementedError(f"no `combine_sequences` method implemented for '{type_name}' objects")
|
||
|
||
@combine_sequences.register(list)
def _combine_sequences_lists(*x: list):
    """Chain the elements of every input, in order, into one new list."""
    return list(chain.from_iterable(x))
|
||
|
||
@combine_sequences.register(numpy.ndarray)
def _combine_sequences_dense_arrays(*x: numpy.ndarray):
    """Concatenate the inputs into a single 1-dimensional NumPy array."""
    # axis=None makes numpy flatten each input before joining them.
    flattened = numpy.concatenate(x, axis=None)
    return flattened
|
||
|
||
if is_package_installed("pandas") is True:
    from pandas import Series, concat

    @combine_sequences.register(Series)
    def _combine_sequences_pandas_series(*x):
        """Concatenate inputs with ``pandas.concat`` when the first is a Series.

        Any element that is not already a ``Series`` is wrapped in one before
        concatenation.
        """
        if is_list_of_type(x, Series):
            return concat(x)

        # Mixed input: wrap the non-Series elements so concat accepts them.
        as_series = [elem if isinstance(elem, Series) else Series(elem) for elem in x]
        return concat(as_series)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
from functools import singledispatch | ||
from typing import Any | ||
import numpy | ||
|
||
from .package_utils import is_package_installed | ||
|
||
|
||
@singledispatch
def convert_to_dense(x: Any) -> numpy.ndarray:
    """Convert something to a NumPy dense array of the same shape.

    This is typically used as a fallback for the various combining methods
    when there are lots of different array types that ``numpy.concatenate``
    doesn't understand. It is a generic function dispatched on the type of
    ``x``; this default implementation hands ``x`` to ``numpy.array``.

    Args:
        x: Some array-like object to be stored as a NumPy array.

    Returns:
        A NumPy array.
    """
    dense = numpy.array(x)
    return dense
|
||
|
||
@convert_to_dense.register(numpy.ndarray)
def _convert_to_dense_numpy(x: numpy.ndarray) -> numpy.ndarray:
    """Return ``x`` itself; a NumPy array is already dense, so no copy is made."""
    return x
|
||
|
||
if is_package_installed("scipy"):
    import scipy.sparse as sp

    def _convert_sparse_to_dense(x):
        """Densify a sparse object by calling its ``todense`` method."""
        # NOTE(review): SciPy documents todense() on spmatrix as returning a
        # numpy.matrix rather than a plain ndarray - confirm this is intended.
        dense = x.todense()
        return dense

    # Register the same converter for each sparse base class. Registration is
    # best effort: any failure (presumably a SciPy build lacking the class)
    # is ignored.
    for _sparse_base_name in ("spmatrix", "sparray"):
        try:
            convert_to_dense.register(getattr(sp, _sparse_base_name), _convert_sparse_to_dense)
        except Exception:
            pass
    del _sparse_base_name  # do not leave the loop variable in the module namespace
Oops, something went wrong.