From c68334c924ddb4adf50b08946a2bf18b674ade35 Mon Sep 17 00:00:00 2001 From: Victor Alexandru Date: Sat, 9 Apr 2022 16:46:52 -0400 Subject: [PATCH] Support multi functions (#1) * Interface to multi-apply * update documentation, README Co-authored-by: jkanche --- AUTHORS.md | 3 ++- CHANGELOG.md | 4 +++ README.md | 2 +- docs/index.md | 18 +++++-------- docs/tutorial.md | 62 ++++++++++++++++++++++++++++++++++++++++++++ src/mopsy/helpers.py | 41 +++++++++++++++++++++++------ src/mopsy/mops.py | 52 +++++++++++++++++++++++++++++++++++-- src/mopsy/nops.py | 3 ++- src/mopsy/sops.py | 6 ++--- tests/test_nops.py | 47 +++++++++++++++++++++++++++++++++ tests/test_sops.py | 46 ++++++++++++++++++++++++++++++++ 11 files changed, 257 insertions(+), 27 deletions(-) create mode 100644 docs/tutorial.md diff --git a/AUTHORS.md b/AUTHORS.md index 368055a..6ea8945 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -1,3 +1,4 @@ # Contributors -* jkanche +* [Jayaram Kancherla](https://github.com/jkanche) +* [Victor Alexandru](https://github.com/VictorAlex1) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b677f7..d203ca4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog + +## Version 0.2.0 +- Support multi apply + ## Version 0.1 (development) - first release diff --git a/README.md b/README.md index b31a240..b12168a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # mopsy - Matrix Operations in Python -Convenient library that provides methods to perform row/column operations over numpy and scipy matrices in Python. The goal of this library is to provide a similar interface to perform base R matrix methods/MatrixStats methods in python. +Convenience library to perform row/column operations over numpy and scipy matrices. Provides an interface similar to base R matrix methods/MatrixStats methods in python. 
## Installation diff --git a/docs/index.md b/docs/index.md index ed8b51f..d71eadd 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,21 +1,17 @@ # mopsy +Convenience library to perform row/column operations over numpy and scipy matrices. Provides an interface similar to base R matrix methods/MatrixStats methods in python. -Add a short description here! +## Installation +Install from [pypi](https://pypi.org/project/mopsy/) -## Note - -> This is the main page of your project's [Sphinx] documentation. It is -> formatted in [Markdown]. Add additional pages by creating md-files in -> `docs` or rst-files (formated in [reStructuredText]) and adding links to -> them in the `Contents` section below. -> -> Please check [Sphinx], [recommonmark] and [autostructify] for more information -> about how to document your project and how to configure your preferences. - +```shell +pip install mopsy +``` ## Contents +* [Tutorial](tutorial) * [Overview](readme) * [License](license) * [Authors](authors) diff --git a/docs/tutorial.md b/docs/tutorial.md new file mode 100644 index 0000000..959461d --- /dev/null +++ b/docs/tutorial.md @@ -0,0 +1,62 @@ +# Tutorial + +## Sample data + +For the purpose of this tutorial, let's generate a test matrix and groups. + +```python +from mopsy import colsum +import random as rd +# generate a random sparse array with some density +from scipy.sparse import random +mat = random(10, 150, 0.25) + +# generate random groups +ngrps = 15 +gsets = [x for x in range(ngrps)] +groups = [rd.choice(gsets) for x in range(mat.shape[axis])] +``` + +## apply a function along an axis + +Methods are available to perform `sum`, `median`, `mean` along any axis. + +To apply any of these methods + +```python +colsum(mat, groups) +``` + +# Bring your own function + +`mopsy` also provides a generic `apply` method to perform row-wise or column-wise operations. 
+ +Let's define our own function to count the number of non-zero elements in the array + +```python +import numpy as np + +def nz_func(arr): + return np.count_nonzero(arr) +``` + +Now let's apply the function: + +```python +from mopsy import apply + +apply(nz_func, mat, axis=1) +``` + +## Multiple functions + +`mopsy` also supports multiple functions. + +```python +from mopsy import multi_apply +import numpy as np + +multi_apply([np.sum, np.mean], mat, axis=0) +``` + +That's all for today! \ No newline at end of file diff --git a/src/mopsy/helpers.py b/src/mopsy/helpers.py index 0a9db21..74aad40 100644 --- a/src/mopsy/helpers.py +++ b/src/mopsy/helpers.py @@ -1,7 +1,7 @@ from statistics import mean, median from .utils import get_matrix_type -from typing import Union, Callable, Any +from typing import List, Union, Callable, Any import numpy import scipy @@ -17,7 +17,7 @@ def colsum( Args: mat (Union[numpy.ndarray, scipy.sparse.spmatrix]): matrix - group (_type_, optional): group variable. Defaults to None. + group (list, optional): group variable. Defaults to None. Returns: numpy.ndarray: matrix @@ -32,7 +32,7 @@ def rowsum( Args: mat (Union[numpy.ndarray, scipy.sparse.spmatrix]): matrix - group (_type_, optional): group variable. Defaults to None. + group (list, optional): group variable. Defaults to None. Returns: numpy.ndarray: matrix @@ -47,7 +47,7 @@ def colmean( Args: mat (Union[numpy.ndarray, scipy.sparse.spmatrix]): matrix - group (_type_, optional): group variable. Defaults to None. + group (list, optional): group variable. Defaults to None. Returns: numpy.ndarray: matrix @@ -62,7 +62,7 @@ def rowmean( Args: mat (Union[numpy.ndarray, scipy.sparse.spmatrix]): matrix - group (_type_, optional): group variable. Defaults to None. + group (list, optional): group variable. Defaults to None. Returns: numpy.ndarray: matrix @@ -77,7 +77,7 @@ def colmedian( Args: mat (Union[numpy.ndarray, scipy.sparse.spmatrix]): matrix - group (_type_, optional): group variable. 
Defaults to None. + group (list, optional): group variable. Defaults to None. Returns: numpy.ndarray: matrix @@ -92,7 +92,7 @@ def rowmedian( Args: mat (Union[numpy.ndarray, scipy.sparse.spmatrix]): matrix - group (_type_, optional): group variable. Defaults to None. + group (list, optional): group variable. Defaults to None. Returns: numpy.ndarray: matrix @@ -109,11 +109,36 @@ def apply( """a generic apply function Args: + func (Callable): function to be called. mat (Union[numpy.ndarray, scipy.sparse.spmatrix]): matrix - group (_type_, optional): group variable. Defaults to None. + group (list, optional): group variable. Defaults to None. + axis (int): 0 for rows, 1 for columns. Returns: numpy.ndarray: matrix """ tmat = get_matrix_type(mat) return tmat.apply(func, group=group, axis=axis) + + +def multi_apply( + funcs: List[Callable[[list], Any]], + mat: Union[numpy.ndarray, scipy.sparse.spmatrix], + group: list, + axis: int, +): + """Apply multiple functions, the first axis + of the ndarray specifies the results of the inputs functions in + the same order + + Args: + funcs (List[Callable[[list], Any]]): functions to be called. + mat (Union[numpy.ndarray, scipy.sparse.spmatrix]): matrix + group (list, optional): group variable. Defaults to None. + axis (int): 0 for rows, 1 for columns. 
+ + Returns: + numpy.ndarray: matrix + """ + tmat = get_matrix_type(mat) + return tmat.multi_apply(funcs, group=group, axis=axis) diff --git a/src/mopsy/mops.py b/src/mopsy/mops.py index 033f8b9..e35c926 100644 --- a/src/mopsy/mops.py +++ b/src/mopsy/mops.py @@ -1,6 +1,6 @@ from itertools import groupby import numpy as np -from typing import Any, Callable +from typing import Any, Callable, List __author__ = "jkanche" __copyright__ = "jkanche" @@ -47,7 +47,7 @@ def _apply(self, func: Callable[[list], Any], axis: int): return np.apply_along_axis(func, axis, self.matrix) def apply( - self, func: Callable[[list], Any], group: list = None, axis: int = 1 + self, func: Callable[[list], Any], group: list = None, axis: int = 0 ) -> np.ndarray: """apply a function to groups along an axis @@ -77,3 +77,51 @@ def apply( raise Exception("ApplyFuncError") return result + + def multi_apply( + self, + funcs: List[Callable[[list], Any]], + group: list = None, + axis: int = 0, + ) -> np.ndarray: + """Apply multiple functions, the first axis + of the ndarray specifies the results of the inputs functions in + the same order + + Args: + funcs (List[Callable[[list], Any]]): functions to be called. + group (list, optional): group variable. Defaults to None. + axis (int, optional): 0 for rows, 1 for columns. Defaults to 0. 
+ + Raises: + Exception: ApplyFuncError, when a function cannot be applied + + Returns: + numpy.ndarray: a matrix + """ + result = [] + try: + + if group is None: + tmats = [self._apply(f, axis=axis) for f in funcs] + nmats = [ + x[np.newaxis] if axis == 0 else x[np.newaxis].T + for x in tmats + ] + result = np.stack(nmats) + else: + tmats = [] + for g, kmat in self.iter(group, axis): + tmats.append([kmat._apply(f, axis=axis) for f in funcs]) + + nmats = [] + for smats in zip(*tmats): + nmats.append(np.stack(smats, axis=axis)) + + result = np.stack(nmats) + + except Exception as e: + print(f"Error: applying function: {str(e)}") + raise Exception("ApplyFuncError") + + return result diff --git a/src/mopsy/nops.py b/src/mopsy/nops.py index c8cd511..b87a65b 100644 --- a/src/mopsy/nops.py +++ b/src/mopsy/nops.py @@ -1,3 +1,4 @@ +from typing import Any, Iterator, Tuple, Type from .mops import Mops import numpy as np @@ -18,7 +19,7 @@ def __init__(self, mat: np.ndarray) -> None: """ super().__init__(mat) - def iter(self, group=None, axis=0) -> tuple: + def iter(self, group=None, axis=0) -> Iterator[Tuple]: """an Iterator over groups and an axis Args: diff --git a/src/mopsy/sops.py b/src/mopsy/sops.py index 0f60f70..bbfc988 100644 --- a/src/mopsy/sops.py +++ b/src/mopsy/sops.py @@ -4,7 +4,7 @@ import scipy.sparse as sp import numpy as np -from typing import Callable, Any +from typing import Callable, Any, Iterator, Tuple __author__ = "jkanche" __copyright__ = "jkanche" @@ -22,7 +22,7 @@ def __init__(self, mat: sp.spmatrix) -> None: """ super().__init__(mat) - def iter(self, group: list = None, axis: int = 0) -> tuple: + def iter(self, group: list = None, axis: int = 0) -> Iterator[Tuple]: """an Iterator over groups and an axis Args: @@ -35,7 +35,7 @@ def iter(self, group: list = None, axis: int = 0) -> tuple: mat = self.matrix.tocsr() if axis == 0 else self.matrix.tocsc() if group is None: - yield (group, Sops(mat)) + yield (group, self) else: idx_groups = 
self.groupby_indices(group) for k, v in idx_groups.items(): diff --git a/tests/test_nops.py b/tests/test_nops.py index e8f066c..d5e672c 100644 --- a/tests/test_nops.py +++ b/tests/test_nops.py @@ -1,3 +1,4 @@ +import numpy as np from mopsy.nops import Nops from scipy.sparse import eye @@ -66,3 +67,49 @@ def test_group_apply_col_None(): assert rmat[ :, ].flatten().tolist() == [1.0, 1.0, 1.0, 1.0, 1.0] + + +def test_multi_apply_rows_None(): + tmat = Nops(mat) + rmat = tmat.multi_apply([np.sum, np.mean], axis=0) + print(rmat.shape) + assert rmat is not None + assert rmat.shape[0] == 2 + assert rmat.shape[1] == 1 + assert rmat.shape[2] == 5 + assert rmat[:, 0].tolist() == [[1.0, 1.0, 1.0, 1.0, 1.0], [0.2, 0.2, 0.2, 0.2, 0.2]] + + +def test_multi_apply_cols_None(): + tmat = Nops(mat) + rmat = tmat.multi_apply([np.sum, np.mean], axis=1) + print(rmat.shape) + assert rmat is not None + assert rmat.shape[0] == 2 + assert rmat.shape[1] == 5 + assert rmat.shape[2] == 1 + assert rmat[:, 0].tolist() == [[1.0], [0.2]] + +def test_multi_apply_rows(): + tmat = Nops(mat) + rmat = tmat.multi_apply([np.sum, np.mean], group=group, axis=0) + print(rmat.shape) + print(rmat) + assert rmat is not None + assert rmat.shape[0] == 2 + assert rmat.shape[1] == 2 + assert rmat.shape[2] == 5 + assert rmat[:, 0].tolist() == [[1.0, 0.0, 1.0, 0.0, 0.0], [0.5, 0.0, 0.5, 0.0, 0.0]] + + +def test_multi_apply_cols(): + tmat = Nops(mat) + rmat = tmat.multi_apply([np.sum, np.mean], group=group, axis=1) + print(rmat.shape) + print(rmat) + assert rmat is not None + assert rmat.shape[0] == 2 + assert rmat.shape[1] == 5 + assert rmat.shape[2] == 2 + assert rmat[:, 0].tolist() == [[1.0, 0.0], [0.5, 0.0]] + diff --git a/tests/test_sops.py b/tests/test_sops.py index e01ee24..9f0a76c 100644 --- a/tests/test_sops.py +++ b/tests/test_sops.py @@ -1,3 +1,4 @@ +import numpy as np from mopsy.sops import Sops from scipy.sparse import eye @@ -66,3 +67,48 @@ def test_group_apply_col_None(): assert rmat[ :, 
].flatten().tolist() == [1.0, 1.0, 1.0, 1.0, 1.0] + + +def test_multi_apply_rows(): + tmat = Sops(mat) + rmat = tmat.multi_apply([np.sum, np.mean], axis=0) + print(rmat.shape) + assert rmat is not None + assert rmat.shape[0] == 2 + assert rmat.shape[1] == 1 + assert rmat.shape[2] == 5 + assert rmat[:, 0].tolist() == [[1.0, 1.0, 1.0, 1.0, 1.0], [0.2, 0.2, 0.2, 0.2, 0.2]] + + +def test_multi_apply_cols(): + tmat = Sops(mat) + rmat = tmat.multi_apply([np.sum, np.mean], axis=1) + print(rmat.shape) + assert rmat is not None + assert rmat.shape[0] == 2 + assert rmat.shape[1] == 5 + assert rmat.shape[2] == 1 + assert rmat[:, 0].tolist() == [[1.0], [0.2]] + +def test_multi_apply_rows(): + tmat = Sops(mat) + rmat = tmat.multi_apply([np.sum, np.mean], group=group, axis=0) + print(rmat.shape) + print(rmat) + assert rmat is not None + assert rmat.shape[0] == 2 + assert rmat.shape[1] == 2 + assert rmat.shape[2] == 5 + assert rmat[:, 0].tolist() == [[1.0, 0.0, 1.0, 0.0, 0.0], [0.5, 0.0, 0.5, 0.0, 0.0]] + + +def test_multi_apply_cols(): + tmat = Sops(mat) + rmat = tmat.multi_apply([np.sum, np.mean], group=group, axis=1) + print(rmat.shape) + print(rmat) + assert rmat is not None + assert rmat.shape[0] == 2 + assert rmat.shape[1] == 5 + assert rmat.shape[2] == 2 + assert rmat[:, 0].tolist() == [[1.0, 0.0], [0.5, 0.0]] \ No newline at end of file