From 2d484e064ae80c1b7207becad99708a8090cdc5a Mon Sep 17 00:00:00 2001 From: LTLA Date: Tue, 7 Nov 2023 17:00:43 -0800 Subject: [PATCH] Migrated the remaining generics from biocgenerics. --- src/biocutils/__init__.py | 3 +++ src/biocutils/extract_column_names.py | 30 +++++++++++++++++++++++++++ src/biocutils/extract_row_names.py | 30 +++++++++++++++++++++++++++ src/biocutils/show_as_cell.py | 26 +++++++++++++++++++++++ tests/test_extract_column_names.py | 8 +++++++ tests/test_extract_row_names.py | 10 +++++++++ tests/test_show_as_cell.py | 6 ++++++ 7 files changed, 113 insertions(+) create mode 100644 src/biocutils/extract_column_names.py create mode 100644 src/biocutils/extract_row_names.py create mode 100644 src/biocutils/show_as_cell.py create mode 100644 tests/test_extract_column_names.py create mode 100644 tests/test_extract_row_names.py create mode 100644 tests/test_show_as_cell.py diff --git a/src/biocutils/__init__.py b/src/biocutils/__init__.py index 783b530..d6531ef 100644 --- a/src/biocutils/__init__.py +++ b/src/biocutils/__init__.py @@ -33,4 +33,7 @@ from .combine_columns import combine_columns from .combine_sequences import combine_sequences +from .extract_row_names import extract_row_names +from .extract_column_names import extract_column_names +from .show_as_cell import show_as_cell from .convert_to_dense import convert_to_dense diff --git a/src/biocutils/extract_column_names.py b/src/biocutils/extract_column_names.py new file mode 100644 index 0000000..04d0104 --- /dev/null +++ b/src/biocutils/extract_column_names.py @@ -0,0 +1,30 @@ +from functools import singledispatch +from typing import Any +import numpy + +from .package_utils import is_package_installed + +__author__ = "jkanche" +__copyright__ = "jkanche" +__license__ = "MIT" + + +@singledispatch +def extract_column_names(x: Any) -> numpy.ndarray: + """Access column names from 2-dimensional representations. + + Args: + x: Any object. + + Returns: + Array of strings containing column names. 
+ """ + raise NotImplementedError(f"`extract_column_names` is not supported for class: '{type(x)}'.") + + +if is_package_installed("pandas") is True: + from pandas import DataFrame + + @extract_column_names.register(DataFrame) + def _colnames_dataframe(x: DataFrame) -> numpy.ndarray: + return numpy.array(x.columns, dtype=str) diff --git a/src/biocutils/extract_row_names.py b/src/biocutils/extract_row_names.py new file mode 100644 index 0000000..e9f06ce --- /dev/null +++ b/src/biocutils/extract_row_names.py @@ -0,0 +1,30 @@ +from functools import singledispatch +from typing import Any +import numpy + +from .package_utils import is_package_installed + +__author__ = "jkanche" +__copyright__ = "jkanche" +__license__ = "MIT" + + +@singledispatch +def extract_row_names(x: Any) -> numpy.ndarray: + """Access row names from 2-dimensional representations. + + Args: + x: Any object. + + Returns: + Array of strings containing row names. + """ + raise NotImplementedError(f"`extract_row_names` is not supported for class: '{type(x)}'.") + + +if is_package_installed("pandas") is True: + from pandas import DataFrame + + @extract_row_names.register(DataFrame) + def _rownames_dataframe(x: DataFrame) -> numpy.ndarray: + return numpy.array(x.index, dtype=str) diff --git a/src/biocutils/show_as_cell.py b/src/biocutils/show_as_cell.py new file mode 100644 index 0000000..83f1d30 --- /dev/null +++ b/src/biocutils/show_as_cell.py @@ -0,0 +1,26 @@ +from typing import Sequence, List, Any, Optional +from functools import singledispatch + + +@singledispatch +def show_as_cell(x: Any, indices: Sequence[int]) -> List[str]: + """ + Show the contents of ``x`` as a cell of a table, typically for use in the + ``__str__`` method of a class that contains ``x``. + + Args: + x: + Any object. By default, we assume that it can be treated as + a sequence, with a valid ``__getitem__`` method for an index. + + indices: + List of indices to be extracted. 
+ + Returns: + List of strings with the same length as ``indices``, containing a + string summary of each of the specified elements of ``x``. + """ + output = [] + for i in indices: + output.append(str(x[i])) + return output diff --git a/tests/test_extract_column_names.py b/tests/test_extract_column_names.py new file mode 100644 index 0000000..63be726 --- /dev/null +++ b/tests/test_extract_column_names.py @@ -0,0 +1,8 @@ +from biocutils import extract_column_names +import pandas +import numpy + + +def test_pandas_column_names(): + p = pandas.DataFrame({ "A": [1,2,3,4,5], "B": ["a", "b", "c", "d", "e" ]}) + assert (extract_column_names(p) == numpy.array(["A", "B"])).all() diff --git a/tests/test_extract_row_names.py b/tests/test_extract_row_names.py new file mode 100644 index 0000000..92e681d --- /dev/null +++ b/tests/test_extract_row_names.py @@ -0,0 +1,10 @@ +from biocutils import extract_row_names +import pandas +import numpy + + +def test_pandas_row_names(): + p = pandas.DataFrame({ "A": [1,2,3,4,5] }) + rn = ["a", "b", "c", "d", "e" ] + p.index = rn + assert (extract_row_names(p) == numpy.array(rn)).all() diff --git a/tests/test_show_as_cell.py b/tests/test_show_as_cell.py new file mode 100644 index 0000000..3a5bed6 --- /dev/null +++ b/tests/test_show_as_cell.py @@ -0,0 +1,6 @@ +from biocutils import show_as_cell + + +def test_show_as_cell(): + assert show_as_cell([1, 2, 3, 4], range(4)) == ["1", "2", "3", "4"] + assert show_as_cell([1, 2, 3, 4], [1, 3]) == ["2", "4"]