Skip to content

Commit

Permalink
Cleaned up documentation and tests for factorize().
Browse files Browse the repository at this point in the history
Specifically, check that factorize works correctly on an existing Factor.
  • Loading branch information
LTLA committed Dec 16, 2024
1 parent ad08f29 commit 40be075
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 9 deletions.
10 changes: 5 additions & 5 deletions src/biocutils/factorize.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@ def factorize(
:py:func:`~biocutils.match.match` for details.
Returns:
Tuple where the first list contains the unique levels and the second
array contains the integer index into the first list. Indexing the
first list by the second array will recover ``x``; except for any None
or masked values in ``x``, which will be -1 in the second array.
Tuple where the first element is a list of unique levels and the second
element is a NumPy array containing integer codes, i.e., indices into
the first list. Indexing the first list by the second array will
recover ``x``, with the exception of any None or masked values in ``x``,
which will instead be represented by -1 in the second array.
"""

if levels is None:
present = set()
levels = []
Expand Down
19 changes: 15 additions & 4 deletions tests/test_factorize.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from biocutils import factorize
from biocutils import factorize, Factor


def test_factor_simple():
def test_factorize_simple():
lev, ind = factorize([1, 3, 5, 5, 3, 1])
assert lev == [1, 3, 5]
assert list(ind) == [0, 1, 2, 2, 1, 0]
Expand All @@ -17,7 +17,7 @@ def test_factor_simple():
assert list(ind) == [0, -1, 1, -1, 2, -1]


def test_factor_levels():
def test_factorize_levels():
revlev = [5, 4, 3, 2, 1]
lev, ind = factorize([1, 3, 5, 5, 3, 1], levels=revlev)
assert lev == revlev
Expand All @@ -36,7 +36,7 @@ def test_factor_levels():
assert list(ind) == [1, 3, 5, 5, 3, 1]


def test_factor_sorted():
def test_factorize_sorted():
lev, ind = factorize(["C", "D", "A", "B", "C", "A"], sort_levels=True)
assert lev == ["A", "B", "C", "D"]
assert list(ind) == [2, 3, 0, 1, 2, 0]
Expand All @@ -47,3 +47,14 @@ def test_factor_sorted():
)
assert lev == ["D", "C", "B", "A"]
assert list(ind) == [1, 0, 3, 2, 1, 3]


# Check that factorize() accepts an existing Factor as its input.
def test_factorize_factor():
# The first argument runs 4..0 against the five levels "A".."E"; presumably
# these are codes indexing the levels, so the Factor reads E, D, C, B, A
# (consistent with the expected levels asserted below) -- TODO confirm.
f = Factor([4, 3, 2, 1, 0], ["A", "B", "C", "D", "E"])
lev, ind = factorize(f)
# Default call: the expected levels are the reverse of the Factor's own
# level list, and the expected codes are simply 0..4.
assert lev == ["E", "D", "C", "B", "A"]
assert list(ind) == [0, 1, 2, 3, 4]

# With sort_levels=True the returned levels must equal the Factor's own
# levels, and the codes must equal the values the Factor was built from.
lev, ind = factorize(f, sort_levels=True)
assert lev == f.levels.as_list()
assert list(ind) == [4, 3, 2, 1, 0]

0 comments on commit 40be075

Please sign in to comment.