Skip to content

Commit

Permalink
Minor fixes to Factor construction and associated tests.
Browse files Browse the repository at this point in the history
In particular, we convert unsigned NumPy arrays of codes to a signed integer
dtype so that missing values can be represented by the '-1' placeholder.
  • Loading branch information
LTLA committed Nov 8, 2023
1 parent e8ec403 commit cfa408a
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 15 deletions.
4 changes: 2 additions & 2 deletions src/biocutils/Factor.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ def __init__(self, codes: Sequence[int], levels: Sequence[str], ordered: bool =
else:
replacement[i] = x
codes = replacement
elif not numpy.issubdtype(codes.dtype, numpy.signedinteger): # force it to be signed.
codes = codes.astype(numpy.min_scalar_type(-len(levels)))
self._codes = codes

if not isinstance(levels, numpy.ndarray):
Expand All @@ -66,8 +68,6 @@ def __init__(self, codes: Sequence[int], levels: Sequence[str], ordered: bool =
self._ordered = bool(ordered)

if validate:
if not numpy.issubdtype(self._codes.dtype, numpy.signedinteger):
raise TypeError("all entries of 'codes' should be signed integers")
if len(self._codes.shape) != 1:
raise TypeError("'codes' should be a 1-dimensional array")

Expand Down
31 changes: 18 additions & 13 deletions tests/test_Factor.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,42 @@
from biocutils import Factor, combine
import pytest
import copy
import numpy


def test_Factor_basics():
f = Factor([0, 1, 2, 0, 2, 4], levels=["A", "B", "C", "D", "E"])
assert len(f) == 6
assert list(f) == ["A", "B", "C", "A", "C", "E"]
assert f.get_codes() == [0, 1, 2, 0, 2, 4]
assert f.get_levels() == ["A", "B", "C", "D", "E"]
assert list(f.get_codes()) == [0, 1, 2, 0, 2, 4]
assert list(f.get_levels()) == ["A", "B", "C", "D", "E"]
assert not f.get_ordered()

with pytest.raises(TypeError) as ex:
Factor([0, "WHEE"], ["A", "B"])
assert str(ex.value).find("should be integers") >= 0
# Works with missing values.
f = Factor([0, 1, None, 0, numpy.ma.masked, 4], levels=["A", "B", "C", "D", "E"])
assert len(f) == 6
assert list(f) == ["A", "B", None, "A", None, "E"]
assert list(f.get_codes()) == [0, 1, -1, 0, -1, 4]

f = Factor([None] * 10, levels=["A", "B", "C", "D", "E"])
assert list(f) == [None] * 10

with pytest.raises(TypeError) as ex:
Factor([0, 1], ["A", None, "B"])
assert str(ex.value).find("non-missing strings") >= 0
# Works with NumPy inputs.
f = Factor(numpy.array([4,3,2,1,0], dtype=numpy.uint8), levels=numpy.array(["A", "B", "C", "D", "E"]))
assert len(f) == 5
assert f.get_codes().dtype == numpy.int8
assert numpy.issubdtype(f.get_levels().dtype, numpy.str_)

with pytest.raises(ValueError) as ex:
Factor([0, 1, -1], ["A"])
Factor([0, 1, 100], ["A"])
assert str(ex.value).find("refer to an entry") >= 0

with pytest.raises(ValueError) as ex:
Factor([0, 1], ["A", "B", "A"])
assert str(ex.value).find("should be unique") >= 0

f = Factor([None] * 10, levels=["A", "B", "C", "D", "E"])
assert list(f) == [None] * 10


def test_Factor_basics():
def test_Factor_print():
f = Factor([0, 1, 2, 0, 2, 4], levels=["A", "B", "C", "D", "E"])
assert repr(f).startswith("Factor(")
assert str(f).startswith("Factor of length")
Expand Down

0 comments on commit cfa408a

Please sign in to comment.