Skip to content

Commit

Permalink
Enforce no naming on the factor levels.
Browse files Browse the repository at this point in the history
  • Loading branch information
LTLA committed Nov 14, 2023
1 parent a7d1cfa commit 9d6aac2
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 20 deletions.
22 changes: 12 additions & 10 deletions src/biocutils/Factor.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class Factor:
easier numerical analysis.
"""

def __init__(self, codes: Sequence[int], levels: Sequence[str], ordered: bool = False, validate: bool = True):
def __init__(self, codes: Sequence[int], levels: Sequence[str], ordered: bool = False, _validate: bool = True):
"""Initialize a Factor object.
Args:
Expand All @@ -38,7 +38,7 @@ def __init__(self, codes: Sequence[int], levels: Sequence[str], ordered: bool =
ordered:
Whether the levels are ordered.
validate:
_validate:
Whether to validate the arguments. Internal use only.
"""
if not isinstance(codes, numpy.ndarray):
Expand All @@ -57,12 +57,14 @@ def __init__(self, codes: Sequence[int], levels: Sequence[str], ordered: bool =

if not isinstance(levels, StringList):
levels = StringList(levels)
if levels.get_names() is not None:
levels = levels.set_names(None)

self._codes = codes
self._levels = levels
self._ordered = bool(ordered)

if validate:
if _validate:
if any(x is None for x in levels):
raise TypeError("all entries of 'levels' should be non-missing")
if len(set(levels)) < len(levels):
Expand Down Expand Up @@ -166,7 +168,7 @@ def __getitem__(self, sub: Union[int, bool, Sequence]) -> Union[str, "Factor"]:
return self._levels[x]
else:
return None
return type(self)(self._codes[sub], self._levels, self._ordered, validate=False)
return type(self)(self._codes[sub], self._levels, self._ordered, _validate=False)

def replace(self, sub: Sequence, value: Union[str, "Factor"], in_place: bool = False):
"""
Expand Down Expand Up @@ -218,7 +220,7 @@ def replace(self, sub: Sequence, value: Union[str, "Factor"], in_place: bool = F
self._codes = codes
return self
else:
return type(self)(codes, self._levels, self._ordered, validate=False)
return type(self)(codes, self._levels, self._ordered, _validate=False)

def __setitem__(self, args: Sequence[int], value: "Factor"):
"""See :py:attr:`~replace` for details."""
Expand Down Expand Up @@ -264,7 +266,7 @@ def drop_unused_levels(self, in_place: bool = False) -> "Factor":
return self
else:
current_class_const = type(self)
return current_class_const(new_codes, new_levels, self._ordered, validate=False)
return current_class_const(new_codes, new_levels, self._ordered, _validate=False)

def set_levels(self, levels: Union[str, Sequence[str]], in_place: bool = False) -> "Factor":
"""Set or replace levels.
Expand Down Expand Up @@ -337,7 +339,7 @@ def set_levels(self, levels: Union[str, Sequence[str]], in_place: bool = False)
return self
else:
current_class_const = type(self)
return current_class_const(new_codes, new_levels, self._ordered, validate=False)
return current_class_const(new_codes, new_levels, self._ordered, _validate=False)

@levels.setter
def levels(self, levels: Union[str, List[str]]):
Expand All @@ -351,7 +353,7 @@ def __copy__(self) -> "Factor":
A shallow copy of the ``Factor`` object.
"""
current_class_const = type(self)
return current_class_const(self._codes, self._levels, self._ordered, validate=False)
return current_class_const(self._codes, self._levels, self._ordered, _validate=False)

def __deepcopy__(self, memo) -> "Factor":
"""
Expand All @@ -363,7 +365,7 @@ def __deepcopy__(self, memo) -> "Factor":
deepcopy(self._codes, memo),
deepcopy(self._levels, memo),
self._ordered,
validate=False,
_validate=False,
)

def to_pandas(self):
Expand Down Expand Up @@ -448,4 +450,4 @@ def _combine_factors(*x: Factor):
new_codes.append(curout)
new_ordered = False

return Factor(combine_sequences(*new_codes), new_levels, new_ordered, validate=False)
return Factor(combine_sequences(*new_codes), new_levels, new_ordered, _validate=False)
20 changes: 20 additions & 0 deletions src/biocutils/NamedList.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,18 +68,38 @@ def __len__(self) -> int:
return len(self._data)

def __repr__(self) -> str:
    """Build a constructor-like representation of the current list.

    Returns:
        A string of the form ``ClassName(data=...)``, with a trailing
        ``names=...`` argument only when names are set.
    """
    # The names part is left out entirely when no names are attached.
    names_part = "" if self._names is None else ", names=" + repr(self._names)
    return type(self).__name__ + "(data=" + repr(self._data) + names_part + ")"

def __str__(self) -> str:
    """Pretty-print the current list.

    Returns:
        When names are set, a bracketed, comma-separated sequence of
        ``name=value`` pairs (both rendered with ``repr``); otherwise the
        ``repr`` of the underlying data.
    """
    if self._names is None:
        return repr(self._data)

    # Names are looked up by position so each element is paired with the
    # name stored at the same index.
    pairs = []
    for position, value in enumerate(self._data):
        pairs.append(repr(self._names[position]) + "=" + repr(value))
    return "[" + ", ".join(pairs) + "]"

def __eq__(self, other: "NamedList") -> bool:
    """Compare the current object with another named list.

    Args:
        other: Another ``NamedList``.

    Returns:
        Whether the current object is equal to ``other``, i.e.,
        same data and names. If ``other`` does not provide both
        ``get_data()`` and ``get_names()`` (e.g., a plain ``list`` or
        ``None``), ``NotImplemented`` is returned so that Python falls
        back to its default comparison instead of raising an
        ``AttributeError``.
    """
    other_get_data = getattr(other, "get_data", None)
    other_get_names = getattr(other, "get_names", None)
    if not callable(other_get_data) or not callable(other_get_names):
        # Not a named-list-like object: let Python try the reflected
        # comparison (and ultimately evaluate ``==`` to False).
        return NotImplemented

    return self.get_data() == other_get_data() and self.get_names() == other_get_names()

def get_data(self) -> list:
"""
Returns:
Expand Down
20 changes: 10 additions & 10 deletions tests/test_Factor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def test_Factor_basics():
assert len(f) == 6
assert list(f) == ["A", "B", "C", "A", "C", "E"]
assert list(f.get_codes()) == [0, 1, 2, 0, 2, 4]
assert f.get_levels() == ["A", "B", "C", "D", "E"]
assert f.get_levels().get_data() == ["A", "B", "C", "D", "E"]
assert not f.get_ordered()

# Works with missing values.
Expand Down Expand Up @@ -83,48 +83,48 @@ def test_Factor_setitem():

f[0:2] = f2[2:4]
assert list(f.get_codes()) == [2, 3, 2, 3, 2, 1]
assert f.get_levels() == ["A", "B", "C", "D", "E"]
assert f.get_levels().get_data() == ["A", "B", "C", "D", "E"]

f = Factor([0, 1, 2, 3, 2, 1], levels=["A", "B", "C", "D", "E"])
f2 = Factor([0, 1, 2, 3, 2, 1], levels=["E", "D", "C", "B", "A"])
f[[-3, -2, -1]] = f2[0:3]
assert list(f.get_codes()) == [0, 1, 2, 4, 3, 2]
assert f.get_levels() == ["A", "B", "C", "D", "E"]
assert f.get_levels().get_data() == ["A", "B", "C", "D", "E"]

f = Factor([0, 1, 2, 3, 2, 1], levels=["A", "B", "C", "D", "E"])
f2 = Factor([0, 1, 2, 3, 2, 1], levels=["e", "d", "c", "b", "a"])
f[:] = f2[:]
assert list(f.get_codes()) == [-1] * 6
assert f.get_levels() == ["A", "B", "C", "D", "E"]
assert f.get_levels().get_data() == ["A", "B", "C", "D", "E"]


def test_Factor_drop_unused_levels():
f = Factor([0, 1, 2, 0, 2, 4], levels=["A", "B", "C", "D", "E"])
f2 = f.drop_unused_levels()
assert f2.get_levels() == ["A", "B", "C", "E"]
assert f2.get_levels().get_data() == ["A", "B", "C", "E"]
assert list(f2) == list(f)

f = Factor([3, 4, 2, 3, 2, 4], levels=["A", "B", "C", "D", "E"])
f2 = f.drop_unused_levels(in_place=True)
assert f2.get_levels() == ["C", "D", "E"]
assert f2.get_levels().get_data() == ["C", "D", "E"]
assert list(f2) == ["D", "E", "C", "D", "C", "E"]


def test_Factor_set_levels():
f = Factor([0, 1, 2, 0, 2, 4], levels=["A", "B", "C", "D", "E"])
f2 = f.set_levels(["E", "D", "C", "B", "A"])
assert f2.get_levels() == ["E", "D", "C", "B", "A"]
assert f2.get_levels().get_data() == ["E", "D", "C", "B", "A"]
assert list(f2.get_codes()) == [4, 3, 2, 4, 2, 0]
assert list(f2) == list(f)

f = Factor([0, 1, 2, 0, 2, 4], levels=["A", "B", "C", "D", "E"])
f2 = f.set_levels(["E", "C", "A"], in_place=True)
assert f2.get_levels() == ["E", "C", "A"]
assert f2.get_levels().get_data() == ["E", "C", "A"]
assert list(f2.get_codes()) == [2, -1, 1, 2, 1, 0]

f = Factor([0, 1, 2, 0, 2, 4], levels=["A", "B", "C", "D", "E"])
f2 = f.set_levels("E") # reorders
assert f2.get_levels() == ["E", "A", "B", "C", "D"]
assert f2.get_levels().get_data() == ["E", "A", "B", "C", "D"]
assert list(f2.get_codes()) == [1, 2, 3, 1, 3, 0]

with pytest.raises(ValueError) as ex:
Expand Down Expand Up @@ -164,7 +164,7 @@ def test_Factor_combine():
f1 = Factor([0, 2, 4, 2, 0], levels=["A", "B", "C", "D", "E"])
f2 = Factor([1, 3, 1], levels=["D", "E", "F", "G"])
out = combine(f1, f2)
assert out.get_levels() == ["A", "B", "C", "D", "E", "F", "G"]
assert out.get_levels().get_data() == ["A", "B", "C", "D", "E", "F", "G"]
assert list(out.get_codes()) == [0, 2, 4, 2, 0, 4, 6, 4]

f2 = Factor([1, 3, None], levels=["D", "E", "F", "G"])
Expand Down

0 comments on commit 9d6aac2

Please sign in to comment.