Skip to content

Commit

Permalink
[pre-commit.ci] auto fixes from pre-commit.com hooks
Browse files Browse the repository at this point in the history
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed Nov 7, 2023
1 parent 0f86142 commit b0075b2
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 40 deletions.
105 changes: 76 additions & 29 deletions src/biocutils/Factor.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,17 @@ def _check_levels_type(levels: numpy.ndarray):
class Factor:
"""Factor class, equivalent to R's ``factor``.
This is a vector of integer codes, each of which is an index into a list of
unique strings. The aim is to encode a list of strings as integers for
easier numerical analysis.
This is a vector of integer codes, each of which is an index into a list of unique strings. The aim is to encode a
list of strings as integers for easier numerical analysis.
"""

def __init__(self, codes: Sequence[int], levels: Sequence[str], ordered: bool = False, validate: bool = True):
def __init__(
self,
codes: Sequence[int],
levels: Sequence[str],
ordered: bool = False,
validate: bool = True,
):
"""Initialize a Factor object.
Args:
Expand All @@ -46,7 +51,9 @@ def __init__(self, codes: Sequence[int], levels: Sequence[str], ordered: bool =
Whether to validate the arguments. Internal use only.
"""
if not isinstance(codes, numpy.ndarray):
replacement = numpy.ndarray(len(codes), dtype=numpy.min_scalar_type(-len(levels))) # get a signed type.
replacement = numpy.ndarray(
len(codes), dtype=numpy.min_scalar_type(-len(levels))
) # get a signed type.
for i, x in enumerate(codes):
if is_missing_scalar(x) or x < 0:
replacement[i] = -1
Expand All @@ -71,7 +78,9 @@ def __init__(self, codes: Sequence[int], levels: Sequence[str], ordered: bool =

for x in codes:
if x >= len(self._levels):
raise ValueError("all entries of 'codes' should refer to an entry of 'levels'")
raise ValueError(
"all entries of 'codes' should refer to an entry of 'levels'"
)

if len(set(self._levels)) < len(self._levels):
raise ValueError("all entries of 'levels' should be unique")
Expand Down Expand Up @@ -126,7 +135,12 @@ def __repr__(self) -> str:
Returns:
A stringified representation of this object.
"""
tmp = "Factor(codes=" + print_truncated_list(self._codes) + ", levels=" + print_truncated_list(self._levels)
tmp = (
"Factor(codes="
+ print_truncated_list(self._codes)
+ ", levels="
+ print_truncated_list(self._levels)
)
if self._ordered:
tmp += ", ordered=True"
tmp += ")"
Expand All @@ -137,12 +151,26 @@ def __str__(self) -> str:
Returns:
A pretty-printed representation of this object.
"""
message = "Factor of length " + str(len(self._codes)) + " with " + str(len(self._levels)) + " level"
message = (
"Factor of length "
+ str(len(self._codes))
+ " with "
+ str(len(self._levels))
+ " level"
)
if len(self._levels) != 0:
message += "s"
message += "\n"
message += "values: " + print_truncated_list(self._codes, transform=lambda i: self._levels[i]) + "\n"
message += "levels: " + print_truncated_list(self._levels, transform=lambda x: x) + "\n"
message += (
"values: "
+ print_truncated_list(self._codes, transform=lambda i: self._levels[i])
+ "\n"
)
message += (
"levels: "
+ print_truncated_list(self._levels, transform=lambda x: x)
+ "\n"
)
message += "ordered: " + str(self._ordered)
return message

Expand All @@ -169,24 +197,23 @@ def __getitem__(self, sub: Union[int, bool, Sequence]) -> Union[str, "Factor"]:
if x >= 0:
return self._levels[x]
else:
return None
return None
return type(self)(self._codes[sub], self._levels, self._ordered, validate=False)

def replace(self, sub: Sequence, value: Union[str, "Factor"], in_place: bool = False):
"""
Replace items in the ``Factor`` list. The ``subs`` elements in the
current object are replaced with the corresponding values in ``value``.
This is performed by finding the level for each entry of the
replacement ``value``, matching it to a level in the current object,
and replacing the entry of ``codes`` with the code of the matched
level. If there is no matching level, a missing value is inserted.
def replace(
self, sub: Sequence, value: Union[str, "Factor"], in_place: bool = False
):
"""Replace items in the ``Factor`` list. The ``subs`` elements in the current object are replaced with the
corresponding values in ``value``. This is performed by finding the level for each entry of the replacement
``value``, matching it to a level in the current object, and replacing the entry of ``codes`` with the code of
the matched level. If there is no matching level, a missing value is inserted.
Args:
sub:
sub:
Sequence of integers or booleans specifying the items to be
replaced.
value:
value:
If ``sub`` is a sequence, a ``Factor`` of the same length
containing the replacement values.
Expand All @@ -206,7 +233,10 @@ def replace(self, sub: Sequence, value: Union[str, "Factor"], in_place: bool = F
if not in_place:
codes = codes.copy()

if len(self._levels) == len(value._levels) and (self._levels == value._levels).all():
if (
len(self._levels) == len(value._levels)
and (self._levels == value._levels).all()
):
for i, x in enumerate(sub):
codes[x] = value._codes[i]
else:
Expand Down Expand Up @@ -269,9 +299,13 @@ def drop_unused_levels(self, in_place: bool = False) -> "Factor":
return self
else:
current_class_const = type(self)
return current_class_const(new_codes, new_levels, self._ordered, validate=False)
return current_class_const(
new_codes, new_levels, self._ordered, validate=False
)

def set_levels(self, levels: Union[str, List[str]], in_place: bool = False) -> "Factor":
def set_levels(
self, levels: Union[str, List[str]], in_place: bool = False
) -> "Factor":
"""Set or replace levels.
Args:
Expand Down Expand Up @@ -339,12 +373,17 @@ def set_levels(self, levels: Union[str, List[str]], in_place: bool = False) -> "
return self
else:
current_class_const = type(self)
return current_class_const(new_codes, new_levels, self._ordered, validate=False)
return current_class_const(
new_codes, new_levels, self._ordered, validate=False
)

@levels.setter
def levels(self, levels: Union[str, List[str]]):
"""See :py:attr:`~set_levels`."""
warn("Setting property 'levels'is an in-place operation, use 'set_levels' instead", UserWarning)
warn(
"Setting property 'levels'is an in-place operation, use 'set_levels' instead",
UserWarning,
)
self.set_levels(levels, in_place=True)

def __copy__(self) -> "Factor":
Expand All @@ -353,7 +392,9 @@ def __copy__(self) -> "Factor":
A shallow copy of the ``Factor`` object.
"""
current_class_const = type(self)
return current_class_const(self._codes, self._levels, self._ordered, validate=False)
return current_class_const(
self._codes, self._levels, self._ordered, validate=False
)

def __deepcopy__(self, memo) -> "Factor":
"""
Expand All @@ -375,17 +416,23 @@ def to_pandas(self):
Categorical: A :py:class:`~pandas.Categorical` object.
"""
from pandas import Categorical

return Categorical(
values=[self._levels[c] for c in self._codes],
ordered=self._ordered,
)

@staticmethod
def from_sequence(x: Sequence[str], levels: Optional[Sequence[str]] = None, sort_levels: bool = True, ordered: bool = False) -> "Factor":
def from_sequence(
x: Sequence[str],
levels: Optional[Sequence[str]] = None,
sort_levels: bool = True,
ordered: bool = False,
) -> "Factor":
"""Convert a sequence of hashable values into a factor.
Args:
x:
x:
A sequence of strings. Any value may be None to indicate
missingness.
Expand Down
6 changes: 4 additions & 2 deletions src/biocutils/factorize.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
from .is_missing_scalar import is_missing_scalar


def factorize(x: Sequence, levels: Optional[Sequence] = None, sort_levels: bool = False) -> Tuple[list, numpy.ndarray]:
def factorize(
x: Sequence, levels: Optional[Sequence] = None, sort_levels: bool = False
) -> Tuple[list, numpy.ndarray]:
"""Convert a sequence of hashable values into a factor.
Args:
x:
x:
A sequence of hashable values.
Any value may be None to indicate missingness.
Expand Down
12 changes: 9 additions & 3 deletions src/biocutils/match.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
from typing import List, Sequence, Union
from typing import Sequence, Union
import numpy

from .map_to_index import DUPLICATE_METHOD, map_to_index


def match(x: Sequence, targets: Union[dict, Sequence], duplicate_method: DUPLICATE_METHOD = "first") -> numpy.ndarray:
def match(
x: Sequence,
targets: Union[dict, Sequence],
duplicate_method: DUPLICATE_METHOD = "first",
) -> numpy.ndarray:
"""Find a matching value of each element of ``x`` in ``target``.
Args:
Expand All @@ -23,7 +27,9 @@ def match(x: Sequence, targets: Union[dict, Sequence], duplicate_method: DUPLICA
if not isinstance(targets, dict):
targets = map_to_index(targets, duplicate_method=duplicate_method)

indices = numpy.zeros(len(x), dtype=numpy.min_scalar_type(-len(targets))) # get a signed type
indices = numpy.zeros(
len(x), dtype=numpy.min_scalar_type(-len(targets))
) # get a signed type
for i, y in enumerate(x):
if y not in targets:
indices[i] = -1
Expand Down
21 changes: 16 additions & 5 deletions src/biocutils/normalize_subscript.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,15 @@


def _raise_int(idx: int, length):
raise IndexError("subscript (" + str(idx) + ") out of range for vector-like object of length " + str(length))
raise IndexError(
"subscript ("
+ str(idx)
+ ") out of range for vector-like object of length "
+ str(length)
)


def _is_scalar_bool(sub):
def _is_scalar_bool(sub):
return isinstance(sub, bool) or isinstance(sub, numpy.bool_)


Expand Down Expand Up @@ -55,7 +60,7 @@ def normalize_subscript(
specifying the subscript elements, and (ii) a boolean indicating whether
``sub`` was a scalar.
"""
if _is_scalar_bool(sub): # before ints, as bools are ints.
if _is_scalar_bool(sub): # before ints, as bools are ints.
if sub:
return [0], True
else:
Expand All @@ -70,7 +75,11 @@ def normalize_subscript(

if isinstance(sub, str):
if names is None:
raise IndexError("failed to find subscript '" + sub + "' for vector-like object with no names")
raise IndexError(
"failed to find subscript '"
+ sub
+ "' for vector-like object with no names"
)
return [names.index(sub)], True

if isinstance(sub, slice):
Expand Down Expand Up @@ -138,7 +147,9 @@ def normalize_subscript(

if len(has_strings):
if names is None:
raise IndexError("cannot find string subscripts for vector-like object with no names")
raise IndexError(
"cannot find string subscripts for vector-like object with no names"
)

mapping = {}
for i, y in enumerate(names):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_Factor.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def test_Factor_copy():
assert (f.get_levels() == out.get_levels()).all()


#def test_Factor_combine():
# def test_Factor_combine():
# # Same levels.
# f1 = Factor([0, 2, 4, 2, 0], levels=["A", "B", "C", "D", "E"])
# f2 = Factor([1, 3, 1], levels=["A", "B", "C", "D", "E"])
Expand Down

0 comments on commit b0075b2

Please sign in to comment.