[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
BiocPy · Nov 7, 2023 · b0075b2 · b0075b2
1 parent 0f86142
commit b0075b2
Show file tree

Hide file tree

Showing 5 changed files with 106 additions and 40 deletions.
diff --git a/src/biocutils/Factor.py b/src/biocutils/Factor.py
@@ -22,12 +22,17 @@ def _check_levels_type(levels: numpy.ndarray):
 class Factor:
     """Factor class, equivalent to R's ``factor``.
 
-    This is a vector of integer codes, each of which is an index into a list of
-    unique strings. The aim is to encode a list of strings as integers for
-    easier numerical analysis.
+    This is a vector of integer codes, each of which is an index into a list of unique strings. The aim is to encode a
+    list of strings as integers for easier numerical analysis.
     """
 
-    def __init__(self, codes: Sequence[int], levels: Sequence[str], ordered: bool = False, validate: bool = True):
+    def __init__(
+        self,
+        codes: Sequence[int],
+        levels: Sequence[str],
+        ordered: bool = False,
+        validate: bool = True,
+    ):
         """Initialize a Factor object.
 
         Args:
@@ -46,7 +51,9 @@ def __init__(self, codes: Sequence[int], levels: Sequence[str], ordered: bool =
                 Whether to validate the arguments. Internal use only.
         """
         if not isinstance(codes, numpy.ndarray):
-            replacement = numpy.ndarray(len(codes), dtype=numpy.min_scalar_type(-len(levels))) # get a signed type.
+            replacement = numpy.ndarray(
+                len(codes), dtype=numpy.min_scalar_type(-len(levels))
+            )  # get a signed type.
             for i, x in enumerate(codes):
                 if is_missing_scalar(x) or x < 0:
                     replacement[i] = -1
@@ -71,7 +78,9 @@ def __init__(self, codes: Sequence[int], levels: Sequence[str], ordered: bool =
 
             for x in codes:
                 if x >= len(self._levels):
-                    raise ValueError("all entries of 'codes' should refer to an entry of 'levels'")
+                    raise ValueError(
+                        "all entries of 'codes' should refer to an entry of 'levels'"
+                    )
 
             if len(set(self._levels)) < len(self._levels):
                 raise ValueError("all entries of 'levels' should be unique")
@@ -126,7 +135,12 @@ def __repr__(self) -> str:
         Returns:
             A stringified representation of this object.
         """
-        tmp = "Factor(codes=" + print_truncated_list(self._codes) + ", levels=" + print_truncated_list(self._levels)
+        tmp = (
+            "Factor(codes="
+            + print_truncated_list(self._codes)
+            + ", levels="
+            + print_truncated_list(self._levels)
+        )
         if self._ordered:
             tmp += ", ordered=True"
         tmp += ")"
@@ -137,12 +151,26 @@ def __str__(self) -> str:
         Returns:
             A pretty-printed representation of this object.
         """
-        message = "Factor of length " + str(len(self._codes)) + " with " + str(len(self._levels)) + " level"
+        message = (
+            "Factor of length "
+            + str(len(self._codes))
+            + " with "
+            + str(len(self._levels))
+            + " level"
+        )
         if len(self._levels) != 0:
             message += "s"
         message += "\n"
-        message += "values: " + print_truncated_list(self._codes, transform=lambda i: self._levels[i]) + "\n"
-        message += "levels: " + print_truncated_list(self._levels, transform=lambda x: x) + "\n"
+        message += (
+            "values: "
+            + print_truncated_list(self._codes, transform=lambda i: self._levels[i])
+            + "\n"
+        )
+        message += (
+            "levels: "
+            + print_truncated_list(self._levels, transform=lambda x: x)
+            + "\n"
+        )
         message += "ordered: " + str(self._ordered)
         return message
 
@@ -169,24 +197,23 @@ def __getitem__(self, sub: Union[int, bool, Sequence]) -> Union[str, "Factor"]:
             if x >= 0:
                 return self._levels[x]
             else:
-                return None 
+                return None
         return type(self)(self._codes[sub], self._levels, self._ordered, validate=False)
 
-    def replace(self, sub: Sequence, value: Union[str, "Factor"], in_place: bool = False):
-        """
-        Replace items in the ``Factor`` list.  The ``subs`` elements in the
-        current object are replaced with the corresponding values in ``value``.
-        This is performed by finding the level for each entry of the
-        replacement ``value``, matching it to a level in the current object,
-        and replacing the entry of ``codes`` with the code of the matched
-        level. If there is no matching level, a missing value is inserted.
+    def replace(
+        self, sub: Sequence, value: Union[str, "Factor"], in_place: bool = False
+    ):
+        """Replace items in the ``Factor`` list.  The ``subs`` elements in the current object are replaced with the
+        corresponding values in ``value``. This is performed by finding the level for each entry of the replacement
+        ``value``, matching it to a level in the current object, and replacing the entry of ``codes`` with the code of
+        the matched level. If there is no matching level, a missing value is inserted.
 
         Args:
-            sub: 
+            sub:
                 Sequence of integers or booleans specifying the items to be
                 replaced.
 
-            value: 
+            value:
                 If ``sub`` is a sequence, a ``Factor`` of the same length
                 containing the replacement values.
 
@@ -206,7 +233,10 @@ def replace(self, sub: Sequence, value: Union[str, "Factor"], in_place: bool = F
         if not in_place:
             codes = codes.copy()
 
-        if len(self._levels) == len(value._levels) and (self._levels == value._levels).all():
+        if (
+            len(self._levels) == len(value._levels)
+            and (self._levels == value._levels).all()
+        ):
             for i, x in enumerate(sub):
                 codes[x] = value._codes[i]
         else:
@@ -269,9 +299,13 @@ def drop_unused_levels(self, in_place: bool = False) -> "Factor":
             return self
         else:
             current_class_const = type(self)
-            return current_class_const(new_codes, new_levels, self._ordered, validate=False)
+            return current_class_const(
+                new_codes, new_levels, self._ordered, validate=False
+            )
 
-    def set_levels(self, levels: Union[str, List[str]], in_place: bool = False) -> "Factor":
+    def set_levels(
+        self, levels: Union[str, List[str]], in_place: bool = False
+    ) -> "Factor":
         """Set or replace levels.
 
         Args:
@@ -339,12 +373,17 @@ def set_levels(self, levels: Union[str, List[str]], in_place: bool = False) -> "
             return self
         else:
             current_class_const = type(self)
-            return current_class_const(new_codes, new_levels, self._ordered, validate=False)
+            return current_class_const(
+                new_codes, new_levels, self._ordered, validate=False
+            )
 
     @levels.setter
     def levels(self, levels: Union[str, List[str]]):
         """See :py:attr:`~set_levels`."""
-        warn("Setting property 'levels'is an in-place operation, use 'set_levels' instead", UserWarning)
+        warn(
+            "Setting property 'levels'is an in-place operation, use 'set_levels' instead",
+            UserWarning,
+        )
         self.set_levels(levels, in_place=True)
 
     def __copy__(self) -> "Factor":
@@ -353,7 +392,9 @@ def __copy__(self) -> "Factor":
             A shallow copy of the ``Factor`` object.
         """
         current_class_const = type(self)
-        return current_class_const(self._codes, self._levels, self._ordered, validate=False)
+        return current_class_const(
+            self._codes, self._levels, self._ordered, validate=False
+        )
 
     def __deepcopy__(self, memo) -> "Factor":
         """
@@ -375,17 +416,23 @@ def to_pandas(self):
             Categorical: A :py:class:`~pandas.Categorical` object.
         """
         from pandas import Categorical
+
         return Categorical(
             values=[self._levels[c] for c in self._codes],
             ordered=self._ordered,
         )
 
     @staticmethod
-    def from_sequence(x: Sequence[str], levels: Optional[Sequence[str]] = None, sort_levels: bool = True, ordered: bool = False) -> "Factor":
+    def from_sequence(
+        x: Sequence[str],
+        levels: Optional[Sequence[str]] = None,
+        sort_levels: bool = True,
+        ordered: bool = False,
+    ) -> "Factor":
         """Convert a sequence of hashable values into a factor.
 
         Args:
-            x: 
+            x:
                 A sequence of strings. Any value may be None to indicate
                 missingness.
 

diff --git a/src/biocutils/factorize.py b/src/biocutils/factorize.py
@@ -5,11 +5,13 @@
 from .is_missing_scalar import is_missing_scalar
 
 
-def factorize(x: Sequence, levels: Optional[Sequence] = None, sort_levels: bool = False) -> Tuple[list, numpy.ndarray]:
+def factorize(
+    x: Sequence, levels: Optional[Sequence] = None, sort_levels: bool = False
+) -> Tuple[list, numpy.ndarray]:
     """Convert a sequence of hashable values into a factor.
 
     Args:
-        x: 
+        x:
             A sequence of hashable values.
             Any value may be None to indicate missingness.
 

diff --git a/src/biocutils/match.py b/src/biocutils/match.py
@@ -1,10 +1,14 @@
-from typing import List, Sequence, Union
+from typing import Sequence, Union
 import numpy
 
 from .map_to_index import DUPLICATE_METHOD, map_to_index
 
 
-def match(x: Sequence, targets: Union[dict, Sequence], duplicate_method: DUPLICATE_METHOD = "first") -> numpy.ndarray:
+def match(
+    x: Sequence,
+    targets: Union[dict, Sequence],
+    duplicate_method: DUPLICATE_METHOD = "first",
+) -> numpy.ndarray:
     """Find a matching value of each element of ``x`` in ``target``.
 
     Args:
@@ -23,7 +27,9 @@ def match(x: Sequence, targets: Union[dict, Sequence], duplicate_method: DUPLICA
     if not isinstance(targets, dict):
         targets = map_to_index(targets, duplicate_method=duplicate_method)
 
-    indices = numpy.zeros(len(x), dtype=numpy.min_scalar_type(-len(targets))) # get a signed type
+    indices = numpy.zeros(
+        len(x), dtype=numpy.min_scalar_type(-len(targets))
+    )  # get a signed type
     for i, y in enumerate(x):
         if y not in targets:
             indices[i] = -1

diff --git a/src/biocutils/normalize_subscript.py b/src/biocutils/normalize_subscript.py
@@ -3,10 +3,15 @@
 
 
 def _raise_int(idx: int, length):
-    raise IndexError("subscript (" + str(idx) + ") out of range for vector-like object of length " + str(length))
+    raise IndexError(
+        "subscript ("
+        + str(idx)
+        + ") out of range for vector-like object of length "
+        + str(length)
+    )
 
 
-def _is_scalar_bool(sub): 
+def _is_scalar_bool(sub):
     return isinstance(sub, bool) or isinstance(sub, numpy.bool_)
 
 
@@ -55,7 +60,7 @@ def normalize_subscript(
         specifying the subscript elements, and (ii) a boolean indicating whether
         ``sub`` was a scalar.
     """
-    if _is_scalar_bool(sub): # before ints, as bools are ints.
+    if _is_scalar_bool(sub):  # before ints, as bools are ints.
         if sub:
             return [0], True
         else:
@@ -70,7 +75,11 @@ def normalize_subscript(
 
     if isinstance(sub, str):
         if names is None:
-            raise IndexError("failed to find subscript '" + sub + "' for vector-like object with no names")
+            raise IndexError(
+                "failed to find subscript '"
+                + sub
+                + "' for vector-like object with no names"
+            )
         return [names.index(sub)], True
 
     if isinstance(sub, slice):
@@ -138,7 +147,9 @@ def normalize_subscript(
 
     if len(has_strings):
         if names is None:
-            raise IndexError("cannot find string subscripts for vector-like object with no names")
+            raise IndexError(
+                "cannot find string subscripts for vector-like object with no names"
+            )
 
         mapping = {}
         for i, y in enumerate(names):

diff --git a/tests/test_Factor.py b/tests/test_Factor.py
@@ -148,7 +148,7 @@ def test_Factor_copy():
     assert (f.get_levels() == out.get_levels()).all()
 
 
-#def test_Factor_combine():
+# def test_Factor_combine():
 #    # Same levels.
 #    f1 = Factor([0, 2, 4, 2, 0], levels=["A", "B", "C", "D", "E"])
 #    f2 = Factor([1, 3, 1], levels=["A", "B", "C", "D", "E"])