Introduce the NormalizedSubscript class for normalized subscripts.

When a NormalizedSubscript is passed to normalize_subscript, the wrapped subscript is returned directly. This lets us signal to child classes that the subscript has already been normalized, so that they can skip redundant normalization work.
BiocPy · Nov 14, 2023 · 86fa4cb · 86fa4cb
1 parent 1b37da2
commit 86fa4cb
Show file tree

Hide file tree

Showing 3 changed files with 62 additions and 14 deletions.
diff --git a/src/biocutils/Factor.py b/src/biocutils/Factor.py
@@ -7,7 +7,7 @@
 from .Names import Names, _name_to_position, _sanitize_names, _combine_names
 from .match import match
 from .factorize import factorize
-from .normalize_subscript import normalize_subscript, SubscriptTypes
+from .normalize_subscript import normalize_subscript, SubscriptTypes, NormalizedSubscript
 from .is_missing_scalar import is_missing_scalar
 from .print_truncated import print_truncated_list
 
@@ -302,7 +302,7 @@ def __getitem__(self, index: SubscriptTypes) -> Union[str, "Factor"]:
         if scalar:
             return self.get_value(index[0])
         else:
-            return self.get_slice(index)
+            return self.get_slice(NormalizedSubscript(index))
 
     def set_value(self, index: Union[str, int], value: Union[str, None], in_place: bool = False) -> "Factor":
         """
@@ -404,7 +404,7 @@ def __setitem__(self, index: SubscriptTypes, value: Union[str, "Factor"]):
         if scalar:
             self.set_value(index, value, in_place=True)
         else:
-            self.set_slice(index, value, in_place=True)
+            self.set_slice(NormalizedSubscript(index), value, in_place=True)
 
     #################################
     #####>>>> Level setting <<<<#####
@@ -598,12 +598,12 @@ def from_sequence(x: Sequence[str], levels: Optional[Sequence[str]] = None, sort
 
 @subset_sequence.register
 def _subset_sequence_Factor(x: Factor, indices: Sequence[int]) -> Factor:
-    return x.get_slice(indices)
+    return x.get_slice(NormalizedSubscript(indices))
 
 
 @assign_sequence.register
 def _assign_sequence_Factor(x: Factor, indices: Sequence[int], other: Factor) -> Factor:
-    return x.set_slice(indices, other)
+    return x.set_slice(NormalizedSubscript(indices), other)
 
 
 @combine_sequences.register(Factor)

diff --git a/src/biocutils/NamedList.py b/src/biocutils/NamedList.py
@@ -2,7 +2,7 @@
 from copy import deepcopy
 
 from .Names import Names, _name_to_position, _sanitize_names
-from .normalize_subscript import normalize_subscript, SubscriptTypes
+from .normalize_subscript import normalize_subscript, SubscriptTypes, NormalizedSubscript
 from .subset_sequence import subset_sequence
 from .combine_sequences import combine_sequences
 from .assign_sequence import assign_sequence
@@ -194,7 +194,7 @@ def __getitem__(self, index: SubscriptTypes) -> Union["NamedList", Any]:
         if scalar:
             return self.get_value(index[0])
         else:
-            return self.get_slice(index)
+            return self.get_slice(NormalizedSubscript(index))
 
     def set_value(self, index: Union[str, int], value: Any, in_place: bool = False) -> "NamedList":
         """
@@ -300,7 +300,7 @@ def __setitem__(self, index: SubscriptTypes, value: Any):
             if scalar:
                 self.set_value(index[0], value, in_place=True)
             else:
-                self.set_slice(index, value, in_place=True)
+                self.set_slice(NormalizedSubscript(index), value, in_place=True)
 
     def _define_output(self, in_place: bool) -> "NamedList":
         if in_place:
@@ -451,7 +451,7 @@ def from_dict(x: dict) -> "NamedList":
 
 @subset_sequence.register
 def _subset_sequence_NamedList(x: NamedList, indices: Sequence[int]) -> NamedList:
-    return x.get_slice(indices)
+    return x.get_slice(NormalizedSubscript(indices))
 
 
 @combine_sequences.register
@@ -472,4 +472,4 @@ def _assign_sequence_NamedList(x: NamedList, indices: Sequence[int], other: Sequ
         # of names, and it would be weird for the same sequence of names to 
         # suddenly become an invalid indexing vector after an assignment.
         other = other._data
-    return type(x)(assign_sequence(x._data, indices, other), names=x._names)
+    return type(x)(assign_sequence(x._data, NormalizedSubscript(indices), other), names=x._names)
diff --git a/src/biocutils/normalize_subscript.py b/src/biocutils/normalize_subscript.py
@@ -1,12 +1,9 @@
-from typing import Optional, Sequence, Tuple, Union
+from typing import Optional, Sequence, Tuple, Union, Any
 import numpy
 
 from .Names import Names
 
 
-SubscriptTypes = Union[slice, range, Sequence, int, str, bool]
-
-
 def _raise_int(idx: int, length):
     raise IndexError("subscript (" + str(idx) + ") out of range for vector-like object of length " + str(length))
 
@@ -15,6 +12,52 @@ def _is_scalar_bool(sub):
     return isinstance(sub, bool) or isinstance(sub, numpy.bool_)
 
 
+class NormalizedSubscript:
+    """
+    Subscript normalized by :py:func:`~normalize_subscript`. This 
+    is used to indicate that no further normalization is required,
+    such that :py:func:`~normalize_subscript` is just a no-op.
+    """
+    def __init__(self, subscript: Sequence[int]):
+        """
+        Args:
+            subscript:
+                Sequence of integers for a normalized subscript.
+        """
+        self._subscript = subscript
+
+    @property
+    def subscript(self) -> Sequence[int]:
+        """
+        Returns:
+            The subscript, as a sequence of integer positions.
+        """
+        return self._subscript
+
+    def __getitem__(self, index: Any) -> Any:
+        """
+        Args:
+            index: 
+                Any argument accepted by the ``__getitem__`` method of the
+                :py:attr:`~subscript`.
+
+        Returns:
+            The same return value as the ``__getitem__`` method of the
+            subscript. This should be an integer if ``index`` is an integer.
+        """
+        return self._subscript[index]
+
+    def __len__(self) -> int:
+        """
+        Returns:
+            Length of the subscript.
+        """
+        return len(self._subscript)
+
+
+SubscriptTypes = Union[slice, range, Sequence, int, str, bool, NormalizedSubscript]
+
+
 def normalize_subscript(
     sub: SubscriptTypes,
     length: int,
@@ -44,6 +87,8 @@ def normalize_subscript(
               as described above. Integers should be indices to an element.
               Each truthy boolean is converted to an index equal to its
               position in ``sub``, and each Falsey boolean is ignored.
+            - A :py:class:`~NormalizedSubscript`, in which case the
+              ``subscript`` property is directly returned.
 
         length:
             Length of the object.
@@ -62,6 +107,9 @@ def normalize_subscript(
         specifying the subscript elements, and (ii) a boolean indicating whether
         ``sub`` was a scalar.
     """
+    if isinstance(sub, NormalizedSubscript):
+        return sub.subscript, False
+
     if _is_scalar_bool(sub): # before ints, as bools are ints.
         if sub:
             return [0], True