Skip to content

Commit

Permalink
Introduce the NormalizedSubscript class for normalized subscripts.
Browse files Browse the repository at this point in the history
This just returns the subscript directly when passed to
normalize_subscript, allowing us to signal to child classes that no
further normalization is required to avoid unnecessary operations.
  • Loading branch information
LTLA committed Nov 14, 2023
1 parent 1b37da2 commit 86fa4cb
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 14 deletions.
10 changes: 5 additions & 5 deletions src/biocutils/Factor.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from .Names import Names, _name_to_position, _sanitize_names, _combine_names
from .match import match
from .factorize import factorize
from .normalize_subscript import normalize_subscript, SubscriptTypes
from .normalize_subscript import normalize_subscript, SubscriptTypes, NormalizedSubscript
from .is_missing_scalar import is_missing_scalar
from .print_truncated import print_truncated_list

Expand Down Expand Up @@ -302,7 +302,7 @@ def __getitem__(self, index: SubscriptTypes) -> Union[str, "Factor"]:
if scalar:
return self.get_value(index[0])
else:
return self.get_slice(index)
return self.get_slice(NormalizedSubscript(index))

def set_value(self, index: Union[str, int], value: Union[str, None], in_place: bool = False) -> "Factor":
"""
Expand Down Expand Up @@ -404,7 +404,7 @@ def __setitem__(self, index: SubscriptTypes, value: Union[str, "Factor"]):
if scalar:
self.set_value(index, value, in_place=True)
else:
self.set_slice(index, value, in_place=True)
self.set_slice(NormalizedSubscript(index), value, in_place=True)

#################################
#####>>>> Level setting <<<<#####
Expand Down Expand Up @@ -598,12 +598,12 @@ def from_sequence(x: Sequence[str], levels: Optional[Sequence[str]] = None, sort

@subset_sequence.register
def _subset_sequence_Factor(x: Factor, indices: Sequence[int]) -> Factor:
return x.get_slice(indices)
return x.get_slice(NormalizedSubscript(indices))


@assign_sequence.register
def _assign_sequence_Factor(x: Factor, indices: Sequence[int], other: Factor) -> Factor:
return x.set_slice(indices, other)
return x.set_slice(NormalizedSubscript(indices), other)


@combine_sequences.register(Factor)
Expand Down
10 changes: 5 additions & 5 deletions src/biocutils/NamedList.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from copy import deepcopy

from .Names import Names, _name_to_position, _sanitize_names
from .normalize_subscript import normalize_subscript, SubscriptTypes
from .normalize_subscript import normalize_subscript, SubscriptTypes, NormalizedSubscript
from .subset_sequence import subset_sequence
from .combine_sequences import combine_sequences
from .assign_sequence import assign_sequence
Expand Down Expand Up @@ -194,7 +194,7 @@ def __getitem__(self, index: SubscriptTypes) -> Union["NamedList", Any]:
if scalar:
return self.get_value(index[0])
else:
return self.get_slice(index)
return self.get_slice(NormalizedSubscript(index))

def set_value(self, index: Union[str, int], value: Any, in_place: bool = False) -> "NamedList":
"""
Expand Down Expand Up @@ -300,7 +300,7 @@ def __setitem__(self, index: SubscriptTypes, value: Any):
if scalar:
self.set_value(index[0], value, in_place=True)
else:
self.set_slice(index, value, in_place=True)
self.set_slice(NormalizedSubscript(index), value, in_place=True)

def _define_output(self, in_place: bool) -> "NamedList":
if in_place:
Expand Down Expand Up @@ -451,7 +451,7 @@ def from_dict(x: dict) -> "NamedList":

@subset_sequence.register
def _subset_sequence_NamedList(x: NamedList, indices: Sequence[int]) -> NamedList:
return x.get_slice(indices)
return x.get_slice(NormalizedSubscript(indices))


@combine_sequences.register
Expand All @@ -472,4 +472,4 @@ def _assign_sequence_NamedList(x: NamedList, indices: Sequence[int], other: Sequ
# of names, and it would be weird for the same sequence of names to
# suddenly become an invalid indexing vector after an assignment.
other = other._data
return type(x)(assign_sequence(x._data, indices, other), names=x._names)
return type(x)(assign_sequence(x._data, NormalizedSubscript(indices), other), names=x._names)
56 changes: 52 additions & 4 deletions src/biocutils/normalize_subscript.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
from typing import Optional, Sequence, Tuple, Union
from typing import Optional, Sequence, Tuple, Union, Any
import numpy

from .Names import Names


SubscriptTypes = Union[slice, range, Sequence, int, str, bool]


def _raise_int(idx: int, length):
raise IndexError("subscript (" + str(idx) + ") out of range for vector-like object of length " + str(length))

Expand All @@ -15,6 +12,52 @@ def _is_scalar_bool(sub):
return isinstance(sub, bool) or isinstance(sub, numpy.bool_)


class NormalizedSubscript:
    """
    Subscript normalized by :py:func:`~normalize_subscript`. This
    is used to indicate that no further normalization is required,
    such that :py:func:`~normalize_subscript` is just a no-op.
    """

    def __init__(self, subscript: Sequence[int]):
        """
        Args:
            subscript:
                Sequence of integers for a normalized subscript. If this
                is already a ``NormalizedSubscript``, its underlying
                sequence is reused rather than wrapped a second time.
        """
        # Callers may wrap defensively at several layers; unwrapping here
        # keeps ``subscript`` pointing at the real sequence instead of a
        # chain of proxies.
        if isinstance(subscript, NormalizedSubscript):
            subscript = subscript._subscript
        self._subscript = subscript

    @property
    def subscript(self) -> Sequence[int]:
        """
        Returns:
            The subscript, as a sequence of integer positions.
        """
        return self._subscript

    def __getitem__(self, index: Any) -> Any:
        """
        Args:
            index:
                Any argument accepted by the ``__getitem__`` method of the
                :py:attr:`~subscript`.

        Returns:
            The same return value as the ``__getitem__`` method of the
            subscript. This should be an integer if ``index`` is an integer.
        """
        return self._subscript[index]

    def __len__(self) -> int:
        """
        Returns:
            Length of the subscript.
        """
        return len(self._subscript)

    def __iter__(self):
        """
        Returns:
            An iterator over the positions in the subscript, obtained
            directly from the underlying sequence.
        """
        # Explicit delegation avoids the legacy ``__getitem__``-based
        # iteration fallback, which probes indices until ``IndexError``.
        return iter(self._subscript)

    def __repr__(self) -> str:
        """
        Returns:
            A string showing the class name and the wrapped subscript.
        """
        return f"{type(self).__name__}({self._subscript!r})"


SubscriptTypes = Union[slice, range, Sequence, int, str, bool, NormalizedSubscript]


def normalize_subscript(
sub: SubscriptTypes,
length: int,
Expand Down Expand Up @@ -44,6 +87,8 @@ def normalize_subscript(
as described above. Integers should be indices to an element.
Each truthy boolean is converted to an index equal to its
position in ``sub``, and each falsy boolean is ignored.
- A :py:class:`~NormalizedSubscript`, in which case the
``subscript`` property is directly returned.
length:
Length of the object.
Expand All @@ -62,6 +107,9 @@ def normalize_subscript(
specifying the subscript elements, and (ii) a boolean indicating whether
``sub`` was a scalar.
"""
if isinstance(sub, NormalizedSubscript):
return sub.subscript, False

if _is_scalar_bool(sub): # before ints, as bools are ints.
if sub:
return [0], True
Expand Down

0 comments on commit 86fa4cb

Please sign in to comment.