Skip to content

Commit

Permalink
Improved support for Names in normalize_subscript's names=.
Browse files Browse the repository at this point in the history
This makes it work for scalars, and also adds proper error checks when
the name isn't found. Also updated all tests to check this case.
  • Loading branch information
LTLA committed Nov 14, 2023
1 parent af70930 commit fe03a7b
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 33 deletions.
25 changes: 21 additions & 4 deletions src/biocutils/normalize_subscript.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
from .Names import Names


# Types accepted for the ``sub`` argument of normalize_subscript().
SubscriptTypes = Union[slice, range, Sequence, int, str, bool]


def _raise_int(idx: int, length):
raise IndexError("subscript (" + str(idx) + ") out of range for vector-like object of length " + str(length))

Expand All @@ -13,7 +16,7 @@ def _is_scalar_bool(sub):


def normalize_subscript(
sub: Union[slice, range, Sequence, int, str, bool],
sub: SubscriptTypes,
length: int,
names: Optional[Sequence[str]] = None,
non_negative_only: bool = True,
Expand Down Expand Up @@ -46,7 +49,8 @@ def normalize_subscript(
names:
List of names for each entry in the object. If not None, this
should have length equal to ``length``.
should have length equal to ``length``. Some optimizations
are possible if this is a :py:class:`~Names.Names` object.
non_negative_only:
Whether negative indices must be converted into non-negative
Expand All @@ -73,7 +77,17 @@ def normalize_subscript(
if isinstance(sub, str):
if names is None:
raise IndexError("failed to find subscript '" + sub + "' for vector-like object with no names")
return [names.index(sub)], True
i = -1
if isinstance(names, Names):
i = names.map(sub)
else:
for j, n in enumerate(names):
if n == sub:
i = j
break
if i < 0:
raise IndexError("cannot find subscript '" + sub + "' in the names")
return [i], True

if isinstance(sub, slice):
return range(*sub.indices(length)), False
Expand Down Expand Up @@ -126,7 +140,10 @@ def normalize_subscript(
for i, x in enumerate(sub):
if isinstance(x, str):
if are_names_indexed:
output.append(names.map(x))
i = names.map(x)
if i < 0:
raise IndexError("cannot find subscript '" + x + "' in the names")
output.append(i)
else:
has_strings.add(x)
string_positions.append(len(output))
Expand Down
45 changes: 16 additions & 29 deletions tests/test_normalize_subscript.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,8 @@ def test_normalize_subscript_scalars():
assert normalize_subscript(True, 100) == ([0], True)
assert normalize_subscript(False, 100) == ([], False)
assert normalize_subscript("C", 5, ["A", "B", "C", "D", "E"]) == ([2], True)
assert normalize_subscript("B", 5, ["A", "B", "C", "B", "E"]) == (
[1],
True,
) # takes first occurrence.
assert normalize_subscript("B", 5, ["A", "B", "C", "B", "E"]) == ([1], True,) # takes first occurrence.
assert normalize_subscript("B", 5, Names(["A", "B", "C", "B", "E"])) == ([1], True,) # takes first occurrence.

with pytest.raises(IndexError) as ex:
normalize_subscript(100, 10)
Expand All @@ -26,8 +24,13 @@ def test_normalize_subscript_scalars():
normalize_subscript("foor", 10)
assert str(ex.value).find("subscript 'foor'") >= 0

with pytest.raises(ValueError) as ex:
with pytest.raises(IndexError) as ex:
normalize_subscript("F", 5, ["A", "B", "C", "D", "E"])
assert str(ex.value).find("subscript 'F'") >= 0

with pytest.raises(IndexError) as ex:
normalize_subscript("F", 5, Names(["A", "B", "C", "D", "E"]))
assert str(ex.value).find("subscript 'F'") >= 0


def test_normalize_subscript_slice():
Expand Down Expand Up @@ -76,42 +79,26 @@ def test_normalize_subscript_chaos():
normalize_subscript([0, 2, 50, 6, 8], 50)
assert str(ex.value).find("subscript (50)") >= 0

assert normalize_subscript([0, -1, 2, -3, 4, -5, 6, -7, 8], 50) == (
[0, 49, 2, 47, 4, 45, 6, 43, 8],
False,
)
assert normalize_subscript([0, -1, 2, -3, 4, -5, 6, -7, 8], 50) == ([0, 49, 2, 47, 4, 45, 6, 43, 8], False)

with pytest.raises(IndexError) as ex:
normalize_subscript([0, 2, -51, 6, 8], 50)
assert str(ex.value).find("subscript (-51)") >= 0

assert normalize_subscript([False, 10, True, 20, False, 30, True], 50) == (
[10, 2, 20, 30, 6],
False,
)
assert normalize_subscript([False, 10, True, 20, False, 30, True], 50) == ([10, 2, 20, 30, 6], False)

names = ["A", "B", "C", "D", "E", "F"]
assert normalize_subscript(["B", 1, "D", 2, "F", 3, "A"], 6, names) == (
[1, 1, 3, 2, 5, 3, 0],
False,
)
assert normalize_subscript(["B", 1, "D", 2, "F", 3, "A"], 6, Names(names)) == (
[1, 1, 3, 2, 5, 3, 0],
False,
)
assert normalize_subscript(
["B", 1, "A", 2, "B", 3, "A"], 6, ["A", "B", "A", "B", "A", "B"]
) == (
[1, 1, 0, 2, 1, 3, 0],
False,
) # Takes the first occurrence.
assert normalize_subscript(["B", 1, "D", 2, "F", 3, "A"], 6, names) == ([1, 1, 3, 2, 5, 3, 0], False)
assert normalize_subscript(["B", 1, "D", 2, "F", 3, "A"], 6, Names(names)) == ([1, 1, 3, 2, 5, 3, 0], False)
assert normalize_subscript(["B", 1, "A", 2, "B", 3, "A"], 6, ["A", "B", "A", "B", "A", "B"]) == ([1, 1, 0, 2, 1, 3, 0], False) # Takes the first occurrence.

with pytest.raises(KeyError) as ex:
normalize_subscript(["B", 1, "D", 2, "G", 3, "A"], 6, names)
assert str(ex.value).find("'G'") >= 0

with pytest.raises(IndexError) as ex:
normalize_subscript(["B", 1, "D", 2, "F", 3, "A"], 6)
assert str(ex.value).find("vector-like object with no names") >= 0
normalize_subscript(["B", 1, "D", 2, "G", 3, "A"], 6, Names(names))
assert str(ex.value).find("subscript 'G'") >= 0


def test_normalize_subscript_numpy():
Expand Down

0 comments on commit fe03a7b

Please sign in to comment.