Improved support for Names objects passed as the names= argument of normalize_subscript.

This makes name lookup work for scalar string subscripts when names= is a Names object, and raises a proper IndexError when the requested name isn't found. The tests were also updated to check these cases.
BiocPy · Nov 14, 2023 · fe03a7b · fe03a7b
1 parent af70930
commit fe03a7b
Show file tree

Hide file tree

Showing 2 changed files with 37 additions and 33 deletions.
diff --git a/src/biocutils/normalize_subscript.py b/src/biocutils/normalize_subscript.py
@@ -4,6 +4,9 @@
 from .Names import Names
 
 
+SubscriptTypes = Union[slice, range, Sequence, int, str, bool]
+
+
 def _raise_int(idx: int, length):
     raise IndexError("subscript (" + str(idx) + ") out of range for vector-like object of length " + str(length))
 
@@ -13,7 +16,7 @@ def _is_scalar_bool(sub):
 
 
 def normalize_subscript(
-    sub: Union[slice, range, Sequence, int, str, bool],
+    sub: SubscriptTypes,
     length: int,
     names: Optional[Sequence[str]] = None,
     non_negative_only: bool = True,
@@ -46,7 +49,8 @@ def normalize_subscript(
 
         names:
             List of names for each entry in the object. If not None, this
-            should have length equal to ``length``.
+            should have length equal to ``length``. Some optimizations
+            are possible if this is a :py:class:`~Names.Names` object.
 
         non_negative_only:
             Whether negative indices must be converted into non-negative
@@ -73,7 +77,17 @@ def normalize_subscript(
     if isinstance(sub, str):
         if names is None:
             raise IndexError("failed to find subscript '" + sub + "' for vector-like object with no names")
-        return [names.index(sub)], True
+        i = -1
+        if isinstance(names, Names):
+            i = names.map(sub)
+        else:
+            for j, n in enumerate(names):
+                if n == sub:
+                    i = j
+                    break
+        if i < 0:
+            raise IndexError("cannot find subscript '" + sub + "' in the names") 
+        return [i], True
 
     if isinstance(sub, slice):
         return range(*sub.indices(length)), False
@@ -126,7 +140,10 @@ def normalize_subscript(
     for i, x in enumerate(sub):
         if isinstance(x, str):
             if are_names_indexed:
-                output.append(names.map(x))
+                i = names.map(x)
+                if i < 0:
+                    raise IndexError("cannot find subscript '" + x + "' in the names") 
+                output.append(i)
             else:
                 has_strings.add(x)
                 string_positions.append(len(output))

diff --git a/tests/test_normalize_subscript.py b/tests/test_normalize_subscript.py
@@ -9,10 +9,8 @@ def test_normalize_subscript_scalars():
     assert normalize_subscript(True, 100) == ([0], True)
     assert normalize_subscript(False, 100) == ([], False)
     assert normalize_subscript("C", 5, ["A", "B", "C", "D", "E"]) == ([2], True)
-    assert normalize_subscript("B", 5, ["A", "B", "C", "B", "E"]) == (
-        [1],
-        True,
-    )  # takes first occurence.
+    assert normalize_subscript("B", 5, ["A", "B", "C", "B", "E"]) == ([1], True,)  # takes first occurence.
+    assert normalize_subscript("B", 5, Names(["A", "B", "C", "B", "E"])) == ([1], True,)  # takes first occurence.
 
     with pytest.raises(IndexError) as ex:
         normalize_subscript(100, 10)
@@ -26,8 +24,13 @@ def test_normalize_subscript_scalars():
         normalize_subscript("foor", 10)
     assert str(ex.value).find("subscript 'foor'") >= 0
 
-    with pytest.raises(ValueError) as ex:
+    with pytest.raises(IndexError) as ex:
         normalize_subscript("F", 5, ["A", "B", "C", "D", "E"])
+    assert str(ex.value).find("subscript 'F'") >= 0
+
+    with pytest.raises(IndexError) as ex:
+        normalize_subscript("F", 5, Names(["A", "B", "C", "D", "E"]))
+    assert str(ex.value).find("subscript 'F'") >= 0
 
 
 def test_normalize_subscript_slice():
@@ -76,42 +79,26 @@ def test_normalize_subscript_chaos():
         normalize_subscript([0, 2, 50, 6, 8], 50)
     assert str(ex.value).find("subscript (50)") >= 0
 
-    assert normalize_subscript([0, -1, 2, -3, 4, -5, 6, -7, 8], 50) == (
-        [0, 49, 2, 47, 4, 45, 6, 43, 8],
-        False,
-    )
+    assert normalize_subscript([0, -1, 2, -3, 4, -5, 6, -7, 8], 50) == ([0, 49, 2, 47, 4, 45, 6, 43, 8], False)
 
     with pytest.raises(IndexError) as ex:
         normalize_subscript([0, 2, -51, 6, 8], 50)
     assert str(ex.value).find("subscript (-51)") >= 0
 
-    assert normalize_subscript([False, 10, True, 20, False, 30, True], 50) == (
-        [10, 2, 20, 30, 6],
-        False,
-    )
+    assert normalize_subscript([False, 10, True, 20, False, 30, True], 50) == ([10, 2, 20, 30, 6], False)
 
     names = ["A", "B", "C", "D", "E", "F"]
-    assert normalize_subscript(["B", 1, "D", 2, "F", 3, "A"], 6, names) == (
-        [1, 1, 3, 2, 5, 3, 0],
-        False,
-    )
-    assert normalize_subscript(["B", 1, "D", 2, "F", 3, "A"], 6, Names(names)) == (
-        [1, 1, 3, 2, 5, 3, 0],
-        False,
-    )
-    assert normalize_subscript(
-        ["B", 1, "A", 2, "B", 3, "A"], 6, ["A", "B", "A", "B", "A", "B"]
-    ) == (
-        [1, 1, 0, 2, 1, 3, 0],
-        False,
-    )  # Takes the first occurence.
+    assert normalize_subscript(["B", 1, "D", 2, "F", 3, "A"], 6, names) == ([1, 1, 3, 2, 5, 3, 0], False)
+    assert normalize_subscript(["B", 1, "D", 2, "F", 3, "A"], 6, Names(names)) == ([1, 1, 3, 2, 5, 3, 0], False)
+    assert normalize_subscript(["B", 1, "A", 2, "B", 3, "A"], 6, ["A", "B", "A", "B", "A", "B"]) == ([1, 1, 0, 2, 1, 3, 0], False)  # Takes the first occurence.
 
     with pytest.raises(KeyError) as ex:
         normalize_subscript(["B", 1, "D", 2, "G", 3, "A"], 6, names)
+    assert str(ex.value).find("'G'") >= 0
 
     with pytest.raises(IndexError) as ex:
-        normalize_subscript(["B", 1, "D", 2, "F", 3, "A"], 6)
-    assert str(ex.value).find("vector-like object with no names") >= 0
+        normalize_subscript(["B", 1, "D", 2, "G", 3, "A"], 6, Names(names))
+    assert str(ex.value).find("subscript 'G'") >= 0
 
 
 def test_normalize_subscript_numpy():