# Reconstructed from a git format-patch whose line breaks had been collapsed.
#
#   Commit:  5733e1f89f039a507f9070ca8c02baf1d780bf19
#   Author:  LTLA
#   Date:    Wed, 8 Nov 2023 11:21:13 -0800
#   Subject: Register subset_sequence, combine_sequence methods for ranges.
#
# Files touched by the patch:
#   src/biocutils/combine_sequences.py
#   src/biocutils/subset_sequence.py
#   tests/test_combine_sequences.py
#   tests/test_subset_sequence.py
#
# Each section below holds the code the patch adds to one of those files.
# NOTE(review): the sections rely on names defined in the target files outside
# the patch hunks (`combine_sequences`, `subset_sequence`, `chain`, `Sequence`,
# `Union`) - confirm they are in scope where each section is placed.


# ---------------------------------------------------------------------------
# src/biocutils/combine_sequences.py
# (inserted after `_combine_sequences_dense_arrays`)
# ---------------------------------------------------------------------------


@combine_sequences.register
def _combine_sequences_ranges(*x: range):
    """Combine ranges, returning a single range when they are contiguous.

    Args:
        x: Objects to combine. Dispatch is expected to select this method
            from the first argument only, so later arguments may be of any
            iterable type.

    Returns:
        A ``range`` if every argument is a ``range`` and the non-empty ones
        form one arithmetic progression when laid end to end. If all
        arguments are empty ranges, the first argument is returned as-is.
        Otherwise, a list holding the concatenated elements.
    """
    if not all(isinstance(current, range) for current in x):
        return list(chain(*x))

    start = step = last = None
    for current in x:
        if len(current) == 0:
            # Empty ranges contribute nothing and never break contiguity.
            continue

        if last is None:
            # First non-empty range fixes the start and the step.
            start, step, last = current.start, current.step, current[-1]
            continue

        # The step of a single-element range is irrelevant, so it is only
        # compared when the range has at least two elements.
        continues_run = current[0] == last + step
        same_step = len(current) == 1 or current.step == step
        if not (continues_run and same_step):
            return list(chain(*x))
        last = current[-1]

    if last is None:
        return x[0]

    # Derive the stop from the last element actually seen. Reusing the final
    # range's own `stop` is wrong when that range has one element and a
    # different step, e.g. range(10, 100, 200) would extend the result to 100.
    return range(start, last + step, step)


# ---------------------------------------------------------------------------
# src/biocutils/subset_sequence.py
# (appended after `_subset_sequence_list`)
# ---------------------------------------------------------------------------


@subset_sequence.register
def _subset_sequence_range(x: range, indices: Sequence) -> Union[list, range]:
    """Subset a range by integer indices.

    Args:
        x: Range to be subsetted.
        indices: Positions to extract from ``x``. Negative values count from
            the end, as in ordinary indexing.

    Returns:
        A ``range`` if ``indices`` is a ``range`` that is empty or whose
        entries are all negative or all non-negative; otherwise a list. In
        both cases the contents equal ``[x[i] for i in indices]``.

    Raises:
        IndexError: If any index is out of bounds for ``x``.
    """
    if isinstance(indices, range):
        if len(indices) == 0:
            return x[:0]

        first, last = indices[0], indices[-1]
        # Indices on one side of zero map linearly onto positions in `x`, so
        # the result is an arithmetic progression. A range that crosses from
        # negative to non-negative indices wraps around and is not.
        if (first < 0) == (last < 0):
            step = x.step * indices.step
            # Indexing the two endpoints bounds-checks the whole range, since
            # every other index lies between them.
            return range(x[first], x[last] + step, step)

    return [x[i] for i in indices]


# ---------------------------------------------------------------------------
# tests/test_combine_sequences.py
# (appended after `test_pandas_series`)
# ---------------------------------------------------------------------------


def test_ranges():
    assert combine_sequences(range(0, 10), range(10, 54)) == range(0, 54)
    assert combine_sequences(range(2, 5), range(5, 9), range(9, 20)) == range(2, 20)
    assert combine_sequences(range(2, 9, 2), range(10, 54, 2)) == range(2, 54, 2)
    assert combine_sequences(range(10, 5, -1), range(5, -1, -1)) == range(10, -1, -1)

    # Trigger a fallback.
    assert combine_sequences(range(0, 10), [10, 11, 12, 13]) == list(range(0, 14))
    assert combine_sequences(range(0, 10), range(20, 54)) == list(range(0, 10)) + list(range(20, 54))

    # Empty ranges are handled correctly.
    assert combine_sequences(range(10, 10), range(50, 50), range(20, 20)) == range(10, 10)
    assert combine_sequences(range(0, 10), range(50, 50), range(10, 20)) == range(0, 20)

    # A single-element range only contributes its one value, whatever its stop.
    assert combine_sequences(range(0, 10, 2), range(10, 100, 200)) == range(0, 12, 2)
    assert combine_sequences(range(0, 100, 200), range(200, 1000, 900)) == range(0, 400, 200)


# ---------------------------------------------------------------------------
# tests/test_subset_sequence.py
# (appended after `test_subset_numpy`)
# ---------------------------------------------------------------------------


def test_subset_range():
    x = range(10, 20)
    assert subset_sequence(x, range(2, 8, 2)) == range(12, 18, 2)
    assert subset_sequence(x, [0, 1, 5, 9]) == [10, 11, 15, 19]

    # Range indices behave like the equivalent list of indices, not a slice.
    assert subset_sequence(x, range(5, -1, -1)) == range(15, 9, -1)
    assert subset_sequence(x, range(-3, 0)) == range(17, 20)
    assert subset_sequence(x, range(-1, 2)) == [19, 10, 11]
    assert len(subset_sequence(x, range(5, -1))) == 0