Skip to content

Commit

Permalink
Register subset_sequence, combine_sequences methods for ranges.
Browse files Browse the repository at this point in the history
This allows downstream code to preserve ranges for as long as possible,
to avoid expansion into a full list (and associated memory consumption).
  • Loading branch information
LTLA committed Nov 8, 2023
1 parent 3c9b64f commit 205e286
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 0 deletions.
34 changes: 34 additions & 0 deletions src/biocutils/combine_sequences.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,40 @@ def _combine_sequences_dense_arrays(*x: numpy.ndarray):
return numpy.concatenate(x, axis=None)


@combine_sequences.register
def _combine_sequences_ranges(*x: range):
for current in x:
if not isinstance(current, range):
return list(chain(*x))

found = None
for i, current in enumerate(x):
if len(current) != 0:
found = i
start = current.start
step = current.step
stop = current.stop
last = current[-1]
break

if found is None:
return x[0]

failed = False
for i in range(found + 1, len(x)):
current = x[i]
if len(current) != 0:
if current[0] != last + step or (len(current) > 1 and step != current.step):
failed = True
break
last = current[-1]
stop = current.stop

if not failed:
return range(start, stop, step)
return list(chain(*x))


if is_package_installed("pandas") is True:
from pandas import Series, concat

Expand Down
8 changes: 8 additions & 0 deletions src/biocutils/subset_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,11 @@ def subset_sequence(x: Any, indices: Sequence[int]) -> Any:
@subset_sequence.register
def _subset_sequence_list(x: list, indices: Sequence) -> list:
return type(x)(x[i] for i in indices)


@subset_sequence.register
def _subset_sequence_range(x: range, indices: Sequence) -> Union[list, range]:
if isinstance(indices, range):
return x[slice(indices.start, indices.stop, indices.step)]
else:
return [x[i] for i in indices]
19 changes: 19 additions & 0 deletions tests/test_combine_sequences.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,22 @@ def test_pandas_series():
z = combine_sequences(s1, x)
assert isinstance(z, pd.Series)
assert len(z) == 4


def test_ranges():
    """Ranges merge into one range when contiguous, else expand to a list."""
    cases = [
        # Contiguous ranges with a shared step collapse into one range.
        ((range(0, 10), range(10, 54)), range(0, 54)),
        ((range(2, 5), range(5, 9), range(9, 20)), range(2, 20)),
        ((range(2, 9, 2), range(10, 54, 2)), range(2, 54, 2)),
        ((range(10, 5, -1), range(5, -1, -1)), range(10, -1, -1)),
        # Trigger a fallback.
        ((range(0, 10), [10, 11, 12, 13]), list(range(0, 14))),
        ((range(0, 10), range(20, 54)), [*range(0, 10), *range(20, 54)]),
        # Empty ranges are handled correctly.
        ((range(10, 10), range(50, 50), range(20, 20)), range(10, 10)),
        ((range(0, 10), range(50, 50), range(10, 20)), range(0, 20)),
        # Different steps trigger a fallback, unless it's of length 1.
        ((range(0, 10), range(10, 50, 2)), [*range(0, 10), *range(10, 50, 2)]),
        ((range(0, 10), range(10, 11, 5), range(11, 19)), range(0, 19)),
    ]

    for inputs, expected in cases:
        assert combine_sequences(*inputs) == expected
6 changes: 6 additions & 0 deletions tests/test_subset_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,9 @@ def test_subset_numpy():

y = np.random.rand(10, 20)
assert (subset_sequence(y, range(5)) == y[0:5, :]).all()


def test_subset_range():
    """Subsetting a range yields a range for range indices, else a list."""
    source = range(10, 20)

    # Range indices keep the result as a range.
    by_range = subset_sequence(source, range(2, 8, 2))
    assert by_range == range(12, 18, 2)

    # Arbitrary indices expand into a list.
    by_list = subset_sequence(source, [0, 1, 5, 9])
    assert by_list == [10, 11, 15, 19]

0 comments on commit 205e286

Please sign in to comment.