Skip to content

Commit

Permalink
Subset an empty GenomicRangesList (#56).
Browse files Browse the repository at this point in the history
  • Loading branch information
jkanche authored Dec 30, 2023
1 parent e683fbc commit d5b2f9b
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 36 deletions.
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ python_requires = >=3.8
install_requires =
importlib-metadata; python_version<"3.8"
biocframe>=0.5.3,<0.6.0
iranges[optional]>=0.2.0,<0.3.0
iranges[optional]>=0.2.2,<0.3.0
biocutils>=0.1.3,<0.2.0
numpy

Expand Down
62 changes: 28 additions & 34 deletions src/genomicranges/GenomicRangesList.py
Original file line number Diff line number Diff line change
Expand Up @@ -762,8 +762,7 @@ def __getitem__(
If ``args`` is not a supported slice argument.
Returns:
The genomic element as a ``GenomicRanges`` object
or a new ``GenomicRangesList`` of the slice.
A new ``GenomicRangesList`` of the slice.
"""
if isinstance(args, int):
return self._ranges[args]
Expand All @@ -772,49 +771,44 @@ def __getitem__(
_idx = self.names.map(args)
return self._ranges[_idx]
else:
new_ranges = None
new_range_lengths = None
new_names = None
new_mcols = None
new_metadata = self.metadata

if isinstance(args, tuple):
# TODO: should figure out what to do with the second dimension later.
if len(args) >= 1:
args = args[0]

if isinstance(args, list):
if ut.is_list_of_type(args, bool):
if len(args) != len(self):
idx, _ = ut.normalize_subscript(args, len(self), self._names)

if isinstance(idx, list):
if ut.is_list_of_type(idx, bool):
if len(idx) != len(self):
raise ValueError(
"`indices` is a boolean vector, length should match the size of the data."
)

args = [i for i in range(len(args)) if args[i] is True]
idx = [i for i in range(len(idx)) if idx[i] is True]

new_ranges = [self.ranges[i] for i in args]
new_range_lengths = [self._range_lengths[i] for i in args]
if self.names is not None:
new_names = [self.names[i] for i in args]
new_ranges = [self.ranges[i] for i in idx]
new_range_lengths = [self._range_lengths[i] for i in idx]

if self.mcols is not None:
new_mcols = self.mcols[args, :]
elif isinstance(args, slice):
new_ranges = self.ranges[args]
new_range_lengths = self._range_lengths[args]
new_names = None
if self.names is not None:
new_names = self.names[args]
new_names = [self.names[i] for i in idx]

new_mcols = None
if self.mcols is not None:
new_mcols = self.mcols[args, :]
else:
raise TypeError("Arguments to slice is not a list of supported types.")
new_mcols = self.mcols[idx, :]

return GenomicRangesList(
new_ranges, new_range_lengths, new_names, new_mcols, new_metadata
)
return GenomicRangesList(
new_ranges, new_range_lengths, new_names, new_mcols, self._metadata
)
elif isinstance(idx, (slice, range)):
if isinstance(idx, range):
idx = slice(idx.start, idx.stop, idx.step)

return GenomicRangesList(
self._ranges[idx],
self._range_lengths[idx],
self._names[idx] if self._names is not None else self._names,
self._mcols[idx, :],
self._metadata,
)

raise TypeError("Arguments to slice is not supported.")
raise TypeError("Arguments to subset `GenomicRangesList` is not supported.")

##########################
######>> empty <<#########
Expand Down
10 changes: 10 additions & 0 deletions tests/test_gr_methods_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,16 @@ def test_slices():
assert subset_gr.seqnames == ["chr1", "chr3", "chr3"]


def test_gr_empty_subset():
    """Slicing an empty ``GenomicRanges`` must succeed and stay empty."""
    gre = GenomicRanges.empty()

    assert gre is not None
    assert isinstance(gre, GenomicRanges)
    assert len(gre) == 0

    # The slice deliberately reaches past the (zero) length of the object;
    # check the result instead of only checking that no exception is raised.
    subset = gre[0:10]
    assert isinstance(subset, GenomicRanges)
    assert len(subset) == 0


def test_export():
df = gr.to_pandas()

Expand Down
36 changes: 35 additions & 1 deletion tests/test_grl_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,37 @@ def test_is_empty_slice():

assert grl.is_empty() == False

sgrl = grl[0:1, :]
sgrl = grl[0:1]
assert sgrl is not None
assert isinstance(sgrl, GenomicRangesList)
assert len(sgrl) == 1


def test_slice_by_name():
    """Subsetting with a list of names returns a ``GenomicRangesList``."""
    grl = GenomicRangesList(ranges=[a, b], names=["a", "b"])

    assert not grl.is_empty()

    # A list holding one name selects a single element, and the result is
    # still a list-like container rather than the bare element.
    sgrl = grl[["a"]]
    assert sgrl is not None
    assert isinstance(sgrl, GenomicRangesList)
    assert len(sgrl) == 1


def test_slice_by_bool():
    """Subsetting with a boolean mask keeps only the ``True`` positions."""
    grl = GenomicRangesList(ranges=[a, b], names=["a", "b"])

    assert not grl.is_empty()

    sgrl = grl[[True, False]]
    assert sgrl is not None
    assert isinstance(sgrl, GenomicRangesList)
    assert len(sgrl) == 1

    # A mask whose length differs from the list (1 entry vs. 2 elements)
    # is expected to be rejected.
    with pytest.raises(Exception):
        grl[[False]]


def test_is_empty_True():
grl = GenomicRangesList(GenomicRanges.empty(), range_lengths=[0])

Expand Down Expand Up @@ -95,3 +120,12 @@ def test_combine():
cgrl = combine_sequences(grla, grlb)

assert len(cgrl) == 3


def test_empty_grl_slice():
    """A slice of ``GenomicRangesList.empty(n=100)`` is itself a list.

    Taking ``[0:10]`` is expected to yield a ``GenomicRangesList`` of
    length 10.
    """
    empty_list = GenomicRangesList.empty(n=100)
    assert isinstance(empty_list, GenomicRangesList)

    first_ten = empty_list[0:10]
    assert isinstance(first_ten, GenomicRangesList)
    assert len(first_ten) == 10

0 comments on commit d5b2f9b

Please sign in to comment.