From 615d396adda9e582a548ce1e59c3b8c6452b32b7 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Wed, 20 Sep 2023 19:51:39 -0700 Subject: [PATCH] Support more slice ops on GenomicRangesList (#25) --- src/genomicranges/GenomicRangesList.py | 60 +++++++++++++++++++++++--- tests/test_grl_methods.py | 20 +++++++++ 2 files changed, 75 insertions(+), 5 deletions(-) diff --git a/src/genomicranges/GenomicRangesList.py b/src/genomicranges/GenomicRangesList.py index 5a0d737..c7b48d0 100644 --- a/src/genomicranges/GenomicRangesList.py +++ b/src/genomicranges/GenomicRangesList.py @@ -331,20 +331,28 @@ def to_pandas(self) -> DataFrame: def add_element(self, key, value, element_metadata): raise NotImplementedError("Adding new elements is not yet implemented!") - def __getitem__(self, args: Union[str, int]) -> "GenomicRanges": + def __getitem__( + self, args: Union[str, int, tuple, list, slice] + ) -> Union[GenomicRanges, "GenomicRangesList"]: """Access individual genomic elements. Args: - args (Union[str, int]): Name of the genomic element to access. + args (Union[str, int, tuple, list, slice]): Name of the genomic element to access. Alternatively, if names of genomic elements are not available, you may provide an index position of the genomic element to access. + Alternatively, ``args`` may also specify a list of positions to slice specified either as a + :py:class:`~list` or :py:class:`~slice`. + + A tuple may also be specified along each dimension. Currently, if the tuple contains more than + one dimension, only the first dimension is used and the rest are ignored. + Raises: - TypeError: If ``args`` is not a string nor integer. + TypeError: If ``args`` is not a supported slice argument. Returns: - GenomicRanges: The genomic element. + Union[GenomicRanges, GenomicRangesList]: The genomic element or a new GenomicRangesList of the slice. 
""" if isinstance(args, int): return self.ranges[args] @@ -352,8 +360,50 @@ def __getitem__(self, args: Union[str, int]) -> "GenomicRanges": if self.names is not None: _idx = self.names.index(args) return self.ranges[_idx] + else: + new_ranges = None + new_range_lengths = None + new_names = None + new_mcols = None + new_metadata = self.metadata + + if isinstance(args, tuple): + # TODO: probably should figure out what to do with the second dimension later. + if len(args) >= 1: + args = args[0] + + if isinstance(args, list): + if is_list_of_type(args, bool): + if len(args) != len(self): + raise ValueError( + "`indices` is a boolean vector, length should match the size of the data." + ) + + args = [i for i in range(len(args)) if args[i] is True] + + new_ranges = [self.ranges[i] for i in args] + new_range_lengths = [self._range_lengths[i] for i in args] + if self.names is not None: + new_names = [self.names[i] for i in args] + + if self.mcols is not None: + new_mcols = self.mcols[args, :] + elif isinstance(args, slice): + new_ranges = self.ranges[args] + new_range_lengths = self._range_lengths[args] + if self.names is not None: + new_names = self.names[args] + + if self.mcols is not None: + new_mcols = self.mcols[args, :] + else: + raise TypeError("`args` is not supported.") + + return GenomicRangesList( + new_ranges, new_range_lengths, new_names, new_mcols, new_metadata + ) - raise TypeError("args must be either a string or an integer.") + raise TypeError("`args` must be either a string or an integer.") def __len__(self) -> int: """Number of genomic elements. 
diff --git a/tests/test_grl_methods.py b/tests/test_grl_methods.py index 8dcc539..26d77d6 100644 --- a/tests/test_grl_methods.py +++ b/tests/test_grl_methods.py @@ -36,6 +36,17 @@ def test_is_empty_False(): assert grl.is_empty() == False +def test_is_empty_slice(): + grl = GenomicRangesList(ranges=[a, b], names=["a", "b"]) + + assert grl.is_empty() == False + + sgrl = grl[0:1, :] + assert sgrl is not None + assert isinstance(sgrl, GenomicRangesList) + assert len(sgrl) == 1 + + def test_is_empty_True(): grl = GenomicRangesList(GenomicRanges.empty(), range_lengths=[0] * 10) @@ -43,6 +54,15 @@ def test_is_empty_True(): assert len(grl) == 10 +def test_is_empty_True_slice(): + grl = GenomicRangesList(GenomicRanges.empty(), range_lengths=[0] * 10) + + sgrl = grl[1:5] + assert sgrl is not None + assert isinstance(sgrl, GenomicRangesList) + assert len(sgrl) == 4 + + def test_nrows(): grl = GenomicRangesList(ranges=[a, b], names=["a", "b"])