From e35209d66d70e35466e334ac717492df1cea85e5 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Mon, 6 Jan 2025 14:37:52 -0500 Subject: [PATCH] Fix subsetting when slice arguments are numpy vectors (#87) --- CHANGELOG.md | 3 ++- setup.cfg | 6 ++--- src/summarizedexperiment/BaseSE.py | 22 +++++++++---------- .../RangedSummarizedExperiment.py | 4 ++-- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae5240f..8651aa7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,10 @@ # Changelog -## Version 0.5.1 - 0.5.2 +## Version 0.5.1 - 0.5.3 - Add wrapper methods to combine Summarized and RangedSummarized by rows or columns. - Implement getters and setters to access and modify an assay. +- Fixed an issue with numpy arrays as slice arguments. Code now uses Biocutils's subset functions to perform these operations. ## Version 0.5.0 diff --git a/setup.cfg b/setup.cfg index edb07b0..f5a5cc3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,9 +49,9 @@ python_requires = >=3.9 # For more information, check out https://semver.org/. install_requires = importlib-metadata; python_version<"3.8" - genomicranges>=0.4.18 - biocframe>=0.5.10 - biocutils>=0.1.4 + genomicranges>=0.5.1 + biocframe>=0.6.2 + biocutils>=0.2.1 [options.packages.find] where = src diff --git a/src/summarizedexperiment/BaseSE.py b/src/summarizedexperiment/BaseSE.py index 7558473..2b05c84 100644 --- a/src/summarizedexperiment/BaseSE.py +++ b/src/summarizedexperiment/BaseSE.py @@ -986,7 +986,7 @@ def set_assay(self, name: str, assay: Any, in_place: bool = False) -> "BaseSE": or as a reference to the (in-place-modified) original. """ if assay.shape != self.shape: - raise ValueError("Porvided assay does not match the dimensions of the experiment.") + raise ValueError("Provided assay does not match the dimensions of the experiment.") output = self._define_output(in_place) if in_place is False: @@ -1000,14 +1000,14 @@ def set_assay(self, name: str, assay: Any, in_place: bool = False) -> "BaseSE": def _normalize_row_slice(self, rows: Union[str, int, bool, Sequence]): _scalar = None - if rows != slice(None): + if not (isinstance(rows, slice) and rows == slice(None)): rows, _scalar = ut.normalize_subscript(rows, len(self._rows), self._row_names) return rows, _scalar def _normalize_column_slice(self, columns: Union[str, int, bool, Sequence]): _scalar = None - if columns != slice(None): + if not (isinstance(columns, slice) and columns == slice(None)): columns, _scalar = ut.normalize_subscript(columns, len(self._cols), self._column_names) return columns, _scalar @@ -1056,10 +1056,10 @@ def subset_assays( new_assays = OrderedDict() for asy, mat in self.assays.items(): - if rows != slice(None): + if not (isinstance(rows, slice) and rows == slice(None)): mat = mat[rows, :] - if columns != slice(None): + if not (isinstance(columns, slice) and columns == slice(None)): mat = mat[:, columns] new_assays[asy] = mat @@ -1105,19 +1105,19 @@ def _generic_slice( if columns is None: columns = slice(None) - if rows is not None: + if not (isinstance(rows, slice) and rows == slice(None)): rows, _ = self._normalize_row_slice(rows=rows) - new_rows = new_rows[rows, :] + new_rows = ut.subset(new_rows, rows) if new_row_names is not None: - new_row_names = new_row_names[rows] + new_row_names = ut.subset_sequence(new_row_names, rows) - if columns is not None and self.column_data is not None: + if not (isinstance(columns, slice) and columns == slice(None)) and self.column_data is not None: columns, _ = self._normalize_column_slice(columns=columns) - new_cols = new_cols[columns, :] + new_cols = ut.subset(new_cols, columns) if new_col_names is not None: - new_col_names = new_col_names[columns] + new_col_names = ut.subset_sequence(new_col_names, columns) new_assays = self.subset_assays(rows=rows, columns=columns) diff --git a/src/summarizedexperiment/RangedSummarizedExperiment.py b/src/summarizedexperiment/RangedSummarizedExperiment.py index af1320a..83d2fde 100644 --- a/src/summarizedexperiment/RangedSummarizedExperiment.py +++ b/src/summarizedexperiment/RangedSummarizedExperiment.py @@ -398,8 +398,8 @@ def get_slice( slicer = self._generic_slice(rows=rows, columns=columns) new_row_ranges = None - if slicer.row_indices != slice(None): - new_row_ranges = self.row_ranges[slicer.row_indices] + if not (isinstance(slicer.row_indices, slice) and slicer.row_indices == slice(None)): + new_row_ranges = ut.subset_sequence(self.row_ranges, slicer.row_indices) current_class_const = type(self) return current_class_const(