diff --git a/src/summarizedexperiment/BaseSE.py b/src/summarizedexperiment/BaseSE.py index d32d3d8..b8b41e8 100644 --- a/src/summarizedexperiment/BaseSE.py +++ b/src/summarizedexperiment/BaseSE.py @@ -6,6 +6,7 @@ import biocframe import biocutils as ut +from ._assayutils import check_assays_are_equal, merge_assays from ._frameutils import _sanitize_frame from .type_checks import is_matrix_like from .types import SliceResult @@ -275,8 +276,8 @@ def __repr__(self) -> str: pattern = ( f"Class {type(self).__name__} with {self.shape[0]} features and {self.shape[1]} samples \n" f" assays: {', '.join(list(self.assays.keys()))} \n" - f" row_data: {self._rows.names if self._rows is not None else None} \n" - f" column_data: {self._cols.names if self._cols is not None else None}" + f" row_data: {self._rows.row_names if self._rows is not None else None} \n" + f" column_data: {self._cols.column_names if self._cols is not None else None}" ) return pattern @@ -591,7 +592,7 @@ def get_columnnames(self) -> Optional[ut.Names]: Returns: List of column names, or None if no column names are available. """ - return self._cols.get_rownames() + return self._column_names def set_columnnames( self, names: Optional[List[str]], in_place: bool = False @@ -614,7 +615,7 @@ def set_columnnames( if names is not None and not isinstance(names, ut.Names): names = ut.Names(names) - _validate_rows(self._cols, names, self.shape) + _validate_cols(self._cols, names, self.shape) output = self._define_output(in_place) output._column_names = names @@ -640,7 +641,7 @@ def columnnames(self, names: Optional[List[str]]): @property def colnames(self) -> Optional[ut.Names]: """Alias for :py:attr:`~get_columnnames`, provided for back-compatibility.""" - return self.get_colnames() + return self.get_columnnames() @colnames.setter def colnames(self, names: Optional[List[str]]): @@ -657,7 +658,7 @@ def colnames(self, names: Optional[List[str]]): @property def col_names(self) -> Optional[ut.Names]: """Alias for :py:attr:`~get_columnnames`, provided for back-compatibility.""" - return self.get_colnames() + return self.get_columnnames() @col_names.setter def col_names(self, names: Optional[List[str]]): @@ -1044,40 +1045,44 @@ def to_anndata(self): ######>> combine ops <<##### ############################ - # def combine_rows(self, *experiments: "BaseSE"): - # _all_objects = [self] + experiments + def combine_rows(self, *experiments: "BaseSE"): + _all_objects = [self] + experiments - # _new_assays = merge_assays([x.assays for x in _all_objects]) + _all_assays = [x.assays for x in _all_objects] + check_assays_are_equal(_all_assays) + _new_assays = merge_assays(_all_assays, by="row") - # _all_rows = [x._rows for x in _all_objects] - # _new_rows = ut.combine_rows(_all_rows) + _all_rows = [x._rows for x in _all_objects] + _new_rows = ut.combine_rows(_all_rows) - # _all_cols = [x._cols for x in _all_objects] - # _new_cols = ut.combine_columns(_all_cols) + _all_cols = [x._cols for x in _all_objects] + _new_cols = ut.combine_columns(_all_cols) - # current_class_const = type(self) - # return current_class_const( - # assays=_new_assays, - # row_data=_new_rows, - # column_data=_new_cols, - # metadata=self._metadata, - # ) + current_class_const = type(self) + return current_class_const( + assays=_new_assays, + row_data=_new_rows, + column_data=_new_cols, + metadata=self._metadata, + ) - # def combine_cols(self, *experiments: "BaseSE"): - # _all_objects = [self] + experiments + def combine_cols(self, *experiments: "BaseSE"): + _all_objects = [self] + experiments - # _new_assays = merge_assays([x.assays for x in _all_objects]) + _all_assays = [x.assays for x in _all_objects] + check_assays_are_equal(_all_assays) + _new_assays = merge_assays(_all_assays, by="column") - # _all_rows = [x._rows for x in _all_objects] - # _new_rows = ut.combine_columns(_all_rows) + _all_rows = [x._rows for x in _all_objects] + _new_rows = ut.combine_columns(_all_rows) - # _all_cols = [x._cols for x in _all_objects] - # _new_cols = ut.combine_rows(_all_cols) + _all_cols = [x._cols for x in _all_objects] + _new_cols = ut.combine_rows(_all_cols) - # current_class_const = type(self) - # return current_class_const( - # assays=_new_assays, - # row_data=_new_rows, - # column_data=_new_cols, - # metadata=self._metadata, - # ) + current_class_const = type(self) + return current_class_const( + assays=_new_assays, + row_data=_new_rows, + column_data=_new_cols, + metadata=self._metadata, + ) diff --git a/src/summarizedexperiment/_assayutils.py b/src/summarizedexperiment/_assayutils.py index b8fdd60..4a3f2d8 100644 --- a/src/summarizedexperiment/_assayutils.py +++ b/src/summarizedexperiment/_assayutils.py @@ -1,24 +1,35 @@ import itertools +import biocutils as ut + __author__ = "jkanche" __copyright__ = "jkanche" __license__ = "MIT" -def merge_assays(assays): - _all_keys = [list(x.keys() for x in assays)] +def merge_assays(assays, by): + if by not in ["row", "column"]: + raise ValueError("'by' must be either 'row' or 'column'.") + + _all_keys = [list(x.keys()) for x in assays] + _all_keys = list(set(itertools.chain.from_iterable(_all_keys))) + + _all_assays = {} + for k in _all_keys: + _all_mats = [x[k] for x in assays] + + if by == "row": + _all_assays[k] = ut.combine_rows(*_all_mats) + else: + _all_assays[k] = ut.combine_columns(*_all_mats) - _set = set() - for k_idx in range(len(_all_keys)): - kx = _all_keys[k_idx] - for ky in kx: - if ky in _set: - ky = f"{ky}_{k_idx}" + return _all_assays - _set.add(ky) - _new_all_keys = list(_set) +def check_assays_are_equal(assays): + _first = assays[0] + _first_keys = set(list(_first.keys())) - _all_assays = [list(x.values()) for x in assays] - _all_assays = list(itertools.chain.from_iterable(_all_assays)) - return dict(zip(_new_all_keys, _all_assays)) + for x in assays[1:]: + if len(list(_first_keys - set(x.keys()))) != 0: + raise ValueError("Not all experiments contain the same assays.") diff --git a/tests/test_RSE_methods.py b/tests/test_RSE_methods.py index d50c7a5..e84713c 100644 --- a/tests/test_RSE_methods.py +++ b/tests/test_RSE_methods.py @@ -73,7 +73,7 @@ def test_RSE_props(): assert tse.width is not None assert tse.rownames is None - assert tse.colnames is not None + assert tse.colnames is None def test_RSE_subset(): diff --git a/tests/test_SE.py b/tests/test_SE.py index 1f72ee6..6a8ae8a 100644 --- a/tests/test_SE.py +++ b/tests/test_SE.py @@ -83,7 +83,7 @@ def test_SE_no_row_or_col_data(): assert tse.col_data is not None assert isinstance(tse.col_data, BiocFrame) - tse.row_data = tse.row_data.set_row_names([f"row_{i}" for i in range(200)]) + tse.row_names = [f"row_{i}" for i in range(200)] assert tse.rownames is not None assert len(tse.rownames) == 200 assert tse.row_data.shape[0] == 200 @@ -92,7 +92,7 @@ def test_SE_no_row_or_col_data(): assert tse.col_data is not None assert isinstance(tse.col_data, BiocFrame) - tse.col_data = tse.coldata.set_row_names([f"col_{i}" for i in range(6)]) + tse.col_names = [f"col_{i}" for i in range(6)] assert tse.colnames is not None assert len(tse.colnames) == 6 assert tse.col_data.shape[0] == 6