diff --git a/src/summarizedexperiment/BaseSE.py b/src/summarizedexperiment/BaseSE.py index e953565..fc8f007 100644 --- a/src/summarizedexperiment/BaseSE.py +++ b/src/summarizedexperiment/BaseSE.py @@ -3,18 +3,37 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple, Union from warnings import warn +import biocutils as ut from biocframe import BiocFrame from genomicranges import GenomicRanges from ._frameutils import _sanitize_frame from .type_checks import is_matrix_like +from .types import SliceResult __author__ = "jkanche, keviny2" __copyright__ = "jkanche" __license__ = "MIT" -def _validate_assays(assays: dict, shape): +def _guess_assay_shape(assays, rows, cols) -> tuple: + _keys = list(assays.keys()) + if len(_keys) > 0: + _first = _keys[0] + return assays[_first].shape + + _r = 0 + if rows is not None: + _r = rows.shape[0] + + _c = 0 + if cols is not None: + _c = cols.shape[0] + + return (_r, _c) + + +def _validate_assays(assays, shape) -> tuple: if assays is None or not isinstance(assays, dict) or len(assays.keys()) == 0: raise Exception( "`assays` must be a dictionary and contain atleast one 2-dimensional matrix." @@ -40,27 +59,25 @@ def _validate_assays(assays: dict, shape): f" but provided '{mat.shape}'." ) - return shape - -def _validate_rows(self, rows, shape): +def _validate_rows(rows, shape): if not isinstance(rows, BiocFrame): raise TypeError("'row_data' is not a `BiocFrame` object.") if rows.shape[0] != shape[0]: raise ValueError( - f"Number of features mismatch with number of rows in assays. Must be '{self._shape[0]}'" + f"Number of features mismatch with number of rows in assays. Must be '{shape[0]}'" f" but provided '{rows.shape[0]}'." ) -def _validate_cols(self, cols, shape): +def _validate_cols(cols, shape): if not isinstance(cols, BiocFrame): raise TypeError("'col_data' is not a `BiocFrame` object.") - if cols.shape[0] != self._shape[1]: + if cols.shape[0] != shape[1]: raise ValueError( - f"Number of samples mismatch with number of columns in assays. Must be '{self._shape[1]}'" + f"Number of samples mismatch with number of columns in assays. Must be '{shape[1]}'" f" but provided '{cols.shape[0]}'." ) @@ -123,19 +140,18 @@ def __init__( validate: Internal use only. """ - - self._shape = None - self._assays = assays + + self._shape = _guess_assay_shape(assays, rows, cols) self._rows = _sanitize_frame(rows, self._shape[0]) self._cols = _sanitize_frame(cols, self._shape[1]) self._metadata = metadata if metadata is not None else {} if validate: - self._shape = _validate_assays(self._assays, self._shape) + _validate_assays(self._assays, self._shape) if self._shape is None: - raise RuntimeError("Cannot extract shape from assays!") + raise RuntimeError("Cannot extract 'shape' from assays!") _validate_rows(self._rows, self._shape) _validate_cols(self._cols, self._shape) @@ -338,6 +354,23 @@ def rowdata(self, rows: Optional[BiocFrame]): ) self.set_rowdata(rows, in_place=True) + @property + def row_data(self) -> Dict[str, Any]: + """Alias for :py:meth:`~get_rowdata`.""" + return self.get_rowdata() + + @row_data.setter + def row_data(self, rows: Optional[BiocFrame]): + """Alias for :py:meth:`~set_rowdata` with ``in_place = True``. + + As this mutates the original object, a warning is raised. + """ + warn( + "Setting property 'rowdata' is an in-place operation, use 'set_rowdata' instead", + UserWarning, + ) + self.set_rowdata(rows, in_place=True) + ########################## ######>> col_data <<###### ########################## @@ -392,6 +425,23 @@ def coldata(self, rows: Optional[BiocFrame]): ) self.set_coldata(rows, in_place=True) + @property + def col_data(self) -> Dict[str, Any]: + """Alias for :py:meth:`~get_coldata`.""" + return self.get_coldata() + + @col_data.setter + def col_data(self, rows: Optional[BiocFrame]): + """Alias for :py:meth:`~set_coldata` with ``in_place = True``. + + As this mutates the original object, a warning is raised. + """ + warn( + "Setting property 'coldata' is an in-place operation, use 'set_coldata' instead", + UserWarning, + ) + self.set_coldata(rows, in_place=True) + ########################### ######>> metadata <<####### ########################### @@ -534,25 +584,43 @@ def assay(self, assay: Union[int, str]) -> Any: ######>> slicers <<####### ########################## + def _normalize_row_slice(self, rows: Union[str, int, bool, Sequence]): + _scalar = None + if rows != slice(None): + rows, _scalar = ut.normalize_subscript( + rows, len(self._rows), self._rows.row_names + ) + + return rows, _scalar + + def _normalize_column_slice(self, columns: Union[str, int, bool, Sequence]): + _scalar = None + if columns != slice(None): + columns, _scalar = ut.normalize_subscript( + columns, len(self._cols), self._cols.row_names + ) + + return columns, _scalar + def subset_assays( self, - row_indices: Optional[Union[Sequence, int, str, bool, slice, range]] = None, - col_indices: Optional[Union[Sequence, int, str, bool, slice, range]] = None, + rows: Optional[Union[str, int, bool, Sequence]], + columns: Optional[Union[str, int, bool, Sequence]], ) -> Dict[str, Any]: - """Subset all assays using a slice defined by rows and columns. + """Subset all assays by the slice defined by rows and columns. - If both ``row_indices`` and ``col_indices`` are None, a copy of the + If both ``row_indices`` and ``col_indices`` are None, a shallow copy of the current assays is returned. Args: - row_indices: + rows: Row indices to subset. Integer indices, a boolean filter, or (if the current object is named) names specifying the ranges to be extracted, see :py:meth:`~biocutils.normalize_subscript.normalize_subscript`. - col_indices: + columns: Column indices to subset. Integer indices, a boolean filter, or (if the current object is @@ -563,160 +631,171 @@ def subset_assays( Sliced experiment data. """ - if row_indices is None and col_indices is None: + if rows is None and columns is None: warnings.warn("No slice is provided, this returns a copy of all assays!") return self.assays.copy() + if rows is None: + rows = slice(None) + + if columns is None: + columns = slice(None) + + rows, _ = self._normalize_row_slice(rows) + columns, _ = self._normalize_column_slice(columns) + new_assays = OrderedDict() for asy, mat in self.assays.items(): - if row_indices is not None: - mat = mat[row_indices, :] + if rows != slice(None): + mat = mat[rows, :] - if col_indices is not None: - mat = mat[:, col_indices] + if columns != slice(None): + mat = mat[:, columns] new_assays[asy] = mat return new_assays - def _slice( + def _generic_slice( self, - args: SlicerArgTypes, - ) -> SlicerResult: - """Internal method to perform slicing. + rows: Optional[Union[str, int, bool, Sequence]], + columns: Optional[Union[str, int, bool, Sequence]], + ) -> SliceResult: + """Slice ``SummarizedExperiment`` along the rows and/or columns, based on their indices or names. Args: - args (SlicerArgTypes): Indices or names to slice. The tuple contains - slices along dimensions (rows, cols). + rows: + Rows to be extracted. - Each element in the tuple may be either a integer vector (integer positions), - boolean vector or :py:class:`~slice` object. + Integer indices, a boolean filter, or (if the current object is + named) names specifying the ranges to be extracted, see + :py:meth:`~biocutils.normalize_subscript.normalize_subscript`. - Defaults to None. + columns: + Columns to be extracted. - Raises: - ValueError: If too many or too few slices provided. + Integer indices, a boolean filter, or (if the current object is + named) names specifying the ranges to be extracted, see + :py:meth:`~biocutils.normalize_subscript.normalize_subscript`. Returns: - SlicerResult: The sliced tuple. + The sliced tuple containing the new rows, columns, assays and realized indices + for use in downstream methods. """ - if isinstance(args, tuple): - if len(args) == 0: - raise ValueError("`args` must contain at least one slice.") - - row_indices = args[0] - col_indices = None - - if len(args) > 1: - col_indices = args[1] - elif len(args) > 2: - raise ValueError("`args` contains too many slices.") - elif isinstance(args, list) or isinstance(args, slice): - row_indices = args - col_indices = None - else: - raise ValueError("`args` contains unsupported type.") - new_rows = self.row_data new_cols = self.col_data - new_assays = None + new_assays = {} - if row_indices is not None and self.row_data is not None: - if is_list_of_type(row_indices, str): - row_indices = get_indexes_from_names( - get_rownames(self.row_data), row_indices - ) - elif is_list_of_type(row_indices, bool): - if len(row_indices) != self.shape[0]: - raise ValueError( - "`row_indices` is a boolean vector, length of vector must match the", - "number of rows.", - ) - row_indices = get_indexes_from_bools(row_indices) - - if is_list_of_type(row_indices, int) or isinstance(row_indices, slice): - if isinstance(self.row_data, DataFrame): - new_rows = new_rows.iloc[row_indices] - else: - new_rows = new_rows[row_indices, :] - else: - raise TypeError("`row_indices` is not supported!") + if rows is None: + rows = slice(None) - if col_indices is not None and self.col_data is not None: - if is_list_of_type(col_indices, str): - col_indices = get_indexes_from_names( - get_rownames(self.col_data), col_indices - ) - elif is_list_of_type(col_indices, bool): - if len(col_indices) != self.shape[1]: - raise ValueError( - "`col_indices` is a boolean vector, length of vector must match the", - "number of columns.", - ) - col_indices = get_indexes_from_bools(col_indices) - - if is_list_of_type(col_indices, int) or isinstance(col_indices, slice): - if isinstance(self.col_data, DataFrame): - new_cols = new_cols.iloc[col_indices] - else: - new_cols = new_cols[col_indices, :] - else: - raise TypeError("`col_indices` not supported!") + if columns is None: + columns = slice(None) - new_assays = self.subset_assays( - row_indices=row_indices, col_indices=col_indices - ) + if rows is not None: + rows, _ = self._normalize_row_slice(rows=rows) + new_rows = new_rows[rows, :] - return SlicerResult(new_rows, new_cols, new_assays, row_indices, col_indices) + if columns is not None and self.col_data is not None: + columns, _ = self._normalize_column_slice(columns=columns) + new_cols = new_cols[columns, :] - @property - def row_names(self) -> List[str]: - """Get row/feature index. + new_assays = self.subset_assays(rows=rows, columns=columns) - Returns: - List[str]: List of row names. - """ - return get_rownames(self.row_data) + return SliceResult(new_rows, new_cols, new_assays, rows, columns) + + def get_slice( + self, + rows: Optional[Union[str, int, bool, Sequence]], + columns: Optional[Union[str, int, bool, Sequence]], + ) -> "BaseSE": + """Alias for :py:attr:`~__getitem__`, for back-compatibility.""" + current_class_const = type(self) + slicer = self._generic_slice(rows=rows, columns=columns) - @row_names.setter - def row_names(self, names: List[str]): - """Set row/feature names for the experiment. + return current_class_const( + assays=slicer.assays, + rows=slicer.rows, + columns=slicer.columns, + metadata=self._metadata, + ) + + def __getitem__( + self, + args: Union[int, str, Sequence, tuple], + ) -> "BaseSE": + """Subset a `SummarizedExperiment`. Args: - names (List[str]): New feature names. + args: + A sequence or a scalar integer or string, specifying the + columns to retain based on their names or indices. + + Alternatively a tuple of length 1. The first entry specifies + the rows to retain based on their names or indices. + + Alternatively a tuple of length 2. The first entry specifies + the rows to retain, while the second entry specifies the + columns to retain, based on their names or indices. Raises: - ValueError: If length of ``names`` is not same as the number of rows. + ValueError: If too many or too few slices provided. + + Returns: + Same type as caller with the sliced rows and columns. """ - if len(names) != self.shape[0]: - raise ValueError("Length of `names` must be the same as number of rows.") + if isinstance(args, (str, int)): + return self.get_slice(args, slice(None)) - self._rows = set_rownames(self.row_data, names) + if isinstance(args, tuple): + if len(args) == 0: + raise ValueError("At least one slicing argument must be provided.") - @property - def colnames(self) -> List[str]: - """Get column/sample names. + if len(args) == 1: + return self.get_slice(args[0], slice(None)) + elif len(args) == 2: + return self.get_slice(args[0], args[1]) + else: + raise ValueError( + f"`{type(self).__name__}` only supports 2-dimensional slicing." + ) + + raise TypeError( + "args must be a sequence or a scalar integer or string or a tuple of atmost 2 values." + ) + ############################### + ######>> names accessor <<##### + ############################### + + def get_row_names(self) -> Optional[ut.Names]: + """ Returns: - List[str]: List of sample names. + List of row names, or None if no row names are available. """ - return get_colnames(self.col_data) + return self._rows.get_row_names() - @colnames.setter - def colnames(self, names: List[str]): - """Set column/sample names for the experiment. - - Args: - names (List[str]): New samples names. + @property + def rownames(self) -> List[str]: + """Alias for :py:attr:`~get_rownames`, provided for back-compatibility.""" + return self.get_row_names() - Raises: - ValueError: If length of ``names`` is not same as the number of rows. + def get_column_names(self) -> Optional[ut.Names]: """ - if len(names) != self.shape[1]: - raise ValueError("Length of `names` must be the same as number of columns.") + Returns: + List of column names, or None if no column names are available. + """ + return self._cols.get_row_names() + + @property + def colnames(self) -> List[str]: + """Alias for :py:attr:`~get_rownames`, provided for back-compatibility.""" + return self.get_column_names() - self._cols = set_colnames(self.col_data, names) + ################################ + ######>> AnnData interop <<##### + ################################ def to_anndata(self): """Transform :py:class:`summarizedexperiment.BaseSE`-like into a :py:class:`~anndata.AnnData` representation. diff --git a/src/summarizedexperiment/RangedSummarizedExperiment.py b/src/summarizedexperiment/RangedSummarizedExperiment.py index a1dfe46..0f2849e 100644 --- a/src/summarizedexperiment/RangedSummarizedExperiment.py +++ b/src/summarizedexperiment/RangedSummarizedExperiment.py @@ -129,7 +129,7 @@ def __init__( metadata (Dict, optional): Additional experimental metadata describing the methods. Defaults to None. """ - super().__init__(assays, row_data, col_data, metadata) + super().__init__(assays, rows=row_data, cols=col_data, metadata=metadata) if row_ranges is None: row_ranges = GenomicRangesList.empty(n=self._shape[0]) diff --git a/src/summarizedexperiment/SummarizedExperiment.py b/src/summarizedexperiment/SummarizedExperiment.py index ab205f9..c740953 100644 --- a/src/summarizedexperiment/SummarizedExperiment.py +++ b/src/summarizedexperiment/SummarizedExperiment.py @@ -2,9 +2,10 @@ from warnings import warn from genomicranges import GenomicRanges +from biocframe import BiocFrame from .BaseSE import BaseSE -from .types import BiocOrPandasFrame, MatrixTypes, SlicerArgTypes +from .types import MatrixTypes, SlicerArgTypes __author__ = "jkanche" __copyright__ = "jkanche" @@ -29,11 +30,11 @@ class SummarizedExperiment(BaseSE): All matrices in assays must be 2-dimensional and have the same shape (number of rows, number of columns). - row_data (BiocOrPandasFrame, optional): Features, which must be of the same length as the rows of + row_data (BiocFrame, optional): Features, which must be of the same length as the rows of the matrices in assays. Features can be either a :py:class:`~pandas.DataFrame` or :py:class:`~biocframe.BiocFrame.BiocFrame`. Defaults to None. - col_data (BiocOrPandasFrame, optional): Sample data, which must be of the same length as the + col_data (BiocFrame, optional): Sample data, which must be of the same length as the columns of the matrices in assays. Sample Information can be either a :py:class:`~pandas.DataFrame` or :py:class:`~biocframe.BiocFrame.BiocFrame`. Defaults to None. @@ -43,8 +44,8 @@ class SummarizedExperiment(BaseSE): def __init__( self, assays: Dict[str, MatrixTypes], - row_data: Optional[BiocOrPandasFrame] = None, - col_data: Optional[BiocOrPandasFrame] = None, + row_data: Optional[BiocFrame] = None, + col_data: Optional[BiocFrame] = None, metadata: Optional[Dict] = None, ) -> None: """Initialize a Summarized Experiment (SE). @@ -60,11 +61,11 @@ def __init__( All matrices in assays must be 2-dimensional and have the same shape (number of rows, number of columns). - row_data (BiocOrPandasFrame, optional): Features, which must be of the same length as the rows of + row_data (BiocFrame, optional): Features, which must be of the same length as the rows of the matrices in assays. Features can be either a :py:class:`~pandas.DataFrame` or :py:class:`~biocframe.BiocFrame.BiocFrame`. Defaults to None. - col_data (BiocOrPandasFrame, optional): Sample data, which must be of the same length as the + col_data (BiocFrame, optional): Sample data, which must be of the same length as the columns of the matrices in assays. Sample Information can be either a :py:class:`~pandas.DataFrame` or :py:class:`~biocframe.BiocFrame.BiocFrame`. Defaults to None. @@ -76,7 +77,7 @@ def __init__( "`row_data` is `GenomicRanges`, consider using `RangeSummarizedExperiment`." ) - super().__init__(assays, row_data, col_data, metadata) + super().__init__(assays, rows=row_data, cols=col_data, metadata=metadata) def __getitem__( self, diff --git a/src/summarizedexperiment/__init__.py b/src/summarizedexperiment/__init__.py index 728b695..f0745e3 100644 --- a/src/summarizedexperiment/__init__.py +++ b/src/summarizedexperiment/__init__.py @@ -16,4 +16,4 @@ del version, PackageNotFoundError from .SummarizedExperiment import SummarizedExperiment -from .RangedSummarizedExperiment import RangedSummarizedExperiment +# from .RangedSummarizedExperiment import RangedSummarizedExperiment diff --git a/src/summarizedexperiment/_frameutils.py b/src/summarizedexperiment/_frameutils.py index 52b1944..9da4ed1 100644 --- a/src/summarizedexperiment/_frameutils.py +++ b/src/summarizedexperiment/_frameutils.py @@ -1,6 +1,6 @@ -from biocframe import BiocFrame, from_pandas +from typing import Any -from .type_checks import is_pandas +from biocframe import BiocFrame, from_pandas __author__ = "jkanche" __copyright__ = "jkanche" @@ -14,3 +14,19 @@ def _sanitize_frame(frame, num_rows: int): frame = from_pandas(frame) return frame + + +def is_pandas(x: Any) -> bool: + """Check if ``x`` is a :py:class:`~pandas.DataFrame`. + + Args: + x: + Any object. + + Returns: + True if ``x`` is a :py:class:`~pandas.DataFrame`. + """ + if hasattr(x, "dtypes"): + return True + + return False diff --git a/src/summarizedexperiment/type_checks.py b/src/summarizedexperiment/type_checks.py index 71e5449..6b88e1d 100644 --- a/src/summarizedexperiment/type_checks.py +++ b/src/summarizedexperiment/type_checks.py @@ -74,19 +74,3 @@ def is_list_of_type(x: Any, target_type: Callable) -> bool: return (isinstance(x, list) or isinstance(x, tuple)) and all( isinstance(item, target_type) for item in x ) - - -def is_pandas(x: Any) -> bool: - """Check if ``x`` is a :py:class:`~pandas.DataFrame`. - - Args: - x: - Any object. - - Returns: - True if ``x`` is a :py:class:`~pandas.DataFrame`. - """ - if hasattr(x, "dtypes"): - return True - - return False diff --git a/src/summarizedexperiment/types.py b/src/summarizedexperiment/types.py index 2a5e8cc..67a39c6 100644 --- a/src/summarizedexperiment/types.py +++ b/src/summarizedexperiment/types.py @@ -13,6 +13,7 @@ MatrixSlicerTypes = Union[List[int], List[bool], slice] SlicerTypes = Union[List[int], List[bool], List[str], slice] SlicerArgTypes = Union[Tuple[SlicerTypes], List[SlicerTypes], slice] -SlicerResult = namedtuple( - "SlicerResult", ["row_data", "col_data", "assays", "row_indices", "col_indices"] -) + +SliceResult = namedtuple( + "SlicerResult", ["rows", "columns", "assays", "row_indices", "col_indices"] +) \ No newline at end of file diff --git a/tests/data/summarized_experiments.py b/tests/data/summarized_experiments.py index 4fb5857..573b421 100644 --- a/tests/data/summarized_experiments.py +++ b/tests/data/summarized_experiments.py @@ -10,8 +10,8 @@ se_unnamed = SummarizedExperiment( assays={"counts": np.random.poisson(lam=10, size=(nrows, ncols))} ) -se_unnamed.col_data["A"] = [1] * ncols -se_unnamed.row_data["A"] = [1] * nrows +se_unnamed.coldata["A"] = [1] * ncols +se_unnamed.rowdata["A"] = [1] * nrows se_unnamed_2 = SummarizedExperiment( assays={ @@ -19,11 +19,11 @@ "normalized": np.random.normal(size=(nrows, ncols)), } ) -se_unnamed_2.col_data["A"] = [2] * ncols -se_unnamed_2.col_data["B"] = [3] * ncols -se_unnamed_2.row_data["B"] = ["B"] * nrows +se_unnamed_2.coldata["A"] = [2] * ncols +se_unnamed_2.coldata["B"] = [3] * ncols +se_unnamed_2.rowdata["B"] = ["B"] * nrows -row_data1 = pd.DataFrame( +rowdata1 = pd.DataFrame( { "seqnames": ["chr_5", "chr_3", "chr_2"], "start": [500, 300, 200], @@ -31,7 +31,7 @@ }, index=["HER2", "BRCA1", "TPFK"], ) -col_data1 = pd.DataFrame( +coldata1 = pd.DataFrame( { "sample": ["SAM_1", "SAM_2", "SAM_3"], "disease": ["True", "True", "True"], @@ -43,12 +43,12 @@ "counts": np.random.poisson(lam=5, size=(3, 3)), "lognorm": np.random.lognormal(size=(3, 3)), }, - row_data=row_data1, - col_data=col_data1, + row_data=rowdata1, + col_data=coldata1, metadata={"seq_type": "paired"}, ) -row_data2 = pd.DataFrame( +rowdata2 = pd.DataFrame( { "seqnames": ["chr_5", "chr_3", "chr_2"], "start": [500, 300, 200], @@ -56,7 +56,7 @@ }, index=["HER2", "BRCA1", "TPFK"], ) -col_data2 = pd.DataFrame( +coldata2 = pd.DataFrame( { "sample": ["SAM_4", "SAM_5", "SAM_6"], "disease": ["True", "False", "True"], @@ -69,12 +69,12 @@ "counts": np.random.poisson(lam=5, size=(3, 3)), "lognorm": np.random.lognormal(size=(3, 3)), }, - row_data=row_data2, - col_data=col_data2, + row_data=rowdata2, + col_data=coldata2, metadata={"seq_platform": "Illumina NovaSeq 6000"}, ) -row_data3 = pd.DataFrame( +rowdata3 = pd.DataFrame( { "seqnames": ["chr_7", "chr_1", "chr_9"], "start": [700, 100, 900], @@ -82,7 +82,7 @@ }, index=["MYC", "BRCA2", "TPFK"], ) -col_data3 = pd.DataFrame( +coldata3 = pd.DataFrame( { "sample": ["SAM_7", "SAM_8", "SAM_9"], "disease": ["True", "False", "False"], @@ -95,12 +95,12 @@ "counts": np.random.poisson(lam=5, size=(3, 3)), "lognorm": np.random.lognormal(size=(3, 3)), }, - row_data=row_data3, - col_data=col_data3, + row_data=rowdata3, + col_data=coldata3, metadata={"seq_platform": "Illumina NovaSeq 6000"}, ) -row_data4 = pd.DataFrame( +rowdata4 = pd.DataFrame( { "seqnames": ["chr_7", "chr_5", "chr_1", "chr_9", "chr_3"], "start": [700, 500, 100, 900, 300], @@ -108,7 +108,7 @@ }, index=["MYC", "BRCA1", "BRCA2", "TPFK", "GSS"], ) -col_data4 = pd.DataFrame( +coldata4 = pd.DataFrame( { "sample": ["SAM_10", "SAM_11", "SAM_12"], "disease": ["True", "False", "False"], @@ -122,12 +122,12 @@ "lognorm": np.random.lognormal(size=(5, 3)), "beta": np.random.beta(a=1, b=1, size=(5, 3)), }, - row_data=row_data4, - col_data=col_data4, + row_data=rowdata4, + col_data=coldata4, metadata={"seq_platform": "Illumina NovaSeq 6000"}, ) -row_data5 = pd.DataFrame( +rowdata5 = pd.DataFrame( { "seqnames": ["chr_7", "chr_5", "chr_4", "chr_9", "chr_8"], "start": [700, 500, 400, 900, 800], @@ -135,7 +135,7 @@ }, index=["MYC", "BRCA1", "PIK3CA", "TPFK", "HRAS"], ) -col_data5 = pd.DataFrame( +coldata5 = pd.DataFrame( { "sample": ["SAM_13", "SAM_14", "SAM_15"], "disease": ["True", "True", "True"], @@ -149,12 +149,12 @@ "lognorm": sp.lil_matrix(np.random.lognormal(size=(5, 3))), "beta": sp.lil_matrix(np.random.beta(a=2, b=1, size=(5, 3))), }, - row_data=row_data5, - col_data=col_data5, + row_data=rowdata5, + col_data=coldata5, metadata={"seq_platform": "Illumina NovaSeq 6000"}, ) -col_data6 = pd.DataFrame( +coldata6 = pd.DataFrame( { "sample": ["SAM_10", "SAM_11", "SAM_12"], "disease": ["True", "False", "False"], @@ -168,16 +168,16 @@ "lognorm": np.random.lognormal(size=(5, 3)), "beta": np.random.beta(a=1, b=1, size=(5, 3)), }, - row_data=row_data4, - col_data=col_data6, + row_data=rowdata4, + col_data=coldata6, metadata={"seq_platform": "Illumina NovaSeq 6000"}, ) -row_data_nonames = pd.DataFrame( +rowdata_nonames = pd.DataFrame( {}, index=["HER2", "BRCA1", "TPFK"], ) -col_data_nonames = pd.DataFrame( +coldata_nonames = pd.DataFrame( {}, index=["cell_1", "cell_2", "cell_3"], ) @@ -186,12 +186,12 @@ "counts": np.random.poisson(lam=5, size=(3, 3)), "lognorm": np.random.lognormal(size=(3, 3)), }, - row_data=row_data_nonames, - col_data=col_data_nonames, + row_data=rowdata_nonames, + col_data=coldata_nonames, metadata={}, ) -row_data_null_row_name = pd.DataFrame( +rowdata_null_row_name = pd.DataFrame( { "seqnames": ["chr_5", "chr_3", "chr_2"], "start": [500, 300, 200], @@ -201,12 +201,12 @@ ) se_null_row_name = SummarizedExperiment( assays={"counts": np.random.poisson(lam=5, size=(3, 3))}, - row_data=row_data_null_row_name, - col_data=col_data1, + row_data=rowdata_null_row_name, + col_data=coldata1, metadata={"seq_type": "paired"}, ) -row_data_duplicated_row_name = pd.DataFrame( +rowdata_duplicated_row_name = pd.DataFrame( { "seqnames": ["chr_5", "chr_3", "chr_2"], "start": [500, 300, 200], @@ -216,12 +216,12 @@ ) se_duplicated_row_name = SummarizedExperiment( assays={"counts": np.random.poisson(lam=5, size=(3, 3))}, - row_data=row_data_duplicated_row_name, - col_data=col_data1, + row_data=rowdata_duplicated_row_name, + col_data=coldata1, metadata={"seq_type": "paired"}, ) -col_data_duplicated_sample_name = pd.DataFrame( +coldata_duplicated_sample_name = pd.DataFrame( { "sample": ["SAM_1", "SAM_1", "SAM_3"], "disease": ["True", "True", "True"], @@ -230,12 +230,12 @@ ) se_duplicated_sample_name = SummarizedExperiment( assays={"counts": np.random.poisson(lam=5, size=(3, 3))}, - row_data=row_data1, - col_data=col_data_duplicated_sample_name, + row_data=rowdata1, + col_data=coldata_duplicated_sample_name, metadata={"seq_type": "paired"}, ) -row_data_biocframe_1 = BiocFrame( +rowdata_biocframe_1 = BiocFrame( { "seqnames": ["chr_5", "chr_3", "chr_2"], "start": [500, 300, 200], @@ -243,7 +243,7 @@ }, row_names=["HER2", "BRCA1", "TPFK"], ) -col_data_biocframe_1 = BiocFrame( +coldata_biocframe_1 = BiocFrame( { "sample": ["SAM_1", "SAM_2", "SAM_3"], "disease": ["True", "True", "True"], @@ -255,12 +255,12 @@ "counts": np.random.poisson(lam=5, size=(3, 3)), "lognorm": np.random.lognormal(size=(3, 3)), }, - row_data=row_data_biocframe_1, - col_data=col_data_biocframe_1, + row_data=rowdata_biocframe_1, + col_data=coldata_biocframe_1, metadata={"seq_platform": "Illumina NovaSeq 6000"}, ) -row_data_biocframe_2 = BiocFrame( +rowdata_biocframe_2 = BiocFrame( { "seqnames": ["chr_5", "chr_3", "chr_2"], "start": [500, 300, 200], @@ -268,7 +268,7 @@ }, row_names=["HER2", "BRCA1", "TPFK"], ) -col_data_biocframe_2 = BiocFrame( +coldata_biocframe_2 = BiocFrame( { "sample": ["SAM_4", "SAM_5", "SAM_6"], "disease": ["True", "False", "True"], @@ -281,7 +281,7 @@ "counts": np.random.poisson(lam=7, size=(3, 3)), "lognorm": np.random.lognormal(size=(3, 3)), }, - row_data=row_data_biocframe_2, - col_data=col_data_biocframe_2, + row_data=rowdata_biocframe_2, + col_data=coldata_biocframe_2, metadata={"seq_platform": "Illumina NovaSeq 6000"}, )