Skip to content

Commit

Permalink
getting started on combine ops
Browse files Browse the repository at this point in the history
  • Loading branch information
jkanche committed Dec 30, 2023
1 parent 20f96ea commit 4276829
Showing 1 changed file with 164 additions and 0 deletions.
164 changes: 164 additions & 0 deletions src/singlecellexperiment/SingleCellExperiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@
import biocframe
import biocutils as ut
from genomicranges import GenomicRanges
from summarizedexperiment._combineutils import (
check_assays_are_equal,
merge_assays,
merge_se_colnames,
merge_se_rownames,
relaxed_merge_assays,
)
from summarizedexperiment.RangedSummarizedExperiment import (
GRangesOrGRangesList,
RangedSummarizedExperiment,
Expand Down Expand Up @@ -1074,3 +1081,160 @@ def to_mudata(self):
expts[str(exptName)] = expt

return MuData(expts)


############################
######>> combine ops <<#####
############################


@ut.combine_rows.register(SingleCellExperiment)
def combine_rows(*x: SingleCellExperiment) -> SingleCellExperiment:
    """Combine multiple ``SingleCellExperiment`` objects by row.

    All assays must contain the same assay names. If you need a
    flexible combine operation, check out :py:func:`~relaxed_combine_rows`.

    The column data, column names and metadata of the result are taken
    from the first object in ``x``.

    Args:
        x:
            One or more ``SingleCellExperiment`` objects.

    Returns:
        A combined ``SingleCellExperiment``, constructed with the type of
        the first object in ``x``.
    """
    template = x[0]

    # Strict variant: the assays are checked before they are merged.
    assay_collections = [sce.assays for sce in x]
    check_assays_are_equal(assay_collections)
    combined_assays = merge_assays(assay_collections, by="row")

    # Row-level annotations and names are stacked across all inputs.
    combined_row_data = ut.combine_rows(*[sce._rows for sce in x])
    combined_row_names = merge_se_rownames(x)
    combined_row_ranges = ut.combine_sequences(*[sce._row_ranges for sce in x])

    # NOTE(review): only the seven fields below are passed to the
    # constructor; confirm whether any other state held by the inputs
    # needs to be carried over as well.
    return type(template)(
        assays=combined_assays,
        row_ranges=combined_row_ranges,
        row_data=combined_row_data,
        column_data=template._cols,
        row_names=combined_row_names,
        column_names=template._column_names,
        metadata=template._metadata,
    )


@ut.combine_columns.register(SingleCellExperiment)
def combine_columns(*x: SingleCellExperiment) -> SingleCellExperiment:
    """Combine multiple ``SingleCellExperiment`` objects by column.

    All assays must contain the same assay names. If you need a
    flexible combine operation, check out :py:func:`~relaxed_combine_columns`.

    The row ranges, row data, row names and metadata of the result are
    taken from the first object in ``x``.

    Args:
        x:
            One or more ``SingleCellExperiment`` objects.

    Returns:
        A combined ``SingleCellExperiment``, constructed with the type of
        the first object in ``x``.
    """
    template = x[0]

    # Strict variant: the assays are checked before they are merged.
    assay_collections = [sce.assays for sce in x]
    check_assays_are_equal(assay_collections)
    combined_assays = merge_assays(assay_collections, by="column")

    # The per-object column annotations are stacked with ``combine_rows``
    # (presumably one annotation row per experiment column -- confirm).
    combined_column_data = ut.combine_rows(*[sce._cols for sce in x])
    combined_column_names = merge_se_colnames(x)

    # NOTE(review): only the seven fields below are passed to the
    # constructor; confirm whether any other state held by the inputs
    # needs to be carried over as well.
    return type(template)(
        assays=combined_assays,
        row_ranges=template._row_ranges,
        row_data=template._rows,
        column_data=combined_column_data,
        row_names=template._row_names,
        column_names=combined_column_names,
        metadata=template._metadata,
    )


@ut.relaxed_combine_rows.register(SingleCellExperiment)
def relaxed_combine_rows(*x: SingleCellExperiment) -> SingleCellExperiment:
    """A relaxed version of the :py:func:`~biocutils.combine_rows.combine_rows` method for
    :py:class:`~SingleCellExperiment` objects.

    Whereas ``combine_rows`` expects that all objects have the same columns,
    ``relaxed_combine_rows`` allows for different columns. Absent columns in
    any object are filled in with appropriate placeholder values before
    combining.

    The column data, column names and metadata of the result are taken from
    the first object in ``x``.

    Args:
        x:
            One or more ``SingleCellExperiment`` objects, possibly with
            differences in the number and identity of their columns.

    Returns:
        A ``SingleCellExperiment`` that combines all objects in ``x`` along
        their rows and contains the union of all columns. Columns absent in
        any ``x`` are filled in with placeholders consisting of Nones or
        masked NumPy values.
    """
    template = x[0]

    # The relaxed assay merge receives the experiments themselves rather
    # than a list of their assays.
    combined_assays = relaxed_merge_assays(x, by="row")

    # Row-level annotations and names are stacked across all inputs.
    combined_row_data = biocframe.relaxed_combine_rows(*[sce._rows for sce in x])
    combined_row_names = merge_se_rownames(x)
    combined_row_ranges = ut.combine_sequences(*[sce._row_ranges for sce in x])

    # NOTE(review): only the seven fields below are passed to the
    # constructor; confirm whether any other state held by the inputs
    # needs to be carried over as well.
    return type(template)(
        assays=combined_assays,
        row_ranges=combined_row_ranges,
        row_data=combined_row_data,
        column_data=template._cols,
        row_names=combined_row_names,
        column_names=template._column_names,
        metadata=template._metadata,
    )


@ut.relaxed_combine_columns.register(SingleCellExperiment)
def relaxed_combine_columns(
    *x: SingleCellExperiment,
) -> SingleCellExperiment:
    """A relaxed version of the :py:func:`~biocutils.combine_columns.combine_columns` method for
    :py:class:`~SingleCellExperiment` objects.

    Whereas ``combine_columns`` expects that all objects have the same rows,
    ``relaxed_combine_columns`` allows for different rows. Absent rows in any
    object are filled in with appropriate placeholder values before
    combining.

    The row ranges, row data, row names and metadata of the result are taken
    from the first object in ``x``.

    Args:
        x:
            One or more ``SingleCellExperiment`` objects, possibly with
            differences in the number and identity of their rows.

    Returns:
        A ``SingleCellExperiment`` that combines all objects in ``x`` along
        their columns and contains the union of all rows. Rows absent in
        any ``x`` are filled in with placeholders consisting of Nones or
        masked NumPy values.
    """
    template = x[0]

    # The relaxed assay merge receives the experiments themselves rather
    # than a list of their assays.
    combined_assays = relaxed_merge_assays(x, by="column")

    # The per-object column annotations are stacked with the relaxed
    # row-wise combine (presumably one annotation row per experiment
    # column -- confirm).
    combined_column_data = biocframe.relaxed_combine_rows(*[sce._cols for sce in x])
    combined_column_names = merge_se_colnames(x)

    # NOTE(review): row ranges, row data and row names come from the first
    # object only, and only the seven fields below are passed to the
    # constructor; confirm this is intended when the inputs differ in rows.
    return type(template)(
        assays=combined_assays,
        row_ranges=template._row_ranges,
        row_data=template._rows,
        column_data=combined_column_data,
        row_names=template._row_names,
        column_names=combined_column_names,
        metadata=template._metadata,
    )


@ut.extract_row_names.register(SingleCellExperiment)
def _rownames_rse(x: SingleCellExperiment):
    """Return the row names of ``x``.

    Registered as the ``SingleCellExperiment`` implementation of
    ``biocutils.extract_row_names``; it simply forwards to
    ``x.get_row_names()``.

    Args:
        x:
            A ``SingleCellExperiment`` object.

    Returns:
        Whatever ``x.get_row_names()`` returns.
    """
    row_names = x.get_row_names()
    return row_names


@ut.extract_column_names.register(SingleCellExperiment)
def _colnames_rse(x: SingleCellExperiment):
    """Return the column names of ``x``.

    Registered as the ``SingleCellExperiment`` implementation of
    ``biocutils.extract_column_names``; it simply forwards to
    ``x.get_column_names()``.

    Args:
        x:
            A ``SingleCellExperiment`` object.

    Returns:
        Whatever ``x.get_column_names()`` returns.
    """
    column_names = x.get_column_names()
    return column_names

0 comments on commit 4276829

Please sign in to comment.