def read_hdf5_sparse(robject: dict, **kwargs) -> Hdf5CompressedSparseMatrix:
    """Convert an R delayed sparse array (H5-backed).

    Args:
        robject:
            Dictionary containing parsed delayed sparse array.

        **kwargs:
            Additional arguments, forwarded to the dispatcher when
            parsing the seed's attributes.

    Returns:
        A Hdf5CompressedSparseMatrix from the 'hdf5array' package.

    Raises:
        RuntimeError:
            If ``robject`` is not a 'H5SparseMatrix' object.
    """
    _cls = get_class(robject)
    if _cls != "H5SparseMatrix":
        raise RuntimeError(f"`robject` does not contain a 'H5SparseMatrix' object, contains `{_cls}`.")

    # The seed holds everything needed to locate the matrix on disk;
    # look it up once and reuse it for both the class check and the attributes.
    _seed_obj = robject["attributes"]["seed"]

    # Only the CSC seed is treated as column-oriented; every other seed class
    # falls back to by_column=False (presumably CSR -- confirm against HDF5Array).
    by_column = get_class(_seed_obj) == "CSC_H5SparseMatrixSeed"

    _seed_attrs = _seed_obj["attributes"]
    shape = tuple(_dispatcher(_seed_attrs["dim"], **kwargs))
    # 'filepath' and 'group' are parsed into sequences; only the first entry is used.
    fpath = list(_dispatcher(_seed_attrs["filepath"], **kwargs))[0]
    group_name = list(_dispatcher(_seed_attrs["group"], **kwargs))[0]

    return Hdf5CompressedSparseMatrix(path=fpath, group_name=group_name, shape=shape, by_column=by_column)
@pytest.mark.skip(reason="delayedarray uses full file paths. this should be run locally.")
def test_read_h5sparse():
    """Read the H5-backed sparse matrix fixture and check its type and shape."""
    matrix = read_rds("tests/data/h5sparse.rds")

    assert matrix is not None
    assert isinstance(matrix, Hdf5CompressedSparseMatrix)

    expected_shape = (200, 200)
    assert matrix.shape == expected_shape