Skip to content

Commit

Permalink
Parse H5 backed delayed objects (#50)
Browse files Browse the repository at this point in the history
* Update tests, documentation and README
  • Loading branch information
jkanche authored Nov 1, 2024
1 parent c8e0d7c commit 6b9f3b8
Show file tree
Hide file tree
Showing 8 changed files with 77 additions and 11 deletions.
10 changes: 2 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,9 @@ Package is published to [PyPI](https://pypi.org/project/rds2py/)

```shell
pip install rds2py
```

## Quick Start

```python
from rds2py import read_rds

# Read any RDS file
r_obj = read_rds("path/to/file.rds")
# or install optional dependencies
pip install rds2py[optional]
```

## Usage
Expand Down
9 changes: 6 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,11 @@ install_requires =
importlib-metadata; python_version<"3.8"
numpy
scipy
biocframe
biocutils>=0.1.5
singlecellexperiment>=0.4.1
summarizedexperiment>=0.4.1
genomicranges>=0.4.9
biocframe
summarizedexperiment>=0.4.1
singlecellexperiment>=0.4.1
multiassayexperiment

[options.packages.find]
Expand All @@ -69,12 +69,15 @@ exclude =
# PDF = ReportLab; RXP
optional =
pandas
hdf5array

# Add here test requirements (semicolon/line-separated)
testing =
setuptools
pytest
pytest-cov
pandas
hdf5array

[options.entry_points]
# Add here console scripts like:
Expand Down
2 changes: 2 additions & 0 deletions src/rds2py/generics.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@
# multi assay experiment
"MultiAssayExperiment": "rds2py.read_mae.read_multi_assay_experiment",
"ExperimentList": "rds2py.read_dict.read_dict",
# delayed matrices
"H5SparseMatrix": "rds2py.read_delayed_matrix.read_hdf5_sparse",
}


Expand Down
41 changes: 41 additions & 0 deletions src/rds2py/read_delayed_matrix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""Functions and classes for parsing R delayed matrix objects from HDF5Array."""

from hdf5array import Hdf5CompressedSparseMatrix

from .generics import _dispatcher
from .rdsutils import get_class

__author__ = "jkanche"
__copyright__ = "jkanche"
__license__ = "MIT"


def read_hdf5_sparse(robject: dict, **kwargs) -> Hdf5CompressedSparseMatrix:
    """Convert an R delayed sparse array (H5-backed).

    The matrix itself is not loaded here; the dimensions, file path and group
    name stored on the object's seed are parsed and handed to
    ``Hdf5CompressedSparseMatrix``.

    Args:
        robject:
            Dictionary containing parsed delayed sparse array.

        **kwargs:
            Additional arguments, forwarded to the dispatcher when parsing
            the seed's attributes.

    Returns:
        A Hdf5CompressedSparseMatrix from the 'hdf5array' package.

    Raises:
        RuntimeError:
            If ``robject`` is not a 'H5SparseMatrix' object.
    """
    _cls = get_class(robject)
    if _cls not in ["H5SparseMatrix"]:
        raise RuntimeError(f"`robject` does not contain a 'H5SparseMatrix' object, contains `{_cls}`.")

    # Look the seed up once; both its class and its attributes are needed.
    _seed_obj = robject["attributes"]["seed"]

    # Only a 'CSC_H5SparseMatrixSeed' seed sets `by_column`; every other seed
    # class leaves it False.
    by_column = get_class(_seed_obj) in ["CSC_H5SparseMatrixSeed"]

    _seed_attrs = _seed_obj["attributes"]
    shape = tuple(_dispatcher(_seed_attrs["dim"], **kwargs))
    # The dispatcher yields an iterable for these attributes; the path and the
    # group name are taken from its first element.
    fpath = list(_dispatcher(_seed_attrs["filepath"], **kwargs))[0]
    group_name = list(_dispatcher(_seed_attrs["group"], **kwargs))[0]

    return Hdf5CompressedSparseMatrix(path=fpath, group_name=group_name, shape=shape, by_column=by_column)
Binary file added tests/data/example_anndata.h5ad
Binary file not shown.
10 changes: 10 additions & 0 deletions tests/data/generate_files.R
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,13 @@ simpleMultiAssay <- MultiAssayExperiment(experiments=doubleExp)
# MultiAssayExperiment built from `doubleExp` with `patient.data` supplied as
# colData; written to "simple_mae.rds".
simpleMultiAssay2 <- MultiAssayExperiment(experiments=doubleExp,
colData=patient.data)
saveRDS(simpleMultiAssay2, "simple_mae.rds")

## Delayed Arrays
# Builds an H5SparseMatrix on top of the example .h5ad file shipped with
# zellkonverter and writes it to "h5sparse.rds".
# NOTE(review): h5ls() and H5SparseMatrix() are presumably provided by the
# rhdf5 and HDF5Array packages - confirm they are attached earlier in this
# script (or by zellkonverter) before relying on this section.

library(zellkonverter)
# Resolve the example file inside the installed zellkonverter package.
h5ad_file <- system.file("extdata", "example_anndata.h5ad",
package="zellkonverter")
# Show the contents of the file (the result is not stored).
h5ls(h5ad_file)

# NOTE(review): the saved object presumably records `h5ad_file` as a path on
# the generating machine, which would make "h5sparse.rds" machine-specific -
# confirm before using it in automated tests.
M <- H5SparseMatrix(h5ad_file, "/obsp/connectivities")
saveRDS(M, "h5sparse.rds")
Binary file added tests/data/h5sparse.rds
Binary file not shown.
16 changes: 16 additions & 0 deletions tests/test_delayedmatrices.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import pytest

from rds2py import read_rds
from hdf5array import Hdf5CompressedSparseMatrix

__author__ = "jkanche"
__copyright__ = "jkanche"
__license__ = "MIT"

@pytest.mark.skip(reason="delayedarray uses full file paths. this should be run locally.")
def test_read_h5sparse():
    """Read the H5-backed sparse matrix fixture and check its type and shape."""
    expected_shape = (200, 200)

    parsed = read_rds("tests/data/h5sparse.rds")

    # Same three checks, in the same order: non-None, correct class, shape.
    assert parsed is not None
    assert isinstance(parsed, Hdf5CompressedSparseMatrix)
    assert parsed.shape == expected_shape

0 comments on commit 6b9f3b8

Please sign in to comment.