From e61f63e9bb006f25925c7e6040102c15bf1c0c35 Mon Sep 17 00:00:00 2001
From: Jayaram Kancherla
Date: Fri, 1 Nov 2024 11:47:18 -0700
Subject: [PATCH] Parse H5 backed delayed objects

---
 src/rds2py/generics.py            |  2 ++
 src/rds2py/read_delayed_matrix.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)
 create mode 100644 src/rds2py/read_delayed_matrix.py

diff --git a/src/rds2py/generics.py b/src/rds2py/generics.py
index 565d1cd..cdd1e92 100644
--- a/src/rds2py/generics.py
+++ b/src/rds2py/generics.py
@@ -58,6 +58,8 @@
     # multi assay experiment
     "MultiAssayExperiment": "rds2py.read_mae.read_multi_assay_experiment",
     "ExperimentList": "rds2py.read_dict.read_dict",
+    # delayed matrices
+    "H5SparseMatrix": "rds2py.read_delayed_matrix.read_hdf5_sparse",
 }
 
 
diff --git a/src/rds2py/read_delayed_matrix.py b/src/rds2py/read_delayed_matrix.py
new file mode 100644
index 0000000..c7a9ce4
--- /dev/null
+++ b/src/rds2py/read_delayed_matrix.py
@@ -0,0 +1,49 @@
+"""Functions and classes for parsing R delayed matrix objects from HDF5Array."""
+
+from typing import Literal
+
+from hdf5array import Hdf5CompressedSparseMatrix
+
+from .generics import _dispatcher
+from .rdsutils import get_class
+
+__author__ = "jkanche"
+__copyright__ = "jkanche"
+__license__ = "MIT"
+
+
+def read_hdf5_sparse(robject: dict, **kwargs) -> Hdf5CompressedSparseMatrix:
+    """Convert a parsed R ``H5SparseMatrix`` into an HDF5-backed sparse matrix.
+
+    Args:
+        robject:
+            Parsed R object; its class must be ``H5SparseMatrix``.
+
+        **kwargs:
+            Forwarded unchanged to the dispatcher when converting the
+            seed's fields.
+
+    Returns:
+        A ``Hdf5CompressedSparseMatrix`` built from the seed's ``filepath``,
+        ``group`` and ``dim`` entries.
+
+    Raises:
+        RuntimeError:
+            If ``robject`` is not a ``H5SparseMatrix``.
+    """
+    _cls = get_class(robject)
+    if _cls != "H5SparseMatrix":
+        raise RuntimeError(f"`robject` does not contain a 'H5SparseMatrix' object, contains `{_cls}`.")
+
+    seed = robject["attributes"]["seed"]
+
+    # Only the CSC seed is read as column-compressed; any other seed class
+    # falls back to by_column=False.
+    by_column = get_class(seed) == "CSC_H5SparseMatrixSeed"
+
+    shape = _dispatcher(seed["dim"], **kwargs)
+    # These dispatched values are iterated; only the first element is used.
+    fpath = list(_dispatcher(seed["filepath"], **kwargs))[0]
+    group_name = list(_dispatcher(seed["group"], **kwargs))[0]
+
+    return Hdf5CompressedSparseMatrix(path=fpath, group_name=group_name, shape=shape, by_column=by_column)