EOD: parse dicts, fix for dframes with nrows, and trying to parse SCE

BiocPy · Oct 23, 2024 · 6ab9a69 · 6ab9a69
1 parent dfc8d65
commit 6ab9a69
Show file tree

Hide file tree

Showing 11 changed files with 76 additions and 5 deletions.
diff --git a/src/rds2py/__init__.py b/src/rds2py/__init__.py
@@ -15,13 +15,13 @@
 finally:
     del version, PackageNotFoundError
 
-# from .core import *
-
 from .generics import read_rds
 from .read_atomic import parse_boolean_vector, parse_double_vector, parse_integer_vector, parse_string_vector
 from .read_matrix import parse_dgcmatrix, parse_dgrmatrix, parse_dgtmatrix, parse_ndarray
 from .read_frame import parse_data_frame, parse_dframe
 from .read_factor import parse_factor
+from .read_dict import parse_vector
 from .read_granges import parse_genomic_ranges, parse_granges_list
 from .read_rle import parse_rle
 from .read_se import parse_summarized_experiment, parse_ranged_summarized_experiment
+from .read_sce import parse_single_cell_experiment
diff --git a/src/rds2py/generics.py b/src/rds2py/generics.py
@@ -71,7 +71,7 @@ def read_rds(path: str, **kwargs):
         Some kind of object.
     """
     _robj = parse_rds(path=path)
-    print("FULL OBJECT", _robj)
+    # print("FULL OBJECT", _robj)
     return _dispatcher(_robj, **kwargs)
 
 

diff --git a/src/rds2py/read_dict.py b/src/rds2py/read_dict.py
@@ -0,0 +1,24 @@
+from .generics import _dispatcher
+from .rdsutils import get_class
+
+__author__ = "jkanche"
+__copyright__ = "jkanche"
+__license__ = "MIT"
+
+
+def parse_vector(robject: dict):
+    print("in parse vector")
+    _cls = get_class(robject)
+
+    if _cls not in ["vector"]:
+        raise RuntimeError(f"`robject` does not contain not a vector/dictionary object, contains `{_cls}`.")
+
+    dict_keys = list(_dispatcher(robject["attributes"]["names"]))
+
+    print(dict_keys)
+    print("final_vec")
+    final_vec = {}
+    for idx, dkey in enumerate(dict_keys):
+        final_vec[dkey] = _dispatcher(robject["data"][idx])
+
+    return final_vec
diff --git a/src/rds2py/read_frame.py b/src/rds2py/read_frame.py
@@ -67,10 +67,15 @@ def parse_dframe(robject: dict):
     if robject["attributes"]["rownames"]["data"]:
         index = _dispatcher(robject["attributes"]["rownames"])
 
+    nrows = None
+    if robject["attributes"]["nrows"]["data"]:
+        nrows = list(_dispatcher(robject["attributes"]["nrows"]))[0]
+
     df = BiocFrame(
         data,
         # column_names=col_names,
         row_names=index,
+        number_of_rows=nrows,
     )
 
     return df
diff --git a/src/rds2py/read_sce.py b/src/rds2py/read_sce.py
@@ -32,14 +32,14 @@ def parse_single_cell_experiment(robject: dict):
     # check red. dims, alternative expts
     robj_reduced_dims = None
     robj_altExps = None
-    col_attrs = _dispatcher(robject["attributes"]["int_colData"]["attributes"]["listData"]["attributes"]["names"])
+    col_attrs = list(_dispatcher(robject["attributes"]["int_colData"]["attributes"]["listData"]["attributes"]["names"]))
 
     for idx in range(len(col_attrs)):
         idx_col = col_attrs[idx]
         idx_value = robject["attributes"]["int_colData"]["attributes"]["listData"]["data"][idx]
 
         if idx_col == "reducedDims" and idx_value["data"] is not None:
-            robj_reduced_dims = _dispatcher(robject["attributes"]["int_colData"]["attributes"]["listData"]["data"])
+            robj_reduced_dims = _dispatcher(idx_value)
 
         if idx_col == "altExps":
             alt_names = idx_value["attributes"]["listData"]["attributes"]["names"]["data"]

diff --git a/tests/data/generate_files.R b/tests/data/generate_files.R
@@ -142,3 +142,10 @@ rse <- SummarizedExperiment(assays=list(counts=counts),
                             rowRanges = rowRanges, colData=colData)
 saveRDS(se, "sumexpt.rds")
 saveRDS(rse, "ranged_se.rds")
+
+# SingleCellExperiment: a 100 x 100 subset of an scRNAseq dataset, saved as a small test fixture
+
+library(scRNAseq)
+sce <- ReprocessedAllenData("tophat_counts")
+sce_subset <- sce[1:100, 1:100]
+saveRDS(sce_subset, "simple_sce.rds")
diff --git a/tests/data/simple_list.rds b/tests/data/simple_list.rds
diff --git a/tests/data/simple_sce.rds b/tests/data/simple_sce.rds
diff --git a/tests/test_dict.py b/tests/test_dict.py
@@ -0,0 +1,17 @@
+import pytest
+
+from rds2py import read_rds
+
+__author__ = "jkanche"
+__copyright__ = "jkanche"
+__license__ = "MIT"
+
+
+def test_read_simple_lists():
+    obj = read_rds("tests/data/simple_list.rds")
+
+    assert obj is not None
+    assert len(obj) > 0
+
+    assert "collab" in obj
+    assert len(obj["collab"]) > 0
diff --git a/tests/test_sce.py b/tests/test_sce.py
@@ -0,0 +1,18 @@
+import pytest
+
+from rds2py import read_rds
+
+from singlecellexperiment import SingleCellExperiment
+
+__author__ = "jkanche"
+__copyright__ = "jkanche"
+__license__ = "MIT"
+
+
+def test_read_sce():
+    data = read_rds("tests/data/simple_sce.rds")
+
+    assert data is not None
+    assert isinstance(data, SingleCellExperiment)
+    assert data.shape == (100, 100)
+
diff --git a/tests/test_sumexpt.py → tests/test_se.py b/tests/test_sumexpt.py → tests/test_se.py