Skip to content

Commit

Permalink
EOD: parse dicts, fix for dframes with nrows, and trying to parse SCE
Browse files Browse the repository at this point in the history
  • Loading branch information
jkanche committed Oct 23, 2024
1 parent dfc8d65 commit 6ab9a69
Show file tree
Hide file tree
Showing 11 changed files with 76 additions and 5 deletions.
4 changes: 2 additions & 2 deletions src/rds2py/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
finally:
del version, PackageNotFoundError

# from .core import *

from .generics import read_rds
from .read_atomic import parse_boolean_vector, parse_double_vector, parse_integer_vector, parse_string_vector
from .read_matrix import parse_dgcmatrix, parse_dgrmatrix, parse_dgtmatrix, parse_ndarray
from .read_frame import parse_data_frame, parse_dframe
from .read_factor import parse_factor
from .read_dict import parse_vector
from .read_granges import parse_genomic_ranges, parse_granges_list
from .read_rle import parse_rle
from .read_se import parse_summarized_experiment, parse_ranged_summarized_experiment
from .read_sce import parse_single_cell_experiment
2 changes: 1 addition & 1 deletion src/rds2py/generics.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def read_rds(path: str, **kwargs):
Some kind of object.
"""
_robj = parse_rds(path=path)
print("FULL OBJECT", _robj)
# print("FULL OBJECT", _robj)
return _dispatcher(_robj, **kwargs)


Expand Down
24 changes: 24 additions & 0 deletions src/rds2py/read_dict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from .generics import _dispatcher
from .rdsutils import get_class

__author__ = "jkanche"
__copyright__ = "jkanche"
__license__ = "MIT"


def parse_vector(robject: dict):
    """Parse an R named list (generic ``vector``) into a Python dictionary.

    Args:
        robject:
            Dictionary representation of the R object. The element names
            are read from ``robject["attributes"]["names"]`` and the
            elements themselves from ``robject["data"]``.

    Returns:
        A dictionary mapping each name to the result of dispatching the
        element stored at the same position in ``robject["data"]``.

    Raises:
        RuntimeError: If the class reported for ``robject`` is not ``vector``.
    """
    _cls = get_class(robject)

    if _cls != "vector":
        raise RuntimeError(f"`robject` does not contain a vector/dictionary object, contains `{_cls}`.")

    dict_keys = list(_dispatcher(robject["attributes"]["names"]))

    # Index into ``data`` by position (rather than zipping) so that a list
    # with fewer elements than names still fails loudly instead of being
    # silently truncated.
    # NOTE: repeated names collapse to a single key; the last element wins.
    return {dkey: _dispatcher(robject["data"][idx]) for idx, dkey in enumerate(dict_keys)}
5 changes: 5 additions & 0 deletions src/rds2py/read_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,15 @@ def parse_dframe(robject: dict):
if robject["attributes"]["rownames"]["data"]:
index = _dispatcher(robject["attributes"]["rownames"])

nrows = None
if robject["attributes"]["nrows"]["data"]:
nrows = list(_dispatcher(robject["attributes"]["nrows"]))[0]

df = BiocFrame(
data,
# column_names=col_names,
row_names=index,
number_of_rows=nrows,
)

return df
4 changes: 2 additions & 2 deletions src/rds2py/read_sce.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ def parse_single_cell_experiment(robject: dict):
# check red. dims, alternative expts
robj_reduced_dims = None
robj_altExps = None
col_attrs = _dispatcher(robject["attributes"]["int_colData"]["attributes"]["listData"]["attributes"]["names"])
col_attrs = list(_dispatcher(robject["attributes"]["int_colData"]["attributes"]["listData"]["attributes"]["names"]))

for idx in range(len(col_attrs)):
idx_col = col_attrs[idx]
idx_value = robject["attributes"]["int_colData"]["attributes"]["listData"]["data"][idx]

if idx_col == "reducedDims" and idx_value["data"] is not None:
robj_reduced_dims = _dispatcher(robject["attributes"]["int_colData"]["attributes"]["listData"]["data"])
robj_reduced_dims = _dispatcher(idx_value)

if idx_col == "altExps":
alt_names = idx_value["attributes"]["listData"]["attributes"]["names"]["data"]
Expand Down
7 changes: 7 additions & 0 deletions tests/data/generate_files.R
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,10 @@ rse <- SummarizedExperiment(assays=list(counts=counts),
rowRanges = rowRanges, colData=colData)
saveRDS(se, "sumexpt.rds")
saveRDS(rse, "ranged_se.rds")

# SingleCell Experiment
# Builds the "simple_sce.rds" fixture: load a dataset through the scRNAseq
# package, keep a 100 x 100 corner of it, and serialize the subset.

library(scRNAseq)
# NOTE(review): presumably this fetches/caches the dataset on first use,
# so it may need network access -- confirm before running offline.
sce <- ReprocessedAllenData("tophat_counts")
# Keep only the first 100 rows and the first 100 columns.
sce_subset <- sce[1:100, 1:100]
saveRDS(sce_subset, "simple_sce.rds")
Binary file added tests/data/simple_list.rds
Binary file not shown.
Binary file added tests/data/simple_sce.rds
Binary file not shown.
17 changes: 17 additions & 0 deletions tests/test_dict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import pytest

from rds2py import read_rds

__author__ = "jkanche"
__copyright__ = "jkanche"
__license__ = "MIT"


def test_read_simple_lists():
    """The simple list fixture parses to a non-empty container with a non-empty ``collab`` entry."""
    fixture_path = "tests/data/simple_list.rds"
    parsed = read_rds(fixture_path)

    # The reader must return something, and that something must have content.
    assert parsed is not None
    assert len(parsed) > 0

    # The fixture is expected to expose a populated "collab" element.
    assert "collab" in parsed
    collab = parsed["collab"]
    assert len(collab) > 0
18 changes: 18 additions & 0 deletions tests/test_sce.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import pytest

from rds2py import read_rds

from singlecellexperiment import SingleCellExperiment

__author__ = "jkanche"
__copyright__ = "jkanche"
__license__ = "MIT"


def test_read_sce():
    """The SCE fixture parses to a ``SingleCellExperiment`` of shape (100, 100)."""
    fixture_path = "tests/data/simple_sce.rds"
    expected_shape = (100, 100)

    sce = read_rds(fixture_path)

    assert sce is not None
    assert isinstance(sce, SingleCellExperiment)
    assert sce.shape == expected_shape

File renamed without changes.

0 comments on commit 6ab9a69

Please sign in to comment.