test basic methods

BiocPy · Nov 17, 2023 · 6b58478
1 parent cb7ffa8
commit 6b58478
Show file tree

Hide file tree

Showing 3 changed files with 62 additions and 38 deletions.
diff --git a/src/genomicranges/GenomicRanges.py b/src/genomicranges/GenomicRanges.py
@@ -176,9 +176,7 @@ def __init__(
 
         if strand is None:
             strand = np.repeat(0, len(self._seqnames))
-        else:
-            strand = sanitize_strand_vector(strand)
-        self._strand = strand
+        self._strand = sanitize_strand_vector(strand)
 
         if names is not None and not isinstance(names, ut.Names):
             names = ut.Names(names)
@@ -344,9 +342,9 @@ def show_slot(data, colname):
             if self._strand is not None:
                 columns.append(show_slot(self._strand, "strand"))
 
-            if self._mcols is not None:
-                for col in self._mcols.colnames:
-                    columns.append(show_slot(self._mcols[col], col))
+            # if self._mcols is not None:
+            #     for col in self._mcols.colnames:
+            #         columns.append(show_slot(self._mcols[col], col))
 
             output += ut.print_wrapped_table(columns, floating_names=floating)
             added_table = True
@@ -866,7 +864,7 @@ def get_subset(self, subset: Union[str, int, bool, Sequence]) -> "GenomicRanges"
 
         current_class_const = type(self)
         return current_class_const(
-            seqnames=self._seqnames[idx],
+            seqnames=[self._seqinfo.seqnames[x] for x in self._seqnames[idx]],
             ranges=self._ranges[idx],
             strand=self._strand[idx],
             names=self._names[idx] if self._names is not None else None,
@@ -964,7 +962,7 @@ def to_pandas(self) -> "pandas.DataFrame":
 
         if self._mcols is not None:
             if self._mcols.shape[1] > 0:
-                _rdf = pd.concat([_rdf, self._mcols.to_pandas()])
+                _rdf = pd.concat([_rdf, self._mcols.to_pandas()], axis=1)
 
         return _rdf
 

diff --git a/src/genomicranges/utils.py b/src/genomicranges/utils.py
@@ -10,7 +10,9 @@
 STRAND_MAP = {"+": 1, "-": -1, "*": 0}
 
 
-def sanitize_strand_vector(strand: Union[Sequence[str], Sequence[int]]) -> np.ndarray:
+def sanitize_strand_vector(
+    strand: Union[Sequence[str], Sequence[int], np.ndarray]
+) -> np.ndarray:
     """Create a numpy representation for ``strand``.
 
     Mapping: 1 for "+" (forward strand), 0 for "*" (any strand) and -1 for "-" (reverse strand).
@@ -38,15 +40,14 @@ def sanitize_strand_vector(strand: Union[Sequence[str], Sequence[int]]) -> np.nd
             raise ValueError(
                 "'strand' must only contain values 1 (forward strand), -1 (reverse strand) or 0 (reverse strand)."
             )
-
         return strand
 
-    if isinstance(strand, ut.StringList):
-        if not set(strand).issubset(["+", "-", "+"]):
+    if ut.is_list_of_type(strand, str):
+        if not set(strand).issubset(["+", "-", "*"]):
             raise ValueError("Values in 'strand' must be either +, - or *.")
-        return np.ndarray([STRAND_MAP[x] for x in strand])
+        return np.array([STRAND_MAP[x] for x in strand])
     elif ut.is_list_of_type(strand, int):
-        return np.ndarray(strand)
+        return np.array(strand)
     else:
         TypeError(
             "'strand' must be either a numpy vector, a list of integers or strings representing strand."

diff --git a/tests/test_gr_methods_basic.py b/tests/test_gr_methods_basic.py
@@ -1,17 +1,51 @@
 import pytest
 import pandas as pd
 import numpy as np
-from genomicranges.GenomicRanges import GenomicRanges
+from genomicranges import GenomicRanges
+from biocframe import BiocFrame
+from iranges import IRanges
 from random import random
 import genomicranges
 
 __author__ = "jkanche"
 __copyright__ = "jkanche"
 __license__ = "MIT"
 
-df_gr = pd.DataFrame(
-    {
-        "seqnames": [
+gr = GenomicRanges(
+    seqnames=[
+        "chr1",
+        "chr2",
+        "chr2",
+        "chr2",
+        "chr1",
+        "chr1",
+        "chr3",
+        "chr3",
+        "chr3",
+        "chr3",
+    ],
+    ranges=IRanges(start=range(100, 110), width=range(110, 120)),
+    strand=["-", "+", "+", "*", "*", "+", "+", "+", "-", "-"],
+    mcols=BiocFrame(
+        {
+            "score": range(0, 10),
+            "GC": [random() for _ in range(10)],
+        }
+    ),
+)
+
+
+def test_granges():
+    assert gr is not None
+    assert len(gr) == 10
+    assert gr.mcols is not None
+    assert len(gr.mcols) == 10
+    assert gr.ranges is not None
+
+
+def test_slices():
+    gr = GenomicRanges(
+        seqnames=[
             "chr1",
             "chr2",
             "chr2",
@@ -23,30 +57,21 @@
             "chr3",
             "chr3",
         ],
-        "starts": range(100, 110),
-        "ends": range(110, 120),
-        "strand": ["-", "+", "+", "*", "*", "+", "+", "+", "-", "-"],
-        "score": range(0, 10),
-        "GC": [random() for _ in range(10)],
-    }
-)
-
-gr = genomicranges.from_pandas(df_gr)
-
-
-def test_granges():
-    assert gr is not None
-    assert len(gr) == df_gr.shape[0]
-    assert gr.mcols() is not None
-    assert len(gr.mcols().keys()) == df_gr.shape[1] - 4
-    assert gr.granges() is not None
-
-
-def test_slices():
+        ranges=IRanges(start=range(100, 110), width=range(110, 120)),
+        strand=["-", "+", "+", "*", "*", "+", "+", "+", "-", "-"],
+        mcols=BiocFrame(
+            {
+                "score": range(0, 10),
+                "GC": [random() for _ in range(10)],
+            }
+        ),
+    )
+
     subset_gr = gr[5:8]
 
     assert subset_gr is not None
     assert len(subset_gr) == 3
+    assert subset_gr.seqnames == ["chr1", "chr3", "chr3"]
 
 
 def test_export():