Skip to content

Commit

Permalink
test basic methods
Browse files Browse the repository at this point in the history
  • Loading branch information
jkanche committed Nov 17, 2023
1 parent cb7ffa8 commit 6b58478
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 38 deletions.
14 changes: 6 additions & 8 deletions src/genomicranges/GenomicRanges.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,7 @@ def __init__(

if strand is None:
strand = np.repeat(0, len(self._seqnames))
else:
strand = sanitize_strand_vector(strand)
self._strand = strand
self._strand = sanitize_strand_vector(strand)

if names is not None and not isinstance(names, ut.Names):
names = ut.Names(names)
Expand Down Expand Up @@ -344,9 +342,9 @@ def show_slot(data, colname):
if self._strand is not None:
columns.append(show_slot(self._strand, "strand"))

if self._mcols is not None:
for col in self._mcols.colnames:
columns.append(show_slot(self._mcols[col], col))
# if self._mcols is not None:
# for col in self._mcols.colnames:
# columns.append(show_slot(self._mcols[col], col))

output += ut.print_wrapped_table(columns, floating_names=floating)
added_table = True
Expand Down Expand Up @@ -866,7 +864,7 @@ def get_subset(self, subset: Union[str, int, bool, Sequence]) -> "GenomicRanges"

current_class_const = type(self)
return current_class_const(
seqnames=self._seqnames[idx],
seqnames=[self._seqinfo.seqnames[x] for x in self._seqnames[idx]],
ranges=self._ranges[idx],
strand=self._strand[idx],
names=self._names[idx] if self._names is not None else None,
Expand Down Expand Up @@ -964,7 +962,7 @@ def to_pandas(self) -> "pandas.DataFrame":

if self._mcols is not None:
if self._mcols.shape[1] > 0:
_rdf = pd.concat([_rdf, self._mcols.to_pandas()])
_rdf = pd.concat([_rdf, self._mcols.to_pandas()], axis=1)

return _rdf

Expand Down
13 changes: 7 additions & 6 deletions src/genomicranges/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
STRAND_MAP = {"+": 1, "-": -1, "*": 0}


def sanitize_strand_vector(strand: Union[Sequence[str], Sequence[int]]) -> np.ndarray:
def sanitize_strand_vector(
strand: Union[Sequence[str], Sequence[int], np.ndarray]
) -> np.ndarray:
"""Create a numpy representation for ``strand``.
Mapping: 1 for "+" (forward strand), 0 for "*" (any strand) and -1 for "-" (reverse strand).
Expand Down Expand Up @@ -38,15 +40,14 @@ def sanitize_strand_vector(strand: Union[Sequence[str], Sequence[int]]) -> np.nd
raise ValueError(
"'strand' must only contain values 1 (forward strand), -1 (reverse strand) or 0 (reverse strand)."
)

return strand

if isinstance(strand, ut.StringList):
if not set(strand).issubset(["+", "-", "+"]):
if ut.is_list_of_type(strand, str):
if not set(strand).issubset(["+", "-", "*"]):
raise ValueError("Values in 'strand' must be either +, - or *.")
return np.ndarray([STRAND_MAP[x] for x in strand])
return np.array([STRAND_MAP[x] for x in strand])
elif ut.is_list_of_type(strand, int):
return np.ndarray(strand)
return np.array(strand)
else:
TypeError(
"'strand' must be either a numpy vector, a list of integers or strings representing strand."
Expand Down
73 changes: 49 additions & 24 deletions tests/test_gr_methods_basic.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,51 @@
import pytest
import pandas as pd
import numpy as np
from genomicranges.GenomicRanges import GenomicRanges
from genomicranges import GenomicRanges
from biocframe import BiocFrame
from iranges import IRanges
from random import random
import genomicranges

__author__ = "jkanche"
__copyright__ = "jkanche"
__license__ = "MIT"

df_gr = pd.DataFrame(
{
"seqnames": [
# Shared module-level fixture used by the tests below: a GenomicRanges object
# with ten entries, built directly through the constructor.
gr = GenomicRanges(
# Ten sequence names drawn from chr1, chr2 and chr3.
seqnames=[
"chr1",
"chr2",
"chr2",
"chr2",
"chr1",
"chr1",
"chr3",
"chr3",
"chr3",
"chr3",
],
# Starts are 100..109; widths (not end positions) are 110..119.
ranges=IRanges(start=range(100, 110), width=range(110, 120)),
# One strand symbol per entry, using "+", "-" and "*".
strand=["-", "+", "+", "*", "*", "+", "+", "+", "-", "-"],
# Two metadata columns of length ten. "GC" is filled from random(), so its
# values differ on every test run.
mcols=BiocFrame(
{
"score": range(0, 10),
"GC": [random() for _ in range(10)],
}
),
)


def test_granges():
    """Smoke-test the module-level ``gr`` fixture.

    Checks that the object exists and reports ten entries, that its
    ``mcols`` is present and also has length ten, and that ``ranges``
    is present.
    """
    expected_size = 10

    assert gr is not None
    assert len(gr) == expected_size

    metadata = gr.mcols
    assert metadata is not None
    assert len(metadata) == expected_size

    assert gr.ranges is not None


def test_slices():
gr = GenomicRanges(
seqnames=[
"chr1",
"chr2",
"chr2",
Expand All @@ -23,30 +57,21 @@
"chr3",
"chr3",
],
"starts": range(100, 110),
"ends": range(110, 120),
"strand": ["-", "+", "+", "*", "*", "+", "+", "+", "-", "-"],
"score": range(0, 10),
"GC": [random() for _ in range(10)],
}
)

gr = genomicranges.from_pandas(df_gr)


def test_granges():
assert gr is not None
assert len(gr) == df_gr.shape[0]
assert gr.mcols() is not None
assert len(gr.mcols().keys()) == df_gr.shape[1] - 4
assert gr.granges() is not None


def test_slices():
ranges=IRanges(start=range(100, 110), width=range(110, 120)),
strand=["-", "+", "+", "*", "*", "+", "+", "+", "-", "-"],
mcols=BiocFrame(
{
"score": range(0, 10),
"GC": [random() for _ in range(10)],
}
),
)

subset_gr = gr[5:8]

assert subset_gr is not None
assert len(subset_gr) == 3
assert subset_gr.seqnames == ["chr1", "chr3", "chr3"]


def test_export():
Expand Down

0 comments on commit 6b58478

Please sign in to comment.