Skip to content

Commit

Permalink
Use an appropriate NumPy dtype based on the number of unique sequences (#118)
Browse files Browse the repository at this point in the history

Add tests to check the `dtype` assignment.
  • Loading branch information
jkanche authored Aug 14, 2024
1 parent 1ab20d9 commit 6c8d32e
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 3 deletions.
12 changes: 9 additions & 3 deletions src/genomicranges/GenomicRanges.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,9 +220,15 @@ def _sanitize_seqnames(self, seqnames, seqinfo):
self._build_reverse_seqindex(seqinfo)

if not isinstance(seqnames, np.ndarray):
seqnames = np.asarray(
[self._reverse_seqindex[x] for x in seqnames], dtype=np.int8
)
seqnames = np.asarray([self._reverse_seqindex[x] for x in seqnames])

num_uniq = len(np.unique(seqnames))
if num_uniq < 2**8:
seqnames = seqnames.astype(np.int8)
elif num_uniq < 2**16:
seqnames = seqnames.astype(np.int16)
elif num_uniq < 2**32:
seqnames = seqnames.astype(np.int32)

return seqnames

Expand Down
37 changes: 37 additions & 0 deletions tests/test_gr_init_seqnames.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import pytest
from genomicranges import GenomicRanges
from iranges import IRanges
from biocframe import BiocFrame
from random import random
import pandas as pd
import numpy as np

__author__ = "jkanche"
__copyright__ = "jkanche"
__license__ = "MIT"


def test_create_gr():
    """Check the integer dtype chosen for the stored seqnames.

    Three objects are built with 1, 500 and 2**16 + 1 distinct sequence
    names, and ``_seqnames`` is expected to be int8, int16 and int32
    respectively.

    NOTE(review): no case uses between 128 and 255 distinct names; if the
    stored values are sequence indices, that range would not fit in int8 --
    confirm against ``_sanitize_seqnames`` and consider adding a case.
    """
    # A single repeated name should use the narrowest dtype.
    single_name_ranges = IRanges(start=range(100, 110), width=range(110, 120))
    gr = GenomicRanges(seqnames=["chr1"] * 10, ranges=single_name_ranges)

    assert gr is not None
    assert gr._seqnames.dtype == np.int8

    # 500 distinct names.
    n_medium = 500
    medium_names = [f"chr{i}" for i in range(n_medium)]
    medium_ranges = IRanges(
        start=range(0, n_medium),
        width=range(10, n_medium + 10),
    )
    gr16 = GenomicRanges(seqnames=medium_names, ranges=medium_ranges)

    assert gr16 is not None
    assert gr16._seqnames.dtype == np.int16

    # One more distinct name than 2**16.
    n_large = 2**16 + 1
    large_names = [f"chr{i}" for i in range(n_large)]
    large_ranges = IRanges(
        start=range(0, n_large),
        width=range(10, n_large + 10),
    )
    gr32 = GenomicRanges(seqnames=large_names, ranges=large_ranges)

    assert gr32 is not None
    assert gr32._seqnames.dtype == np.int32

0 comments on commit 6c8d32e

Please sign in to comment.