Skip to content

Commit

Permalink
Use relaxed_combine_rows when merging mcols (#99)
Browse files Browse the repository at this point in the history
* As noted in #98, using `relaxed_combine_rows` makes it possible to perform operations on GenomicRanges objects that contain different metadata columns.
* Set numpy version to < 2.0, since a few operations are incompatible with the new release
* Add tests
  • Loading branch information
jkanche authored Jun 19, 2024
1 parent 52171ea commit 03a980f
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 3 deletions.
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ install_requires =
biocframe>=0.5.11
iranges[optional]>=0.2.6
biocutils>=0.1.3
numpy
numpy<2.0.0

[options.packages.find]
where = src
Expand Down
2 changes: 1 addition & 1 deletion src/genomicranges/GenomicRanges.py
Original file line number Diff line number Diff line change
Expand Up @@ -2848,7 +2848,7 @@ def _combine_GenomicRanges(*x: GenomicRanges) -> GenomicRanges:
seqnames=ut.combine_sequences(*[y._seqnames for y in x]),
strand=ut.combine_sequences(*[y._strand for y in x]),
names=all_names,
mcols=ut.combine_rows(*[y._mcols for y in x]),
mcols=ut.relaxed_combine_rows(*[y._mcols for y in x]),
seqinfo=merge_SeqInfo([y._seqinfo for y in x]),
metadata=x[0]._metadata,
validate=False,
Expand Down
2 changes: 1 addition & 1 deletion src/genomicranges/GenomicRangesList.py
Original file line number Diff line number Diff line change
Expand Up @@ -873,7 +873,7 @@ def _combine_grl(*x: GenomicRangesList):
ranges=ut.combine_sequences(*[y._ranges for y in x]),
range_lengths=ut.combine_sequences(*[y._range_lengths for y in x]),
names=all_names,
mcols=ut.combine_rows(*[y._mcols for y in x]),
mcols=ut.relaxed_combine_rows(*[y._mcols for y in x]),
metadata=x[0]._metadata,
validate=False,
)
42 changes: 42 additions & 0 deletions tests/test_gr_methods_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np
from genomicranges import GenomicRanges
from biocframe import BiocFrame
import biocutils as ut
from iranges import IRanges
from random import random
import genomicranges
Expand Down Expand Up @@ -102,3 +103,44 @@ def test_export():
"chr3",
]
assert df["strand"].tolist() == ["-", "+", "+", "*", "*", "+", "+", "+", "-", "-"]


def test_combine():
    """Combine a GenomicRanges without mcols with one that has two mcols.

    The first object carries 5 ranges and no metadata columns; the second
    carries 10 ranges with ``score`` and ``GC`` columns. The combined result
    is expected to hold all 15 ranges and expose 2 metadata columns.
    """
    # Five ranges, no metadata columns.
    without_mcols = GenomicRanges(
        seqnames=["chr1", "chr2", "chr1", "chr3", "chr2"],
        ranges=IRanges(
            start=[101, 102, 103, 104, 109],
            width=[112, 103, 128, 134, 111],
        ),
        strand=["*", "-", "*", "+", "-"],
    )

    # Ten ranges, each annotated with a score and a random GC value.
    tgt_seqnames = ["chr1"] + ["chr2"] * 3 + ["chr1"] * 2 + ["chr3"] * 4
    tgt_ranges = IRanges(start=range(101, 111), width=range(121, 131))
    tgt_strand = ["*", "-", "-", "*", "*", "+", "+", "+", "-", "-"]
    tgt_mcols = BiocFrame(
        {
            "score": range(0, 10),
            "GC": [random() for _ in range(10)],
        }
    )
    with_mcols = GenomicRanges(
        seqnames=tgt_seqnames,
        ranges=tgt_ranges,
        strand=tgt_strand,
        mcols=tgt_mcols,
    )

    assert without_mcols is not None
    assert with_mcols is not None

    combined: GenomicRanges = ut.combine_sequences(without_mcols, with_mcols)
    assert combined is not None

    # 5 + 10 ranges, and both metadata columns from the second object.
    assert len(combined) == 15
    column_names = combined.get_mcols().get_column_names()
    assert len(column_names) == 2
40 changes: 40 additions & 0 deletions tests/test_gr_set_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,43 @@ def test_intersect():
assert (out.start == np.array([9])).all()
assert (out.width == np.array([2])).all()
assert (out.strand == np.array([-1])).all()


def test_intersect_different_mcols():
    """Intersect two GenomicRanges whose metadata columns differ.

    ``g_src`` has no mcols while ``g_tgt`` carries ``score`` and ``GC``
    columns; the intersection is expected to succeed and yield 3 ranges.

    NOTE: this test was previously also named ``test_intersect``, which
    rebound the earlier ``test_intersect`` in this module so that pytest
    collected only one of the two. It has a distinct name so both run.
    """
    # Five ranges, no metadata columns.
    g_src = GenomicRanges(
        seqnames=["chr1", "chr2", "chr1", "chr3", "chr2"],
        ranges=IRanges(
            start=[101, 102, 103, 104, 109], width=[112, 103, 128, 134, 111]
        ),
        strand=["*", "-", "*", "+", "-"],
    )

    # Ten ranges with two metadata columns.
    g_tgt = GenomicRanges(
        seqnames=[
            "chr1",
            "chr2",
            "chr2",
            "chr2",
            "chr1",
            "chr1",
            "chr3",
            "chr3",
            "chr3",
            "chr3",
        ],
        ranges=IRanges(start=range(101, 111), width=range(121, 131)),
        strand=["*", "-", "-", "*", "*", "+", "+", "+", "-", "-"],
        mcols=BiocFrame(
            {
                "score": range(0, 10),
                "GC": [random() for _ in range(10)],
            }
        ),
    )
    assert g_src is not None
    assert g_tgt is not None

    out = g_src.intersect(g_tgt)

    assert out is not None
    assert len(out) == 3

0 comments on commit 03a980f

Please sign in to comment.