Skip to content

Commit

Permalink
feat: improve filters for compression/read speed
Browse files Browse the repository at this point in the history
  • Loading branch information
js2264 committed Aug 5, 2024
1 parent eeba313 commit a0d9c19
Showing 1 changed file with 23 additions and 10 deletions.
33 changes: 23 additions & 10 deletions src/momics/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ def tracks(self):

return tracks

def add_tracks(self, bws: dict):
def add_tracks(
self, bws: dict, max_bws: int = 9999, tile: int = 10000, compression: int = 3
):
"""
A method to ingest bigWig coverage tracks into the `.momics` repository.
Expand All @@ -84,14 +86,17 @@ def add_tracks(self, bws: dict):
# If `path/coverage/tracks.tdb` (and every `path/coverage/{chrom}.tdb`) do not exist, create them
tdb = os.path.join(self.path, "coverage", "tracks.tdb")
if self.tracks().empty:
# Create path/coverage/tracks.tdb
dom = tiledb.Domain(
tiledb.Dim(name="idx", domain=(0, 9999), dtype=np.int64),
tiledb.Dim(name="idx", domain=(0, max_bws), dtype=np.int64, tile=1),
)
attr1 = tiledb.Attr(name="label", dtype="ascii")
attr2 = tiledb.Attr(name="path", dtype="ascii")
schema = tiledb.ArraySchema(domain=dom, attrs=[attr1, attr2], sparse=False)
tiledb.Array.create(tdb, schema)
chroms = self.chroms()

# Create every path/coverage/{chrom}.tdb
for chrom in chroms["chr"]:
chrom_length = np.array(chroms[chroms["chr"] == chrom]["length"])[0]
tdb = os.path.join(self.path, "coverage", f"{chrom}.tdb")
Expand All @@ -100,26 +105,31 @@ def add_tracks(self, bws: dict):
name="position",
domain=(0, chrom_length - 1),
dtype=np.int64,
tile=tile,
),
tiledb.Dim(
name="idx",
domain=(0, 999),
dtype=np.int64,
),
tiledb.Dim(name="idx", domain=(0, max_bws), dtype=np.int64, tile=1),
)
attr = tiledb.Attr(
name="scores",
dtype=np.float32,
filters=tiledb.FilterList(
[tiledb.ZstdFilter(level=-3)], chunksize=10000
[
tiledb.LZ4Filter(),
tiledb.ZstdFilter(level=compression),
],
chunksize=1000,
),
)
schema = tiledb.ArraySchema(
domain=dom,
attrs=[attr],
sparse=True,
coords_filters=tiledb.FilterList(
[tiledb.ZstdFilter(level=-3)], chunksize=10000
[
tiledb.LZ4Filter(),
tiledb.ZstdFilter(level=compression),
],
chunksize=1000,
),
)
tiledb.Array.create(tdb, schema)
Expand Down Expand Up @@ -166,7 +176,10 @@ def add_chroms(self, chr_lengths: dict, genome_version: str = ""):
tdb = os.path.join(self.path, "genome", "chroms.tdb")
dom_genome = tiledb.Domain(
tiledb.Dim(
name="chrom_index", domain=(0, len(chr_lengths) - 1), dtype=np.int32
name="chrom_index",
domain=(0, len(chr_lengths) - 1),
dtype=np.int32,
tile=len(chr_lengths),
)
)
attr_chr = tiledb.Attr(name="chr", dtype="ascii", var=True)
Expand Down

0 comments on commit a0d9c19

Please sign in to comment.