Skip to content

Commit

Permalink
Merge pull request #60 from GavinHuttley/develop
Browse files Browse the repository at this point in the history
Initial effort at support for exporting genomic alignments
  • Loading branch information
GavinHuttley authored Jan 8, 2024
2 parents 1c798b5 + f0378fa commit 6aa197b
Show file tree
Hide file tree
Showing 6 changed files with 260 additions and 85 deletions.
6 changes: 6 additions & 0 deletions src/ensembl_lite/_aligndb.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ class AlignRecordType(typing.TypedDict):
ReturnType = typing.Tuple[str, tuple] # the sql statement and corresponding values


# TODO: add a table and methods for storing and retrieving the species tree
# used for the alignment
class AlignDb(SqliteDbMixin):
# table schema for user provided annotations
table_name = "align"
Expand Down Expand Up @@ -126,6 +128,10 @@ def get_records_matching(
results[record["block_id"]].append(AlignRecordType(**record))
return results.values()

def get_species_names(self) -> typing.List[str]:
    """Return the distinct species names as a list.

    Returns
    -------
    A list built from the values produced by ``self.get_distinct("species")``.
    """
    distinct_species = self.get_distinct("species")
    return [*distinct_species]


def get_alignment(
align_db: AlignDb,
Expand Down
22 changes: 22 additions & 0 deletions src/ensembl_lite/_config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import configparser
import fnmatch
import os
import pathlib

Expand Down Expand Up @@ -140,6 +141,27 @@ def list_genomes(self):
"""returns list of installed genomes"""
return [p.name for p in self.genomes_path.glob("*") if p.name in Species]

def path_to_alignment(self, pattern: str) -> os.PathLike | None:
"""returns the full path to alignment matching the name
Parameters
----------
pattern
glob pattern for the Ensembl alignment name
"""
align_dirs = [
d for d in self.aligns_path.glob("*") if fnmatch.fnmatch(d.name, pattern)
]
if not align_dirs:
return None

if len(align_dirs) > 1:
raise ValueError(
f"{pattern!r} matches too many directories in {self.aligns_path}"
)

return align_dirs[0]


def write_installed_cfg(config: Config) -> os.PathLike:
"""writes an ini file under config.installed_path"""
Expand Down
10 changes: 5 additions & 5 deletions src/ensembl_lite/_genomedb.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def __init__(
) -> None:
self.species = species
self._seqs = seqs
self._annotdb = annots
self.annotations = annots

def get_seq(
self, *, seqid: str, start: int | None = None, end: int | None = None
Expand All @@ -170,9 +170,9 @@ def get_seq(
"""
seq = self._seqs.get_seq(seqid=seqid, start=start, end=end)
seq = make_seq(seq, name=seqid, moltype="dna")
if self._annotdb:
if self.annotations:
seq.annotation_offset = start or 0
seq.annotation_db = self._annotdb.subset(
seq.annotation_db = self.annotations.subset(
seqid=seq.name, start=start, end=end, allow_partial=True
)
return seq
Expand All @@ -192,7 +192,7 @@ def get_features(
seqids = [seqid]
else:
seqids = {
ft["seqid"] for ft in self._annotdb.get_features_matching(**kwargs)
ft["seqid"] for ft in self.annotations.get_features_matching(**kwargs)
}
for seqid in seqids:
try:
Expand All @@ -204,7 +204,7 @@ def get_features(

def close(self):
self._seqs.close()
self._annotdb.db.close()
self.annotations.db.close()


def load_genome(*, cfg: InstalledConfig, species: str):
Expand Down
Loading

0 comments on commit 6aa197b

Please sign in to comment.