Skip to content

Commit

Permalink
doc: minor fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
js2264 committed Oct 23, 2024
1 parent f558d0f commit 792fdb7
Show file tree
Hide file tree
Showing 9 changed files with 70 additions and 50 deletions.
2 changes: 1 addition & 1 deletion docs/source/cli/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,6 @@ Contents

----

.. click:: momics.cli:cli
.. click:: momics.cli.cli:cli
:prog: momics
:nested: full
Empty file.
7 changes: 1 addition & 6 deletions src/momics/chromnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,11 @@
class ChromNN:
"""
This class implements a convolutional neural network for the prediction of
chromatin modality from another modality. The model implements a series of
chromatin modality from another modality. The model consists of a series of
convolutional blocks with residual connections and dropout layers.
Attributes
----------
"""

def __init__(self, input=DEFAULT_CHROMNN_INPUT_LAYER, output=DEFAULT_CHROMNN_OUTPUT_LAYER) -> None:
""" """

kernel_init = tf.keras.initializers.VarianceScaling()

# First convolutional block
Expand Down
32 changes: 20 additions & 12 deletions src/momics/cli/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,32 @@
def tree(ctx, path):
"""List all the TileDB tables already ingested."""

mom = m.Momics(path)
chrs = mom.chroms()["chrom"]

if path.endswith("/"):
name = Path(os.path.dirname(path)).with_suffix("").name
else:
name = Path(path).with_suffix("").name

vfs = mom.cfg.vfs
chroms_uri = mom._build_uri("genome", "chroms") + ".tdb"
sequence_uri = mom._build_uri("genome", chrs[0]) + ".tdb"
tracks_uri = mom._build_uri("coverage", "tracks") + ".tdb"
features_uri = mom._build_uri("annotations", "features") + ".tdb"
mom = m.Momics(path)
chrs = mom.chroms()["chrom"]

if chrs.empty:
has_chroms = False
has_seq = False
has_tracks = False
has_features = False

else:
has_chroms = True

vfs = mom.cfg.vfs
chroms_uri = mom._build_uri("genome", "chroms") + ".tdb"
sequence_uri = mom._build_uri("genome", chrs[0]) + ".tdb"
tracks_uri = mom._build_uri("coverage", "tracks") + ".tdb"
features_uri = mom._build_uri("annotations", "features") + ".tdb"

has_chroms = vfs.is_dir(chroms_uri)
has_seq = vfs.is_dir(sequence_uri)
has_tracks = vfs.is_dir(tracks_uri)
has_features = vfs.is_dir(features_uri)
has_seq = vfs.is_dir(sequence_uri)
has_tracks = vfs.is_dir(tracks_uri)
has_features = vfs.is_dir(features_uri)

if has_chroms:
print("\u2714 Chromosomes registered")
Expand Down
5 changes: 2 additions & 3 deletions src/momics/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@ class MomicsDataset(tf.data.Dataset):
For a more basic generator to stream a `momics` by batches of ranges,
see `momics.streamer.MomicsStreamer`.
See Also
--------
`momics.streamer.MomicsStreamer`
See Also:
:class:`momics.streamer.MomicsStreamer`
"""

def __new__(
Expand Down
31 changes: 22 additions & 9 deletions src/momics/momics.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,23 @@ class Momics:
"""
A class to manipulate `.momics` repositories.
Attributes
----------
path : str
Path to a `.momics` repository.
`.momics` repositories are a TileDB-backed storage system for genomics data.
They are structured as follows:
- `./genome/chroms.tdb` - table for ingested chromosomes;
- `./coverage/tracks.tdb` - table for ingested bigwig tracks;
- `./annotations/features.tdb` - table for ingested feature sets.
In each subdirectory, there is also one `.tdb` file per chromosome, which
stores the following data:
- In `./genome/{X}.tdb`: the reference sequence of the chromosome;
- In `./coverage/{X}.tdb`: the coverage scores of the chromosome;
- In `./annotations/{X}.tdb`: the genomic features of the chromosome.
Attributes:
path (str): Path to a `.momics` repository.
cfg (MomicsConfig): Configuration object.
"""

def __init__(
Expand All @@ -85,7 +98,7 @@ def __init__(
self.cfg = config

## Check if folder exists. If not, create it.
if not self.cfg.vfs.is_dir(self.path):
if not utils._repo_exists(self.path, self.cfg):
self.cfg.vfs.create_dir(self.path)
self._create_repository()
logger.info(f"Created {self.path}")
Expand Down Expand Up @@ -879,7 +892,7 @@ def remove_directory_until_success(vfs, dir_uri, max_retries=10, retry_delay=2):
return True

def export_track(self, track: str, output: Path) -> "Momics":
"""Export a track from a `.momics` repository as a `.bw `file.
"""Export a track from a `.momics` repository as a `.bw` file.
Args:
track (str): Which track to export
Expand Down Expand Up @@ -908,10 +921,10 @@ def export_track(self, track: str, output: Path) -> "Momics":
return self

def export_sequence(self, output: Path) -> "Momics":
"""Export sequence from a `.momics` repository as a `.fa `file.
"""Export sequence from a `.momics` repository as a `.fa` file.
Args:
output (Path): Prefix of the output bigwig file
output (Path): Prefix of the output fasta file
Returns:
Momics: An updated Momics object
Expand All @@ -933,7 +946,7 @@ def export_sequence(self, output: Path) -> "Momics":
return self

def export_features(self, features: str, output: Path) -> "Momics":
"""Export a features set from a `.momics` repository as a `.bed `file.
"""Export a features set from a `.momics` repository as a `.bed` file.
Args:
features (str): Which features to export
Expand Down
15 changes: 7 additions & 8 deletions src/momics/momicsquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,13 @@
class MomicsQuery:
"""A class to query `.momics` repositories.
Attributes
----------
momics (Momics): a local `.momics` repositoryasdcasdc.
queries (dict): Dict. of pr.PyRanges object.
coverage (dict): Dictionary of coverage scores extracted from the \
`.momics` repository, populated after calling `q.query_tracks()`
seq (dict): Dictionary of sequences extracted from the `.momics` \
repository, populated after calling `q.query_seq()`
Attributes:
momics (Momics): a local `.momics` repository.
queries (pr.PyRanges): `pr.PyRanges` object
coverage (dict): Dictionary of coverage scores extracted from the \
`.momics` repository, populated after calling `q.query_tracks()`
seq (dict): Dictionary of sequences extracted from the `.momics` \
repository, populated after calling `q.query_seq()`
"""

def __init__(self, momics: Momics, bed: pr.PyRanges):
Expand Down
20 changes: 9 additions & 11 deletions src/momics/streamer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,15 @@ class MomicsStreamer:
For a tensorflow DataSet constructor, see `momics.dataset.MomicsDataset`.
See Also
--------
`momics.dataset.MomicsDataset`
Attributes
----------
momics (Momics): a local `.momics` repository.
ranges (dict): pr.PyRanges object.
batch_size (int): the batch size
features (list): list of track labels to query
silent (bool): whether to suppress info messages
See Also:
:class:`momics.dataset.MomicsDataset`
Attributes:
momics (Momics): a local `.momics` repository.
ranges (pr.PyRanges): `pr.PyRanges` object.
batch_size (int): the batch size
features (list): list of track labels to query
silent (bool): whether to suppress info messages
"""

def __init__(
Expand Down
8 changes: 8 additions & 0 deletions src/momics/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@
import pyfaidx


def _repo_exists(path, cfg) -> bool:
x = cfg.vfs.is_dir(path)
if x:
return True
else:
return False


def _check_fasta_lengths(fasta, chroms) -> None:
reference_lengths = dict(zip(chroms["chrom"], chroms["length"]))
if isinstance(fasta, Path):
Expand Down

0 comments on commit 792fdb7

Please sign in to comment.