From 792fdb7466103833def60fa410c073b2d739f061 Mon Sep 17 00:00:00 2001 From: js2264 Date: Wed, 23 Oct 2024 15:52:15 +0200 Subject: [PATCH] doc: minor fixes --- docs/source/cli/index.rst | 2 +- docs/source/tutorials/nn-training.md | 0 src/momics/chromnn.py | 7 +----- src/momics/cli/tree.py | 32 +++++++++++++++++----------- src/momics/dataset.py | 5 ++--- src/momics/momics.py | 31 +++++++++++++++++++-------- src/momics/momicsquery.py | 15 ++++++------- src/momics/streamer.py | 20 ++++++++--------- src/momics/utils.py | 8 +++++++ 9 files changed, 70 insertions(+), 50 deletions(-) delete mode 100644 docs/source/tutorials/nn-training.md diff --git a/docs/source/cli/index.rst b/docs/source/cli/index.rst index ed20746..a62489c 100644 --- a/docs/source/cli/index.rst +++ b/docs/source/cli/index.rst @@ -97,6 +97,6 @@ Contents ---- -.. click:: momics.cli:cli +.. click:: momics.cli.cli:cli :prog: momics :nested: full diff --git a/docs/source/tutorials/nn-training.md b/docs/source/tutorials/nn-training.md deleted file mode 100644 index e69de29..0000000 diff --git a/src/momics/chromnn.py b/src/momics/chromnn.py index 337e64e..f29a341 100644 --- a/src/momics/chromnn.py +++ b/src/momics/chromnn.py @@ -10,16 +10,11 @@ class ChromNN: """ This class implements a convolutional neural network for the prediction of - chromatin modality from another modality. The model implements a series of + chromatin modality from another modality. The model consists of a series of convolutional blocks with residual connections and dropout layers. 
- - Attributes - ---------- """ def __init__(self, input=DEFAULT_CHROMNN_INPUT_LAYER, output=DEFAULT_CHROMNN_OUTPUT_LAYER) -> None: - """ """ - kernel_init = tf.keras.initializers.VarianceScaling() # First convolutional block diff --git a/src/momics/cli/tree.py b/src/momics/cli/tree.py index f6faf1b..deef712 100644 --- a/src/momics/cli/tree.py +++ b/src/momics/cli/tree.py @@ -15,24 +15,32 @@ def tree(ctx, path): """List all the TileDB tables already ingested.""" - mom = m.Momics(path) - chrs = mom.chroms()["chrom"] - if path.endswith("/"): name = Path(os.path.dirname(path)).with_suffix("").name else: name = Path(path).with_suffix("").name - vfs = mom.cfg.vfs - chroms_uri = mom._build_uri("genome", "chroms") + ".tdb" - sequence_uri = mom._build_uri("genome", chrs[0]) + ".tdb" - tracks_uri = mom._build_uri("coverage", "tracks") + ".tdb" - features_uri = mom._build_uri("annotations", "features") + ".tdb" + mom = m.Momics(path) + chrs = mom.chroms()["chrom"] + + if chrs.empty: + has_chroms = False + has_seq = False + has_tracks = False + has_features = False + + else: + has_chroms = True + + vfs = mom.cfg.vfs + chroms_uri = mom._build_uri("genome", "chroms") + ".tdb" + sequence_uri = mom._build_uri("genome", chrs[0]) + ".tdb" + tracks_uri = mom._build_uri("coverage", "tracks") + ".tdb" + features_uri = mom._build_uri("annotations", "features") + ".tdb" - has_chroms = vfs.is_dir(chroms_uri) - has_seq = vfs.is_dir(sequence_uri) - has_tracks = vfs.is_dir(tracks_uri) - has_features = vfs.is_dir(features_uri) + has_seq = vfs.is_dir(sequence_uri) + has_tracks = vfs.is_dir(tracks_uri) + has_features = vfs.is_dir(features_uri) if has_chroms: print("\u2714 Chromosomes registered") diff --git a/src/momics/dataset.py b/src/momics/dataset.py index df9d438..d051204 100644 --- a/src/momics/dataset.py +++ b/src/momics/dataset.py @@ -18,9 +18,8 @@ class MomicsDataset(tf.data.Dataset): For a more basic generator to stream a `momics` by batches of ranges, see 
`momics.streamer.MomicsStreamer`. - See Also - -------- - `momics.streamer.MomicsStreamer` + See Also: + :class:`momics.streamer.MomicsStreamer` """ def __new__( diff --git a/src/momics/momics.py b/src/momics/momics.py index 3c60db2..b97de0e 100644 --- a/src/momics/momics.py +++ b/src/momics/momics.py @@ -59,10 +59,23 @@ class Momics: """ A class to manipulate `.momics` repositories. - Attributes - ---------- - path : str - Path to a `.momics` repository. + `.momics` repositories are a TileDB-backed storage system for genomics data. + They are structured as follows: + + - `./genome/chroms.tdb` - table for ingested chromosomes; + - `./coverage/tracks.tdb` - table for ingested bigwig tracks; + - `./annotations/features.tdb` - table for ingested feature sets. + + In each subdirectory, there is also one `.tdb` file per chromosome, which + stores the following data: + + - In `./genome/{X}.tdb`: the reference sequence of the chromosome; + - In `./coverage/{X}.tdb`: the coverage scores of the chromosome; + - In `./annotations/{X}.tdb`: the genomic features of the chromosome. + + Attributes: + path (str): Path to a `.momics` repository. + cfg (MomicsConfig): Configuration object. """ def __init__( @@ -85,7 +98,7 @@ def __init__( self.cfg = config ## Check if folder exists. If not, create it. - if not self.cfg.vfs.is_dir(self.path): + if not utils._repo_exists(self.path, self.cfg): self.cfg.vfs.create_dir(self.path) self._create_repository() logger.info(f"Created {self.path}") @@ -879,7 +892,7 @@ def remove_directory_until_success(vfs, dir_uri, max_retries=10, retry_delay=2): return True def export_track(self, track: str, output: Path) -> "Momics": - """Export a track from a `.momics` repository as a `.bw `file. + """Export a track from a `.momics` repository as a `.bw` file. 
Args: track (str): Which track to remove @@ -908,10 +921,10 @@ def export_track(self, track: str, output: Path) -> "Momics": return self def export_sequence(self, output: Path) -> "Momics": - """Export sequence from a `.momics` repository as a `.fa `file. + """Export sequence from a `.momics` repository as a `.fa` file. Args: - output (Path): Prefix of the output bigwig file + output (Path): Prefix of the output fasta file Returns: Momics: An updated Momics object @@ -933,7 +946,7 @@ def export_sequence(self, output: Path) -> "Momics": return self def export_features(self, features: str, output: Path) -> "Momics": - """Export a features set from a `.momics` repository as a `.bed `file. + """Export a features set from a `.momics` repository as a `.bed` file. Args: features (str): Which features to remove diff --git a/src/momics/momicsquery.py b/src/momics/momicsquery.py index cf8b1d5..56f6274 100644 --- a/src/momics/momicsquery.py +++ b/src/momics/momicsquery.py @@ -22,14 +22,13 @@ class MomicsQuery: """A class to query `.momics` repositories. - Attributes - ---------- - momics (Momics): a local `.momics` repositoryasdcasdc. - queries (dict): Dict. of pr.PyRanges object. - coverage (dict): Dictionary of coverage scores extracted from the \ - `.momics` repository, populated after calling `q.query_tracks()` - seq (dict): Dictionary of sequences extracted from the `.momics` \ - repository, populated after calling `q.query_seq()` + Attributes: + momics (Momics): a local `.momics` repository. 
+ queries (pr.PyRanges): `pr.PyRanges` object + coverage (dict): Dictionary of coverage scores extracted from the \ + `.momics` repository, populated after calling `q.query_tracks()` + seq (dict): Dictionary of sequences extracted from the `.momics` \ + repository, populated after calling `q.query_seq()` """ def __init__(self, momics: Momics, bed: pr.PyRanges): diff --git a/src/momics/streamer.py b/src/momics/streamer.py index 8071608..c165c1c 100644 --- a/src/momics/streamer.py +++ b/src/momics/streamer.py @@ -17,17 +17,15 @@ class MomicsStreamer: For a tensorflow DataSet constructor, see `momics.dataset.MomicsDataset`. - See Also - -------- - `momics.dataset.MomicsDataset` - - Attributes - ---------- - momics (Momics): a local `.momics` repository. - ranges (dict): pr.PyRanges object. - batch_size (int): the batch size - features (list): list of track labels to query - silent (bool): whether to suppress info messages + See Also: + :class:`momics.dataset.MomicsDataset` + + Attributes: + momics (Momics): a local `.momics` repository. + ranges (pr.PyRanges): pr.PyRanges object. + batch_size (int): the batch size + features (list): list of track labels to query + silent (bool): whether to suppress info messages """ def __init__( diff --git a/src/momics/utils.py b/src/momics/utils.py index dadfa0e..c51609f 100644 --- a/src/momics/utils.py +++ b/src/momics/utils.py @@ -8,6 +8,14 @@ import pyfaidx +def _repo_exists(path, cfg) -> bool: + x = cfg.vfs.is_dir(path) + if x: + return True + else: + return False + + def _check_fasta_lengths(fasta, chroms) -> None: reference_lengths = dict(zip(chroms["chrom"], chroms["length"])) if isinstance(fasta, Path):