Skip to content

Commit

Permalink
feat: add stats to CLI
Browse files Browse the repository at this point in the history
  • Loading branch information
js2264 committed Feb 16, 2024
1 parent f32e9fe commit b6ebedb
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 9 deletions.
26 changes: 23 additions & 3 deletions hicstuff/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@
-view (map visualization)
-pipeline (whole contact map generation)
-distancelaw (Analysis tool and plot for the distance law)
-stats (Extract stats from log)
Running 'pipeline' implies running 'digest', but not
iteralign or filter unless specified, because they can
'iteralign' or 'filter' unless specified, because they can
take up a lot of time for diminishing returns.
Note
Expand All @@ -38,7 +39,7 @@
import re
import sys, os, shutil
import tempfile
from os.path import join, dirname
from os.path import join, dirname, basename
from matplotlib import pyplot as plt
from matplotlib import cm
from docopt import docopt
Expand All @@ -54,6 +55,7 @@
import hicstuff.digest as hcd
import hicstuff.iteralign as hci
import hicstuff.filter as hcf
import hicstuff.stats as hcs
from hicstuff.version import __version__
import hicstuff.io as hio
from hicstuff.log import logger
Expand Down Expand Up @@ -1656,6 +1658,25 @@ def execute(self):
)
logger.info("Output image saved at %s.", out)

class Stats(AbstractCommand):
    """Extract stats from a hicstuff log file.

    usage:
        stats <log>

    arguments:
        log Path to a hicstuff log file.
    """

    # NOTE(review): this docstring looks like it doubles as the docopt usage
    # spec (execute() reads self.args["<log>"]) -- confirm before rewording it.

    def execute(self):
        """Compile the stats for the given log file and print them to stdout."""
        log_path = self.args["<log>"]

        # The return value is not used here; the stats are read back from
        # disk below. Presumably this call (re)generates the
        # "<prefix>.stats.txt" file next to the log -- TODO confirm.
        hcs.get_pipeline_stats(log_path)

        # Rebuild the stats file location from the log file name: drop
        # everything from the "hicstuff" marker onwards to get the prefix,
        # and look in the directory that holds the log.
        log_dir = dirname(log_path)
        log_name = basename(log_path)
        run_prefix = re.sub(".hicstuff.*", "", log_name)
        stats_path = join(log_dir, run_prefix + ".stats.txt")

        # Echo the stats file verbatim.
        with open(stats_path, 'r') as handle:
            report = handle.read()
        print(report)

def parse_bin_str(bin_str):
"""Bin string parsing
Expand Down Expand Up @@ -1696,7 +1717,6 @@ def parse_bin_str(bin_str):

return binning


def parse_ucsc(ucsc_str, bins):
"""
Take a UCSC region in UCSC notation and a list of bin chromosomes and
Expand Down
1 change: 1 addition & 0 deletions hicstuff/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
iteralign Iteratively aligns reads to a reference genome.
missview Preview missing Hi-C bins based on the genome and read length.
pipeline Hi-C pipeline to generate contact matrix from fastq files.
stats Extract mapping statistics from a hicstuff pipeline log file.
rebin Bin the matrix and regenerate files accordingly.
subsample Bootstrap subsampling of contacts from a Hi-C map.
view Visualize a Hi-C matrix.
Expand Down
2 changes: 1 addition & 1 deletion hicstuff/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -1140,7 +1140,7 @@ def _out_file(fname):
# Get stats on the pipeline
try:
logger.info("Fetching mapping and pairing stats")
stats = hcs.get_pipeline_stats(prefix, out_dir, log_file)
stats = hcs.get_pipeline_stats(log_file)
logger.info(stats)
except IndexError:
logger.warning("IndexError. Stats not compiled.")
Expand Down
10 changes: 5 additions & 5 deletions hicstuff/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,15 @@

import re
from os.path import join
from os.path import dirname
from os.path import basename
import json

def get_pipeline_stats(prefix, out_dir, log_file):
def get_pipeline_stats(log_file):
"""Get stats after pipeline execution.
Parameters
----------
prefix : str
The prefix used to create output files by the pipeline.
out_dir : str
The prefix used to create output files by the pipeline.
log_file : str
Path to hicstuff log file.
Expand All @@ -36,6 +34,8 @@ def get_pipeline_stats(prefix, out_dir, log_file):
- Trans ratio
"""

prefix = re.sub(".hicstuff.*", "", basename(log_file))
out_dir = dirname(log_file)
with open(log_file) as file:
log_lines = [line.rstrip() for line in file]

Expand Down

0 comments on commit b6ebedb

Please sign in to comment.