Skip to content

Commit

Permalink
Merge pull request #412 from jodyphelan/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
jodyphelan authored Dec 2, 2024
2 parents bcbb7b0 + 5a9644a commit 245a797
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 11 deletions.
Binary file modified db/default_template.docx
Binary file not shown.
27 changes: 19 additions & 8 deletions tb-profiler
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#! /usr/bin/env python3
import sys
import pathogenprofiler as pp
from pathogenprofiler import TempFilePrefix
from pathogenprofiler import TempFilePrefix, TempFolder
import pathogenprofiler.variant_calling as vc
import argparse
from rich_argparse import ArgumentDefaultsRichHelpFormatter
Expand All @@ -17,6 +17,7 @@ from joblib import Parallel, delayed
from tqdm import tqdm
import logging
from rich.logging import RichHandler
from copy import deepcopy

import importlib
import pkgutil
Expand Down Expand Up @@ -81,18 +82,28 @@ contents of the error log (%s)
###############################################################################
""" % (outfile))

def vcf_job(args: argparse.Namespace, sample_name: str):
    """Extract one sample from a multi-sample VCF and run the profiling pipeline on it.

    A deep copy of ``args`` is made so the caller's namespace is never mutated;
    the copy gets ``prefix`` set to the sample name, ``vcf`` pointed at the
    single-sample subset and ``files_prefix`` set to an absolute path inside
    the temporary folder before being handed to ``main_profile``.

    Args:
        args: Parsed command-line arguments. ``args.vcf`` is the multi-sample
            VCF to subset.
        sample_name: Name of the sample to extract with ``bcftools view -s``.
    """
    # Function-scope import so this block does not depend on the file header.
    import shlex

    copy_of_args = deepcopy(args)
    copy_of_args.prefix = sample_name

    # Remember where we started so the directory can be restored exactly,
    # instead of assuming the temporary folder is a direct child of the cwd.
    original_cwd = os.getcwd()

    # Only the file name is used inside the temporary folder; using the full
    # ``args.vcf`` would point at a non-existent sub-directory whenever the
    # input VCF was given with a directory component.
    subset_name = os.path.basename(args.vcf)

    with TempFolder() as tmpfolder:
        subset_vcf = shlex.quote(f"{tmpfolder}/{subset_name}")
        # Quote every interpolated value: sample names come from the VCF header
        # and may contain characters that are special to the shell.
        cmd = (
            f"bcftools view -s {shlex.quote(sample_name)} -ac 1 {shlex.quote(args.vcf)}"
            f" | bcftools +fixploidy -Oz -o {subset_vcf}"
            f" && bcftools index {subset_vcf} "
        )
        pp.run_cmd(cmd)

        os.chdir(tmpfolder)
        try:
            # Relative to the temporary folder this resolves to the subset VCF.
            copy_of_args.vcf = subset_name
            copy_of_args.files_prefix = os.path.abspath(f"{os.getcwd()}/{copy_of_args.prefix}")
            main_profile(copy_of_args)
        finally:
            # Always leave the temporary folder, even when profiling fails, so
            # the folder can be cleaned up and later jobs start from the
            # original directory.
            os.chdir(original_cwd)


def multisample_vcf_run(args):
# Runs the profiling pipeline once for every sample listed in the VCF at args.vcf.
# NOTE(review): this span looks like a rendered diff in which removed and added
# lines are interleaved without +/- markers (the inline per-sample run below and
# the job list / Parallel dispatch at the end both appear) — confirm against the
# repository before treating it as runnable code.
vcf_obj = pp.Vcf(args.vcf)
# Keep the path of the multi-sample VCF, since args.vcf is overwritten further down.
args.original_vcf = args.vcf
# (args, sample_name) pairs, later unpacked into vcf_job calls.
jobs = []

for sample_name in vcf_obj.samples:
logging.info(f"\nExtracting variants and running pipeline for {sample_name}\n")
args.prefix = sample_name
# A uuid4 in the file name keeps the per-sample subset VCF name unique.
args.tmp_vcf = "%s.%s.vcf.gz" % (args.files_prefix,uuid4())
# The command template is filled from the attributes of args (prefix, original_vcf, tmp_vcf).
pp.run_cmd("bcftools view -s %(prefix)s -ac 1 %(original_vcf)s | bcftools +fixploidy -Oz -o %(tmp_vcf)s && bcftools index %(tmp_vcf)s " % vars(args))
args.vcf = args.tmp_vcf
main_profile(args)
# The same args object is appended for every sample; vcf_job deep-copies it before modifying it.
jobs.append((args,sample_name))

# n_jobs is taken from args.threads; results are requested as a generator.
parallel = Parallel(n_jobs=args.threads, return_as='generator')
# The list comprehension only exists to consume the generator so tqdm can report
# progress; the resulting list is discarded.
[r for r in tqdm(parallel(delayed(vcf_job)(cmd[0],cmd[1]) for cmd in jobs),total=len(jobs),desc="Running jobs")]

def create_output_directories(args,directories=["bam","vcf","results"]):
if pp.nofolder(args.dir):
Expand Down
2 changes: 1 addition & 1 deletion tbprofiler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from .docx import *
from abc import ABC, abstractmethod

__version__ = "6.4.1"
__version__ = "6.5.0"


class ProfilePlugin:
Expand Down
5 changes: 3 additions & 2 deletions tbprofiler/consensus.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ def get_consensus_vcf(sample: str,input_vcf: str,args: argparse.Namespace) -> st
tmp_aln = str(uuid4())
run_cmd(f"cat {args.conf['ref']} {consensus_file}> {tmp_aln}")
outfile = f"{args.files_prefix}.masked.vcf"
run_cmd(f"faToVcf -includeNoAltN {tmp_aln} {outfile}")
os.remove(tmp_aln)
run_cmd(f"fa2vcf.py {tmp_aln} {outfile}")
run_cmd(f'rm {tmp_aln} {tmp_aln}.fai')

return outfile

4 changes: 4 additions & 0 deletions tbprofiler/snp_dists.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import argparse
from .models import ProfileResult, LinkedSample
from typing import List, Tuple
from datetime import datetime

def extract_variant_set(vcf_file: str) -> Tuple[set,set]:
ref_diffs = set()
Expand Down Expand Up @@ -74,6 +75,7 @@ def store(self,result: ProfileResult, vcf_file: str) -> None:
self.missing = missing
def search(self,result: ProfileResult, vcf_file: str, cutoff: int = 20) -> List[LinkedSample]:
logging.info("Searching for close samples in %s" % self.filename)
start_timestamp = datetime.now()
self.c.execute("SELECT sample, diffs, missing FROM variants WHERE lineage=?",(result.sub_lineage,))
self.diffs,self.missing = extract_variant_set(vcf_file)
sample_dists = []
Expand All @@ -89,6 +91,8 @@ def search(self,result: ProfileResult, vcf_file: str, cutoff: int = 20) -> List[
positions = list(dist)
)
)
end_timestamp = datetime.now()
logging.info("Finished searching for close samples in %s. Took %s" % (self.filename,end_timestamp-start_timestamp))
logging.info("Found %s close samples" % len(sample_dists))
return sample_dists

Expand Down

0 comments on commit 245a797

Please sign in to comment.