Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Dev #412

Merged
merged 7 commits into from
Dec 2, 2024
Merged

Dev #412

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified db/default_template.docx
Binary file not shown.
27 changes: 19 additions & 8 deletions tb-profiler
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#! /usr/bin/env python3
import sys
import pathogenprofiler as pp
from pathogenprofiler import TempFilePrefix
from pathogenprofiler import TempFilePrefix, TempFolder
import pathogenprofiler.variant_calling as vc
import argparse
from rich_argparse import ArgumentDefaultsRichHelpFormatter
Expand All @@ -17,6 +17,7 @@ from joblib import Parallel, delayed
from tqdm import tqdm
import logging
from rich.logging import RichHandler
from copy import deepcopy

import importlib
import pkgutil
Expand Down Expand Up @@ -81,18 +82,28 @@ contents of the error log (%s)
###############################################################################
""" % (outfile))

def vcf_job(args: argparse.Namespace,sample_name: str):
    """Extract one sample from a multi-sample VCF and profile it.

    The sample is written with bcftools to a single-sample VCF inside a
    temporary folder, and ``main_profile`` is then run from within that
    folder on a deep copy of ``args`` so the caller's namespace (shared
    between jobs) is never modified.

    Arguments
    ---------
    args : argparse.Namespace
        Parsed command-line arguments; ``args.vcf`` is the multi-sample VCF.
    sample_name : str
        Name of the sample to extract; also used as the run prefix.
    """
    copy_of_args = deepcopy(args)
    copy_of_args.prefix = sample_name
    # Remember where we started so the directory can be restored exactly,
    # wherever TempFolder happens to create its folder.
    original_dir = os.getcwd()
    # Only the file name is reused inside the temp folder; a directory
    # component in args.vcf would otherwise point at a non-existent sub-folder.
    sample_vcf_name = os.path.basename(args.vcf)
    with TempFolder() as tmpfolder:
        sample_vcf = f"{tmpfolder}/{sample_vcf_name}"
        # The f-string is already fully interpolated; applying a further
        # %-format would fail on any '%' in the sample name or path.
        cmd = f"bcftools view -s {sample_name} -ac 1 {args.vcf} | bcftools +fixploidy -Oz -o {sample_vcf} && bcftools index {sample_vcf}"
        pp.run_cmd(cmd)
        os.chdir(tmpfolder)
        try:
            # Inside the temp folder the bare file name resolves to the
            # per-sample VCF created above.
            # NOTE(review): assumes main_profile reads args.vcf — confirm.
            copy_of_args.vcf = sample_vcf_name
            copy_of_args.files_prefix = os.path.abspath(f"{os.getcwd()}/{copy_of_args.prefix}")
            main_profile(copy_of_args)
        finally:
            # Always leave the temp folder, even when profiling fails, so a
            # reused worker process does not start its next job in there.
            os.chdir(original_dir)


# Run the profiling pipeline once for every sample found in args.vcf.
# NOTE(review): this listing comes from a diff rendered without +/- markers and
# without indentation, so statements of the earlier serial implementation and of
# the newer joblib-based one appear interleaved below. Judging by the hunk header
# (-81,18 +82,28) the per-sample bcftools/main_profile lines are presumably the
# removed side — confirm against the actual file before relying on this.
def multisample_vcf_run(args):
# Wrap the input VCF; its .samples attribute is iterated below.
vcf_obj = pp.Vcf(args.vcf)
# Keep the input path, because args.vcf is overwritten per sample further down.
args.original_vcf = args.vcf
# (args, sample_name) pairs that are later passed to vcf_job.
jobs = []

for sample_name in vcf_obj.samples:
logging.info(f"\nExtracting variants and running pipeline for {sample_name}\n")
args.prefix = sample_name
# uuid4 keeps the temporary per-sample VCF name unique.
args.tmp_vcf = "%s.%s.vcf.gz" % (args.files_prefix,uuid4())
# Subset the original VCF to this sample, pipe it through +fixploidy, write the result and index it.
pp.run_cmd("bcftools view -s %(prefix)s -ac 1 %(original_vcf)s | bcftools +fixploidy -Oz -o %(tmp_vcf)s && bcftools index %(tmp_vcf)s " % vars(args))
args.vcf = args.tmp_vcf
main_profile(args)
jobs.append((args,sample_name))

# One vcf_job call per queued pair, with args.threads passed as joblib's n_jobs.
parallel = Parallel(n_jobs=args.threads, return_as='generator')
# The list comprehension only drains the generator through tqdm; the results themselves are discarded.
[r for r in tqdm(parallel(delayed(vcf_job)(cmd[0],cmd[1]) for cmd in jobs),total=len(jobs),desc="Running jobs")]

def create_output_directories(args,directories=["bam","vcf","results"]):
if pp.nofolder(args.dir):
Expand Down
2 changes: 1 addition & 1 deletion tbprofiler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from .docx import *
from abc import ABC, abstractmethod

__version__ = "6.4.1"
__version__ = "6.5.0"


class ProfilePlugin:
Expand Down
5 changes: 3 additions & 2 deletions tbprofiler/consensus.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ def get_consensus_vcf(sample: str,input_vcf: str,args: argparse.Namespace) -> st
tmp_aln = str(uuid4())
run_cmd(f"cat {args.conf['ref']} {consensus_file}> {tmp_aln}")
outfile = f"{args.files_prefix}.masked.vcf"
run_cmd(f"faToVcf -includeNoAltN {tmp_aln} {outfile}")
os.remove(tmp_aln)
run_cmd(f"fa2vcf.py {tmp_aln} {outfile}")
run_cmd(f'rm {tmp_aln} {tmp_aln}.fai')

return outfile

4 changes: 4 additions & 0 deletions tbprofiler/snp_dists.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import argparse
from .models import ProfileResult, LinkedSample
from typing import List, Tuple
from datetime import datetime

def extract_variant_set(vcf_file: str) -> Tuple[set,set]:
ref_diffs = set()
Expand Down Expand Up @@ -74,6 +75,7 @@ def store(self,result: ProfileResult, vcf_file: str) -> None:
self.missing = missing
def search(self,result: ProfileResult, vcf_file: str, cutoff: int = 20) -> List[LinkedSample]:
logging.info("Searching for close samples in %s" % self.filename)
start_timestamp = datetime.now()
self.c.execute("SELECT sample, diffs, missing FROM variants WHERE lineage=?",(result.sub_lineage,))
self.diffs,self.missing = extract_variant_set(vcf_file)
sample_dists = []
Expand All @@ -89,6 +91,8 @@ def search(self,result: ProfileResult, vcf_file: str, cutoff: int = 20) -> List[
positions = list(dist)
)
)
end_timestamp = datetime.now()
logging.info("Finished searching for close samples in %s. Took %s" % (self.filename,end_timestamp-start_timestamp))
logging.info("Found %s close samples" % len(sample_dists))
return sample_dists

Expand Down
Loading