Skip to content
This repository has been archived by the owner on Nov 6, 2020. It is now read-only.

Commit

Permalink
Merge pull request #10 from dnanexus/cnvnator_svtyper_fixes
Browse files Browse the repository at this point in the history
Cnvnator svtyper fixes
  • Loading branch information
AndrewCarroll authored Jul 16, 2018
2 parents fb10679 + a04ac17 commit 97517b1
Show file tree
Hide file tree
Showing 16,144 changed files with 1,766,073 additions and 472,726 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
33 changes: 19 additions & 14 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,6 @@ FROM ubuntu:14.04
# File Author / Maintainer
MAINTAINER Samantha Zarate

ENV PATH=${PATH}:/home/dnanexus/
ENV PATH=${PATH}:/opt/conda/bin/
ENV PATH=${PATH}:/usr/bin/
ENV PYTHONPATH=${PYTHONPATH}:/opt/conda/bin/
ENV ROOTSYS=/usr/lib/root
ENV LD_LIBRARY_PATH=/usr/lib/root/lib
ENV DYLD_LIBRARY_PATH=/usr/lib/root/lib
ENV HTSLIB_LIBRARY_DIR=/usr/local/lib
ENV HTSLIB_INCLUDE_DIR=/usr/local/include

# System packages
RUN apt-get update && apt-get install -y curl wget

Expand All @@ -22,7 +12,7 @@ RUN curl -LO http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh
RUN bash Miniconda-latest-Linux-x86_64.sh -p /miniconda -b
RUN rm Miniconda-latest-Linux-x86_64.sh
ENV PATH=/miniconda/bin:${PATH}
RUN conda update -y conda
# RUN conda update -y conda

RUN /bin/bash -c "echo 'deb http://dnanexus-apt-prod.s3.amazonaws.com/ubuntu trusty/amd64/' > /etc/apt/sources.list.d/dnanexus.list"
RUN /bin/bash -c "echo 'deb http://dnanexus-apt-prod.s3.amazonaws.com/ubuntu trusty/all/' >> /etc/apt/sources.list.d/dnanexus.list"
Expand Down Expand Up @@ -76,11 +66,11 @@ RUN apt-get update
RUN conda config --add channels r
RUN conda config --add channels conda-forge
RUN conda config --add channels bioconda
RUN conda install -y samtools==0.1.19
RUN conda install -c bcbio bx-python -y
RUN conda install -c faircloth-lab samtools
RUN conda install -c bioconda sambamba -y
RUN conda install -c bcbio bx-python -y
RUN conda install -c anaconda networkx -y
RUN conda install -c bioconda samblaster -y
RUN conda install -y -c anaconda networkx
RUN conda install gcc_linux-64 -y
RUN conda install -c bioconda manta

Expand All @@ -92,6 +82,10 @@ RUN rm -rf /resources/
RUN conda install -y numpy
RUN pip install --upgrade pip
RUN pip install https://github.com/bioinform/breakseq2/archive/2.2.tar.gz
RUN pip install pycparser
RUN pip install asn1crypto
RUN pip install idna
RUN pip install ipaddress

RUN pip install dxpy

Expand All @@ -104,6 +98,17 @@ COPY parliament2.sh .

RUN /bin/bash -c "source /etc/profile.d/dnanexus.environment.sh"

# Runtime environment: executable search path, Python module path, and the
# locations of the ROOT and HTSlib installations used by the tools in this image.
ENV PATH=${PATH}:/home/dnanexus/
# NOTE(review): Miniconda is installed with `-p /miniconda` and /miniconda/bin is
# already prepended to PATH in this file — confirm /opt/conda/bin exists in the image.
ENV PATH=${PATH}:/opt/conda/bin/
ENV PATH=${PATH}:/usr/bin/
# NOTE(review): PYTHONPATH normally lists module directories rather than a bin/
# directory — confirm this entry is required.
ENV PYTHONPATH=${PYTHONPATH}:/opt/conda/bin/
ENV ROOTSYS=/home/dnanexus/root
# LD_LIBRARY_PATH is first reset to /usr/lib/root/lib (discarding any inherited
# value) and then extended with the lib directory under ROOTSYS.
ENV LD_LIBRARY_PATH=/usr/lib/root/lib
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/home/dnanexus/root/lib
# NOTE(review): DYLD_LIBRARY_PATH is the macOS loader variable; presumably unused
# in this ubuntu:14.04-based image — confirm before relying on it.
ENV DYLD_LIBRARY_PATH=/usr/lib/root/lib
ENV HTSLIB_LIBRARY_DIR=/usr/local/lib
ENV HTSLIB_INCLUDE_DIR=/usr/local/include

WORKDIR /home/dnanexus
RUN ["chmod", "+x", "parliament2.py"]
RUN ["chmod", "+x", "parliament2.sh"]
Expand Down
19 changes: 17 additions & 2 deletions dx_app_code/parliament2/dxapp.json
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,13 @@
"class": "boolean",
"default": false
},
{
"name": "output_log_files",
"label": "Output Log Files?",
"help": "If selected, log files (if applicable) will be uploaded as output. Currently only implemented for Breakseq and Manta.",
"class": "boolean",
"default": true
},
{
"name": "run_genotype_candidates",
"label": "Genotype Candidates?",
Expand Down Expand Up @@ -193,6 +200,14 @@
"class": "file",
"patterns": ["*.tar.gz"],
"optional": true
},
{
"name": "log_files",
"label": "Log Files",
"help": "(Optional) Log files for structural variant callers.",
"class": "file",
"patterns": ["*.txt"],
"optional": true
}
],
"runSpec": {
Expand Down Expand Up @@ -226,10 +241,10 @@
"aws:us-east-1": {
"assetDepends": [
{
"project": "project-FBbQqQj026K9PF53332ZFBB9",
"project": "project-FBbG4280Qg43bZJ1Jyyk8F8v",
"folder": "/",
"version": "0.0.1",
"name": "parliament2:0.0.1"
"name": "parliament2"
}
],
"systemRequirements": {
Expand Down
14 changes: 12 additions & 2 deletions dx_app_code/parliament2/parliament2.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def main(**job_inputs):
ref_name = "/home/dnanexus/in/{0}".format(ref_genome.name)
dxpy.download_dxfile(ref_genome, ref_name)

docker_call = ['dx-docker', 'run', '-v', '/home/dnanexus/in/:/home/dnanexus/in/', '-v', '/home/dnanexus/out/:/home/dnanexus/out/','parliament2:0.0.1', '--bam', bam_name, '-r', ref_name, '--prefix', str(prefix)]
docker_call = ['dx-docker', 'run', '-v', '/home/dnanexus/in/:/home/dnanexus/in/', '-v', '/home/dnanexus/out/:/home/dnanexus/out/','parliament2', '--bam', bam_name, '-r', ref_name, '--prefix', str(prefix)]

if 'illumina_bai' in job_inputs:
input_bai = dxpy.open_dxfile(job_inputs['illumina_bai'])
Expand Down Expand Up @@ -76,9 +76,19 @@ def main(**job_inputs):
sv_caller_results_upload.append(dxpy.dxlink(dxpy.upload_local_file(name)))

output = {
'sv_caller_results' : sv_caller_results_upload,
'sv_caller_results' : sv_caller_results_upload
}

subprocess.check_call(['ls', '-sh', '/home/dnanexus/out/svtyped_vcfs/'])

# if job_inputs['output_log_files']:
# if job_inputs['run_breakseq'] or job_inputs['run_manta']:
# log_file_names = glob.glob('/home/dnanexus/out/log_files/*')
# log_file_upload = []
# for name in log_file_names:
# log_file_upload.append(dxpy.dxlink(dxpy.upload_local_file(name)))
# output['log_files'] = log_file_upload

if job_inputs['run_genotype_candidates']:
svtyped_vcf_names = glob.glob('/home/dnanexus/out/svtyped_vcfs/*')
svtyped_vcfs_upload = []
Expand Down
70 changes: 45 additions & 25 deletions parliament2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ dnanexus=${18}
cp "${ref_fasta}" ref.fa

echo "Classify FASTA"
echo "$prefix"

samtools faidx ref.fa &
ref_genome=$(python /home/dnanexus/get_reference.py)
Expand Down Expand Up @@ -66,6 +67,8 @@ echo "Generate contigs"

samtools view -H input.bam | python /getContigs.py "$filter_short_contigs" > contigs

mkdir -p /home/dnanexus/out/log_files/

if [[ "$run_breakseq" == "True" || "$run_manta" == "True" ]]; then
echo "Launching jobs that cannot be parallelized by contig"
fi
Expand All @@ -81,13 +84,13 @@ if [[ "$run_breakseq" == "True" ]]; then
--bwa /usr/local/bin/bwa --samtools /usr/local/bin/samtools \
--bplib_gff "$bplib" \
--nthreads "$(nproc)" --bplib_gff "$bplib" \
--sample "$prefix" 1> /home/dnanexus/out/breakseq.stdout.log 2> /home/dnanexus/out/breakseq.stderr.log &
--sample "$prefix" 1> /home/dnanexus/out/log_files/breakseq.stdout.log 2> /home/dnanexus/out/log_files/breakseq.stderr.log &
fi

# MANTA
if [[ "$run_manta" == "True" ]]; then
echo "Manta"
timeout 6h runManta 1> /home/dnanexus/out/manta.stdout.log 2> /home/dnanexus/out/manta.stderr.log &
timeout 6h runManta 1> /home/dnanexus/out/log_files/manta.stdout.log 2> /home/dnanexus/out/log_files/manta.stderr.log &
fi

# PREPARE FOR BREAKDANCER
Expand Down Expand Up @@ -188,6 +191,10 @@ if [[ "$run_cnvnator" == "True" ]] || [[ "$run_delly" == "True" ]] || [[ "$run_b
fi

wait
# Install SVTyper (in the background) only when genotyping of candidates was requested
if [[ "$run_genotype_candidates" == "True" ]]; then
pip install git+https://github.com/hall-lab/svtyper.git -q &
fi

echo "Converting results to VCF format"
mkdir -p /home/dnanexus/out/sv_caller_results/
Expand All @@ -205,7 +212,7 @@ fi) &
(if [[ "$run_manta" == "True" ]]; then
echo "Convert Manta results to VCF format"
cp manta/results/variants/diploidSV.vcf.gz /home/dnanexus/out/sv_caller_results/"$prefix".manta.diploidSV.vcf.gz
cp manta/results/variants/candidateSV.vcf.gz /home/dnanexus/out/sv_caller_results/"$prefix".manta.candidateSV.vcf.gz
# cp manta/results/variants/candidateSV.vcf.gz /home/dnanexus/out/sv_caller_results/"$prefix".manta.candidateSV.vcf.gz
cp manta/results/stats/alignmentStatsSummary.txt /home/dnanexus/out/sv_caller_results/"$prefix".manta.alignmentStatsSummary.txt

mv manta/results/variants/diploidSV.vcf.gz .
Expand Down Expand Up @@ -282,6 +289,25 @@ fi) &

wait

# Enable exit-on-error for the duration of this check.
set -e
# Verify that at least one *.vcf file exists in the current directory tree
# NOTE(review): in this view the check appears to run regardless of
# run_genotype_candidates, yet both messages say "SVTyper requested" — confirm wording.
if [[ -z $(find . -name "*.vcf") ]]; then
# On DNAnexus the failure is reported through dx-jobutil-report-error.
# NOTE(review): unlike the else branch there is no explicit `exit 1` here;
# presumably the helper ends the job — confirm.
if [[ "$dnanexus" == "True" ]]; then
dx-jobutil-report-error "ERROR: SVTyper requested, but candidate VCF files required to genotype. No VCF files found."
else
echo "ERROR: SVTyper requested, but candidate VCF files required to genotype. No VCF files found."
exit 1
fi
fi
# Turn exit-on-error back off so later commands may fail without stopping the script.
set +e

# Print the names of the first three reference sequences (chromosomes) reported for the BAM file
samtools idxstats input.bam | cut -f 1 | head -3

# TODO: check that all VCF files have all chromosomes (not yet implemented; loop stub below)
# for item in *svtyped.vcf; do


# Run SVtyper and SVviz
if [[ "$run_genotype_candidates" == "True" ]]; then
echo "Running SVTyper"
Expand All @@ -292,31 +318,22 @@ if [[ "$run_genotype_candidates" == "True" ]]; then
fi

mkdir -p /home/dnanexus/out/svtyped_vcfs/
set -e
# Verify that there are VCF files available
if [[ -z $(find . -name "*.vcf") ]]; then
if [[ "$dnanexus" == "True" ]]; then
dx-jobutil-report-error "ERROR: SVTyper requested, but candidate VCF files required to genotype. No VCF files found."
else
echo "ERROR: SVTyper requested, but candidate VCF files required to genotype. No VCF files found."
exit 1
fi
fi
set +e

i=0
# Breakdancer
if [[ "$run_breakdancer" == "True" ]]; then
echo "Running SVTyper on Breakdancer outputs"
mkdir /home/dnanexus/svtype_breakdancer
timeout -k 500 60m bash ./parallelize_svtyper.sh /home/dnanexus/breakdancer.vcf svtype_breakdancer /home/dnanexus/"${prefix}".breakdancer.svtyped.vcf input.bam
bash ./parallelize_svtyper.sh /home/dnanexus/breakdancer.vcf svtype_breakdancer /home/dnanexus/"${prefix}".breakdancer.svtyped.vcf input.bam
# timeout -k 500 60m bash ./parallelize_svtyper.sh /home/dnanexus/breakdancer.vcf svtype_breakdancer /home/dnanexus/"${prefix}".breakdancer.svtyped.vcf input.bam
fi

# Breakseq
if [[ "$run_breakseq" == "True" ]]; then
echo "Running SVTyper on BreakSeq outputs"
mkdir /home/dnanexus/svtype_breakseq
timeout -k 500 60m bash ./parallelize_svtyper.sh /home/dnanexus/breakseq.vcf svtype_breakseq /home/dnanexus/"${prefix}".breakseq.svtyped.vcf input.bam
bash ./parallelize_svtyper.sh /home/dnanexus/breakseq.vcf svtype_breakseq /home/dnanexus/"${prefix}".breakseq.svtyped.vcf input.bam
# timeout -k 500 60m bash ./parallelize_svtyper.sh /home/dnanexus/breakseq.vcf svtype_breakseq /home/dnanexus/"${prefix}".breakseq.svtyped.vcf input.bam

if [[ -f breakseq.vcf ]]; then
echo breakseq.vcf >> survivor_inputs
Expand All @@ -328,15 +345,17 @@ if [[ "$run_genotype_candidates" == "True" ]]; then
echo "Running SVTyper on CNVnator outputs"
mkdir /home/dnanexus/svtype_cnvnator
cat cnvnator.vcf | python /get_uncalled_cnvnator.py | python /add_ciend.py 1000 > /home/dnanexus/cnvnator.ci.vcf
timeout -k 500 60m bash ./parallelize_svtyper.sh /home/dnanexus/cnvnator.vcf svtype_cnvnator "${prefix}".cnvnator.svtyped.vcf input.bam
# timeout -k 500 60m bash ./parallelize_svtyper.sh /home/dnanexus/cnvnator.vcf svtype_cnvnator "${prefix}".cnvnator.svtyped.vcf input.bam
bash ./parallelize_svtyper.sh /home/dnanexus/cnvnator.vcf svtype_cnvnator "${prefix}".cnvnator.svtyped.vcf input.bam
fi

# Delly
if [[ "$run_delly" == "True" ]]; then
echo "Running SVTyper on Delly outputs"
for item in delly*vcf; do
mkdir /home/dnanexus/svtype_delly_"$i"
timeout -k 500 60m bash ./parallelize_svtyper.sh /home/dnanexus/"${item}" svtype_delly_"$i" /home/dnanexus/delly.svtyper."$i".vcf input.bam
bash ./parallelize_svtyper.sh /home/dnanexus/"${item}" svtype_delly_"$i" /home/dnanexus/delly.svtyper."$i".vcf input.bam
# timeout -k 500 60m bash ./parallelize_svtyper.sh /home/dnanexus/"${item}" svtype_delly_"$i" /home/dnanexus/delly.svtyper."$i".vcf input.bam
i=$((i + 1))
done

Expand All @@ -351,19 +370,20 @@ if [[ "$run_genotype_candidates" == "True" ]]; then
if [[ "$run_lumpy" == "True" ]]; then
echo "Running SVTyper on Lumpy outputs"
mkdir /home/dnanexus/svtype_lumpy
timeout -k 500 60m bash ./parallelize_svtyper.sh /home/dnanexus/lumpy.vcf svtype_lumpy /home/dnanexus/"${prefix}".lumpy.svtyped.vcf input.bam
bash ./parallelize_svtyper.sh /home/dnanexus/lumpy.vcf svtype_lumpy /home/dnanexus/"${prefix}".lumpy.svtyped.vcf input.bam
# timeout -k 500 60m bash ./parallelize_svtyper.sh /home/dnanexus/lumpy.vcf svtype_lumpy /home/dnanexus/"${prefix}".lumpy.svtyped.vcf input.bam
fi

# Manta
if [[ "$run_manta" == "True" ]]; then
echo "Running SVTyper on Manta outputs"
zcat manta/results/variants/candidateSV.vcf.gz | python /add_ciend.py 100 > /home/dnanexus/manta.input.vcf
mkdir /home/dnanexus/svtype_manta
timeout -k 500 60m bash ./parallelize_svtyper.sh /home/dnanexus/manta.input.vcf svtype_manta /home/dnanexus/"${prefix}".manta.svtyped.vcf input.bam
# echo "Running SVTyper on Manta outputs"
# zcat manta/results/variants/candidateSV.vcf.gz | python /add_ciend.py 100 > /home/dnanexus/manta.input.vcf
# mkdir /home/dnanexus/svtype_manta
# timeout -k 500 60m bash ./parallelize_svtyper.sh /home/dnanexus/manta.input.vcf svtype_manta /home/dnanexus/"${prefix}".manta.svtyped.vcf input.bam

if [[ -f diploidSV.vcf ]]; then
mv diploidSV.vcf manta.diploid.vcf
echo manta.diploid.vcf >> survivor_inputs
mv diploidSV.vcf /home/dnanexus/"${prefix}".manta.svtyped.vcf
echo /home/dnanexus/"${prefix}".manta.svtyped.vcf >> survivor_inputs
fi
fi

Expand Down
39 changes: 0 additions & 39 deletions resources/home/dnanexus/CMakeCPackOptions.cmake

This file was deleted.

17 changes: 0 additions & 17 deletions resources/home/dnanexus/CMakeFiles/3.11.0/CMakeASMCompiler.cmake

This file was deleted.

Loading

0 comments on commit 97517b1

Please sign in to comment.