-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path14-haplotypecaller.py
executable file
·105 lines (91 loc) · 3.56 KB
/
14-haplotypecaller.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#! /usr/bin/env python
# PBS cluster job submission in Python
# Uses GATK-3.3.0 Haplotype Caller for variant calling performed per-sample
# By Jean P. Elbers
# Last modified 22 Jan 2015
###############################################################################
Usage = """
14-hc_per_sample.py - version 1.0
Command:
cd InDir = /work/jelber2/immunome_2014/combined/call-SNPs-recal03
1.Analyze patterns of covariation in the sequence dataset
java -Xmx8g -jar ~/bin/GATK-3.3.0/GenomeAnalysisTK.jar \
-T HaplotypeCaller \
-R RefDir/GCF_000241765.3_Chrysemys_picta_bellii-3.0.3_genomic.fna \
-I InDir/Sample-recal03.bam \
--emitRefConfidence GVCF \
--variant_index_type LINEAR \
--variant_index_parameter 128000 \
-L RefDir/immunome_baits_C_picta-3.0.3.interval.list \
-stand_call_conf 30 \
-stand_emit_conf 10 \
-o Sample-raw-snps-indels.vcf
File Info
InDir = /work/jelber2/immunome_2014/combined/call-SNPs-recal03
Input Files =
Sample-recal03.bam
OutDir = /work/jelber2/immunome_2014/combined/hc
Output Files = Sample-raw-snps-indels.vcf
Usage (execute following code in InDir):
find . -name '*-recal03.bam' -not -name 'ALL-samples-*' -exec ~/scripts/immunome_2014/14-hc_per_sample.py {} \;
"""
###############################################################################
import os, sys, subprocess #imports os, sys, subprocess modules
if len(sys.argv)<2:
print Usage
else:
FileList = sys.argv[1:]
RefDir = "/work/jelber2/reference"
InDir = "/work/jelber2/immunome_2014/combined/call-SNPs-recal03"
OutDir1 = "hc"
os.chdir(InDir)
os.chdir("..") # go up one directory
if not os.path.exists(OutDir1):
os.mkdir(OutDir1) # if OutDir1 does not exist, then make it
os.chdir(InDir)
for InFileName in FileList: # do the following steps for each file in the inputstream
FileSuffix = "-recal03.bam"
FilePrefix = "./"
Samplepre = InFileName.replace(FileSuffix,'') # creates Samplepre string
Sample = Samplepre.replace(FilePrefix,'') # creates Sample string
# Customize your options here
Queue = "single"
Allocation = "hpc_gopo01"
Processors = "nodes=1:ppn=4"
WallTime = "01:00:00"
LogOut = "/work/jelber2/immunome_2014/combined/hc"
LogMerge = "oe"
JobName = "hc_per_sample-%s" % (Sample)
Command ="""
java -Xmx8g -jar ~/bin/GATK-3.3.0/GenomeAnalysisTK.jar \
-T HaplotypeCaller \
-R %s/GCF_000241765.3_Chrysemys_picta_bellii-3.0.3_genomic.fna \
-I %s/%s-recal03.bam \
--emitRefConfidence GVCF \
--variant_index_type LINEAR \
--variant_index_parameter 128000 \
-L %s/immunome_baits_C_picta-3.0.3.interval.list \
-stand_call_conf 30 \
-stand_emit_conf 10 \
-o ../hc/%s-raw-snps-indels.vcf""" % \
(RefDir, InDir, Sample, RefDir, Sample)
JobString = """
#!/bin/bash
#PBS -q %s
#PBS -A %s
#PBS -l %s
#PBS -l walltime=%s
#PBS -o %s
#PBS -j %s
#PBS -N %s
cd %s
%s\n""" % (Queue, Allocation, Processors, WallTime, LogOut, LogMerge, JobName, InDir, Command)
#Create pipe to qsub
proc = subprocess.Popen(['qsub'], shell=True,
stdin=subprocess.PIPE, stdout=subprocess.PIPE, close_fds=True)
(child_stdout, child_stdin) = (proc.stdout, proc.stdin)
#Print JobString
JobName = proc.communicate(JobString)[0]
print JobString
print JobName