forked from harrybPHDcode/tools_assessment_hervk_SR-WGS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsteak
51 lines (36 loc) · 1.55 KB
/
steak
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/bin/bash
#
# SLURM batch job: detect HERV-K (LTR5Hs) insertions in one WGS sample with
# STEAK, then re-align the STEAK output reads with novoalign and reduce the
# alignments to a filtered BED file of candidate integrations.
#
# Usage:   sbatch steak [sample_prefix]
#   sample_prefix  basename (no extension) of the input BAM in the working
#                  directory; defaults to LP6005878-DNA_A05.
# Output:  filtered_novel_ints.bed in the working directory.
#
# WARNING: the input BAM and every intermediate SAM are deleted once the step
# that consumes them has succeeded.
#
# job name
#SBATCH --job-name=steak_script
# How much memory do you need **per core**?
#SBATCH --mem-per-cpu=10G
# time to run
#SBATCH --time=0-12:00
# Number of cores you need
#SBATCH --ntasks=2
# output name
#SBATCH --output=/scratch/steak.out

module add utilities/use.dev
module add apps/steak/20190912-singularity

# Strict mode is switched on only after the modules are loaded, since the
# `module` shell function is not guaranteed to be safe under `set -u`.
# From here on any failing step aborts the job, so an input is never removed
# unless the command that produced its replacement succeeded.
set -euo pipefail

# specify input file and nix reference file
input=${1:-LP6005878-DNA_A05}
inputnix=/users/k1893288/t_account/brc_scratch/t_scratch/hg19.nix

# convert the input file to SAM and remove the BAM
samtools view -h -o "${input}.sam" "${input}.bam"
rm -- "${input}.bam"

# sort the SAM file by queryname and delete the coordinate-sorted SAM
picard SortSam I="${input}.sam" O="${input}.sorted.sam" SORT_ORDER=queryname
rm -- "${input}.sam"

# if needed, novoindex the hg19 reference file
#novoindex hg19.nix hg19.fa

# run steak
# NOTE(review): the next step assumes steak writes ${input}.sorted.1.fastq and
# ${input}.sorted.2.fastq next to its input - confirm against the steak docs.
steak --input "${input}.sorted.sam" --paired --aligned \
  --TE-reference hervk_LTR5Hs_Dfam_UC.fa

# run steak post processing steps (see steak documentation)
# novoalign the steak output reads to the novoindex reference
novoalign -d "${inputnix}" -o SAM -o FullNW \
  -f "${input}.sorted.1.fastq" "${input}.sorted.2.fastq" \
  > "${input}.sorted.detection.sam"

# convert SAM to BAM and delete the SAM
samtools view -S -b "${input}.sorted.detection.sam" \
  > "${input}.sorted.detection.bam"
# The original removed "$input.detection.sam", a name that is never created,
# so the real intermediate file was left behind.
rm -- "${input}.sorted.detection.sam"

# convert BAM to BED
bedtools bamtobed -i "${input}.sorted.detection.bam" \
  > "${input}.sorted.detection.bed"

# merge the BED file and keep only the high quality predictions:
# intervals within 1000 bp are merged, and a merged interval is kept when it
# is supported by at least 5 records (column 4 = count).
# bedtools merge needs input sorted by chromosome then start, and the BED
# above follows the aligner's read order, so it is sorted on the fly.
LC_ALL=C sort -k1,1 -k2,2n "${input}.sorted.detection.bed" \
  | bedtools merge -c 4 -o count -d 1000 -i stdin \
  | awk '$4 >= 5' \
  > filtered_novel_ints.bed