-
Notifications
You must be signed in to change notification settings - Fork 9
/
ntedit-make
executable file
·139 lines (126 loc) · 5.06 KB
/
ntedit-make
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/bin/make -f
# Pipeline for ntedit
# Written by Talha Murathan Goktas
# Read files: every file in the working directory whose name starts with the
# `reads` prefix. Bloom filters (*.bf) are filtered out because the filter
# written by this pipeline is named $(p)_k$(k).bf and p defaults to $(reads);
# without the filter, a re-run would pick up an earlier Bloom filter as a
# prerequisite of the .bf rule and hand it to nthits as if it were a read file.
allreads:=$(filter-out %.bf,$(wildcard $(reads)*))
# Draft file name with its last extension removed (myDraft.fa -> myDraft).
draftPrefix=$(basename $(draft))
# ---- Parameters shared by ntHits and ntEdit ----
# b: prefix passed to ntedit with -b; built from the draft prefix and k.
b = $(draftPrefix)_k$(k)
# t: thread count passed to both tools with -t.
t = 1

# ---- ntHits parameters ----
# p: prefix passed to nthits with -p; defaults to the reads prefix.
p = $(reads)
# solid: when "true", nthits is run with --solid instead of -c$(cutoff).
solid = false

# ---- ntEdit parameters ----
# Each value is forwarded to ntedit under the flag of the same name,
# except cap, which is forwarded as -c.
z = 100
i = 4
d = 5
x = 5.000
y = 9.000
# cap: k*3/2 in shell integer arithmetic. The assignment is recursive, so the
# shell command only runs when cap is expanded (i.e. when k is available).
cap = $(shell echo $$(( $(k)*3/2 )))
m = 0
v = 0
a = 0
j = 3
s = 0
# Default recipe shell: bash with -e (stop at the first failing command) and
# pipefail (a pipeline fails if any of its commands fails).
SHELL=bash -e -o pipefail
# Probe for a zsh that accepts the same options. The probe prints zsh's exit
# status, so the branch below is taken only when zsh ran `true` successfully.
ifeq ($(shell zsh -e -o pipefail -c 'true' 2>/dev/null; echo $$?), 0)
# Switch to zsh, keeping pipefail so that all commands of a pipe must succeed.
SHELL=zsh -e -o pipefail
# Report run time and memory usage with zsh (both variables are exported so
# that the shells running the recipes see them).
export REPORTTIME=1
export TIMEFMT=time user=%U system=%S elapsed=%E cpu=%P memory=%M job=%J
endif
# Optional resource logging, enabled by defining `time`: each tool is prefixed
# with GNU time in verbose mode, writing its report to <tool>_k<k>.time.
# `gtime` is used when the shell can locate it; otherwise `time` is used.
# When `time` is not defined both wrappers stay undefined and expand to nothing.
ifdef time
ifeq ($(shell command -v gtime),)
gtimeNtedit = command time -v -o ntedit_k$(k).time
gtimeNthits = command time -v -o nthits_k$(k).time
else
gtimeNtedit = command gtime -v -o ntedit_k$(k).time
gtimeNthits = command gtime -v -o nthits_k$(k).time
endif
endif
# Command-style goals that do not correspond to files on disk.
.PHONY: all
.PHONY: check help ntedit
# Remove a target file when the recipe that builds it exits with an error.
.DELETE_ON_ERROR:
# Files marked as precious.
# NOTE(review): the polishing rule in this file builds
# $(draftPrefix)_k$(k)_edited.fa, which the last entry below does not name;
# confirm whether that entry is still intended.
.PRECIOUS: %.bf %.tsv $(draftPrefix)_edited.fa
# Default goal: running the script without a goal prints the usage text.
all: help
# help: print the usage text, i.e. every variable that can be set on the
# command line, grouped by the tool that consumes it, followed by two example
# invocations. The recipe only echoes text; it builds nothing.
help:
	@echo ""
	@echo "Usage: ./ntedit-make ntedit [OPTION=VALUE]..."
	@echo ""
	@echo "Options:"
	@echo " draft draft genome assembly. Must be specified with exact FILE NAME. Ex: draft=myDraft.fa (FASTA, Multi-FASTA, and/or gzipped compatible), REQUIRED"
	@echo " reads prefix of reads file(s). All files in the working directory with the specified prefix will be used for polishing (fastq, fasta, gz, bz, zip), REQUIRED"
	@echo " time logs time and memory usage to file for main steps (Set to 1 to enable logging)"
	@echo " k kmer size, REQUIRED FOR NTHITS. AS OF v1.3.1, NO LONGER REQUIRED BY NTEDIT."
	@echo " t number of threads [default=1]"
	@echo " b output file prefix, OPTIONAL"
	@echo ""
	@echo "Options specific to ntHits:"
	@echo " p the prefix for output file name (bloom filter)"
	@echo " solid output the solid k-mers (non-erroneous k-mers)"
	@echo " cutoff the maximum coverage of kmers in output bloom filter, REQUIRED or set solid=true"
	@echo ""
	@echo "Options specific to ntEdit:"
	@echo " z minimum contig length [default=100]"
	@echo " i maximum number of insertion bases to try, range 0-5, [default=4]"
	@echo " d maximum number of deletions bases to try, range 0-5, [default=5]"
	@echo " x k/x ratio for the number of kmers that should be missing, [default=5.000]"
	@echo " y k/y ratio for the number of edited kmers that should be present, [default=9.000]"
	@echo " j controls size of kmer subset. When checking subset of kmers, check every jth kmer, [default=3]"
	@echo " cap cap for the number of base insertions that can be made at one position, [default=k*1.5]"
	@echo " X ratio of number of kmers in the k subset that should be missing in order to attempt fix (higher=stringent), [default=0.5]"
	@echo " Y ratio of number of kmers in the k subset that should be present to accept an edit (higher=stringent), [default=0.5]"
	@echo " m mode of editing, range 0-2, [default=0]"
	@echo " 0: best substitution, or first good indel"
	@echo " 1: best substitution, or best indel"
	@echo " 2: best edit overall (suggestion that you reduce i and d for performance)"
	@echo " a Soft masks missing kmer positions having no fix (1 = yes, default = 0, no)"
	@echo " s SNV mode. Overrides draft kmer checks, forcing reassessment at each position (1 = yes, default = 0, no. EXPERIMENTAL)"
	@echo " v verbose mode (1 = yes, default = 0, no)"
	@echo ""
	@echo "Example: Polishing myDraft.fa with myReads1.fq and myReads2.fq"
	@echo " ./ntedit-make ntedit draft=myDraft.fa reads=myReads cutoff=2 or"
	@echo " ./ntedit-make ntedit draft=myDraft.fa reads=myReads solid=true"
	@echo ""
	@echo "Make sure your read files all have the same prefix, as indicated by 'reads=<prefix>'. The makefile will use all files in the current working directory with this prefix for polishing."
	@echo "To ensure that the pipeline runs correctly, make sure that the following tools are in your PATH: ntedit, nthits"
	@echo "You must either specify the cutoff parameter or define solid=true in your command to set it automatically"
	@echo "If one of X/Y is set, ntEdit will use those parameters instead. Otherwise, it uses x/y by default."
# ntedit: main entry point. Runs the parameter checks, then requests the
# polished assembly <draftPrefix>_k<k>_edited.fa.
ntedit: check $(draftPrefix)_k$(k)_edited.fa
# check: validate the required command-line variables. The conditionals are
# resolved when the makefile is read and decide which lines end up in the
# recipe; the $(error)/$(info) calls fire when the recipe is expanded.
# Checks are made in this order: draft, reads, k, then cutoff/solid.
check:
ifndef draft
	$(error draft is not defined)
endif
ifdef reads
	$(info read files that are found: $(allreads))
else
	$(error reads is not defined)
endif
ifndef k
	$(error kmer size is not defined)
endif
# A cutoff is mandatory unless solid=true was requested.
ifneq ($(solid),true)
ifndef cutoff
	$(error set cutoff parameter or define solid=true to set automatically)
endif
endif
# Bloom filter <p>_k<k>.bf, built by nthits from all read files ($^).
# With solid=true nthits is run with --solid; otherwise with -c$(cutoff).
# $(gtimeNthits) is empty unless time logging was enabled.
$(p)_k$(k).bf: $(allreads)
ifneq ($(solid),true)
	$(gtimeNthits) nthits -c$(cutoff) --outbloom -p$(p) -k$(k) -t$(t) $^
else
	$(gtimeNthits) nthits --solid --outbloom -p$(p) -k$(k) -t$(t) $^
endif
# X and Y are used as a pair: when only one of them has been given a value,
# the other one falls back to 0.5. When neither is set, both stay undefined.
ifdef Y
X ?= 0.5
else
ifdef X
Y ?= 0.5
endif
endif
# Polished assembly <draftPrefix>_k<k>_edited.fa, produced by ntedit.
#   $<            the Bloom filter (first prerequisite), passed with -r
#   $(word 2,$^)  the draft assembly (second prerequisite), passed with -f
# -X/-Y are added to the command line only when X has a value.
# $(gtimeNtedit) is empty unless time logging was enabled.
$(draftPrefix)_k$(k)_edited.fa: $(p)_k$(k).bf $(draft)
ifndef X
	$(gtimeNtedit) ntedit -r $< -f $(word 2,$^) -b $(b) -t $(t) -z $(z) -i $(i) -d $(d) -x $(x) -y $(y) -c $(cap) -m $(m) -v $(v) -a $(a) -j $(j) -s $(s)
else
	$(gtimeNtedit) ntedit -r $< -f $(word 2,$^) -b $(b) -t $(t) -z $(z) -i $(i) -d $(d) -x $(x) -y $(y) -c $(cap) -m $(m) -v $(v) -a $(a) -j $(j) -X $(X) -Y $(Y) -s $(s)
endif