-
Notifications
You must be signed in to change notification settings - Fork 0
/
trycycler_consensus.sh
executable file
·98 lines (81 loc) · 2.48 KB
/
trycycler_consensus.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/bin/bash
# Run the last three steps of Trycycler: msa, partition, and consensus.
# Copyright (C) 2024 Yu Wan <[email protected]>
# Licensed under the GNU General Public Licence version 3 (GPLv3) <https://www.gnu.org/licenses/>.
# First version: 12 November 2024; last update: 12 November 2024
# Help information ###############
display_usage() {
echo "
This script carries out the last three steps of Trycycler: msa, partition, and consensus.
Parameters:
-r=*: The input long reads
-d=*: The input directory or the output directory of 'trcycyler cluster', which serves as the input directory for 'trycycler reconcile' (default: 2_clusters)
-t=*: Number of threads (default: 1)
Example useage:
~/bin/Assembly_toolkit/trycycler_consensus.sh -r=sample1.fastq.gz -d=2_clusters -t=16 >trycycler_consensus.log 2>&1
Dependencies: trycycler, perl.
"
}
if [ -z "$1" ]
then
display_usage
exit
fi
# Helper function ###############
clean_log() {
perl -pe 's/\x1b\[[0-9;]*[mG]//g' "$1"
}
# Main procedure ###############
# Set default values ===============
dir_in='2_clusters'
t=1
# Read parameters ===============
for arg in "$@"
do
case $arg in
-r=*)
long_reads="${arg#*=}"
;;
-d=*)
dir_in="${arg#*=}"
;;
-t=*)
t="${arg#*=}"
;;
*)
;;
esac
done
# Sanity check ===============
if [ -f "$long_reads" ]
then
echo "[$(date)] Input read file: $long_reads"
else
echo "[$(date)] Error: read file $long_reads was not found."
exit 1
fi
if [ $t -lt 1 ]
then
echo "[$(date)] Error: the number of threads is negative. Reset to 1."
t=1
fi
echo "[$(date)] Number of threads: $t"
# Multi-sequence aligment ===============
cluster_dirs=( $(ls -1 -d "$dir_in"/cluster_*/) )
echo "[$(date)] Found ${#cluster_dirs[@]} clusters in $dir_in"
echo "[$(date)] Conduct multi-sequence alignment"
for d in "${cluster_dirs[@]}"
do
trycycler msa --threads "$t" --cluster_dir "${d%/}"
done
# Partitioning reads
echo "[$(date)] Partition long reads"
trycycler partition --threads "$t" --reads "$long_reads" --cluster_dirs "$dir_in"/cluster_00?
# Generating a consensus ===============
echo "[$(date)] Produce consensus sequences"
for d in "${cluster_dirs[@]}"
do
trycycler consensus --threads "$t" --cluster_dir "${d%/}" >> trycycler_consensus.txt 2>&1
done
# Finish up ===============
clean_log trycycler_consensus.txt > trycycler_consensus.log && rm trycycler_consensus.txt