-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathhumann.submit
executable file
·48 lines (40 loc) · 2.21 KB
/
humann.submit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/bin/bash
# Slurm batch script that runs HUMAnN on KneadData-cleaned reads.
# Resources requested below: 1 node with 8 tasks per node, 20 GB of memory and
# a 168-hour time limit on the "batch" partition. The 8 tasks match the
# "--threads 8" passed to the humann commands further down.
# Job stderr/stdout go to humann.<pattern>.err / humann.<pattern>.out, where
# %J is a filename pattern substituted by Slurm (job-id based; see the sbatch
# documentation for the exact expansion).
#SBATCH --mem=20gb
#SBATCH --time=168:00:00
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
#SBATCH --job-name=humann
#SBATCH --error=humann.%J.err
#SBATCH --output=humann.%J.out
#SBATCH --partition=batch
# Unload any modules already loaded so the job starts from a clean environment,
# then load the HUMAnN module and the biodata module.
module purge
module load humann/3.6
module load biodata/1.0
# The CHOCOPhlAn and UniRef databases are already downloaded on our clusters
# These databases can be accessed with the variable $HUMANN2 once the biodata module is loaded
# Run HUMAnN
# Here, input paired-end reads are the output reads generated with KneadData.
# Paired-end reads need to be concatenated into one file before running HUMAnN.
# The combined file is written with '>' (truncate) rather than '>>' (append):
# with '>>', re-running this job would append a second copy of every read to
# the existing file and silently inflate the input.
cat ./kneaddata_output_demo1/demo1.R1_kneaddata_paired_1.fastq \
    ./kneaddata_output_demo1/demo1.R1_kneaddata_paired_2.fastq \
    > ./kneaddata_output_demo1/demo1_pair_1_2_cat.fastq
# When several samples are processed, computational time can be saved by
# running only ONE sample against the taxonomic profile produced by MetaPhlAn.
# That run generates a bowtie2-indexed custom ChocoPhlAn database, which the
# remaining samples can then reuse.
# Background: https://github.com/biobakery/humann#joint-taxonomic-profile
# Run HUMAnN on one sample
# Make sure the "Run HUMAnN on the remaining samples" Section is commented out when running this
humann \
  --input ./kneaddata_output_demo1/demo1_pair_1_2_cat.fastq \
  --input-format fastq \
  --taxonomic-profile all_metaphlan_output.txt \
  --threads 8 \
  --output demo1_humann_output
# Run HUMAnN on the remaining samples
# Make sure the "Run HUMAnN on one sample" Section is commented out when running this
# NOTE(review): the input file below must already exist; this script only
# concatenates the demo1 reads, so presumably the demo2 reads are concatenated
# the same way beforehand - confirm.
# --nucleotide-database points at the temp folder written by the first-sample
# run. HUMAnN names that folder <input basename>_humann_temp, and the first run
# reads demo1_pair_1_2_cat.fastq, so the folder is
# demo1_pair_1_2_cat_humann_temp (not demo1_pair_1_2_merged_humann_temp).
# --bypass-nucleotide-index makes HUMAnN use that already-indexed database
# instead of building a new index.
humann -i ./kneaddata_output_demo2/demo2_pair_1_2_cat.fastq \
--input-format fastq --threads 8 \
--nucleotide-database ./demo1_humann_output/demo1_pair_1_2_cat_humann_temp/ \
--bypass-nucleotide-index \
-o demo2_humann_output
# If needed, output files from multiple samples can be merged with:
# humann_join_tables -i *_humann_output/ --file_name "merged_genefamilies.tsv" --output all_merged_genefamilies.tsv
# humann_join_tables -i *_humann_output/ --file_name "merged_pathabundance.tsv" --output all_merged_pathabundance.tsv
# humann_join_tables -i *_humann_output/ --file_name "merged_pathcoverage.tsv" --output all_merged_pathcoverage.tsv