biobakery/humann.submit

#!/bin/bash
#SBATCH --mem=20gb
#SBATCH --time=168:00:00
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
#SBATCH --job-name=humann
#SBATCH --error=humann.%J.err
#SBATCH --output=humann.%J.out
#SBATCH --partition=batch

# Unload any modules that are currently loaded, then load the tools this job uses.
module purge
# HUMAnN, pinned to version 3.6.
module load humann/3.6
# Cluster data module; it provides access to the pre-downloaded reference databases.
module load biodata/1.0

# The CHOCOPhlAn and UniRef databases are already downloaded on our clusters
# These databases can be accessed with the variable $HUMANN2 once the biodata module is loaded

# Run HUMAnN

# The paired-end reads used as input here are the reads produced by KneadData.
# Paired-end reads need to be concatenated into one file before running HUMAnN.
# Use '>' (truncate) rather than '>>' (append): if this job is re-run, the file
# must be rebuilt from scratch instead of gaining a second copy of every read.
cat ./kneaddata_output_demo1/demo1.R1_kneaddata_paired_1.fastq \
    ./kneaddata_output_demo1/demo1.R1_kneaddata_paired_2.fastq \
    > ./kneaddata_output_demo1/demo1_pair_1_2_cat.fastq

# To save computational time when working with multiple samples,
# only one sample needs to be run with the taxonomic profile generated by MetaPhlAn.
# The remaining samples can then be run against the bowtie2-indexed custom ChocoPhlAn
# database that was generated while running that first sample.
# More information on this can be found here: https://github.com/biobakery/humann#joint-taxonomic-profile

# Run HUMAnN on one sample
# This run uses the MetaPhlAn taxonomic profile passed via --taxonomic-profile.
# Comment this command out when running the "Run HUMAnN on the remaining samples" Section.
humann \
    --input ./kneaddata_output_demo1/demo1_pair_1_2_cat.fastq \
    --input-format fastq \
    --taxonomic-profile all_metaphlan_output.txt \
    --threads 8 \
    --output demo1_humann_output

# Run HUMAnN on the remaining samples
# Make sure the "Run HUMAnN on one sample" Section is commented out when running this
#
# NOTE: demo2_pair_1_2_cat.fastq is not created anywhere in this script; the
# demo2 KneadData paired reads must be concatenated beforehand, the same way
# the demo1 reads are concatenated earlier in this file.
#
# --nucleotide-database points at the temp directory left behind by the first
# sample's run. HUMAnN names that directory <input basename>_humann_temp, and the
# first sample's input is demo1_pair_1_2_cat.fastq, so the directory is
# demo1_pair_1_2_cat_humann_temp.
# --bypass-nucleotide-index makes HUMAnN reuse the bowtie2 index already built
# there instead of building a new one.
humann -i ./kneaddata_output_demo2/demo2_pair_1_2_cat.fastq \
    --input-format fastq --threads 8 \
    --nucleotide-database ./demo1_humann_output/demo1_pair_1_2_cat_humann_temp/ \
    --bypass-nucleotide-index \
    -o demo2_humann_output

# If needed, output files from multiple samples can be merged with:
# NOTE(review): --file_name is matched against the per-sample output file names; with the inputs
# used above these presumably look like demo1_pair_1_2_cat_genefamilies.tsv, so confirm the filter matches.
# humann_join_tables -i *_humann_output/ --file_name "merged_genefamilies.tsv" --output all_merged_genefamilies.tsv
# humann_join_tables -i *_humann_output/ --file_name "merged_pathabundance.tsv" --output all_merged_pathabundance.tsv
# humann_join_tables -i *_humann_output/ --file_name "merged_pathcoverage.tsv" --output all_merged_pathcoverage.tsv