Skip to content

Commit

Permalink
added circulocov and blobtools
Browse files Browse the repository at this point in the history
  • Loading branch information
erinyoung committed Feb 22, 2024
1 parent 4072652 commit f4133db
Showing 1 changed file with 24 additions and 21 deletions.
45 changes: 24 additions & 21 deletions bin/for_multiqc.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
# Relative file names. For the tools handled below, each *_input is read only
# if it exists, and the matching *_output is the table written for MultiQC.
amrfinder_output = 'amrfinderplus_mqc.txt'
blobtools_input = 'blobtools_summary.txt'
blobtools_output = 'blobtools_mqc.tsv'
circulocov_input = 'circulocov_summary.tsv'
circulocov_output = 'circulocov_mqc.tsv'
core_genome_input = 'core_genome_evaluation.csv'
core_genome_output = 'core_genome_evaluation_mqc.csv'
drprg_input = 'drprg_summary.tsv'
Expand Down Expand Up @@ -55,38 +57,39 @@
# Input and output file names for the snpdists matrix.
snpdists_input = 'snp_matrix.txt'
snpdists_output = 'snpdists_matrix_mqc.txt'


##########################################
# getting ready for multiqc #
##########################################

if exists(blobtools_input):
    # Reshape the long-format blobtools summary (one row per sample/organism
    # pair) into a wide table for MultiQC: one row per sample, one column per
    # organism, each cell holding that pair's 'bam0_read_map_p' value.
    blobtools_df = pd.read_table(blobtools_input)

    # Drop the 'all', 'no-hit' and 'undef' rows so that only named organisms
    # become columns.
    blobtools_df = blobtools_df[~blobtools_df['name'].isin(['all', 'no-hit', 'undef'])]

    # Samples keep their order of first appearance; organism columns are sorted.
    samples = blobtools_df['sample'].drop_duplicates().tolist()
    organisms = sorted(blobtools_df['name'].drop_duplicates().tolist())

    # Index the values once instead of re-scanning the whole frame for every
    # (sample, organism) pair. setdefault keeps the first occurrence, matching
    # a "first matching row wins" lookup.
    read_map_p = {}
    for sample, organism, percent in zip(
        blobtools_df['sample'],
        blobtools_df['name'],
        blobtools_df['bam0_read_map_p'],
    ):
        read_map_p.setdefault((sample, organism), percent)

    blobtools_result_df = pd.DataFrame(columns=["sample"] + organisms)

    for sample in samples:
        # Organisms not reported for this sample are recorded as 0.
        result = [sample] + [read_map_p.get((sample, organism), 0) for organism in organisms]
        blobtools_result_df.loc[len(blobtools_result_df.index)] = result

    blobtools_result_df.to_csv(blobtools_output, index=False, sep="\t")

if exists(drprg_input):
    # No reshaping: the drprg summary is re-emitted as a tab-separated table.
    drprg_df = pd.read_table(drprg_input)
    drprg_df.to_csv(drprg_output, sep="\t", index=False)

if exists(circulocov_input):
    # Keep only the rows whose 'contigs' value is 'all', then remove the
    # 'circ' and 'contigs' columns before writing the MultiQC table.
    circulocov_df = pd.read_table(circulocov_input)
    circulocov_df = circulocov_df.loc[circulocov_df['contigs'] == 'all'].drop(
        columns=['circ', 'contigs']
    )
    circulocov_df.to_csv(circulocov_output, sep="\t", index=False)

if exists(elgato_input):
    # No reshaping: the elgato summary is re-emitted as a tab-separated table.
    elgato_df = pd.read_table(elgato_input)
    elgato_df.to_csv(elgato_output, sep="\t", index=False)
Expand Down

0 comments on commit f4133db

Please sign in to comment.