Skip to content

Commit

Permalink
Merge pull request #6308 from marieBvr/virannot_0.1.0
Browse files Browse the repository at this point in the history
Update to Virannot 0.1.0
  • Loading branch information
bgruening authored Sep 8, 2024
2 parents 956c476 + 714770c commit 16701bf
Show file tree
Hide file tree
Showing 21 changed files with 5,340 additions and 13,825 deletions.
78 changes: 43 additions & 35 deletions tools/virAnnot/otu.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@ def _align_sequences(options, hits_collection):
os.mkdir(cdd_output)
if os.path.exists(cdd_output + "/seq_to_align.fasta"):
os.remove(cdd_output + "/seq_to_align.fasta")
if os.path.exists(cdd_output + "/seq_nucc.fasta"):
os.remove(cdd_output + "/seq_nucc.fasta")
file_seq_to_align = cdd_output + "/seq_to_align.fasta"
file_color_config = cdd_output + "/color_config.txt"
f = open(file_seq_to_align, "a")
Expand Down Expand Up @@ -298,6 +300,7 @@ def _get_stats(options, hits_collection):
cdd_output = options.output + "/" + hits_collection[cdd_id]["short_description"].replace(" ", "_")
worksheet = workbook.add_worksheet(hits_collection[cdd_id]["short_description"]) # add a worksheet
file_cluster = cdd_output + '/otu_cluster.csv'
file_fasta_nucc = cdd_output + '/representative_nucc.fasta'
with open(file_cluster, 'r') as clust:
otu_reader = csv.reader(clust, delimiter=',')
samples_list = []
Expand Down Expand Up @@ -342,6 +345,8 @@ def _get_stats(options, hits_collection):
if sample not in ['contigs_list', 'global_taxonomy']:
total_nb_read = 0
for contig in otu_collection[otu][sample]:
if otu_collection[otu][sample][contig]['nb'] == '':
otu_collection[otu][sample][contig]['nb'] = 0
total_nb_read += int(otu_collection[otu][sample][contig]['nb'])
otu_collection[otu][sample]['total_nb_read'] = total_nb_read
row = 0
Expand All @@ -355,26 +360,30 @@ def _get_stats(options, hits_collection):
worksheet.write(row, column + 2, 'contigs_list')
row = 1
# column = 0
for otu in otu_collection:
if isinstance(otu_collection[otu], dict):
column = 0
worksheet.write(row, column, otu)
# prepare table with 0 in each cells
for sample in otu_collection[otu]:
column = 1
for samp in samples_list:
worksheet.write(row, column, 0)
column += 1
# fill in table with nb of read for each sample and each OTU
for sample in otu_collection[otu]:
column = 1
for samp in samples_list:
if samp == sample:
worksheet.write(row, column, otu_collection[otu][sample]['total_nb_read'])
column += 1
worksheet.write(row, len(samples_list) + 1, otu_collection[otu]['global_taxonomy'].replace(';', ' '))
worksheet.write(row, len(samples_list) + 2, ",".join(otu_collection[otu]['contigs_list']))
row += 1
with open(file_fasta_nucc, "w+") as f_nucc:
for otu in otu_collection:
log.info(otu)
if isinstance(otu_collection[otu], dict):
column = 0
worksheet.write(row, column, otu)
# prepare table with 0 in each cells
for sample in otu_collection[otu]:
column = 1
for samp in samples_list:
worksheet.write(row, column, 0)
column += 1
# fill in table with nb of read for each sample and each OTU
for sample in otu_collection[otu]:
column = 1
for samp in samples_list:
if samp == sample:
worksheet.write(row, column, otu_collection[otu][sample]['total_nb_read'])
column += 1
worksheet.write(row, len(samples_list) + 1, otu_collection[otu]['global_taxonomy'].replace(';', ' '))
worksheet.write(row, len(samples_list) + 2, ",".join(otu_collection[otu]['contigs_list']))
row += 1
f_nucc.write(">" + cdd_id + "_" + otu + "_" + otu_collection[otu]['contigs_list'][0] + "\n")
f_nucc.write(str(hits_collection[cdd_id][otu_collection[otu]['contigs_list'][0]]['nuccleotide']) + "\n")
workbook.close()
read_file = pd.ExcelFile(file_xlsx)
for sheet in read_file.sheet_names:
Expand All @@ -392,21 +401,20 @@ def _create_html(options, hits_collection):
if os.path.exists(map_file_path):
os.remove(map_file_path)

map_file = open(map_file_path, "w+")
headers = ['#cdd_id', 'align_files', 'tree_files', 'cluster_files', 'cluster_nb_reads_files', 'pairwise_files', 'description', 'full_description\n']
map_file.write("\t".join(headers))
for cdd_id in hits_collection:
cdd_output = hits_collection[cdd_id]["short_description"].replace(" ", "_")
short_description = cdd_output
file_seq_aligned = cdd_output + '/seq_aligned.final_tree.fa'
tree_file = cdd_output + '/tree.dnd.png'
file_cluster = cdd_output + '/otu_cluster.csv'
file_matrix = cdd_output + "/identity_matrix.csv"
cluster_nb_reads_files = cdd_output + "/cluster_nb_reads_files.tab"
map_file.write(cdd_id + "\t" + file_seq_aligned + "\t" + tree_file + "\t")
map_file.write(file_cluster + "\t" + cluster_nb_reads_files + "\t" + file_matrix + "\t")
map_file.write(short_description + "\t" + hits_collection[cdd_id]["full_description"] + "\n")
map_file.close()
with open(map_file_path, "w+") as map_file:
headers = ['#cdd_id', 'align_files', 'tree_files', 'cluster_files', 'cluster_nb_reads_files', 'pairwise_files', 'description', 'full_description\n']
map_file.write("\t".join(headers))
for cdd_id in hits_collection:
cdd_output = hits_collection[cdd_id]["short_description"].replace(" ", "_")
short_description = cdd_output
file_seq_aligned = cdd_output + '/seq_aligned.final_tree.fa'
tree_file = cdd_output + '/tree.dnd.png'
file_cluster = cdd_output + '/otu_cluster.csv'
file_matrix = cdd_output + "/identity_matrix.csv"
cluster_nb_reads_files = cdd_output + "/cluster_nb_reads_files.tab"
map_file.write(cdd_id + "\t" + file_seq_aligned + "\t" + tree_file + "\t")
map_file.write(file_cluster + "\t" + cluster_nb_reads_files + "\t" + file_matrix + "\t")
map_file.write(short_description + "\t" + hits_collection[cdd_id]["full_description"] + "\n")
log.info("Writing HTML report")
html_cmd = os.path.join(options.tool_path, 'rps2tree_html.py') + ' -m ' + map_file_path + ' -o ' + options.output
log.debug(html_cmd)
Expand Down
2 changes: 1 addition & 1 deletion tools/virAnnot/rps2tsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def _read_xml(options):
hsp["evalue"] = hit_evalue
hsp["startQ"] = hit_startQ
hsp["endQ"] = hit_endQ
hsp["query_id"] = blast_record.query_id
hsp["query_id"] = blast_record.query
hsp["cdd_id"] = aln.hit_def.split(",")[0]
hsp["hit_id"] = aln.hit_id
hsp["query_length"] = blast_record.query_length # length of the query
Expand Down
Loading

0 comments on commit 16701bf

Please sign in to comment.