From d096ca20d5e47e181600b06812ba158307a2f3df Mon Sep 17 00:00:00 2001 From: KatharinaHoff Date: Fri, 29 Sep 2023 16:36:20 +0200 Subject: [PATCH] I think we have a first working version for further testing --- scripts/stringtie2utr.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/scripts/stringtie2utr.py b/scripts/stringtie2utr.py index 53ef6a6..26148ca 100755 --- a/scripts/stringtie2utr.py +++ b/scripts/stringtie2utr.py @@ -1,5 +1,19 @@ #!/usr/bin/env python3 +""" +Script Name: stringtie2utr.py +Description: This script decorates a gtf file with genes generated by AUGUSTUS, + BRAKER, or TSEBRA, with UTR features from a stringtie gtf file. +Author: Katharina J. Hoff +Email: katharina.hoff@uni-greifswald.de +Date: September 29th 2023 + +Copyright (C) The year of copyright, Katharina J. Hoff, University of Greifswald + +This program is free software; you can redistribute it and/or modify +it under the terms of the Artistic License. +""" + import argparse import re from intervaltree import IntervalTree, Interval @@ -450,9 +464,10 @@ def print_gtf(gtf_dict, gene_dict, tx_to_gene_dict, tx_dict): printed_gene = {} # Iterate over gene_dict entries for tx_id, tx_line in tx_dict.items(): - if not tx_id in printed_gene: + gene_id = tx_to_gene_dict[tx_id] + if not gene_id in printed_gene: print(gene_dict[tx_to_gene_dict[tx_id]]) - gene_printed = True + printed_gene[gene_id] = True print(tx_line) sorted_features = sorted(gtf_dict.get(tx_id, []), key=lambda x: int(x.split('\t')[3])) for feature in sorted_features: @@ -463,7 +478,6 @@ def print_gtf(gtf_dict, gene_dict, tx_to_gene_dict, tx_dict): # build new gtf line print(fields[0] + "\tstringtie2utr\t", "\t".join(fields[2:8]), "\ttranscript_id \"" + tx_id + "\"; gene_id \"" + tx_to_gene_dict[tx_id] + "\";") else: - #elif "StringTie" not in feature: print(feature) def build_tree(data): @@ -669,8 +683,5 @@ def main(): # print the updated tsebra_gtf print_gtf(tsebra_gtf, 
tsebra_gene_line_dict, tsebra_tx_to_gene_dict, tsebra_tx_dict) - - print(overlaps) - if __name__ == "__main__": main()