-
Notifications
You must be signed in to change notification settings - Fork 82
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
I think we have a first working version for further testing
- Loading branch information
1 parent
48a3b77
commit d096ca2
Showing
1 changed file
with
17 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,19 @@ | ||
#!/usr/bin/env python3 | ||
|
||
""" | ||
Script Name: stringtie2utr.py | ||
Description: This script decorates a gtf file with genes generated by AUGUSTUS, | ||
BRAKER, or TSEBRA, with UTR features from a stringtie gtf file. | ||
Author: Katharina J. Hoff | ||
Email: [email protected] | ||
Date: September 29th 2023 | ||
Copyright (C) The year of copyright, Katharina J. Hoff, University of Greifswald | ||
This program is free software; you can redistribute it and/or modify | ||
it under the terms of the Artistic License. | ||
""" | ||
|
||
import argparse | ||
import re | ||
from intervaltree import IntervalTree, Interval | ||
|
@@ -450,9 +464,10 @@ def print_gtf(gtf_dict, gene_dict, tx_to_gene_dict, tx_dict): | |
printed_gene = {} | ||
# Iterate over tx_dict entries (one per transcript) | ||
for tx_id, tx_line in tx_dict.items(): | ||
if not tx_id in printed_gene: | ||
gene_id = tx_to_gene_dict[tx_id] | ||
if not gene_id in printed_gene: | ||
print(gene_dict[tx_to_gene_dict[tx_id]]) | ||
gene_printed = True | ||
printed_gene[gene_id] = True | ||
print(tx_line) | ||
sorted_features = sorted(gtf_dict.get(tx_id, []), key=lambda x: int(x.split('\t')[3])) | ||
for feature in sorted_features: | ||
|
@@ -463,7 +478,6 @@ def print_gtf(gtf_dict, gene_dict, tx_to_gene_dict, tx_dict): | |
# build new gtf line | ||
print(fields[0] + "\tstringtie2utr\t", "\t".join(fields[2:8]), "\ttranscript_id \"" + tx_id + "\"; gene_id \"" + tx_to_gene_dict[tx_id] + "\";") | ||
else: | ||
#elif "StringTie" not in feature: | ||
print(feature) | ||
|
||
def build_tree(data): | ||
|
@@ -669,8 +683,5 @@ def main(): | |
# print the updated tsebra_gtf | ||
print_gtf(tsebra_gtf, tsebra_gene_line_dict, tsebra_tx_to_gene_dict, tsebra_tx_dict) | ||
|
||
|
||
print(overlaps) | ||
|
||
if __name__ == "__main__": | ||
main() |