From 36308d31b845dbaf8fd2abafbf817bb8cb676a74 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabi=C3=A1n=20Robledo?= <fabianry97@gmail.com>
Date: Tue, 8 Oct 2024 09:41:50 +0200
Subject: [PATCH] FIX: gtf.cds.gff is now created when the --chunks option is used

---
 sqanti3_qc.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/sqanti3_qc.py b/sqanti3_qc.py
index 5ad74ee..441e430 100755
--- a/sqanti3_qc.py
+++ b/sqanti3_qc.py
@@ -431,7 +431,8 @@ def get_corr_filenames(args, dir=None):
     corrSAM = corrPathPrefix +"_corrected.sam"
     corrFASTA = corrPathPrefix +"_corrected.fasta"
     corrORF =  corrPathPrefix +"_corrected.faa"
-    return corrGTF, corrSAM, corrFASTA, corrORF
+    corrCDS_GTF_GFF = corrPathPrefix + "_corrected.gtf.cds.gff"
+    return corrGTF, corrSAM, corrFASTA, corrORF, corrCDS_GTF_GFF
 
 def get_isoform_hits_name(args, dir=None):
     d = dir if dir is not None else args.dir
@@ -461,7 +462,7 @@ def correctionPlusORFpred(args, genome_dict):
     global corrSAM
     global corrFASTA
 
-    corrGTF, corrSAM, corrFASTA, corrORF = get_corr_filenames(args)
+    corrGTF, corrSAM, corrFASTA, corrORF , corrCDS_GTF_GFF = get_corr_filenames(args)
     p = os.path.splitext(os.path.basename(corrSAM))[0]
     n_cpu = max(1, args.cpus // args.chunks)
 
@@ -1856,7 +1857,7 @@ def run(args):
     global isoform_hits_name
     global badstrandGTF
 
-    corrGTF, corrSAM, corrFASTA, corrORF = get_corr_filenames(args)
+    corrGTF, corrSAM, corrFASTA, corrORF , corrCDS_GTF_GFF = get_corr_filenames(args)
     badstrandGTF = args.dir + "/unknown_strand.gtf"
     outputClassPath, outputJuncPath = get_class_junc_filenames(args)
 
@@ -2365,7 +2366,7 @@ def combine_split_runs(args, split_dirs):
     Combine .faa, .fasta, .gtf, .classification.txt, .junctions.txt
     Then write out the PDF report
     """
-    corrGTF, corrSAM, corrFASTA, corrORF = get_corr_filenames(args)
+    corrGTF, corrSAM, corrFASTA, corrORF , corrCDS_GTF_GFF = get_corr_filenames(args)
     outputClassPath, outputJuncPath = get_class_junc_filenames(args)
 
     if not args.skipORF:
@@ -2374,9 +2375,10 @@ def combine_split_runs(args, split_dirs):
     f_gtf = open(corrGTF, 'w')
     f_class = open(outputClassPath, 'w')
     f_junc = open(outputJuncPath, 'w')
+    f_cds_gtf_gff = open(corrCDS_GTF_GFF, 'w')
 
     for i,split_d in enumerate(split_dirs):
-        _gtf, _sam, _fasta, _orf = get_corr_filenames(args, split_d)
+        _gtf, _sam, _fasta, _orf , _CDS_GTF_GFF = get_corr_filenames(args, split_d)
         _class, _junc = get_class_junc_filenames(args, split_d)
         if not args.skipORF:
             with open(_orf) as h: f_faa.write(h.read())
@@ -2394,11 +2396,18 @@ def combine_split_runs(args, split_dirs):
             else:
                 h.readline()
             f_junc.write(h.read())
+        with open(_CDS_GTF_GFF) as h:
+            if i == 0: # Only the first chunk's first line (treated as the header) is copied to the combined file; later chunks skip theirs. NOTE(review): confirm this file really starts with a header line.
+                f_cds_gtf_gff.write(h.readline())
+            else:
+                h.readline()
+            f_cds_gtf_gff.write(h.read())
 
     f_fasta.close()
     f_gtf.close()
     f_class.close()
     f_junc.close()
+    f_cds_gtf_gff.close()
     if not args.skipORF:
         f_faa.close()
 
@@ -2579,7 +2588,7 @@ def main():
     if args.chunks == 1:
         run(args)
         if args.isoAnnotLite:
-            corrGTF, corrSAM, corrFASTA, corrORF = get_corr_filenames(args)
+            corrGTF, corrSAM, corrFASTA, corrORF , corrCDS_GTF_GFF = get_corr_filenames(args)
             outputClassPath, outputJuncPath = get_class_junc_filenames(args)
             run_isoAnnotLite(corrGTF, outputClassPath, outputJuncPath, args.output, args.gff3)
     else:
@@ -2588,7 +2597,7 @@ def main():
         SPLIT_ROOT_DIR = get_split_dir(args)
         shutil.rmtree(SPLIT_ROOT_DIR)
         if args.isoAnnotLite:
-            corrGTF, corrSAM, corrFASTA, corrORF = get_corr_filenames(args)
+            corrGTF, corrSAM, corrFASTA, corrORF , corrCDS_GTF_GFF = get_corr_filenames(args)
             outputClassPath, outputJuncPath = get_class_junc_filenames(args)
             run_isoAnnotLite(corrGTF, outputClassPath, outputJuncPath, args.output, args.gff3)