From 720aba0092a715b19f83e8ecb416639cb2c14391 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabi=C3=A1n=20Robledo?= <fabianry97@gmail.com>
Date: Thu, 29 Aug 2024 13:19:33 +0200
Subject: [PATCH 1/3] Added compatibility for .gz compressed fasta/fastq
 isoforms to sqanti3_qc

---
 sqanti3_qc.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/sqanti3_qc.py b/sqanti3_qc.py
index 9a6dac2..069c97b 100755
--- a/sqanti3_qc.py
+++ b/sqanti3_qc.py
@@ -17,6 +17,8 @@
 import math
 import csv
 import numpy as np
+import gzip
+
 from statistics import mean
 from collections import defaultdict, Counter, namedtuple
 from collections.abc import Iterable
@@ -517,7 +519,13 @@ def correctionPlusORFpred(args, genome_dict):
             print("Skipping aligning of sequences because GTF file was provided.", file=sys.stdout)
 
             ind = 0
-            with open(args.isoforms, 'r') as isoforms_gtf:
+            # gzip.open and open have different default open modes:
+            # gzip.open uses "rb" (read in binary format)
+            # open uses "rt" (read in text format)
+            # This can be solved by making explicit read text mode
+            # as they share this parameter
+            open_function = gzip.open if args.isoforms.endswith('.gz') else open
+            with open_function(args.isoforms, mode='rt') as isoforms_gtf:
                 for line in isoforms_gtf:
                     if line[0] != "#" and len(line.split("\t"))!=9:
                         sys.stderr.write("\nERROR: input isoforms file with not GTF format.\n")
@@ -2149,10 +2157,16 @@ def rename_isoform_seqids(input_fasta, force_id_ignore=False):
     :return: output fasta with the cleaned up sequence ID, is_fusion flag
     """
     type = 'fasta'
-    with open(input_fasta) as h:
+    # gzip.open and open have different default open modes:
+    # gzip.open uses "rb" (read in binary format)
+    # open uses "rt" (read in text format)
+    # This can be solved by making explicit the read text mode (which is required
+    # by SeqIO.parse)
+    open_function = gzip.open if input_fasta.endswith('.gz') else open
+    with open_function(input_fasta, mode="rt") as h:
         if h.readline().startswith('@'): type = 'fastq'
-    f = open(input_fasta[:input_fasta.rfind('.')]+'.renamed.fasta', 'w')
-    for r in SeqIO.parse(open(input_fasta), type):
+    f = open_function(input_fasta[:input_fasta.rfind('.')]+'.renamed.fasta', mode='wt')
+    for r in SeqIO.parse(open_function(input_fasta, "rt"), type):
         m1 = seqid_rex1.match(r.id)
         m2 = seqid_rex2.match(r.id)
         m3 = seqid_fusion.match(r.id)

From ea84f8c493b92f7e8551024b2d9236ef24bad32e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabi=C3=A1n=20Robledo?= <fabianry97@gmail.com>
Date: Wed, 11 Sep 2024 10:03:18 +0200
Subject: [PATCH 2/3] Fix: load .gz files only when the --fasta option is
 given

---
 sqanti3_qc.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/sqanti3_qc.py b/sqanti3_qc.py
index 069c97b..abfd1c4 100755
--- a/sqanti3_qc.py
+++ b/sqanti3_qc.py
@@ -519,13 +519,7 @@ def correctionPlusORFpred(args, genome_dict):
             print("Skipping aligning of sequences because GTF file was provided.", file=sys.stdout)
 
             ind = 0
-            # gzip.open and open have different default open modes:
-            # gzip.open uses "rb" (read in binary format)
-            # open uses "rt" (read in text format)
-            # This can be solved by making explicit read text mode
-            # as they share this parameter
-            open_function = gzip.open if args.isoforms.endswith('.gz') else open
-            with open_function(args.isoforms, mode='rt') as isoforms_gtf:
+            with open(args.isoforms) as isoforms_gtf:
                 for line in isoforms_gtf:
                     if line[0] != "#" and len(line.split("\t"))!=9:
                         sys.stderr.write("\nERROR: input isoforms file with not GTF format.\n")

From 8c25e271022cc429b71614da2fbb1d06404b3122 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabi=C3=A1n=20Robledo=20Yag=C3=BCe?=
 <57529812+Fabian-RY@users.noreply.github.com>
Date: Wed, 11 Sep 2024 10:26:46 +0200
Subject: [PATCH 3/3] Fix: do not write the renamed fasta file compressed

---
 sqanti3_qc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sqanti3_qc.py b/sqanti3_qc.py
index abfd1c4..3e79f11 100755
--- a/sqanti3_qc.py
+++ b/sqanti3_qc.py
@@ -2159,7 +2159,7 @@ def rename_isoform_seqids(input_fasta, force_id_ignore=False):
     open_function = gzip.open if input_fasta.endswith('.gz') else open
     with open_function(input_fasta, mode="rt") as h:
         if h.readline().startswith('@'): type = 'fastq'
-    f = open_function(input_fasta[:input_fasta.rfind('.')]+'.renamed.fasta', mode='wt')
+    f = open(input_fasta[:input_fasta.rfind('.')]+'.renamed.fasta', mode='wt')
     for r in SeqIO.parse(open_function(input_fasta, "rt"), type):
         m1 = seqid_rex1.match(r.id)
         m2 = seqid_rex2.match(r.id)