nf-core · pinin4fjords · Nov 20, 2023 · Nov 20, 2023 · Nov 20, 2023 · Nov 20, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,8 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Enhancements and fixes
 
-- [PR #1126](https://github.com/nf-core/rnaseq/pull/1126) - Fixes error when transcript_fasta not provided and skip_gtf_filter set to true
-- [#1125](https://github.com/nf-core/rnaseq/issues/1125) - Pipeline fails if transcript_fasta not provided and skip_gtf_filter = true
+- [[PR #1126](https://github.com/nf-core/rnaseq/pull/1126)] [[#1125](https://github.com/nf-core/rnaseq/issues/1125)] - Pipeline fails if transcript_fasta not provided and `skip_gtf_filter = true`.
+- [[PR #1127](https://github.com/nf-core/rnaseq/pull/1127)] - Enlarge sampling to determine the number of columns in `filter_gtf.py` script.
 
 ## [[3.13.1](https://github.com/nf-core/rnaseq/releases/tag/3.13.1)] - 2023-11-17
 

diff --git a/bin/filter_gtf.py b/bin/filter_gtf.py
@@ -23,14 +23,14 @@ def extract_fasta_seq_names(fasta_name: str) -> Set[str]:
 def tab_delimited(file: str) -> float:
     """Check if file is tab-delimited and return median number of tabs."""
     with open(file, "r") as f:
-        data = f.read(1024)
+        data = f.read(102400)
         return statistics.median(line.count("\t") for line in data.split("\n"))
 
 
 def filter_gtf(fasta: str, gtf_in: str, filtered_gtf_out: str, skip_transcript_id_check: bool) -> None:
     """Filter GTF file based on FASTA sequence names."""
     if tab_delimited(gtf_in) != 8:
-        raise ValueError("Invalid GTF file: Expected 8 tab-separated columns.")
+        raise ValueError("Invalid GTF file: Expected nine tab-separated columns.")
 
     seq_names_in_genome = extract_fasta_seq_names(fasta)
     logger.info(f"Extracted chromosome sequence names from {fasta}")