From 1ce395a572f6026a649e5f19b635a4bd3abd0bdb Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Mon, 11 Sep 2023 16:10:24 +0200 Subject: [PATCH] update README tests etc... --- workflows/transcriptomics/rnaseq-sr/CHANGELOG.md | 7 ++++++- workflows/transcriptomics/rnaseq-sr/README.md | 11 +++++++---- .../transcriptomics/rnaseq-sr/rnaseq-sr-tests.yml | 11 ++++++++++- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/workflows/transcriptomics/rnaseq-sr/CHANGELOG.md b/workflows/transcriptomics/rnaseq-sr/CHANGELOG.md index 7dbd00111..53778ac2f 100644 --- a/workflows/transcriptomics/rnaseq-sr/CHANGELOG.md +++ b/workflows/transcriptomics/rnaseq-sr/CHANGELOG.md @@ -3,7 +3,12 @@ ## [0.5] 2023-03-17 ### Automatic update -- `toolshed.g2.bx.psu.edu/repos/iuc/rgrnastar/rna_star/2.7.8a+galaxy1` was updated to `toolshed.g2.bx.psu.edu/repos/iuc/rgrnastar/rna_star/2.7.10b+galaxy3` +- `toolshed.g2.bx.psu.edu/repos/iuc/rgrnastar/rna_star/2.7.8a+galaxy1` was updated to `toolshed.g2.bx.psu.edu/repos/iuc/rgrnastar/rna_star/2.7.10b+galaxy4` + +### Manual update +- Use STAR to compute normalized strand-split coverage +- Propose StringTie to compute FPKM etc... +- Make the cufflinks step optional ## [0.4] 2023-01-16 diff --git a/workflows/transcriptomics/rnaseq-sr/README.md b/workflows/transcriptomics/rnaseq-sr/README.md index 7340d24e5..4f2a8819a 100644 --- a/workflows/transcriptomics/rnaseq-sr/README.md +++ b/workflows/transcriptomics/rnaseq-sr/README.md @@ -17,14 +17,17 @@ chrM chrM_gene exon 0 16299 . - . gene_id "chrM_gene_minus"; transcript_id "chrM - forward adapter sequence: this depends on the library preparation. Usually classical RNA libraries are Truseq and ISML (relatively new Illumina library) is Nextera. If you don't know, use FastQC to determine if it is Truseq or Nextera. If the read length is relatively short (50bp), there is probably no adapter. 
- reference_genome: this field will be adapted to the genomes available for STAR -- strandness: For stranded RNA, reverse means that the read is complementary to the coding sequence, forward means that the read is in the same orientation as the coding sequence. This will help you to get from STAR only the counts corresponding to your library preparation. This is also used for the stranded coverage and for FPKM computation with cufflinks. +- strandness: For stranded RNA, reverse means that the read is complementary to the coding sequence, forward means that the read is in the same orientation as the coding sequence. This will help you to get from STAR only the counts corresponding to your library preparation. This is also used for the stranded coverage and for FPKM computation with cufflinks/StringTie. +- cufflinks_FPKM: Whether you want to get FPKM with Cufflinks (pretty long) +- stringtie_FPKM: Whether you want to get FPKM/TPM etc... with StringTie. ## Processing - The workflow will remove adapters and low quality bases and filter out any read smaller than 15bp -- The filtered reads are mapped with STAR with ENCODE parameters (for long RNA-seq but I use it for short also). STAR is also used to count reads per gene. +- The filtered reads are mapped with STAR with ENCODE parameters (for long RNA-seq but I use it for short also). STAR is also used to count reads per gene and to compute strand-specific normalized coverage (on uniquely mapped reads). - A multiQC is run to have an overview of the QC. This can also be used to get the strandness. -- FPKM values for reads and transcripts are computed with cufflinks using correction for multi-mapped reads. +- FPKM values for genes and transcripts are computed with cufflinks using correction for multi-mapped reads (optional). +- FPKM/TPM values for genes are computed with StringTie (optional). - The BAM is filtered to keep only uniquely mapped reads (tag NH:i:1). 
- Coverage unstranded, and each strand independently is computed with bedtools and normalized to the number of million uniquely mapped reads. - The three coverage files are converted to bigwig. @@ -34,7 +37,7 @@ chrM chrM_gene exon 0 16299 . - . gene_id "chrM_gene_minus"; transcript_id "chrM - The coverage stranded output depends on the strandness of the library: - If you have an unstranded library, stranded coverages are useless - If you have a forward stranded library, the label matches the orientation of reads. - - If you have a reverse stranded library, `positive strand coverage` should correspond to genes on the forward strand and uses the reads mapped on the reverse strand. `negative strand coverage` should correspond to genes on the reverse strand and uses the reads mapped on the forward strand. + - If you have a reverse stranded library, `forward` should correspond to genes on the forward strand and uses the reads mapped on the reverse strand. `reverse` should correspond to genes on the reverse strand and uses the reads mapped on the forward strand. ## Contribution diff --git a/workflows/transcriptomics/rnaseq-sr/rnaseq-sr-tests.yml b/workflows/transcriptomics/rnaseq-sr/rnaseq-sr-tests.yml index 88b4c4a3f..f2daf61ff 100644 --- a/workflows/transcriptomics/rnaseq-sr/rnaseq-sr-tests.yml +++ b/workflows/transcriptomics/rnaseq-sr/rnaseq-sr-tests.yml @@ -14,7 +14,6 @@ forward_adapter: GATCGGAAGAGCACACGTCTGAACTCCAGTCAC reference_genome: dm6 strandness: unstranded - split coverage by strand: false cufflinks_FPKM: true stringtie_FPKM: true outputs: @@ -99,3 +98,13 @@ has_size: value: 6075761 delta: 600000 + stranded coverage: + element_tests: + GSM461177_reverse: + has_size: + value: 3103918 + delta: 300000 + GSM461177_forward: + has_size: + value: 3103918 + delta: 300000