sheynkman-lab · cgpu · Nov 24, 2020 · Nov 7, 2020 · Nov 7, 2020 · Nov 7, 2020
diff --git a/conf/test.config b/conf/test.config
@@ -0,0 +1,19 @@
+/*
+ * -------------------------------------------------
+ *  Nextflow config file for running tests
+ * -------------------------------------------------
+ * Defines bundled input files and everything required
+ * to run a fast and simple test. Use as follows:
+ *   nextflow run sheynkman-lab/Long-Read-Proteogenomics -profile test,<docker/singularity>
+ */
+
+params {
+  config_profile_name = 'Test profile'
+  config_profile_description = 'Minimal test dataset to check pipeline function'
+  // Limit resources so that this can run on GitHub Actions
+  max_cpus = 2
+  max_memory = 6.GB
+  max_time = 48.h
+
+  // Input data: no input parameters are set in this profile yet
+}
diff --git a/environment.yml b/environment.yml
@@ -6,11 +6,30 @@ channels:
   - bioconda
   - defaults
 dependencies:
-  - conda-forge::python=3.7.3
-  - conda-forge::markdown=3.1.1
-  - conda-forge::pymdown-extensions=6.0
-  - conda-forge::pygments=2.5.2
-  - bioconda::fastqc=0.11.8
-  - bioconda::multiqc=1.7
+  # General utils
+  - python=3.7.3
+  - markdown=3.1.1
+  - pymdown-extensions=6.0
+  - pygments=2.5.2
+  - multiqc=1.7
+  - biopython
+  # Module 1: SMARTLink - CCS
   - pbccs
-  - 
+  - pbbam
+  # Module 2: Iso-Seq 3
+  - isoseq3
+  - lima
+  - pbmm2
+  - pbcoretools
+  - bamtools
+  # Module 3: SQANTI3, separate docker image
+  # Module 4: CPAT
+  # Module 5: 6 Frame Translation
+  # (biopython is already listed once under "General utils")
+  # Module 6: Transcriptome Summary
+  - numpy
+  - pandas
+  # Module 7: ORF Calling
+  # Module 8: Refined Db Generation
+  # Module 9: Db Annotation
+  # Module 10: MetaMorpheus
diff --git a/main.nf b/main.nf
@@ -74,6 +74,38 @@ summary['Config Profile'] = workflow.profile
 log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n")
 log.info "-\033[2m--------------------------------------------------\033[0m-"
 
+/*
+ * Configuring channels based on input parameters
+ */
+
+// Fail early: Nothing to analyze if the user does not provide an input pb_bams_folder
+if (!params.pb_bams_folder ) {
+    exit 1, "Please provide an input folder with --pb_bams_folder to proceed, see --help for more information"
+}
+
+// A tar.gz archive is not consumed by any process yet. Stop with a clear message:
+// without this guard ch_pb_bams_folder would stay undefined and the run would die
+// further down with "No such variable: ch_pb_bams_folder".
+if (hasExtension(params.pb_bams_folder, "tar.gz")) {
+    exit 1, "A tar.gz archive is not supported for --pb_bams_folder yet, please provide a folder containing .bam files"
+}
+
+// Folder input: emit every *.bam file found directly inside it.
+// TODO: pair each bam with its index via Channel.fromFilePairs and params.bai_suffix
+ch_pb_bams_folder = Channel.fromPath("${params.pb_bams_folder}/*.bam")
+
+// Expose the input bams under the name used by the rest of the workflow
+ch_pb_bams_folder
+    .set { ch_pb_subreads_bams }
+
+(ch_pb_subreads_bams_for_pbi,
+ch_pb_subreads_bams_to_display) = ch_pb_subreads_bams.into(2) // one copy to index, one to print
+// Print every input bam path to the console
+ch_pb_subreads_bams_to_display.view()
+// One channel item per ccs chunk index: 1..params.number_of_ccs_chunks
+ch_ccs_chunks = Channel.from(1.."${params.number_of_ccs_chunks}".toInteger())
+(ch_ccs_chunks, ch_ccs_chunks_to_display) = ch_ccs_chunks.into(2)
+
 /*
  * STEP  - validate template
  */
@@ -90,6 +122,86 @@ process validate {
     """
 }
 
+/*
+ *  Module 1: SMARTLink - CCS
+ */
+
+// Run pbindex on each subreads bam; the .pbi index is required for using the ccs --chunk parallelisation
+process generate_pbi {
+    tag "${pb_subreads_bam.simpleName}" // label each task with the file name stripped of its extensions
+    cpus 1
+    echo true // forward the task's stdout to the console
+
+    input:
+    file(pb_subreads_bam) from ch_pb_subreads_bams_for_pbi
+    // Emits a tuple: sample name, the input bam itself (baseName + ".bam"), and its .bam.pbi index
+    output:
+    set val("${pb_subreads_bam.simpleName}"),
+        file("${pb_subreads_bam.baseName}.bam"), 
+        file("${pb_subreads_bam.baseName}.bam.pbi") into ch_pb_subreads_bams_for_ccs
+
+    script:
+    """
+    pbindex ${pb_subreads_bam}
+    """
+}
+
+ch_ccs_chucked_bams = ch_ccs_chunks.combine(ch_pb_subreads_bams_for_ccs) // each chunk index x each (sample, bam, pbi)
+// Real CCS run (defined only when params.mock_ccs is not set): one task per combined (chunk, sample) item
+if (!params.mock_ccs) {
+  process smartlink_ccs {
+      tag "sample:${sample},chunk:${ith_chunk}"
+      publishDir "${params.outdir}/smartlink_ccs/", mode: params.publish_dir_mode
+      cpus 1
+
+      input:
+      set val(ith_chunk), val(sample), file(pb_subreads_bam), file(pb_subreads_bai) from ch_ccs_chucked_bams
+      // NOTE(review): pb_subreads_bai receives the .bam.pbi emitted by generate_pbi, not a .bai
+      output:
+      set val("${sample}"), 
+          file("${sample}.ccs.${ith_chunk}.bam"), 
+          file("${sample}.ccs.${ith_chunk}.bam.pbi") into ch_ccs_pacbio_bams
+
+      script:
+      // Follows the chunking example from the ccs docs:
+      // ccs movie.subreads.bam movie.ccs.1.bam --chunk 1/10 -j <THREADS>
+      """
+      ccs ${pb_subreads_bam} ${sample}.ccs.${ith_chunk}.bam --chunk ${ith_chunk}/${params.number_of_ccs_chunks} -j ${task.cpus}
+      """
+  }
+}
+
+if (params.mock_ccs) { // mock mode: same inputs/outputs as smartlink_ccs, but ccs is not executed
+  process smartlink_ccs_mock {
+      tag "sample:${sample},chunk:${ith_chunk}"
+      publishDir "${params.outdir}/smartlink_ccs/", mode: params.publish_dir_mode
+      cpus 1
+
+      input:
+      set val(ith_chunk), val(sample), file(pb_subreads_bam), file(pb_subreads_bai) from ch_ccs_chucked_bams
+      // NOTE(review): pb_subreads_bai receives the .bam.pbi emitted by generate_pbi, not a .bai
+      output:
+      set val("${sample}"), 
+          file("${sample}.ccs.${ith_chunk}.bam"), 
+          file("${sample}.ccs.${ith_chunk}.bam.pbi") into ch_ccs_pacbio_bams
+      // touch creates empty placeholder files; the extra .bam.bai is not a declared output
+      script:
+      // The real command is kept as a shell comment inside the script for reference:
+      // ccs movie.subreads.bam movie.ccs.1.bam --chunk 1/10 -j <THREADS>
+      """
+      # ccs ${pb_subreads_bam} ${sample}.ccs.${ith_chunk}.bam --chunk ${ith_chunk}/${params.number_of_ccs_chunks} -j ${task.cpus}
+      touch ${sample}.ccs.${ith_chunk}.bam ${sample}.ccs.${ith_chunk}.bam.bai ${sample}.ccs.${ith_chunk}.bam.pbi
+      """
+  }
+}
+
+
+
+
+
+
+
+
 def logHeader() {
     // Log colors ANSI codes
     c_black = params.monochrome_logs ? '' : "\033[0;30m";
@@ -128,3 +240,11 @@ def logHeader() {
     -${c_dim}--------------------------------------------------${c_reset}-
     """.stripIndent()
 }
+
+// Functions (credits for most of them to https://github.com/nf-core/sarek developers)
+
+// Case-insensitive check that the string form of `it` ends with `extension`
+def hasExtension(it, extension) {
+    def lowered = it.toString().toLowerCase()
+    return lowered.endsWith(extension.toLowerCase())
+}
diff --git a/nextflow.config b/nextflow.config
@@ -20,11 +20,21 @@ params {
   max_cpus = 16
   max_time = 240.h
 
+  // Module 1: SMARTLink - CCS
+  bai_suffix = 'bam.bai' // CAUTION: be sure that you declare bam.bai or .bai explicitly
+  pb_bams_folder = 'testdata' // folder whose *.bam files main.nf uses as input
+  number_of_ccs_chunks = 10 // N in `ccs --chunk i/N`; main.nf creates chunk indices 1..N
+  // NOTE(review): main.nf also reads params.mock_ccs; no default for it is visible in this hunk
 }
 
 // Container slug. Stable releases should specify release tag!
 // Developmental code should specify :dev
-process.container = 'sheynkmanlab/proteogenomics-base:dev'
+// NOTE(review): Docker is switched on here outside any profile, i.e. for every run;
+docker.enabled = true  // confirm this does not clash with the singularity/podman profiles
+
+process {
+  container = 'cgpu/proteogenomics:1.0dev' // container image applied to all processes
+ }
 
 profiles {
   docker { 
@@ -37,7 +47,7 @@ profiles {
   podman {
     podman.enabled = true
   }
-  test { includeConfig 'conf/executors/test.config' }
+  test { includeConfig 'conf/test.config' }
 }
 
 // Export these variables to prevent local Python/R libraries from conflicting with those in the container