Skip to content

Commit

Permalink
Merge pull request #4 from jguhlin/nextflow_reboot_jg
Browse files Browse the repository at this point in the history
Add main.nf into the new branch
  • Loading branch information
jguhlin authored Aug 2, 2024
2 parents f140b69 + 82ae3cd commit d8d9ea2
Show file tree
Hide file tree
Showing 5 changed files with 192 additions and 0 deletions.
80 changes: 80 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
name: EDTA Nextflow CI
on:
  push:
    branches:
      - nextflow_reboot
  pull_request:
    branches:
      - nextflow_reboot

env:
  NXF_ANSI_LOG: false
  # Shared cache/library dirs so Singularity images are reused across steps.
  NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity
  NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity

# Cancel superseded runs of the same PR/branch to save CI minutes.
concurrency:
  group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
  cancel-in-progress: true

jobs:
  test:
    name: Run pipeline with test data
    # Only run on push if this is the jguhlin nextflow_reboot branch (merged PRs)
    if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'jguhlin/EDTA') }}"
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted: a bare 23.04.0 would still be a string, but quoting version
        # matrices guards against float coercion (e.g. a future '23.1').
        nextflow:
          - '23.04.0'
        profile:
          - docker
          - singularity
          - conda

    steps:
      - name: Check out pipeline code
        uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4

      - name: Install Nextflow
        uses: nf-core/setup-nextflow@v2
        with:
          version: ${{ matrix.nextflow }}

      # - name: Disk space cleanup
      #   uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
      #   Will be needed with large data sets. Can take long to run

      - name: Setup apptainer
        if: matrix.profile == 'singularity'
        # NOTE(review): '@main' is unpinned — consider pinning to a commit SHA
        # like the other actions for reproducible CI.
        uses: eWaterCycle/setup-apptainer@main

      - name: Set up Singularity
        if: matrix.profile == 'singularity'
        run: |
          mkdir -p $NXF_SINGULARITY_CACHEDIR
          mkdir -p $NXF_SINGULARITY_LIBRARYDIR

      - name: Set up miniconda
        if: matrix.profile == 'conda'
        uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3
        with:
          miniconda-version: "latest"
          auto-update-conda: true
          channels: conda-forge,bioconda

      - name: Conda setup
        if: matrix.profile == 'conda'
        run: |
          conda clean -a
          conda install -n base conda-libmamba-solver
          conda config --set solver libmamba
          echo $(realpath $CONDA)/condabin >> $GITHUB_PATH
          echo $(realpath python) >> $GITHUB_PATH

      - name: Run pipeline with test data
        run: |
          nextflow run \
            ${GITHUB_WORKSPACE} \
            -profile ${{ matrix.profile }} \
            --genomes https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/genome.fasta \
            --outdir ./results
Binary file added bin/fffx
Binary file not shown.
11 changes: 11 additions & 0 deletions cleanNXF.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Remove Nextflow run artifacts from the current directory, keeping any
# cached conda environments under work/ so they don't have to be rebuilt.

# .nextflow* already covers the .nextflow dir, .nextflow.log* and .nextflow.pid,
# so a separate `rm -rf .nextflow.pid` is redundant.
rm -rf .nextflow*
echo "Cleaned .nextflow..."

# Iterate with a glob instead of parsing `ls` output (word-splitting /
# whitespace-in-name hazard, ShellCheck SC2045).
if [ -d work ]; then
  for entry in work/*; do
    [ -e "$entry" ] || continue          # glob matched nothing: work/ is empty
    case "$(basename "$entry")" in
      *conda*) ;;                        # keep conda env caches
      *) rm -rf "$entry" ;;
    esac
  done
fi
echo "Cleaned work..."
61 changes: 61 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

// Pipeline parameters. All can be overridden on the command line
// (e.g. --genomes, --outdir) or via a params file.
params.genomes = 'genomes/*' // To allow for more flexibility when specifying params.
// I'll be testing the whole pipeline with a single public chromosome from nf-core test datasets
params.species = 'others'        // species preset passed downstream
params.cds = ''                  // optional CDS FASTA for masking coding regions
params.curatedlib = ''           // optional curated TE library
params.rmlib = ''                // optional RepeatMasker library
params.sensitive = false         // enable slower, more sensitive mode
params.anno = false              // run whole-genome annotation step
params.rmout = ''                // optional pre-computed RepeatMasker .out
params.maxdiv = 40               // max divergence threshold
params.evaluate = true           // run evaluation step
params.exclude = ''              // optional file of regions to exclude
params.maxint = 5000             // max intact-element length/interval
params.outdir = 'results'        // base directory for published outputs

// TODO: Check input repeat libraries, CDS, etc...
// TODO: Check exclude file
// Rename FASTA headers (just makes everything easier later)
// TODO: Put fffx on bioconda or somewhere so it just runs, otherwise tiny container
// Rename FASTA headers (just makes everything easier later)
// TODO: Put fffx on bioconda or somewhere so it just runs, otherwise tiny container
process sanitize {
    tag "${x.baseName}"

    // On Linux conda is not needed for this process
    container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer'
        ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04'
        : 'nf-core/ubuntu:20.04' }"

    publishDir "${params.outdir}/sanitized_genomes"
    time "10m"
    memory 3.GB
    cpus 1

    input:
    path x

    output:
    // (sample name, sanitized FASTA, old->new header translation table)
    tuple val("${x.baseName}"), path("${x.baseName}_sanitized.fasta"), path("${x.baseName}_sanitized.translation_table.tsv")

    // Explicit `script:` keyword (was implicit) — Nextflow idiom, and avoids
    // ambiguity if an `exec:` or `stub:` section is added later.
    script:
    """
    fffx length-filter ${x} filtered.fa 1000
    fffx sanitize filtered.fa ${x.baseName}_sanitized
    """
}

// Test run:
// ./main.nf --genomes https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/genome.fasta -profile docker
workflow {
    // nf-core pipelines/modules use the [ [meta], data ] channel pattern;
    // we should follow it too so local and nf-core modules stay interoperable.
    //
    // Personal code-style notes below — please revert if you don't like them.

    // Emits one genome FASTA at a time, hence the singular name ch_genome.
    ch_genome = Channel.fromPath(params.genomes)

    // MODULE: sanitize (module/workflow names should follow CAPITAL_SNAKE)
    sanitize ( ch_genome )
}
40 changes: 40 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Execution profiles: pick a container/package backend with `-profile <name>`.
profiles {
    singularity {
        singularity.enabled    = true
        singularity.autoMounts = true
    }
    apptainer {
        apptainer.enabled    = true
        apptainer.autoMounts = true
    }
    conda {
        conda.enabled = true
    }
    mamba {
        conda.enabled  = true
        conda.useMamba = true
    }
    podman {
        podman.enabled       = true
        podman.userEmulation = true
        podman.runOptions    = "--runtime crun --platform linux/x86_64 --systemd=always"
    }
    docker {
        docker.enabled    = true
        docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64'
    }
}

// Pull unqualified images from quay.io for every container engine.
docker.registry      = 'quay.io'
podman.registry      = 'quay.io'
singularity.registry = 'quay.io'
apptainer.registry   = 'quay.io'

// Increase time available to build Conda environment
conda {
    createTimeout = "120 min"
}

manifest {
    nextflowVersion = '!>=23.04.0'
}

0 comments on commit d8d9ea2

Please sign in to comment.