Skip to content

Commit

Permalink
Merge pull request #4 from jguhlin/nextflow_reboot_jg
Browse files Browse the repository at this point in the history
Add main.nf into the new branch
  • Loading branch information
jguhlin authored Aug 2, 2024
2 parents f140b69 + 82ae3cd commit d8d9ea2
Show file tree
Hide file tree
Showing 5 changed files with 192 additions and 0 deletions.
80 changes: 80 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
name: EDTA Nextflow CI
on:
  push:
    branches:
      - nextflow_reboot
  pull_request:
    branches:
      - nextflow_reboot

env:
  NXF_ANSI_LOG: false
  # Shared cache/library dirs so Singularity images are reused across steps.
  NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity
  NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity

# Cancel superseded runs of the same PR/branch to save CI minutes.
concurrency:
  group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
  cancel-in-progress: true

jobs:
  test:
    name: Run pipeline with test data
    # Only run on push if this is the jguhlin nextflow_reboot branch (merged PRs)
    if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'jguhlin/EDTA') }}"
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted: a bare 23.04.0 would still be a string, but quoting version
        # matrices guards against float coercion (e.g. a future '23.1').
        nextflow:
          - '23.04.0'
        profile:
          - docker
          - singularity
          - conda

    steps:
      - name: Check out pipeline code
        uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4

      - name: Install Nextflow
        uses: nf-core/setup-nextflow@v2
        with:
          version: ${{ matrix.nextflow }}

      # - name: Disk space cleanup
      #   uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
      #   Will be needed with large data sets. Can take long to run

      - name: Setup apptainer
        if: matrix.profile == 'singularity'
        # NOTE(review): '@main' is unpinned — consider pinning to a commit SHA
        # like the other actions for reproducible CI.
        uses: eWaterCycle/setup-apptainer@main

      - name: Set up Singularity
        if: matrix.profile == 'singularity'
        run: |
          mkdir -p $NXF_SINGULARITY_CACHEDIR
          mkdir -p $NXF_SINGULARITY_LIBRARYDIR

      - name: Set up miniconda
        if: matrix.profile == 'conda'
        uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3
        with:
          miniconda-version: "latest"
          auto-update-conda: true
          channels: conda-forge,bioconda

      - name: Conda setup
        if: matrix.profile == 'conda'
        run: |
          conda clean -a
          conda install -n base conda-libmamba-solver
          conda config --set solver libmamba
          echo $(realpath $CONDA)/condabin >> $GITHUB_PATH
          echo $(realpath python) >> $GITHUB_PATH

      - name: Run pipeline with test data
        run: |
          nextflow run \
            ${GITHUB_WORKSPACE} \
            -profile ${{ matrix.profile }} \
            --genomes https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/genome.fasta \
            --outdir ./results
Binary file added bin/fffx
Binary file not shown.
11 changes: 11 additions & 0 deletions cleanNXF.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Remove Nextflow run artifacts from the current directory, keeping any
# cached conda environments under work/ so they don't have to be rebuilt.

# .nextflow* already covers the .nextflow dir, .nextflow.log* and .nextflow.pid,
# so a separate `rm -rf .nextflow.pid` is redundant.
rm -rf .nextflow*
echo "Cleaned .nextflow..."

# Iterate with a glob instead of parsing `ls` output (word-splitting /
# whitespace-in-name hazard, ShellCheck SC2045).
if [ -d work ]; then
  for entry in work/*; do
    [ -e "$entry" ] || continue          # glob matched nothing: work/ is empty
    case "$(basename "$entry")" in
      *conda*) ;;                        # keep conda env caches
      *) rm -rf "$entry" ;;
    esac
  done
fi
echo "Cleaned work..."
61 changes: 61 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

// Pipeline parameters. All can be overridden on the command line
// (e.g. --genomes, --outdir) or via a params file.
params.genomes = 'genomes/*' // To allow for more flexibility when specifying params.
// I'll be testing the whole pipeline with a single public chromosome from nf-core test datasets
params.species = 'others'        // species preset passed downstream
params.cds = ''                  // optional CDS FASTA for masking coding regions
params.curatedlib = ''           // optional curated TE library
params.rmlib = ''                // optional RepeatMasker library
params.sensitive = false         // enable slower, more sensitive mode
params.anno = false              // run whole-genome annotation step
params.rmout = ''                // optional pre-computed RepeatMasker .out
params.maxdiv = 40               // max divergence threshold
params.evaluate = true           // run evaluation step
params.exclude = ''              // optional file of regions to exclude
params.maxint = 5000             // max intact-element length/interval
params.outdir = 'results'        // base directory for published outputs

// TODO: Check input repeat libraries, CDS, etc...
// TODO: Check exclude file
// Rename FASTA headers (just makes everything easier later)
// TODO: Put fffx on bioconda or somewhere so it just runs, otherwise tiny container
// Rename FASTA headers (just makes everything easier later)
// TODO: Put fffx on bioconda or somewhere so it just runs, otherwise tiny container
process sanitize {
    tag "${x.baseName}"

    // On Linux conda is not needed for this process
    container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer'
        ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04'
        : 'nf-core/ubuntu:20.04' }"

    publishDir "${params.outdir}/sanitized_genomes"
    time "10m"
    memory 3.GB
    cpus 1

    input:
    path x

    output:
    // (sample name, sanitized FASTA, old->new header translation table)
    tuple val("${x.baseName}"), path("${x.baseName}_sanitized.fasta"), path("${x.baseName}_sanitized.translation_table.tsv")

    // Explicit `script:` keyword (was implicit) — Nextflow idiom, and avoids
    // ambiguity if an `exec:` or `stub:` section is added later.
    script:
    """
    fffx length-filter ${x} filtered.fa 1000
    fffx sanitize filtered.fa ${x.baseName}_sanitized
    """
}

// Test run:
// ./main.nf --genomes https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/genome.fasta -profile docker
workflow {
    // nf-core pipelines/modules use the [ [meta], data ] channel pattern;
    // we should follow it too so local and nf-core modules stay interoperable.
    //
    // Personal code-style notes below — please revert if you don't like them.

    // Emits one genome FASTA at a time, hence the singular name ch_genome.
    ch_genome = Channel.fromPath(params.genomes)

    // MODULE: sanitize (module/workflow names should follow CAPITAL_SNAKE)
    sanitize ( ch_genome )
}
40 changes: 40 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Execution profiles: pick a container/package backend with `-profile <name>`.
profiles {
    singularity {
        singularity.enabled    = true
        singularity.autoMounts = true
    }
    apptainer {
        apptainer.enabled    = true
        apptainer.autoMounts = true
    }
    conda {
        conda.enabled = true
    }
    mamba {
        conda.enabled  = true
        conda.useMamba = true
    }
    podman {
        podman.enabled       = true
        podman.userEmulation = true
        podman.runOptions    = "--runtime crun --platform linux/x86_64 --systemd=always"
    }
    docker {
        docker.enabled    = true
        docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64'
    }
}

// Pull unqualified images from quay.io for every container engine.
docker.registry      = 'quay.io'
podman.registry      = 'quay.io'
singularity.registry = 'quay.io'
apptainer.registry   = 'quay.io'

// Increase time available to build Conda environment
conda {
    createTimeout = "120 min"
}

manifest {
    nextflowVersion = '!>=23.04.0'
}

0 comments on commit d8d9ea2

Please sign in to comment.