Skip to content

Commit

Permalink
Nf composition (#15)
Browse files Browse the repository at this point in the history
This merge changes some workflow inputs, so follow-up commits will be required to fix them, but it is necessary for the ongoing repository restructuring.

* runAssembly in pipeline

* host removal testing + cleanup

* tests for runAssembly

* reads to contig restructured as subworkflow. adding nf-test CI

* fixing JDK version for CI

* testing Apptainer for CI

* cleanup

* updated snapshots for JDK 17

* adding debugging output

* more testing output for GH actions

* added Git LFS to testing yml

* adding sharding to tests - checking to see if this resolves space issues with the runner

* removing files from LFS

* basic testing for runReadsToContig

* adding nf-test file

* attempting optimized testing requirements

* reverting testing strategy

* host removal testing accounts for inconsistent file naming
  • Loading branch information
aw-watson authored Dec 23, 2024
1 parent 02cd579 commit cdcda0e
Show file tree
Hide file tree
Showing 46 changed files with 892 additions and 512 deletions.
Empty file added .gitattributes
Empty file.
39 changes: 39 additions & 0 deletions .github/workflows/ci-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Continuous-integration workflow: runs the nf-test suite on every push,
# split across four shards (one matrix job per shard).
name: CI Tests

on: [push]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # One job per shard; the total is read back via strategy.job-total below,
        # so adding or removing an entry here is the only change needed to re-shard.
        shard: [1, 2, 3, 4]

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Temurin is the maintained successor of the AdoptOpenJDK ('adopt') builds,
      # which no longer receive updates.
      - name: Set up JDK 17
        uses: actions/setup-java@v4
        with:
          java-version: '17'
          distribution: 'temurin'

      - name: Set up Apptainer 1.3.0
        uses: eWaterCycle/setup-apptainer@v2
        with:
          apptainer-version: 1.3.0

      - name: Setup Nextflow 24.10.1
        uses: nf-core/setup-nextflow@v1
        with:
          version: "24.10.1"

      # The installer script drops an nf-test binary in the working directory;
      # move it onto PATH for the following step.
      - name: Install nf-test
        run: |
          wget -qO- https://get.nf-test.com | bash
          sudo mv nf-test /usr/local/bin/

      - name: Run Tests (Shard ${{ matrix.shard }}/${{ strategy.job-total }})
        run: nf-test test --ci --shard ${{ matrix.shard }}/${{ strategy.job-total }}
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
**/__pycache__/*
.nextflow*
**/work**
**/work/**
*/logs/*
**/ec_info/*
.nf-test*
25 changes: 24 additions & 1 deletion main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,20 @@
include {SRA2FASTQ} from './modules/sra2fastq/sra2fastq.nf'
include {COUNTFASTQ} from './modules/countFastq/countFastq.nf'
include {FAQCS} from './modules/runFaQCs/runFaQCs.nf'
include {HOSTREMOVAL} from './modules/hostRemoval/hostRemoval.nf'
include {ASSEMBLY} from './modules/runAssembly/runAssembly.nf'
include {READSTOCONTIGS} from './modules/runReadsToContig/runReadsToContig.nf'

workflow {

//input specification

fastqFiles = channel.fromPath(params.shared.inputFastq, checkIfExists:true)
contigs = channel.empty()
if(params.r2c.useAssembledContigs) {
contigs = channel.fromPath(params.shared.inputContigs, checkIfExists:true)
}


if(params.modules.sra2fastq) {
SRA2FASTQ(params.sra2fastq.plus(params.shared))
Expand All @@ -23,6 +31,21 @@ workflow {

// Run the FAQCS subworkflow when it is enabled in params.modules.
if(params.modules.faqcs) {
FAQCS(params.faqcs.plus(params.shared), fastqFiles,avgLen)
// If FAQCS emits nothing on an output channel, fall back to the corresponding params value.
paired = FAQCS.out.paired.ifEmpty(params.pairedFiles)
unpaired = FAQCS.out.unpaired.ifEmpty(params.unpairedFiles)
}

// Run the HOSTREMOVAL subworkflow on the current paired/unpaired reads when enabled.
// NOTE(review): in the lines visible here, paired/unpaired are only assigned inside the
// FAQCS branch above; confirm they are also defined when params.modules.faqcs is false.
if(params.modules.hostRemoval) {
HOSTREMOVAL(params.hostRemoval.plus(params.shared),paired,unpaired)
// Same empty-channel fallback as after FAQCS.
paired = HOSTREMOVAL.out.paired.ifEmpty(params.pairedFiles)
unpaired = HOSTREMOVAL.out.unpaired.ifEmpty(params.unpairedFiles)
}

// Assemble contigs only when assembly is enabled and pre-assembled contigs were not requested,
// then pass the freshly assembled contigs to READSTOCONTIGS.
// NOTE(review): READSTOCONTIGS is invoked only inside this branch, so when
// params.r2c.useAssembledContigs is true it is not called in the lines visible here, and the
// contigs channel read from params.shared.inputContigs is not consumed; confirm this is intended.
if(params.modules.runAssembly && !params.r2c.useAssembledContigs) {
ASSEMBLY(params.assembly.plus(params.shared), paired, unpaired, avgLen)
contigs = ASSEMBLY.out.outContigs
READSTOCONTIGS(params.r2c.plus(params.shared), paired, unpaired, contigs)
}


}
20 changes: 11 additions & 9 deletions modules/hostRemoval/hostRemoval.nf
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ process collectCleanPairedReads {
path(hostFiles, stageAs: 'host?.fastq')

output:
path "hostclean.{1,2}.fastq"
path "hostclean.{1,2}.fastq", emit: paired
path "merged_host_unique.fastq", emit: hostMerged

script:
Expand All @@ -97,7 +97,7 @@ process collectCleanPairedReadsOneHost {
path cleanedFiles

output:
path "hostclean.{1,2}.fastq"
path "hostclean.{1,2}.fastq", emit:paired


script:
Expand All @@ -119,7 +119,7 @@ process collectCleanSingleReads {
path remainingUnpairedReads

output:
path "hostclean.unpaired.fastq"
path "hostclean.unpaired.fastq", emit:unpaired

script:
"""
Expand Down Expand Up @@ -158,31 +158,33 @@ workflow HOSTREMOVAL{
unpaired

main:

providedRef = channel.fromPath(settings["host"], checkIfExists:true)

//remove host reads in parallel
hostRemoval(settings, paired, unpaired, providedRef.collect())

cleaned1_ch = hostRemoval.out.cleaned1.collect()
cleaned2_ch = hostRemoval.out.cleaned2.collect()

//more than one host
if (settings["host"].size() > 1) {
if (([] + settings["host"]).size() > 1) {
//merge clean paired-end reads (intersection)
collectCleanPairedReads(settings, cleaned1_ch, cleaned2_ch, hostRemoval.out.hostReads.collect())
paired = collectCleanPairedReads.out.paired
//calculate overall stats and create PDF
hostRemovalStats(settings, hostRemoval.out.cleanstats.collect(), collectCleanPairedReads.out.hostMerged)
}
else {
//no need to merge if only reads from one host were removed
collectCleanPairedReadsOneHost(settings, cleaned1_ch.concat(cleaned2_ch))
paired = collectCleanPairedReadsOneHost(settings, cleaned1_ch.concat(cleaned2_ch)).collect()
//calculate overall stats and create PDF
hostRemovalStats(settings, hostRemoval.out.cleanstats.collect(), hostRemoval.out.hostReads)
}

//merge clean unpaired reads (removing any duplicates by read name)
collectCleanSingleReads(settings, hostRemoval.out.cleanedSingleton.collect())
unpaired = collectCleanSingleReads(settings, hostRemoval.out.cleanedSingleton.collect())

emit:
paired
unpaired


}

This file was deleted.

This file was deleted.

This file was deleted.

9 changes: 4 additions & 5 deletions runAssembly/Dockerfile → modules/runAssembly/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ RUN conda install -n assembly git
RUN conda install -c conda-forge conda-pack

ADD bin/extractLongReads.pl /opt/conda/envs/assembly/bin
ADD bin/getAvgLen.pl /opt/conda/envs/assembly/bin
ADD bin/renameFilterFasta.pl /opt/conda/envs/assembly/bin

RUN conda-pack -n assembly -o /tmp/env.tar && \
Expand All @@ -43,12 +42,12 @@ RUN /venv/bin/conda-unpack
FROM debian:latest AS runtime

COPY --from=build /venv /venv
ENV PERL5LIB=/venv/lib/perl5/core_perl

RUN /venv/bin/git clone --depth 1 https://gitlab.com/chienchi/long_read_assembly.git
RUN apt-get update && apt-get install procps -y && apt-get clean

ENV PATH="/venv/bin:$PATH"
RUN git clone --depth 1 https://gitlab.com/chienchi/long_read_assembly.git
ENV PATH="/long_read_assembly:$PATH"
ENV PATH="/venv/bin:/long_read_assembly:$PATH"
ENV PERL5LIB=/venv/lib/perl5/core_perl

SHELL ["/bin/bash", "-c"]
CMD /bin/bash
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit cdcda0e

Please sign in to comment.