Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sylph #65

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions assets/samplesheet.csv
Original file line number Diff line number Diff line change
@@ -1,7 +1,2 @@
sample,fastq_1,fastq_2,rundir,tags
SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane1
SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A2_S2_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A2_S2_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane1
SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A3_S3_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A3_S3_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane2
SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group1
SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group2
SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group3
sample,fastq_1,fastq_2,rundir,tags,reference
test2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test2_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test2_2.fastq.gz,,,https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/sarscov2/genome/genome.fasta
6 changes: 6 additions & 0 deletions assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@
"pattern": "^\\S+\\.f(ast)?q\\.gz$",
"errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
},
"reference": {
"type": "string",
"format": "file-path",
"pattern": "^\\S+\\.(fasta|fa|fna|tar)(\\.gz)?$",
"errorMessage": "Reference file cannot contain spaces and must have extension '.fasta', '.fa', '.fna' or '.tar', optionally gzipped ('.gz')"
},
"rundir": {
"type": "string",
"format": "path",
Expand Down
5 changes: 4 additions & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ params {
// Input data
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = params.pipelines_testdata_base_path + 'seqinspector/testdata/NovaSeq6000/samplesheet.csv'
//input = params.pipelines_testdata_base_path + 'seqinspector/testdata/NovaSeq6000/samplesheet.csv'

// Local samplesheet that adds an extra 'reference' column
input = './assets/samplesheet.csv'

// Genome references
genome = 'R64-1-1'
Expand Down
7 changes: 7 additions & 0 deletions modules/local/sylph/profile/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the SYLPH_PROFILE module.
# The version pin matches the 0.6.1 container tags used in this module's main.nf.
channels:
  - conda-forge
  - bioconda
dependencies:
  # Conda version strings carry no leading "v"
  - bioconda::sylph=0.6.1
47 changes: 47 additions & 0 deletions modules/local/sylph/profile/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
//
// Run `sylph profile` on a sample sketch and a genome database sketch.
//
// Input : [ meta, sketch_fastq, sketch_fastq_genome ] - the tuple emitted by SYLPH_SKETCH
// Output: profile_out - [ meta, profile_out.tsv ]
//         versions    - versions.yml recording the sylph version
//
process SYLPH_PROFILE {
    tag "$meta.id"
    label 'process_high'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/sylph:0.6.1--h4ac6f70_0' :
        'biocontainers/sylph:0.6.1--h4ac6f70_0' }"

    input:
    tuple val(meta), path(sketch_fastq), path(sketch_fastq_genome)

    output:
    tuple val(meta), path('profile_out.tsv'), emit: profile_out
    // Declared so the versions.yml written below can be collected by the workflow
    path "versions.yml"                     , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    """
    sylph profile \\
        -t $task.cpus \\
        $args \\
        $sketch_fastq \\
        $sketch_fastq_genome \\
        -o profile_out.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        sylph: \$(sylph -V|awk '{print \$2}')
    END_VERSIONS
    """

    stub:
    """
    touch profile_out.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        sylph: \$(sylph -V|awk '{print \$2}')
    END_VERSIONS
    """
}
7 changes: 7 additions & 0 deletions modules/local/sylph/sketch/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the SYLPH_SKETCH module.
# The version pin matches the 0.6.1 container tags used in this module's main.nf.
channels:
  - conda-forge
  - bioconda
dependencies:
  # Conda version strings carry no leading "v"
  - bioconda::sylph=0.6.1
52 changes: 52 additions & 0 deletions modules/local/sylph/sketch/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
//
// Run `sylph sketch` on a sample's reads and its reference genome.
//
// Input : [ meta, reads, reference ] - meta.single_end selects the -r or the -1/-2 read flags
// Output: sketch_fastq_genome - [ meta, my_sketches/*.sylsp, database.syldb ]
//         versions            - versions.yml recording the sylph version
//
process SYLPH_SKETCH {
    tag "$meta.id"
    label 'process_high'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/sylph:0.6.1--h4ac6f70_0' :
        'biocontainers/sylph:0.6.1--h4ac6f70_0' }"

    input:
    tuple val(meta), path(reads), path(reference)

    output:
    tuple val(meta), path('my_sketches/*.sylsp'), path('database.syldb'), emit: sketch_fastq_genome
    // Declared so the versions.yml written below can be collected by the workflow
    path "versions.yml"                                                 , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Single-end samples are passed with -r, paired-end samples with -1/-2
    def fastq = meta.single_end ? "-r ${reads[0]}" : "-1 ${reads[0]} -2 ${reads[1]}"
    // NOTE(review): -c 200 / -k 31 are hard-coded after $args, so they cannot be
    // changed through ext.args - confirm this is intended
    """
    sylph sketch \\
        -t $task.cpus \\
        $args \\
        $fastq \\
        -g $reference \\
        -S $prefix \\
        -d my_sketches \\
        -c 200 \\
        -k 31

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        sylph: \$(sylph -V|awk '{print \$2}')
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def end = meta.single_end ? "" : ".paired"
    // The stub must create every declared output: a .sylsp file inside
    // my_sketches/ and the database.syldb genome sketch
    """
    mkdir -p my_sketches
    touch my_sketches/${prefix}${end}.sylsp
    touch database.syldb

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        sylph: \$(sylph -V|awk '{print \$2}')
    END_VERSIONS
    """
}
15 changes: 9 additions & 6 deletions subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -77,20 +77,22 @@ workflow PIPELINE_INITIALISATION {
.toList()
.flatMap { it.withIndex().collect { entry, idx -> entry + "${idx+1}" } }
.map {
meta, fastq_1, fastq_2, idx ->
meta, fastq_1, fastq_2, reference, idx ->
def tags = meta.tags ? meta.tags.tokenize(":") : []
def updated_meta = meta + [ id:"${meta.sample}_${idx}", tags:tags ]
if (!fastq_2) {
return [
updated_meta.id,
updated_meta + [ single_end:true ],
[ fastq_1 ]
[ fastq_1 ],
reference
]
} else {
return [
updated_meta.id,
updated_meta + [ single_end:false ],
[ fastq_1, fastq_2 ]
[ fastq_1, fastq_2 ],
reference
]
}
}
Expand All @@ -103,11 +105,12 @@ workflow PIPELINE_INITIALISATION {
// meta, fastqs ->
// return [ meta, fastqs.flatten() ]
// }
.view{"dadadad$it"}
.set { ch_samplesheet }

ch_samplesheet
.map {
meta, fastqs -> meta.tags
meta, fastqs, reference -> meta.tags
}
.flatten()
.unique()
Expand Down Expand Up @@ -189,15 +192,15 @@ def validateInputParameters() {
// Validate channels from input samplesheet
//
/**
 * Validate one grouped samplesheet entry and reduce it to a single record.
 *
 * @param input list whose elements 1..3 are the metas, the fastqs and the reference
 *              (element 0 is not read here)
 * @return      [ metas[0], fastqs, reference ]
 * Calls error() when the metas do not all share the same single_end value.
 */
def validateInputSamplesheet(input) {
    // Three-element destructuring: the reference column travels with the reads
    def (metas, fastqs, reference) = input[1..3]

    // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end
    def endedness_ok = metas.collect{ meta -> meta.single_end }.unique().size() == 1
    if (!endedness_ok) {
        error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}")
    }

    // Callers destructure three elements (meta, fastqs, reference)
    return [ metas[0], fastqs, reference ]
}
//
// Get attribute from genome config file e.g. fasta
Expand Down
19 changes: 17 additions & 2 deletions workflows/seqinspector.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
include { SEQTK_SAMPLE } from '../modules/nf-core/seqtk/sample/main'
include { FASTQC } from '../modules/nf-core/fastqc/main'

include {SYLPH_SKETCH } from '../modules/local/sylph/sketch/main'
include {SYLPH_PROFILE } from '../modules/local/sylph/profile/main'

include { MULTIQC as MULTIQC_GLOBAL } from '../modules/nf-core/multiqc/main'
include { MULTIQC as MULTIQC_PER_TAG } from '../modules/nf-core/multiqc/main'

Expand Down Expand Up @@ -38,7 +41,7 @@ workflow SEQINSPECTOR {
if (params.sample_size > 0 ) {
ch_sample_sized = SEQTK_SAMPLE(
ch_samplesheet.map {
meta, reads -> [meta, reads, params.sample_size]
meta, reads, reference -> [meta, reads, params.sample_size]
}
).reads
ch_versions = ch_versions.mix(SEQTK_SAMPLE.out.versions.first())
Expand All @@ -52,7 +55,7 @@ workflow SEQINSPECTOR {
//
FASTQC (
ch_sample_sized.map {
meta, subsampled -> [meta, subsampled]
meta, subsampled, reference -> [meta, subsampled]
}
)
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip)
Expand All @@ -69,6 +72,18 @@ workflow SEQINSPECTOR {
newLine: true
).set { ch_collated_versions }

//
// MODULE: Run SYLPH
//
SYLPH_SKETCH (
ch_samplesheet
)

sketch_files = SYLPH_SKETCH.out.sketch_fastq_genome

SYLPH_PROFILE (
sketch_files
)

//
// MODULE: MultiQC
Expand Down
Loading