Skip to content

Commit

Permalink
moved another subworkflow
Browse files Browse the repository at this point in the history
  • Loading branch information
toniher committed Oct 17, 2024
1 parent 437ec73 commit e5352e3
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 30 deletions.
35 changes: 6 additions & 29 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -96,26 +96,21 @@ if ( !blosumfile.exists() ) exit 1, "Missing blosum file: ${blosumfile}!"
LOCAL_MODULES='./modules/local/exorthist'
LOCAL_SUBWORKFLOWS='./subworkflows/local/exorthist'

include { CHECK_INPUT } from "${LOCAL_MODULES}/check_input.nf"
include { CLUSTER_EXS } from "${LOCAL_MODULES}/cluster_exons.nf"
include { COLLAPSE_OVERLAPPING_MATCHES } from "${LOCAL_MODULES}/collapse_matches.nf"
include { FILTER_AND_SELECT_BEST_EX_MATCHES_BY_TARGETGENE } from "${LOCAL_MODULES}/filter_matches.nf"
include { FORMAT_EX_CLUSTERS_INPUT } from "${LOCAL_MODULES}/format_input.nf"
include { FORMAT_EX_CLUSTERS_OUTPUT } from "${LOCAL_MODULES}/format_output.nf"
include { GENERATE_ANNOTATIONS } from "${LOCAL_MODULES}/generate_annotations.nf"
include { JOIN_FILTERED_EX_MATCHES } from "${LOCAL_MODULES}/join_matches.nf"
include { MERGE_PROT_EX_INT_ALN_INFO } from "${LOCAL_MODULES}/merge_aligns.nf"
include { PARSE_IPA_PROT_ALN } from "${LOCAL_MODULES}/align_pairs.nf"
include { REALIGN_EX_PAIRS } from "${LOCAL_MODULES}/realign_pairs.nf"
include { RECLUSTER_EXS_BY_SPECIES_PAIR } from "${LOCAL_MODULES}/recluster_exs.nf"
include { RECLUSTER_GENES_BY_SPECIES_PAIR } from "${LOCAL_MODULES}/recluster_genes.nf"
include { SCORE_EX_MATCHES } from "${LOCAL_MODULES}/score_matches.nf"
include { SPLIT_CLUSTERS_IN_CHUNKS } from "${LOCAL_MODULES}/split_clusters_chunks.nf"
include { SPLIT_CLUSTERS_BY_SPECIES_PAIRS } from "${LOCAL_MODULES}/split_clusters_species.nf"
include { SPLIT_EX_PAIRS_TO_REALIGN } from "${LOCAL_MODULES}/split_pairs.nf"


include { ALIGN } from "${LOCAL_SUBWORKFLOWS}/align.nf"
include { PREPARE_INPUT } from "${LOCAL_SUBWORKFLOWS}/prepare_input.nf"


/*
* Validate input and print log file
*/
Expand Down Expand Up @@ -168,27 +163,9 @@ workflow {
params.extraexons
)

// the last argument is the protein similarity alignment.
// if a prevaln folder is provided, the protein alignments present in each species pair subfolder will not be repeated.
PARSE_IPA_PROT_ALN(blosumfile, PREPARE_INPUT.out.alignment_input)

// Collapse EXs_to_split in batches of 500 files
EXs_to_split = PARSE_IPA_PROT_ALN.out.EXs_to_split
EXs_to_split_batches = EXs_to_split.toSortedList().flatten().buffer(size : 500, remainder: true)
// Split exons pairs to realign
SPLIT_EX_PAIRS_TO_REALIGN(EXs_to_split_batches)
EXs_to_realign_batches = SPLIT_EX_PAIRS_TO_REALIGN.out.EXs_to_realign_batches
// Flatten the results from the previous batch run and combine with sp1 and sp2 information, using sp1-sp2 as key.
EXs_to_realign = EXs_to_realign_batches.flatten().map{[it.getName().toString().split("_")[0],it]}.groupTuple().join(PREPARE_INPUT.out.clusters_split_ch).transpose()
// Realign exons pairs (with multiple hits)
REALIGN_EX_PAIRS(blosumfile, EXs_to_realign)
// Combine all the aln_info with the realigned_exon_info for each species pair
aligned_subclusters_4_splitting = PARSE_IPA_PROT_ALN.out.aligned_subclusters_4_splitting
realigned_exons_4_merge = REALIGN_EX_PAIRS.out.realigned_exons_4_merge
data_4_merge = aligned_subclusters_4_splitting.groupTuple().join(realigned_exons_4_merge.groupTuple())
// Merge alignments information
MERGE_PROT_EX_INT_ALN_INFO(data_4_merge)
folder_jscores = MERGE_PROT_EX_INT_ALN_INFO.out.folder_jscores
ALIGN(blosumfile, PREPARE_INPUT.out.alignment_input, PREPARE_INPUT.out.clusters_split_ch)

folder_jscores = ALIGN.out.folder_jscores
data_to_score = folder_jscores.join(PREPARE_INPUT.out.clusters_split_ch).map{ [it[0], it[1..-1] ]}
// Score EX matches from aln info
SCORE_EX_MATCHES(data_to_score)
Expand Down
42 changes: 42 additions & 0 deletions subworkflows/local/exorthist/align.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
LOCAL_MODULES='../../../modules/local/exorthist'

include { MERGE_PROT_EX_INT_ALN_INFO } from "${LOCAL_MODULES}/merge_aligns.nf"
include { PARSE_IPA_PROT_ALN } from "${LOCAL_MODULES}/align_pairs.nf"
include { REALIGN_EX_PAIRS } from "${LOCAL_MODULES}/realign_pairs.nf"
include { SPLIT_EX_PAIRS_TO_REALIGN } from "${LOCAL_MODULES}/split_pairs.nf"

workflow ALIGN {

take:
blosumfile
alignment_input
clusters_split_ch

main:
// the last argument is the protein similarity alignment.
// if a prevaln folder is provided, the protein alignments present in each species pair subfolder will not be repeated.
PARSE_IPA_PROT_ALN(blosumfile, alignment_input)

// Collapse EXs_to_split in batches of 500 files
EXs_to_split = PARSE_IPA_PROT_ALN.out.EXs_to_split
EXs_to_split_batches = EXs_to_split.toSortedList().flatten().buffer(size : 500, remainder: true)
// Split exons pairs to realign
SPLIT_EX_PAIRS_TO_REALIGN(EXs_to_split_batches)
EXs_to_realign_batches = SPLIT_EX_PAIRS_TO_REALIGN.out.EXs_to_realign_batches
// Flatten the results from the previous batch run and combine with sp1 and sp2 information, using sp1-sp2 as key.
EXs_to_realign = EXs_to_realign_batches.flatten().map{[it.getName().toString().split("_")[0],it]}.groupTuple().join(clusters_split_ch).transpose()
// Realign exons pairs (with multiple hits)
REALIGN_EX_PAIRS(blosumfile, EXs_to_realign)
// Combine all the aln_info with the realigned_exon_info for each species pair
aligned_subclusters_4_splitting = PARSE_IPA_PROT_ALN.out.aligned_subclusters_4_splitting
realigned_exons_4_merge = REALIGN_EX_PAIRS.out.realigned_exons_4_merge
data_4_merge = aligned_subclusters_4_splitting.groupTuple().join(realigned_exons_4_merge.groupTuple())
// Merge alignments information
MERGE_PROT_EX_INT_ALN_INFO(data_4_merge)

emit:
folder_jscores = MERGE_PROT_EX_INT_ALN_INFO.out.folder_jscores
aln_features = MERGE_PROT_EX_INT_ALN_INFO.out.aln_features
exint_aln = MERGE_PROT_EX_INT_ALN_INFO.out.exint_aln

}
1 change: 0 additions & 1 deletion subworkflows/local/exorthist/prepare_input.nf
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ workflow PREPARE_INPUT {
.flatMap()
.map { ["${it[0][0]}-${it[1][0]}".toString(), it[0][1], it[1][1]] }


// Copy the gene cluster file to output to use for the exint_plotter and compare_exon_sets modules
SPLIT_CLUSTERS_BY_SPECIES_PAIRS(clusterfile_ch)

Expand Down

0 comments on commit e5352e3

Please sign in to comment.