moved another subworkflow

biocorecrg · Oct 17, 2024 · e5352e3 · e5352e3
1 parent 437ec73
commit e5352e3
Show file tree

Hide file tree

Showing 3 changed files with 48 additions and 30 deletions.
diff --git a/main.nf b/main.nf
@@ -96,26 +96,21 @@ if ( !blosumfile.exists() ) exit 1, "Missing blosum file: ${blosumfile}!"
 LOCAL_MODULES='./modules/local/exorthist'
 LOCAL_SUBWORKFLOWS='./subworkflows/local/exorthist'
 
-include { CHECK_INPUT } from "${LOCAL_MODULES}/check_input.nf"
 include { CLUSTER_EXS } from "${LOCAL_MODULES}/cluster_exons.nf"
 include { COLLAPSE_OVERLAPPING_MATCHES } from "${LOCAL_MODULES}/collapse_matches.nf"
 include { FILTER_AND_SELECT_BEST_EX_MATCHES_BY_TARGETGENE } from "${LOCAL_MODULES}/filter_matches.nf"
 include { FORMAT_EX_CLUSTERS_INPUT } from "${LOCAL_MODULES}/format_input.nf"
 include { FORMAT_EX_CLUSTERS_OUTPUT } from "${LOCAL_MODULES}/format_output.nf"
-include { GENERATE_ANNOTATIONS } from "${LOCAL_MODULES}/generate_annotations.nf"
 include { JOIN_FILTERED_EX_MATCHES } from "${LOCAL_MODULES}/join_matches.nf"
-include { MERGE_PROT_EX_INT_ALN_INFO } from "${LOCAL_MODULES}/merge_aligns.nf"
-include { PARSE_IPA_PROT_ALN } from "${LOCAL_MODULES}/align_pairs.nf"
-include { REALIGN_EX_PAIRS } from "${LOCAL_MODULES}/realign_pairs.nf"
 include { RECLUSTER_EXS_BY_SPECIES_PAIR } from "${LOCAL_MODULES}/recluster_exs.nf"
 include { RECLUSTER_GENES_BY_SPECIES_PAIR } from "${LOCAL_MODULES}/recluster_genes.nf"
 include { SCORE_EX_MATCHES } from "${LOCAL_MODULES}/score_matches.nf"
-include { SPLIT_CLUSTERS_IN_CHUNKS } from "${LOCAL_MODULES}/split_clusters_chunks.nf"
-include { SPLIT_CLUSTERS_BY_SPECIES_PAIRS } from "${LOCAL_MODULES}/split_clusters_species.nf"
-include { SPLIT_EX_PAIRS_TO_REALIGN } from "${LOCAL_MODULES}/split_pairs.nf"
 
+
+include { ALIGN } from "${LOCAL_SUBWORKFLOWS}/align.nf"
 include { PREPARE_INPUT } from "${LOCAL_SUBWORKFLOWS}/prepare_input.nf"
 
+
 /*
  * Validate input and print log file
  */
@@ -168,27 +163,9 @@ workflow {
         params.extraexons
     )
 
-    // the last argument is the protein similarity alignment.
-    // if a prevaln folder is provided, the protein alignments present in each species pair subfolder will not be repeated.
-    PARSE_IPA_PROT_ALN(blosumfile, PREPARE_INPUT.out.alignment_input)
-
-    // Collapse EXs_to_split in batches of 500 files
-    EXs_to_split = PARSE_IPA_PROT_ALN.out.EXs_to_split
-    EXs_to_split_batches = EXs_to_split.toSortedList().flatten().buffer(size : 500, remainder: true)
-    // Split exons pairs to realign
-    SPLIT_EX_PAIRS_TO_REALIGN(EXs_to_split_batches)
-    EXs_to_realign_batches = SPLIT_EX_PAIRS_TO_REALIGN.out.EXs_to_realign_batches
-    // Flatten the results from the previous batch run and combine with sp1 and sp2 information, using sp1-sp2 as key.
-    EXs_to_realign = EXs_to_realign_batches.flatten().map{[it.getName().toString().split("_")[0],it]}.groupTuple().join(PREPARE_INPUT.out.clusters_split_ch).transpose()
-    //  Realign exons pairs (with multiple hits)
-    REALIGN_EX_PAIRS(blosumfile, EXs_to_realign)
-    // Combine all the aln_info with the realigned_exon_info for each species pair
-    aligned_subclusters_4_splitting = PARSE_IPA_PROT_ALN.out.aligned_subclusters_4_splitting
-    realigned_exons_4_merge = REALIGN_EX_PAIRS.out.realigned_exons_4_merge
-    data_4_merge = aligned_subclusters_4_splitting.groupTuple().join(realigned_exons_4_merge.groupTuple())
-    // Merge alignments information
-    MERGE_PROT_EX_INT_ALN_INFO(data_4_merge)
-    folder_jscores = MERGE_PROT_EX_INT_ALN_INFO.out.folder_jscores
+    ALIGN(blosumfile, PREPARE_INPUT.out.alignment_input, PREPARE_INPUT.out.clusters_split_ch)
+
+    folder_jscores = ALIGN.out.folder_jscores
     data_to_score = folder_jscores.join(PREPARE_INPUT.out.clusters_split_ch).map{ [it[0], it[1..-1] ]}
     // Score EX matches from aln info
     SCORE_EX_MATCHES(data_to_score)

diff --git a/subworkflows/local/exorthist/align.nf b/subworkflows/local/exorthist/align.nf
@@ -0,0 +1,42 @@
+LOCAL_MODULES='../../../modules/local/exorthist'
+
+include { MERGE_PROT_EX_INT_ALN_INFO } from "${LOCAL_MODULES}/merge_aligns.nf"
+include { PARSE_IPA_PROT_ALN } from "${LOCAL_MODULES}/align_pairs.nf"
+include { REALIGN_EX_PAIRS } from "${LOCAL_MODULES}/realign_pairs.nf"
+include { SPLIT_EX_PAIRS_TO_REALIGN } from "${LOCAL_MODULES}/split_pairs.nf"
+
+workflow ALIGN {
+
+    take:
+    blosumfile
+    alignment_input
+    clusters_split_ch
+
+    main:
+    // the last argument is the protein similarity alignment.
+    // if a prevaln folder is provided, the protein alignments present in each species pair subfolder will not be repeated.
+    PARSE_IPA_PROT_ALN(blosumfile, alignment_input)
+
+    // Collapse EXs_to_split in batches of 500 files
+    EXs_to_split = PARSE_IPA_PROT_ALN.out.EXs_to_split
+    EXs_to_split_batches = EXs_to_split.toSortedList().flatten().buffer(size : 500, remainder: true)
+    // Split exons pairs to realign
+    SPLIT_EX_PAIRS_TO_REALIGN(EXs_to_split_batches)
+    EXs_to_realign_batches = SPLIT_EX_PAIRS_TO_REALIGN.out.EXs_to_realign_batches
+    // Flatten the results from the previous batch run and combine with sp1 and sp2 information, using sp1-sp2 as key.
+    EXs_to_realign = EXs_to_realign_batches.flatten().map{[it.getName().toString().split("_")[0],it]}.groupTuple().join(clusters_split_ch).transpose()
+    //  Realign exons pairs (with multiple hits)
+    REALIGN_EX_PAIRS(blosumfile, EXs_to_realign)
+    // Combine all the aln_info with the realigned_exon_info for each species pair
+    aligned_subclusters_4_splitting = PARSE_IPA_PROT_ALN.out.aligned_subclusters_4_splitting
+    realigned_exons_4_merge = REALIGN_EX_PAIRS.out.realigned_exons_4_merge
+    data_4_merge = aligned_subclusters_4_splitting.groupTuple().join(realigned_exons_4_merge.groupTuple())
+    // Merge alignments information
+    MERGE_PROT_EX_INT_ALN_INFO(data_4_merge)
+
+    emit:
+    folder_jscores = MERGE_PROT_EX_INT_ALN_INFO.out.folder_jscores
+    aln_features = MERGE_PROT_EX_INT_ALN_INFO.out.aln_features
+    exint_aln = MERGE_PROT_EX_INT_ALN_INFO.out.exint_aln
+
+}
diff --git a/subworkflows/local/exorthist/prepare_input.nf b/subworkflows/local/exorthist/prepare_input.nf
@@ -58,7 +58,6 @@ workflow PREPARE_INPUT {
         .flatMap()
         .map { ["${it[0][0]}-${it[1][0]}".toString(), it[0][1], it[1][1]] }
 
-
     // Copy the gene cluster file to output to use for the exint_plotter and compare_exon_sets modules
     SPLIT_CLUSTERS_BY_SPECIES_PAIRS(clusterfile_ch)