Skip to content

Commit

Permalink
Merge pull request #132 from Clinical-Genomics/add_nallo
Browse files Browse the repository at this point in the history
Add nallo
  • Loading branch information
rannick authored Feb 12, 2025
2 parents 07e0cec + 0bd12a2 commit 2fc375a
Show file tree
Hide file tree
Showing 10 changed files with 669 additions and 6 deletions.
5 changes: 5 additions & 0 deletions cg_hermes/cli/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from cg_hermes.config.mip_dna import MIP_DNA_TAGS
from cg_hermes.config.mip_rna import MIP_RNA_TAGS
from cg_hermes.config.mutant import MUTANT_COMMON_TAGS
from cg_hermes.config.nallo import NALLO_TAGS
from cg_hermes.config.raredisease import RAREDISEASE_TAGS
from cg_hermes.config.rnafusion import RNAFUSION_TAGS
from cg_hermes.config.taxprofiler import TAXPROFILER_TAGS
Expand Down Expand Up @@ -68,6 +69,10 @@ class OutputFormat(StrEnum):
"header": ["Mutant tags", "Mandatory", "HK tags", "Used by"],
"tags": MUTANT_COMMON_TAGS,
},
Workflow.NALLO: {
"header": ["Nallo tags", "Mandatory", "HK tags", "Used by"],
"tags": NALLO_TAGS,
},
Workflow.RAREDISEASE: {
"header": ["Raredisease tags", "Mandatory", "HK tags", "Used by"],
"tags": RAREDISEASE_TAGS,
Expand Down
3 changes: 3 additions & 0 deletions cg_hermes/cli/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from cg_hermes.config.mip_dna import MIP_DNA_TAGS
from cg_hermes.config.mip_rna import MIP_RNA_TAGS
from cg_hermes.config.mutant import MUTANT_COMMON_TAGS
from cg_hermes.config.nallo import NALLO_TAGS
from cg_hermes.config.raredisease import RAREDISEASE_TAGS
from cg_hermes.config.rnafusion import RNAFUSION_TAGS
from cg_hermes.config.taxprofiler import TAXPROFILER_TAGS
Expand Down Expand Up @@ -73,6 +74,8 @@ def validate_tags_cmd(workflow: Workflow) -> None:
tag_map = FLUFFY_COMMON_TAGS
elif workflow == Workflow.MUTANT:
tag_map = MUTANT_COMMON_TAGS
elif workflow == Workflow.NALLO:
tag_map = NALLO_TAGS
elif workflow == Workflow.RAREDISEASE:
tag_map = RAREDISEASE_TAGS
elif workflow == Workflow.RNAFUSION:
Expand Down
290 changes: 290 additions & 0 deletions cg_hermes/config/nallo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,290 @@
"""Tags that are defined in the deliverables file, mapped to the tags used in CG.

Tag sets that exist in all deliverables files are set to mandatory. Tag sets that exist in < 4 deliverables
files are not mandatory by default. However, the tags that are available to a particular analysis are
mandatory for that analysis.
"""

from cg_hermes.config.nextflow import NEXTFLOW_TAGS
from cg_hermes.constants.tags import (
AlignmentTags,
AnalysisTags,
BioinfoToolsTags,
FamilyTags,
NextflowTags,
NalloTags,
ReportTags,
VariantTags,
UsageTags,
)

# Maps each set of identifiers from the Nallo deliverables file to the tags used in CG.
#
# Every value is a dict with three entries:
#   "tags":         the CG tags attached to the matching file
#   "is_mandatory": whether the entry is flagged as mandatory
#   "used_by":      the consumers the file is tagged for
#
# NOTE: every key must be unique. A dict display silently keeps only the LAST value given for a
# repeated key, so a copy-pasted key overwrites the earlier entry without any error. The assembly
# BAM entries below used to be clobbered that way by BED/modkit entries carrying the same keys;
# those tags belong to (and are already provided by) the "summary_counts" entries.
NALLO_COMMON_TAGS = {
    # Haplotagged alignment
    frozenset(["alignment", "alignment_haplotags"]): {
        "tags": [AlignmentTags.BAM, NalloTags.HAPLOTAGS],
        "is_mandatory": True,
        "used_by": [UsageTags.SCOUT, UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["alignment", "alignment_haplotags_index"]): {
        "tags": [AlignmentTags.BAM_INDEX, NalloTags.HAPLOTAGS],
        "is_mandatory": True,
        "used_by": [UsageTags.SCOUT, UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    # Assembly, per haplotype
    frozenset(["assembly", "summary_hap1"]): {
        "tags": [NalloTags.HAP1, NalloTags.ASSEMBLY, NalloTags.ASSEMBLY_SUMMARY],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["assembly", "summary_hap2"]): {
        "tags": [NalloTags.HAP2, NalloTags.ASSEMBLY, NalloTags.ASSEMBLY_SUMMARY],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["assembly", "assembly_hap1_mapped"]): {
        "tags": [AlignmentTags.BAM, NalloTags.HAP1, NalloTags.ASSEMBLY],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["assembly", "assembly_hap1_mapped_index"]): {
        "tags": [AlignmentTags.BAM_INDEX, NalloTags.HAP1, NalloTags.ASSEMBLY],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["assembly", "assembly_hap2_mapped"]): {
        "tags": [AlignmentTags.BAM, NalloTags.HAP2, NalloTags.ASSEMBLY],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["assembly", "assembly_hap2_mapped_index"]): {
        "tags": [AlignmentTags.BAM_INDEX, NalloTags.HAP2, NalloTags.ASSEMBLY],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    # Modkit pileup summary counts, per haplotype and ungrouped
    frozenset(["summary_counts", "hap1"]): {
        "tags": [AnalysisTags.BED, NalloTags.HAP1, BioinfoToolsTags.MODKIT_PILEUP],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["summary_counts", "hap1_index"]): {
        "tags": [AnalysisTags.BED_INDEX, NalloTags.HAP1, BioinfoToolsTags.MODKIT_PILEUP],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["summary_counts", "hap2"]): {
        "tags": [AnalysisTags.BED, NalloTags.HAP2, BioinfoToolsTags.MODKIT_PILEUP],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["summary_counts", "hap2_index"]): {
        "tags": [AnalysisTags.BED_INDEX, NalloTags.HAP2, BioinfoToolsTags.MODKIT_PILEUP],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["summary_counts", "ungrouped"]): {
        "tags": [AnalysisTags.BED, NalloTags.UNGROUPED, BioinfoToolsTags.MODKIT_PILEUP],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["summary_counts", "ungrouped_index"]): {
        "tags": [AnalysisTags.BED_INDEX, NalloTags.UNGROUPED, BioinfoToolsTags.MODKIT_PILEUP],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    # Coverage and QC reports
    frozenset(["qc_bam", "mosdepth_d4"]): {
        "tags": [AnalysisTags.COVERAGE, ReportTags.D4],
        "is_mandatory": True,
        "used_by": [UsageTags.SCOUT, UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset({"multiqc", "multiqc-html"}): {
        "is_mandatory": True,
        "tags": [ReportTags.MULTIQC_HTML],
        "used_by": [UsageTags.SCOUT, UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset({"multiqc", "multiqc-json"}): {
        "is_mandatory": True,
        "tags": [ReportTags.MULTIQC_JSON],
        "used_by": [UsageTags.LONG_TERM_STORAGE],
    },
    # Pedigree and relatedness
    frozenset(["pedigree", "pedigree_fam"]): {
        "tags": [FamilyTags.PEDIGREE],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.SCOUT, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["somalier", "relate_html"]): {
        "tags": [BioinfoToolsTags.SOMALIER, NalloTags.RELATE_HTML],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.SCOUT, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["somalier", "relate_pairs"]): {
        "tags": [BioinfoToolsTags.SOMALIER, NalloTags.RELATE_PAIRS],
        "is_mandatory": True,
        "used_by": [UsageTags.SCOUT, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["somalier", "relate_samples"]): {
        "tags": [BioinfoToolsTags.SOMALIER, NalloTags.RELATE_SAMPLES],
        "is_mandatory": True,
        "used_by": [UsageTags.SCOUT, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["deepvariant", "report"]): {
        "tags": [NalloTags.DEEPVARIANT_REPORT],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    # Paraphase
    # The repeated element collapses, so this key is equal to frozenset({"paraphase"}).
    frozenset(["paraphase", "paraphase"]): {
        "tags": [AlignmentTags.BAM, BioinfoToolsTags.PARAPHASE],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["paraphase", "paraphase_index"]): {
        "tags": [AlignmentTags.BAM_INDEX, BioinfoToolsTags.PARAPHASE],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["paraphase", "json"]): {
        "tags": [BioinfoToolsTags.PARAPHASE, ReportTags.JSON],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    # NOTE(review): BAM_INDEX on the two VCF entries below looks like a copy-paste leftover from
    # "paraphase_index" - confirm against the consumers of these tags before removing it.
    frozenset(["paraphase", "vcf"]): {
        "tags": [AlignmentTags.BAM_INDEX, BioinfoToolsTags.PARAPHASE, VariantTags.VCF],
        "is_mandatory": False,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["paraphase", "vcf_index"]): {
        "tags": [AlignmentTags.BAM_INDEX, BioinfoToolsTags.PARAPHASE, VariantTags.VCF_INDEX],
        "is_mandatory": False,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    # Repeats
    frozenset(["sorted_repeats", "vcf_str"]): {
        "tags": [NalloTags.REPEATS, NalloTags.SORTED, VariantTags.VCF],
        "is_mandatory": True,
        "used_by": [UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["sorted_repeats", "vcf_str_index"]): {
        "tags": [NalloTags.REPEATS, NalloTags.SORTED, VariantTags.VCF_INDEX],
        "is_mandatory": True,
        "used_by": [UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["spanning_repeats", "bam"]): {
        "tags": [NalloTags.REPEATS, NalloTags.SPANNING, AlignmentTags.BAM],
        "is_mandatory": True,
        "used_by": [UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["spanning_repeats", "bam_index"]): {
        "tags": [NalloTags.REPEATS, NalloTags.SPANNING, AlignmentTags.BAM_INDEX],
        "is_mandatory": True,
        "used_by": [UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["repeats_annotated", "vcf_str"]): {
        "tags": [NalloTags.REPEATS, VariantTags.VCF_STR],
        "is_mandatory": True,
        "used_by": [UsageTags.SCOUT, UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    # NOTE(review): the index carries SORTED while its VCF above does not - confirm which is intended.
    frozenset(["repeats_annotated", "vcf_str_index"]): {
        "tags": [NalloTags.REPEATS, NalloTags.SORTED, VariantTags.VCF_STR_INDEX],
        "is_mandatory": True,
        "used_by": [UsageTags.SCOUT, UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    # SNV calls, research and clinical
    frozenset(["snv_annotated", "vcf_snv_research"]): {
        "tags": [AnalysisTags.RESEARCH, VariantTags.VCF_SNV],
        "is_mandatory": True,
        "used_by": [UsageTags.SCOUT, UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["snv_annotated", "vcf_snv_research_index"]): {
        "tags": [AnalysisTags.RESEARCH, VariantTags.VCF_SNV_INDEX],
        "is_mandatory": True,
        "used_by": [UsageTags.SCOUT, UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["snv_annotated_filtered", "vcf_snv_clinical"]): {
        "tags": [AnalysisTags.CLINICAL, VariantTags.VCF_SNV],
        "is_mandatory": True,
        "used_by": [UsageTags.SCOUT, UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["snv_annotated_filtered", "vcf_snv_clinical_index"]): {
        "tags": [AnalysisTags.CLINICAL, VariantTags.VCF_SNV_INDEX],
        "is_mandatory": True,
        "used_by": [UsageTags.SCOUT, UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    # SV calls, research and clinical
    frozenset(["sv_annotated_ranked", "vcf_sv_research"]): {
        "tags": [AnalysisTags.RESEARCH, VariantTags.VCF_SV],
        "is_mandatory": True,
        "used_by": [UsageTags.SCOUT, UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["sv_annotated_ranked", "vcf_sv_research_index"]): {
        "tags": [AnalysisTags.RESEARCH, VariantTags.VCF_SV_INDEX],
        "is_mandatory": True,
        "used_by": [UsageTags.SCOUT, UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["sv_annotated_ranked_filtered", "vcf_sv_clinical"]): {
        "tags": [AnalysisTags.CLINICAL, VariantTags.VCF_SV],
        "is_mandatory": True,
        "used_by": [UsageTags.SCOUT, UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["sv_annotated_ranked_filtered", "vcf_sv_clinical_index"]): {
        "tags": [AnalysisTags.CLINICAL, VariantTags.VCF_SV_INDEX],
        "is_mandatory": True,
        "used_by": [UsageTags.SCOUT, UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    # Copy number and depth tracks
    frozenset(["copy_number", "bedgraph"]): {
        "tags": [VariantTags.CNV, NalloTags.BEDGRAPH],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["depth_track", "bigwig"]): {
        "tags": [BioinfoToolsTags.HIFICNV, AnalysisTags.BIGWIG],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset(["maf_depth_track", "bigwig"]): {
        "tags": [BioinfoToolsTags.HIFICNV, AnalysisTags.BIGWIG, NalloTags.MAF],
        "is_mandatory": True,
        "used_by": [UsageTags.CLINICAL_DELIVERY, UsageTags.LONG_TERM_STORAGE],
    },
    # Nextflow run files
    frozenset({"nextflow-params"}): {
        "is_mandatory": True,
        "tags": [NextflowTags.NEXTFLOW_PARAMS],
        "used_by": [UsageTags.CG, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset({"nextflow-config"}): {
        "is_mandatory": True,
        "tags": [NextflowTags.NEXTFLOW_CONFIG],
        "used_by": [UsageTags.CG, UsageTags.LONG_TERM_STORAGE],
    },
    frozenset({"samplesheet"}): {
        "is_mandatory": True,
        "tags": [NextflowTags.SAMPLESHEET],
        "used_by": [UsageTags.CG, UsageTags.LONG_TERM_STORAGE],
    },
}

# Full Nallo tag map: the Nallo-specific entries merged with the shared Nextflow entries.
# On a key present in both, the NEXTFLOW_TAGS value takes precedence (right-hand operand wins).
NALLO_TAGS = NALLO_COMMON_TAGS | NEXTFLOW_TAGS
4 changes: 1 addition & 3 deletions cg_hermes/config/nextflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
mandatory by default. However, the tags that are available to a particular analysis is mandatory for that analysis.
"""

from cg_hermes.constants.tags import UsageTags, NextflowTags

from cg_hermes.constants.tags import (
NextflowTags,
QCTags,
Expand All @@ -26,7 +24,7 @@
"tags": [QCTags.QC_METRICS, ReportTags.DELIVERABLE],
"used_by": [UsageTags.CG, UsageTags.LONG_TERM_STORAGE],
},
frozenset({"manifest", "manifest"}): {
frozenset({"manifest"}): {
"is_mandatory": False,
"tags": [NextflowTags.MANIFEST],
"used_by": [UsageTags.SCOUT, UsageTags.LONG_TERM_STORAGE],
Expand Down
Loading

0 comments on commit 2fc375a

Please sign in to comment.