diff --git a/pom.xml b/pom.xml index 20776d1416c..0481b295fe7 100644 --- a/pom.xml +++ b/pom.xml @@ -444,9 +444,27 @@ cgds.sql migration.sql + clickhouse/clickhouse.sql + clickhouse/clickhouse_views.sql + clickhouse/clickhouse_migration.sql + + + src/main/resources/db-scripts + false + + clickhouse/clickhouse.sql + clickhouse/views.sql + clickhouse/clickhouse_migration.sql + + + + src/test/resources + false + + diff --git a/src/main/java/org/cbioportal/persistence/helper/AlterationFilterHelper.java b/src/main/java/org/cbioportal/persistence/helper/AlterationFilterHelper.java new file mode 100644 index 00000000000..75bd6e1cdbf --- /dev/null +++ b/src/main/java/org/cbioportal/persistence/helper/AlterationFilterHelper.java @@ -0,0 +1,138 @@ +package org.cbioportal.persistence.helper; + +import org.cbioportal.model.AlterationFilter; +import org.cbioportal.model.CNA; +import org.cbioportal.model.MutationEventType; +import org.cbioportal.model.util.Select; +import org.springframework.lang.NonNull; +import org.springframework.lang.Nullable; + +import java.util.Objects; + +public final class AlterationFilterHelper { + + public static AlterationFilterHelper build(@Nullable AlterationFilter alterationFilter) { + if (Objects.isNull(alterationFilter)) { + alterationFilter = new AlterationFilter(); + } + return new AlterationFilterHelper(alterationFilter); + } + + private final AlterationFilter alterationFilter; + private final Select mappedMutationTypes; + + private AlterationFilterHelper(@NonNull AlterationFilter alterationFilter){ + this.alterationFilter = alterationFilter; + this.mappedMutationTypes = buildMutationTypeList(); + } + + private Select buildMutationTypeList() { + if (alterationFilter.getMutationTypeSelect().hasNone()) { + return Select.none(); + } + if (alterationFilter.getMutationTypeSelect().hasAll()) { + return Select.all(); + } + Select typeSelects = alterationFilter.getMutationTypeSelect().map(MutationEventType::getMutationType); + 
typeSelects.inverse(alterationFilter.getMutationTypeSelect().inverse()); + + return typeSelects; + } + + public Select getMutationTypeList() { + return mappedMutationTypes; + } + + public Select getCnaTypeList() { + if (alterationFilter.getCNAEventTypeSelect().hasNone()) { + return Select.none(); + } + if (alterationFilter.getCNAEventTypeSelect().hasAll()) { + return Select.all(); + } + return alterationFilter.getCNAEventTypeSelect().map(CNA::getCode); + } + + public boolean hasDriver() { + return alterationFilter.getIncludeDriver(); + } + + public boolean hasVUSDriver() { + return alterationFilter.getIncludeVUS(); + } + + public boolean hasUnknownOncogenicity() { + return alterationFilter.getIncludeUnknownOncogenicity(); + } + + public boolean hasGermline() { + return alterationFilter.getIncludeGermline(); + } + + public boolean hasSomatic() { + return alterationFilter.getIncludeSomatic(); + } + + public boolean hasUnknownMutationStatus() { + return alterationFilter.getIncludeUnknownStatus(); + } + + public Select getSelectedTiers() { + return alterationFilter.getSelectedTiers(); + } + + public boolean hasUnknownTier() { + return alterationFilter.getIncludeUnknownTier(); + } + + public boolean isAllDriverAnnotationSelected() { + return alterationFilter.getIncludeDriver() && alterationFilter.getIncludeVUS() && alterationFilter.getIncludeUnknownOncogenicity(); + } + + public boolean isNoDriverAnnotationSelected() { + return !alterationFilter.getIncludeDriver() && !alterationFilter.getIncludeVUS() && !alterationFilter.getIncludeUnknownOncogenicity(); + } + + public boolean isSomeDriverAnnotationsSelected() { + return !isAllDriverAnnotationSelected() && !isNoDriverAnnotationSelected(); + } + + public boolean isAllMutationStatusSelected() { + return alterationFilter.getIncludeGermline() + && alterationFilter.getIncludeSomatic() + && alterationFilter.getIncludeUnknownStatus(); + } + + public boolean isNoMutationStatusSelected() { + return 
!alterationFilter.getIncludeGermline() + && !alterationFilter.getIncludeSomatic() + && !alterationFilter.getIncludeUnknownStatus(); + } + + public boolean isSomeMutationStatusSelected() { + return !isAllMutationStatusSelected() && !isNoMutationStatusSelected(); + } + + public boolean isAllTierOptionsSelected() { + return !Objects.isNull(alterationFilter.getSelectedTiers()) + && alterationFilter.getSelectedTiers().hasAll() + && alterationFilter.getIncludeUnknownTier(); + } + + public boolean isNoTierOptionsSelected() { + return (Objects.isNull(alterationFilter.getSelectedTiers()) || alterationFilter.getSelectedTiers().hasNone()) + && !alterationFilter.getIncludeUnknownTier(); + } + + public boolean isSomeTierOptionsSelected() { + return !isAllTierOptionsSelected() && !isNoTierOptionsSelected(); + } + + public boolean shouldApply() { + return isSomeDriverAnnotationsSelected() + || isSomeMutationStatusSelected() + || isSomeTierOptionsSelected() + || mappedMutationTypes.hasNone() + || (!mappedMutationTypes.hasNone() && !mappedMutationTypes.hasAll()); + } +} diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java index f8077f16315..b8c247f415b 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java @@ -4,6 +4,7 @@ import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; import org.cbioportal.model.Sample; +import org.cbioportal.persistence.helper.AlterationFilterHelper; import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter; import org.cbioportal.web.parameter.StudyViewFilter; @@ -12,7 +13,8 @@ public interface StudyViewMapper { List getFilteredSamples(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters); 
- List getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters); + List getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, + boolean applyPatientIdFilters, AlterationFilterHelper alterationFilterHelper); List getPatientClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, List attributeIds, List filteredAttributeValues); diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java index 5afbb5c5754..1d2ae017f57 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java @@ -7,6 +7,7 @@ import org.cbioportal.persistence.StudyViewRepository; import org.cbioportal.persistence.enums.ClinicalAttributeDataSource; import org.cbioportal.persistence.enums.ClinicalAttributeDataType; +import org.cbioportal.persistence.helper.AlterationFilterHelper; import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter; import org.cbioportal.web.parameter.StudyViewFilter; import org.springframework.beans.factory.annotation.Autowired; @@ -32,7 +33,9 @@ public List getFilteredSamples(StudyViewFilter studyViewFilter, Categori @Override public List getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { - return mapper.getMutatedGenes(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter)); + return mapper.getMutatedGenes(studyViewFilter, categorizedClinicalDataCountFilter, + 
shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), + AlterationFilterHelper.build(studyViewFilter.getAlterationFilter())); } @Override diff --git a/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java b/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java index e6deab5a65b..cf2ddec1ba2 100644 --- a/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java +++ b/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java @@ -75,15 +75,8 @@ public ResponseEntity> fetchMutatedGenes( @RequestAttribute(required = false, value = "interceptedStudyViewFilter") StudyViewFilter interceptedStudyViewFilter ) throws StudyNotFoundException { AlterationFilter annotationFilters = interceptedStudyViewFilter.getAlterationFilter(); - List samples = studyViewColumnarService.getFilteredSamples(interceptedStudyViewFilter); - List studyIds = new ArrayList<>(); - List sampleIds = new ArrayList<>(); - for(Sample sample : samples) { - studyIds.add(sample.getCancerStudyIdentifier()); - sampleIds.add(sample.getStableId()); - } return new ResponseEntity<>( - studyViewService.getMutationAlterationCountByGenes(studyIds, sampleIds, annotationFilters), + studyViewColumnarService.getMutatedGenes(interceptedStudyViewFilter), HttpStatus.OK ); } diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse.sql b/src/main/resources/db-scripts/clickhouse/clickhouse.sql new file mode 100644 index 00000000000..97729afabaf --- /dev/null +++ b/src/main/resources/db-scripts/clickhouse/clickhouse.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS genomic_event_mutation; +DROP TABLE IF EXISTS genomic_event; + +CREATE TABLE IF NOT EXISTS genomic_event +( + sample_unique_id String, + variant String, + variant_type String, + hugo_gene_symbol String, + gene_panel_stable_id String, + cancer_study_identifier String, + genetic_profile_stable_id String +) ENGINE = MergeTree + ORDER BY ( variant_type, sample_unique_id, 
hugo_gene_symbol); + +CREATE TABLE IF NOT EXISTS genomic_event_mutation +( + sample_unique_id String, + variant String, + hugo_gene_symbol String, + gene_panel_stable_id String, + cancer_study_identifier String, + genetic_profile_stable_id String, + mutation_type String, + mutation_status String, + driver_filter String, + driver_tiers_filter String +) ENGINE = MergeTree +ORDER BY ( hugo_gene_symbol, genetic_profile_stable_id); \ No newline at end of file diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql b/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql new file mode 100644 index 00000000000..8cea8b08993 --- /dev/null +++ b/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql @@ -0,0 +1,91 @@ + +-- Genomic Event Mutation Data +Insert into genomic_event_mutation +SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id, + me.protein_change as variant, + gene.hugo_gene_symbol as hugo_gene_symbol, + gp.stable_id as gene_panel_stable_id, + cs.cancer_study_identifier as cancer_study_identifier, + g.stable_id as genetic_profile_stable_id, + me.mutation_type as mutation_type, + mutation.mutation_status as mutation_status, + 'NA' as driver_filter, + 'NA' as drivet_tiers_filter +FROM mutation + INNER JOIN mutation_event as me on mutation.mutation_event_id = me.mutation_event_id + INNER JOIN sample_profile sp + on mutation.sample_id = sp.sample_id and mutation.genetic_profile_id = sp.genetic_profile_id + LEFT JOIN gene_panel gp on sp.panel_id = gp.internal_id + LEFT JOIN genetic_profile g on sp.genetic_profile_id = g.genetic_profile_id + INNER JOIN cancer_study cs on g.cancer_study_id = cs.cancer_study_id + INNER JOIN sample on mutation.sample_id = sample.internal_id + LEFT JOIN gene on mutation.entrez_gene_id = gene.entrez_gene_id; + +-- Genomic Event Data +Insert into genomic_event +SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id, + me.protein_change 
as variant, + 'mutation' as variant_type, + gene.hugo_gene_symbol as hugo_gene_symbol, + gp.stable_id as gene_panel_stable_id, + cs.cancer_study_identifier as cancer_study_identifier, + g.stable_id as genetic_profile_stable_id +FROM mutation + INNER JOIN mutation_event as me on mutation.mutation_event_id = me.mutation_event_id + INNER JOIN sample_profile sp + on mutation.sample_id = sp.sample_id and mutation.genetic_profile_id = sp.genetic_profile_id + LEFT JOIN gene_panel gp on sp.panel_id = gp.internal_id + LEFT JOIN genetic_profile g on sp.genetic_profile_id = g.genetic_profile_id + INNER JOIN cancer_study cs on g.cancer_study_id = cs.cancer_study_id + INNER JOIN sample on mutation.sample_id = sample.internal_id + LEFT JOIN gene on mutation.entrez_gene_id = gene.entrez_gene_id +UNION ALL +SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id, + toString(ce.alteration) as variant, + 'cna' as variant_type, + gene.hugo_gene_symbol as hugo_gene_symbol, + gp.stable_id as gene_panel_stable_id, + cs.cancer_study_identifier as cancer_study_identifier, + g.stable_id as genetic_profile_stable_id +FROM cna_event ce + INNER JOIN sample_cna_event sce ON ce.cna_event_id = sce.cna_event_id + INNER JOIN sample_profile sp ON sce.sample_id = sp.sample_id AND sce.genetic_profile_id = sp.genetic_profile_id + INNER JOIN gene_panel gp ON sp.panel_id = gp.internal_id + INNER JOIN genetic_profile g ON sp.genetic_profile_id = g.genetic_profile_id + INNER JOIN cancer_study cs ON g.cancer_study_id = cs.cancer_study_id + INNER JOIN sample ON sce.sample_id = sample.internal_id + INNER JOIN gene ON ce.entrez_gene_id = gene.entrez_gene_id +UNION ALL +SELECT + concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, + event_info as variant, + 'structural_variant' as variant_type, + gene2.hugo_gene_symbol as hugo_gene_symbol, + gene_panel.stable_id as gene_panel_stable_id, + cs.cancer_study_identifier as cancer_study_identifier, + gp.stable_id as 
genetic_profile_stable_id +FROM + structural_variant sv + INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id + INNER JOIN sample s ON sv.sample_id = s.internal_id + INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id + INNER JOIN gene gene2 ON sv.site2_entrez_gene_id = gene2.entrez_gene_id + INNER JOIN sample_profile on s.internal_id = sample_profile.sample_id + INNER JOIN gene_panel on sample_profile.panel_id = gene_panel.internal_id +UNION ALL +SELECT + concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, + event_info as variant, + 'structural_variant' as variant_type, + gene1.hugo_gene_symbol as hugo_gene_symbol, + gene_panel.stable_id as gene_panel_stable_id, + cs.cancer_study_identifier as cancer_study_identifier, + gp.stable_id as genetic_profile_stable_id +FROM + structural_variant sv + INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id + INNER JOIN sample s ON sv.sample_id = s.internal_id + INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id + INNER JOIN gene gene1 ON sv.site1_entrez_gene_id = gene1.entrez_gene_id + INNER JOIN sample_profile on s.internal_id = sample_profile.sample_id + INNER JOIN gene_panel on sample_profile.panel_id = gene_panel.internal_id; \ No newline at end of file diff --git a/src/main/resources/db-scripts/columnar/materialized_views.sql b/src/main/resources/db-scripts/clickhouse/materialized_views.sql similarity index 84% rename from src/main/resources/db-scripts/columnar/materialized_views.sql rename to src/main/resources/db-scripts/clickhouse/materialized_views.sql index 114ae42912c..bf688803c69 100644 --- a/src/main/resources/db-scripts/columnar/materialized_views.sql +++ b/src/main/resources/db-scripts/clickhouse/materialized_views.sql @@ -218,20 +218,22 @@ FROM sample_list as sl CREATE TABLE IF NOT EXISTS genomic_event ( - sample_unique_id VARCHAR(45), - variant VARCHAR(45), - variant_type VARCHAR(45), - 
hugo_gene_symbol VARCHAR(45), - gene_panel_stable_id VARCHAR(45), - cancer_study_identifier VARCHAR(45), - genetic_profile_stable_id VARCHAR(45) -) - ENGINE = MergeTree - ORDER BY (sample_unique_id, variant, hugo_gene_symbol, cancer_study_identifier, genetic_profile_stable_id) - PRIMARY KEY (sample_unique_id, variant_type, variant, hugo_gene_symbol, cancer_study_identifier, - genetic_profile_stable_id); + sample_unique_id String, + variant String, + variant_type String, + hugo_gene_symbol String, + gene_panel_stable_id String, + cancer_study_identifier String, + genetic_profile_stable_id String +) ENGINE = MergeTree +ORDER BY + ( + variant_type, + sample_unique_id, + hugo_gene_symbol + ); -INSERT INTO genomic_event +Insert into genomic_event SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id, me.protein_change as variant, 'mutation' as variant_type, @@ -240,13 +242,13 @@ SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_uniqu cs.cancer_study_identifier as cancer_study_identifier, g.stable_id as genetic_profile_stable_id FROM mutation - LEFT JOIN mutation_event as me on mutation.mutation_event_id = me.mutation_event_id - LEFT JOIN sample_profile sp - on mutation.sample_id = sp.sample_id and mutation.genetic_profile_id = sp.genetic_profile_id + INNER JOIN mutation_event as me on mutation.mutation_event_id = me.mutation_event_id + INNER JOIN sample_profile sp + on mutation.sample_id = sp.sample_id and mutation.genetic_profile_id = sp.genetic_profile_id LEFT JOIN gene_panel gp on sp.panel_id = gp.internal_id LEFT JOIN genetic_profile g on sp.genetic_profile_id = g.genetic_profile_id - LEFT JOIN cancer_study cs on g.cancer_study_id = cs.cancer_study_id - LEFT JOIN sample on mutation.sample_id = sample.internal_id + INNER JOIN cancer_study cs on g.cancer_study_id = cs.cancer_study_id + INNER JOIN sample on mutation.sample_id = sample.internal_id LEFT JOIN gene on mutation.entrez_gene_id = gene.entrez_gene_id 
UNION ALL SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id, @@ -255,7 +257,7 @@ SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_uniqu gene.hugo_gene_symbol as hugo_gene_symbol, gp.stable_id as gene_panel_stable_id, cs.cancer_study_identifier as cancer_study_identifier, - gp.stable_id as genetic_profile_stable_id + g.stable_id as genetic_profile_stable_id FROM cna_event ce INNER JOIN sample_cna_event sce ON ce.cna_event_id = sce.cna_event_id INNER JOIN sample_profile sp ON sce.sample_id = sp.sample_id AND sce.genetic_profile_id = sp.genetic_profile_id @@ -265,35 +267,39 @@ FROM cna_event ce INNER JOIN sample ON sce.sample_id = sample.internal_id INNER JOIN gene ON ce.entrez_gene_id = gene.entrez_gene_id UNION ALL -SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, - event_info as variant, - 'structural_variant' as variant_type, - gene1.hugo_gene_symbol as hugo_gene_symbol, - gene_panel.stable_id as gene_panel_stable_id, - cs.cancer_study_identifier as cancer_study_identifier, - gp.stable_id as genetic_profile_stable_id -FROM structural_variant sv - INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id - INNER JOIN sample s ON sv.sample_id = s.internal_id - INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id - INNER JOIN gene gene1 ON sv.site1_entrez_gene_id = gene1.entrez_gene_id - INNER JOIN sample_profile on s.internal_id = sample_profile.sample_id - INNER JOIN gene_panel on sample_profile.panel_id = gene_panel.internal_id +SELECT + concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, + event_info as variant, + 'structural_variant' as variant_type, + gene2.hugo_gene_symbol as hugo_gene_symbol, + gene_panel.stable_id as gene_panel_stable_id, + cs.cancer_study_identifier as cancer_study_identifier, + gp.stable_id as genetic_profile_stable_id +FROM + structural_variant sv + INNER JOIN genetic_profile gp ON 
sv.genetic_profile_id = gp.genetic_profile_id + INNER JOIN sample s ON sv.sample_id = s.internal_id + INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id + INNER JOIN gene gene2 ON sv.site2_entrez_gene_id = gene2.entrez_gene_id + INNER JOIN sample_profile on s.internal_id = sample_profile.sample_id + INNER JOIN gene_panel on sample_profile.panel_id = gene_panel.internal_id UNION ALL -SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, - event_info as variant, - 'structural_variant' as variant_type, - gene2.hugo_gene_symbol as hugo_gene_symbol, - gene_panel.stable_id as gene_panel_stable_id, - cs.cancer_study_identifier as cancer_study_identifier, - gp.stable_id as genetic_profile_stable_id -FROM structural_variant sv - INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id - INNER JOIN sample s ON sv.sample_id = s.internal_id - INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id - INNER JOIN gene gene2 ON sv.site2_entrez_gene_id = gene2.entrez_gene_id - INNER JOIN sample_profile on s.internal_id = sample_profile.sample_id - INNER JOIN gene_panel on sample_profile.panel_id = gene_panel.internal_id; +SELECT + concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, + event_info as variant, + 'structural_variant' as variant_type, + gene1.hugo_gene_symbol as hugo_gene_symbol, + gene_panel.stable_id as gene_panel_stable_id, + cs.cancer_study_identifier as cancer_study_identifier, + gp.stable_id as genetic_profile_stable_id +FROM + structural_variant sv + INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id + INNER JOIN sample s ON sv.sample_id = s.internal_id + INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id + INNER JOIN gene gene1 ON sv.site1_entrez_gene_id = gene1.entrez_gene_id + INNER JOIN sample_profile on s.internal_id = sample_profile.sample_id + INNER JOIN gene_panel on sample_profile.panel_id = 
gene_panel.internal_id; CREATE MATERIALIZED VIEW genomic_event_mutation_mv TO genomic_event AS SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id, diff --git a/src/main/resources/db-scripts/clickhouse/views.sql b/src/main/resources/db-scripts/clickhouse/views.sql new file mode 100644 index 00000000000..60d6c5f8388 --- /dev/null +++ b/src/main/resources/db-scripts/clickhouse/views.sql @@ -0,0 +1,78 @@ +DROP VIEW IF EXISTS sample_clinical_attribute_numeric_view; +DROP VIEW IF EXISTS sample_clinical_attribute_categorical_view; +DROP VIEW IF EXISTS patient_clinical_attribute_numeric_view; +DROP VIEW IF EXISTS patient_clinical_attribute_categorical_view; +DROP VIEW IF EXISTS sample_view; +DROP VIEW IF EXISTS sample_list_view; + +CREATE VIEW sample_clinical_attribute_numeric_view + AS +SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, + concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, + clinical_sample.attr_id as attribute_name, + cast(clinical_sample.attr_value as float) as attribute_value, + cs.cancer_study_identifier as cancer_study_identifier +FROM cancer_study cs + INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id + INNER JOIN sample s on p.internal_id = s.patient_id + INNER JOIN clinical_sample ON s.internal_id = clinical_sample.internal_id +WHERE match(clinical_sample.attr_value, '^[\d\.]+$'); + +CREATE VIEW sample_clinical_attribute_categorical_view + AS +SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, + concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, + cl.attr_id as attribute_name, + cl.attr_value as attribute_value, + cs.cancer_study_identifier as cancer_study_identifier +FROM cancer_study cs + INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id + INNER JOIN sample s on p.internal_id = s.patient_id + INNER JOIN clinical_sample cl on s.internal_id = cl.internal_id +WHERE NOT 
match(cl.attr_value, '^[\d\.]+$'); + +CREATE VIEW patient_clinical_attribute_numeric_view + AS +SELECT concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, + cp.attr_id as attribute_name, + cast(cp.attr_value as float) as attribute_value, + cs.cancer_study_identifier as cancer_study_identifier +FROM cancer_study cs + INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id + INNER JOIN clinical_patient cp on p.internal_id = cp.internal_id +WHERE match(cp.attr_value, '^[\d\.]+$'); + +CREATE VIEW patient_clinical_attribute_categorical_view + AS +SELECT concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, + cp.attr_id as attribute_name, + cp.attr_value as attribute_value, + cs.cancer_study_identifier as cancer_study_identifier +FROM cancer_study cs + INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id + INNER JOIN clinical_patient cp on p.internal_id = cp.internal_id +WHERE NOT match(cp.attr_value, '^[\d\.]+$'); + +CREATE VIEW sample_view + AS +SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id, + base64Encode(sample.stable_id) as sample_unique_id_base64, + sample.stable_id as sample_stable_id, + concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, + p.stable_id as patient_stable_id, + base64Encode(p.stable_id) as patient_unique_id_base64, + cs.cancer_study_identifier as cancer_study_identifier +FROM sample + INNER JOIN patient p on sample.patient_id = p.internal_id + INNER JOIN cancer_study cs on p.cancer_study_id = cs.cancer_study_id; + +CREATE VIEW sample_list_view + AS +SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, + sl.stable_id as sample_list_stable_id, + sl.name as name, + cs.cancer_study_identifier as cancer_study_identifier +FROM sample_list as sl + INNER JOIN sample_list_list AS sll on sll.list_id = sl.list_id + INNER JOIN sample AS s on s.internal_id = sll.sample_id + INNER JOIN cancer_study cs on 
sl.cancer_study_id = cs.cancer_study_id; \ No newline at end of file diff --git a/src/main/resources/db-scripts/columnar/views.sql b/src/main/resources/db-scripts/columnar/views.sql deleted file mode 100644 index af1cc18dc2b..00000000000 --- a/src/main/resources/db-scripts/columnar/views.sql +++ /dev/null @@ -1,144 +0,0 @@ -DROP VIEW IF EXISTS sample_clinical_attribute_numeric_view; -DROP VIEW IF EXISTS sample_clinical_attribute_categorical_view; -DROP VIEW IF EXISTS patient_clinical_attribute_numeric_view; -DROP VIEW IF EXISTS patient_clinical_attribute_categorical_view; -DROP VIEW IF EXISTS sample_view; -DROP VIEW IF EXISTS sample_list_view; -DROP VIEW IF EXISTS genomic_event_view; - -CREATE VIEW sample_clinical_attribute_numeric_view - AS -SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, - concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, - clinical_sample.attr_id as attribute_name, - cast(clinical_sample.attr_value as float) as attribute_value, - cs.cancer_study_identifier as cancer_study_identifier -FROM cancer_study cs - INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id - INNER JOIN sample s on p.internal_id = s.patient_id - INNER JOIN clinical_sample ON s.internal_id = clinical_sample.internal_id -WHERE match(clinical_sample.attr_value, '^[\d\.]+$'); - -CREATE VIEW sample_clinical_attribute_categorical_view - AS -SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, - concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, - cl.attr_id as attribute_name, - cl.attr_value as attribute_value, - cs.cancer_study_identifier as cancer_study_identifier -FROM cancer_study cs - INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id - INNER JOIN sample s on p.internal_id = s.patient_id - INNER JOIN clinical_sample cl on s.internal_id = cl.internal_id -WHERE NOT match(cl.attr_value, '^[\d\.]+$'); - -CREATE VIEW 
patient_clinical_attribute_numeric_view - AS -SELECT concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, - cp.attr_id as attribute_name, - cast(cp.attr_value as float) as attribute_value, - cs.cancer_study_identifier as cancer_study_identifier -FROM cancer_study cs - INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id - INNER JOIN clinical_patient cp on p.internal_id = cp.internal_id -WHERE match(cp.attr_value, '^[\d\.]+$'); - -CREATE VIEW patient_clinical_attribute_categorical_view - AS -SELECT concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, - cp.attr_id as attribute_name, - cp.attr_value as attribute_value, - cs.cancer_study_identifier as cancer_study_identifier -FROM cancer_study cs - INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id - INNER JOIN clinical_patient cp on p.internal_id = cp.internal_id -WHERE NOT match(cp.attr_value, '^[\d\.]+$'); - -CREATE VIEW sample_view - AS -SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id, - base64Encode(sample.stable_id) as sample_unique_id_base64, - sample.stable_id as sample_stable_id, - concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, - p.stable_id as patient_stable_id, - base64Encode(p.stable_id) as patient_unique_id_base64, - cs.cancer_study_identifier as cancer_study_identifier -FROM sample - INNER JOIN patient p on sample.patient_id = p.internal_id - INNER JOIN cancer_study cs on p.cancer_study_id = cs.cancer_study_id; - -CREATE VIEW sample_list_view - AS -SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, - sl.stable_id as sample_list_stable_id, - sl.name as name, - cs.cancer_study_identifier as cancer_study_identifier -FROM sample_list as sl - INNER JOIN sample_list_list AS sll on sll.list_id = sl.list_id - INNER JOIN sample AS s on s.internal_id = sll.sample_id - INNER JOIN cancer_study cs on sl.cancer_study_id = cs.cancer_study_id; - -CREATE VIEW 
genomic_event_view - AS -SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id, - me.protein_change as variant, - 'mutation' as variant_type, - gene.hugo_gene_symbol as hugo_gene_symbol, - gp.stable_id as gene_panel_stable_id, - cs.cancer_study_identifier as cancer_study_identifier, - g.stable_id as genetic_profile_stable_id -FROM mutation - LEFT JOIN mutation_event as me on mutation.mutation_event_id = me.mutation_event_id - LEFT JOIN sample_profile sp - on mutation.sample_id = sp.sample_id and mutation.genetic_profile_id = sp.genetic_profile_id - LEFT JOIN gene_panel gp on sp.panel_id = gp.internal_id - LEFT JOIN genetic_profile g on sp.genetic_profile_id = g.genetic_profile_id - LEFT JOIN cancer_study cs on g.cancer_study_id = cs.cancer_study_id - LEFT JOIN sample on mutation.sample_id = sample.internal_id - LEFT JOIN gene on mutation.entrez_gene_id = gene.entrez_gene_id -UNION ALL -SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id, - toString(ce.alteration) as variant, - 'cna' as variant_type, - gene.hugo_gene_symbol as hugo_gene_symbol, - gp.stable_id as gene_panel_stable_id, - cs.cancer_study_identifier as cancer_study_identifier, - gp.stable_id as genetic_profile_stable_id -FROM cna_event ce - INNER JOIN sample_cna_event sce ON ce.cna_event_id = sce.cna_event_id - INNER JOIN sample_profile sp ON sce.sample_id = sp.sample_id AND sce.genetic_profile_id = sp.genetic_profile_id - INNER JOIN gene_panel gp ON sp.panel_id = gp.internal_id - INNER JOIN genetic_profile g ON sp.genetic_profile_id = g.genetic_profile_id - INNER JOIN cancer_study cs ON g.cancer_study_id = cs.cancer_study_id - INNER JOIN sample ON sce.sample_id = sample.internal_id - INNER JOIN gene ON ce.entrez_gene_id = gene.entrez_gene_id -UNION ALL -SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, - event_info as variant, - 'structural_variant' as variant_type, - gene1.hugo_gene_symbol as 
hugo_gene_symbol, - gene_panel.stable_id as gene_panel_stable_id, - cs.cancer_study_identifier as cancer_study_identifier, - gp.stable_id as genetic_profile_stable_id -FROM structural_variant sv - INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id - INNER JOIN sample s ON sv.sample_id = s.internal_id - INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id - INNER JOIN gene gene1 ON sv.site1_entrez_gene_id = gene1.entrez_gene_id - INNER JOIN sample_profile on s.internal_id = sample_profile.sample_id - INNER JOIN gene_panel on sample_profile.panel_id = gene_panel.internal_id -UNION ALL -SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, - event_info as variant, - 'structural_variant' as variant_type, - gene2.hugo_gene_symbol as hugo_gene_symbol, - gene_panel.stable_id as gene_panel_stable_id, - cs.cancer_study_identifier as cancer_study_identifier, - gp.stable_id as genetic_profile_stable_id -FROM structural_variant sv - INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id - INNER JOIN sample s ON sv.sample_id = s.internal_id - INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id - INNER JOIN gene gene2 ON sv.site2_entrez_gene_id = gene2.entrez_gene_id - INNER JOIN sample_profile on s.internal_id = sample_profile.sample_id - INNER JOIN gene_panel on sample_profile.panel_id = gene_panel.internal_id; \ No newline at end of file diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewAlterationFilterMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewAlterationFilterMapper.xml new file mode 100644 index 00000000000..28e537b309b --- /dev/null +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewAlterationFilterMapper.xml @@ -0,0 +1,105 @@ + + + + + + + NULL + + AND + lower(genomic_event_mutation.mutation_type) + + + NOT IN + + + IN + + + + lower(#{type}) + + + + + + + 
+ + + + + + OR + lower(genomic_event_mutation.mutation_status) LIKE '%germline%' + + + OR + lower(genomic_event_mutation.mutation_status) = 'somatic' + + + OR + lower(genomic_event_mutation.mutation_status) != 'somatic' AND lower(genomic_event_mutation.mutation_status) NOT LIKE '%germline%' + + + + + AND NULL + + + + + + + + + + + + + OR lower(driver_filter) = 'putative_driver' + + + OR lower(driver_filter) = 'putative_passenger' + + + OR driver_filter IS NULL + OR lower(driver_filter) IN ('unknown', 'na', '') + + + + + AND NULL + + + + + + + + + + + + + + OR driver_tiers_filter IN + + #{item} + + + + OR driver_tiers_filter IS NULL + OR lower(driver_tiers_filter) IN ('', 'na', 'unknown') + + + + + AND NULL + + + + + + + \ No newline at end of file diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml index 524cf7df78b..b55843cb348 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml @@ -31,15 +31,16 @@ COUNT(DISTINCT sample_unique_id) as numberOfProfiledCases, COUNT(DISTINCT sample_unique_id) as numberOfAlteredCases, COUNT(*) as totalCount - FROM genomic_event_view + FROM genomic_event_mutation - variant_type = 'mutation' - AND sample_unique_id IN ( INTERSECT ) + + + GROUP BY hugo_gene_symbol ORDER BY totalCount DESC; diff --git a/src/test/java/org/cbioportal/persistence/helper/AlterationFilterHelperTest.java b/src/test/java/org/cbioportal/persistence/helper/AlterationFilterHelperTest.java new file mode 100644 index 00000000000..d0cd5f244bc --- /dev/null +++ b/src/test/java/org/cbioportal/persistence/helper/AlterationFilterHelperTest.java @@ -0,0 +1,167 @@ +package org.cbioportal.persistence.helper; + +import org.cbioportal.model.AlterationFilter; +import org.cbioportal.model.MutationEventType; +import 
org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Unit tests for {@link AlterationFilterHelper}: each test configures an
+ * {@link AlterationFilter} and checks the helper accessor the test is named after.
+ */
+public class AlterationFilterHelperTest {
+
+    /** Building from a null filter must still return a helper. */
+    @Test
+    public void build() {
+        assertNotNull(AlterationFilterHelper.build(null));
+    }
+
+    /** One selected plus one deselected type gives a value list that is neither all nor none. */
+    @Test
+    public void getMutationTypeList() {
+        AlterationFilter alterationFilter = new AlterationFilter();
+        Map<MutationEventType, Boolean> mutationEventTypeFilterMap = new HashMap<>();
+        mutationEventTypeFilterMap.put(MutationEventType.nonsense_mutation, Boolean.TRUE);
+        mutationEventTypeFilterMap.put(MutationEventType.other, Boolean.FALSE);
+        alterationFilter.setMutationEventTypes(mutationEventTypeFilterMap);
+
+        var mutationList = AlterationFilterHelper.build(alterationFilter).getMutationTypeList();
+        assertFalse(mutationList.hasNone());
+        assertFalse(mutationList.hasAll());
+        assertTrue(mutationList.hasValues());
+    }
+
+    @Test
+    public void hasDriver() {
+        AlterationFilter alterationFilter = new AlterationFilter();
+        alterationFilter.setIncludeDriver(true);
+        assertTrue(AlterationFilterHelper.build(alterationFilter).hasDriver());
+    }
+
+    @Test
+    public void hasVUSDriver() {
+        AlterationFilter alterationFilter = new AlterationFilter();
+        alterationFilter.setIncludeVUS(true);
+        assertTrue(AlterationFilterHelper.build(alterationFilter).hasVUSDriver());
+    }
+
+    @Test
+    public void hasUnknownOncogenicity() {
+        AlterationFilter alterationFilter = new AlterationFilter();
+        alterationFilter.setIncludeUnknownOncogenicity(true);
+        assertTrue(AlterationFilterHelper.build(alterationFilter).hasUnknownOncogenicity());
+    }
+
+    @Test
+    public void hasGermline() {
+        AlterationFilter alterationFilter = new AlterationFilter();
+        alterationFilter.setIncludeGermline(true);
+        assertTrue(AlterationFilterHelper.build(alterationFilter).hasGermline());
+    }
+
+    @Test
+    public void hasSomatic() {
+        AlterationFilter alterationFilter = new AlterationFilter();
+        alterationFilter.setIncludeSomatic(true);
+        assertTrue(AlterationFilterHelper.build(alterationFilter).hasSomatic());
+    }
+
+    @Test
+    public void hasUnknownMutationStatus() {
+        AlterationFilter alterationFilter = new AlterationFilter();
+        alterationFilter.setIncludeUnknownStatus(true);
+        assertTrue(AlterationFilterHelper.build(alterationFilter).hasUnknownMutationStatus());
+    }
+
+    @Test
+    public void getSelectedTiers() {
+        AlterationFilter alterationFilter = new AlterationFilter();
+        alterationFilter.setTiersBooleanMap(new HashMap<>()); // empty tier map: the selection must still be non-null
+        assertNotNull(AlterationFilterHelper.build(alterationFilter).getSelectedTiers());
+    }
+
+    @Test
+    public void hasUnknownTier() {
+        AlterationFilter alterationFilter = new AlterationFilter();
+        alterationFilter.setIncludeUnknownTier(true);
+        assertTrue(AlterationFilterHelper.build(alterationFilter).hasUnknownTier());
+    }
+
+    @Test
+    public void isAllDriverAnnotationSelected() {
+        AlterationFilter alterationFilter = new AlterationFilter();
+        alterationFilter.setIncludeDriver(true);
+        alterationFilter.setIncludeVUS(true);
+        alterationFilter.setIncludeUnknownOncogenicity(true);
+        assertTrue(AlterationFilterHelper.build(alterationFilter).isAllDriverAnnotationSelected());
+    }
+
+    @Test
+    public void isNoDriverAnnotationSelected() {
+        AlterationFilter alterationFilter = new AlterationFilter();
+        alterationFilter.setIncludeDriver(false);
+        alterationFilter.setIncludeVUS(false);
+        alterationFilter.setIncludeUnknownOncogenicity(false);
+        assertTrue(AlterationFilterHelper.build(alterationFilter).isNoDriverAnnotationSelected());
+    }
+
+    @Test
+    public void isSomeDriverAnnotationsSelected() {
+        AlterationFilter alterationFilter = new AlterationFilter();
+        alterationFilter.setIncludeDriver(true);
+        alterationFilter.setIncludeVUS(false);
+        alterationFilter.setIncludeUnknownOncogenicity(false);
+        assertTrue(AlterationFilterHelper.build(alterationFilter).isSomeDriverAnnotationsSelected());
+    }
+
+    @Test
+    public void isAllMutationStatusSelected() {
+        AlterationFilter alterationFilter = new AlterationFilter();
+        alterationFilter.setIncludeGermline(true);
+        alterationFilter.setIncludeSomatic(true);
+        alterationFilter.setIncludeUnknownStatus(true);
+        assertTrue(AlterationFilterHelper.build(alterationFilter).isAllMutationStatusSelected());
+    }
+
+    @Test
+    public void isNoMutationStatusSelected() {
+        AlterationFilter alterationFilter = new AlterationFilter();
+        alterationFilter.setIncludeGermline(false);
+        alterationFilter.setIncludeSomatic(false);
+        alterationFilter.setIncludeUnknownStatus(false);
+        assertTrue(AlterationFilterHelper.build(alterationFilter).isNoMutationStatusSelected());
+    }
+
+    @Test
+    public void isSomeMutationStatusSelected() {
+        AlterationFilter alterationFilter = new AlterationFilter();
+        alterationFilter.setIncludeGermline(false);
+        alterationFilter.setIncludeSomatic(true);
+        alterationFilter.setIncludeUnknownStatus(false);
+        assertTrue(AlterationFilterHelper.build(alterationFilter).isSomeMutationStatusSelected());
+    }
+
+    @Test
+    public void isAllTierOptionsSelected() {
+        AlterationFilter alterationFilter = new AlterationFilter();
+        alterationFilter.setTiersBooleanMap(new HashMap<>()); // no explicit tiers configured
+        alterationFilter.setIncludeUnknownTier(true);
+        assertTrue(AlterationFilterHelper.build(alterationFilter).isAllTierOptionsSelected());
+    }
+
+    @Test
+    public void shouldApply() {
+        AlterationFilter alterationFilter = new AlterationFilter();
+        alterationFilter.setIncludeDriver(true);
+        alterationFilter.setIncludeVUS(false);
+        alterationFilter.setIncludeUnknownOncogenicity(false);
+        assertTrue(AlterationFilterHelper.build(alterationFilter).shouldApply());
+    }
+}
\ No newline at end of file diff --git a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/AbstractTestcontainers.java
b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/AbstractTestcontainers.java index 584921dbd63..2f31fc81594 100644 --- a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/AbstractTestcontainers.java +++ b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/AbstractTestcontainers.java @@ -19,15 +19,18 @@ public static void beforeAll() { @ClassRule public static final ClickHouseContainer clickhouseContainer = new ClickHouseContainer("clickhouse/clickhouse-server:22.6") - .withDatabaseName("cbioportal") .withUsername("cbio_user") .withPassword("P@ssword1") .withClasspathResourceMapping("clickhouse_cgds.sql", "/docker-entrypoint-initdb.d/a_schema.sql", BindMode.READ_ONLY) .withClasspathResourceMapping("clickhouse_data.sql", "/docker-entrypoint-initdb.d/b_schema.sql", BindMode.READ_ONLY) - .withClasspathResourceMapping("clickhouse_views.sql", "/docker-entrypoint-initdb.d/c_schema.sql", - BindMode.READ_ONLY);;
+            .withClasspathResourceMapping("clickhouse/views.sql", "/docker-entrypoint-initdb.d/c_schema.sql",
+                BindMode.READ_ONLY) // takes over the c_schema.sql target from the removed clickhouse_views.sql mapping
+            .withClasspathResourceMapping("clickhouse/clickhouse.sql", "/docker-entrypoint-initdb.d/d_schema.sql",
+                BindMode.READ_ONLY) // NOTE(review): presumably init scripts run in file-name order (a_..e_), i.e. this one after views.sql - confirm
+            .withClasspathResourceMapping("clickhouse/clickhouse_migration.sql", "/docker-entrypoint-initdb.d/e_schema.sql",
+                BindMode.READ_ONLY); // mounted read-only like the other init scripts; e_ is the last of the five targets
public static class Initializer implements ApplicationContextInitializer { diff --git a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java index be77604fb7f..de343ee5f2f 100644 --- a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java +++ b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java @@ -1,5 +1,8 @@ package org.cbioportal.persistence.mybatisclickhouse; +import org.cbioportal.model.AlterationFilter; +import org.cbioportal.model.MutationEventType; +import
org.cbioportal.persistence.helper.AlterationFilterHelper; import org.cbioportal.persistence.mybatisclickhouse.config.MyBatisConfig; import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter; import org.cbioportal.web.parameter.StudyViewFilter; @@ -13,6 +16,10 @@ import org.springframework.test.context.junit4.SpringRunner; import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; import static org.junit.Assert.assertEquals; @@ -37,4 +44,72 @@ public void getFilteredSamples() { assertEquals(19, filteredSamples.size()); }
+    /** Without an alteration filter, three genes are returned and brca1 has a total count of 5. */
+    @Test
+    public void getMutatedGenes() {
+        StudyViewFilter studyViewFilter = new StudyViewFilter();
+        studyViewFilter.setStudyIds(List.of(STUDY_TCGA_PUB));
+        var alterationCountByGenes = studyViewMapper.getMutatedGenes(studyViewFilter,
+            CategorizedClinicalDataCountFilter.getBuilder().build(), false,
+            AlterationFilterHelper.build(studyViewFilter.getAlterationFilter()));
+        assertEquals(3, alterationCountByGenes.size());
+
+        // Fail explicitly when brca1 is missing; a bare `assert` is skipped unless the JVM runs with -ea.
+        var testBrca1AlterationCount = alterationCountByGenes.stream()
+            .filter(a -> Objects.equals(a.getHugoGeneSymbol(), "brca1"))
+            .findFirst()
+            .orElseThrow(() -> new AssertionError("no alteration count returned for brca1"));
+        assertEquals(Integer.valueOf(5), testBrca1AlterationCount.getTotalCount());
+    }
+
+    /**
+     * Runs the mutated-genes query with a mutation-type filter, a mutation-status filter,
+     * and a third filter on which both the type and the status setters are called.
+     */
+    @Test
+    public void getMutatedGenesWithAlterationFilter() {
+        StudyViewFilter studyViewFilter = new StudyViewFilter();
+        studyViewFilter.setStudyIds(List.of(STUDY_TCGA_PUB));
+
+        // Type filter: nonsense mutations selected, "other" deselected.
+        AlterationFilter alterationFilter = new AlterationFilter();
+        Map<MutationEventType, Boolean> mutationEventTypeFilterMap = new HashMap<>();
+        mutationEventTypeFilterMap.put(MutationEventType.nonsense_mutation, Boolean.TRUE);
+        mutationEventTypeFilterMap.put(MutationEventType.other, Boolean.FALSE);
+        alterationFilter.setMutationEventTypes(mutationEventTypeFilterMap);
+
+        var alterationCountByGenes = studyViewMapper.getMutatedGenes(studyViewFilter,
+            CategorizedClinicalDataCountFilter.getBuilder().build(), false,
+            AlterationFilterHelper.build(alterationFilter));
+        assertEquals(2, alterationCountByGenes.size());
+
+        // Status filter: germline and somatic excluded, unknown status included.
+        AlterationFilter onlyMutationStatusFilter = new AlterationFilter();
+        onlyMutationStatusFilter.setMutationEventTypes(new HashMap<>());
+        onlyMutationStatusFilter.setIncludeGermline(false);
+        onlyMutationStatusFilter.setIncludeSomatic(false);
+        onlyMutationStatusFilter.setIncludeUnknownStatus(true);
+
+        var alterationCountByGenes1 = studyViewMapper.getMutatedGenes(studyViewFilter,
+            CategorizedClinicalDataCountFilter.getBuilder().build(), false,
+            AlterationFilterHelper.build(onlyMutationStatusFilter));
+        assertEquals(1, alterationCountByGenes1.size());
+
+        // NOTE(review): the second setMutationEventTypes call presumably replaces the map set by
+        // the first, leaving this filter equivalent to onlyMutationStatusFilter - confirm which
+        // call is intended (dropping the empty map may change the expected count below).
+        AlterationFilter mutationTypeAndStatusFilter = new AlterationFilter();
+        mutationTypeAndStatusFilter.setMutationEventTypes(mutationEventTypeFilterMap);
+        mutationTypeAndStatusFilter.setMutationEventTypes(new HashMap<>());
+        mutationTypeAndStatusFilter.setIncludeGermline(false);
+        mutationTypeAndStatusFilter.setIncludeSomatic(false);
+        mutationTypeAndStatusFilter.setIncludeUnknownStatus(true);
+
+        // Query with the filter built just above, not with onlyMutationStatusFilter.
+        var alterationCountByGenes2 = studyViewMapper.getMutatedGenes(studyViewFilter,
+            CategorizedClinicalDataCountFilter.getBuilder().build(), false,
+            AlterationFilterHelper.build(mutationTypeAndStatusFilter));
+        assertEquals(1, alterationCountByGenes2.size());
+    }
+
} \ No newline at end of file diff --git a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/config/MyBatisConfig.java b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/config/MyBatisConfig.java index 81d04c54da2..25d1e098c64 100644 --- a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/config/MyBatisConfig.java +++ b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/config/MyBatisConfig.java @@ -34,8 +34,9 @@ public SqlSessionFactoryBean sqlColumnarSessionFactory(ResourceLoader resourceLo sessionFactory.setDataSource(dataSource); var studyViewMapperResource =
resourceLoader.getResource("classpath:org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml") ; var studyViewFilterMapperResource = resourceLoader.getResource("classpath:org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml"); + var alterationFilterMapperResource = resourceLoader.getResource("classpath:org/cbioportal/persistence/mybatisclickhouse/StudyViewAlterationFilterMapper.xml"); sessionFactory.setMapperLocations( - studyViewMapperResource,studyViewFilterMapperResource + studyViewMapperResource,studyViewFilterMapperResource, alterationFilterMapperResource ); return sessionFactory; } diff --git a/src/test/resources/clickhouse_views.sql b/src/test/resources/clickhouse_views.sql deleted file mode 100644 index 9e9101ebccb..00000000000 --- a/src/test/resources/clickhouse_views.sql +++ /dev/null @@ -1,216 +0,0 @@ -DROP TABLE IF EXISTS cbioportal.sample_clinical_attribute_numeric; -DROP TABLE IF EXISTS cbioportal.sample_clinical_attribute_categorical; -DROP TABLE IF EXISTS cbioportal.patient_clinical_attribute_numeric; -DROP TABLE IF EXISTS cbioportal.patient_clinical_attribute_categorical; -DROP TABLE IF EXISTS cbioportal.sample_columnstore; -DROP TABLE IF EXISTS cbioportal.sample_list_columnstore; -DROP TABLE IF EXISTS cbioportal.genomic_event; - - -CREATE TABLE cbioportal.sample_clinical_attribute_numeric -( - sample_unique_id VARCHAR(45), - patient_unique_id VARCHAR(45), - attribute_name VARCHAR(45), - attribute_value FLOAT, - cancer_study_identifier VARCHAR(45) -) - ENGINE = MergeTree() - ORDER BY (sample_unique_id, patient_unique_id, attribute_name, cancer_study_identifier); - -INSERT INTO cbioportal.sample_clinical_attribute_numeric -SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, - concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, - clinical_sample.attr_id as attribute_name, - cast(clinical_sample.attr_value as float) as attribute_value, - cs.cancer_study_identifier as 
cancer_study_identifier -FROM cancer_study cs - INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id - INNER JOIN sample s on p.internal_id = s.patient_id - INNER JOIN clinical_sample ON s.internal_id = clinical_sample.internal_id -WHERE match(clinical_sample.attr_value, '^[\d\.]+$'); - -CREATE TABLE cbioportal.sample_clinical_attribute_categorical -( - sample_unique_id VARCHAR(45), - patient_unique_id VARCHAR(45), - attribute_name VARCHAR(45), - attribute_value VARCHAR(45), - cancer_study_identifier VARCHAR(45) -) - ENGINE = MergeTree() - ORDER BY (sample_unique_id, patient_unique_id, attribute_name, cancer_study_identifier); - -INSERT INTO cbioportal.sample_clinical_attribute_categorical -SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, - concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, - cl.attr_id as attribute_name, - cl.attr_value as attribute_value, - cs.cancer_study_identifier as cancer_study_identifier -FROM cancer_study cs - INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id - INNER JOIN sample s on p.internal_id = s.patient_id - INNER JOIN clinical_sample cl on s.internal_id = cl.internal_id -WHERE NOT match(cl.attr_value, '^[\d\.]+$'); - -CREATE TABLE cbioportal.patient_clinical_attribute_numeric -( - patient_unique_id VARCHAR(45), - attribute_name VARCHAR(45), - attribute_value FLOAT, - cancer_study_identifier VARCHAR(45) -) - ENGINE = MergeTree() - ORDER BY (patient_unique_id, attribute_name, cancer_study_identifier); - -INSERT INTO cbioportal.patient_clinical_attribute_numeric -SELECT concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, - cp.attr_id as attribute_name, - cast(cp.attr_value as float) as attribute_value, - cs.cancer_study_identifier as cancer_study_identifier -FROM cancer_study cs - INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id - INNER JOIN clinical_patient cp on p.internal_id = cp.internal_id -WHERE match(cp.attr_value, 
'^[\d\.]+$'); - -CREATE TABLE cbioportal.patient_clinical_attribute_categorical -( - patient_unique_id VARCHAR(45), - attribute_name VARCHAR(45), - attribute_value VARCHAR(45), - cancer_study_identifier VARCHAR(45) -) - ENGINE = MergeTree() - ORDER BY (patient_unique_id, attribute_name, cancer_study_identifier); - -INSERT INTO cbioportal.patient_clinical_attribute_categorical -SELECT concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, - cp.attr_id as attribute_name, - cp.attr_value as attribute_value, - cs.cancer_study_identifier as cancer_study_identifier -FROM cancer_study cs - INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id - INNER JOIN clinical_patient cp on p.internal_id = cp.internal_id -WHERE NOT match(cp.attr_value, '^[\d\.]+$'); - -CREATE TABLE IF NOT EXISTS cbioportal.sample_columnstore -( - sample_unique_id VARCHAR(45), - sample_unique_id_base64 VARCHAR(45), - sample_stable_id VARCHAR(45), - patient_unique_id VARCHAR(45), - patient_unique_id_base64 VARCHAR(45), - patient_stable_id VARCHAR(45), - cancer_study_identifier VARCHAR(45) -) - ENGINE = MergeTree - ORDER BY (sample_unique_id, patient_unique_id, cancer_study_identifier); - -INSERT INTO cbioportal.sample_columnstore -SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id, - base64Encode(sample.stable_id) as sample_unique_id_base64, - sample.stable_id as sample_stable_id, - concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, - p.stable_id as patient_stable_id, - base64Encode(p.stable_id) as patient_unique_id_base64, - cs.cancer_study_identifier as cancer_study_identifier -FROM sample - INNER JOIN patient p on sample.patient_id = p.internal_id - INNER JOIN cancer_study cs on p.cancer_study_id = cs.cancer_study_id; - -CREATE TABLE IF NOT EXISTS cbioportal.sample_list_columnstore -( - sample_unique_id VARCHAR(45), - sample_list_stable_id VARCHAR(45), - name VARCHAR(45), - cancer_study_identifier VARCHAR(45) -) - 
ENGINE = MergeTree - ORDER BY (sample_unique_id, sample_list_stable_id, name, cancer_study_identifier); - -INSERT INTO cbioportal.sample_list_columnstore -SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, - sl.stable_id as sample_list_stable_id, - sl.name as name, - cs.cancer_study_identifier as cancer_study_identifier -FROM sample_list as sl - INNER JOIN sample_list_list AS sll on sll.list_id = sl.list_id - INNER JOIN sample AS s on s.internal_id = sll.sample_id - INNER JOIN cancer_study cs on sl.cancer_study_id = cs.cancer_study_id; - -CREATE TABLE IF NOT EXISTS cbioportal.genomic_event -( - sample_unique_id VARCHAR(45), - variant VARCHAR(45), - variant_type VARCHAR(45), - hugo_gene_symbol VARCHAR(45), - gene_panel_stable_id VARCHAR(45), - cancer_study_identifier VARCHAR(45), - genetic_profile_stable_id VARCHAR(45) -) - ENGINE = MergeTree - ORDER BY (sample_unique_id, variant, variant_type, hugo_gene_symbol, cancer_study_identifier, gene_panel_stable_id, genetic_profile_stable_id); - -INSERT INTO cbioportal.genomic_event -SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id, - me.protein_change as variant, - 'mutation' as variant_type, - gene.hugo_gene_symbol as hugo_gene_symbol, - gp.stable_id as gene_panel_stable_id, - cs.cancer_study_identifier as cancer_study_identifier, - g.stable_id as genetic_profile_stable_id -FROM mutation - LEFT JOIN mutation_event as me on mutation.mutation_event_id = me.mutation_event_id - LEFT JOIN sample_profile sp - on mutation.sample_id = sp.sample_id and mutation.genetic_profile_id = sp.genetic_profile_id - LEFT JOIN gene_panel gp on sp.panel_id = gp.internal_id - LEFT JOIN genetic_profile g on sp.genetic_profile_id = g.genetic_profile_id - LEFT JOIN cancer_study cs on g.cancer_study_id = cs.cancer_study_id - LEFT JOIN sample on mutation.sample_id = sample.internal_id - LEFT JOIN gene on mutation.entrez_gene_id = gene.entrez_gene_id -UNION ALL -SELECT 
concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id, - toString(ce.alteration) as variant, - 'cna' as variant_type, - gene.hugo_gene_symbol as hugo_gene_symbol, - gp.stable_id as gene_panel_stable_id, - cs.cancer_study_identifier as cancer_study_identifier, - gp.stable_id as genetic_profile_stable_id -FROM cna_event ce - INNER JOIN sample_cna_event sce ON ce.cna_event_id = sce.cna_event_id - INNER JOIN sample_profile sp ON sce.sample_id = sp.sample_id AND sce.genetic_profile_id = sp.genetic_profile_id - INNER JOIN gene_panel gp ON sp.panel_id = gp.internal_id - INNER JOIN genetic_profile g ON sp.genetic_profile_id = g.genetic_profile_id - INNER JOIN cancer_study cs ON g.cancer_study_id = cs.cancer_study_id - INNER JOIN sample ON sce.sample_id = sample.internal_id - INNER JOIN gene ON ce.entrez_gene_id = gene.entrez_gene_id -UNION ALL -SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, - event_info as variant, - 'structural_variant' as variant_type, - gene1.hugo_gene_symbol as hugo_gene_symbol, - gene_panel.stable_id as gene_panel_stable_id, - cs.cancer_study_identifier as cancer_study_identifier, - gp.stable_id as genetic_profile_stable_id -FROM structural_variant sv - INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id - INNER JOIN sample s ON sv.sample_id = s.internal_id - INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id - INNER JOIN gene gene1 ON sv.site1_entrez_gene_id = gene1.entrez_gene_id - INNER JOIN sample_profile on s.internal_id = sample_profile.sample_id - INNER JOIN gene_panel on sample_profile.panel_id = gene_panel.internal_id -UNION ALL -SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, - event_info as variant, - 'structural_variant' as variant_type, - gene2.hugo_gene_symbol as hugo_gene_symbol, - gene_panel.stable_id as gene_panel_stable_id, - cs.cancer_study_identifier as cancer_study_identifier, - gp.stable_id as 
genetic_profile_stable_id -FROM structural_variant sv - INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id - INNER JOIN sample s ON sv.sample_id = s.internal_id - INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id - INNER JOIN gene gene2 ON sv.site2_entrez_gene_id = gene2.entrez_gene_id - INNER JOIN sample_profile on s.internal_id = sample_profile.sample_id - INNER JOIN gene_panel on sample_profile.panel_id = gene_panel.internal_id;