From f5660756bfe82b1b3da94f42be2538170bafa603 Mon Sep 17 00:00:00 2001
From: Charles Haynes <33608920+haynescd@users.noreply.github.com>
Date: Wed, 12 Jun 2024 13:58:20 -0400
Subject: [PATCH] Feature/mutated genes total profiled counts and gene panels
(#10824)
* Add Support for TotalProfiledCase Counts for Mutated-genes endpoint.
* Create sql files to create new tables
* Add unit test for totalProfiledCount
* Add matching gene panel ids
* Add TotalProfiledCountsWithoutPanelData
* Add profileCount for genes without gene panel data
* Add Comments for SQL
* Update matching Gene Panel Ids
* Clean up code
* Fix test
* Add query to get correct Gene Panels
* Fix unit test
* Add comments
---
pom.xml | 2 +
.../org/cbioportal/model/AlterationType.java | 6 +-
.../persistence/StudyViewRepository.java | 7 +
.../mybatisclickhouse/StudyViewMapper.java | 10 ++
.../StudyViewMyBatisRepository.java | 19 +++
.../typehandler/GenePanelIdsTypeHandler.java | 39 +++++
.../service/AlterationCountService.java | 4 +
.../impl/AlterationCountServiceImpl.java | 99 +++++++++--
.../impl/StudyViewColumnarServiceImpl.java | 20 ++-
.../db-scripts/clickhouse/clickhouse.sql | 22 ++-
.../clickhouse/clickhouse_migration.sql | 28 +++-
.../clickhouse/materialized_views.sql | 154 +-----------------
.../StudyViewFilterMapper.xml | 2 +-
.../mybatisclickhouse/StudyViewMapper.xml | 72 +++++++-
.../AbstractTestcontainers.java | 2 +
.../StudyViewMapperTest.java | 16 ++
.../impl/AlterationCountServiceImplTest.java | 31 +++-
src/test/resources/clickhouse_data.sql | 28 ++--
src/test/resources/logback.xml | 27 +--
19 files changed, 363 insertions(+), 225 deletions(-)
create mode 100644 src/main/java/org/cbioportal/persistence/mybatisclickhouse/typehandler/GenePanelIdsTypeHandler.java
diff --git a/pom.xml b/pom.xml
index 0481b295fe7..cf431226e6d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -447,6 +447,7 @@
clickhouse/clickhouse.sql
clickhouse/clickhouse_views.sql
clickhouse/clickhouse_migration.sql
+ clickhouse/materialized_views.sql
@@ -458,6 +459,7 @@
clickhouse/clickhouse.sql
clickhouse/views.sql
clickhouse/clickhouse_migration.sql
+ clickhouse/materialized_views.sql
diff --git a/src/main/java/org/cbioportal/model/AlterationType.java b/src/main/java/org/cbioportal/model/AlterationType.java
index c7b77bea59d..b20a642669d 100644
--- a/src/main/java/org/cbioportal/model/AlterationType.java
+++ b/src/main/java/org/cbioportal/model/AlterationType.java
@@ -1,6 +1,8 @@
package org.cbioportal.model;
public enum AlterationType {
- MUTATION,
- COPY_NUMBER_ALTERATION
+ MUTATION_EXTENDED, // AlterationCountServiceImpl passes this constant's toString() as the alterationType query argument
+ COPY_NUMBER_ALTERATION,
+ STRUCTURAL_VARIANT,
+ GENERIC_ASSAY;
}
diff --git a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java
index 91733a0151b..ae8f285a104 100644
--- a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java
+++ b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java
@@ -10,6 +10,7 @@
import org.cbioportal.web.parameter.StudyViewFilter;
import java.util.List;
+import java.util.Map;
public interface StudyViewRepository {
List getFilteredSamples(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter);
@@ -28,4 +29,10 @@ public interface StudyViewRepository {
List getClinicalDataAttributeNames(ClinicalAttributeDataSource clinicalAttributeDataSource, ClinicalAttributeDataType dataType);
+ Map getTotalProfiledCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType); // callers look entries up by Hugo gene symbol
+
+ int getFilteredSamplesCount(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); // used by AlterationCountServiceImpl as the profiled count for genes without gene panel data
+
+ Map getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType); // callers look entries up by Hugo gene symbol and read getMatchingGenePanelIds() from the value
+
}
diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java
index b8c247f415b..7b0c8ac91c0 100644
--- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java
+++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java
@@ -1,5 +1,6 @@
package org.cbioportal.persistence.mybatisclickhouse;
+import org.apache.ibatis.annotations.MapKey;
import org.cbioportal.model.AlterationCountByGene;
import org.cbioportal.model.ClinicalData;
import org.cbioportal.model.ClinicalDataCount;
@@ -9,6 +10,7 @@
import org.cbioportal.web.parameter.StudyViewFilter;
import java.util.List;
+import java.util.Map;
public interface StudyViewMapper {
List getFilteredSamples(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters);
@@ -31,4 +33,12 @@ List getClinicalDataCounts(StudyViewFilter studyViewFilter, C
List getPatientClinicalDataFromStudyViewFilter(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, List attributeIds);
+ @MapKey("hugoGeneSymbol") // the returned Map is keyed by each row's hugoGeneSymbol property
+ Map getTotalProfiledCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType);
+
+ int getFilteredSamplesCount(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters); // NOTE(review): SQL lives in StudyViewMapper.xml, not shown here - presumably counts samples passing the filter
+
+ @MapKey("hugoGeneSymbol") // the returned Map is keyed by each row's hugoGeneSymbol property
+ Map getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType);
+
}
diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java
index 1d2ae017f57..c723806885b 100644
--- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java
+++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java
@@ -14,6 +14,7 @@
import org.springframework.stereotype.Repository;
import java.util.List;
+import java.util.Map;
@Repository
public class StudyViewMyBatisRepository implements StudyViewRepository {
@@ -74,4 +75,24 @@ public List getSampleClinicalData(StudyViewFilter studyViewFilter,
public List getPatientClinicalData(StudyViewFilter studyViewFilter, List attributeIds, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) {
return mapper.getPatientClinicalDataFromStudyViewFilter(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), attributeIds);
}
+
+    // Delegates to the MyBatis mapper; the applyPatientIdFilters flag is derived from the categorized filter.
+    @Override
+    public Map getTotalProfiledCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType) {
+        return mapper.getTotalProfiledCounts(studyViewFilter, categorizedClinicalDataCountFilter,
+            shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), alterationType);
+    }
+
+    @Override
+    public int getFilteredSamplesCount(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) {
+        boolean applyPatientIdFilters = shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter); // flag computed by the helper, forwarded to the mapper
+        return mapper.getFilteredSamplesCount(studyViewFilter, categorizedClinicalDataCountFilter, applyPatientIdFilters);
+    }
+
+    @Override
+    public Map getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType) {
+        boolean applyPatientIdFilters = shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter); // flag computed by the helper, forwarded to the mapper
+        return mapper.getMatchingGenePanelIds(studyViewFilter, categorizedClinicalDataCountFilter, applyPatientIdFilters, alterationType);
+    }
+
}
\ No newline at end of file
diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/typehandler/GenePanelIdsTypeHandler.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/typehandler/GenePanelIdsTypeHandler.java
new file mode 100644
index 00000000000..397cdf01bd2
--- /dev/null
+++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/typehandler/GenePanelIdsTypeHandler.java
@@ -0,0 +1,62 @@
+package org.cbioportal.persistence.mybatisclickhouse.typehandler;
+
+import org.apache.ibatis.type.BaseTypeHandler;
+import org.apache.ibatis.type.JdbcType;
+
+import java.sql.Array;
+import java.sql.CallableStatement;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * MyBatis type handler that reads a SQL array column of gene panel ids into a {@code Set<String>}.
+ * Read-only: binding a set as a statement parameter is not supported.
+ */
+public class GenePanelIdsTypeHandler extends BaseTypeHandler<Set<String>> {
+
+    /**
+     * Always rejects the call; this handler only maps query results.
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public void setNonNullParameter(PreparedStatement ps, int i, Set<String> parameter, JdbcType jdbcType) throws SQLException {
+        throw new UnsupportedOperationException("Storage of GenePanelIds not supported");
+    }
+
+    @Override
+    public Set<String> getNullableResult(ResultSet rs, String columnName) throws SQLException {
+        return toGenePanelIds(rs.getArray(columnName));
+    }
+
+    @Override
+    public Set<String> getNullableResult(ResultSet rs, int columnIndex) throws SQLException {
+        return toGenePanelIds(rs.getArray(columnIndex));
+    }
+
+    @Override
+    public Set<String> getNullableResult(CallableStatement cs, int columnIndex) throws SQLException {
+        return toGenePanelIds(cs.getArray(columnIndex));
+    }
+
+    /**
+     * Copies the elements of a SQL array into a mutable set.
+     *
+     * @param sqlArray array returned by the driver; may be {@code null}
+     * @return the distinct elements, or an empty set when {@code sqlArray} is {@code null}
+     * @throws SQLException if the driver cannot materialize the array
+     */
+    private static Set<String> toGenePanelIds(Array sqlArray) throws SQLException {
+        if (sqlArray == null) {
+            // JDBC drivers may return null for a SQL NULL column; guard so the mapping does not fail with an NPE.
+            return new HashSet<>();
+        }
+        // Assumes the driver materializes the array as String[] - TODO confirm for the ClickHouse JDBC driver.
+        String[] ids = (String[]) sqlArray.getArray();
+        return new HashSet<>(Arrays.asList(ids));
+    }
+}
diff --git a/src/main/java/org/cbioportal/service/AlterationCountService.java b/src/main/java/org/cbioportal/service/AlterationCountService.java
index 5966f725a8d..2ce5c649158 100644
--- a/src/main/java/org/cbioportal/service/AlterationCountService.java
+++ b/src/main/java/org/cbioportal/service/AlterationCountService.java
@@ -3,6 +3,8 @@
import org.apache.commons.math3.util.Pair;
import org.cbioportal.model.*;
import org.cbioportal.model.util.Select;
+import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter;
+import org.cbioportal.web.parameter.StudyViewFilter;
import java.util.List;
@@ -75,4 +77,6 @@ Pair, Long> getPatientCnaGeneCounts(List getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter);
+
}
diff --git a/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java b/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java
index 4175ce3cf10..6fc13d12294 100644
--- a/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java
+++ b/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java
@@ -1,17 +1,35 @@
package org.cbioportal.service.impl;
-import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.math3.util.Pair;
-import org.cbioportal.model.*;
+import org.cbioportal.model.AlterationCountBase;
+import org.cbioportal.model.AlterationCountByGene;
+import org.cbioportal.model.AlterationCountByStructuralVariant;
+import org.cbioportal.model.AlterationFilter;
+import org.cbioportal.model.AlterationType;
+import org.cbioportal.model.CopyNumberCountByGene;
+import org.cbioportal.model.MolecularProfile;
+import org.cbioportal.model.MolecularProfileCaseIdentifier;
import org.cbioportal.model.util.Select;
import org.cbioportal.persistence.AlterationRepository;
import org.cbioportal.persistence.MolecularProfileRepository;
+import org.cbioportal.persistence.StudyViewRepository;
import org.cbioportal.service.AlterationCountService;
import org.cbioportal.service.util.AlterationEnrichmentUtil;
+import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter;
+import org.cbioportal.web.parameter.StudyViewFilter;
import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.lang.NonNull;
+import org.springframework.lang.Nullable;
import org.springframework.stereotype.Service;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BiFunction;
import java.util.function.Function;
@@ -20,17 +38,30 @@
@Service
public class AlterationCountServiceImpl implements AlterationCountService {
+ private final AlterationRepository alterationRepository;
+ private final AlterationEnrichmentUtil alterationEnrichmentUtil;
+ private final AlterationEnrichmentUtil alterationEnrichmentUtilCna;
+ private final AlterationEnrichmentUtil alterationEnrichmentUtilStructVar;
+ private final MolecularProfileRepository molecularProfileRepository;
+
+ private final StudyViewRepository studyViewRepository; // source of the mutated-gene counts, profiled counts and matching gene panel ids
+
+ private static final String WHOLE_EXOME_SEQUENCING = "WES"; // same literal that clickhouse_migration.sql writes as gene_panel_id when a sample profile has no gene panel
+
+
@Autowired
- private AlterationRepository alterationRepository;
- @Autowired
- private AlterationEnrichmentUtil alterationEnrichmentUtil;
- @Autowired
- private AlterationEnrichmentUtil alterationEnrichmentUtilCna;
- @Autowired
- private AlterationEnrichmentUtil alterationEnrichmentUtilStructVar;
- @Autowired
- private MolecularProfileRepository molecularProfileRepository;
-
+ public AlterationCountServiceImpl(AlterationRepository alterationRepository, AlterationEnrichmentUtil alterationEnrichmentUtil,
+ AlterationEnrichmentUtil alterationEnrichmentUtilCna, // NOTE(review): three parameters share the AlterationEnrichmentUtil type as shown here - confirm Spring resolves each to the intended bean
+ AlterationEnrichmentUtil alterationEnrichmentUtilStructVar,
+ MolecularProfileRepository molecularProfileRepository,
+ StudyViewRepository studyViewRepository) {
+ this.alterationRepository = alterationRepository;
+ this.alterationEnrichmentUtil = alterationEnrichmentUtil;
+ this.alterationEnrichmentUtilCna = alterationEnrichmentUtilCna;
+ this.alterationEnrichmentUtilStructVar = alterationEnrichmentUtilStructVar;
+ this.molecularProfileRepository = molecularProfileRepository;
+ this.studyViewRepository = studyViewRepository;
+ }
@Override
public Pair, Long> getSampleAlterationGeneCounts(List molecularProfileCaseIdentifiers,
Select entrezGeneIds,
@@ -223,6 +254,48 @@ public Pair, Long> getPatientCnaGeneCounts(List getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) {
+ var alterationCountByGenes = studyViewRepository.getMutatedGenes(studyViewFilter, categorizedClinicalDataCountFilter);
+ var profiledCountsMap = studyViewRepository.getTotalProfiledCounts(studyViewFilter,
+ categorizedClinicalDataCountFilter,
+ AlterationType.MUTATION_EXTENDED.toString());
+ var profiledCountWithoutGenePanelData = studyViewRepository.getFilteredSamplesCount(studyViewFilter, categorizedClinicalDataCountFilter);
+ var matchingGenePanelIdsMap = studyViewRepository.getMatchingGenePanelIds(studyViewFilter,
+ categorizedClinicalDataCountFilter, AlterationType.MUTATION_EXTENDED.toString());
+
+ alterationCountByGenes.parallelStream()
+ .forEach(alterationCountByGene -> {
+ String hugoGeneSymbol = alterationCountByGene.getHugoGeneSymbol();
+ var matchingGenePanelIds = matchingGenePanelIdsMap.get(hugoGeneSymbol) != null ?
+ matchingGenePanelIdsMap.get(hugoGeneSymbol).getMatchingGenePanelIds() : null;
+
+ int totalProfiledCount = getTotalProfiledCount(alterationCountByGene.getHugoGeneSymbol(),
+ profiledCountsMap, profiledCountWithoutGenePanelData, matchingGenePanelIds);
+
+ alterationCountByGene.setNumberOfProfiledCases(totalProfiledCount);
+
+ alterationCountByGene.setMatchingGenePanelIds(matchingGenePanelIds);
+ });
+
+ return alterationCountByGenes;
+ }
+
+    /** Returns the gene's profiled-case count, or the filtered-sample total when the gene has no usable panel data. */
+    private int getTotalProfiledCount(@NonNull String hugoGeneSymbol, @NonNull Map profiledCountsMap,
+                                      int profiledCountWithoutGenePanelData, @Nullable Set matchingGenePanelIds) {
+        // A gene absent from profiledCountsMap also takes the fallback rather than dereferencing a null entry.
+        var profiledCount = hasGenePanelData(matchingGenePanelIds) ? profiledCountsMap.get(hugoGeneSymbol) : null;
+        return profiledCount != null
+            ? profiledCount.getNumberOfProfiledCases()
+            : profiledCountWithoutGenePanelData;
+    }
+
+ private boolean hasGenePanelData(@Nullable Set matchingGenePanelIds) { // true only when the set holds "WES" plus at least one other panel id
+ return matchingGenePanelIds != null && matchingGenePanelIds.contains(WHOLE_EXOME_SEQUENCING)
+ && matchingGenePanelIds.size() > 1; // NOTE(review): a set of targeted panels without "WES" yields false - confirm this is intended
+ }
+
private Pair, Long> getAlterationGeneCounts(
List molecularProfileCaseIdentifiers,
boolean includeFrequency,
diff --git a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java
index e3961a9eeed..3178105b6da 100644
--- a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java
+++ b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java
@@ -8,6 +8,7 @@
import org.cbioportal.persistence.StudyViewRepository;
import org.cbioportal.persistence.enums.ClinicalAttributeDataSource;
import org.cbioportal.persistence.enums.ClinicalAttributeDataType;
+import org.cbioportal.service.AlterationCountService;
import org.cbioportal.service.StudyViewColumnarService;
import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter;
import org.cbioportal.web.parameter.StudyViewFilter;
@@ -27,21 +28,24 @@ public class StudyViewColumnarServiceImpl implements StudyViewColumnarService {
private final StudyViewRepository studyViewRepository;
+ private final AlterationCountService alterationCountService;
+
@Autowired
- public StudyViewColumnarServiceImpl(StudyViewRepository studyViewRepository) {
+ public StudyViewColumnarServiceImpl(StudyViewRepository studyViewRepository, AlterationCountService alterationCountService) {
this.studyViewRepository = studyViewRepository;
+ this.alterationCountService = alterationCountService;
}
-
+
@Override
public List getFilteredSamples(StudyViewFilter studyViewFilter) {
CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter);
- return studyViewRepository.getFilteredSamples(studyViewFilter, categorizedClinicalDataCountFilter);
+ return studyViewRepository.getFilteredSamples(studyViewFilter, categorizedClinicalDataCountFilter);
}
-
+
@Override
public List getMutatedGenes(StudyViewFilter studyViewFilter) {
CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter);
- return studyViewRepository.getMutatedGenes(studyViewFilter, categorizedClinicalDataCountFilter);
+ return alterationCountService.getMutatedGenes(studyViewFilter, categorizedClinicalDataCountFilter);
}
@Override
@@ -59,11 +63,11 @@ public List getClinicalDataCounts(StudyViewFilter studyVi
}
private CategorizedClinicalDataCountFilter extractClinicalDataCountFilters(final StudyViewFilter studyViewFilter) {
- if(clinicalAttributeNameMap.isEmpty()) {
+ if (clinicalAttributeNameMap.isEmpty()) {
buildClinicalAttributeNameMap();
}
- if(studyViewFilter.getClinicalDataFilters() == null) {
+ if (studyViewFilter.getClinicalDataFilters() == null) {
return CategorizedClinicalDataCountFilter.getBuilder().build();
}
@@ -90,7 +94,7 @@ private CategorizedClinicalDataCountFilter extractClinicalDataCountFilters(final
private void buildClinicalAttributeNameMap() {
List clinicalAttributeDataSources = List.of(ClinicalAttributeDataSource.values());
- for(ClinicalAttributeDataSource clinicalAttributeDataSource : clinicalAttributeDataSources) {
+ for (ClinicalAttributeDataSource clinicalAttributeDataSource : clinicalAttributeDataSources) {
String categoricalKey = clinicalAttributeDataSource.getValue() + ClinicalAttributeDataType.CATEGORICAL;
String numericKey = clinicalAttributeDataSource.getValue() + ClinicalAttributeDataType.NUMERIC;
clinicalAttributeNameMap.put(categoricalKey, studyViewRepository.getClinicalDataAttributeNames(clinicalAttributeDataSource, ClinicalAttributeDataType.CATEGORICAL));
diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse.sql b/src/main/resources/db-scripts/clickhouse/clickhouse.sql
index 97729afabaf..fb8f2c94885 100644
--- a/src/main/resources/db-scripts/clickhouse/clickhouse.sql
+++ b/src/main/resources/db-scripts/clickhouse/clickhouse.sql
@@ -1,5 +1,7 @@
DROP TABLE IF EXISTS genomic_event_mutation;
DROP TABLE IF EXISTS genomic_event;
+DROP TABLE IF EXISTS sample_to_gene_panel;
+DROP TABLE IF EXISTS gene_panel_to_gene;
CREATE TABLE IF NOT EXISTS genomic_event
(
@@ -11,7 +13,7 @@ CREATE TABLE IF NOT EXISTS genomic_event
cancer_study_identifier String,
genetic_profile_stable_id String
) ENGINE = MergeTree
- ORDER BY ( variant_type, sample_unique_id, hugo_gene_symbol);
+ORDER BY ( variant_type, sample_unique_id, hugo_gene_symbol);
CREATE TABLE IF NOT EXISTS genomic_event_mutation
(
@@ -26,4 +28,20 @@ CREATE TABLE IF NOT EXISTS genomic_event_mutation
driver_filter String,
driver_tiers_filter String
) ENGINE = MergeTree
-ORDER BY ( hugo_gene_symbol, genetic_profile_stable_id);
\ No newline at end of file
+ORDER BY ( hugo_gene_symbol, genetic_profile_stable_id);
+
+CREATE TABLE IF NOT EXISTS sample_to_gene_panel -- populated from sample_profile by clickhouse_migration.sql
+(
+    sample_unique_id String, -- cancer study identifier and sample stable id joined by an underscore
+    alteration_type String, -- genetic_alteration_type of the genetic profile
+    gene_panel_id String, -- gene panel stable id, or WES when the sample profile has no gene panel
+    cancer_study_identifier String
+) ENGINE = MergeTree()
+ORDER BY (gene_panel_id, alteration_type, sample_unique_id);
+
+CREATE TABLE IF NOT EXISTS gene_panel_to_gene -- gene membership per panel, populated by clickhouse_migration.sql
+(
+    gene_panel_id String, -- gene panel stable id, or the synthetic WES panel
+    gene String -- Hugo gene symbol
+) ENGINE = MergeTree()
+ORDER BY (gene_panel_id);
\ No newline at end of file
diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql b/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql
index 8cea8b08993..5595c729f8e 100644
--- a/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql
+++ b/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql
@@ -88,4 +88,30 @@ FROM
INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id
INNER JOIN gene gene1 ON sv.site1_entrez_gene_id = gene1.entrez_gene_id
INNER JOIN sample_profile on s.internal_id = sample_profile.sample_id
- INNER JOIN gene_panel on sample_profile.panel_id = gene_panel.internal_id;
\ No newline at end of file
+ INNER JOIN gene_panel on sample_profile.panel_id = gene_panel.internal_id;
+
+INSERT INTO sample_to_gene_panel
+select
+    concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id,
+    genetic_alteration_type as alteration_type,
+    ifnull(gene_panel.stable_id, 'WES') as gene_panel_id, -- NOTE(review): relies on unmatched left-join rows yielding NULL, which in ClickHouse depends on join_use_nulls and column nullability - confirm
+    cs.cancer_study_identifier as cancer_study_identifier
+from sample_profile sp
+    inner join genetic_profile gp on sp.genetic_profile_id = gp.genetic_profile_id -- reference the sp alias declared above, not the bare table name
+    left join gene_panel on sp.panel_id = gene_panel.internal_id -- left join keeps sample profiles that have no matching gene panel
+    inner join sample on sp.sample_id = sample.internal_id
+    inner join cancer_study cs on gp.cancer_study_id = cs.cancer_study_id;
+
+INSERT INTO gene_panel_to_gene
+select
+ gp.stable_id as gene_panel_id,
+ g.hugo_gene_symbol as gene
+from gene_panel gp
+ inner join gene_panel_list gpl ON gp.internal_id = gpl.internal_id -- one row per gene listed for each panel
+ inner join gene g ON g.entrez_gene_id = gpl.gene_id
+UNION ALL -- append a synthetic WES panel that contains every gene with a positive entrez id
+select
+ 'WES' as gene_panel_id,
+ gene.hugo_gene_symbol as gene
+from gene
+where gene.entrez_gene_id > 0;
\ No newline at end of file
diff --git a/src/main/resources/db-scripts/clickhouse/materialized_views.sql b/src/main/resources/db-scripts/clickhouse/materialized_views.sql
index d14d18b88e5..6704ecd9bad 100644
--- a/src/main/resources/db-scripts/clickhouse/materialized_views.sql
+++ b/src/main/resources/db-scripts/clickhouse/materialized_views.sql
@@ -11,9 +11,6 @@ DROP VIEW IF EXISTS patient_clinical_attribute_numeric_mv;
DROP VIEW IF EXISTS patient_clinical_attribute_categorical_mv;
DROP VIEW IF EXISTS sample_columnstore_mv;
DROP VIEW IF EXISTS sample_list_columnstore_mv;
-DROP VIEW IF EXISTS genomic_event_mutation_mv;
-DROP VIEW IF EXISTS genomic_event_cna_mv;
-DROP VIEW IF EXISTS genomic_event_struct_var_mv;
CREATE TABLE sample_clinical_attribute_numeric
(
@@ -216,162 +213,13 @@ FROM sample_list as sl
INNER JOIN sample AS s on s.internal_id = sll.sample_id
INNER JOIN cancer_study cs on sl.cancer_study_id = cs.cancer_study_id;
-CREATE TABLE IF NOT EXISTS genomic_event
-(
- sample_unique_id String,
- variant String,
- variant_type String,
- hugo_gene_symbol String,
- gene_panel_stable_id String,
- cancer_study_identifier String,
- genetic_profile_stable_id String
-) ENGINE = MergeTree
-ORDER BY
- (
- variant_type,
- sample_unique_id,
- hugo_gene_symbol
- );
-
-Insert into genomic_event
-SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id,
- me.protein_change as variant,
- 'mutation' as variant_type,
- gene.hugo_gene_symbol as hugo_gene_symbol,
- gp.stable_id as gene_panel_stable_id,
- cs.cancer_study_identifier as cancer_study_identifier,
- g.stable_id as genetic_profile_stable_id
-FROM mutation
- INNER JOIN mutation_event as me on mutation.mutation_event_id = me.mutation_event_id
- INNER JOIN sample_profile sp
- on mutation.sample_id = sp.sample_id and mutation.genetic_profile_id = sp.genetic_profile_id
- LEFT JOIN gene_panel gp on sp.panel_id = gp.internal_id
- LEFT JOIN genetic_profile g on sp.genetic_profile_id = g.genetic_profile_id
- INNER JOIN cancer_study cs on g.cancer_study_id = cs.cancer_study_id
- INNER JOIN sample on mutation.sample_id = sample.internal_id
- LEFT JOIN gene on mutation.entrez_gene_id = gene.entrez_gene_id
-UNION ALL
-SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id,
- toString(ce.alteration) as variant,
- 'cna' as variant_type,
- gene.hugo_gene_symbol as hugo_gene_symbol,
- gp.stable_id as gene_panel_stable_id,
- cs.cancer_study_identifier as cancer_study_identifier,
- g.stable_id as genetic_profile_stable_id
-FROM cna_event ce
- INNER JOIN sample_cna_event sce ON ce.cna_event_id = sce.cna_event_id
- INNER JOIN sample_profile sp ON sce.sample_id = sp.sample_id AND sce.genetic_profile_id = sp.genetic_profile_id
- INNER JOIN gene_panel gp ON sp.panel_id = gp.internal_id
- INNER JOIN genetic_profile g ON sp.genetic_profile_id = g.genetic_profile_id
- INNER JOIN cancer_study cs ON g.cancer_study_id = cs.cancer_study_id
- INNER JOIN sample ON sce.sample_id = sample.internal_id
- INNER JOIN gene ON ce.entrez_gene_id = gene.entrez_gene_id
-UNION ALL
-SELECT
- concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id,
- event_info as variant,
- 'structural_variant' as variant_type,
- gene2.hugo_gene_symbol as hugo_gene_symbol,
- gene_panel.stable_id as gene_panel_stable_id,
- cs.cancer_study_identifier as cancer_study_identifier,
- gp.stable_id as genetic_profile_stable_id
-FROM
- structural_variant sv
- INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id
- INNER JOIN sample s ON sv.sample_id = s.internal_id
- INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id
- INNER JOIN gene gene2 ON sv.site2_entrez_gene_id = gene2.entrez_gene_id
- INNER JOIN sample_profile on s.internal_id = sample_profile.sample_id
- INNER JOIN gene_panel on sample_profile.panel_id = gene_panel.internal_id
-UNION ALL
-SELECT
- concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id,
- event_info as variant,
- 'structural_variant' as variant_type,
- gene1.hugo_gene_symbol as hugo_gene_symbol,
- gene_panel.stable_id as gene_panel_stable_id,
- cs.cancer_study_identifier as cancer_study_identifier,
- gp.stable_id as genetic_profile_stable_id
-FROM
- structural_variant sv
- INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id
- INNER JOIN sample s ON sv.sample_id = s.internal_id
- INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id
- INNER JOIN gene gene1 ON sv.site1_entrez_gene_id = gene1.entrez_gene_id
- INNER JOIN sample_profile on s.internal_id = sample_profile.sample_id
- INNER JOIN gene_panel on sample_profile.panel_id = gene_panel.internal_id;
-
-CREATE MATERIALIZED VIEW genomic_event_mutation_mv TO genomic_event AS
-SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id,
- me.protein_change as variant,
- 'mutation' as variant_type,
- gene.hugo_gene_symbol as hugo_gene_symbol,
- gp.stable_id as gene_panel_stable_id,
- cs.cancer_study_identifier as cancer_study_identifier,
- g.stable_id as genetic_profile_stable_id
-FROM mutation
- LEFT JOIN mutation_event as me on mutation.mutation_event_id = me.mutation_event_id
- LEFT JOIN sample_profile sp
- on mutation.sample_id = sp.sample_id and mutation.genetic_profile_id = sp.genetic_profile_id
- LEFT JOIN gene_panel gp on sp.panel_id = gp.internal_id
- LEFT JOIN genetic_profile g on sp.genetic_profile_id = g.genetic_profile_id
- LEFT JOIN cancer_study cs on g.cancer_study_id = cs.cancer_study_id
- LEFT JOIN sample on mutation.sample_id = sample.internal_id
- LEFT JOIN gene on mutation.entrez_gene_id = gene.entrez_gene_id;
-
-CREATE MATERIALIZED VIEW genomic_event_cna_mv TO genomic_event AS
-SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id,
- toString(ce.alteration) as variant,
- 'cna' as variant_type,
- gene.hugo_gene_symbol as hugo_gene_symbol,
- gp.stable_id as gene_panel_stable_id,
- cs.cancer_study_identifier as cancer_study_identifier,
- gp.stable_id as genetic_profile_stable_id
-FROM cna_event ce
- INNER JOIN sample_cna_event sce ON ce.cna_event_id = sce.cna_event_id
- INNER JOIN sample_profile sp ON sce.sample_id = sp.sample_id AND sce.genetic_profile_id = sp.genetic_profile_id
- INNER JOIN gene_panel gp ON sp.panel_id = gp.internal_id
- INNER JOIN genetic_profile g ON sp.genetic_profile_id = g.genetic_profile_id
- INNER JOIN cancer_study cs ON g.cancer_study_id = cs.cancer_study_id
- INNER JOIN sample ON sce.sample_id = sample.internal_id
- INNER JOIN gene ON ce.entrez_gene_id = gene.entrez_gene_id;
-CREATE MATERIALIZED VIEW genomic_event_struct_var_mv TO genomic_event AS
-SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id,
- event_info as variant,
- 'structural_variant' as variant_type,
- gene1.hugo_gene_symbol as hugo_gene_symbol,
- gene_panel.stable_id as gene_panel_stable_id,
- cs.cancer_study_identifier as cancer_study_identifier,
- gp.stable_id as genetic_profile_stable_id
-FROM structural_variant sv
- INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id
- INNER JOIN sample s ON sv.sample_id = s.internal_id
- INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id
- INNER JOIN gene gene1 ON sv.site1_entrez_gene_id = gene1.entrez_gene_id
- INNER JOIN sample_profile on s.internal_id = sample_profile.sample_id
- INNER JOIN gene_panel on sample_profile.panel_id = gene_panel.internal_id
-UNION ALL
-SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id,
- event_info as variant,
- 'structural_variant' as variant_type,
- gene2.hugo_gene_symbol as hugo_gene_symbol,
- gene_panel.stable_id as gene_panel_stable_id,
- cs.cancer_study_identifier as cancer_study_identifier,
- gp.stable_id as genetic_profile_stable_id
-FROM structural_variant sv
- INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id
- INNER JOIN sample s ON sv.sample_id = s.internal_id
- INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id
- INNER JOIN gene gene2 ON sv.site2_entrez_gene_id = gene2.entrez_gene_id
- INNER JOIN sample_profile on s.internal_id = sample_profile.sample_id
- INNER JOIN gene_panel on sample_profile.panel_id = gene_panel.internal_id;
-- SAMPLE_MV
DROP VIEW IF EXISTS sample_mv;
CREATE MATERIALIZED VIEW sample_mv
ENGINE = AggregatingMergeTree()
- ORDER BY cancer_study_identifier
+ ORDER BY internal_id
SETTINGS allow_nullable_key = 1
POPULATE
AS
diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml
index c200c081586..5ed126acaa6 100644
--- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml
+++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml
@@ -26,7 +26,7 @@
SELECT sample_unique_id
- FROM genomic_event_view
+ FROM genomic_event_mutation
genetic_profile_stable_id IN
diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml
index ab48ba8e3e5..d177d4a5892 100644
--- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml
+++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml
@@ -22,13 +22,11 @@
ORDER BY sample_stable_id ASC;
-
-