From 5474b2343aba7868120068aaa1ae37b065ff1dab Mon Sep 17 00:00:00 2001 From: Charles Haynes Date: Fri, 26 Jul 2024 09:36:49 -0400 Subject: [PATCH 1/5] Fix alteration count services --- .../persistence/StudyViewRepository.java | 4 ++- .../mybatisclickhouse/StudyViewMapper.java | 5 ++-- .../StudyViewMyBatisRepository.java | 15 ++++++++-- .../impl/AlterationCountServiceImpl.java | 29 +++++++------------ .../mybatisclickhouse/StudyViewMapper.xml | 20 ++++++++++--- 5 files changed, 45 insertions(+), 28 deletions(-) diff --git a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java index 1c93f3ef57d..cc2b2629a58 100644 --- a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java +++ b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java @@ -38,7 +38,7 @@ public interface StudyViewRepository { List getCaseListDataCounts(StudyViewFilter studyViewFilter); - Map getTotalProfiledCounts(StudyViewFilter studyViewFilter, String alterationType); + Map getTotalProfiledCounts(StudyViewFilter studyViewFilter, String alterationType); int getFilteredSamplesCount(StudyViewFilter studyViewFilter); @@ -46,5 +46,7 @@ public interface StudyViewRepository { int getTotalProfiledCountsByAlterationType(StudyViewFilter studyViewFilter, String alterationType); + int getSampleProfileCountWithoutPanelData(StudyViewFilter studyViewFilter, String alterationType); + List getClinicalEventTypeCounts(StudyViewFilter studyViewFilter); } diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java index 0e762a1d07a..a249333641e 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java @@ -44,14 +44,15 @@ List getClinicalDataCounts(StudyViewFilter studyViewFilter, C List getPatientClinicalDataFromStudyViewFilter(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, List attributeIds); - @MapKey("hugoGeneSymbol") - Map getTotalProfiledCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); + List getTotalProfiledCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); int getFilteredSamplesCount(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters); List getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); int getTotalProfiledCountByAlterationType(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); + + int getSampleProfileCountWithoutPanelData(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); List getClinicalEventTypeCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters); } diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java index a0709db0e78..1690ba82a8b 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java @@ -132,10 +132,13 @@ public List getPatientClinicalData(StudyViewFilter studyViewFilter } @Override - public Map getTotalProfiledCounts(StudyViewFilter studyViewFilter, String alterationType) { + public Map getTotalProfiledCounts(StudyViewFilter studyViewFilter, String alterationType) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getTotalProfiledCounts(studyViewFilter, categorizedClinicalDataCountFilter, - shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter), alterationType); + shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter), alterationType) + .stream() + .collect(Collectors.groupingBy(AlterationCountByGene::getHugoGeneSymbol, + Collectors.mapping(AlterationCountByGene::getNumberOfProfiledCases, Collectors.summingInt(Integer::intValue)))); } @Override @@ -162,6 +165,14 @@ public int getTotalProfiledCountsByAlterationType(StudyViewFilter studyViewFilte shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter), alterationType); } + @Override + public int getSampleProfileCountWithoutPanelData(StudyViewFilter studyViewFilter, String alterationType) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); + return mapper.getSampleProfileCountWithoutPanelData(studyViewFilter, categorizedClinicalDataCountFilter, + shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter), alterationType); + } + + @Override public List getClinicalEventTypeCounts(StudyViewFilter studyViewFilter) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); diff --git a/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java b/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java index 29f94f0fd0f..a8aa0dbe723 100644 --- a/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java +++ b/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java @@ -273,37 +273,28 @@ public List getStructuralVariantGenes(StudyViewFilter stu private < T extends AlterationCountByGene> List populateAlterationCounts(@NonNull List alterationCounts, @NonNull StudyViewFilter studyViewFilter, @NonNull AlterationType alterationType) { - var updatedAlterationCounts = alterationCounts.stream().map(SerializationUtils::clone).toList(); - var profiledCountsMap = studyViewRepository.getTotalProfiledCounts(studyViewFilter, - alterationType.toString()); - var profiledCountWithoutGenePanelData = studyViewRepository.getTotalProfiledCountsByAlterationType(studyViewFilter, alterationType.toString()); - var matchingGenePanelIdsMap = studyViewRepository.getMatchingGenePanelIds(studyViewFilter, alterationType.toString()); + final int profiledCountWithoutGenePanelData = studyViewRepository.getTotalProfiledCountsByAlterationType(studyViewFilter, alterationType.toString()); + var profiledCountsMap = studyViewRepository.getTotalProfiledCounts(studyViewFilter, alterationType.toString()); + final var matchingGenePanelIdsMap = studyViewRepository.getMatchingGenePanelIds(studyViewFilter, alterationType.toString()); + final int sampleProfileCountWithoutGenePanelData = studyViewRepository.getSampleProfileCountWithoutPanelData(studyViewFilter, alterationType.toString()); - updatedAlterationCounts.parallelStream() + alterationCounts.parallelStream() .forEach(alterationCountByGene -> { String hugoGeneSymbol = alterationCountByGene.getHugoGeneSymbol(); Set matchingGenePanelIds = matchingGenePanelIdsMap.get(hugoGeneSymbol) != null ? matchingGenePanelIdsMap.get(hugoGeneSymbol) : Collections.emptySet(); - - int totalProfiledCount = getTotalProfiledCount(hugoGeneSymbol, - profiledCountsMap, profiledCountWithoutGenePanelData, matchingGenePanelIds); + + int totalProfiledCount = hasGenePanelData(matchingGenePanelIds) + ? profiledCountsMap.getOrDefault(hugoGeneSymbol, 0) + sampleProfileCountWithoutGenePanelData + : profiledCountWithoutGenePanelData; alterationCountByGene.setNumberOfProfiledCases(totalProfiledCount); alterationCountByGene.setMatchingGenePanelIds(matchingGenePanelIds); }); - return updatedAlterationCounts; + return alterationCounts; } - private int getTotalProfiledCount(@NonNull String hugoGeneSymbol, @NonNull Map profiledCountsMap, - int profiledCountWithoutGenePanelData, @NonNull Set matchingGenePanelIds) { - int totalProfiledCount = profiledCountWithoutGenePanelData; - - if (hasGenePanelData(matchingGenePanelIds) && profiledCountsMap.containsKey(hugoGeneSymbol)) { - totalProfiledCount = profiledCountsMap.get(hugoGeneSymbol).getNumberOfProfiledCases(); - } - return totalProfiledCount; - } private boolean hasGenePanelData(@NonNull Set matchingGenePanelIds) { return matchingGenePanelIds.contains(WHOLE_EXOME_SEQUENCING) diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml index 576c5b7378b..335569c8fcd 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml @@ -233,14 +233,15 @@ - + SELECT gene as hugoGeneSymbol, COUNT(*) as numberOfProfiledCases FROM sample_to_gene_panel_derived stgp INNER JOIN gene_panel_to_gene_derived gptg on stgp.gene_panel_id = gptg.gene_panel_id - stgp.alteration_type = '${alterationType}' + stgp.alteration_type = '${alterationType}' + AND stgp.gene_panel_id != 'WES' AND @@ -248,7 +249,18 @@ GROUP BY gptg.gene; - + + From 37979dd03cccc7cec4ad07896af8dc6fec1201ce Mon Sep 17 00:00:00 2001 From: Charles Haynes Date: Fri, 26 Jul 2024 09:41:05 -0400 Subject: [PATCH 2/5] Only add genes that are protein coding to gene_panel_to_gene table for WES --- src/main/resources/db-scripts/clickhouse/clickhouse.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse.sql b/src/main/resources/db-scripts/clickhouse/clickhouse.sql index 5cad1670464..3c4b42cbc1f 100644 --- a/src/main/resources/db-scripts/clickhouse/clickhouse.sql +++ b/src/main/resources/db-scripts/clickhouse/clickhouse.sql @@ -47,7 +47,7 @@ SELECT 'WES' AS gene_panel_id, gene.hugo_gene_symbol AS gene FROM gene -WHERE gene.entrez_gene_id > 0; +WHERE gene.entrez_gene_id > 0 AND gene.type = 'protein-coding'; CREATE TABLE sample_derived ( From 7313f27bedd96d26deab7a69ed23b0051f1175e3 Mon Sep 17 00:00:00 2001 From: Charles Haynes Date: Mon, 29 Jul 2024 09:20:39 -0400 Subject: [PATCH 3/5] fix test --- .../persistence/mybatisclickhouse/StudyViewMapperTest.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java index f8019f72e54..9b4a98ad162 100644 --- a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java +++ b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java @@ -1,5 +1,6 @@ package org.cbioportal.persistence.mybatisclickhouse; +import org.cbioportal.model.AlterationCountByGene; import org.cbioportal.model.AlterationFilter; import org.cbioportal.model.MutationEventType; import org.cbioportal.persistence.helper.AlterationFilterHelper; @@ -114,8 +115,9 @@ public void getTotalProfiledCountsByGene() { assertEquals(3, totalProfiledCountsMap.size()); - var akt2TotalProfiledCounts = totalProfiledCountsMap.get("akt2"); - assertEquals(4, akt2TotalProfiledCounts.getNumberOfProfiledCases().intValue()); + var akt2TotalProfiledCounts = totalProfiledCountsMap.stream().filter(c -> c.getHugoGeneSymbol().equals("akt2")).findFirst(); + assertTrue(akt2TotalProfiledCounts.isPresent()); + assertEquals(4, akt2TotalProfiledCounts.get().getNumberOfProfiledCases().intValue()); } @Test From cef2f282c6354b1821eb280c49454cc72f1db10d Mon Sep 17 00:00:00 2001 From: Charles Haynes Date: Mon, 29 Jul 2024 10:16:00 -0400 Subject: [PATCH 4/5] fix sonar issues --- .../persistence/mybatisclickhouse/StudyViewMapperTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java index 9b4a98ad162..0a6279ae937 100644 --- a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java +++ b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java @@ -1,6 +1,5 @@ package org.cbioportal.persistence.mybatisclickhouse; -import org.cbioportal.model.AlterationCountByGene; import org.cbioportal.model.AlterationFilter; import org.cbioportal.model.MutationEventType; import org.cbioportal.persistence.helper.AlterationFilterHelper; From 67cc1241f46e512731825e05c671e12d875da944 Mon Sep 17 00:00:00 2001 From: Charles Haynes Date: Mon, 29 Jul 2024 12:11:11 -0400 Subject: [PATCH 5/5] Add documentation for filtering out WES --- .../persistence/mybatisclickhouse/StudyViewMapper.xml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml index 335569c8fcd..e4e226be8c2 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml @@ -232,7 +232,13 @@ JOIN cancer_study cs on cs.cancer_study_id = cam.cancer_study_id - +