Skip to content

Commit

Permalink
Feature/mutated genes total profiled counts and gene panels (#10824)
Browse files Browse the repository at this point in the history
* Add Support for TotalProfiledCase Counts for Mutated-genes endpoint.

* Create sql files to create new tables

* Add unit test for totalProfiledCount

* Add matching gene panel ids

* Add TotalProfiledCountsWithoutPanelData

* Add profileCount for genes without gene panel data

* Add Comments for SQL

* Update matching Gene Panel Ids

* Clean up code

* Fix test

* Add query to get correct Gene Panels

* Fix unit test

* Add comments
  • Loading branch information
haynescd authored and haynescd committed Nov 24, 2024
1 parent b5fb29e commit f566075
Show file tree
Hide file tree
Showing 19 changed files with 363 additions and 225 deletions.
2 changes: 2 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,7 @@
<include>clickhouse/clickhouse.sql</include>
<include>clickhouse/clickhouse_views.sql</include>
<include>clickhouse/clickhouse_migration.sql</include>
<include>clickhouse/materialized_views.sql</include>
</includes>
</resource>
</resources>
Expand All @@ -458,6 +459,7 @@
<include>clickhouse/clickhouse.sql</include>
<include>clickhouse/views.sql</include>
<include>clickhouse/clickhouse_migration.sql</include>
<include>clickhouse/materialized_views.sql</include>
</includes>
</testResource>
<testResource>
Expand Down
6 changes: 4 additions & 2 deletions src/main/java/org/cbioportal/model/AlterationType.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package org.cbioportal.model;

public enum AlterationType {
MUTATION,
COPY_NUMBER_ALTERATION
MUTATION_EXTENDED,
COPY_NUMBER_ALTERATION,
STRUCTURAL_VARIANT,
GENERIC_ASSAY;
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.cbioportal.web.parameter.StudyViewFilter;

import java.util.List;
import java.util.Map;

public interface StudyViewRepository {
List<Sample> getFilteredSamples(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter);
Expand All @@ -28,4 +29,10 @@ public interface StudyViewRepository {

List<String> getClinicalDataAttributeNames(ClinicalAttributeDataSource clinicalAttributeDataSource, ClinicalAttributeDataType dataType);

/**
 * Retrieves per-gene profiled-case information for one alteration type under the given filters.
 *
 * @param studyViewFilter the study view filter to apply
 * @param categorizedClinicalDataCountFilter the categorized clinical-data filters to apply
 * @param alterationType alteration type name; the service in this change passes
 *                       {@code AlterationType.MUTATION_EXTENDED.toString()}
 * @return map of {@link AlterationCountByGene} entries; the MyBatis-backed mapper keys it by
 *         {@code hugoGeneSymbol}, and callers read {@code getNumberOfProfiledCases()} from the values
 */
Map<String, AlterationCountByGene> getTotalProfiledCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType);

/**
 * Returns a sample count for the given filters.
 * The alteration count service uses this value as the profiled-case count for genes
 * that it does not consider to have gene panel data.
 * NOTE(review): presumably the number of samples matching the filter; the query itself is not visible here.
 *
 * @param studyViewFilter the study view filter to apply
 * @param categorizedClinicalDataCountFilter the categorized clinical-data filters to apply
 * @return the sample count
 */
int getFilteredSamplesCount(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter);

/**
 * Retrieves, per gene, the gene panel ids associated with that gene for one alteration type.
 *
 * @param studyViewFilter the study view filter to apply
 * @param categorizedClinicalDataCountFilter the categorized clinical-data filters to apply
 * @param alterationType alteration type name; the service in this change passes
 *                       {@code AlterationType.MUTATION_EXTENDED.toString()}
 * @return map of {@link AlterationCountByGene} entries; the MyBatis-backed mapper keys it by
 *         {@code hugoGeneSymbol}, and callers read {@code getMatchingGenePanelIds()} from the values
 */
Map<String, AlterationCountByGene> getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType);

}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.cbioportal.persistence.mybatisclickhouse;

import org.apache.ibatis.annotations.MapKey;
import org.cbioportal.model.AlterationCountByGene;
import org.cbioportal.model.ClinicalData;
import org.cbioportal.model.ClinicalDataCount;
Expand All @@ -9,6 +10,7 @@
import org.cbioportal.web.parameter.StudyViewFilter;

import java.util.List;
import java.util.Map;

public interface StudyViewMapper {
List<Sample> getFilteredSamples(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters);
Expand All @@ -31,4 +33,12 @@ List<ClinicalDataCount> getClinicalDataCounts(StudyViewFilter studyViewFilter, C

List<ClinicalData> getPatientClinicalDataFromStudyViewFilter(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, List<String> attributeIds);

/**
 * Mapper statement returning profiled-case information per gene for one alteration type.
 * {@code @MapKey("hugoGeneSymbol")} makes MyBatis key the resulting map by each row's
 * {@code hugoGeneSymbol} property.
 *
 * @param applyPatientIdFilters flag computed by the repository via
 *                              {@code shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter)}
 * @param alterationType alteration type name forwarded unchanged from the repository
 */
@MapKey("hugoGeneSymbol")
Map<String, AlterationCountByGene> getTotalProfiledCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType);

/**
 * Mapper statement returning a sample count for the given filters.
 * NOTE(review): presumably counts the samples selected by the filter; the SQL is defined
 * in the mapper XML, which is not visible here.
 *
 * @param applyPatientIdFilters flag computed by the repository via
 *                              {@code shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter)}
 */
int getFilteredSamplesCount(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters);

/**
 * Mapper statement returning the matching gene panel ids per gene for one alteration type.
 * {@code @MapKey("hugoGeneSymbol")} makes MyBatis key the resulting map by each row's
 * {@code hugoGeneSymbol} property.
 *
 * @param applyPatientIdFilters flag computed by the repository via
 *                              {@code shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter)}
 * @param alterationType alteration type name forwarded unchanged from the repository
 */
@MapKey("hugoGeneSymbol")
Map<String, AlterationCountByGene> getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType);

}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.springframework.stereotype.Repository;

import java.util.List;
import java.util.Map;

@Repository
public class StudyViewMyBatisRepository implements StudyViewRepository {
Expand Down Expand Up @@ -74,4 +75,22 @@ public List<ClinicalData> getSampleClinicalData(StudyViewFilter studyViewFilter,
public List<ClinicalData> getPatientClinicalData(StudyViewFilter studyViewFilter, List<String> attributeIds, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) {
    // Work out the patient-id filter flag first, then hand everything to the mapper.
    final boolean applyPatientIdFilters = shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter);
    return mapper.getPatientClinicalDataFromStudyViewFilter(
        studyViewFilter,
        categorizedClinicalDataCountFilter,
        applyPatientIdFilters,
        attributeIds);
}

/**
 * Delegates to the mapper to fetch per-gene profiled-case information for one alteration type.
 *
 * @param studyViewFilter the study view filter to apply
 * @param categorizedClinicalDataCountFilter the categorized clinical-data filters to apply; also used to
 *                                           decide whether patient-id filters are applied
 * @param alterationType alteration type name forwarded unchanged to the mapper
 * @return the mapper's result, keyed by {@code hugoGeneSymbol} (see the mapper's {@code @MapKey})
 */
@Override
public Map<String, AlterationCountByGene> getTotalProfiledCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType) {
    return mapper.getTotalProfiledCounts(studyViewFilter, categorizedClinicalDataCountFilter,
        shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), alterationType);
}

/**
 * Delegates to the mapper to obtain the sample count for the given filters.
 */
@Override
public int getFilteredSamplesCount(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) {
    final boolean applyPatientIdFilters = shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter);
    return mapper.getFilteredSamplesCount(studyViewFilter, categorizedClinicalDataCountFilter, applyPatientIdFilters);
}

/**
 * Delegates to the mapper to fetch the matching gene panel ids per gene for one alteration type.
 */
@Override
public Map<String, AlterationCountByGene> getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType) {
    final boolean applyPatientIdFilters = shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter);
    return mapper.getMatchingGenePanelIds(
        studyViewFilter,
        categorizedClinicalDataCountFilter,
        applyPatientIdFilters,
        alterationType);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package org.cbioportal.persistence.mybatisclickhouse.typehandler;

import org.apache.ibatis.type.BaseTypeHandler;
import org.apache.ibatis.type.JdbcType;

import java.sql.CallableStatement;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashSet;
import java.util.Set;
import java.util.Arrays;

/**
 * Read-only MyBatis type handler that converts a SQL array column into a {@code Set<String>}.
 *
 * <p>Writing is not supported: {@link #setNonNullParameter} always throws. All three read paths share
 * one conversion routine, which tolerates SQL {@code NULL} (returns {@code null}, per the
 * {@code getNullableResult} contract) and drivers that materialize arrays as {@code Object[]}
 * rather than {@code String[]}.
 */
public class GenePanelIdsTypeHandler extends BaseTypeHandler<Set<String>> {

    /**
     * Always fails: this handler is only meant for reading query results.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void setNonNullParameter(PreparedStatement ps, int i, Set<String> parameter, JdbcType jdbcType) throws SQLException {
        throw new UnsupportedOperationException("Storage of GenePanelIds not supported");
    }

    @Override
    public Set<String> getNullableResult(ResultSet rs, String columnName) throws SQLException {
        return toSet(rs.getArray(columnName));
    }

    @Override
    public Set<String> getNullableResult(ResultSet rs, int columnIndex) throws SQLException {
        return toSet(rs.getArray(columnIndex));
    }

    @Override
    public Set<String> getNullableResult(CallableStatement cs, int columnIndex) throws SQLException {
        return toSet(cs.getArray(columnIndex));
    }

    /**
     * Converts a JDBC array into a set of strings.
     *
     * @param sqlArray the JDBC array, or {@code null} when the column value was SQL {@code NULL}
     * @return the distinct element values, or {@code null} when there is no array to read
     * @throws SQLException if the driver fails to materialize the array or returns a non-object array
     */
    private static Set<String> toSet(java.sql.Array sqlArray) throws SQLException {
        if (sqlArray == null) {
            // JDBC returns null from getArray(...) for SQL NULL; dereferencing it would throw an NPE.
            return null;
        }
        Object raw = sqlArray.getArray();
        if (raw == null) {
            return null;
        }
        if (!(raw instanceof Object[])) {
            throw new SQLException("Expected an array of strings for gene panel ids but got "
                + raw.getClass().getName());
        }
        // String[] is an Object[], so this covers both driver representations without a hard cast.
        Object[] elements = (Object[]) raw;
        Set<String> ids = new HashSet<>();
        for (Object element : elements) {
            ids.add(element == null ? null : element.toString());
        }
        return ids;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import org.apache.commons.math3.util.Pair;
import org.cbioportal.model.*;
import org.cbioportal.model.util.Select;
import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter;
import org.cbioportal.web.parameter.StudyViewFilter;

import java.util.List;

Expand Down Expand Up @@ -75,4 +77,6 @@ Pair<List<CopyNumberCountByGene>, Long> getPatientCnaGeneCounts(List<MolecularPr
boolean includeMissingAlterationsFromGenePanel,
AlterationFilter alterationFilter);

/**
 * Returns mutated-gene alteration counts for the given study view filter.
 * In the implementation added with this change ({@code AlterationCountServiceImpl}), every returned
 * entry additionally has its number of profiled cases and its matching gene panel ids set.
 *
 * @param studyViewFilter the study view filter to apply
 * @param categorizedClinicalDataCountFilter the categorized clinical-data filters to apply
 * @return the alteration counts, one {@link AlterationCountByGene} per gene
 */
List<AlterationCountByGene> getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter);

}
Original file line number Diff line number Diff line change
@@ -1,17 +1,35 @@
package org.cbioportal.service.impl;

import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.math3.util.Pair;
import org.cbioportal.model.*;
import org.cbioportal.model.AlterationCountBase;
import org.cbioportal.model.AlterationCountByGene;
import org.cbioportal.model.AlterationCountByStructuralVariant;
import org.cbioportal.model.AlterationFilter;
import org.cbioportal.model.AlterationType;
import org.cbioportal.model.CopyNumberCountByGene;
import org.cbioportal.model.MolecularProfile;
import org.cbioportal.model.MolecularProfileCaseIdentifier;
import org.cbioportal.model.util.Select;
import org.cbioportal.persistence.AlterationRepository;
import org.cbioportal.persistence.MolecularProfileRepository;
import org.cbioportal.persistence.StudyViewRepository;
import org.cbioportal.service.AlterationCountService;
import org.cbioportal.service.util.AlterationEnrichmentUtil;
import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter;
import org.cbioportal.web.parameter.StudyViewFilter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.lang.NonNull;
import org.springframework.lang.Nullable;
import org.springframework.stereotype.Service;

import java.util.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BiFunction;
import java.util.function.Function;
Expand All @@ -20,17 +38,30 @@
@Service
public class AlterationCountServiceImpl implements AlterationCountService {

private final AlterationRepository alterationRepository;
private final AlterationEnrichmentUtil<AlterationCountByGene> alterationEnrichmentUtil;
private final AlterationEnrichmentUtil<CopyNumberCountByGene> alterationEnrichmentUtilCna;
private final AlterationEnrichmentUtil<AlterationCountByStructuralVariant> alterationEnrichmentUtilStructVar;
private final MolecularProfileRepository molecularProfileRepository;

private final StudyViewRepository studyViewRepository;

private static final String WHOLE_EXOME_SEQUENCING = "WES";


@Autowired
private AlterationRepository alterationRepository;
@Autowired
private AlterationEnrichmentUtil<AlterationCountByGene> alterationEnrichmentUtil;
@Autowired
private AlterationEnrichmentUtil<CopyNumberCountByGene> alterationEnrichmentUtilCna;
@Autowired
private AlterationEnrichmentUtil<AlterationCountByStructuralVariant> alterationEnrichmentUtilStructVar;
@Autowired
private MolecularProfileRepository molecularProfileRepository;

/**
 * Creates the service with all of its collaborators supplied by the caller (constructor injection).
 */
public AlterationCountServiceImpl(
    AlterationRepository alterationRepository,
    AlterationEnrichmentUtil<AlterationCountByGene> alterationEnrichmentUtil,
    AlterationEnrichmentUtil<CopyNumberCountByGene> alterationEnrichmentUtilCna,
    AlterationEnrichmentUtil<AlterationCountByStructuralVariant> alterationEnrichmentUtilStructVar,
    MolecularProfileRepository molecularProfileRepository,
    StudyViewRepository studyViewRepository) {
    // Repositories
    this.alterationRepository = alterationRepository;
    this.molecularProfileRepository = molecularProfileRepository;
    this.studyViewRepository = studyViewRepository;
    // Enrichment helpers, one per alteration count flavour
    this.alterationEnrichmentUtil = alterationEnrichmentUtil;
    this.alterationEnrichmentUtilCna = alterationEnrichmentUtilCna;
    this.alterationEnrichmentUtilStructVar = alterationEnrichmentUtilStructVar;
}
@Override
public Pair<List<AlterationCountByGene>, Long> getSampleAlterationGeneCounts(List<MolecularProfileCaseIdentifier> molecularProfileCaseIdentifiers,
Select<Integer> entrezGeneIds,
Expand Down Expand Up @@ -223,6 +254,48 @@ public Pair<List<CopyNumberCountByGene>, Long> getPatientCnaGeneCounts(List<Mole
);
}

/**
 * Loads the mutated-gene counts for the filter and enriches every entry in place with its
 * profiled-case count and its matching gene panel ids.
 */
@Override
public List<AlterationCountByGene> getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) {
    final String mutationAlterationType = AlterationType.MUTATION_EXTENDED.toString();

    var mutatedGenes = studyViewRepository.getMutatedGenes(studyViewFilter, categorizedClinicalDataCountFilter);
    var profiledCountsByGene = studyViewRepository.getTotalProfiledCounts(
        studyViewFilter, categorizedClinicalDataCountFilter, mutationAlterationType);
    var filteredSampleCount = studyViewRepository.getFilteredSamplesCount(
        studyViewFilter, categorizedClinicalDataCountFilter);
    var genePanelIdsByGene = studyViewRepository.getMatchingGenePanelIds(
        studyViewFilter, categorizedClinicalDataCountFilter, mutationAlterationType);

    // Each iteration only mutates its own element; the lookup maps are read-only here.
    mutatedGenes.parallelStream().forEach(gene -> {
        String geneSymbol = gene.getHugoGeneSymbol();
        var panelEntry = genePanelIdsByGene.get(geneSymbol);
        var panelIds = panelEntry == null ? null : panelEntry.getMatchingGenePanelIds();

        gene.setNumberOfProfiledCases(
            getTotalProfiledCount(geneSymbol, profiledCountsByGene, filteredSampleCount, panelIds));
        gene.setMatchingGenePanelIds(panelIds);
    });

    return mutatedGenes;
}

/**
 * Picks the profiled-case count to report for one gene.
 *
 * <p>When the gene is considered to have gene panel data (see {@code hasGenePanelData}), the count is
 * taken from {@code profiledCountsMap}; otherwise {@code profiledCountWithoutGenePanelData} is used.
 * If the map has no entry (or no count) for the gene, the method also falls back to
 * {@code profiledCountWithoutGenePanelData} instead of failing with a {@link NullPointerException}.
 * NOTE(review): confirm this fallback is the desired count for a gene missing from the profiled-count query.
 *
 * @param hugoGeneSymbol gene whose count is requested
 * @param profiledCountsMap profiled counts keyed by Hugo gene symbol
 * @param profiledCountWithoutGenePanelData default count used when no per-gene count applies
 * @param matchingGenePanelIds gene panel ids matched for the gene, may be {@code null}
 * @return the profiled-case count for the gene
 */
private int getTotalProfiledCount(@NonNull String hugoGeneSymbol, @NonNull Map<String, AlterationCountByGene> profiledCountsMap,
                                  int profiledCountWithoutGenePanelData, @Nullable Set<String> matchingGenePanelIds) {
    if (!hasGenePanelData(matchingGenePanelIds)) {
        return profiledCountWithoutGenePanelData;
    }

    AlterationCountByGene profiledCounts = profiledCountsMap.get(hugoGeneSymbol);
    if (profiledCounts == null) {
        // The two repository queries are independent, so a gene may be absent from this map.
        return profiledCountWithoutGenePanelData;
    }

    Integer numberOfProfiledCases = profiledCounts.getNumberOfProfiledCases();
    return numberOfProfiledCases != null ? numberOfProfiledCases : profiledCountWithoutGenePanelData;
}

/**
 * Tells whether the given panel id set counts as "having gene panel data": it must be non-null,
 * contain the whole-exome-sequencing id and hold more than one entry.
 */
private boolean hasGenePanelData(@Nullable Set<String> matchingGenePanelIds) {
    if (matchingGenePanelIds == null) {
        return false;
    }
    if (!matchingGenePanelIds.contains(WHOLE_EXOME_SEQUENCING)) {
        return false;
    }
    return matchingGenePanelIds.size() > 1;
}

private <S extends AlterationCountBase> Pair<List<S>, Long> getAlterationGeneCounts(
List<MolecularProfileCaseIdentifier> molecularProfileCaseIdentifiers,
boolean includeFrequency,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import org.cbioportal.persistence.StudyViewRepository;
import org.cbioportal.persistence.enums.ClinicalAttributeDataSource;
import org.cbioportal.persistence.enums.ClinicalAttributeDataType;
import org.cbioportal.service.AlterationCountService;
import org.cbioportal.service.StudyViewColumnarService;
import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter;
import org.cbioportal.web.parameter.StudyViewFilter;
Expand All @@ -27,21 +28,24 @@ public class StudyViewColumnarServiceImpl implements StudyViewColumnarService {

private final StudyViewRepository studyViewRepository;

private final AlterationCountService alterationCountService;

@Autowired
public StudyViewColumnarServiceImpl(StudyViewRepository studyViewRepository) {
public StudyViewColumnarServiceImpl(StudyViewRepository studyViewRepository, AlterationCountService alterationCountService) {
this.studyViewRepository = studyViewRepository;
this.alterationCountService = alterationCountService;
}

@Override
public List<Sample> getFilteredSamples(StudyViewFilter studyViewFilter) {
CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter);
return studyViewRepository.getFilteredSamples(studyViewFilter, categorizedClinicalDataCountFilter);
return studyViewRepository.getFilteredSamples(studyViewFilter, categorizedClinicalDataCountFilter);
}

@Override
public List<AlterationCountByGene> getMutatedGenes(StudyViewFilter studyViewFilter) {
CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter);
return studyViewRepository.getMutatedGenes(studyViewFilter, categorizedClinicalDataCountFilter);
return alterationCountService.getMutatedGenes(studyViewFilter, categorizedClinicalDataCountFilter);
}

@Override
Expand All @@ -59,11 +63,11 @@ public List<ClinicalDataCountItem> getClinicalDataCounts(StudyViewFilter studyVi
}

private CategorizedClinicalDataCountFilter extractClinicalDataCountFilters(final StudyViewFilter studyViewFilter) {
if(clinicalAttributeNameMap.isEmpty()) {
if (clinicalAttributeNameMap.isEmpty()) {
buildClinicalAttributeNameMap();
}

if(studyViewFilter.getClinicalDataFilters() == null) {
if (studyViewFilter.getClinicalDataFilters() == null) {
return CategorizedClinicalDataCountFilter.getBuilder().build();
}

Expand All @@ -90,7 +94,7 @@ private CategorizedClinicalDataCountFilter extractClinicalDataCountFilters(final

private void buildClinicalAttributeNameMap() {
List<ClinicalAttributeDataSource> clinicalAttributeDataSources = List.of(ClinicalAttributeDataSource.values());
for(ClinicalAttributeDataSource clinicalAttributeDataSource : clinicalAttributeDataSources) {
for (ClinicalAttributeDataSource clinicalAttributeDataSource : clinicalAttributeDataSources) {
String categoricalKey = clinicalAttributeDataSource.getValue() + ClinicalAttributeDataType.CATEGORICAL;
String numericKey = clinicalAttributeDataSource.getValue() + ClinicalAttributeDataType.NUMERIC;
clinicalAttributeNameMap.put(categoricalKey, studyViewRepository.getClinicalDataAttributeNames(clinicalAttributeDataSource, ClinicalAttributeDataType.CATEGORICAL));
Expand Down
22 changes: 20 additions & 2 deletions src/main/resources/db-scripts/clickhouse/clickhouse.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
DROP TABLE IF EXISTS genomic_event_mutation;
DROP TABLE IF EXISTS genomic_event;
DROP TABLE IF EXISTS sample_to_gene_panel;
DROP TABLE IF EXISTS gene_panel_to_gene;

CREATE TABLE IF NOT EXISTS genomic_event
(
Expand All @@ -11,7 +13,7 @@ CREATE TABLE IF NOT EXISTS genomic_event
cancer_study_identifier String,
genetic_profile_stable_id String
) ENGINE = MergeTree
ORDER BY ( variant_type, sample_unique_id, hugo_gene_symbol);
ORDER BY ( variant_type, sample_unique_id, hugo_gene_symbol);

CREATE TABLE IF NOT EXISTS genomic_event_mutation
(
Expand All @@ -26,4 +28,20 @@ CREATE TABLE IF NOT EXISTS genomic_event_mutation
driver_filter String,
driver_tiers_filter String
) ENGINE = MergeTree
ORDER BY ( hugo_gene_symbol, genetic_profile_stable_id);
ORDER BY ( hugo_gene_symbol, genetic_profile_stable_id);

-- Links a sample to a gene panel for a given alteration type.
-- NOTE(review): presumably one row per (sample, alteration type); confirm against the population script.
-- Uses IF NOT EXISTS for consistency with the other CREATE TABLE statements in this script.
CREATE TABLE IF NOT EXISTS sample_to_gene_panel
(
    sample_unique_id String,
    alteration_type String,
    gene_panel_id String,
    cancer_study_identifier String
) ENGINE = MergeTree()
ORDER BY (gene_panel_id, alteration_type, sample_unique_id);

-- Links a gene panel to a gene it contains.
-- NOTE(review): the `gene` column presumably holds the Hugo gene symbol; confirm against the population script.
-- Uses IF NOT EXISTS for consistency with the other CREATE TABLE statements in this script.
CREATE TABLE IF NOT EXISTS gene_panel_to_gene
(
    gene_panel_id String,
    gene String
) ENGINE = MergeTree()
ORDER BY (gene_panel_id);
Loading

0 comments on commit f566075

Please sign in to comment.