Skip to content

Commit

Permalink
Implement study view sample-lists-counts endpoint in Clickhouse (#10849)
Browse files Browse the repository at this point in the history
Co-authored-by: Bryan Lai <[email protected]>
2 people authored and haynescd committed Nov 24, 2024
1 parent 5b0f2bd commit 3cac3a2
Showing 9 changed files with 81 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.cbioportal.persistence;

import org.cbioportal.model.AlterationCountByGene;
import org.cbioportal.model.CaseListDataCount;
import org.cbioportal.model.ClinicalData;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.CopyNumberCountByGene;
@@ -35,6 +36,9 @@ public interface StudyViewRepository {

List<GenomicDataCount> getGenomicDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter);


/**
 * Fetches case list (sample list) counts for the samples matching the given study view filter.
 *
 * @param studyViewFilter the study view filter describing the current selection
 * @param categorizedClinicalDataCountFilter clinical data count filters, categorized separately from
 *        the study view filter (presumably derived from it by the caller; verify against the service layer)
 * @return a list of {@link CaseListDataCount} entries
 */
List<CaseListDataCount> getCaseListDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter);

List<String> getClinicalDataAttributeNames(ClinicalAttributeDataSource clinicalAttributeDataSource, ClinicalAttributeDataType dataType);

Map<String, AlterationCountByGene> getTotalProfiledCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType);
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
package org.cbioportal.persistence.mybatisclickhouse;

import org.cbioportal.model.*;
import org.apache.ibatis.annotations.MapKey;
import org.cbioportal.model.AlterationCountByGene;
import org.cbioportal.model.CaseListDataCount;
import org.cbioportal.model.ClinicalData;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.CopyNumberCountByGene;
import org.cbioportal.model.GenomicDataCount;
import org.cbioportal.model.Sample;
import org.cbioportal.persistence.helper.AlterationFilterHelper;
import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter;
@@ -14,6 +15,7 @@
import java.util.List;
import java.util.Map;


public interface StudyViewMapper {
List<Sample> getFilteredSamples(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters);

@@ -36,6 +38,8 @@ List<ClinicalDataCount> getSampleClinicalDataCounts(StudyViewFilter studyViewFil

List<ClinicalDataCount> getClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter,
boolean applyPatientIdFilters, List<String> attributeIds, List<String> filteredAttributeValues);

/**
 * MyBatis mapper method returning case list (sample list) counts as {@link CaseListDataCount} rows.
 *
 * @param studyViewFilter the study view filter describing the current selection
 * @param categorizedClinicalDataCountFilter categorized clinical data count filters accompanying the study view filter
 * @param applyPatientIdFilters flag handed to the SQL mapping; presumably toggles patient-level
 *        filtering inside the shared sample filter fragment (TODO confirm against the mapper XML)
 * @return a list of {@link CaseListDataCount} entries
 */
List<CaseListDataCount> getCaseListDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters);

List<String> getClinicalAttributeNames(String tableName);

Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.cbioportal.persistence.mybatisclickhouse;
import org.cbioportal.model.AlterationCountByGene;
import org.cbioportal.model.CaseListDataCount;
import org.cbioportal.model.ClinicalData;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.GenePanelToGene;
@@ -82,6 +83,11 @@ public List<ClinicalDataCount> getPatientClinicalDataCounts(StudyViewFilter stud
filteredAttributes, FILTERED_CLINICAL_ATTR_VALUES);
}

/**
 * Delegates the case list count query to the MyBatis mapper.
 *
 * <p>The patient-id-filter flag expected by the mapper is computed from the
 * categorized clinical data count filter before the mapper is invoked.
 *
 * @param studyViewFilter the study view filter describing the current selection
 * @param categorizedClinicalDataCountFilter categorized clinical data count filters
 * @return the case list counts returned by the mapper
 */
@Override
public List<CaseListDataCount> getCaseListDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) {
    final boolean applyPatientIdFilters = shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter);
    return mapper.getCaseListDataCounts(
        studyViewFilter,
        categorizedClinicalDataCountFilter,
        applyPatientIdFilters);
}

@Override
public List<String> getClinicalDataAttributeNames(ClinicalAttributeDataSource clinicalAttributeDataSource, ClinicalAttributeDataType dataType) {
String tableName = clinicalAttributeDataSource.getValue().toLowerCase() + "_clinical_attribute_" + dataType.getValue().toLowerCase() + "_mv";
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.cbioportal.service;

import org.cbioportal.model.AlterationCountByGene;
import org.cbioportal.model.CaseListDataCount;
import org.cbioportal.model.ClinicalData;
import org.cbioportal.model.ClinicalDataCountItem;
import org.cbioportal.model.GenomicDataCount;
@@ -20,6 +21,8 @@ public interface StudyViewColumnarService {

List<ClinicalDataCountItem> getClinicalDataCounts(StudyViewFilter studyViewFilter, List<String> filteredAttributes);

/**
 * Fetches case list (sample list) counts for the samples matching the given study view filter.
 *
 * @param studyViewFilter the study view filter describing the current selection
 * @return a list of {@link CaseListDataCount} entries
 */
List<CaseListDataCount> getCaseListDataCounts(StudyViewFilter studyViewFilter);

List<ClinicalData> getPatientClinicalData(StudyViewFilter studyViewFilter, List<String> attributeIds);

List<ClinicalData> getSampleClinicalData(StudyViewFilter studyViewFilter, List<String> attributeIds);
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.cbioportal.service.impl;

import org.cbioportal.model.AlterationCountByGene;
import org.cbioportal.model.CaseListDataCount;
import org.cbioportal.model.ClinicalData;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.ClinicalDataCountItem;
@@ -15,6 +16,7 @@
import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter;
import org.cbioportal.web.parameter.StudyViewFilter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.stereotype.Service;

import java.util.HashMap;
@@ -38,6 +40,7 @@ public StudyViewColumnarServiceImpl(StudyViewRepository studyViewRepository, Alt
this.alterationCountService = alterationCountService;
}

@Cacheable(cacheResolver = "generalRepositoryCacheResolver", condition = "@cacheEnabledConfig.getEnabled()")
@Override
public List<Sample> getFilteredSamples(StudyViewFilter studyViewFilter) {
CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter);
@@ -83,6 +86,12 @@ public List<ClinicalDataCountItem> getClinicalDataCounts(StudyViewFilter studyVi
}).collect(Collectors.toList());
}

/**
 * Fetches case list counts from the repository for the given study view filter.
 *
 * <p>The categorized clinical data count filter required by the repository is
 * extracted from the same study view filter.
 *
 * @param studyViewFilter the study view filter describing the current selection
 * @return the case list counts returned by the repository
 */
@Override
public List<CaseListDataCount> getCaseListDataCounts(StudyViewFilter studyViewFilter) {
    return studyViewRepository.getCaseListDataCounts(
        studyViewFilter,
        extractClinicalDataCountFilters(studyViewFilter));
}

private CategorizedClinicalDataCountFilter extractClinicalDataCountFilters(final StudyViewFilter studyViewFilter) {
if (clinicalAttributeNameMap.isEmpty()) {
buildClinicalAttributeNameMap();
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@
import org.cbioportal.web.parameter.*;
import org.cbioportal.web.util.DataBinner;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.stereotype.Component;

import java.util.*;
@@ -26,6 +27,7 @@ public ClinicalDataBinner(
this.dataBinner = dataBinner;
}

@Cacheable(cacheResolver = "generalRepositoryCacheResolver", condition = "@cacheEnabledConfig.getEnabled()")
public List<ClinicalDataBin> fetchClinicalDataBinCounts(
DataBinMethod dataBinMethod,
ClinicalDataBinCountFilter dataBinCountFilter,
Original file line number Diff line number Diff line change
@@ -9,6 +9,7 @@
import jakarta.validation.Valid;
import org.cbioportal.model.AlterationCountByGene;
import org.cbioportal.model.AlterationFilter;
import org.cbioportal.model.CaseListDataCount;
import org.cbioportal.model.ClinicalData;
import org.cbioportal.model.ClinicalDataBin;
import org.cbioportal.model.ClinicalDataCountItem;
@@ -170,6 +171,22 @@ public ResponseEntity<List<ClinicalDataCountItem>> fetchClinicalDataCounts(

}

/**
 * POST endpoint returning case list sample counts for a study view filter.
 *
 * <p>Only {@code interceptedStudyViewFilter} is forwarded to the service; the
 * {@code studyViewFilter} request body is not read by this method.
 *
 * @param studyViewFilter the study view filter sent as the request body
 * @param involvedCancerStudies request attribute referenced by the {@code @PreAuthorize}
 *        expression; hidden from the swagger-ui interface
 * @param interceptedStudyViewFilter request attribute carrying the study view filter that is
 *        actually used for the query; hidden from the swagger-ui interface
 * @return the case list counts produced by the columnar study view service
 */
@PreAuthorize("hasPermission(#involvedCancerStudies, 'Collection<CancerStudyId>', T(org.cbioportal.utils.security.AccessLevel).READ)")
@RequestMapping(
    value = "/column-store/sample-lists-counts/fetch",
    method = RequestMethod.POST,
    consumes = MediaType.APPLICATION_JSON_VALUE,
    produces = MediaType.APPLICATION_JSON_VALUE)
@Operation(description = "Fetch case list sample counts by study view filter")
public List<CaseListDataCount> fetchCaseListCounts(
    @Parameter(required = true, description = "Study view filter")
    @Valid @RequestBody(required = false) StudyViewFilter studyViewFilter,
    @Parameter(hidden = true)
    @RequestAttribute(required = false, value = "involvedCancerStudies") Collection<String> involvedCancerStudies,
    @Parameter(hidden = true)
    @Valid @RequestAttribute(required = false, value = "interceptedStudyViewFilter") StudyViewFilter interceptedStudyViewFilter) {
    final List<CaseListDataCount> caseListCounts =
        studyViewColumnarService.getCaseListDataCounts(interceptedStudyViewFilter);
    return caseListCounts;
}

@PreAuthorize("hasPermission(#involvedCancerStudies, 'Collection<CancerStudyId>', T(org.cbioportal.utils.security.AccessLevel).READ)")
@RequestMapping(value = "/column-store/clinical-data-bin-counts/fetch", method = RequestMethod.POST,
consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE)
Original file line number Diff line number Diff line change
@@ -13,8 +13,26 @@
#{studyId}
</foreach>
</if>


<!--
    Filter for samples which belong to the sample lists (aka case lists) selected by the user
    and appearing in the studyViewFilter.caseLists collection.

    Case list filtering allows both union (OR) and intersection (AND) logic:
    caseLists is an array of arrays wherein the top-level groups are combined with INTERSECT (AND)
    and the lists inside each group are combined with OR (union).

    Each list id is passed as a bound parameter (#{list}) rather than spliced into the SQL text,
    because the ids come from the request body. endsWith() compares the literal suffix
    "_" + list id, so neither the underscore nor any character of the id acts as a LIKE wildcard.
-->
<if test="studyViewFilter.caseLists != null and !studyViewFilter.caseLists.isEmpty()">
    INTERSECT
    SELECT * FROM (
    <foreach item="listGroup" collection="studyViewFilter.caseLists" separator="INTERSECT">
        SELECT sample_unique_id
        FROM sample_list_list sll
        LEFT JOIN sample_mv s ON sll.sample_id=s.internal_id
        LEFT JOIN sample_list sl on sll.list_id=sl.list_id
        WHERE
        <foreach item="list" collection="listGroup" separator="OR">
            endsWith(sl.stable_id, concat('_', #{list}))
        </foreach>
    </foreach>
    )
</if>

<if test="studyViewFilter.genomicProfiles != null and !studyViewFilter.genomicProfiles.isEmpty()">
INTERSECT
SELECT * FROM (
@@ -34,7 +52,6 @@
gp.stable_id LIKE '%_${genomicProfileId}'
</foreach>
</where>

</foreach>
)

Original file line number Diff line number Diff line change
@@ -151,6 +151,21 @@
GROUP BY genetic_profile.stable_id, genetic_profile.name, sample_derived.cancer_study_identifier;
</select>


<!--
    For /sample-lists-counts/fetch (returns CaseListDataCount).
    Produces one row per (cancer study, sample list stable id, sample list name) group with:
      label = the sample list name
      value = the sample list stable id with the "<cancer_study_identifier>_" text removed
      count = count(sample_id) within the group
    Only rows whose sample_unique_id is returned by the sampleUniqueIdsFromStudyViewFilter
    fragment are counted.
-->
<select id="getCaseListDataCounts" resultType="org.cbioportal.model.CaseListDataCount">
SELECT
name AS label,
REPLACE(stable_id, CONCAT(cancer_study_identifier, '_'), '') AS value,
count(sample_id) AS count
FROM sample_list_list sll
LEFT JOIN sample_mv s ON sll.sample_id=s.internal_id
LEFT JOIN sample_list sl on sll.list_id=sl.list_id
<where>
sample_unique_id IN ( <include refid="sampleUniqueIdsFromStudyViewFilter"/>)
</where>
GROUP BY s.cancer_study_identifier, sl.stable_id, sl.name;
</select>

<sql id="getCategoricalClinicalDataCountsQuery">
SELECT

0 comments on commit 3cac3a2

Please sign in to comment.