Skip to content

Commit

Permalink
use clinical data counts instead of the actual clinical data, get rid…
Browse files Browse the repository at this point in the history
… of redundant NA count logic
  • Loading branch information
onursumer committed Jun 25, 2024
1 parent e6d0ae2 commit de88ff7
Show file tree
Hide file tree
Showing 9 changed files with 83 additions and 138 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ public interface StudyViewRepository {

List<ClinicalData> getPatientClinicalData(StudyViewFilter studyViewFilter, List<String> attributeIds);

List<ClinicalDataCount> getSampleCountWithoutClinicalData(StudyViewFilter studyViewFilter, List<String> attributeIds, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter);
/**
 * Returns clinical data counts for the given study view filter and attribute ids.
 * NOTE(review): presumably sample-level counts consumed by the data binner; confirm against the implementation.
 *
 * @param studyViewFilter filter describing the current study view selection
 * @param attributeIds    ids of the clinical attributes to count
 * @return the clinical data counts for the requested attributes
 */
List<ClinicalDataCount> getSampleClinicalDataCountsForBinning(StudyViewFilter studyViewFilter, List<String> attributeIds);

List<ClinicalDataCount> getPatientCountWithoutClinicalData(StudyViewFilter studyViewFilter, List<String> attributeIds, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter);
/**
 * Returns clinical data counts for the given study view filter and attribute ids.
 * NOTE(review): presumably patient-level counts consumed by the data binner; confirm against the implementation.
 *
 * @param studyViewFilter filter describing the current study view selection
 * @param attributeIds    ids of the clinical attributes to count
 * @return the clinical data counts for the requested attributes
 */
List<ClinicalDataCount> getPatientClinicalDataCountsForBinning(StudyViewFilter studyViewFilter, List<String> attributeIds);

List<AlterationCountByGene> getMutatedGenes(StudyViewFilter studyViewFilter);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ List<ClinicalDataCount> getClinicalDataCounts(StudyViewFilter studyViewFilter, C

List<ClinicalData> getPatientClinicalDataFromStudyViewFilter(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, List<String> attributeIds);

List<ClinicalDataCount> getSampleCountWithoutClinicalData(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, List<String> attributeIds);
List<ClinicalDataCount> getPatientCountWithoutClinicalData(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, List<String> attributeIds);
List<ClinicalDataCount> getSampleClinicalDataCountsForBinning(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, List<String> attributeIds);

List<ClinicalDataCount> getPatientClinicalDataCountsForBinning(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, List<String> attributeIds);

@MapKey("hugoGeneSymbol")
Map<String, AlterationCountByGene> getTotalProfiledCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,14 @@ public List<ClinicalData> getSampleClinicalData(StudyViewFilter studyViewFilter,
return mapper.getSampleClinicalDataFromStudyViewFilter(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), attributeIds);
}

public List<ClinicalDataCount> getSampleCountWithoutClinicalData(StudyViewFilter studyViewFilter, List<String> attributeIds, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) {
return mapper.getSampleCountWithoutClinicalData(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), attributeIds);
/**
 * Fetches sample clinical data counts for binning through the mapper.
 *
 * <p>The categorized clinical data count filter is derived from {@code studyViewFilter}
 * in this method, so callers only supply the filter and the attribute ids.
 *
 * @param studyViewFilter filter describing the current study view selection
 * @param attributeIds    ids of the clinical attributes to count
 * @return the counts returned by the mapper
 */
public List<ClinicalDataCount> getSampleClinicalDataCountsForBinning(
    StudyViewFilter studyViewFilter,
    List<String> attributeIds
) {
    final CategorizedClinicalDataCountFilter countFilter = extractClinicalDataCountFilters(studyViewFilter);
    // Whether patient id filters apply is decided from the categorized filter alone.
    final boolean applyPatientIdFilters = shouldApplyPatientIdFilters(countFilter);
    return mapper.getSampleClinicalDataCountsForBinning(
        studyViewFilter,
        countFilter,
        applyPatientIdFilters,
        attributeIds
    );
}

public List<ClinicalDataCount> getPatientCountWithoutClinicalData(StudyViewFilter studyViewFilter, List<String> attributeIds, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) {
return mapper.getPatientCountWithoutClinicalData(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), attributeIds);
/**
 * Fetches patient clinical data counts for binning through the mapper.
 *
 * <p>The categorized clinical data count filter is derived from {@code studyViewFilter}
 * in this method, so callers only supply the filter and the attribute ids.
 *
 * @param studyViewFilter filter describing the current study view selection
 * @param attributeIds    ids of the clinical attributes to count
 * @return the counts returned by the mapper
 */
public List<ClinicalDataCount> getPatientClinicalDataCountsForBinning(
    StudyViewFilter studyViewFilter,
    List<String> attributeIds
) {
    final CategorizedClinicalDataCountFilter countFilter = extractClinicalDataCountFilters(studyViewFilter);
    // Whether patient id filters apply is decided from the categorized filter alone.
    final boolean applyPatientIdFilters = shouldApplyPatientIdFilters(countFilter);
    return mapper.getPatientClinicalDataCountsForBinning(
        studyViewFilter,
        countFilter,
        applyPatientIdFilters,
        attributeIds
    );
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ public interface StudyViewColumnarService {

List<ClinicalData> getSampleClinicalData(StudyViewFilter studyViewFilter, List<String> attributeIds);

List<ClinicalDataCount> getSampleCountWithoutClinicalData(StudyViewFilter studyViewFilter, List<String> attributeIds);
List<ClinicalDataCount> getPatientCountWithoutClinicalData(StudyViewFilter studyViewFilter, List<String> attributeIds);
List<ClinicalDataCount> getSampleClinicalDataCountsForBinning(StudyViewFilter studyViewFilter, List<String> attributeIds);

List<ClinicalDataCount> getPatientClinicalDataCountsForBinning(StudyViewFilter studyViewFilter, List<String> attributeIds);

List<GenomicDataCount> getGenomicDataCounts(StudyViewFilter studyViewFilter);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,13 @@ public List<ClinicalData> getSampleClinicalData(StudyViewFilter studyViewFilter,
}

@Override
public List<ClinicalDataCount> getSampleCountWithoutClinicalData(StudyViewFilter studyViewFilter, List<String> attributeIds) {
CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter);
return studyViewRepository.getSampleCountWithoutClinicalData(studyViewFilter, attributeIds, categorizedClinicalDataCountFilter);
public List<ClinicalDataCount> getSampleClinicalDataCountsForBinning(
    StudyViewFilter studyViewFilter,
    List<String> attributeIds
) {
    // Straight delegation: no filter preprocessing happens in this layer.
    return studyViewRepository.getSampleClinicalDataCountsForBinning(
        studyViewFilter,
        attributeIds
    );
}

@Override
public List<ClinicalDataCount> getPatientCountWithoutClinicalData(StudyViewFilter studyViewFilter, List<String> attributeIds) {
CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter);
return studyViewRepository.getPatientCountWithoutClinicalData(studyViewFilter, attributeIds, categorizedClinicalDataCountFilter);
public List<ClinicalDataCount> getPatientClinicalDataCountsForBinning(
    StudyViewFilter studyViewFilter,
    List<String> attributeIds
) {
    // Straight delegation: no filter preprocessing happens in this layer.
    return studyViewRepository.getPatientClinicalDataCountsForBinning(
        studyViewFilter,
        attributeIds
    );
}

}
82 changes: 32 additions & 50 deletions src/main/java/org/cbioportal/web/columnar/ClinicalDataBinner.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,34 +27,24 @@ public ClinicalDataBinner(
this.dataBinner = dataBinner;
}

public Map<String, Integer> countSamplesWithNoClinicalData(
List<String> attributeIds,
StudyViewFilter studyViewFilter
) {
return studyViewColumnarService
.getSampleCountWithoutClinicalData(studyViewFilter, attributeIds)
.stream()
.collect(
Collectors.toMap(
ClinicalDataCount::getAttributeId,
ClinicalDataCount::getCount
)
);
}

public Map<String, Integer> countPatientsWithNoClinicalData(
List<String> attributeIds,
StudyViewFilter studyViewFilter
) {
return studyViewColumnarService
.getPatientCountWithoutClinicalData(studyViewFilter, attributeIds)
// TODO move this to a utility class?
public List<ClinicalData> convertCountsToData(List<ClinicalDataCount> clinicalDataCounts)
{
return clinicalDataCounts
.stream()
.collect(
Collectors.toMap(
ClinicalDataCount::getAttributeId,
ClinicalDataCount::getCount
)
);
.map(c -> {
// TODO get rid of the for loop and use something nicer and more efficient?
List<ClinicalData> data = new ArrayList<>(c.getCount());
for (int i=0; i < c.getCount(); i++) {
ClinicalData d = new ClinicalData();
d.setAttrId(c.getAttributeId());
d.setAttrValue(c.getValue());
data.add(d);
}
return data;
})
.flatMap(Collection::stream)
.toList();
}

@Cacheable(cacheResolver = "generalRepositoryCacheResolver", condition = "@cacheEnabledConfig.getEnabled()")
Expand Down Expand Up @@ -83,28 +73,26 @@ public List<ClinicalDataBin> fetchClinicalDataBinCounts(
partialFilter.setStudyIds(studyViewFilter.getStudyIds());
partialFilter.setSampleIdentifiers(studyViewFilter.getSampleIdentifiers());

// TODO we don't actually need every single data point,
// instead of fetching clinical data we can just fetch clinical data counts

// we need the clinical data for the partial filter in order to generate the bins for initial state
// we use the filtered data to calculate the counts for each bin, we do not regenerate bins for the filtered data
List<ClinicalData> unfilteredClinicalDataForSamples = studyViewColumnarService.getSampleClinicalData(partialFilter, attributeIds);
List<ClinicalData> filteredClinicalDataForSamples = studyViewColumnarService.getSampleClinicalData(studyViewFilter, attributeIds);
List<ClinicalData> unfilteredClinicalDataForPatients = studyViewColumnarService.getPatientClinicalData(partialFilter, attributeIds);
List<ClinicalData> filteredClinicalDataForPatients = studyViewColumnarService.getPatientClinicalData(studyViewFilter, attributeIds);

// TODO investigate if we can directly use studyViewColumnarService.getClinicalDataCounts instead of adding new SQL
List<ClinicalDataCount> unfilteredClinicalDataCountsForSamples = studyViewColumnarService.getSampleClinicalDataCountsForBinning(partialFilter, attributeIds);
List<ClinicalDataCount> filteredClinicalDataCountsForSamples = studyViewColumnarService.getSampleClinicalDataCountsForBinning(studyViewFilter, attributeIds);
List<ClinicalDataCount> unfilteredClinicalDataCountsForPatients = studyViewColumnarService.getPatientClinicalDataCountsForBinning(partialFilter, attributeIds);
List<ClinicalDataCount> filteredClinicalDataCountsForPatients = studyViewColumnarService.getPatientClinicalDataCountsForBinning(studyViewFilter, attributeIds);

List<ClinicalData> unfilteredClinicalDataForSamples = convertCountsToData(unfilteredClinicalDataCountsForSamples);
List<ClinicalData> filteredClinicalDataForSamples = convertCountsToData(filteredClinicalDataCountsForSamples);
List<ClinicalData> unfilteredClinicalDataForPatients = convertCountsToData(unfilteredClinicalDataCountsForPatients);
List<ClinicalData> filteredClinicalDataForPatients = convertCountsToData(filteredClinicalDataCountsForPatients);

Map<String, ClinicalDataType> attributeDatatypeMap = NewClinicalDataBinUtil.toAttributeDatatypeMap(
unfilteredClinicalDataForSamples.stream().map(ClinicalData::getAttrId).collect(Collectors.toList()),
unfilteredClinicalDataForPatients.stream().map(ClinicalData::getAttrId).collect(Collectors.toList()),
unfilteredClinicalDataCountsForSamples.stream().map(ClinicalDataCount::getAttributeId).collect(Collectors.toList()),
unfilteredClinicalDataCountsForPatients.stream().map(ClinicalDataCount::getAttributeId).collect(Collectors.toList()),
Collections.emptyList() // TODO ignoring conflictingPatientAttributeIds for now
);

// Map<attributeId, number of samples/patients without clinical data>
Map<String, Integer> unfilteredSamplesCountWithoutClinicalData = countSamplesWithNoClinicalData(attributeIds, partialFilter);
Map<String, Integer> filteredSamplesCountWithoutClinicalData = countSamplesWithNoClinicalData(attributeIds, studyViewFilter);
Map<String, Integer> unfilteredPatientsCountWithoutClinicalData = countPatientsWithNoClinicalData(attributeIds, partialFilter);
Map<String, Integer> filteredPatientsCountWithoutClinicalData = countPatientsWithNoClinicalData(attributeIds, studyViewFilter);

List<Binnable> unfilteredClinicalData = Stream.of(
unfilteredClinicalDataForSamples,
unfilteredClinicalDataForPatients
Expand Down Expand Up @@ -132,11 +120,7 @@ public List<ClinicalDataBin> fetchClinicalDataBinCounts(
attributes,
attributeDatatypeMap,
unfilteredClinicalDataByAttributeId,
filteredClinicalDataByAttributeId,
unfilteredSamplesCountWithoutClinicalData,
unfilteredPatientsCountWithoutClinicalData,
filteredSamplesCountWithoutClinicalData,
filteredPatientsCountWithoutClinicalData
filteredClinicalDataByAttributeId
);
}
}
Expand All @@ -149,9 +133,7 @@ public List<ClinicalDataBin> fetchClinicalDataBinCounts(
dataBinner,
attributes,
attributeDatatypeMap,
filteredClinicalDataByAttributeId,
filteredSamplesCountWithoutClinicalData,
filteredPatientsCountWithoutClinicalData
filteredClinicalDataByAttributeId
);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,31 +69,17 @@ public static List<ClinicalDataBin> calculateStaticDataBins(
List<ClinicalDataBinFilter> attributes,
Map<String, ClinicalDataType> attributeDatatypeMap,
Map<String, List<Binnable>> unfilteredClinicalDataByAttributeId,
Map<String, List<Binnable>> filteredClinicalDataByAttributeId,
Map<String, Integer> unfilteredSamplesCountWithoutClinicalData,
Map<String, Integer> unfilteredPatientsCountWithoutClinicalData,
Map<String, Integer> filteredSamplesCountWithoutClinicalData,
Map<String, Integer> filteredPatientsCountWithoutClinicalData
Map<String, List<Binnable>> filteredClinicalDataByAttributeId
) {
List<ClinicalDataBin> clinicalDataBins = new ArrayList<>();

for (ClinicalDataBinFilter attribute : attributes) {
if (attributeDatatypeMap.containsKey(attribute.getAttributeId())) {
ClinicalDataType clinicalDataType = attributeDatatypeMap.get(attribute.getAttributeId());
Integer numberOfFilteredCasesWithoutClinicalData = clinicalDataType == ClinicalDataType.PATIENT
? filteredPatientsCountWithoutClinicalData.getOrDefault(attribute.getAttributeId(), 0)
: filteredSamplesCountWithoutClinicalData.getOrDefault(attribute.getAttributeId(), 0);
Integer numberOfUnfilteredCasesWithoutClinicalData = clinicalDataType == ClinicalDataType.PATIENT
? unfilteredPatientsCountWithoutClinicalData.getOrDefault(attribute.getAttributeId(), 0)
: unfilteredSamplesCountWithoutClinicalData.getOrDefault(attribute.getAttributeId(), 0);

List<ClinicalDataBin> dataBins = dataBinner
.calculateClinicalDataBins(
attribute,
filteredClinicalDataByAttributeId.getOrDefault(attribute.getAttributeId(), emptyList()),
unfilteredClinicalDataByAttributeId.getOrDefault(attribute.getAttributeId(), emptyList()),
numberOfFilteredCasesWithoutClinicalData,
numberOfUnfilteredCasesWithoutClinicalData
unfilteredClinicalDataByAttributeId.getOrDefault(attribute.getAttributeId(), emptyList())
)
.stream()
.map(dataBin -> dataBinToClinicalDataBin(attribute, dataBin))
Expand All @@ -110,26 +96,18 @@ public static List<ClinicalDataBin> calculateDynamicDataBins(
DataBinner dataBinner,
List<ClinicalDataBinFilter> attributes,
Map<String, ClinicalDataType> attributeDatatypeMap,
Map<String, List<Binnable>> filteredClinicalDataByAttributeId,
Map<String, Integer> filteredSamplesCountWithoutClinicalData,
Map<String, Integer> filteredPatientsCountWithoutClinicalData
Map<String, List<Binnable>> filteredClinicalDataByAttributeId
) {
List<ClinicalDataBin> clinicalDataBins = new ArrayList<>();

for (ClinicalDataBinFilter attribute : attributes) {

// if there is clinical data for requested attribute
if (attributeDatatypeMap.containsKey(attribute.getAttributeId())) {
ClinicalDataType clinicalDataType = attributeDatatypeMap.get(attribute.getAttributeId());
Integer numberOfFilteredCasesWithoutClinicalData = clinicalDataType == ClinicalDataType.PATIENT
? filteredPatientsCountWithoutClinicalData.getOrDefault(attribute.getAttributeId(), 0)
: filteredSamplesCountWithoutClinicalData.getOrDefault(attribute.getAttributeId(), 0);

List<ClinicalDataBin> dataBins = dataBinner
.calculateDataBins(
attribute,
filteredClinicalDataByAttributeId.getOrDefault(attribute.getAttributeId(), emptyList()),
numberOfFilteredCasesWithoutClinicalData.longValue()
filteredClinicalDataByAttributeId.getOrDefault(attribute.getAttributeId(), emptyList())
)
.stream()
.map(dataBin -> dataBinToClinicalDataBin(attribute, dataBin))
Expand Down
Loading

0 comments on commit de88ff7

Please sign in to comment.