Skip to content

Commit

Permalink
Export case list data
Browse files Browse the repository at this point in the history
forus committed Dec 18, 2024
1 parent b99b1d1 commit 1ec3771
Showing 3 changed files with 62 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.cbioportal.file.export;

import org.cbioportal.file.model.CancerStudyMetadata;
import org.cbioportal.file.model.CaseListMetadata;
import org.cbioportal.file.model.GenericDatatypeMetadata;
import org.cbioportal.file.model.GenericProfileDatatypeMetadata;

@@ -67,6 +68,15 @@ public void write(GenericProfileDatatypeMetadata genericProfileDatatypeMetadata)
write(metadata);
}

/**
 * Writes the metadata entries of a case list.
 *
 * <p>The entries are collected in insertion order (study identifier, stable id,
 * name, description, sample ids) and handed to the private
 * {@code write(LinkedHashMap)} overload. The sample ids are joined into a
 * single tab-separated value.
 *
 * @param caseListMetadata case list whose fields are written
 */
public void write(CaseListMetadata caseListMetadata) {
    String tabSeparatedSampleIds = String.join("\t", caseListMetadata.samplIds());

    LinkedHashMap<String, String> entries = new LinkedHashMap<>();
    entries.put("cancer_study_identifier", caseListMetadata.cancerStudyIdentifier());
    entries.put("stable_id", caseListMetadata.stableId());
    entries.put("case_list_name", caseListMetadata.name());
    entries.put("case_list_description", caseListMetadata.description());
    entries.put("case_list_ids", tabSeparatedSampleIds);

    write(entries);
}
private void write(LinkedHashMap<String, String> metadata) {
metadata.forEach((key, value) -> {
try {
11 changes: 11 additions & 0 deletions src/main/java/org/cbioportal/file/model/CaseListMetadata.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package org.cbioportal.file.model;

import java.util.SequencedSet;

/**
 * Metadata of a single case list: the study it belongs to, its stable id,
 * its display name and description, and the ordered set of sample ids it contains.
 *
 * @param cancerStudyIdentifier identifier of the study the case list belongs to
 * @param stableId              stable id of the case list
 * @param name                  name of the case list
 * @param description           description of the case list
 * @param sampleIds             sample ids of the case list, in encounter order
 */
public record CaseListMetadata(
    String cancerStudyIdentifier,
    String stableId,
    String name,
    String description,
    SequencedSet<String> sampleIds
) implements StudyRelatedMetadata {

    /**
     * Misspelled former name of {@link #sampleIds()}, kept so that existing
     * callers keep compiling.
     *
     * @return the same set as {@link #sampleIds()}
     * @deprecated use {@link #sampleIds()} instead
     */
    @Deprecated
    public SequencedSet<String> samplIds() {
        return sampleIds;
    }
}
43 changes: 41 additions & 2 deletions src/main/java/org/cbioportal/service/impl/ExportService.java
Original file line number Diff line number Diff line change
@@ -4,13 +4,15 @@
import org.apache.commons.lang3.tuple.Pair;
import org.cbioportal.file.export.*;
import org.cbioportal.file.model.CancerStudyMetadata;
import org.cbioportal.file.model.CaseListMetadata;
import org.cbioportal.file.model.ClinicalAttributeData;
import org.cbioportal.file.model.ClinicalSampleAttributesMetadata;
import org.cbioportal.file.model.MafRecord;
import org.cbioportal.file.model.GenericProfileDatatypeMetadata;
import org.cbioportal.model.CancerStudy;
import org.cbioportal.model.MolecularProfile;
import org.cbioportal.model.Sample;
import org.cbioportal.model.SampleList;
import org.cbioportal.service.*;
import org.cbioportal.service.util.SessionServiceRequestHandler;
import org.cbioportal.web.parameter.VirtualStudy;
@@ -31,19 +33,22 @@ public class ExportService {
private final MolecularProfileService molecularProfileService;
private final MafRecordFetcher mafRecordFetcher;
private final ClinicalAttributeDataFetcher clinicalAttributeDataFetcher;
private final SampleListService sampleListService;

/**
 * Creates the export service from its collaborators.
 * Each argument is stored unchanged in the field of the same name.
 *
 * @param studyService                 stored in {@code this.studyService}
 * @param sessionServiceRequestHandler stored in {@code this.sessionServiceRequestHandler}
 * @param sampleService                stored in {@code this.sampleService}
 * @param molecularProfileService      service queried for the molecular profiles of the exported studies
 * @param mafRecordFetcher             stored in {@code this.mafRecordFetcher}
 * @param clinicalAttributeDataFetcher stored in {@code this.clinicalAttributeDataFetcher}
 * @param sampleListService            service queried for the sample lists (case lists) of the exported studies
 */
public ExportService(StudyService studyService,
SessionServiceRequestHandler sessionServiceRequestHandler,
SampleService sampleService,
MolecularProfileService molecularProfileService,
MafRecordFetcher mafRecordFetcher,
ClinicalAttributeDataFetcher clinicalAttributeDataFetcher) {
ClinicalAttributeDataFetcher clinicalAttributeDataFetcher,
SampleListService sampleListService) {
this.studyService = studyService;
this.sessionServiceRequestHandler = sessionServiceRequestHandler;
this.sampleService = sampleService;
this.molecularProfileService = molecularProfileService;
this.mafRecordFetcher = mafRecordFetcher;
this.clinicalAttributeDataFetcher = clinicalAttributeDataFetcher;
this.sampleListService = sampleListService;
}


@@ -74,7 +79,8 @@ public void exportStudyData(FileWriterFactory fileWriterFactory, String studyId)
}
}

Map<String, List<MolecularProfile>> molecularProfilesByStableId = this.molecularProfileService.getMolecularProfilesInStudies(cancerStudyInfo.studyToSampleMap.keySet().stream().toList(), "SUMMARY").stream()
List<String> studyIds = cancerStudyInfo.studyToSampleMap.keySet().stream().toList();
Map<String, List<MolecularProfile>> molecularProfilesByStableId = this.molecularProfileService.getMolecularProfilesInStudies(studyIds, "SUMMARY").stream()
.collect(Collectors.groupingBy(molecularProfile -> molecularProfile.getStableId().replace(molecularProfile.getCancerStudyIdentifier() + "_", "")));
for (Map.Entry<String, List<MolecularProfile>> molecularProfiles : molecularProfilesByStableId.entrySet()) {
String stableId = molecularProfiles.getKey();
@@ -115,6 +121,39 @@ public void exportStudyData(FileWriterFactory fileWriterFactory, String studyId)
}
}
}

//TODO Move logic to newly created case list fetcher
List<SampleList> sampleLists = sampleListService.getAllSampleListsInStudies(studyIds, "DETAILED");
Map<String, List<SampleList>> sampleListsBySuffix = sampleLists.stream()
.map(sl -> {
sl.getSampleIds().retainAll(cancerStudyInfo.studyToSampleMap.get(sl.getCancerStudyIdentifier()));
return sl;
})
.filter(sl -> !sl.getSampleIds().isEmpty())
.collect(Collectors.groupingBy(sampleList -> sampleList.getStableId().replace(sampleList.getCancerStudyIdentifier(), "")));
for (Map.Entry<String, List<SampleList>> entry: sampleListsBySuffix.entrySet()) {
String suffix = entry.getKey();
//we skip this one as we have addGlobalCaseList=true for study
if ("_all".equals(suffix)) {
continue;
}
List<SampleList> suffixedSampleLists = entry.getValue();
String newStableId = cancerStudyInfo.metadata.cancerStudyIdentifier() + suffix;
LinkedHashSet<String> mergedSapleIds = suffixedSampleLists.stream()
.flatMap(sl -> sl.getSampleIds().stream())
.collect(Collectors.toCollection(LinkedHashSet::new));
try (Writer caseListWriter = fileWriterFactory.newWriter("case_lists/cases" + suffix + ".txt")) {
new KeyValueMetadataWriter(caseListWriter).write(new CaseListMetadata(
studyId,
newStableId,
//TODO Sometimes the name/description contains the number of samples from the original study,
//so composing our own name and description might work better
suffixedSampleLists.getFirst().getName(),
suffixedSampleLists.getFirst().getDescription(),
mergedSapleIds
));
}
}
}

record CancerStudyInfo(

0 comments on commit 1ec3771

Please sign in to comment.