From 1ec37713f149c639b26d75c36f676a4044d0dc72 Mon Sep 17 00:00:00 2001
From: Ruslan Forostianov
Date: Wed, 18 Dec 2024 22:38:56 +0100
Subject: [PATCH] Export case list data

---
Review notes (this section is not part of the commit message):
* CaseListMetadata.samplIds is renamed to sampleIds, mergedSapleIds to mergedSampleIds.
* The case list export no longer calls retainAll on the sample id lists returned by
  SampleListService; the exported samples are selected with filters instead.
* The case list suffix is computed by stripping only the leading study identifier.
* Line breaks, indentation and generic type arguments of this patch were reconstructed;
  check the context lines against the target tree before applying. The author e-mail
  address is missing from the From: header.

 .../file/export/KeyValueMetadataWriter.java | 17 +++++++
 .../file/model/CaseListMetadata.java        | 20 ++++++++
 .../service/impl/ExportService.java         | 46 ++++++++++++++++++-
 3 files changed, 81 insertions(+), 2 deletions(-)
 create mode 100644 src/main/java/org/cbioportal/file/model/CaseListMetadata.java

diff --git a/src/main/java/org/cbioportal/file/export/KeyValueMetadataWriter.java b/src/main/java/org/cbioportal/file/export/KeyValueMetadataWriter.java
index b5899f5cf3b..a3a140e35e4 100644
--- a/src/main/java/org/cbioportal/file/export/KeyValueMetadataWriter.java
+++ b/src/main/java/org/cbioportal/file/export/KeyValueMetadataWriter.java
@@ -1,6 +1,7 @@
 package org.cbioportal.file.export;
 
 import org.cbioportal.file.model.CancerStudyMetadata;
+import org.cbioportal.file.model.CaseListMetadata;
 import org.cbioportal.file.model.GenericDatatypeMetadata;
 import org.cbioportal.file.model.GenericProfileDatatypeMetadata;
 
@@ -67,6 +68,22 @@ public void write(GenericProfileDatatypeMetadata genericProfileDatatypeMetadata)
         write(metadata);
     }
 
+    /**
+     * Writes the case list as key-value metadata. The sample ids are joined
+     * with tabs into the single {@code case_list_ids} value.
+     *
+     * @param caseListMetadata case list to write
+     */
+    public void write(CaseListMetadata caseListMetadata) {
+        LinkedHashMap<String, String> metadata = new LinkedHashMap<>();
+        metadata.put("cancer_study_identifier", caseListMetadata.cancerStudyIdentifier());
+        metadata.put("stable_id", caseListMetadata.stableId());
+        metadata.put("case_list_name", caseListMetadata.name());
+        metadata.put("case_list_description", caseListMetadata.description());
+        metadata.put("case_list_ids", String.join("\t", caseListMetadata.sampleIds()));
+        write(metadata);
+    }
+
     private void write(LinkedHashMap<String, String> metadata) {
         metadata.forEach((key, value) -> {
             try {
diff --git a/src/main/java/org/cbioportal/file/model/CaseListMetadata.java b/src/main/java/org/cbioportal/file/model/CaseListMetadata.java
new file mode 100644
index 00000000000..cf2c24c1a64
--- /dev/null
+++ b/src/main/java/org/cbioportal/file/model/CaseListMetadata.java
@@ -0,0 +1,20 @@
+package org.cbioportal.file.model;
+
+import java.util.SequencedSet;
+
+/**
+ * Metadata of a case list file.
+ *
+ * @param cancerStudyIdentifier identifier of the study the case list belongs to
+ * @param stableId stable identifier of the case list
+ * @param name case list name
+ * @param description case list description
+ * @param sampleIds sample ids of the case list, written in the set's encounter order
+ */
+public record CaseListMetadata(
+    String cancerStudyIdentifier,
+    String stableId,
+    String name,
+    String description,
+    SequencedSet<String> sampleIds
+) implements StudyRelatedMetadata {}
diff --git a/src/main/java/org/cbioportal/service/impl/ExportService.java b/src/main/java/org/cbioportal/service/impl/ExportService.java
index 353af5ad324..cca4d101e5d 100644
--- a/src/main/java/org/cbioportal/service/impl/ExportService.java
+++ b/src/main/java/org/cbioportal/service/impl/ExportService.java
@@ -4,6 +4,7 @@
 import org.apache.commons.lang3.tuple.Pair;
 import org.cbioportal.file.export.*;
 import org.cbioportal.file.model.CancerStudyMetadata;
+import org.cbioportal.file.model.CaseListMetadata;
 import org.cbioportal.file.model.ClinicalAttributeData;
 import org.cbioportal.file.model.ClinicalSampleAttributesMetadata;
 import org.cbioportal.file.model.MafRecord;
@@ -11,6 +12,7 @@
 import org.cbioportal.model.CancerStudy;
 import org.cbioportal.model.MolecularProfile;
 import org.cbioportal.model.Sample;
+import org.cbioportal.model.SampleList;
 import org.cbioportal.service.*;
 import org.cbioportal.service.util.SessionServiceRequestHandler;
 import org.cbioportal.web.parameter.VirtualStudy;
@@ -31,19 +33,22 @@ public class ExportService {
     private final MolecularProfileService molecularProfileService;
     private final MafRecordFetcher mafRecordFetcher;
     private final ClinicalAttributeDataFetcher clinicalAttributeDataFetcher;
+    private final SampleListService sampleListService;
 
     public ExportService(StudyService studyService,
                          SessionServiceRequestHandler sessionServiceRequestHandler,
                          SampleService sampleService,
                          MolecularProfileService molecularProfileService,
                          MafRecordFetcher mafRecordFetcher,
-                         ClinicalAttributeDataFetcher clinicalAttributeDataFetcher) {
+                         ClinicalAttributeDataFetcher clinicalAttributeDataFetcher,
+                         SampleListService sampleListService) {
         this.studyService = studyService;
         this.sessionServiceRequestHandler = sessionServiceRequestHandler;
         this.sampleService = sampleService;
         this.molecularProfileService = molecularProfileService;
         this.mafRecordFetcher = mafRecordFetcher;
         this.clinicalAttributeDataFetcher = clinicalAttributeDataFetcher;
+        this.sampleListService = sampleListService;
     }
 
 
@@ -74,7 +79,8 @@ public void exportStudyData(FileWriterFactory fileWriterFactory, String studyId)
             }
         }
 
-        Map<String, List<MolecularProfile>> molecularProfilesByStableId = this.molecularProfileService.getMolecularProfilesInStudies(cancerStudyInfo.studyToSampleMap.keySet().stream().toList(), "SUMMARY").stream()
+        List<String> studyIds = cancerStudyInfo.studyToSampleMap.keySet().stream().toList();
+        Map<String, List<MolecularProfile>> molecularProfilesByStableId = this.molecularProfileService.getMolecularProfilesInStudies(studyIds, "SUMMARY").stream()
                 .collect(Collectors.groupingBy(molecularProfile -> molecularProfile.getStableId().replace(molecularProfile.getCancerStudyIdentifier() + "_", "")));
         for (Map.Entry<String, List<MolecularProfile>> molecularProfiles : molecularProfilesByStableId.entrySet()) {
             String stableId = molecularProfiles.getKey();
@@ -115,6 +121,42 @@ public void exportStudyData(FileWriterFactory fileWriterFactory, String studyId)
                 }
             }
         }
+
+        //TODO Move logic to newly created case list fetcher
+        List<SampleList> sampleLists = sampleListService.getAllSampleListsInStudies(studyIds, "DETAILED");
+        //keep only case lists with at least one exported sample; the lists returned by the service are left unmodified
+        Map<String, List<SampleList>> sampleListsBySuffix = sampleLists.stream()
+                .filter(sl -> sl.getSampleIds().stream()
+                        .anyMatch(cancerStudyInfo.studyToSampleMap.get(sl.getCancerStudyIdentifier())::contains))
+                //the suffix is the stable id without the leading study identifier (only the prefix is stripped)
+                .collect(Collectors.groupingBy(sl -> sl.getStableId().startsWith(sl.getCancerStudyIdentifier())
+                        ? sl.getStableId().substring(sl.getCancerStudyIdentifier().length())
+                        : sl.getStableId()));
+        for (Map.Entry<String, List<SampleList>> entry : sampleListsBySuffix.entrySet()) {
+            String suffix = entry.getKey();
+            //we skip this one as we have addGlobalCaseList=true for study
+            if ("_all".equals(suffix)) {
+                continue;
+            }
+            List<SampleList> suffixedSampleLists = entry.getValue();
+            String newStableId = cancerStudyInfo.metadata.cancerStudyIdentifier() + suffix;
+            //merge the exported samples of all case lists sharing the suffix, keeping first-seen order
+            LinkedHashSet<String> mergedSampleIds = suffixedSampleLists.stream()
+                    .flatMap(sl -> sl.getSampleIds().stream()
+                            .filter(cancerStudyInfo.studyToSampleMap.get(sl.getCancerStudyIdentifier())::contains))
+                    .collect(Collectors.toCollection(LinkedHashSet::new));
+            try (Writer caseListWriter = fileWriterFactory.newWriter("case_lists/cases" + suffix + ".txt")) {
+                new KeyValueMetadataWriter(caseListWriter).write(new CaseListMetadata(
+                        studyId,
+                        newStableId,
+                        //TODO Sometimes name/description could contain number of samples from the original study
+                        //maybe composing its own name and description would work better
+                        suffixedSampleLists.getFirst().getName(),
+                        suffixedSampleLists.getFirst().getDescription(),
+                        mergedSampleIds
+                ));
+            }
+        }
     }
 
     record CancerStudyInfo(