Skip to content

Commit

Permalink
Merge molecular data with the same stable id only
Browse files Browse the repository at this point in the history
  • Loading branch information
forus committed Dec 17, 2024
1 parent 2acbb73 commit b45d066
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 81 deletions.
16 changes: 10 additions & 6 deletions src/main/java/org/cbioportal/file/export/MafRecordFetcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

@Component
public class MafRecordFetcher {
Expand All @@ -23,16 +24,19 @@ public MafRecordFetcher(MolecularProfileService molecularProfileService, Mutatio
this.mutationService = mutationService;
}

public Iterator<MafRecord> fetch(Map<String, Set<String>> sampleIdsByStudyId) {
List<String> studyIds = List.copyOf(sampleIdsByStudyId.keySet());
List<String> molecularProfileStableIds = this.molecularProfileService.getMolecularProfilesInStudies(studyIds, "ID").stream()
.map(MolecularProfile::getStableId).toList();
List<String> sampleIds = List.copyOf(sampleIdsByStudyId.values().stream().flatMap(Set::stream).toList());
public Iterator<MafRecord> fetch(Map<MolecularProfile, Set<String>> molecularProfileToSamplesMap) {
List<String> molecularProfileStableIds = List.copyOf(molecularProfileToSamplesMap.keySet().stream().map(MolecularProfile::getStableId).toList());
if (molecularProfileStableIds.size() > 1) {
throw new IllegalArgumentException("Merging multiple molecular profiles with different stable Id is not supported");
}
List<String> sampleIds = List.copyOf(molecularProfileToSamplesMap.values().stream().flatMap(Set::stream).toList());
List<Integer> entrezGeneIds = List.of();
List<Mutation> mutationList = mutationService.getMutationsInMultipleMolecularProfiles(
molecularProfileStableIds, sampleIds, entrezGeneIds, "EXPORT", null, null, null, null);
Map<String, Set<String>> studyIdToSamplesMap = molecularProfileToSamplesMap.entrySet().stream()
.collect(Collectors.toMap(molecularProfile -> molecularProfile.getKey().getCancerStudyIdentifier(), Map.Entry::getValue));
return mutationList.stream()
.filter(mutation -> sampleIdsByStudyId.get(mutation.getStudyId()).contains(mutation.getSampleId()))
.filter(mutation -> studyIdToSamplesMap.get(mutation.getStudyId()).contains(mutation.getSampleId()))
.map(mutation -> new MafRecord(
mutation.getGene().getHugoGeneSymbol(),
mutation.getGene().getEntrezGeneId().toString(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,16 @@

import java.util.Optional;

public interface GenericProfileDatatypeMetadata extends GenericDatatypeMetadata {
String stableId();
Boolean showProfileInAnalysisTab();
String profileName();
String profileDescription();
Optional<String> genePanel();
/**
 * Immutable record carrying the metadata fields of a profile datatype.
 * Implements {@code GenericDatatypeMetadata}.
 *
 * <p>NOTE(review): the component descriptions below are inferred from the
 * component names only; confirm them against the code that constructs and
 * writes this record.
 *
 * @param stableId stable identifier of the profile
 * @param geneticAlterationType genetic alteration type of the profile, as a string
 * @param datatype datatype of the profile, as a string
 * @param cancerStudyIdentifier identifier of the cancer study the metadata belongs to
 * @param dataFilename name of the data file this metadata describes
 * @param profileName name of the profile
 * @param profileDescription description of the profile
 * @param genePanel gene panel, if any; presumably a gene panel identifier (TODO confirm)
 * @param showProfileInAnalysisTab presumably whether the profile is shown in the
 *        analysis tab (TODO confirm); boxed, so it may be {@code null}
 */
public record GenericProfileDatatypeMetadata(
String stableId,
String geneticAlterationType,
String datatype,
String cancerStudyIdentifier,
String dataFilename,
String profileName,
String profileDescription,
Optional<String> genePanel,
Boolean showProfileInAnalysisTab
) implements GenericDatatypeMetadata {

}
28 changes: 0 additions & 28 deletions src/main/java/org/cbioportal/file/model/MutationMetadata.java

This file was deleted.

78 changes: 40 additions & 38 deletions src/main/java/org/cbioportal/service/impl/ExportService.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,11 @@
import org.cbioportal.file.model.ClinicalAttributeData;
import org.cbioportal.file.model.ClinicalSampleAttributesMetadata;
import org.cbioportal.file.model.MafRecord;
import org.cbioportal.file.model.MutationMetadata;
import org.cbioportal.file.model.GenericProfileDatatypeMetadata;
import org.cbioportal.model.CancerStudy;
import org.cbioportal.model.MolecularProfile;
import org.cbioportal.model.MolecularProfileCaseIdentifier;
import org.cbioportal.model.Sample;
import org.cbioportal.service.*;
import org.cbioportal.service.exception.MolecularProfileNotFoundException;
import org.cbioportal.service.util.SessionServiceRequestHandler;
import org.cbioportal.web.parameter.VirtualStudy;
import org.cbioportal.web.parameter.VirtualStudyData;
Expand All @@ -21,6 +19,7 @@
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.AbstractMap.SimpleEntry;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
Expand Down Expand Up @@ -79,44 +78,47 @@ public void exportStudyDataToZip(OutputStream outputStream, String studyId) thro
zipOutputStream.closeEntry();
}

//TODO what happens here with virtual studies? Do we merge the data from all studies?
List<MolecularProfileCaseIdentifier> molecularProfileCaseIdentifiers = cancerStudyInfo.studyToSampleMap.entrySet().stream().flatMap(entry -> {
List<String> studyIds = List.of(entry.getKey());
List<String> sampleIds = List.copyOf(entry.getValue());
//FIXME it does not do exactly what I thought it should do
return molecularProfileService.getMolecularProfileCaseIdentifiers(studyIds, sampleIds).stream();
}).toList();
for (MolecularProfileCaseIdentifier molecularProfileCaseIdentifier : molecularProfileCaseIdentifiers) {
MolecularProfile molecularProfile = null;
try {
molecularProfile = molecularProfileService.getMolecularProfile(molecularProfileCaseIdentifier.getMolecularProfileId());
MolecularProfile.MolecularAlterationType molecularAlterationType = molecularProfile.getMolecularAlterationType();
switch (molecularAlterationType) {
case MUTATION_EXTENDED -> {
Iterator<MafRecord> mafRecordIterator = mafRecordFetcher.fetch(cancerStudyInfo.studyToSampleMap);
if (mafRecordIterator.hasNext()) {
zipOutputStream.putNextEntry(new ZipEntry("meta_mutations.txt"));
MutationMetadata mutationMetadata = new MutationMetadata(
studyId,
"data_mutations.txt",
molecularProfile.getName(),
molecularProfile.getDescription(),
//TODO where to get gene panel from?
Optional.empty()
);
new KeyValueMetadataWriter(writer).write(mutationMetadata);
zipOutputStream.closeEntry();
Map<String, List<MolecularProfile>> molecularProfilesByStableId = this.molecularProfileService.getMolecularProfilesInStudies(cancerStudyInfo.studyToSampleMap.keySet().stream().toList(), "SUMMARY").stream().collect(Collectors.groupingBy(MolecularProfile::getStableId));
for (Map.Entry<String, List<MolecularProfile>> molecularProfiles: molecularProfilesByStableId.entrySet()) {
String stableId = molecularProfiles.getKey();
List<MolecularProfile> molecularProfileList = molecularProfiles.getValue();
Map<MolecularProfile.MolecularAlterationType, String> molecularAlterationTypeToDatatype = molecularProfileList.stream()
.collect(Collectors.toMap(MolecularProfile::getMolecularAlterationType, MolecularProfile::getDatatype));
if (molecularAlterationTypeToDatatype.size() > 1) {
throw new IllegalStateException("Molecular profiles with the same stable Id ("
+ stableId + ") have different molecular alteration types and datatypes:" + molecularAlterationTypeToDatatype);
}
//TODO compose Map<MolecularProfile, Set<String>> of sample IDs (all molecular profiles have to have the same stable Id)
Map<MolecularProfile, Set<String>> molecularProfileToSampleMap = molecularProfileList.stream().collect(Collectors.toMap(molecularProfile -> molecularProfile,
molecularProfile -> cancerStudyInfo.studyToSampleMap.get(molecularProfile.getCancerStudyIdentifier())));

if ("MAF".equals(molecularAlterationTypeToDatatype.get(MolecularProfile.MolecularAlterationType.MUTATION_EXTENDED))) {
Iterator<MafRecord> mafRecordIterator = mafRecordFetcher.fetch(molecularProfileToSampleMap);
if (mafRecordIterator.hasNext()) {
zipOutputStream.putNextEntry(new ZipEntry("meta_mutations.txt"));
GenericProfileDatatypeMetadata genericProfileDatatypeMetadata = new GenericProfileDatatypeMetadata(
stableId,
//TODO Use mol. alteration type and datatype from the map above instead
MolecularProfile.MolecularAlterationType.MUTATION_EXTENDED.toString(),
"MAF",
studyId,
"data_mutations.txt",
molecularProfileList.getFirst().getName(),
molecularProfileList.getFirst().getDescription(),
//TODO where to get gene panel from?
Optional.empty(),
//Is it true for all data types?
true
);
new KeyValueMetadataWriter(writer).write(genericProfileDatatypeMetadata);
zipOutputStream.closeEntry();

zipOutputStream.putNextEntry(new ZipEntry("data_mutations.txt"));
MafRecordWriter mafRecordWriter = new MafRecordWriter(writer);
mafRecordWriter.write(mafRecordIterator);
zipOutputStream.closeEntry();
}
zipOutputStream.putNextEntry(new ZipEntry("data_mutations.txt"));
MafRecordWriter mafRecordWriter = new MafRecordWriter(writer);
mafRecordWriter.write(mafRecordIterator);
zipOutputStream.closeEntry();
}
}
} catch (MolecularProfileNotFoundException e) {
throw new RuntimeException(e);
}
}
}
}
Expand Down
10 changes: 7 additions & 3 deletions src/test/java/org/cbioportal/file/export/MetadataWriterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import org.cbioportal.file.model.CancerStudyMetadata;
import org.cbioportal.file.model.ClinicalSampleAttributesMetadata;
import org.cbioportal.file.model.MutationMetadata;
import org.cbioportal.file.model.GenericProfileDatatypeMetadata;
import org.junit.Test;

import java.io.StringWriter;
Expand Down Expand Up @@ -61,12 +61,16 @@ public void testClinicalSampleAttributesMetadataWriter() {

@Test
public void testMutationMetadataWriter() {
writer.write(new MutationMetadata(
writer.write(new GenericProfileDatatypeMetadata(
"mutations",
"MUTATION_EXTENDED",
"MAF",
"study_id1",
"data_file.txt",
"profile name",
"profile description",
Optional.of("gene_panel")
Optional.of("gene_panel"),
true
));

assertEquals("""
Expand Down

0 comments on commit b45d066

Please sign in to comment.