From f24c4e32a1b13bfaec84aa46ff14f0c0ec27dc68 Mon Sep 17 00:00:00 2001 From: ramari16 Date: Mon, 23 Oct 2023 15:43:55 -0400 Subject: [PATCH] [ALS-5050] Ignore invalid concept paths in anyRecordOf queries (#87) --- .../hpds/processing/AbstractProcessor.java | 7 +- .../processing/AbstractProcessorTest.java | 80 ++++++++++++++++++- 2 files changed, 83 insertions(+), 4 deletions(-) diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java index e67d24cb..0a196f5b 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java @@ -273,7 +273,12 @@ private void addIdSetsForAnyRecordOf(List anyRecordOfFilters, ArrayList< addIdSetsForVariantSpecCategoryFilters(new String[]{"0/1", "1/1"}, path, patientsInScope, bucketCache); return patientsInScope.stream(); } else { - return (Stream) getCube(path).keyBasedIndex().stream(); + try { + return (Stream) getCube(path).keyBasedIndex().stream(); + } catch (InvalidCacheLoadException e) { + // return an empty stream if this concept doesn't exist + return Stream.empty(); + } } }).collect(Collectors.toSet()); filteredIdSets.add(anyRecordOfPatientSet); diff --git a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessorTest.java b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessorTest.java index e22bea5e..47153ee3 100644 --- a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessorTest.java +++ b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessorTest.java @@ -1,7 +1,10 @@ package edu.harvard.hms.dbmi.avillach.hpds.processing; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; import edu.harvard.hms.dbmi.avillach.hpds.data.genotype.FileBackedByteIndexedInfoStore; +import edu.harvard.hms.dbmi.avillach.hpds.data.phenotype.PhenoCube; import edu.harvard.hms.dbmi.avillach.hpds.data.query.Query; import edu.harvard.hms.dbmi.avillach.hpds.storage.FileBackedByteIndexedStorage; import org.junit.Before; @@ -12,6 +15,7 @@ import org.mockito.junit.MockitoJUnitRunner; import java.util.*; +import java.util.concurrent.ExecutionException; import static org.mockito.ArgumentMatchers.any; import static org.junit.Assert.*; @@ -33,6 +37,9 @@ public class AbstractProcessorTest { @Mock private PatientVariantJoinHandler patientVariantJoinHandler; + @Mock + private LoadingCache> mockLoadingCache; + public static final String GENE_WITH_VARIANT_KEY = "Gene_with_variant"; private static final String VARIANT_SEVERITY_KEY = "Variant_severity"; public static final List EXAMPLE_GENES_WITH_VARIANT = List.of("CDH8", "CDH9", "CDH10"); @@ -61,7 +68,7 @@ public void setup() { new TreeMap<>(), new TreeSet<>() ), - null, + mockLoadingCache, infoStores, null, variantService, @@ -125,20 +132,87 @@ public void getPatientSubsetForQuery_twoVariantCategoryFilters_intersectFilters( when(patientVariantJoinHandler.getPatientIdsForIntersectionOfVariantSets(any(), argumentCaptor.capture())).thenReturn(List.of(Set.of(42))); Map categoryVariantInfoFilters = Map.of( - GENE_WITH_VARIANT_KEY, new String[] {EXAMPLE_GENES_WITH_VARIANT.get(0)}, - VARIANT_SEVERITY_KEY, new String[] {EXAMPLE_VARIANT_SEVERITIES.get(0)} + GENE_WITH_VARIANT_KEY, new String[] {EXAMPLE_GENES_WITH_VARIANT.get(0)}, + VARIANT_SEVERITY_KEY, new String[] {EXAMPLE_VARIANT_SEVERITIES.get(0)} + ); + Query.VariantInfoFilter variantInfoFilter = new Query.VariantInfoFilter(); + variantInfoFilter.categoryVariantInfoFilters = categoryVariantInfoFilters; + + List variantInfoFilters = List.of(variantInfoFilter); + + Query query = new Query(); + query.setVariantInfoFilters(variantInfoFilters); + + TreeSet patientSubsetForQuery = abstractProcessor.getPatientSubsetForQuery(query); + assertFalse(patientSubsetForQuery.isEmpty()); + // Expected result is the intersection of the two filters + assertEquals(argumentCaptor.getValue(), new SparseVariantIndex(Set.of(4, 6))); + } + + @Test + public void getPatientSubsetForQuery_anyRecordOf_applyOrLogic() throws ExecutionException { + when(variantIndexCache.get(GENE_WITH_VARIANT_KEY, EXAMPLE_GENES_WITH_VARIANT.get(0))).thenReturn(new SparseVariantIndex(Set.of(2, 4, 6))); + when(variantIndexCache.get(VARIANT_SEVERITY_KEY, EXAMPLE_VARIANT_SEVERITIES.get(0))).thenReturn(new SparseVariantIndex(Set.of(4, 5, 6, 7))); + + ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(VariantIndex.class); + ArgumentCaptor>> listArgumentCaptor = ArgumentCaptor.forClass(List.class); + when(patientVariantJoinHandler.getPatientIdsForIntersectionOfVariantSets(listArgumentCaptor.capture(), argumentCaptor.capture())).thenReturn(List.of(Set.of(42))); + + Map categoryVariantInfoFilters = Map.of( + GENE_WITH_VARIANT_KEY, new String[] {EXAMPLE_GENES_WITH_VARIANT.get(0)}, + VARIANT_SEVERITY_KEY, new String[] {EXAMPLE_VARIANT_SEVERITIES.get(0)} ); Query.VariantInfoFilter variantInfoFilter = new Query.VariantInfoFilter(); variantInfoFilter.categoryVariantInfoFilters = categoryVariantInfoFilters; List variantInfoFilters = List.of(variantInfoFilter); + PhenoCube mockPhenoCube = mock(PhenoCube.class); + when(mockPhenoCube.keyBasedIndex()).thenReturn(List.of(42, 101)); + when(mockLoadingCache.get("good concept")).thenReturn(mockPhenoCube); + when(mockLoadingCache.get("bad concept")).thenThrow(CacheLoader.InvalidCacheLoadException.class); + + Query query = new Query(); + query.setVariantInfoFilters(variantInfoFilters); + query.setAnyRecordOf(List.of("good concept", "bad concept")); + + TreeSet patientSubsetForQuery = abstractProcessor.getPatientSubsetForQuery(query); + assertFalse(patientSubsetForQuery.isEmpty()); + // Expected result is the intersection of the two filters + assertEquals(argumentCaptor.getValue(), new SparseVariantIndex(Set.of(4, 6))); + assertEquals(listArgumentCaptor.getValue().get(0), Set.of(42, 101)); + } + + + + @Test + public void getPatientSubsetForQuery_anyRecordOfInvalidKey_returnEmpty() throws ExecutionException { + when(variantIndexCache.get(GENE_WITH_VARIANT_KEY, EXAMPLE_GENES_WITH_VARIANT.get(0))).thenReturn(new SparseVariantIndex(Set.of(2, 4, 6))); + when(variantIndexCache.get(VARIANT_SEVERITY_KEY, EXAMPLE_VARIANT_SEVERITIES.get(0))).thenReturn(new SparseVariantIndex(Set.of(4, 5, 6, 7))); + + ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(VariantIndex.class); + ArgumentCaptor>> listArgumentCaptor = ArgumentCaptor.forClass(List.class); + when(patientVariantJoinHandler.getPatientIdsForIntersectionOfVariantSets(listArgumentCaptor.capture(), argumentCaptor.capture())).thenReturn(List.of(Set.of(42))); + + Map categoryVariantInfoFilters = Map.of( + GENE_WITH_VARIANT_KEY, new String[] {EXAMPLE_GENES_WITH_VARIANT.get(0)}, + VARIANT_SEVERITY_KEY, new String[] {EXAMPLE_VARIANT_SEVERITIES.get(0)} + ); + Query.VariantInfoFilter variantInfoFilter = new Query.VariantInfoFilter(); + variantInfoFilter.categoryVariantInfoFilters = categoryVariantInfoFilters; + + List variantInfoFilters = List.of(variantInfoFilter); + + when(mockLoadingCache.get("bad concept")).thenThrow(CacheLoader.InvalidCacheLoadException.class); + Query query = new Query(); query.setVariantInfoFilters(variantInfoFilters); + query.setAnyRecordOf(List.of("bad concept")); TreeSet patientSubsetForQuery = abstractProcessor.getPatientSubsetForQuery(query); assertFalse(patientSubsetForQuery.isEmpty()); // Expected result is the intersection of the two filters assertEquals(argumentCaptor.getValue(), new SparseVariantIndex(Set.of(4, 6))); + assertEquals(listArgumentCaptor.getValue().get(0), Set.of()); } }