From 5912b01ac5b1ff446904c0a4d1e8ebdef6f1be53 Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Thu, 24 Aug 2023 11:29:45 -0400 Subject: [PATCH 1/4] ALS-4947: Change filter logic to be AND not OR between 2 any record of filters --- .../hpds/processing/AbstractProcessor.java | 22 ++++++++----------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java index e2f6c44a..d76d3d61 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java @@ -262,20 +262,16 @@ private void addIdSetsForRequiredFields(Query query, ArrayList> fil private void addIdSetsForAnyRecordOf(Query query, ArrayList> filteredIdSets) { if(!query.getAnyRecordOf().isEmpty()) { - Set patientsInScope = new ConcurrentSkipListSet(); - VariantBucketHolder bucketCache = new VariantBucketHolder(); - query.getAnyRecordOf().parallelStream().forEach(path->{ - if(patientsInScope.size() bucketCache = new VariantBucketHolder<>(); + filteredIdSets.addAll(query.getAnyRecordOf().parallelStream().map(path->{ + if(VariantUtils.pathIsVariantSpec(path)) { + TreeSet patientsInScope = new TreeSet<>(); + addIdSetsForVariantSpecCategoryFilters(new String[]{"0/1","1/1"}, path, patientsInScope, bucketCache); + return patientsInScope; + } else { + return new TreeSet(getCube(path).keyBasedIndex()); } - }); - filteredIdSets.add(patientsInScope); + }).collect(Collectors.toSet())); } } From 5bdb474f5280c8cdcde84893e776d0202ce40fb3 Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Fri, 25 Aug 2023 13:23:09 -0400 Subject: [PATCH 2/4] ALS-4947: Allow multiple sets of anyRecordOf filters --- .../dbmi/avillach/hpds/data/query/Query.java | 19 +++++++++- .../hpds/processing/AbstractProcessor.java | 37 +++++++++++-------- 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/client-api/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/query/Query.java b/client-api/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/query/Query.java index eb7ff4d7..3465d5ef 100644 --- a/client-api/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/query/Query.java +++ b/client-api/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/query/Query.java @@ -37,6 +37,7 @@ public Query(Query query) { private List fields = new ArrayList<>(); private List requiredFields = new ArrayList<>(); private List anyRecordOf = new ArrayList<>(); + private List> anyRecordOfMulti = new ArrayList<>(); private Map numericFilters = new HashMap<>(); private Map categoryFilters = new HashMap<>(); private List variantInfoFilters = new ArrayList<>(); @@ -62,6 +63,14 @@ public List getRequiredFields() { public List getAnyRecordOf() { return anyRecordOf; } + public List> getAnyRecordOfMulti() { + return anyRecordOfMulti; + } + public List> getAllAnyRecordOf() { + List> anyRecordOfMultiCopy = new ArrayList<>(anyRecordOfMulti); + anyRecordOfMultiCopy.add(anyRecordOf); + return anyRecordOfMultiCopy; + } public Map getNumericFilters() { return numericFilters; @@ -98,6 +107,9 @@ public void setRequiredFields(Collection requiredFields) { public void setAnyRecordOf(Collection anyRecordOf) { this.anyRecordOf = anyRecordOf != null ? new ArrayList<>(anyRecordOf) : new ArrayList<>(); } + public void setAnyRecordOfMulti(Collection> anyRecordOfMulti) { + this.anyRecordOfMulti = anyRecordOfMulti != null ? new ArrayList<>(anyRecordOfMulti) : new ArrayList<>(); + } public void setNumericFilters(Map numericFilters) { this.numericFilters = numericFilters != null ? new HashMap<>(numericFilters) : new HashMap<>(); @@ -191,7 +203,10 @@ public String toString() { writePartFormat("Numeric filters", numericFilters, builder); writePartFormat("Category filters", categoryFilters, builder); writePartFormat("Variant Info filters", variantInfoFilters, builder, false); - writePartFormat("Any-Record-Of filters", anyRecordOf, builder, true); + + List> allAnyRecordOf = new ArrayList<>(anyRecordOfMulti); + allAnyRecordOf.add(anyRecordOf); + writePartFormat("Any-Record-Of filters", allAnyRecordOf, builder, true); return builder.toString(); } @@ -234,7 +249,7 @@ private static void showTopLevelValues(Collection varList, StringBuilder builder Integer count = countMap.get(firstLevel); if(count == null) { - count = new Integer(1); + count = 1; } else { count = count + 1; } diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java index d76d3d61..28c8595e 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java @@ -6,6 +6,7 @@ import java.util.Map.Entry; import java.util.concurrent.*; import java.util.stream.Collectors; +import java.util.stream.Stream; import java.util.zip.GZIPInputStream; import com.google.common.util.concurrent.UncheckedExecutionException; @@ -195,24 +196,27 @@ protected Set applyBooleanLogic(List> filteredIdSets) { * @return */ protected List> idSetsForEachFilter(Query query) { - ArrayList> filteredIdSets = new ArrayList>(); + final ArrayList> filteredIdSets = new ArrayList<>(); try { - addIdSetsForAnyRecordOf(query, filteredIdSets); + query.getAllAnyRecordOf().forEach(anyRecordOfFilterList -> { + addIdSetsForAnyRecordOf(anyRecordOfFilterList, filteredIdSets); + }); addIdSetsForRequiredFields(query, filteredIdSets); addIdSetsForNumericFilters(query, filteredIdSets); addIdSetsForCategoryFilters(query, filteredIdSets); } catch (InvalidCacheLoadException e) { log.warn("Invalid query supplied: " + e.getLocalizedMessage()); - filteredIdSets.add(new HashSet()); // if an invalid path is supplied, no patients should match. + filteredIdSets.add(new HashSet<>()); // if an invalid path is supplied, no patients should match. } //AND logic to make sure all patients match each filter if(filteredIdSets.size()>1) { - filteredIdSets = new ArrayList>(List.of(applyBooleanLogic(filteredIdSets))); + List> processedFilteredIdSets = new ArrayList<>(List.of(applyBooleanLogic(filteredIdSets))); + return addIdSetsForVariantInfoFilters(query, processedFilteredIdSets); + } else { + return addIdSetsForVariantInfoFilters(query, filteredIdSets); } - - return addIdSetsForVariantInfoFilters(query, filteredIdSets); } /** @@ -248,7 +252,7 @@ public TreeSet getPatientSubsetForQuery(Query query) { private void addIdSetsForRequiredFields(Query query, ArrayList> filteredIdSets) { if(!query.getRequiredFields().isEmpty()) { VariantBucketHolder bucketCache = new VariantBucketHolder<>(); - filteredIdSets.addAll(query.getRequiredFields().parallelStream().map(path->{ + filteredIdSets.addAll(query.getRequiredFields().stream().map(path->{ if(VariantUtils.pathIsVariantSpec(path)) { TreeSet patientsInScope = new TreeSet<>(); addIdSetsForVariantSpecCategoryFilters(new String[]{"0/1","1/1"}, path, patientsInScope, bucketCache); @@ -260,18 +264,19 @@ private void addIdSetsForRequiredFields(Query query, ArrayList> fil } } - private void addIdSetsForAnyRecordOf(Query query, ArrayList> filteredIdSets) { - if(!query.getAnyRecordOf().isEmpty()) { + private void addIdSetsForAnyRecordOf(List anyRecordOfFilters, ArrayList> filteredIdSets) { + if(!anyRecordOfFilters.isEmpty()) { VariantBucketHolder bucketCache = new VariantBucketHolder<>(); - filteredIdSets.addAll(query.getAnyRecordOf().parallelStream().map(path->{ - if(VariantUtils.pathIsVariantSpec(path)) { + Set anyRecordOfPatientSet = anyRecordOfFilters.stream().flatMap(path -> { + if (VariantUtils.pathIsVariantSpec(path)) { TreeSet patientsInScope = new TreeSet<>(); - addIdSetsForVariantSpecCategoryFilters(new String[]{"0/1","1/1"}, path, patientsInScope, bucketCache); - return patientsInScope; + addIdSetsForVariantSpecCategoryFilters(new String[]{"0/1", "1/1"}, path, patientsInScope, bucketCache); + return patientsInScope.stream(); } else { - return new TreeSet(getCube(path).keyBasedIndex()); + return (Stream) getCube(path).keyBasedIndex().stream(); } - }).collect(Collectors.toSet())); + }).collect(Collectors.toSet()); + filteredIdSets.add(anyRecordOfPatientSet); } } @@ -286,7 +291,7 @@ private void addIdSetsForNumericFilters(Query query, ArrayList> fil private void addIdSetsForCategoryFilters(Query query, ArrayList> filteredIdSets) { if(!query.getCategoryFilters().isEmpty()) { VariantBucketHolder bucketCache = new VariantBucketHolder(); - Set> idsThatMatchFilters = (Set>)query.getCategoryFilters().entrySet().parallelStream().map(entry->{ + Set> idsThatMatchFilters = (Set>)query.getCategoryFilters().entrySet().stream().map(entry->{ Set ids = new TreeSet(); if(VariantUtils.pathIsVariantSpec(entry.getKey())) { addIdSetsForVariantSpecCategoryFilters(entry.getValue(), entry.getKey(), ids, bucketCache); From b71a5afbd09def7dfe244bdde18b8f0584be7541 Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Mon, 28 Aug 2023 11:57:18 -0400 Subject: [PATCH 3/4] ALS-4947: Minor cleanup --- .../avillach/hpds/processing/AbstractProcessor.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java index 28c8595e..e67d24cb 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java @@ -252,7 +252,7 @@ public TreeSet getPatientSubsetForQuery(Query query) { private void addIdSetsForRequiredFields(Query query, ArrayList> filteredIdSets) { if(!query.getRequiredFields().isEmpty()) { VariantBucketHolder bucketCache = new VariantBucketHolder<>(); - filteredIdSets.addAll(query.getRequiredFields().stream().map(path->{ + filteredIdSets.addAll(query.getRequiredFields().parallelStream().map(path->{ if(VariantUtils.pathIsVariantSpec(path)) { TreeSet patientsInScope = new TreeSet<>(); addIdSetsForVariantSpecCategoryFilters(new String[]{"0/1","1/1"}, path, patientsInScope, bucketCache); @@ -267,7 +267,7 @@ private void addIdSetsForRequiredFields(Query query, ArrayList> fil private void addIdSetsForAnyRecordOf(List anyRecordOfFilters, ArrayList> filteredIdSets) { if(!anyRecordOfFilters.isEmpty()) { VariantBucketHolder bucketCache = new VariantBucketHolder<>(); - Set anyRecordOfPatientSet = anyRecordOfFilters.stream().flatMap(path -> { + Set anyRecordOfPatientSet = anyRecordOfFilters.parallelStream().flatMap(path -> { if (VariantUtils.pathIsVariantSpec(path)) { TreeSet patientsInScope = new TreeSet<>(); addIdSetsForVariantSpecCategoryFilters(new String[]{"0/1", "1/1"}, path, patientsInScope, bucketCache); @@ -290,9 +290,9 @@ private void addIdSetsForNumericFilters(Query query, ArrayList> fil private void addIdSetsForCategoryFilters(Query query, ArrayList> filteredIdSets) { if(!query.getCategoryFilters().isEmpty()) { - VariantBucketHolder bucketCache = new VariantBucketHolder(); - Set> idsThatMatchFilters = (Set>)query.getCategoryFilters().entrySet().stream().map(entry->{ - Set ids = new TreeSet(); + VariantBucketHolder bucketCache = new VariantBucketHolder<>(); + Set> idsThatMatchFilters = query.getCategoryFilters().entrySet().parallelStream().map(entry->{ + Set ids = new TreeSet<>(); if(VariantUtils.pathIsVariantSpec(entry.getKey())) { addIdSetsForVariantSpecCategoryFilters(entry.getValue(), entry.getKey(), ids, bucketCache); } else { From ff5f7621aff5971864540ca45b487b74eec719ee Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Mon, 28 Aug 2023 14:30:21 -0400 Subject: [PATCH 4/4] ALS-4947: Removed redundant merging handled by getter --- .../edu/harvard/hms/dbmi/avillach/hpds/data/query/Query.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/client-api/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/query/Query.java b/client-api/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/query/Query.java index 3465d5ef..8e28e74e 100644 --- a/client-api/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/query/Query.java +++ b/client-api/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/query/Query.java @@ -203,10 +203,7 @@ public String toString() { writePartFormat("Numeric filters", numericFilters, builder); writePartFormat("Category filters", categoryFilters, builder); writePartFormat("Variant Info filters", variantInfoFilters, builder, false); - - List> allAnyRecordOf = new ArrayList<>(anyRecordOfMulti); - allAnyRecordOf.add(anyRecordOf); - writePartFormat("Any-Record-Of filters", allAnyRecordOf, builder, true); + writePartFormat("Any-Record-Of filters", getAllAnyRecordOf(), builder, true); return builder.toString(); }