diff --git a/zulia-analyzer/src/main/java/io/zulia/server/field/FieldTypeUtil.java b/zulia-analyzer/src/main/java/io/zulia/server/field/FieldTypeUtil.java index c22d981e..e5235fca 100644 --- a/zulia-analyzer/src/main/java/io/zulia/server/field/FieldTypeUtil.java +++ b/zulia-analyzer/src/main/java/io/zulia/server/field/FieldTypeUtil.java @@ -26,8 +26,16 @@ public static boolean isDateFieldType(FieldConfig.FieldType fieldType) { } public static boolean isNumericOrDateFieldType(FieldConfig.FieldType fieldType) { + return isNumericFieldType(fieldType) || isDateFieldType(fieldType); + } + + public static boolean isNumericFieldType(FieldConfig.FieldType fieldType) { return isNumericIntFieldType(fieldType) || isNumericLongFieldType(fieldType) || isNumericFloatFieldType(fieldType) || isNumericDoubleFieldType( - fieldType) || isDateFieldType(fieldType); + fieldType); + } + + public static boolean isNumericFloatingPointFieldType(FieldConfig.FieldType fieldType) { + return isNumericFloatFieldType(fieldType) || isNumericDoubleFieldType(fieldType); } public static boolean isBooleanFieldType(FieldConfig.FieldType fieldType) { diff --git a/zulia-client/src/main/java/io/zulia/client/command/builder/NumericStat.java b/zulia-client/src/main/java/io/zulia/client/command/builder/NumericStat.java new file mode 100644 index 00000000..b0f8aa9d --- /dev/null +++ b/zulia-client/src/main/java/io/zulia/client/command/builder/NumericStat.java @@ -0,0 +1,17 @@ +package io.zulia.client.command.builder; + +import io.zulia.message.ZuliaQuery.StatRequest; + +public class NumericStat implements StatBuilder { + + private final StatRequest.Builder statRequestBuilder; + + public NumericStat(String numericField) { + statRequestBuilder = StatRequest.newBuilder().setNumericField(numericField); + } + + @Override + public StatRequest getStatRequest() { + return statRequestBuilder.build(); + } +} diff --git a/zulia-client/src/main/java/io/zulia/client/command/builder/Search.java b/zulia-client/src/main/java/io/zulia/client/command/builder/Search.java index 6a94a5c5..7713905e 100644 --- a/zulia-client/src/main/java/io/zulia/client/command/builder/Search.java +++ b/zulia-client/src/main/java/io/zulia/client/command/builder/Search.java @@ -182,6 +182,16 @@ public Search clearFacetCount() { return this; } + public Search addStat(StatBuilder statBuilder) { + facetRequest.addStatRequest(statBuilder.getStatRequest()); + return this; + } + + public Search clearStat() { + facetRequest.clearStatRequest(); + return this; + } + public Search addAnalysis(AnalysisBuilder analysisBuilder) { queryRequest.addAnalysisRequest(analysisBuilder.getAnalysis()); return this; diff --git a/zulia-client/src/main/java/io/zulia/client/command/builder/StatBuilder.java b/zulia-client/src/main/java/io/zulia/client/command/builder/StatBuilder.java new file mode 100644 index 00000000..b91f9692 --- /dev/null +++ b/zulia-client/src/main/java/io/zulia/client/command/builder/StatBuilder.java @@ -0,0 +1,8 @@ +package io.zulia.client.command.builder; + +import io.zulia.message.ZuliaQuery; + +public interface StatBuilder { + + ZuliaQuery.StatRequest getStatRequest(); +} diff --git a/zulia-client/src/main/java/io/zulia/client/command/builder/StatFacet.java b/zulia-client/src/main/java/io/zulia/client/command/builder/StatFacet.java new file mode 100644 index 00000000..0c60bedc --- /dev/null +++ b/zulia-client/src/main/java/io/zulia/client/command/builder/StatFacet.java @@ -0,0 +1,39 @@ +package io.zulia.client.command.builder; + +import io.zulia.message.ZuliaQuery; +import io.zulia.message.ZuliaQuery.StatRequest; + +import java.util.Arrays; + +public class StatFacet implements StatBuilder { + + private final StatRequest.Builder statRequestBuilder; + + public StatFacet(String numericField, String facetField) { + statRequestBuilder = StatRequest.newBuilder().setNumericField(numericField).setFacetField(ZuliaQuery.Facet.newBuilder().setLabel(facetField).build()); + } + + public StatFacet(String numericField, String facetField, String... path) { + this(numericField, facetField, Arrays.asList(path)); + } + + public StatFacet(String numericField, String facetField, Iterable path) { + statRequestBuilder = StatRequest.newBuilder().setNumericField(numericField) + .setFacetField(ZuliaQuery.Facet.newBuilder().setLabel(facetField).addAllPath(path).build()); + } + + public StatFacet setTopN(int topN) { + statRequestBuilder.setMaxFacets(topN); + return this; + } + + public StatFacet setTopNShard(int topNShard) { + statRequestBuilder.setShardFacets(topNShard); + return this; + } + + @Override + public StatRequest getStatRequest() { + return statRequestBuilder.build(); + } +} diff --git a/zulia-client/src/main/java/io/zulia/client/result/QueryResult.java b/zulia-client/src/main/java/io/zulia/client/result/QueryResult.java index 1bc2ca51..2a7bed7c 100644 --- a/zulia-client/src/main/java/io/zulia/client/result/QueryResult.java +++ b/zulia-client/src/main/java/io/zulia/client/result/QueryResult.java @@ -5,8 +5,10 @@ import io.zulia.message.ZuliaQuery.AnalysisResult; import io.zulia.message.ZuliaQuery.FacetCount; import io.zulia.message.ZuliaQuery.FacetGroup; +import io.zulia.message.ZuliaQuery.FacetStats; import io.zulia.message.ZuliaQuery.LastResult; import io.zulia.message.ZuliaQuery.ScoredResult; +import io.zulia.message.ZuliaQuery.StatGroup; import io.zulia.message.ZuliaServiceOuterClass.QueryResponse; import io.zulia.util.ResultHelper; import io.zulia.util.ZuliaUtil; @@ -132,6 +134,44 @@ public int getFacetGroupCount() { return queryResponse.getFacetGroupCount(); } + public List getStatGroups() { + return queryResponse.getStatGroupList(); + } + + public FacetStats getNumericFieldStat(String numericFieldName) { + for (StatGroup sg : queryResponse.getStatGroupList()) { + if (numericFieldName.equals(sg.getStatRequest().getNumericField()) && sg.getStatRequest().getFacetField().getLabel().isEmpty()) { + return sg.getGlobalStats(); + } + } + return null; + } + + public List getFacetFieldStat(String numericFieldName, String facetField) { + if (facetField == null) { + facetField = ""; + } + for (StatGroup sg : queryResponse.getStatGroupList()) { + if (numericFieldName.equals(sg.getStatRequest().getNumericField()) && facetField.equals(sg.getStatRequest().getFacetField().getLabel())) { + return sg.getFacetStatsList(); + } + } + return null; + } + + public List getFacetFieldStat(String numericFieldName, String facetField, List paths) { + if (facetField == null) { + facetField = ""; + } + for (StatGroup sg : queryResponse.getStatGroupList()) { + if (numericFieldName.equals(sg.getStatRequest().getNumericField()) && facetField.equals(sg.getStatRequest().getFacetField().getLabel()) + && paths.equals(sg.getStatRequest().getFacetField().getPathList())) { + return sg.getFacetStatsList(); + } + } + return null; + } + public List getSummaryAnalysisResults() { return queryResponse.getAnalysisResultList(); } diff --git a/zulia-common/src/main/proto/zulia_query.proto b/zulia-common/src/main/proto/zulia_query.proto index b0ee63c8..ceb526da 100644 --- a/zulia-common/src/main/proto/zulia_query.proto +++ b/zulia-common/src/main/proto/zulia_query.proto @@ -48,6 +48,14 @@ message Facet { message FacetRequest { repeated CountRequest countRequest = 1; repeated Facet drillDown = 2; + repeated StatRequest statRequest = 3; +} + +message StatRequest { + string numericField = 1; + Facet facetField = 2; + uint32 maxFacets = 3; // default 10, set to -1 to get all + uint32 shardFacets = 4; // defaults to maxFacets * 10, ignored for single shard indexes, set to -1 to get all } message CountRequest { @@ -56,7 +64,6 @@ message CountRequest { uint32 shardFacets = 3; // defaults to maxFacets * 10, ignored for single shard indexes, set to -1 to get all } - message FacetCount { string facet = 1; uint64 count = 2; @@ -70,6 +77,22 @@ message FacetGroup { uint64 maxValuePossibleMissing = 4; // default 0 } +message FacetStats { + string facet = 1; + SortValue min = 2; + SortValue max = 3; + SortValue sum = 4; + uint64 docCount = 5; + uint64 valueCount = 6; +} + + +message StatGroup { + StatRequest statRequest = 1; + FacetStats globalStats = 2; + repeated FacetStats facetStats = 3; +} + message SortRequest { repeated FieldSort fieldSort = 1; } @@ -197,4 +220,5 @@ message ShardQueryResponse { ScoredResult next = 5; repeated FacetGroup facetGroup = 6; repeated AnalysisResult analysisResult = 7; + repeated StatGroup statGroup = 8; } \ No newline at end of file diff --git a/zulia-common/src/main/proto/zulia_service.proto b/zulia-common/src/main/proto/zulia_service.proto index 29c4ef90..a4a1c9b3 100644 --- a/zulia-common/src/main/proto/zulia_service.proto +++ b/zulia-common/src/main/proto/zulia_service.proto @@ -76,6 +76,7 @@ message QueryResponse { LastResult lastResult = 3; repeated FacetGroup facetGroup = 4; repeated AnalysisResult analysisResult = 5; + repeated StatGroup statGroup = 6; } message InternalQueryResponse { diff --git a/zulia-server/src/main/java/io/zulia/server/index/ShardReader.java b/zulia-server/src/main/java/io/zulia/server/index/ShardReader.java index d64b8a79..4e7c8bd8 100644 --- a/zulia-server/src/main/java/io/zulia/server/index/ShardReader.java +++ b/zulia-server/src/main/java/io/zulia/server/index/ShardReader.java @@ -16,6 +16,7 @@ import io.zulia.server.field.FieldTypeUtil; import io.zulia.server.search.QueryCacheKey; import io.zulia.server.search.QueryResultCache; +import io.zulia.server.search.TaxonomyStatsHandler; import io.zulia.server.search.ZuliaQueryParser; import io.zulia.server.util.FieldAndSubFields; import io.zulia.util.ResultHelper; @@ -24,7 +25,6 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.facet.FacetsConfig; import org.apache.lucene.facet.LabelAndValue; @@ -121,10 +121,6 @@ public void close() throws Exception { taxoReader.close(); } - public Facets getFacets(FacetsCollector facetsCollector) throws IOException { - return new FastTaxonomyFacetCounts(taxoReader, facetsConfig, facetsCollector); - } - public int getTotalFacets() { return taxoReader.getSize(); } @@ -192,10 +188,21 @@ public ZuliaQuery.ShardQueryResponse queryShard(Query query, Map facetGroups = handleFacets(facetRequest.getCountRequestList(), facetsCollector); + shardQueryReponseBuilder.addAllFacetGroup(facetGroups); + } + if (hasStatRequests) { + List statGroups = handleStats(facetRequest.getStatRequestList(), facetsCollector); + shardQueryReponseBuilder.addAllStatGroup(statGroups); + } } else { indexSearcher.search(query, collector); @@ -262,6 +269,65 @@ public ZuliaQuery.ShardQueryResponse queryShard(Query query, Map handleStats(List statRequestList, FacetsCollector facetsCollector) throws IOException { + List statGroups = new ArrayList<>(); + + TaxonomyStatsHandler facets = new TaxonomyStatsHandler(taxoReader, facetsCollector, statRequestList, indexConfig); + + for (ZuliaQuery.StatRequest statRequest : statRequestList) { + + ZuliaQuery.StatGroup.Builder statGroupBuilder = ZuliaQuery.StatGroup.newBuilder(); + statGroupBuilder.setStatRequest(statRequest); + String label = statRequest.getFacetField().getLabel(); + if (!label.isEmpty()) { + + int numOfFacets; + if (indexConfig.getNumberOfShards() > 1) { + if (statRequest.getShardFacets() > 0) { + numOfFacets = statRequest.getShardFacets(); + } + else if (statRequest.getShardFacets() == 0) { + numOfFacets = statRequest.getMaxFacets() * 10; + } + else { + numOfFacets = getTotalFacets(); + } + } + else { + if (statRequest.getMaxFacets() > 0) { + numOfFacets = statRequest.getMaxFacets(); + } + else { + numOfFacets = getTotalFacets(); + } + } + + if (indexConfig.isHierarchicalFacet(label)) { + List topChildren = facets.getTopChildren(statRequest.getNumericField(), numOfFacets, label, + statRequest.getFacetField().getPathList().toArray(new String[0])); + statGroupBuilder.addAllFacetStats(topChildren); + } + else { + List topChildren = facets.getTopChildren(statRequest.getNumericField(), numOfFacets, label, + statRequest.getFacetField().getPathList().toArray(new String[0])); + if (topChildren != null) { + statGroupBuilder.addAllFacetStats(topChildren); + } + } + + } + else { + ZuliaQuery.FacetStats globalStats = facets.getGlobalStatsForNumericField(statRequest.getNumericField()); + statGroupBuilder.setGlobalStats(globalStats); + } + + statGroups.add(statGroupBuilder.build()); + + } + + return statGroups; + } + private List getAnalysisHandlerList(List analysisRequests) throws Exception { if (analysisRequests.isEmpty()) { return Collections.emptyList(); @@ -349,14 +415,12 @@ else if (ZuliaBase.Similarity.TF.equals(similarity)) { }; } - private void searchWithFacets(ZuliaQuery.FacetRequest facetRequest, Query q, IndexSearcher indexSearcher, TopDocsCollector collector, - ZuliaQuery.ShardQueryResponse.Builder segmentReponseBuilder) throws Exception { - FacetsCollector facetsCollector = new FacetsCollector(); - indexSearcher.search(q, MultiCollector.wrap(collector, facetsCollector)); + private List handleFacets(List countRequests, FacetsCollector facetsCollector) throws IOException { + FastTaxonomyFacetCounts facets = new FastTaxonomyFacetCounts(taxoReader, facetsConfig, facetsCollector); - Facets facets = getFacets(facetsCollector); + List facetGroups = new ArrayList<>(); - for (ZuliaQuery.CountRequest countRequest : facetRequest.getCountRequestList()) { + for (ZuliaQuery.CountRequest countRequest : countRequests) { ZuliaQuery.Facet facetField = countRequest.getFacetField(); String label = facetField.getLabel(); @@ -427,8 +491,9 @@ else if (countRequest.getShardFacets() == 0) { fg.addFacetCount(facetCountBuilder); } } - segmentReponseBuilder.addFacetGroup(fg); + facetGroups.add(fg.build()); } + return facetGroups; } private TopDocsCollector getSortingCollector(ZuliaQuery.SortRequest sortRequest, int hasMoreAmount, FieldDoc after) throws Exception { diff --git a/zulia-server/src/main/java/io/zulia/server/search/FacetCombiner.java b/zulia-server/src/main/java/io/zulia/server/search/FacetCombiner.java new file mode 100644 index 00000000..3e821cc9 --- /dev/null +++ b/zulia-server/src/main/java/io/zulia/server/search/FacetCombiner.java @@ -0,0 +1,164 @@ +package io.zulia.server.search; + +import io.zulia.message.ZuliaQuery; +import io.zulia.message.ZuliaQuery.FacetCount; +import io.zulia.message.ZuliaQuery.FacetGroup; +import org.apache.lucene.util.FixedBitSet; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; + +public class FacetCombiner { + + public static class FacetGroupWithShardIndex { + private final FacetGroup facetGroup; + private final int shardIndex; + + public FacetGroupWithShardIndex(FacetGroup facetGroup, int shardIndex) { + this.facetGroup = facetGroup; + this.shardIndex = shardIndex; + } + + public FacetGroup getFacetGroup() { + return facetGroup; + } + + public int getShardIndex() { + return shardIndex; + } + } + + private final List facetGroups; + private final int[] shardIndexes; + private final ZuliaQuery.CountRequest countRequest; + private final int shardReponses; + + public FacetCombiner(ZuliaQuery.CountRequest countRequest, int shardReponses) { + this.countRequest = countRequest; + this.shardReponses = shardReponses; + this.facetGroups = new ArrayList<>(shardReponses); + this.shardIndexes = new int[shardReponses]; + } + + public void handleFacetGroupForShard(FacetGroup facetGroup, int shardIndex) { + facetGroups.add(new FacetGroupWithShardIndex(facetGroup, shardIndex)); + } + + public FacetGroup getCombinedFacetGroup() { + if (facetGroups.size() == 1) { + return facetGroups.get(0).getFacetGroup(); + } + else { + + Map facetCounts = new HashMap<>(); + Map shardsReturned = new HashMap<>(); + FixedBitSet fullResults = new FixedBitSet(shardReponses); + long[] minForShard = new long[shardReponses]; + + for (FacetGroupWithShardIndex facetGroupWithShardIndex : facetGroups) { + FacetGroup fg = facetGroupWithShardIndex.getFacetGroup(); + int shardIndex = facetGroupWithShardIndex.getShardIndex(); + + for (FacetCount fc : fg.getFacetCountList()) { + String facet = fc.getFacet(); + AtomicLong facetSum = facetCounts.get(facet); + FixedBitSet shardSet = shardsReturned.get(facet); + + if (facetSum == null) { + facetSum = new AtomicLong(); + facetCounts.put(facet, facetSum); + shardSet = new FixedBitSet(shardReponses); + shardsReturned.put(facet, shardSet); + } + long count = fc.getCount(); + facetSum.addAndGet(count); + shardSet.set(shardIndex); + + minForShard[shardIndex] = count; + } + + int shardFacets = countRequest.getShardFacets(); + int facetCountCount = fg.getFacetCountCount(); + if (facetCountCount < shardFacets || (shardFacets == -1)) { + fullResults.set(shardIndex); + minForShard[shardIndex] = 0; + } + } + + FacetGroup.Builder fg = FacetGroup.newBuilder(); + fg.setCountRequest(countRequest); + + int numberOfShards = shardIndexes.length; + long maxValuePossibleMissing = 0; + for (int i = 0; i < numberOfShards; i++) { + maxValuePossibleMissing += minForShard[i]; + } + + boolean computeError = countRequest.getMaxFacets() > 0 && countRequest.getShardFacets() > 0 && numberOfShards > 1; + boolean computePossibleMissing = computeError && (maxValuePossibleMissing != 0); + + SortedSet sortedFacetResults = facetCounts.keySet().stream() + .map(facet -> new FacetCountResult(facet, facetCounts.get(facet).get())).collect(Collectors.toCollection(TreeSet::new)); + + int maxCount = countRequest.getMaxFacets(); + + long minCountReturned = 0; + + int count = 0; + for (FacetCountResult facet : sortedFacetResults) { + + FixedBitSet shardCount = shardsReturned.get(facet.getFacet()); + shardCount.or(fullResults); + + FacetCount.Builder facetCountBuilder = FacetCount.newBuilder().setFacet(facet.getFacet()).setCount(facet.getCount()); + + long maxWithError = 0; + if (computeError) { + long maxError = 0; + if (shardCount.cardinality() < numberOfShards) { + for (int i = 0; i < numberOfShards; i++) { + if (!shardCount.get(i)) { + maxError += minForShard[i]; + } + } + } + facetCountBuilder.setMaxError(maxError); + maxWithError = maxError + facet.getCount(); + } + + count++; + + if (maxCount > 0 && count > maxCount) { + + if (computePossibleMissing) { + if (maxWithError > maxValuePossibleMissing) { + maxValuePossibleMissing = maxWithError; + } + } + else { + break; + } + } + else { + fg.addFacetCount(facetCountBuilder); + minCountReturned = facet.getCount(); + } + } + + if (!sortedFacetResults.isEmpty()) { + if (maxValuePossibleMissing > minCountReturned) { + fg.setPossibleMissing(true); + fg.setMaxValuePossibleMissing(maxValuePossibleMissing); + } + } + + return fg.build(); + } + } +} diff --git a/zulia-server/src/main/java/io/zulia/server/search/QueryCombiner.java b/zulia-server/src/main/java/io/zulia/server/search/QueryCombiner.java index bda9e02c..10dfbae0 100644 --- a/zulia-server/src/main/java/io/zulia/server/search/QueryCombiner.java +++ b/zulia-server/src/main/java/io/zulia/server/search/QueryCombiner.java @@ -1,13 +1,11 @@ package io.zulia.server.search; -import io.zulia.ZuliaConstants; import io.zulia.message.ZuliaBase.Term; import io.zulia.message.ZuliaIndex.FieldConfig; import io.zulia.message.ZuliaQuery; import io.zulia.message.ZuliaQuery.AnalysisRequest; import io.zulia.message.ZuliaQuery.AnalysisResult; import io.zulia.message.ZuliaQuery.CountRequest; -import io.zulia.message.ZuliaQuery.FacetCount; import io.zulia.message.ZuliaQuery.FacetGroup; import io.zulia.message.ZuliaQuery.FieldSort; import io.zulia.message.ZuliaQuery.IndexShardResponse; @@ -16,15 +14,12 @@ import io.zulia.message.ZuliaQuery.ScoredResult; import io.zulia.message.ZuliaQuery.ShardQueryResponse; import io.zulia.message.ZuliaQuery.SortRequest; -import io.zulia.message.ZuliaQuery.SortValues; +import io.zulia.message.ZuliaQuery.StatRequest; import io.zulia.message.ZuliaServiceOuterClass.InternalQueryResponse; import io.zulia.message.ZuliaServiceOuterClass.QueryRequest; import io.zulia.message.ZuliaServiceOuterClass.QueryResponse; import io.zulia.server.analysis.frequency.TermFreq; -import io.zulia.server.field.FieldTypeUtil; import io.zulia.server.index.ZuliaIndex; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.FixedBitSet; import java.util.ArrayList; import java.util.Arrays; @@ -34,38 +29,28 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.SortedSet; -import java.util.TreeSet; -import java.util.concurrent.atomic.AtomicLong; import java.util.logging.Logger; -import java.util.stream.Collectors; public class QueryCombiner { + private final static Logger log = Logger.getLogger(QueryCombiner.class.getSimpleName()); + private final static Comparator scoreCompare = new ScoreCompare(); private final static Comparator reverseScoreCompare = new ReverseScoreCompare(); - private final static Logger log = Logger.getLogger(QueryCombiner.class.getSimpleName()); - private final List responses; - private final Map> indexToShardQueryResponseMap; private final List shardResponses; - private final int amount; private final int start; private final LastResult lastResult; private final List analysisRequestList; - - private boolean isShort; - private List results; - private int resultsSize; - - private SortRequest sortRequest; - + private final SortRequest sortRequest; private final Collection indexes; private final Map indexToShardCount; + private boolean isShort; + public QueryCombiner(Collection indexes, QueryRequest request, List responses) { this.indexToShardCount = new HashMap<>(); @@ -85,8 +70,7 @@ public QueryCombiner(Collection indexes, QueryRequest request, List< this.analysisRequestList = request.getAnalysisRequestList(); this.isShort = false; - this.results = Collections.emptyList(); - this.resultsSize = 0; + } private void validate() throws Exception { @@ -140,8 +124,6 @@ public QueryResponse getQueryResponse() throws Exception { validate(); - boolean sorting = (sortRequest != null && !sortRequest.getFieldSortList().isEmpty()); - long totalHits = 0; long returnedHits = 0; for (ShardQueryResponse sr : shardResponses) { @@ -152,82 +134,27 @@ public QueryResponse getQueryResponse() throws Exception { QueryResponse.Builder builder = QueryResponse.newBuilder(); builder.setTotalHits(totalHits); - resultsSize = Math.min(amount, (int) returnedHits); + int resultsSize = Math.min(amount, (int) returnedHits); - results = Collections.emptyList(); - - Map lastIndexResultMap = new HashMap<>(); - - for (String indexName : indexToShardQueryResponseMap.keySet()) { - int numberOfShards = indexToShardCount.get(indexName); - lastIndexResultMap.put(indexName, new ScoredResult[numberOfShards]); - } - - for (LastIndexResult lir : lastResult.getLastIndexResultList()) { - ScoredResult[] lastForShardArr = lastIndexResultMap.get(lir.getIndexName()); - // initialize with last results - for (ScoredResult sr : lir.getLastForShardList()) { - lastForShardArr[sr.getShard()] = sr; - } - } - - Map> facetCountsMap = new HashMap<>(); - Map> shardsReturnedMap = new HashMap<>(); - Map fullResultsMap = new HashMap<>(); - Map minForShardMap = new HashMap<>(); + Map facetCombinerMap = new HashMap<>(); + Map statCombinerMap = new HashMap<>(); Map> analysisRequestToTermMap = new HashMap<>(); int shardIndex = 0; for (ShardQueryResponse sr : shardResponses) { - for (FacetGroup fg : sr.getFacetGroupList()) { - CountRequest countRequest = fg.getCountRequest(); - Map facetCounts = facetCountsMap.get(countRequest); - Map shardsReturned = shardsReturnedMap.get(countRequest); - FixedBitSet fullResults = fullResultsMap.get(countRequest); - long[] minForShard = minForShardMap.get(countRequest); - - if (facetCounts == null) { - facetCounts = new HashMap<>(); - facetCountsMap.put(countRequest, facetCounts); - - shardsReturned = new HashMap<>(); - shardsReturnedMap.put(countRequest, shardsReturned); - - fullResults = new FixedBitSet(shardResponses.size()); - fullResultsMap.put(countRequest, fullResults); - - minForShard = new long[shardResponses.size()]; - minForShardMap.put(countRequest, minForShard); - } - - for (FacetCount fc : fg.getFacetCountList()) { - String facet = fc.getFacet(); - AtomicLong facetSum = facetCounts.get(facet); - FixedBitSet shardSet = shardsReturned.get(facet); - - if (facetSum == null) { - facetSum = new AtomicLong(); - facetCounts.put(facet, facetSum); - shardSet = new FixedBitSet(shardResponses.size()); - shardsReturned.put(facet, shardSet); - } - long count = fc.getCount(); - facetSum.addAndGet(count); - shardSet.set(shardIndex); - - minForShard[shardIndex] = count; - } + FacetCombiner facetCombiner = facetCombinerMap.computeIfAbsent(countRequest, + countRequest1 -> new FacetCombiner(countRequest, shardResponses.size())); + facetCombiner.handleFacetGroupForShard(fg, shardIndex); + } - int shardFacets = countRequest.getShardFacets(); - int facetCountCount = fg.getFacetCountCount(); - if (facetCountCount < shardFacets || (shardFacets == -1)) { - fullResults.set(shardIndex); - minForShard[shardIndex] = 0; - } + for (ZuliaQuery.StatGroup sg : sr.getStatGroupList()) { + StatRequest statRequest = sg.getStatRequest(); + StatCombiner statCombiner = statCombinerMap.computeIfAbsent(statRequest, statRequest1 -> new StatCombiner(statRequest, shardResponses.size())); + statCombiner.handleStatGroupForShard(sg, shardIndex); } for (AnalysisResult analysisResult : sr.getAnalysisResultList()) { @@ -240,19 +167,14 @@ public QueryResponse getQueryResponse() throws Exception { Map termMap = analysisRequestToTermMap.get(analysisRequest); for (Term term : analysisResult.getTermsList()) { - String key = term.getValue(); if (!termMap.containsKey(key)) { termMap.put(key, Term.newBuilder().setValue(key).setDocFreq(0).setTermFreq(0)); } Term.Builder termsBuilder = termMap.get(key); - termsBuilder.setDocFreq(termsBuilder.getDocFreq() + term.getDocFreq()); - termsBuilder.setScore(termsBuilder.getScore() + term.getScore()); - termsBuilder.setTermFreq(termsBuilder.getTermFreq() + term.getTermFreq()); - } } @@ -270,239 +192,63 @@ public QueryResponse getQueryResponse() throws Exception { } } - for (CountRequest countRequest : facetCountsMap.keySet()) { - - FacetGroup.Builder fg = FacetGroup.newBuilder(); - fg.setCountRequest(countRequest); - Map facetCounts = facetCountsMap.get(countRequest); - Map shardsReturned = shardsReturnedMap.get(countRequest); - FixedBitSet fullResults = fullResultsMap.get(countRequest); - long[] minForShard = minForShardMap.get(countRequest); - - int numberOfShards = shardResponses.size(); - long maxValuePossibleMissing = 0; - for (int i = 0; i < numberOfShards; i++) { - maxValuePossibleMissing += minForShard[i]; - } - - boolean computeError = countRequest.getMaxFacets() > 0 && countRequest.getShardFacets() > 0 && numberOfShards > 1; - boolean computePossibleMissing = computeError && (maxValuePossibleMissing != 0); - - SortedSet sortedFacetResults = facetCounts.keySet().stream() - .map(facet -> new FacetCountResult(facet, facetCounts.get(facet).get())).collect(Collectors.toCollection(TreeSet::new)); - - Integer maxCount = countRequest.getMaxFacets(); - - long minCountReturned = 0; - - int count = 0; - for (FacetCountResult facet : sortedFacetResults) { - - FixedBitSet shardCount = shardsReturned.get(facet.getFacet()); - shardCount.or(fullResults); - - FacetCount.Builder facetCountBuilder = FacetCount.newBuilder().setFacet(facet.getFacet()).setCount(facet.getCount()); - - long maxWithError = 0; - if (computeError) { - long maxError = 0; - if (shardCount.cardinality() < numberOfShards) { - for (int i = 0; i < numberOfShards; i++) { - if (!shardCount.get(i)) { - maxError += minForShard[i]; - } - } - } - facetCountBuilder.setMaxError(maxError); - maxWithError = maxError + facet.getCount(); - } - - count++; - - if (maxCount > 0 && count > maxCount) { + for (FacetCombiner facetCombiner : facetCombinerMap.values()) { + builder.addFacetGroup(facetCombiner.getCombinedFacetGroup()); + } - if (computePossibleMissing) { - if (maxWithError > maxValuePossibleMissing) { - maxValuePossibleMissing = maxWithError; - } - } - else { - break; - } - } - else { - fg.addFacetCount(facetCountBuilder); - minCountReturned = facet.getCount(); - } - } + for (StatCombiner statCombiner : statCombinerMap.values()) { + builder.addStatGroup(statCombiner.getCombinedStatGroup()); + } - if (!sortedFacetResults.isEmpty()) { - if (maxValuePossibleMissing > minCountReturned) { - fg.setPossibleMissing(true); - fg.setMaxValuePossibleMissing(maxValuePossibleMissing); - } + Map lastIndexResultMap = createLastIndexResultMapWithPreviousLastResults(); + List results; + if (shardResponses.size() > 1) { + results = mergeResults((int) returnedHits, resultsSize, lastIndexResultMap); + } + else { + ShardQueryResponse shardQueryResponse = shardResponses.get(0); + results = shardQueryResponse.getScoredResultList(); + if (!results.isEmpty()) { + lastIndexResultMap.get(shardQueryResponse.getIndexName())[shardQueryResponse.getShardNumber()] = results.get(results.size() - 1); } - - builder.addFacetGroup(fg); } - List mergedResults = new ArrayList<>((int) returnedHits); - for (ShardQueryResponse sr : shardResponses) { - mergedResults.addAll(sr.getScoredResultList()); + if (start == 0) { + builder.addAllResults(results); } - - Comparator myCompare = scoreCompare; - - if (sorting) { - final List fieldSortList = sortRequest.getFieldSortList(); - - final HashMap sortTypeMap = new HashMap<>(); - - for (FieldSort fieldSort : fieldSortList) { - String sortField = fieldSort.getSortField(); - - if (ZuliaQueryParser.rewriteLengthFields(sortField).equals(sortField)) { - - for (ZuliaIndex index : indexes) { - FieldConfig.FieldType currentSortType = sortTypeMap.get(sortField); - - FieldConfig.FieldType indexSortType = index.getSortFieldType(sortField); - if (currentSortType == null) { - sortTypeMap.put(sortField, indexSortType); - } - else { - if (!currentSortType.equals(indexSortType)) { - log.severe("Sort fields must be defined the same in all indexes searched in a single query"); - String message = - "Cannot sort on field <" + sortField + ">: found type: <" + currentSortType + "> then type: <" + indexSortType + ">"; - log.severe(message); - - throw new Exception(message); - } - } - } + else { + int i = 0; + for (ScoredResult scoredResult : results) { + if (i >= start) { + builder.addResults(scoredResult); } + i++; } + } - myCompare = (o1, o2) -> { - int compare = 0; - - int sortValueIndex = 0; - - SortValues sortValues1 = o1.getSortValues(); - SortValues sortValues2 = o2.getSortValues(); - for (FieldSort fs : fieldSortList) { - String sortField = fs.getSortField(); - - FieldConfig.FieldType sortType = sortTypeMap.get(sortField); - - if (!ZuliaQueryParser.rewriteLengthFields(sortField).equals(sortField)) { - sortType = FieldConfig.FieldType.NUMERIC_LONG; - } - - if (ZuliaConstants.SCORE_FIELD.equals(sortField)) { - if (FieldSort.Direction.DESCENDING.equals(fs.getDirection())) { - compare = scoreCompare.compare(o1, o2); - } - else { - compare = reverseScoreCompare.compare(o1, o2); - } - } - else { - ZuliaQuery.SortValue sortValue1 = sortValues1.getSortValue(sortValueIndex); - ZuliaQuery.SortValue sortValue2 = sortValues2.getSortValue(sortValueIndex); - - if (FieldTypeUtil.isNumericIntFieldType(sortType)) { - Integer a = sortValue1.getExists() ? sortValue1.getIntegerValue() : null; - Integer b = sortValue2.getExists() ? sortValue2.getIntegerValue() : null; - - if (!fs.getMissingLast()) { - compare = Comparator.nullsFirst(Integer::compareTo).compare(a, b); - } - else { - compare = Comparator.nullsLast(Integer::compareTo).compare(a, b); - } - } - else if (FieldTypeUtil.isNumericLongFieldType(sortType)) { - Long a = sortValue1.getExists() ? sortValue1.getLongValue() : null; - Long b = sortValue2.getExists() ? sortValue2.getLongValue() : null; - - if (!fs.getMissingLast()) { - compare = Comparator.nullsFirst(Long::compareTo).compare(a, b); - } - else { - compare = Comparator.nullsLast(Long::compareTo).compare(a, b); - } - } - else if (FieldTypeUtil.isDateFieldType(sortType)) { - Long a = sortValue1.getExists() ? sortValue1.getDateValue() : null; - Long b = sortValue2.getExists() ? sortValue2.getDateValue() : null; - - if (!fs.getMissingLast()) { - compare = Comparator.nullsFirst(Long::compareTo).compare(a, b); - } - else { - compare = Comparator.nullsLast(Long::compareTo).compare(a, b); - } - } - else if (FieldTypeUtil.isNumericFloatFieldType(sortType)) { - - Float a = sortValue1.getExists() ? sortValue1.getFloatValue() : null; - Float b = sortValue2.getExists() ? sortValue2.getFloatValue() : null; - - if (!fs.getMissingLast()) { - compare = Comparator.nullsFirst(Float::compareTo).compare(a, b); - } - else { - compare = Comparator.nullsLast(Float::compareTo).compare(a, b); - } - } - else if (FieldTypeUtil.isNumericDoubleFieldType(sortType)) { - - Double a = sortValue1.getExists() ? sortValue1.getDoubleValue() : null; - Double b = sortValue2.getExists() ? sortValue2.getDoubleValue() : null; - - if (!fs.getMissingLast()) { - compare = Comparator.nullsFirst(Double::compareTo).compare(a, b); - } - else { - compare = Comparator.nullsLast(Double::compareTo).compare(a, b); - } - - } - else { - String a = sortValue1.getExists() ? sortValue1.getStringValue() : null; - String b = sortValue2.getExists() ? sortValue2.getStringValue() : null; - - if (!fs.getMissingLast()) { - compare = Comparator.nullsFirst(BytesRef::compareTo) - .compare(a != null ? new BytesRef(a) : null, b != null ? new BytesRef(b) : null); - } - else { - compare = Comparator.nullsLast(BytesRef::compareTo) - .compare(a != null ? new BytesRef(a) : null, b != null ? new BytesRef(b) : null); - } - } + builder.setLastResult(createLastResult(lastIndexResultMap)); - if (FieldSort.Direction.DESCENDING.equals(fs.getDirection())) { - compare *= -1; - } - } + return builder.build(); + } - if (compare != 0) { - return compare; - } + private List mergeResults(int returnedHits, int resultsSize, Map lastIndexResultMap) throws Exception { - sortValueIndex++; + List results = Collections.emptyList(); - } + boolean sorting = (sortRequest != null && !sortRequest.getFieldSortList().isEmpty()); - return compare; - }; + List mergedResults = new ArrayList<>(returnedHits); + for (ShardQueryResponse sr : shardResponses) { + mergedResults.addAll(sr.getScoredResultList()); } if (!mergedResults.isEmpty()) { - mergedResults.sort(myCompare); + + List fieldSortList = sortRequest != null ? sortRequest.getFieldSortList() : Collections.emptyList(); + HashMap sortTypeMap = createSortTypeMap(fieldSortList); + + Comparator comparator = new ZuliaPostSortingComparator(fieldSortList, sortTypeMap); + mergedResults.sort(comparator); results = mergedResults.subList(0, resultsSize); @@ -522,7 +268,7 @@ else if (FieldTypeUtil.isNumericDoubleFieldType(sortType)) { lastForIndex = sr; } else { - if (myCompare.compare(sr, lastForIndex) > 0) { + if (comparator.compare(sr, lastForIndex) > 0) { lastForIndex = sr; } } @@ -542,13 +288,12 @@ else if (FieldTypeUtil.isNumericDoubleFieldType(sortType)) { ShardQueryResponse sr = shardResponseMap.get(shardNumber); if (sr.hasNext()) { ScoredResult next = sr.getNext(); - int compare = myCompare.compare(lastForIndex, next); + int compare = comparator.compare(lastForIndex, next); if (compare > 0) { if (sorting) { String msg = "Result set did not return the most relevant sorted documents for index <" + indexName + ">\n"; - msg += " Last for index from shard <" + lastForIndex.getShard() + "> has sort values <" + lastForIndex.getSortValues() - + ">\n"; + msg += " Last for index from shard <" + lastForIndex.getShard() + "> has sort values <" + lastForIndex.getSortValues() + ">\n"; msg += " Next for shard <" + next.getShard() + "> has sort values <" + next.getSortValues() + ">\n"; msg += " Last for shards: \n"; msg += " " + Arrays.toString(lastForShardArr) + "\n"; @@ -564,8 +309,7 @@ else if (FieldTypeUtil.isNumericDoubleFieldType(sortType)) { double diff = (Math.abs(lastForIndex.getScore() - next.getScore())); if (diff > shardTolerance) { - String msg = "Result set did not return the most relevant documents for index <" + indexName + "> with shard tolerance <" - + shardTolerance + ">\n"; + String msg = "Result set did not return the most relevant documents for index <" + indexName + "> with shard tolerance <" + shardTolerance + ">\n"; msg += " Last for index from shard <" + lastForIndex.getShard() + "> has score <" + lastForIndex.getScore() + ">\n"; msg += " Next for shard <" + next.getShard() + "> has score <" + next.getScore() + ">\n"; msg += " Last for shards: \n"; @@ -585,15 +329,43 @@ else if (FieldTypeUtil.isNumericDoubleFieldType(sortType)) { } } + return results; + } + + private HashMap createSortTypeMap(List fieldSortList) throws Exception { + HashMap sortTypeMap = new HashMap<>(); + if (!fieldSortList.isEmpty()) { + + for (FieldSort fieldSort : fieldSortList) { + String sortField = fieldSort.getSortField(); + + if (ZuliaQueryParser.rewriteLengthFields(sortField).equals(sortField)) { + + for (ZuliaIndex index : indexes) { + FieldConfig.FieldType currentSortType = sortTypeMap.get(sortField); + + FieldConfig.FieldType indexSortType = index.getSortFieldType(sortField); + if (currentSortType == null) { + sortTypeMap.put(sortField, indexSortType); + } + else { + if (!currentSortType.equals(indexSortType)) { + log.severe("Sort fields must be defined the same in all indexes searched in a single query"); + String message = + "Cannot sort on field <" + sortField + ">: found type: <" + currentSortType + "> then type: <" + indexSortType + ">"; + log.severe(message); - int i = 0; - for (ScoredResult scoredResult : results) { - if (i >= start) { - builder.addResults(scoredResult); + throw new Exception(message); + } + } + } + } } - i++; } + return sortTypeMap; + } + private LastResult createLastResult(Map lastIndexResultMap) { LastResult.Builder newLastResultBuilder = LastResult.newBuilder(); for (String indexName : lastIndexResultMap.keySet()) { ScoredResult[] lastForShardArr = lastIndexResultMap.get(indexName); @@ -601,8 +373,8 @@ else if (FieldTypeUtil.isNumericDoubleFieldType(sortType)) { List indexList = new ArrayList<>(); for (int shard = 0; shard < numberOfShards; shard++) { if (lastForShardArr[shard] != null) { - ScoredResult.Builder minimalSR = ScoredResult.newBuilder(lastForShardArr[shard]); - minimalSR = minimalSR.clearUniqueId().clearIndexName().clearResultIndex().clearTimestamp().clearResultDocument(); + ScoredResult.Builder minimalSR = ScoredResult.newBuilder(lastForShardArr[shard]).clearUniqueId().clearIndexName().clearResultIndex() + .clearTimestamp().clearResultDocument(); indexList.add(minimalSR.build()); } } @@ -611,10 +383,25 @@ else if (FieldTypeUtil.isNumericDoubleFieldType(sortType)) { newLastResultBuilder.addLastIndexResult(lastIndexResult); } } + return newLastResultBuilder.build(); + } - builder.setLastResult(newLastResultBuilder.build()); + private Map createLastIndexResultMapWithPreviousLastResults() { + Map lastIndexResultMap = new HashMap<>(); - return builder.build(); + for (String indexName : indexToShardQueryResponseMap.keySet()) { + int numberOfShards = indexToShardCount.get(indexName); + lastIndexResultMap.put(indexName, new ScoredResult[numberOfShards]); + } + + for (LastIndexResult lir : lastResult.getLastIndexResultList()) { + ScoredResult[] lastForShardArr = lastIndexResultMap.get(lir.getIndexName()); + // initialize with last results + for (ScoredResult sr : lir.getLastForShardList()) { + lastForShardArr[sr.getShard()] = sr; + } + } + return lastIndexResultMap; } public boolean isShort() { diff --git a/zulia-server/src/main/java/io/zulia/server/search/StatCombiner.java b/zulia-server/src/main/java/io/zulia/server/search/StatCombiner.java new file mode 100644 index 00000000..610b0269 --- /dev/null +++ b/zulia-server/src/main/java/io/zulia/server/search/StatCombiner.java @@ -0,0 +1,55 @@ +package io.zulia.server.search; + +import io.zulia.message.ZuliaQuery.StatGroup; +import io.zulia.message.ZuliaQuery.StatRequest; + +import java.util.ArrayList; +import java.util.List; + +public class StatCombiner { + + public static class StatGroupWithShardIndex { + private final StatGroup statGroup; + private final int shardIndex; + + public StatGroupWithShardIndex(StatGroup statGroup, int shardIndex) { + this.statGroup = statGroup; + this.shardIndex = shardIndex; + } + + public StatGroup getStatGroup() { + return statGroup; + } + + public int getShardIndex() { + return shardIndex; + } + } + + private final List statGroups; + private final int[] shardIndexes; + private final StatRequest statRequest; + private final int shardReponses; + + public StatCombiner(StatRequest statRequest, int shardReponses) { + this.statRequest = statRequest; + this.shardReponses = shardReponses; + this.statGroups = new ArrayList<>(shardReponses); + this.shardIndexes = new int[shardReponses]; + } + + public void handleStatGroupForShard(StatGroup statGroup, int shardIndex) { + statGroups.add(new StatGroupWithShardIndex(statGroup, shardIndex)); + } + + public StatGroup getCombinedStatGroup() { + + if (statGroups.size() == 1) { + return statGroups.get(0).getStatGroup(); + } + else { + //TODO support this + throw new UnsupportedOperationException("Multiple indexes or shards are not supported"); + } + } +} diff --git a/zulia-server/src/main/java/io/zulia/server/search/TaxonomyStatsHandler.java b/zulia-server/src/main/java/io/zulia/server/search/TaxonomyStatsHandler.java new file mode 100644 index 00000000..7437dc22 --- /dev/null +++ b/zulia-server/src/main/java/io/zulia/server/search/TaxonomyStatsHandler.java @@ -0,0 +1,364 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.zulia.server.search; + +import io.zulia.message.ZuliaIndex; +import io.zulia.message.ZuliaQuery; +import io.zulia.server.config.ServerIndexConfig; +import io.zulia.server.field.FieldTypeUtil; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.FacetsCollector.MatchingDocs; +import org.apache.lucene.facet.FacetsConfig; +import org.apache.lucene.facet.taxonomy.DocValuesOrdinalsReader; +import org.apache.lucene.facet.taxonomy.FacetLabel; +import org.apache.lucene.facet.taxonomy.OrdinalsReader; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.util.PriorityQueue; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +public class TaxonomyStatsHandler { + + public static class TopStatQueue extends PriorityQueue { + + public TopStatQueue(int topN) { + super(topN); + } + + @Override + protected boolean lessThan(Stats a, Stats b) { + if (a.doubleSum < b.doubleSum || a.longSum < b.longSum) { + return true; + } + else if (a.doubleSum > b.doubleSum || a.longSum > b.longSum) { + return false; + } + else { + return a.ordinal > b.ordinal; + } + } + } + + public static class Stats { + + private int ordinal; + private long docCount; + private long valueCount; + + private double doubleSum; + private double doubleMinValue = Double.POSITIVE_INFINITY; + private double doubleMaxValue = Double.NEGATIVE_INFINITY; + + private long longSum; + private long longMinValue = Long.MAX_VALUE; + private long longMaxValue = Long.MIN_VALUE; + + public Stats(boolean floatingPoint) { + if (floatingPoint) { + longMinValue = 0; + longMaxValue = 0; + } + else { + doubleMinValue = 0; + doubleMaxValue = 0; + } + } + + public void newDoc() { + docCount++; + } + + public void newValue(double newValue) { + this.doubleSum += newValue; + if (newValue < doubleMinValue) { + doubleMinValue = newValue; + } + if (newValue > doubleMaxValue) { + doubleMaxValue = newValue; + } + this.valueCount++; + } + + public void newValue(long newValue) { + this.longSum += newValue; + if (newValue < longMinValue) { + longMinValue = newValue; + } + if (newValue > longMaxValue) { + longMaxValue = newValue; + } + this.valueCount++; + } + } + + private final OrdinalsReader ordinalsReader; + private final List fieldsList; + protected final Stats[][] fieldFacetStats; + protected final Stats[] fieldStats; + + private final TaxonomyReader taxoReader; + private int[] children; + private int[] siblings; + private final List fieldTypes; + + public TaxonomyStatsHandler(TaxonomyReader taxoReader, FacetsCollector fc, List statRequests, ServerIndexConfig serverIndexConfig) + throws IOException { + + Set numericFields = statRequests.stream().map(ZuliaQuery.StatRequest::getNumericField).collect(Collectors.toSet()); + boolean facetLevel = statRequests.stream().map(ZuliaQuery.StatRequest::getFacetField).anyMatch(s -> !s.getLabel().isEmpty()); + boolean global = statRequests.stream().map(ZuliaQuery.StatRequest::getFacetField).anyMatch(s -> s.getLabel().isEmpty()); + + fieldTypes = new ArrayList<>(); + for (String numericField : numericFields) { + ZuliaIndex.FieldConfig.FieldType fieldTypeForSortField = serverIndexConfig.getFieldTypeForSortField(numericField); + if (fieldTypeForSortField == null) { + throw new IllegalArgumentException("Numeric field <" + numericField + "> must be indexed as a SORTABLE numeric field"); + } + if (!FieldTypeUtil.isNumericFieldType(fieldTypeForSortField)) { + throw new IllegalArgumentException("Numeric field <" + numericField + "> must be indexed as a sortable NUMERIC field"); + } + fieldTypes.add(fieldTypeForSortField); + } + + this.ordinalsReader = new DocValuesOrdinalsReader(FacetsConfig.DEFAULT_INDEX_FIELD_NAME); + this.fieldsList = new ArrayList<>(numericFields); + + if (facetLevel) { + this.fieldFacetStats = new Stats[fieldsList.size()][taxoReader.getSize()]; + this.taxoReader = taxoReader; + } + else { + this.fieldFacetStats = null; + this.taxoReader = null; + } + + if (global) { + this.fieldStats = new Stats[fieldsList.size()]; + for (int i = 0; i < fieldStats.length; i++) { + this.fieldStats[i] = new Stats(FieldTypeUtil.isNumericFloatingPointFieldType(fieldTypes.get(i))); + } + } + else { + this.fieldStats = null; + } + + sumValues(fc.getMatchingDocs(), fieldsList); + } + + private void sumValues(List matchingDocs, List fieldsList) throws IOException { + + final SortedNumericDocValues[] functionValues = new SortedNumericDocValues[fieldsList.size()]; + + IntsRef scratch = new IntsRef(); + for (MatchingDocs hits : matchingDocs) { + + for (int f = 0; f < fieldsList.size(); f++) { + String field = fieldsList.get(f); + functionValues[f] = DocValues.getSortedNumeric(hits.context.reader(), field); + } + + DocIdSetIterator docs = hits.bits.iterator(); + + OrdinalsReader.OrdinalsSegmentReader ords = null; + if (fieldFacetStats != null) { + ords = ordinalsReader.getReader(hits.context); + } + + int doc; + while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + + if (ords != null) { + ords.get(doc, scratch); + } + + for (int f = 0; f < fieldsList.size(); f++) { + + SortedNumericDocValues functionValue = functionValues[f]; + ZuliaIndex.FieldConfig.FieldType fieldType = fieldTypes.get(f); + if (functionValue.advanceExact(doc)) { + if (ords != null) { + + for (int i = 0; i < scratch.length; i++) { + int ordIndex = scratch.ints[i]; + Stats stats = fieldFacetStats[f][ordIndex]; + if (stats == null) { + stats = new Stats(FieldTypeUtil.isNumericFloatingPointFieldType(fieldType)); + fieldFacetStats[f][ordIndex] = stats; + } + stats.newDoc(); + } + for (int j = 0; j < functionValue.docValueCount(); j++) { + long value = functionValue.nextValue(); + + for (int i = 0; i < scratch.length; i++) { + int ordIndex = scratch.ints[i]; + Stats stats = fieldFacetStats[f][ordIndex]; + + if (FieldTypeUtil.isNumericDoubleFieldType(fieldType)) { + stats.newValue(NumericUtils.sortableLongToDouble(value)); + } + else if (FieldTypeUtil.isNumericFloatFieldType(fieldType)) { + stats.newValue(NumericUtils.sortableIntToFloat((int) value)); + } + else if (FieldTypeUtil.isNumericLongFieldType(fieldType)) { + stats.newValue(value); + } + else if (FieldTypeUtil.isNumericIntFieldType(fieldType)) { + stats.newValue((int) value); + } + + } + } + } + if (fieldStats != null) { + docValuesForDocument(functionValue, fieldType, fieldStats[f]); + } + } + } + } + } + } + + private void docValuesForDocument(SortedNumericDocValues functionValue, ZuliaIndex.FieldConfig.FieldType fieldType, Stats stats) throws IOException { + stats.newDoc(); + for (int j = 0; j < functionValue.docValueCount(); j++) { + long value = functionValue.nextValue(); + + if (FieldTypeUtil.isNumericDoubleFieldType(fieldType)) { + stats.newValue(NumericUtils.sortableLongToDouble(value)); + } + else if (FieldTypeUtil.isNumericFloatFieldType(fieldType)) { + stats.newValue(NumericUtils.sortableIntToFloat((int) value)); + } + else if (FieldTypeUtil.isNumericLongFieldType(fieldType)) { + stats.newValue(value); + } + else if (FieldTypeUtil.isNumericIntFieldType(fieldType)) { + stats.newValue((int) value); + } + + } + } + + public ZuliaQuery.FacetStats getGlobalStatsForNumericField(String field) { + int fieldIndex = fieldsList.indexOf(field); + + if (fieldIndex == -1) { + throw new IllegalArgumentException("Field <" + field + "> was not given in constructor"); + } + + return createFacetStat(fieldStats[fieldIndex], ""); + } + + public List getTopChildren(String field, int topN, String dim, String... path) throws IOException { + int fieldIndex = fieldsList.indexOf(field); + + if (fieldIndex == -1) { + throw new IllegalArgumentException("Field <" + field + "> was not given in constructor"); + } + + Stats[] stats = fieldFacetStats[fieldIndex]; + + if (topN <= 0) { + throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")"); + } + + FacetLabel cp = new FacetLabel(dim, path); + int dimOrd = taxoReader.getOrdinal(cp); + if (dimOrd == -1) { + return null; + } + + TopStatQueue q = new TopStatQueue(Math.min(taxoReader.getSize(), topN)); + + if (children == null) { + children = taxoReader.getParallelTaxonomyArrays().children(); + } + + if (siblings == null) { + siblings = taxoReader.getParallelTaxonomyArrays().siblings(); + } + + int ord = children[dimOrd]; + + double doubleSumValues = 0; + double doubleBottomValue = 0; + + long longSumValues = 0; + long longBottomValue = 0; + + while (ord != TaxonomyReader.INVALID_ORDINAL) { + Stats stat = stats[ord]; + stat.ordinal = ord; + if (stat.doubleSum > 0) { + doubleSumValues += stat.doubleSum; + if (stat.doubleSum > doubleBottomValue) { + q.insertWithOverflow(stat); + if (q.size() == topN) { + doubleBottomValue = q.top().doubleSum; + } + } + } + else if (stat.longSum > 0) { + longSumValues += stat.longSum; + if (stat.longSum > longBottomValue) { + q.insertWithOverflow(stat); + if (q.size() == topN) { + longBottomValue = q.top().longSum; + } + } + } + + ord = siblings[ord]; + } + + if (doubleSumValues == 0 && longSumValues == 0) { + return null; + } + + ZuliaQuery.FacetStats[] facetStats = new ZuliaQuery.FacetStats[q.size()]; + for (int i = facetStats.length - 1; i >= 0; i--) { + Stats stat = q.pop(); + FacetLabel child = taxoReader.getPath(stat.ordinal); + String label = child.components[cp.length]; + facetStats[i] = createFacetStat(stat, label); + } + + return Arrays.asList(facetStats); + } + + private ZuliaQuery.FacetStats createFacetStat(Stats stat, String label) { + ZuliaQuery.SortValue sum = ZuliaQuery.SortValue.newBuilder().setLongValue(stat.longSum).setDoubleValue(stat.doubleSum).build(); + ZuliaQuery.SortValue min = ZuliaQuery.SortValue.newBuilder().setLongValue(stat.longMinValue).setDoubleValue(stat.doubleMinValue).build(); + ZuliaQuery.SortValue max = ZuliaQuery.SortValue.newBuilder().setLongValue(stat.longMaxValue).setDoubleValue(stat.doubleMaxValue).build(); + + return ZuliaQuery.FacetStats.newBuilder().setFacet(label).setDocCount(stat.docCount).setValueCount(stat.valueCount).setSum(sum).setMin(min).setMax(max) + .build(); + + } + +} diff --git a/zulia-server/src/main/java/io/zulia/server/search/ZuliaPostSortingComparator.java b/zulia-server/src/main/java/io/zulia/server/search/ZuliaPostSortingComparator.java new file mode 100644 index 00000000..341689f7 --- /dev/null +++ b/zulia-server/src/main/java/io/zulia/server/search/ZuliaPostSortingComparator.java @@ -0,0 +1,151 @@ +package io.zulia.server.search; + +import io.zulia.ZuliaConstants; +import io.zulia.message.ZuliaIndex.FieldConfig; +import io.zulia.message.ZuliaQuery; +import io.zulia.message.ZuliaQuery.FieldSort; +import io.zulia.message.ZuliaQuery.ScoredResult; +import io.zulia.message.ZuliaQuery.SortValues; +import io.zulia.server.field.FieldTypeUtil; +import org.apache.lucene.util.BytesRef; + +import java.util.Comparator; +import java.util.List; +import java.util.Map; + +public class ZuliaPostSortingComparator implements Comparator { + + private final static Comparator scoreCompare = new ScoreCompare(); + private final static Comparator reverseScoreCompare = new ReverseScoreCompare(); + + private final List fieldSortList; + private final Map sortTypeMap; + + public ZuliaPostSortingComparator(List fieldSortList, Map sortTypeMap) { + this.fieldSortList = fieldSortList; + this.sortTypeMap = sortTypeMap; + } + + @Override + public int compare(ScoredResult o1, ScoredResult o2) { + + if (fieldSortList == null || fieldSortList.isEmpty()) { + return scoreCompare.compare(o1, o2); + } + + int compare = 0; + + int sortValueIndex = 0; + + SortValues sortValues1 = o1.getSortValues(); + SortValues sortValues2 = o2.getSortValues(); + for (FieldSort fs : fieldSortList) { + String sortField = fs.getSortField(); + + FieldConfig.FieldType sortType = sortTypeMap.get(sortField); + + if (!ZuliaQueryParser.rewriteLengthFields(sortField).equals(sortField)) { + sortType = FieldConfig.FieldType.NUMERIC_LONG; + } + + if (ZuliaConstants.SCORE_FIELD.equals(sortField)) { + if (FieldSort.Direction.DESCENDING.equals(fs.getDirection())) { + compare = scoreCompare.compare(o1, o2); + } + else { + compare = reverseScoreCompare.compare(o1, o2); + } + } + else { + ZuliaQuery.SortValue sortValue1 = sortValues1.getSortValue(sortValueIndex); + ZuliaQuery.SortValue sortValue2 = sortValues2.getSortValue(sortValueIndex); + + if (FieldTypeUtil.isNumericIntFieldType(sortType)) { + Integer a = sortValue1.getExists() ? sortValue1.getIntegerValue() : null; + Integer b = sortValue2.getExists() ? sortValue2.getIntegerValue() : null; + + if (!fs.getMissingLast()) { + compare = Comparator.nullsFirst(Integer::compareTo).compare(a, b); + } + else { + compare = Comparator.nullsLast(Integer::compareTo).compare(a, b); + } + } + else if (FieldTypeUtil.isNumericLongFieldType(sortType)) { + Long a = sortValue1.getExists() ? sortValue1.getLongValue() : null; + Long b = sortValue2.getExists() ? sortValue2.getLongValue() : null; + + if (!fs.getMissingLast()) { + compare = Comparator.nullsFirst(Long::compareTo).compare(a, b); + } + else { + compare = Comparator.nullsLast(Long::compareTo).compare(a, b); + } + } + else if (FieldTypeUtil.isDateFieldType(sortType)) { + Long a = sortValue1.getExists() ? sortValue1.getDateValue() : null; + Long b = sortValue2.getExists() ? sortValue2.getDateValue() : null; + + if (!fs.getMissingLast()) { + compare = Comparator.nullsFirst(Long::compareTo).compare(a, b); + } + else { + compare = Comparator.nullsLast(Long::compareTo).compare(a, b); + } + } + else if (FieldTypeUtil.isNumericFloatFieldType(sortType)) { + + Float a = sortValue1.getExists() ? sortValue1.getFloatValue() : null; + Float b = sortValue2.getExists() ? sortValue2.getFloatValue() : null; + + if (!fs.getMissingLast()) { + compare = Comparator.nullsFirst(Float::compareTo).compare(a, b); + } + else { + compare = Comparator.nullsLast(Float::compareTo).compare(a, b); + } + } + else if (FieldTypeUtil.isNumericDoubleFieldType(sortType)) { + + Double a = sortValue1.getExists() ? sortValue1.getDoubleValue() : null; + Double b = sortValue2.getExists() ? sortValue2.getDoubleValue() : null; + + if (!fs.getMissingLast()) { + compare = Comparator.nullsFirst(Double::compareTo).compare(a, b); + } + else { + compare = Comparator.nullsLast(Double::compareTo).compare(a, b); + } + + } + else { + String a = sortValue1.getExists() ? sortValue1.getStringValue() : null; + String b = sortValue2.getExists() ? sortValue2.getStringValue() : null; + + if (!fs.getMissingLast()) { + compare = Comparator.nullsFirst(BytesRef::compareTo).compare(a != null ? new BytesRef(a) : null, b != null ? new BytesRef(b) : null); + } + else { + compare = Comparator.nullsLast(BytesRef::compareTo).compare(a != null ? new BytesRef(a) : null, b != null ? new BytesRef(b) : null); + } + } + + if (FieldSort.Direction.DESCENDING.equals(fs.getDirection())) { + compare *= -1; + } + } + + if (compare != 0) { + return compare; + } + + sortValueIndex++; + + } + + return compare; + } + + ; + +} diff --git a/zulia-server/src/test/java/io/zulia/server/test/node/HierarchicalFacetTest.java b/zulia-server/src/test/java/io/zulia/server/test/node/HierarchicalFacetTest.java index cdf11025..791bf542 100644 --- a/zulia-server/src/test/java/io/zulia/server/test/node/HierarchicalFacetTest.java +++ b/zulia-server/src/test/java/io/zulia/server/test/node/HierarchicalFacetTest.java @@ -36,8 +36,6 @@ public class HierarchicalFacetTest { private final String[] paths = new String[] { "1/2/3", "1/3/4", "3/20/13", "a/b/c", "one/two/three", "1", "2/3/blah", "4/5/1000", "a/bee/sea" }; - private final int totalRecords = COUNT_PER_PATH * paths.length; - private static ZuliaWorkPool zuliaWorkPool; @BeforeAll diff --git a/zulia-server/src/test/java/io/zulia/server/test/node/StatTest.java b/zulia-server/src/test/java/io/zulia/server/test/node/StatTest.java new file mode 100644 index 00000000..7e2d3e95 --- /dev/null +++ b/zulia-server/src/test/java/io/zulia/server/test/node/StatTest.java @@ -0,0 +1,293 @@ +package io.zulia.server.test.node; + +import io.zulia.DefaultAnalyzers; +import io.zulia.client.command.Store; +import io.zulia.client.command.builder.NumericStat; +import io.zulia.client.command.builder.Search; +import io.zulia.client.command.builder.StatFacet; +import io.zulia.client.config.ClientIndexConfig; +import io.zulia.client.pool.ZuliaWorkPool; +import io.zulia.client.result.SearchResult; +import io.zulia.doc.ResultDocBuilder; +import io.zulia.fields.FieldConfigBuilder; +import io.zulia.message.ZuliaIndex.FieldConfig.FieldType; +import io.zulia.message.ZuliaQuery.FacetStats; +import org.bson.Document; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.MethodOrderer; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestMethodOrder; +import org.opentest4j.AssertionFailedError; + +import java.util.List; + +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +public class StatTest { + + public static final String STAT_TEST_INDEX = "stat"; + + private static ZuliaWorkPool zuliaWorkPool; + private static int repeatCount = 100; + + @BeforeAll + public static void initAll() throws Exception { + + TestHelper.createNodes(3); + + TestHelper.startNodes(); + + Thread.sleep(2000); + + zuliaWorkPool = TestHelper.createClient(); + + ClientIndexConfig indexConfig = new ClientIndexConfig(); + indexConfig.addDefaultSearchField("title"); + indexConfig.addFieldConfig(FieldConfigBuilder.create("id", FieldType.STRING).indexAs(DefaultAnalyzers.LC_KEYWORD).sort()); + indexConfig.addFieldConfig(FieldConfigBuilder.create("title", FieldType.STRING).indexAs(DefaultAnalyzers.STANDARD).sort()); + indexConfig.addFieldConfig(FieldConfigBuilder.create("pathFacet", FieldType.STRING).indexAs(DefaultAnalyzers.LC_KEYWORD).facetHierarchical().sort()); + indexConfig.addFieldConfig(FieldConfigBuilder.create("normalFacet", FieldType.STRING).indexAs(DefaultAnalyzers.LC_KEYWORD).facet().sort()); + //indexConfig.addFieldConfig(FieldConfigBuilder.create("authorCount", FieldType.NUMERIC_INT).index().sort()); + indexConfig.addFieldConfig(FieldConfigBuilder.create("rating", FieldType.NUMERIC_DOUBLE).index().sort()); + indexConfig.setIndexName(STAT_TEST_INDEX); + indexConfig.setNumberOfShards(1); + indexConfig.setShardCommitInterval(20); //force some commits + + zuliaWorkPool.createIndex(indexConfig); + } + + @Test + @Order(2) + public void index() throws Exception { + + for (int i = 0; i < repeatCount; i++) { + indexRecord(i * 5, "something special", "top1/middle/bottom1", "foo", 3, List.of(3.5, 1.0)); + indexRecord(i * 5 + 1, "something really special", "top1/middle/bottom2", "foo", 4, List.of(2.5)); + indexRecord(i * 5 + 2, "something special", "top2/middle/bottom3", "bar", 2, List.of(0.5)); + indexRecord(i * 5 + 3, "something really special", "top3/middle/bottom4", "bar", 5, List.of(3.0)); + indexRecord(i * 5 + 4, "something really special", "top3/middle/bottom4", null, 4, List.of()); + } + + } + + private void indexRecord(int id, String title, String pathFacet, String normalFacet, int authorCount, List rating) throws Exception { + + String uniqueId = "" + id; + + Document mongoDocument = new Document(); + mongoDocument.put("id", uniqueId); + mongoDocument.put("title", title); + mongoDocument.put("pathFacet", pathFacet); + mongoDocument.put("normalFacet", normalFacet); + mongoDocument.put("authorCount", authorCount); + mongoDocument.put("rating", rating); + + Store s = new Store(uniqueId, STAT_TEST_INDEX); + + ResultDocBuilder resultDocumentBuilder = ResultDocBuilder.newBuilder().setDocument(mongoDocument); + s.setResultDocument(resultDocumentBuilder); + zuliaWorkPool.store(s); + + } + + @Test + @Order(3) + public void statTest() throws Exception { + + Search search = new Search(STAT_TEST_INDEX); + search.addStat(new NumericStat("rating")); + + SearchResult searchResult = zuliaWorkPool.search(search); + + FacetStats ratingStat = searchResult.getNumericFieldStat("rating"); + + Assertions.assertEquals(0.5, ratingStat.getMin().getDoubleValue(), 0.001); + Assertions.assertEquals(3.5, ratingStat.getMax().getDoubleValue(), 0.001); + + Assertions.assertEquals(10.5 * repeatCount, ratingStat.getSum().getDoubleValue(), 0.001); + Assertions.assertEquals(4L * repeatCount, ratingStat.getDocCount()); + Assertions.assertEquals(5L * repeatCount, ratingStat.getValueCount()); + + search.clearStat(); + search.addStat(new StatFacet("rating", "normalFacet")); + searchResult = zuliaWorkPool.search(search); + + List ratingByFacet = searchResult.getFacetFieldStat("rating", "normalFacet"); + + for (FacetStats facetStats : ratingByFacet) { + if (facetStats.getFacet().equals("foo")) { + Assertions.assertEquals(1, facetStats.getMin().getDoubleValue(), 0.001); + Assertions.assertEquals(3.5, facetStats.getMax().getDoubleValue(), 0.001); + Assertions.assertEquals(7L * repeatCount, facetStats.getSum().getDoubleValue(), 0.001); + Assertions.assertEquals(2L * repeatCount, facetStats.getDocCount()); + Assertions.assertEquals(3L * repeatCount, facetStats.getValueCount()); + } + else if (facetStats.getFacet().equals("bar")) { + Assertions.assertEquals(0.5, facetStats.getMin().getDoubleValue(), 0.001); + Assertions.assertEquals(3.0, facetStats.getMax().getDoubleValue(), 0.001); + Assertions.assertEquals(3.5 * repeatCount, facetStats.getSum().getDoubleValue(), 0.001); + Assertions.assertEquals(2L * repeatCount, facetStats.getDocCount()); + Assertions.assertEquals(2L * repeatCount, facetStats.getValueCount()); + } + else { + throw new AssertionFailedError("Unexpect facet <" + facetStats.getFacet() + ">"); + } + } + + search.clearStat(); + search.addStat(new StatFacet("rating", "pathFacet")); + searchResult = zuliaWorkPool.search(search); + + List ratingByPathFacet = searchResult.getFacetFieldStat("rating", "pathFacet"); + + for (FacetStats facetStats : ratingByPathFacet) { + if (facetStats.getFacet().equals("top1")) { + Assertions.assertEquals(1, facetStats.getMin().getDoubleValue(), 0.001); + Assertions.assertEquals(3.5, facetStats.getMax().getDoubleValue(), 0.001); + Assertions.assertEquals(7L * repeatCount, facetStats.getSum().getDoubleValue(), 0.001); + Assertions.assertEquals(2L * repeatCount, facetStats.getDocCount()); + Assertions.assertEquals(3L * repeatCount, facetStats.getValueCount()); + } + else if (facetStats.getFacet().equals("top2")) { + Assertions.assertEquals(0.5, facetStats.getMin().getDoubleValue(), 0.001); + Assertions.assertEquals(0.5, facetStats.getMax().getDoubleValue(), 0.001); + Assertions.assertEquals(0.5 * repeatCount, facetStats.getSum().getDoubleValue(), 0.001); + Assertions.assertEquals(repeatCount, facetStats.getDocCount()); + Assertions.assertEquals(repeatCount, facetStats.getValueCount()); + } + else if (facetStats.getFacet().equals("top3")) { + Assertions.assertEquals(3.0, facetStats.getMin().getDoubleValue(), 0.001); + Assertions.assertEquals(3.0, facetStats.getMax().getDoubleValue(), 0.001); + Assertions.assertEquals(3.0 * repeatCount, facetStats.getSum().getDoubleValue(), 0.001); + Assertions.assertEquals(repeatCount, facetStats.getDocCount()); + Assertions.assertEquals(repeatCount, facetStats.getValueCount()); + } + else { + throw new AssertionFailedError("Unexpect facet <" + facetStats.getFacet() + ">"); + } + } + + search = new Search(STAT_TEST_INDEX); + search.addStat(new StatFacet("authorCount", "pathFacet")); + Search finalSearch = search; + Assertions.assertThrows(Exception.class, () -> zuliaWorkPool.search(finalSearch), + "Expecting: Search: Numeric field must be indexed as a SORTABLE numeric field"); + + } + + @Test + @Order(4) + public void reindex() throws Exception { + ClientIndexConfig indexConfig = new ClientIndexConfig(); + indexConfig.addDefaultSearchField("title"); + indexConfig.addFieldConfig(FieldConfigBuilder.create("id", FieldType.STRING).indexAs(DefaultAnalyzers.LC_KEYWORD).sort()); + indexConfig.addFieldConfig(FieldConfigBuilder.create("title", FieldType.STRING).indexAs(DefaultAnalyzers.STANDARD).sort()); + indexConfig.addFieldConfig(FieldConfigBuilder.create("pathFacet", FieldType.STRING).indexAs(DefaultAnalyzers.LC_KEYWORD).facetHierarchical().sort()); + indexConfig.addFieldConfig(FieldConfigBuilder.create("normalFacet", FieldType.STRING).indexAs(DefaultAnalyzers.LC_KEYWORD).facet().sort()); + indexConfig.addFieldConfig(FieldConfigBuilder.create("authorCount", FieldType.NUMERIC_INT).index().sort()); + indexConfig.addFieldConfig(FieldConfigBuilder.create("rating", FieldType.NUMERIC_DOUBLE).index().sort()); + indexConfig.setIndexName(STAT_TEST_INDEX); + indexConfig.setNumberOfShards(1); + indexConfig.setShardCommitInterval(20); //force some commits + + zuliaWorkPool.createIndex(indexConfig); + + //trigger indexing again with path2 added in the index config + index(); + + } + + @Test + @Order(5) + public void restart() throws Exception { + TestHelper.stopNodes(); + Thread.sleep(2000); + TestHelper.startNodes(); + Thread.sleep(2000); + } + + @Test + @Order(6) + public void confirm() throws Exception { + Search search = new Search(STAT_TEST_INDEX); + search.addStat(new NumericStat("authorCount")); + + SearchResult searchResult = zuliaWorkPool.search(search); + + FacetStats ratingStat = searchResult.getNumericFieldStat("authorCount"); + + Assertions.assertEquals(2, ratingStat.getMin().getLongValue()); + Assertions.assertEquals(5, ratingStat.getMax().getLongValue()); + + Assertions.assertEquals(18L * repeatCount, ratingStat.getSum().getLongValue()); + Assertions.assertEquals(5L * repeatCount, ratingStat.getDocCount()); + Assertions.assertEquals(5L * repeatCount, ratingStat.getValueCount()); + + search.clearStat(); + search.addStat(new StatFacet("authorCount", "normalFacet")); + searchResult = zuliaWorkPool.search(search); + + List ratingByFacet = searchResult.getFacetFieldStat("authorCount", "normalFacet"); + + for (FacetStats facetStats : ratingByFacet) { + if (facetStats.getFacet().equals("foo")) { + Assertions.assertEquals(3L, facetStats.getMin().getLongValue()); + Assertions.assertEquals(4L, facetStats.getMax().getLongValue()); + Assertions.assertEquals(7L * repeatCount, facetStats.getSum().getLongValue()); + Assertions.assertEquals(2L * repeatCount, facetStats.getDocCount()); + Assertions.assertEquals(2L * repeatCount, facetStats.getValueCount()); + } + else if (facetStats.getFacet().equals("bar")) { + Assertions.assertEquals(2L, facetStats.getMin().getLongValue()); + Assertions.assertEquals(5L, facetStats.getMax().getLongValue()); + Assertions.assertEquals(7L * repeatCount, facetStats.getSum().getLongValue()); + Assertions.assertEquals(2L * repeatCount, facetStats.getDocCount()); + Assertions.assertEquals(2L * repeatCount, facetStats.getValueCount()); + } + else { + throw new AssertionFailedError("Unexpect facet <" + facetStats.getFacet() + ">"); + } + } + + search.clearStat(); + search.addStat(new StatFacet("authorCount", "pathFacet")); + searchResult = zuliaWorkPool.search(search); + + List ratingByPathFacet = searchResult.getFacetFieldStat("authorCount", "pathFacet"); + + for (FacetStats facetStats : ratingByPathFacet) { + if (facetStats.getFacet().equals("top1")) { + Assertions.assertEquals(3L, facetStats.getMin().getLongValue()); + Assertions.assertEquals(4L, facetStats.getMax().getLongValue()); + Assertions.assertEquals(7L * repeatCount, facetStats.getSum().getLongValue()); + Assertions.assertEquals(2L * repeatCount, facetStats.getDocCount()); + Assertions.assertEquals(2L * repeatCount, facetStats.getValueCount()); + } + else if (facetStats.getFacet().equals("top2")) { + Assertions.assertEquals(2L, facetStats.getMin().getLongValue()); + Assertions.assertEquals(2L, facetStats.getMax().getLongValue()); + Assertions.assertEquals(2L * repeatCount, facetStats.getSum().getLongValue()); + Assertions.assertEquals(repeatCount, facetStats.getDocCount()); + Assertions.assertEquals(repeatCount, facetStats.getValueCount()); + } + else if (facetStats.getFacet().equals("top3")) { + Assertions.assertEquals(4L, facetStats.getMin().getLongValue()); + Assertions.assertEquals(5L, facetStats.getMax().getLongValue()); + Assertions.assertEquals(9L * repeatCount, facetStats.getSum().getLongValue()); + Assertions.assertEquals(2L * repeatCount, facetStats.getDocCount()); + Assertions.assertEquals(2L * repeatCount, facetStats.getValueCount()); + } + else { + throw new AssertionFailedError("Unexpect facet <" + facetStats.getFacet() + ">"); + } + } + } + + @Test + @Order(7) + public void shutdown() throws Exception { + TestHelper.stopNodes(); + zuliaWorkPool.shutdown(); + } +}