Revert "Optimize sort on long field (#48804)"

This reverts commit 79d9b36.
elastic · Nov 26, 2019 · e9ba252 · e9ba252
1 parent 5f57c7e
commit e9ba252
Show file tree

Hide file tree

Showing 12 changed files with 297 additions and 767 deletions.
diff --git a/buildSrc/src/main/groovy/org/elasticsearch/gradle/BuildPlugin.groovy b/buildSrc/src/main/groovy/org/elasticsearch/gradle/BuildPlugin.groovy
@@ -720,9 +720,6 @@ class BuildPlugin implements Plugin<Project> {
                 // TODO: remove this once ctx isn't added to update script params in 7.0
                 test.systemProperty 'es.scripting.update.ctx_in_params', 'false'
 
-                // TODO: remove this property in 8.0
-                test.systemProperty 'es.search.rewrite_sort', 'true'
-
                 // TODO: remove this once cname is prepended to transport.publish_address by default in 8.0
                 test.systemProperty 'es.transport.cname_in_publish_address', 'true'
 

diff --git a/docs/reference/search/profile.asciidoc b/docs/reference/search/profile.asciidoc
@@ -153,9 +153,16 @@ The API returns the following result:
                  "rewrite_time": 51443,
                  "collector": [
                     {
-                       "name": "SimpleTopScoreDocCollector",
-                       "reason": "search_top_hits",
-                       "time_in_nanos": "32273"
+                       "name": "CancellableCollector",
+                       "reason": "search_cancelled",
+                       "time_in_nanos": "304311",
+                       "children": [
+                         {
+                           "name": "SimpleTopScoreDocCollector",
+                           "reason": "search_top_hits",
+                           "time_in_nanos": "32273"
+                         }
+                       ]
                     }
                  ]
               }
@@ -438,9 +445,16 @@ Looking at the previous example:
 --------------------------------------------------
 "collector": [
    {
-      "name": "SimpleTopScoreDocCollector",
-      "reason": "search_top_hits",
-      "time_in_nanos": "32273"
+      "name": "CancellableCollector",
+      "reason": "search_cancelled",
+      "time_in_nanos": "304311",
+      "children": [
+        {
+          "name": "SimpleTopScoreDocCollector",
+          "reason": "search_top_hits",
+          "time_in_nanos": "32273"
+        }
+      ]
    }
 ]
 --------------------------------------------------
@@ -643,26 +657,33 @@ The API returns the following result:
                      "rewrite_time": 7208,
                      "collector": [
                         {
-                          "name": "MultiCollector",
-                          "reason": "search_multi",
-                          "time_in_nanos": 1820,
+                          "name": "CancellableCollector",
+                          "reason": "search_cancelled",
+                          "time_in_nanos": 2390,
                           "children": [
                             {
-                              "name": "FilteredCollector",
-                              "reason": "search_post_filter",
-                              "time_in_nanos": 7735,
+                              "name": "MultiCollector",
+                              "reason": "search_multi",
+                              "time_in_nanos": 1820,
                               "children": [
                                 {
-                                  "name": "SimpleTopScoreDocCollector",
-                                  "reason": "search_top_hits",
-                                  "time_in_nanos": 1328
+                                  "name": "FilteredCollector",
+                                  "reason": "search_post_filter",
+                                  "time_in_nanos": 7735,
+                                  "children": [
+                                    {
+                                      "name": "SimpleTopScoreDocCollector",
+                                      "reason": "search_top_hits",
+                                      "time_in_nanos": 1328
+                                    }
+                                  ]
+                                },
+                                {
+                                  "name": "MultiBucketCollector: [[my_scoped_agg, my_global_agg]]",
+                                  "reason": "aggregation",
+                                  "time_in_nanos": 8273
                                 }
                               ]
-                            },
-                            {
-                              "name": "MultiBucketCollector: [[my_scoped_agg, my_global_agg]]",
-                              "reason": "aggregation",
-                              "time_in_nanos": 8273
                             }
                           ]
                         }

diff --git a/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java b/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java
@@ -27,7 +27,6 @@
 import org.apache.lucene.search.CollectionStatistics;
 import org.apache.lucene.search.CollectionTerminatedException;
 import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.CollectorManager;
 import org.apache.lucene.search.ConjunctionDISI;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Explanation;
@@ -36,31 +35,24 @@
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.QueryCache;
 import org.apache.lucene.search.QueryCachingPolicy;
-import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.TermStatistics;
-import org.apache.lucene.search.TopFieldDocs;
-import org.apache.lucene.search.TotalHits;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.util.BitSet;
 import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.CombinedBitSet;
 import org.apache.lucene.util.SparseFixedBitSet;
-import org.elasticsearch.common.lucene.search.TopDocsAndMaxScore;
-import org.elasticsearch.search.DocValueFormat;
 import org.elasticsearch.search.dfs.AggregatedDfs;
 import org.elasticsearch.search.profile.Timer;
 import org.elasticsearch.search.profile.query.ProfileWeight;
 import org.elasticsearch.search.profile.query.QueryProfileBreakdown;
 import org.elasticsearch.search.profile.query.QueryProfiler;
 import org.elasticsearch.search.profile.query.QueryTimingType;
-import org.elasticsearch.search.query.QuerySearchResult;
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Set;
@@ -139,86 +131,12 @@ public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws
         }
     }
 
-    private void checkCancelled() {
-        if (checkCancelled != null) {
-            checkCancelled.run();
-        }
-    }
-
-    public void search(List<LeafReaderContext> leaves, Weight weight, CollectorManager manager,
-            QuerySearchResult result, DocValueFormat[] formats, TotalHits totalHits) throws IOException {
-        final List<Collector> collectors = new ArrayList<>(leaves.size());
-        for (LeafReaderContext ctx : leaves) {
-            final Collector collector = manager.newCollector();
-            searchLeaf(ctx, weight, collector);
-            collectors.add(collector);
-        }
-        TopFieldDocs mergedTopDocs = (TopFieldDocs) manager.reduce(collectors);
-        // Lucene sets shards indexes during merging of topDocs from different collectors
-        // We need to reset shard index; ES will set shard index later during reduce stage
-        for (ScoreDoc scoreDoc : mergedTopDocs.scoreDocs) {
-            scoreDoc.shardIndex = -1;
-        }
-        if (totalHits != null) { // we have already precalculated totalHits for the whole index
-            mergedTopDocs = new TopFieldDocs(totalHits, mergedTopDocs.scoreDocs, mergedTopDocs.fields);
-        }
-        result.topDocs(new TopDocsAndMaxScore(mergedTopDocs, Float.NaN), formats);
-    }
-
     @Override
     protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
-        for (LeafReaderContext ctx : leaves) { // search each subreader
-            searchLeaf(ctx, weight, collector);
-        }
-    }
-
-    /**
-     * Lower-level search API.
-     *
-     * {@link LeafCollector#collect(int)} is called for every matching document in
-     * the provided <code>ctx</code>.
-     */
-    private void searchLeaf(LeafReaderContext ctx, Weight weight, Collector collector) throws IOException {
-        checkCancelled();
-        weight = wrapWeight(weight);
-        final LeafCollector leafCollector;
-        try {
-            leafCollector = collector.getLeafCollector(ctx);
-        } catch (CollectionTerminatedException e) {
-            // there is no doc of interest in this reader context
-            // continue with the following leaf
-            return;
-        }
-        Bits liveDocs = ctx.reader().getLiveDocs();
-        BitSet liveDocsBitSet = getSparseBitSetOrNull(liveDocs);
-        if (liveDocsBitSet == null) {
-            BulkScorer bulkScorer = weight.bulkScorer(ctx);
-            if (bulkScorer != null) {
-                try {
-                    bulkScorer.score(leafCollector, liveDocs);
-                } catch (CollectionTerminatedException e) {
-                    // collection was terminated prematurely
-                    // continue with the following leaf
-                }
-            }
-        } else {
-            // if the role query result set is sparse then we should use the SparseFixedBitSet for advancing:
-            Scorer scorer = weight.scorer(ctx);
-            if (scorer != null) {
-                try {
-                    intersectScorerAndBitSet(scorer, liveDocsBitSet, leafCollector,
-                        checkCancelled == null ? () -> { } : checkCancelled);
-                } catch (CollectionTerminatedException e) {
-                    // collection was terminated prematurely
-                    // continue with the following leaf
-                }
-            }
-        }
-    }
-
-    private Weight wrapWeight(Weight weight) {
+        final Weight cancellableWeight;
         if (checkCancelled != null) {
-            return new Weight(weight.getQuery()) {
+            cancellableWeight = new Weight(weight.getQuery()) {
+
                 @Override
                 public void extractTerms(Set<Term> terms) {
                     throw new UnsupportedOperationException();
@@ -250,10 +168,48 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
                 }
             };
         } else {
-            return weight;
+            cancellableWeight = weight;
         }
+        searchInternal(leaves, cancellableWeight, collector);
     }
 
+    private void searchInternal(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
+        for (LeafReaderContext ctx : leaves) { // search each subreader in order; an early-terminated leaf never aborts the loop
+            final LeafCollector leafCollector;
+            try {
+                leafCollector = collector.getLeafCollector(ctx);
+            } catch (CollectionTerminatedException e) {
+                // there is no doc of interest in this reader context
+                // continue with the following leaf
+                continue;
+            }
+            Bits liveDocs = ctx.reader().getLiveDocs();
+            BitSet liveDocsBitSet = getSparseBitSetOrNull(liveDocs); // null selects the bulk-scorer path below
+            if (liveDocsBitSet == null) {
+                BulkScorer bulkScorer = weight.bulkScorer(ctx);
+                if (bulkScorer != null) { // a null bulk scorer is skipped: nothing is collected for this leaf
+                    try {
+                        bulkScorer.score(leafCollector, liveDocs); // liveDocs is handed to the scorer as the accepted-docs filter
+                    } catch (CollectionTerminatedException e) {
+                        // collection was terminated prematurely
+                        // continue with the following leaf
+                    }
+                }
+            } else {
+                // if the role query result set is sparse then we should use the SparseFixedBitSet for advancing:
+                Scorer scorer = weight.scorer(ctx);
+                if (scorer != null) { // a null scorer is skipped: nothing is collected for this leaf
+                    try {
+                        intersectScorerAndBitSet(scorer, liveDocsBitSet, leafCollector,
+                            checkCancelled == null ? () -> {} : checkCancelled); // no-op when no cancellation check is configured
+                    } catch (CollectionTerminatedException e) {
+                        // collection was terminated prematurely
+                        // continue with the following leaf
+                    }
+                }
+            }
+        }
+    }
 
     private static BitSet getSparseBitSetOrNull(Bits liveDocs) {
         if (liveDocs instanceof SparseFixedBitSet) {

diff --git a/server/src/main/java/org/elasticsearch/search/profile/query/CollectorResult.java b/server/src/main/java/org/elasticsearch/search/profile/query/CollectorResult.java
@@ -49,6 +49,8 @@ public class CollectorResult implements ToXContentObject, Writeable {
     public static final String REASON_SEARCH_POST_FILTER = "search_post_filter";
     public static final String REASON_SEARCH_MIN_SCORE = "search_min_score";
     public static final String REASON_SEARCH_MULTI = "search_multi";
+    public static final String REASON_SEARCH_TIMEOUT = "search_timeout";
+    public static final String REASON_SEARCH_CANCELLED = "search_cancelled";
     public static final String REASON_AGGREGATION = "aggregation";
     public static final String REASON_AGGREGATION_GLOBAL = "aggregation_global";
 

diff --git a/server/src/main/java/org/elasticsearch/search/query/CancellableCollector.java b/server/src/main/java/org/elasticsearch/search/query/CancellableCollector.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.query;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.FilterCollector;
+import org.apache.lucene.search.LeafCollector;
+import org.elasticsearch.tasks.TaskCancelledException;
+
+import java.io.IOException;
+import java.util.function.BooleanSupplier;
+
+/**
+ * Collector that checks if the task it is executed under is cancelled; the check runs in {@link #getLeafCollector}.
+ */
+public class CancellableCollector extends FilterCollector {
+    private final BooleanSupplier cancelled; // polled in getLeafCollector; a true value aborts the search with an exception
+
+    /**
+     * Wraps {@code in} so that the cancellation flag is consulted before each leaf collector is requested from it.
+     * @param cancelled supplier of the cancellation flag, the supplier will be called for each segment
+     * @param in wrapped collector
+     */
+    public CancellableCollector(BooleanSupplier cancelled, Collector in) {
+        super(in);
+        this.cancelled = cancelled;
+    }
+
+    @Override
+    public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
+        if (cancelled.getAsBoolean()) { // the only place this class checks the flag; the returned leaf collector is not wrapped
+            throw new TaskCancelledException("cancelled");
+        }
+        return super.getLeafCollector(context); // not cancelled: defer to FilterCollector for the actual leaf collector
+    }
+}
diff --git a/server/src/main/java/org/elasticsearch/search/query/QueryCollectorContext.java b/server/src/main/java/org/elasticsearch/search/query/QueryCollectorContext.java
@@ -28,13 +28,16 @@
 import org.elasticsearch.common.lucene.MinimumScoreCollector;
 import org.elasticsearch.common.lucene.search.FilteredCollector;
 import org.elasticsearch.search.profile.query.InternalProfileCollector;
+import org.elasticsearch.tasks.TaskCancelledException;
 
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
+import java.util.function.BooleanSupplier;
 
+import static org.elasticsearch.search.profile.query.CollectorResult.REASON_SEARCH_CANCELLED;
 import static org.elasticsearch.search.profile.query.CollectorResult.REASON_SEARCH_MIN_SCORE;
 import static org.elasticsearch.search.profile.query.CollectorResult.REASON_SEARCH_MULTI;
 import static org.elasticsearch.search.profile.query.CollectorResult.REASON_SEARCH_POST_FILTER;
@@ -147,6 +150,18 @@ protected InternalProfileCollector createWithProfiler(InternalProfileCollector i
         };
     }
 
+    /**
+     * Creates a context whose collector throws {@link TaskCancelledException} once {@code cancelled} reports true
+     */
+    static QueryCollectorContext createCancellableCollectorContext(BooleanSupplier cancelled) {
+        return new QueryCollectorContext(REASON_SEARCH_CANCELLED) { // reason constant declared in CollectorResult
+            @Override
+            Collector create(Collector in) throws IOException {
+                return new CancellableCollector(cancelled, in); // wraps the incoming collector with the cancellation check
+            }
+        };
+    }
+
     /**
      * Creates collector limiting the collection to the first <code>numHits</code> documents
      */