Fix ShardSplittingQuery to respect nested documents. #27398

Merged · 4 commits · Nov 16, 2017
Changes from 2 commits

ShardSplittingQuery.java
@@ -19,9 +19,11 @@
package org.elasticsearch.index.shard;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@@ -33,19 +35,22 @@
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.join.BitSetProducer;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.routing.OperationRouting;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.RoutingFieldMapper;
import org.elasticsearch.index.mapper.Uid;

import java.io.IOException;
import java.util.function.IntConsumer;
import java.util.function.IntPredicate;
import java.util.function.Predicate;

/**
@@ -56,16 +61,21 @@
final class ShardSplittingQuery extends Query {
private final IndexMetaData indexMetaData;
private final int shardId;
private final BitSetProducer nestedParentBitSetProducer;

ShardSplittingQuery(IndexMetaData indexMetaData, int shardId) {
ShardSplittingQuery(IndexMetaData indexMetaData, int shardId, boolean hasNested) {
this(indexMetaData, shardId, hasNested ? newParentDocBitSetProducer() : null);
}

private ShardSplittingQuery(IndexMetaData indexMetaData, int shardId, BitSetProducer nestedParentBitSetProducer) {
if (indexMetaData.getCreationVersion().before(Version.V_6_0_0_rc2)) {
throw new IllegalArgumentException("Splitting query can only be executed on an index created with version "
+ Version.V_6_0_0_rc2 + " or higher");
}
this.indexMetaData = indexMetaData;
this.shardId = shardId;
this.nestedParentBitSetProducer = nestedParentBitSetProducer;
}

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) {
return new ConstantScoreWeight(this, boost) {
@@ -84,44 +94,85 @@ public Scorer scorer(LeafReaderContext context) throws IOException {
Uid.decodeId(ref.bytes, ref.offset, ref.length), null);
return shardId == targetShardId;
};
if (terms == null) { // this is the common case - no partitioning and no _routing values
if (terms == null) {
// this is the common case - no partitioning and no _routing values
// in this case we also don't do anything special with regards to nested docs since we basically delete
// by ID and parent and nested all have the same id.
assert indexMetaData.isRoutingPartitionedIndex() == false;
findSplitDocs(IdFieldMapper.NAME, includeInShard, leafReader, bitSet::set);
} else {
final BitSet parentBitSet;
final IntPredicate includeDoc;
Member: So just to double check, the includeDoc is to ensure that only root docs get selected and later we select the nested docs of the selected root docs in markChildDocs(...), right?

Contributor Author: correct

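To make the includeDoc discussion above concrete, here is a minimal standalone sketch (doc ids and segment layout are hypothetical, not taken from this PR) of how a parent bitset restricts the _id/_routing scan to root documents; nested children are skipped here and only marked later via markChildDocs(...):

import org.apache.lucene.util.FixedBitSet;
import java.util.function.IntPredicate;

// Hypothetical segment layout: nested (child) docs are indexed immediately before
// their parent, so docs 0-1 are children of parent 2, docs 3-4 are children of
// parent 5, and doc 6 is a parent without nested children.
public class IncludeDocSketch {
    public static void main(String[] args) {
        FixedBitSet parentBitSet = new FixedBitSet(7);
        parentBitSet.set(2);
        parentBitSet.set(5);
        parentBitSet.set(6);

        // Same idea as includeDoc = parentBitSet::get in the scorer above:
        // only root documents are considered when matching on _id / _routing.
        IntPredicate includeDoc = parentBitSet::get;

        for (int doc = 0; doc < 7; doc++) {
            System.out.println("doc " + doc + " considered for _id/_routing match: " + includeDoc.test(doc));
        }
        // prints false, false, true, false, false, true, true - child docs are never
        // matched directly; they follow their parent's fate via markChildDocs(...).
    }
}
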
if (nestedParentBitSetProducer == null) {
parentBitSet = null;
includeDoc = i -> true;
} else {
parentBitSet = nestedParentBitSetProducer.getBitSet(context);
if (parentBitSet == null) {
return null; // no matches
}
includeDoc = parentBitSet::get;
}
if (indexMetaData.isRoutingPartitionedIndex()) {
// this is the heaviest invariant. Here we have to visit all docs' stored fields to extract _id and _routing
// since this index is routing partitioned.
Visitor visitor = new Visitor();
return new ConstantScoreScorer(this, score(),
new RoutingPartitionedDocIdSetIterator(leafReader, visitor));
TwoPhaseIterator twoPhaseIterator =
parentBitSet == null ? new RoutingPartitionedDocIdSetIterator(leafReader, visitor) :
new NestedRoutingPartitionedDocIdSetIterator(leafReader, visitor, parentBitSet);
return new ConstantScoreScorer(this, score(), twoPhaseIterator);
} else {
// in the _routing case we first go and find all docs that have a routing value and mark the ones we have to delete
findSplitDocs(RoutingFieldMapper.NAME, ref -> {
int targetShardId = OperationRouting.generateShardId(indexMetaData, null, ref.utf8ToString());
return shardId == targetShardId;
}, leafReader, bitSet::set);
}, leafReader, docId -> {
if (includeDoc.test(docId)) {
bitSet.set(docId);
}
});

// now if we have a mixed index where some docs have a _routing value and some don't we have to exclude the ones
// with a routing value from the next iteration and delete / select based on the ID.
if (terms.getDocCount() != leafReader.maxDoc()) {
// this is a special case where some of the docs have no routing values this sucks but it's possible today
FixedBitSet hasRoutingValue = new FixedBitSet(leafReader.maxDoc());
findSplitDocs(RoutingFieldMapper.NAME, ref -> false, leafReader,
hasRoutingValue::set);
docId -> {
if (includeDoc.test(docId)) {
hasRoutingValue.set(docId);
}
});

findSplitDocs(IdFieldMapper.NAME, includeInShard, leafReader, docId -> {
if (hasRoutingValue.get(docId) == false) {
if (hasRoutingValue.get(docId) == false && includeDoc.test(docId)) {
bitSet.set(docId);
}
});
}
}
if (parentBitSet != null) {
markChildDocs(parentBitSet, bitSet);
}
}

return new ConstantScoreScorer(this, score(), new BitSetIterator(bitSet, bitSet.length()));
}


};
}

private void markChildDocs(BitSet parentDocs, BitSet deletedDocs) {
Contributor: Why deletedDocs? It's the opposite, no? The bitset of the matching parent+children, allDocs?

Contributor Author: naming issue, I will fix. In theory it will hold all docs that need to be deleted by the IndexWriter.

int currentDeleted = 0;
while (currentDeleted < deletedDocs.length() &&
(currentDeleted = deletedDocs.nextSetBit(currentDeleted)) != DocIdSetIterator.NO_MORE_DOCS) {
int previousParent = parentDocs.prevSetBit(Math.max(0, currentDeleted-1));
for (int i = previousParent + 1; i < currentDeleted; i++) {
deletedDocs.set(i);
}
currentDeleted++;
}
}
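
A self-contained trace of the loop above, using hypothetical doc ids (not from this PR): children 0-1 belong to parent 2, children 3-4 to parent 5, and only parent 5 has been marked for deletion, so its children 3 and 4 must be marked as well.

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.FixedBitSet;

// Standalone re-statement of the markChildDocs loop, for illustration only.
// deletedDocs initially holds the matching parent docs; for every such parent we
// walk back to the previous parent and mark everything in between (its children).
public class MarkChildDocsSketch {
    static void markChildDocs(BitSet parentDocs, BitSet deletedDocs) {
        int currentDeleted = 0;
        while (currentDeleted < deletedDocs.length() &&
            (currentDeleted = deletedDocs.nextSetBit(currentDeleted)) != DocIdSetIterator.NO_MORE_DOCS) {
            int previousParent = parentDocs.prevSetBit(Math.max(0, currentDeleted - 1));
            for (int i = previousParent + 1; i < currentDeleted; i++) {
                deletedDocs.set(i);
            }
            currentDeleted++;
        }
    }

    public static void main(String[] args) {
        // Hypothetical segment: children 0-1 belong to parent 2, children 3-4 to parent 5.
        FixedBitSet parents = new FixedBitSet(6);
        parents.set(2);
        parents.set(5);

        FixedBitSet deleted = new FixedBitSet(6);
        deleted.set(5); // pretend parent 5 does not belong to the target shard

        markChildDocs(parents, deleted);
        // deleted now also contains 3 and 4 (the nested docs of parent 5) but not 0-2,
        // because doc 2 is the previous parent and acts as the block boundary.
        for (int doc = 0; doc < 6; doc++) {
            System.out.println("doc " + doc + " deleted: " + deleted.get(doc));
        }
    }
}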

@Override
public String toString(String field) {
return "shard_splitting_query";
@@ -216,7 +267,7 @@ public Status needsField(FieldInfo fieldInfo) throws IOException {
* This two phase iterator visits every live doc and selects all docs that don't belong into this
* shard based on their id and routing value. This is only used in a routing partitioned index.
*/
private final class RoutingPartitionedDocIdSetIterator extends TwoPhaseIterator {
private class RoutingPartitionedDocIdSetIterator extends TwoPhaseIterator {
private final LeafReader leafReader;
private final Visitor visitor;

@@ -228,7 +279,10 @@ private final class RoutingPartitionedDocIdSetIterator extends TwoPhaseIterator

@Override
public boolean matches() throws IOException {
int doc = approximation.docID();
return innerMatches(approximation.docID());
}

protected boolean innerMatches(int doc) throws IOException {
visitor.reset();
leafReader.document(doc, visitor);
int targetShardId = OperationRouting.generateShardId(indexMetaData, visitor.id, visitor.routing);
@@ -240,6 +294,46 @@ public float matchCost() {
return 42; // that's obvious, right?
}
}

/**
* This TwoPhaseIterator marks all nested docs of matching parents as matches as well.
*/
private final class NestedRoutingPartitionedDocIdSetIterator extends RoutingPartitionedDocIdSetIterator {
private final BitSet parentDocs;
private int nextParent = -1;
private boolean nextParentMatches;

NestedRoutingPartitionedDocIdSetIterator(LeafReader leafReader, Visitor visitor, BitSet parentDocs) {
super(leafReader, visitor);
this.parentDocs = parentDocs;
}

@Override
public boolean matches() throws IOException {
Member: I was trying to understand why this works, because of the forward iteration here (with nested, we usually seek backwards, BitSet.prevSetBit(...)). So this works because all live doc ids (root docs and nested docs) are evaluated in order.

Contributor Author: correct, I will leave a comment

int doc = approximation.docID();
if (doc > nextParent) {
nextParent = parentDocs.nextSetBit(doc);
nextParentMatches = innerMatches(nextParent);
}
return nextParentMatches;
}
}
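
To make the forward-iteration argument from the review thread concrete, here is a small standalone simulation (hypothetical doc ids, not from this PR; a precomputed boolean array stands in for the innerMatches() stored-fields lookup). Because the approximation visits docs in increasing order, each child maps to the next parent via nextSetBit, and the nextParent/nextParentMatches pair acts as a one-entry cache so each parent is evaluated once per block.

import org.apache.lucene.util.FixedBitSet;

// Simulates the nextParent / nextParentMatches caching for a hypothetical segment
// where children 0-1 belong to parent 2 and children 3-4 to parent 5.
public class NestedForwardIterationSketch {
    public static void main(String[] args) {
        FixedBitSet parentDocs = new FixedBitSet(6);
        parentDocs.set(2);
        parentDocs.set(5);
        boolean[] parentMatches = new boolean[6];
        parentMatches[5] = true; // pretend only parent 5 must move to another shard

        int nextParent = -1;
        boolean nextParentMatches = false;
        int lookups = 0;

        // Doc ids arrive in increasing order, exactly like approximation.docID() does.
        for (int doc = 0; doc < 6; doc++) {
            if (doc > nextParent) {
                nextParent = parentDocs.nextSetBit(doc); // the parent that closes this block
                nextParentMatches = parentMatches[nextParent];
                lookups++; // innerMatches() would be called here, once per block
            }
            System.out.println("doc " + doc + " matches: " + nextParentMatches);
        }
        System.out.println("parent lookups: " + lookups); // 2, one per parent block
    }
}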

/*
* this is used internally to obtain a bitset for parent documents. We don't cache this since we never access the same reader more
* than once
*/
Contributor: We warm this query per segment in BitsetFilterCache#BitSetProducerWarmer so why not use the per-segment cache directly?

Contributor Author: We use this only as a delete by query which is executed on a recovery-private index writer. There is no point in caching it and it won't have a cache hit either.

Contributor Author: I left a comment

Contributor: Ok I missed the recovery-private thing. Thanks

private static BitSetProducer newParentDocBitSetProducer() {
Member: 👍

return context -> {
Query query = Queries.newNonNestedFilter();
final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(context);
final IndexSearcher searcher = new IndexSearcher(topLevelContext);
searcher.setQueryCache(null);
final Weight weight = searcher.createNormalizedWeight(query, false);
Scorer s = weight.scorer(context);
return s == null ? null : BitSet.of(s.iterator(), context.reader().maxDoc());
};
}
}


StoreRecovery.java
@@ -115,6 +115,7 @@ boolean recoverFromLocalShards(BiConsumer<String, MappingMetaData> mappingUpdate
indexShard.mapperService().merge(sourceMetaData, MapperService.MergeReason.MAPPING_RECOVERY, true);
// now that the mapping is merged we can validate the index sort configuration.
Sort indexSort = indexShard.getIndexSort();
final boolean hasNested = indexShard.mapperService().hasNested();
final boolean isSplit = sourceMetaData.getNumberOfShards() < indexShard.indexSettings().getNumberOfShards();
assert isSplit == false || sourceMetaData.getCreationVersion().onOrAfter(Version.V_6_0_0_alpha1) : "for split we require a " +
"single type but the index is created before 6.0.0";
@@ -127,7 +128,7 @@ boolean recoverFromLocalShards(BiConsumer<String, MappingMetaData> mappingUpdate
final long maxUnsafeAutoIdTimestamp =
shards.stream().mapToLong(LocalShardSnapshot::maxUnsafeAutoIdTimestamp).max().getAsLong();
addIndices(indexShard.recoveryState().getIndex(), directory, indexSort, sources, maxSeqNo, maxUnsafeAutoIdTimestamp,
indexShard.indexSettings().getIndexMetaData(), indexShard.shardId().id(), isSplit);
indexShard.indexSettings().getIndexMetaData(), indexShard.shardId().id(), isSplit, hasNested);
internalRecoverFromStore(indexShard);
// just trigger a merge to do housekeeping on the
// copied segments - we will also see them in stats etc.
@@ -142,8 +143,8 @@ boolean recoverFromLocalShards(BiConsumer<String, MappingMetaData> mappingUpdate
}

void addIndices(final RecoveryState.Index indexRecoveryStats, final Directory target, final Sort indexSort, final Directory[] sources,
final long maxSeqNo, final long maxUnsafeAutoIdTimestamp, IndexMetaData indexMetaData, int shardId, boolean split)
throws IOException {
final long maxSeqNo, final long maxUnsafeAutoIdTimestamp, IndexMetaData indexMetaData, int shardId, boolean split,
boolean hasNested) throws IOException {
final Directory hardLinkOrCopyTarget = new org.apache.lucene.store.HardlinkCopyDirectoryWrapper(target);
IndexWriterConfig iwc = new IndexWriterConfig(null)
.setCommitOnClose(false)
@@ -158,9 +159,8 @@ void addIndices(final RecoveryState.Index indexRecoveryStats, final Directory ta

try (IndexWriter writer = new IndexWriter(new StatsDirectoryWrapper(hardLinkOrCopyTarget, indexRecoveryStats), iwc)) {
writer.addIndexes(sources);

if (split) {
writer.deleteDocuments(new ShardSplittingQuery(indexMetaData, shardId));
writer.deleteDocuments(new ShardSplittingQuery(indexMetaData, shardId, hasNested));
}
/*
* We set the maximum sequence number and the local checkpoint on the target to the maximum of the maximum sequence numbers on