Skip to content

Commit

Permalink
[index] Prevent duplication of documents when retry indexing after fail
Browse files Browse the repository at this point in the history
If bulk index request fails due to a disconnect, unavailable shard etc, the request is
retried once before actually failing. However, even in case of failure the documents
might already be indexed. For autogenerated ids the request must not add the
documents again and therfore canHaveDuplicates must be set to true.

closes elastic#8788
  • Loading branch information
brwe committed Jan 2, 2015
1 parent aa7730a commit 9717861
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ public class TransportShardBulkAction extends TransportShardReplicationOperation
private final static String OP_TYPE_UPDATE = "update";
private final static String OP_TYPE_DELETE = "delete";

private static final String ACTION_NAME = BulkAction.NAME + "[s]";
public static final String ACTION_NAME = BulkAction.NAME + "[s]";

private final MappingUpdatedAction mappingUpdatedAction;
private final UpdateHelper updateHelper;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,7 @@ public void handleException(TransportException exp) {
if (exp.unwrapCause() instanceof ConnectTransportException || exp.unwrapCause() instanceof NodeClosedException ||
retryPrimaryException(exp)) {
primaryOperationStarted.set(false);
internalRequest.request().setCanHaveDuplicates();
// we already marked it as started when we executed it (removed the listener) so pass false
// to re-add to the cluster listener
logger.trace("received an error from node the primary was assigned to ({}), scheduling a retry", exp.getMessage());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,17 @@ private void innerCreateNoLock(Create create, IndexWriter writer, long currentVe
// #7142: the primary already determined it's OK to index this document, and we confirmed above that the version doesn't
// conflict, so we must also update here on the replica to remain consistent:
doUpdate = true;
} else if (create.origin() == Operation.Origin.PRIMARY && create.autoGeneratedId() && create.canHaveDuplicates() && currentVersion == 1 && create.version() == Versions.MATCH_ANY) {
/**
* If bulk index request fails due to a disconnect, unavailable shard etc. then the request is
* retried before it actually fails. However, the documents might already be indexed.
* For autogenerated ids this means that a version conflict will be reported in the bulk request
* although the document was indexed properly.
* To avoid this we have to make sure that the index request is treated as an update and set updatedVersion to 1.
* See also discussion on https://github.com/elasticsearch/elasticsearch/pull/9125
*/
doUpdate = true;
updatedVersion = 1;
} else {
// On primary, we throw DAEE if the _uid is already in the index with an older version:
assert create.origin() == Operation.Origin.PRIMARY;
Expand Down
135 changes: 135 additions & 0 deletions src/test/java/org/elasticsearch/index/store/ExceptionRetryTests.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.store;

import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import org.elasticsearch.action.admin.cluster.node.stats.NodeStats;
import org.elasticsearch.action.admin.cluster.node.stats.NodesStatsResponse;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.bulk.TransportShardBulkAction;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.discovery.Discovery;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.elasticsearch.test.transport.MockTransportService;
import org.elasticsearch.transport.*;
import org.junit.Test;

import java.io.IOException;
import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicBoolean;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;

@ElasticsearchIntegrationTest.ClusterScope(scope = ElasticsearchIntegrationTest.Scope.SUITE)
public class ExceptionRetryTests extends ElasticsearchIntegrationTest {

@Override
protected Settings nodeSettings(int nodeOrdinal) {
return ImmutableSettings.builder()
.put(super.nodeSettings(nodeOrdinal)).put("gateway.type", "local")
.put(TransportModule.TRANSPORT_SERVICE_TYPE_KEY, MockTransportService.class.getName())
.build();
}

/**
* Tests retry mechanism when indexing. If an exception occurs when indexing then the indexing request is tried again before finally failing.
* If auto generated ids are used this must not lead to duplicate ids
* see https://github.com/elasticsearch/elasticsearch/issues/8788
*/
@Test
public void testRetryDueToExceptionOnNetworkLayer() throws ExecutionException, InterruptedException, IOException {
final AtomicBoolean exceptionThrown = new AtomicBoolean(false);
int numDocs = scaledRandomIntBetween(100, 1000);
NodesStatsResponse nodeStats = client().admin().cluster().prepareNodesStats().get();
NodeStats unluckyNode = randomFrom(nodeStats.getNodes());
assertAcked(client().admin().indices().prepareCreate("index"));
ensureGreen("index");

//create a transport service that throws a ConnectTransportException for one bulk request and therefore triggers a retry.
for (NodeStats dataNode : nodeStats.getNodes()) {
MockTransportService mockTransportService = ((MockTransportService) internalCluster().getInstance(TransportService.class, dataNode.getNode().name()));
mockTransportService.addDelegate(internalCluster().getInstance(Discovery.class, unluckyNode.getNode().name()).localNode(), new MockTransportService.DelegateTransport(mockTransportService.original()) {

@Override
public void sendRequest(DiscoveryNode node, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException, TransportException {
super.sendRequest(node, requestId, action, request, options);
if (action.equals(TransportShardBulkAction.ACTION_NAME) && !exceptionThrown.get()) {
logger.debug("Throw ConnectTransportException");
exceptionThrown.set(true);
throw new ConnectTransportException(node, action);
}
}
});
}

BulkRequestBuilder bulkBuilder = client().prepareBulk();
for (int i = 0; i < numDocs; i++) {
XContentBuilder doc = null;
doc = jsonBuilder().startObject().field("foo", "bar").endObject();
bulkBuilder.add(client().prepareIndex("index", "type").setSource(doc));
}

BulkResponse response = bulkBuilder.get();
if (response.hasFailures()) {
for (BulkItemResponse singleIndexRespons : response.getItems()) {
if (singleIndexRespons.isFailed()) {
fail("None of the bulk items should fail but got " + singleIndexRespons.getFailureMessage());
}
}
}

refresh();
SearchResponse searchResponse = client().prepareSearch("index").setSize(numDocs * 2).addField("_id").get();

Set<String> uniqueIds = new HashSet();
long dupCounter = 0;
boolean found_duplicate_already = false;
for (int i = 0; i < searchResponse.getHits().getHits().length; i++) {
if (!uniqueIds.add(searchResponse.getHits().getHits()[i].getId())) {
if (!found_duplicate_already) {
SearchResponse dupIdResponse = client().prepareSearch("index").setQuery(termQuery("_id", searchResponse.getHits().getHits()[i].getId())).setExplain(true).get();
assertThat(dupIdResponse.getHits().totalHits(), greaterThan(1l));
logger.info("found a duplicate id:");
for (SearchHit hit : dupIdResponse.getHits()) {
logger.info("Doc {} was found on shard {}", hit.getId(), hit.getShard().getShardId());
}
logger.info("will not print anymore in case more duplicates are found.");
found_duplicate_already = true;
}
dupCounter++;
}
}
assertSearchResponse(searchResponse);
assertThat(dupCounter, equalTo(0l));
assertHitCount(searchResponse, numDocs);
}
}

0 comments on commit 9717861

Please sign in to comment.