Make ingest execution non-blocking (#43361)
Added a method to the Processor interface that allows a processor
implementation to make a non-blocking call.

Also added a semaphore to prevent the search thread pool from rejecting
search requests originating from the match processor. This is a temporary workaround.
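
For context, a minimal sketch of the interface shape this commit introduces (not the
verbatim Elasticsearch source): the new callback-based execute is given a default
implementation that delegates to the existing synchronous variant, so only processors
that actually do non-blocking work need to override it.

    import java.util.function.BiConsumer;

    public interface Processor {

        // Pre-existing synchronous contract.
        IngestDocument execute(IngestDocument ingestDocument) throws Exception;

        // New non-blocking contract: an implementation that performs I/O (for example
        // the match processor issuing a search) may complete the handler on another thread.
        default void execute(IngestDocument ingestDocument, BiConsumer<IngestDocument, Exception> handler) {
            final IngestDocument result;
            try {
                result = execute(ingestDocument);
            } catch (Exception e) {
                handler.accept(null, e);
                return;
            }
            handler.accept(result, null);
        }
    }

The semaphore workaround itself lives in files not shown in this excerpt; a hedged
sketch of the idea is simply to bound the number of in-flight searches and release the
permit from the asynchronous response callback (names here are illustrative):

    import java.util.concurrent.Semaphore;
    import java.util.function.Consumer;

    // Acquire a permit before dispatching a search and hand the callee a callback that
    // releases it, so at most `permits` searches are in flight at once.
    void submitThrottled(Semaphore permits, Consumer<Runnable> asyncSearch) throws InterruptedException {
        permits.acquire();
        asyncSearch.accept(permits::release); // callee must invoke the callback exactly once
    }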
martijnvg authored Jul 1, 2019
1 parent 79ba94f commit 7c42cde
Showing 22 changed files with 803 additions and 460 deletions (only three of the 22 files appear in this excerpt).
ForEachProcessor.java:

@@ -28,6 +28,9 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.function.BiConsumer;
+
 import org.elasticsearch.script.ScriptService;
 
 import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException;
@@ -63,29 +66,46 @@ boolean isIgnoreMissing() {
     }
 
     @Override
-    public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
+    public void execute(IngestDocument ingestDocument, BiConsumer<IngestDocument, Exception> handler) {
         List<?> values = ingestDocument.getFieldValue(field, List.class, ignoreMissing);
         if (values == null) {
             if (ignoreMissing) {
-                return ingestDocument;
+                handler.accept(ingestDocument, null);
+            } else {
+                handler.accept(null, new IllegalArgumentException("field [" + field + "] is null, cannot loop over its elements."));
             }
-            throw new IllegalArgumentException("field [" + field + "] is null, cannot loop over its elements.");
-        }
-        List<Object> newValues = new ArrayList<>(values.size());
-        IngestDocument document = ingestDocument;
-        for (Object value : values) {
-            Object previousValue = ingestDocument.getIngestMetadata().put("_value", value);
-            try {
-                document = processor.execute(document);
-                if (document == null) {
-                    return null;
-                }
-            } finally {
-                newValues.add(ingestDocument.getIngestMetadata().put("_value", previousValue));
-            }
+        } else {
+            List<Object> newValues = new CopyOnWriteArrayList<>();
+            innerExecute(0, values, newValues, ingestDocument, handler);
         }
-        document.setFieldValue(field, newValues);
-        return document;
+    }
+
+    void innerExecute(int index, List<?> values, List<Object> newValues, IngestDocument document,
+                      BiConsumer<IngestDocument, Exception> handler) {
+        if (index == values.size()) {
+            document.setFieldValue(field, new ArrayList<>(newValues));
+            handler.accept(document, null);
+            return;
+        }
+
+        Object value = values.get(index);
+        Object previousValue = document.getIngestMetadata().put("_value", value);
+        processor.execute(document, (result, e) -> {
+            if (e != null) {
+                newValues.add(document.getIngestMetadata().put("_value", previousValue));
+                handler.accept(null, e);
+            } else if (result == null) {
+                handler.accept(null, null);
+            } else {
+                newValues.add(document.getIngestMetadata().put("_value", previousValue));
+                innerExecute(index + 1, values, newValues, document, handler);
+            }
+        });
+    }
+
+    @Override
+    public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
+        throw new UnsupportedOperationException("this method should not get executed");
     }
 
     @Override
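The rewritten ForEachProcessor drives iteration from the completion callback: each element
is processed only after the previous element's callback fires, so no thread ever blocks
waiting on an element. The accumulator also changes from ArrayList to CopyOnWriteArrayList,
presumably because successive callbacks may arrive on different threads. A self-contained
sketch of the pattern (illustrative names, not the Elasticsearch source):

    import java.util.List;
    import java.util.function.BiConsumer;

    final class AsyncEach {

        /** One asynchronous step; it must invoke the handler exactly once. */
        interface Step<T> {
            void run(T value, BiConsumer<T, Exception> handler);
        }

        /** Applies step to each element in order, advancing only from the callback. */
        static <T> void forEach(int index, List<T> values, Step<T> step,
                                BiConsumer<Void, Exception> done) {
            if (index == values.size()) {
                done.accept(null, null);          // every element processed
                return;
            }
            step.run(values.get(index), (result, e) -> {
                if (e != null) {
                    done.accept(null, e);         // stop at the first failure
                } else {
                    forEach(index + 1, values, step, done);
                }
            });
        }
    }

Note that if every step completes synchronously on the calling thread, the recursion grows
as deep as the list is long — the same trade-off the ForEachProcessor change accepts.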
ForEachProcessorTests.java:

@@ -53,7 +53,7 @@ public void testExecute() throws Exception {
             "_tag", "values", new UppercaseProcessor("_tag", "_ingest._value", false, "_ingest._value"),
             false
         );
-        processor.execute(ingestDocument);
+        processor.execute(ingestDocument, (result, e) -> {});
 
         @SuppressWarnings("unchecked")
         List<String> result = ingestDocument.getFieldValue("values", List.class);
@@ -73,12 +73,9 @@ public void testExecuteWithFailure() throws Exception {
             }
         });
         ForEachProcessor processor = new ForEachProcessor("_tag", "values", testProcessor, false);
-        try {
-            processor.execute(ingestDocument);
-            fail("exception expected");
-        } catch (RuntimeException e) {
-            assertThat(e.getMessage(), equalTo("failure"));
-        }
+        Exception[] exceptions = new Exception[1];
+        processor.execute(ingestDocument, (result, e) -> {exceptions[0] = e;});
+        assertThat(exceptions[0].getMessage(), equalTo("failure"));
         assertThat(testProcessor.getInvokedCounter(), equalTo(3));
         assertThat(ingestDocument.getFieldValue("values", List.class), equalTo(Arrays.asList("a", "b", "c")));
 
@@ -95,7 +92,7 @@ public void testExecuteWithFailure() throws Exception {
             "_tag", "values", new CompoundProcessor(false, Arrays.asList(testProcessor), Arrays.asList(onFailureProcessor)),
             false
         );
-        processor.execute(ingestDocument);
+        processor.execute(ingestDocument, (result, e) -> {});
         assertThat(testProcessor.getInvokedCounter(), equalTo(3));
         assertThat(ingestDocument.getFieldValue("values", List.class), equalTo(Arrays.asList("A", "B", "c")));
     }
@@ -114,7 +111,7 @@ public void testMetaDataAvailable() throws Exception {
             id.setFieldValue("_ingest._value.id", id.getSourceAndMetadata().get("_id"));
         });
         ForEachProcessor processor = new ForEachProcessor("_tag", "values", innerProcessor, false);
-        processor.execute(ingestDocument);
+        processor.execute(ingestDocument, (result, e) -> {});
 
         assertThat(innerProcessor.getInvokedCounter(), equalTo(2));
         assertThat(ingestDocument.getFieldValue("values.0.index", String.class), equalTo("_index"));
@@ -142,7 +139,7 @@ public void testRestOfTheDocumentIsAvailable() throws Exception {
             "_tag", "values", new SetProcessor("_tag",
                 new TestTemplateService.MockTemplateScript.Factory("_ingest._value.new_field"),
                 (model) -> model.get("other")), false);
-        processor.execute(ingestDocument);
+        processor.execute(ingestDocument, (result, e) -> {});
 
         assertThat(ingestDocument.getFieldValue("values.0.new_field", String.class), equalTo("value"));
         assertThat(ingestDocument.getFieldValue("values.1.new_field", String.class), equalTo("value"));
@@ -180,7 +177,7 @@ public String getTag() {
         );
 
         ForEachProcessor processor = new ForEachProcessor("_tag", "values", innerProcessor, false);
-        processor.execute(ingestDocument);
+        processor.execute(ingestDocument, (result, e) -> {});
         @SuppressWarnings("unchecked")
         List<String> result = ingestDocument.getFieldValue("values", List.class);
         assertThat(result.size(), equalTo(numValues));
@@ -205,7 +202,7 @@ public void testModifyFieldsOutsideArray() throws Exception {
             Collections.singletonList(new UppercaseProcessor("_tag_upper", "_ingest._value", false, "_ingest._value")),
             Collections.singletonList(new AppendProcessor("_tag", template, (model) -> (Collections.singletonList("added"))))
         ), false);
-        processor.execute(ingestDocument);
+        processor.execute(ingestDocument, (result, e) -> {});
 
         List<?> result = ingestDocument.getFieldValue("values", List.class);
         assertThat(result.get(0), equalTo("STRING"));
@@ -231,7 +228,7 @@ public void testScalarValueAllowsUnderscoreValueFieldToRemainAccessible() throws
         TestProcessor processor = new TestProcessor(doc -> doc.setFieldValue("_ingest._value",
             doc.getFieldValue("_source._value", String.class)));
         ForEachProcessor forEachProcessor = new ForEachProcessor("_tag", "values", processor, false);
-        forEachProcessor.execute(ingestDocument);
+        forEachProcessor.execute(ingestDocument, (result, e) -> {});
 
         List<?> result = ingestDocument.getFieldValue("values", List.class);
         assertThat(result.get(0), equalTo("new_value"));
@@ -264,7 +261,7 @@ public void testNestedForEach() throws Exception {
         );
         ForEachProcessor processor = new ForEachProcessor(
             "_tag", "values1", new ForEachProcessor("_tag", "_ingest._value.values2", testProcessor, false), false);
-        processor.execute(ingestDocument);
+        processor.execute(ingestDocument, (result, e) -> {});
 
         List<?> result = ingestDocument.getFieldValue("values1.0.values2", List.class);
         assertThat(result.get(0), equalTo("ABC"));
@@ -282,7 +279,7 @@ public void testIgnoreMissing() throws Exception {
         IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
         TestProcessor testProcessor = new TestProcessor(doc -> {});
         ForEachProcessor processor = new ForEachProcessor("_tag", "_ingest._value", testProcessor, true);
-        processor.execute(ingestDocument);
+        processor.execute(ingestDocument, (result, e) -> {});
         assertIngestDocument(originalIngestDocument, ingestDocument);
         assertThat(testProcessor.getInvokedCounter(), equalTo(0));
     }
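The updated tests can assert immediately after execute(...) returns only because the inner
processors here complete their handler synchronously on the calling thread. For a processor
that genuinely completes on another thread, a test would have to wait for the callback; a
hypothetical helper (not part of this commit) could look like:

    import java.util.concurrent.CountDownLatch;
    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.atomic.AtomicReference;

    static IngestDocument executeAndWait(Processor processor, IngestDocument document) throws Exception {
        CountDownLatch latch = new CountDownLatch(1);
        AtomicReference<IngestDocument> result = new AtomicReference<>();
        AtomicReference<Exception> failure = new AtomicReference<>();
        processor.execute(document, (r, e) -> {
            result.set(r);
            failure.set(e);
            latch.countDown(); // signal completion from whichever thread ran the callback
        });
        if (latch.await(10, TimeUnit.SECONDS) == false) {
            throw new AssertionError("processor did not complete in time");
        }
        if (failure.get() != null) {
            throw failure.get();
        }
        return result.get();
    }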
TransportBulkAction.java:

@@ -19,6 +19,8 @@
 
 package org.elasticsearch.action.bulk;
 
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.apache.logging.log4j.message.ParameterizedMessage;
 import org.apache.lucene.util.SparseFixedBitSet;
 import org.elasticsearch.ElasticsearchParseException;
@@ -80,6 +82,7 @@
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicIntegerArray;
 import java.util.function.LongSupplier;
 import java.util.function.Supplier;
 import java.util.stream.Collectors;
@@ -581,14 +584,13 @@ private long relativeTime() {
     }
 
     void processBulkIndexIngestRequest(Task task, BulkRequest original, ActionListener<BulkResponse> listener) {
-        long ingestStartTimeInNanos = System.nanoTime();
-        BulkRequestModifier bulkRequestModifier = new BulkRequestModifier(original);
-        ingestService.executeBulkRequest(() -> bulkRequestModifier,
-            (indexRequest, exception) -> {
-                logger.debug(() -> new ParameterizedMessage("failed to execute pipeline [{}] for document [{}/{}/{}]",
-                    indexRequest.getPipeline(), indexRequest.index(), indexRequest.type(), indexRequest.id()), exception);
-                bulkRequestModifier.markCurrentItemAsFailed(exception);
-            }, (exception) -> {
+        final long ingestStartTimeInNanos = System.nanoTime();
+        final BulkRequestModifier bulkRequestModifier = new BulkRequestModifier(original);
+        ingestService.executeBulkRequest(
+            original.numberOfActions(),
+            () -> bulkRequestModifier,
+            bulkRequestModifier::markItemAsFailed,
+            (originalThread, exception) -> {
                 if (exception != null) {
                     logger.error("failed to execute pipeline for a bulk request", exception);
                     listener.onFailure(exception);
@@ -603,26 +605,56 @@ void processBulkIndexIngestRequest(Task task, BulkRequest original, ActionListen
                     // (this will happen if pre-processing all items in the bulk failed)
                     actionListener.onResponse(new BulkResponse(new BulkItemResponse[0], 0));
                 } else {
-                    doExecute(task, bulkRequest, actionListener);
+                    // If a processor went async and returned a response on a different thread, then
+                    // before we continue the bulk request we should fork back onto a write thread:
+                    if (originalThread == Thread.currentThread()) {
+                        assert Thread.currentThread().getName().contains(ThreadPool.Names.WRITE);
+                        doExecute(task, bulkRequest, actionListener);
+                    } else {
+                        threadPool.executor(ThreadPool.Names.WRITE).execute(new AbstractRunnable() {
+                            @Override
+                            public void onFailure(Exception e) {
+                                listener.onFailure(e);
+                            }
+
+                            @Override
+                            protected void doRun() throws Exception {
+                                doExecute(task, bulkRequest, actionListener);
+                            }
+
+                            @Override
+                            public boolean isForceExecution() {
+                                // If we fork back to a write thread we should *not* fail just because the
+                                // thread pool queue is full. (Otherwise the work done during ingest would be lost.)
+                                // It is okay to force execution here. Throttling of write requests happens prior to
+                                // ingest when a node receives a bulk request.
+                                return true;
+                            }
+                        });
+                    }
                 }
             }
         },
-        indexRequest -> bulkRequestModifier.markCurrentItemAsDropped());
+        bulkRequestModifier::markItemAsDropped
+    );
     }
 
     static final class BulkRequestModifier implements Iterator<DocWriteRequest<?>> {
 
+        private static final Logger LOGGER = LogManager.getLogger(BulkRequestModifier.class);
+
         final BulkRequest bulkRequest;
         final SparseFixedBitSet failedSlots;
         final List<BulkItemResponse> itemResponses;
+        final AtomicIntegerArray originalSlots;
 
-        int currentSlot = -1;
-        int[] originalSlots;
+        volatile int currentSlot = -1;
 
         BulkRequestModifier(BulkRequest bulkRequest) {
             this.bulkRequest = bulkRequest;
             this.failedSlots = new SparseFixedBitSet(bulkRequest.requests().size());
             this.itemResponses = new ArrayList<>(bulkRequest.requests().size());
+            this.originalSlots = new AtomicIntegerArray(bulkRequest.requests().size()); // oversize, but that's ok
         }
 
         @Override
@@ -646,12 +678,11 @@ BulkRequest getBulkRequest() {
 
             int slot = 0;
             List<DocWriteRequest<?>> requests = bulkRequest.requests();
-            originalSlots = new int[requests.size()]; // oversize, but that's ok
             for (int i = 0; i < requests.size(); i++) {
                 DocWriteRequest<?> request = requests.get(i);
                 if (failedSlots.get(i) == false) {
                     modifiedBulkRequest.add(request);
-                    originalSlots[slot++] = i;
+                    originalSlots.set(slot++, i);
                 }
             }
             return modifiedBulkRequest;
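A worked example of the slot bookkeeping (illustrative values): for an original bulk of five
items where ingest fails slots 1 and 3, getBulkRequest() returns a modified request holding
items 0, 2 and 4, and originalSlots records [0, 2, 4]. Response item i of the modified bulk
is then filed back at originalSlots.get(i) when the listener below merges it with the
failures already recorded in itemResponses.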
@@ -666,7 +697,7 @@ ActionListener<BulkResponse> wrapActionListenerIfNeeded(long ingestTookInMillis,
             return ActionListener.delegateFailure(actionListener, (delegatedListener, response) -> {
                 BulkItemResponse[] items = response.getItems();
                 for (int i = 0; i < items.length; i++) {
-                    itemResponses.add(originalSlots[i], response.getItems()[i]);
+                    itemResponses.add(originalSlots.get(i), response.getItems()[i]);
                 }
                 delegatedListener.onResponse(
                     new BulkResponse(
@@ -675,11 +706,11 @@ ActionListener<BulkResponse> wrapActionListenerIfNeeded(long ingestTookInMillis,
             }
         }
 
-        void markCurrentItemAsDropped() {
-            IndexRequest indexRequest = getIndexWriteRequest(bulkRequest.requests().get(currentSlot));
-            failedSlots.set(currentSlot);
+        synchronized void markItemAsDropped(int slot) {
+            IndexRequest indexRequest = getIndexWriteRequest(bulkRequest.requests().get(slot));
+            failedSlots.set(slot);
             itemResponses.add(
-                new BulkItemResponse(currentSlot, indexRequest.opType(),
+                new BulkItemResponse(slot, indexRequest.opType(),
                     new UpdateResponse(
                         new ShardId(indexRequest.index(), IndexMetaData.INDEX_UUID_NA_VALUE, 0),
                         indexRequest.type(), indexRequest.id(), indexRequest.version(), DocWriteResponse.Result.NOOP
@@ -688,16 +719,19 @@ void markCurrentItemAsDropped() {
             );
         }
 
-        void markCurrentItemAsFailed(Exception e) {
-            IndexRequest indexRequest = getIndexWriteRequest(bulkRequest.requests().get(currentSlot));
+        synchronized void markItemAsFailed(int slot, Exception e) {
+            IndexRequest indexRequest = getIndexWriteRequest(bulkRequest.requests().get(slot));
+            LOGGER.debug(() -> new ParameterizedMessage("failed to execute pipeline [{}] for document [{}/{}/{}]",
+                indexRequest.getPipeline(), indexRequest.index(), indexRequest.type(), indexRequest.id()), e);
+
             // We hit an error during preprocessing of a request, so we:
            // 1) Remember the request's item slot in the bulk, so that when we're done processing all requests we know what failed
             // 2) Add a bulk item failure for this request
             // 3) Continue with the next request in the bulk.
-            failedSlots.set(currentSlot);
+            failedSlots.set(slot);
             BulkItemResponse.Failure failure = new BulkItemResponse.Failure(indexRequest.index(), indexRequest.type(),
                 indexRequest.id(), e);
-            itemResponses.add(new BulkItemResponse(currentSlot, indexRequest.opType(), failure));
+            itemResponses.add(new BulkItemResponse(slot, indexRequest.opType(), failure));
         }
 
     }
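
The thread hand-off rule in processBulkIndexIngestRequest above condenses to: continue
inline when the completion callback arrived on the thread that started the work, otherwise
re-dispatch so the rest of the bulk never runs on, say, a search thread. A minimal sketch of
that rule with a plain executor (illustrative, not the Elasticsearch source):

    import java.util.concurrent.ExecutorService;

    static void continueOnWritePool(Thread originalThread, ExecutorService writePool, Runnable continuation) {
        if (originalThread == Thread.currentThread()) {
            continuation.run();              // completed synchronously; already on the right thread
        } else {
            writePool.execute(continuation); // a processor completed us elsewhere; fork back
        }
    }

The synchronized markItemAsFailed/markItemAsDropped methods and the volatile currentSlot
follow from the same change: once processors may complete on arbitrary threads, the
bookkeeping in BulkRequestModifier can be mutated concurrently and needs safe publication.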