Transaction batching Kafka producer (#735)
* Add experimental Kafka batch producer
* Initial refactor of experimental batch producer
* Cleanup & rename refactor
* Additional cleanup, documentation, error handling, and metrics added
* Rework exception handling to better catch errors
* Adjust producer linger/batch configs
* Add bulk ingest edge instrumentation
* Add support for additional kafka props on bulk ingest
* Remove default configs in favor of env settings
* Add ability to override blocking task executor limits for bulk ingest
* Add timer for bulk ingest
* Add a stall counter for bulk ingest
* Test pooling kafka producers
* Increase default kafka producer pool, make configurable
* Move bulk ingest to non-blocking virtual threads
* Remove pooled producer logic, unused metrics, early exit
* Remove unused blocking config
* PR feedback

---------

Co-authored-by: Bryan Burkholder <[email protected]>
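For orientation, the commit title and the "linger/batch configs" bullet refer to the standard Kafka producer batching and transaction machinery. Below is a minimal sketch of that pattern against the stock org.apache.kafka.clients producer API; the broker address, topic name, config values, and transactional id are all illustrative assumptions, not values taken from this commit.

import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.ByteArraySerializer;

public class TransactionalBatchSketch {
  public static void main(String[] args) throws Exception {
    Properties props = new Properties();
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // assumed broker
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
    // linger.ms / batch.size are the standard producer batching knobs; these values are
    // placeholders, not the ones this commit settles on (it reads them from env settings).
    props.put(ProducerConfig.LINGER_MS_CONFIG, "25");
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, String.valueOf(512 * 1024));
    // A transactional id lets a group of records commit or abort atomically.
    props.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, "bulk-ingest-sketch");

    try (KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(props)) {
      producer.initTransactions();
      List<byte[]> batch =
          List.of(
              "doc1".getBytes(StandardCharsets.UTF_8), "doc2".getBytes(StandardCharsets.UTF_8));
      try {
        producer.beginTransaction();
        for (byte[] doc : batch) {
          producer.send(new ProducerRecord<>("ingest-topic", doc)); // assumed topic name
        }
        producer.commitTransaction(); // all records become visible together, or none do
      } catch (Exception e) {
        producer.abortTransaction();
        throw e;
      }
    }
  }
}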
Showing 23 changed files with 1,065 additions and 572 deletions.
kaldb/src/main/java/com/slack/kaldb/bulkIngestApi/BulkIngestApi.java (105 additions, 0 deletions)
@@ -0,0 +1,105 @@
package com.slack.kaldb.bulkIngestApi;

import static com.linecorp.armeria.common.HttpStatus.INTERNAL_SERVER_ERROR;
import static com.linecorp.armeria.common.HttpStatus.TOO_MANY_REQUESTS;

import com.linecorp.armeria.common.HttpResponse;
import com.linecorp.armeria.server.annotation.Post;
import com.slack.kaldb.bulkIngestApi.opensearch.BulkApiRequestParser;
import com.slack.service.murron.trace.Trace;
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Timer;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This class defines the HTTP endpoint behavior for bulk ingest. It is expected to handle
 * appropriate rate limiting and error handling, and to submit the parsed messages to Kafka for
 * ingestion.
 */
public class BulkIngestApi {
  private static final Logger LOG = LoggerFactory.getLogger(BulkIngestApi.class);
  private final BulkIngestKafkaProducer bulkIngestKafkaProducer;
  private final DatasetRateLimitingService datasetRateLimitingService;
  private final MeterRegistry meterRegistry;
  private final Counter incomingByteTotal;
  private final Timer bulkIngestTimer;
  private final String BULK_INGEST_INCOMING_BYTE_TOTAL = "kaldb_preprocessor_incoming_byte";
  private final String BULK_INGEST_TIMER = "kaldb_preprocessor_bulk_ingest";

  public BulkIngestApi(
      BulkIngestKafkaProducer bulkIngestKafkaProducer,
      DatasetRateLimitingService datasetRateLimitingService,
      MeterRegistry meterRegistry) {

    this.bulkIngestKafkaProducer = bulkIngestKafkaProducer;
    this.datasetRateLimitingService = datasetRateLimitingService;
    this.meterRegistry = meterRegistry;
    this.incomingByteTotal = meterRegistry.counter(BULK_INGEST_INCOMING_BYTE_TOTAL);
    this.bulkIngestTimer = meterRegistry.timer(BULK_INGEST_TIMER);
  }

  @Post("/_bulk")
  public HttpResponse addDocument(String bulkRequest) {
    // 1. Kaldb does not support the concept of "updates". It's always an add.
    // 2. The "index" is used as the span name.
    CompletableFuture<HttpResponse> future = new CompletableFuture<>();
    Timer.Sample sample = Timer.start(meterRegistry);
    future.thenRun(() -> sample.stop(bulkIngestTimer));

    try {
      byte[] bulkRequestBytes = bulkRequest.getBytes(StandardCharsets.UTF_8);
      incomingByteTotal.increment(bulkRequestBytes.length);
      Map<String, List<Trace.Span>> docs = BulkApiRequestParser.parseRequest(bulkRequestBytes);

      // todo - our rate limiter doesn't have a way to acquire permits across multiple datasets,
      // so as a limitation today we reject any request that has documents against multiple
      // indexes. We think most indexing requests will be against 1 index.
      if (docs.keySet().size() > 1) {
        BulkIngestResponse response =
            new BulkIngestResponse(0, 0, "request must contain only 1 unique index");
        future.complete(HttpResponse.ofJson(INTERNAL_SERVER_ERROR, response));
        return HttpResponse.of(future);
      }

      for (Map.Entry<String, List<Trace.Span>> indexDocs : docs.entrySet()) {
        final String index = indexDocs.getKey();
        if (!datasetRateLimitingService.tryAcquire(index, indexDocs.getValue())) {
          BulkIngestResponse response = new BulkIngestResponse(0, 0, "rate limit exceeded");
          future.complete(HttpResponse.ofJson(TOO_MANY_REQUESTS, response));
          return HttpResponse.of(future);
        }
      }

      // todo - explore the possibility of using the blocking task executor backed by virtual
      // threads to fulfill this
      Thread.ofVirtual()
          .start(
              () -> {
                try {
                  BulkIngestResponse response =
                      bulkIngestKafkaProducer.submitRequest(docs).getResponse();
                  future.complete(HttpResponse.ofJson(response));
                } catch (InterruptedException e) {
                  LOG.error("Request failed", e);
                  future.complete(
                      HttpResponse.ofJson(
                          INTERNAL_SERVER_ERROR, new BulkIngestResponse(0, 0, e.getMessage())));
                }
              });
    } catch (Exception e) {
      LOG.error("Request failed", e);
      BulkIngestResponse response = new BulkIngestResponse(0, 0, e.getMessage());
      future.complete(HttpResponse.ofJson(INTERNAL_SERVER_ERROR, response));
    }

    return HttpResponse.of(future);
  }
}
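For context on how this new endpoint is exercised: it accepts an OpenSearch-style NDJSON bulk body on /_bulk. Below is a hedged client sketch using Java's built-in HttpClient, where the host, port, index name, and document fields are assumptions for illustration rather than values from this repository.

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class BulkIngestClientSketch {
  public static void main(String[] args) throws Exception {
    // OpenSearch-style bulk payload: an action line followed by a document line,
    // newline-delimited. Index name and fields are illustrative.
    String body =
        "{\"index\": {\"_index\": \"test-index\"}}\n"
            + "{\"service_name\": \"test\", \"message\": \"hello\"}\n";

    HttpRequest request =
        HttpRequest.newBuilder()
            .uri(URI.create("http://localhost:8080/_bulk")) // assumed host/port
            .header("Content-Type", "application/x-ndjson")
            .POST(HttpRequest.BodyPublishers.ofString(body))
            .build();

    HttpResponse<String> response =
        HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
    // Per the handler above: 200 on success, 429 when the rate limiter rejects,
    // 500 on parse/produce errors or multi-index requests.
    System.out.println(response.statusCode() + " " + response.body());
  }
}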