forked from opensearch-project/opensearch-migrations
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
RFS now uses reactor-netty for bulk indexing (opensearch-project#607)
* Checkpoint: improved ConnectionDetails; unit tested it Signed-off-by: Chris Helma <[email protected]> * RFS now uses reactor-netty and bulk indexing Signed-off-by: Chris Helma <[email protected]> * Fixes per PR; unit tested LuceneDocumentsReader Signed-off-by: Chris Helma <[email protected]> * Updated a unit test name Signed-off-by: Chris Helma <[email protected]> * Updated a method name per PR feedback Signed-off-by: Chris Helma <[email protected]> --------- Signed-off-by: Chris Helma <[email protected]>
- Loading branch information
1 parent
ca5c157
commit 9cdf46c
Showing
10 changed files
with
376 additions
and
52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
110 changes: 101 additions & 9 deletions
110
RFS/src/main/java/com/rfs/common/DocumentReindexer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,124 @@ | ||
package com.rfs.common; | ||
|
||
import java.time.Duration; | ||
import java.util.Base64; | ||
import java.util.List; | ||
|
||
import io.netty.buffer.Unpooled; | ||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
import org.apache.lucene.document.Document; | ||
import reactor.core.publisher.Flux; | ||
import reactor.core.publisher.Mono; | ||
import reactor.netty.http.client.HttpClient; | ||
import reactor.util.retry.Retry; | ||
|
||
|
||
public class DocumentReindexer { | ||
private static final Logger logger = LogManager.getLogger(DocumentReindexer.class); | ||
private static final int MAX_BATCH_SIZE = 1000; // Arbitrarily chosen | ||
|
||
public static void reindex(String indexName, Flux<Document> documentStream, ConnectionDetails targetConnection) throws Exception { | ||
String targetUrl = "/" + indexName + "/_bulk"; | ||
HttpClient client = HttpClient.create() | ||
.host(targetConnection.hostName) | ||
.port(targetConnection.port) | ||
.headers(h -> { | ||
h.set("Content-Type", "application/json"); | ||
if (targetConnection.authType == ConnectionDetails.AuthType.BASIC) { | ||
String credentials = targetConnection.username + ":" + targetConnection.password; | ||
String encodedCredentials = Base64.getEncoder().encodeToString(credentials.getBytes()); | ||
h.set("Authorization", "Basic " + encodedCredentials); | ||
} | ||
}); | ||
|
||
documentStream | ||
.map(DocumentReindexer::convertDocumentToBulkSection) // Convert each Document to part of a bulk operation | ||
.buffer(MAX_BATCH_SIZE) // Collect until you hit the batch size | ||
.map(DocumentReindexer::convertToBulkRequestBody) // Assemble the bulk request body from the parts | ||
.flatMap(bulkJson -> sendBulkRequest(client, targetUrl, bulkJson)) // Send the request | ||
.retryWhen(Retry.backoff(3, Duration.ofSeconds(1)).maxBackoff(Duration.ofSeconds(5))) | ||
.subscribe( | ||
response -> logger.info("Batch uploaded successfully"), | ||
error -> logger.error("Failed to upload batch", error) | ||
); | ||
} | ||
|
||
public static void reindex(String indexName, Document document, ConnectionDetails targetConnection) throws Exception { | ||
// Get the document details | ||
private static String convertDocumentToBulkSection(Document document) { | ||
String id = Uid.decodeId(document.getBinaryValue("_id").bytes); | ||
String source = document.getBinaryValue("_source").utf8ToString(); | ||
String action = "{\"index\": {\"_id\": \"" + id + "\"}}"; | ||
|
||
logger.info("Reindexing document - Index: " + indexName + ", Document ID: " + id); | ||
return action + "\n" + source; | ||
} | ||
|
||
// Assemble the request details | ||
String path = indexName + "/_doc/" + id; | ||
String body = source; | ||
private static String convertToBulkRequestBody(List<String> bulkSections) { | ||
logger.info(bulkSections.size() + " documents in current bulk request"); | ||
StringBuilder builder = new StringBuilder(); | ||
for (String section : bulkSections) { | ||
builder.append(section).append("\n"); | ||
} | ||
return builder.toString(); | ||
} | ||
|
||
// Send the request | ||
RestClient client = new RestClient(targetConnection); | ||
client.put(path, body, false); | ||
private static Mono<Void> sendBulkRequest(HttpClient client, String url, String bulkJson) { | ||
return client.post() | ||
.uri(url) | ||
.send(Flux.just(Unpooled.wrappedBuffer(bulkJson.getBytes()))) | ||
.responseSingle((res, content) -> | ||
content.asString() // Convert the response content to a string | ||
.map(body -> new BulkResponseDetails(res.status().code(), body)) // Map both status code and body into a response details object | ||
) | ||
.flatMap(responseDetails -> { | ||
// Something bad happened with our request, log it | ||
if (responseDetails.hasBadStatusCode()) { | ||
logger.error(responseDetails.getFailureMessage()); | ||
} | ||
// Some of the bulk operations failed, log it | ||
else if (responseDetails.hasFailedOperations()) { | ||
logger.error(responseDetails.getFailureMessage()); | ||
} | ||
return Mono.just(responseDetails); | ||
}) | ||
.doOnError(err -> { | ||
// We weren't even able to complete the request, log it | ||
logger.error("Bulk request failed", err); | ||
}) | ||
.then(); | ||
} | ||
|
||
public static void refreshAllDocuments(ConnectionDetails targetConnection) throws Exception { | ||
// Send the request | ||
RestClient client = new RestClient(targetConnection); | ||
client.get("_refresh", false); | ||
} | ||
|
||
static class BulkResponseDetails { | ||
public final int statusCode; | ||
public final String body; | ||
|
||
BulkResponseDetails(int statusCode, String body) { | ||
this.statusCode = statusCode; | ||
this.body = body; | ||
} | ||
|
||
public boolean hasBadStatusCode() { | ||
return !(statusCode == 200 || statusCode == 201); | ||
} | ||
|
||
public boolean hasFailedOperations() { | ||
return body.contains("\"errors\":true"); | ||
} | ||
|
||
public String getFailureMessage() { | ||
String failureMessage; | ||
if (hasBadStatusCode()) { | ||
failureMessage = "Bulk request failed. Status code: " + statusCode + ", Response body: " + body; | ||
} else { | ||
failureMessage = "Bulk request succeeded, but some operations failed. Response body: " + body; | ||
} | ||
|
||
return failureMessage; | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.