-
Notifications
You must be signed in to change notification settings - Fork 30
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Modify Jenkins Full E2E Integ Test to perform Transformations #1182
base: main
Are you sure you want to change the base?
Changes from 15 commits
569a292
413136f
e171f0f
90cee67
d0d91a3
79fe4bd
79b7af0
01a7d7c
b2cc0e1
fae7986
a6f51ef
77c9577
d3f0d21
552d7f7
c5fba6c
b189282
ecf71dc
0e2f453
34c967a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,282 @@ | ||
package org.opensearch.migrations.bulkload; | ||
|
||
import java.io.File; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.util.Arrays; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
import java.util.concurrent.CompletableFuture; | ||
import java.util.concurrent.TimeUnit; | ||
import java.util.function.Function; | ||
|
||
import org.opensearch.migrations.CreateSnapshot; | ||
import org.opensearch.migrations.bulkload.common.RestClient; | ||
import org.opensearch.migrations.bulkload.common.http.ConnectionContextTestParams; | ||
import org.opensearch.migrations.bulkload.framework.SearchClusterContainer; | ||
import org.opensearch.migrations.bulkload.http.ClusterOperations; | ||
import org.opensearch.migrations.bulkload.http.SearchClusterRequests; | ||
import org.opensearch.migrations.reindexer.tracing.DocumentMigrationTestContext; | ||
import org.opensearch.migrations.snapshot.creation.tracing.SnapshotTestContext; | ||
|
||
import lombok.AllArgsConstructor; | ||
import lombok.Getter; | ||
import lombok.SneakyThrows; | ||
import lombok.extern.slf4j.Slf4j; | ||
import org.hamcrest.MatcherAssert; | ||
import org.hamcrest.Matchers; | ||
import org.jetbrains.annotations.NotNull; | ||
import org.json.JSONArray; | ||
import org.json.JSONObject; | ||
import org.junit.jupiter.api.Assertions; | ||
import org.junit.jupiter.api.Tag; | ||
import org.junit.jupiter.api.Test; | ||
import org.testcontainers.containers.Network; | ||
|
||
@Slf4j | ||
@Tag("isolatedTest") | ||
public class CustomTransformationTest extends SourceTestBase { | ||
|
||
    // Network alias given to the target container so the spawned migration process can reach it by name.
    public static final String TARGET_DOCKER_HOSTNAME = "target";
    // Name of the snapshot taken from the source cluster and consumed by the migration process.
    public static final String SNAPSHOT_NAME = "test_snapshot";
|
||
@AllArgsConstructor | ||
@Getter | ||
private static class RunData { | ||
Path tempDirSnapshot; | ||
Path tempDirLucene; | ||
SearchClusterContainer targetContainer; | ||
} | ||
|
||
@Test | ||
public void testCustomTransformationProducesDesiredTargetClusterState() { | ||
String nameTransformation = createIndexNameTransformation("geonames", "geonames_transformed"); | ||
var expectedSourceMap = new HashMap<String, Integer>(); | ||
expectedSourceMap.put("geonames", 1); | ||
var expectedTargetMap = new HashMap<String, Integer>(); | ||
expectedTargetMap.put("geonames_transformed", 1); | ||
// 2 Shards, for each shard, expect three status code 2 and one status code 0 | ||
int shards = 2; | ||
int migrationProcessesPerShard = 4; | ||
int continueExitCode = 2; | ||
int finalExitCodePerShard = 0; | ||
runTestProcessWithCheckpoint(continueExitCode, (migrationProcessesPerShard - 1) * shards, | ||
finalExitCodePerShard, shards, expectedSourceMap, expectedTargetMap, | ||
d -> runProcessAgainstTarget(d.tempDirSnapshot, d.tempDirLucene, d.targetContainer, nameTransformation | ||
)); | ||
} | ||
|
||
    /**
     * Drives a full snapshot-then-migrate cycle: starts a source (ES 7.10.2) and target
     * (OS 2.14) container, seeds one index with one document, snapshots the source, then
     * repeatedly invokes {@code processRunner} until the expected number of final exit
     * codes has been observed (or the retry budget is exhausted). Finally validates the
     * per-index document counts on both clusters.
     *
     * @param initialExitCode      expected "continue" exit code
     *                             NOTE(review): this parameter is not referenced in the
     *                             method body — confirm whether it can be removed.
     * @param initialExitCodes     expected number of non-final ("continue") runs; the retry
     *                             loop is capped at twice this value
     * @param eventualExitCode     exit code that marks a shard's migration as complete
     * @param eventualExitCodeCount number of final exit codes to wait for (one per shard)
     * @param expectedSourceDocs   expected index-name -> doc-count map on the source
     * @param expectedTargetDocs   expected index-name -> doc-count map on the target
     * @param processRunner        launches one migration process run and returns its exit code
     */
    @SneakyThrows
    private void runTestProcessWithCheckpoint(int initialExitCode, int initialExitCodes,
                                              int eventualExitCode, int eventualExitCodeCount,
                                              Map<String, Integer> expectedSourceDocs,
                                              Map<String, Integer> expectedTargetDocs,
                                              Function<RunData, Integer> processRunner) {
        final var testSnapshotContext = SnapshotTestContext.factory().noOtelTracking();

        var tempDirSnapshot = Files.createTempDirectory("opensearchMigrationReindexFromSnapshot_test_snapshot");
        var tempDirLucene = Files.createTempDirectory("opensearchMigrationReindexFromSnapshot_test_lucene");

        try (
            var esSourceContainer = new SearchClusterContainer(SearchClusterContainer.ES_V7_10_2)
                .withAccessToHost(true);
            var network = Network.newNetwork();
            var osTargetContainer = new SearchClusterContainer(SearchClusterContainer.OS_V2_14_0)
                .withAccessToHost(true)
                .withNetwork(network)
                .withNetworkAliases(TARGET_DOCKER_HOSTNAME);
        ) {
            // Start both containers concurrently to cut test wall-clock time.
            CompletableFuture.allOf(
                CompletableFuture.runAsync(esSourceContainer::start),
                CompletableFuture.runAsync(osTargetContainer::start)
            ).join();

            var sourceClusterOperations = new ClusterOperations(esSourceContainer.getUrl());

            var shards = 2;
            // Number of default shards is different across different versions on ES/OS.
            // So we explicitly set it.
            String body = String.format(
                "{" +
                "  \"settings\": {" +
                "    \"index\": {" +
                "      \"number_of_shards\": %d," +
                "      \"number_of_replicas\": 0" +
                "    }" +
                "  }" +
                "}",
                shards
            );
            sourceClusterOperations.createIndex("geonames", body);
            sourceClusterOperations.createDocument("geonames", "111", "{\"author\":\"Tobias Funke\", \"category\": \"cooking\"}");

            // Create the snapshot from the source cluster
            var args = new CreateSnapshot.Args();
            args.snapshotName = SNAPSHOT_NAME;
            args.fileSystemRepoPath = SearchClusterContainer.CLUSTER_SNAPSHOT_DIR;
            args.sourceArgs.host = esSourceContainer.getUrl();

            var snapshotCreator = new CreateSnapshot(args, testSnapshotContext.createSnapshotCreateContext());
            snapshotCreator.run();

            // Copy the snapshot out of the container so the spawned process can read it locally.
            esSourceContainer.copySnapshotData(tempDirSnapshot.toString());

            // Re-run the migration process until enough final exit codes are seen.
            // The loop is bounded at (initialExitCodes * 2) runs to avoid hanging forever.
            int exitCode;
            int finalExitCodeCount = 0;
            int runs = 0;
            do {
                exitCode = processRunner.apply(new RunData(tempDirSnapshot, tempDirLucene, osTargetContainer));
                runs++;
                if (exitCode == eventualExitCode) {
                    finalExitCodeCount++;
                }
                log.atInfo().setMessage("Process exited with code: {}").addArgument(exitCode).log();
                // Clean tree for subsequent run
                deleteTree(tempDirLucene);
            } while (finalExitCodeCount < eventualExitCodeCount && runs < initialExitCodes * 2);

            // Assert doc count on the source and target cluster match expected
            validateFinalClusterDocs(
                esSourceContainer,
                osTargetContainer,
                DocumentMigrationTestContext.factory().noOtelTracking(),
                expectedSourceDocs,
                expectedTargetDocs
            );
        } finally {
            deleteTree(tempDirSnapshot);
        }
    }
|
||
    /**
     * Builds a doc-transformer configuration (serialized as a JSON string) that redirects
     * documents from one index to another. The produced config is a single
     * {@code JsonConditionalTransformerProvider}: a {@code JsonJMESPathPredicateProvider}
     * matches documents whose {@code index._index} equals {@code existingIndexName}, and a
     * {@code JsonJoltTransformerProvider} (Jolt {@code modify-overwrite-beta}) overwrites
     * that {@code _index} field with {@code newIndexName}. The leading backslash in
     * {@code \\_index} escapes the key for Jolt so it is treated literally.
     *
     * @param existingIndexName source index name to match on
     * @param newIndexName      index name the matched documents should be written to
     * @return the transformer configuration as a JSON array string
     */
    private static String createIndexNameTransformation(String existingIndexName, String newIndexName) {
        JSONArray rootArray = new JSONArray();
        JSONObject firstObject = new JSONObject();
        JSONArray jsonConditionalTransformerProvider = new JSONArray();

        // JsonJMESPathPredicateProvider object
        JSONObject jsonJMESPathPredicateProvider = new JSONObject();
        jsonJMESPathPredicateProvider.put("script", String.format("index._index == '%s'", existingIndexName));
        JSONObject jsonJMESPathPredicateWrapper = new JSONObject();
        jsonJMESPathPredicateWrapper.put("JsonJMESPathPredicateProvider", jsonJMESPathPredicateProvider);
        jsonConditionalTransformerProvider.put(jsonJMESPathPredicateWrapper);

        JSONArray transformerList = new JSONArray();

        // First JsonJoltTransformerProvider
        JSONObject firstJoltTransformer = new JSONObject();
        JSONObject firstJoltScript = new JSONObject();
        firstJoltScript.put("operation", "modify-overwrite-beta");
        firstJoltScript.put("spec", new JSONObject().put("index", new JSONObject().put("\\_index", newIndexName)));
        firstJoltTransformer.put("JsonJoltTransformerProvider", new JSONObject().put("script", firstJoltScript));
        transformerList.put(firstJoltTransformer);

        jsonConditionalTransformerProvider.put(transformerList);
        firstObject.put("JsonConditionalTransformerProvider", jsonConditionalTransformerProvider);
        rootArray.put(firstObject);
        return rootArray.toString();
    }
|
||
@SneakyThrows | ||
private static int runProcessAgainstTarget( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like this was basically copied from the ProcessLifecycleTest. Can you please find a way to share/re-use rather than copy? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've done a good amount of refactoring so this should be different, with some shared code move into parent class |
||
Path tempDirSnapshot, | ||
Path tempDirLucene, | ||
SearchClusterContainer targetContainer, | ||
String transformations | ||
) | ||
{ | ||
String targetAddress = targetContainer.getUrl(); | ||
|
||
int timeoutSeconds = 30; | ||
ProcessBuilder processBuilder = setupProcess(tempDirSnapshot, tempDirLucene, targetAddress, transformations); | ||
|
||
var process = runAndMonitorProcess(processBuilder); | ||
boolean finished = process.waitFor(timeoutSeconds, TimeUnit.SECONDS); | ||
if (!finished) { | ||
log.atError().setMessage("Process timed out, attempting to kill it...").log(); | ||
process.destroy(); // Try to be nice about things first... | ||
if (!process.waitFor(10, TimeUnit.SECONDS)) { | ||
log.atError().setMessage("Process still running, attempting to force kill it...").log(); | ||
process.destroyForcibly(); | ||
} | ||
Assertions.fail("The process did not finish within the timeout period (" + timeoutSeconds + " seconds)."); | ||
} | ||
|
||
return process.exitValue(); | ||
} | ||
|
||
|
||
@NotNull | ||
private static ProcessBuilder setupProcess( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like this was basically copied from the ProcessLifecycleTest. Can you please find a way to share/re-use rather than copy? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
Path tempDirSnapshot, | ||
Path tempDirLucene, | ||
String targetAddress, | ||
String transformations | ||
) { | ||
String classpath = System.getProperty("java.class.path"); | ||
String javaHome = System.getProperty("java.home"); | ||
String javaExecutable = javaHome + File.separator + "bin" + File.separator + "java"; | ||
|
||
String[] args = { | ||
"--snapshot-name", | ||
SNAPSHOT_NAME, | ||
"--snapshot-local-dir", | ||
tempDirSnapshot.toString(), | ||
"--lucene-dir", | ||
tempDirLucene.toString(), | ||
"--target-host", | ||
targetAddress, | ||
"--documents-per-bulk-request", | ||
"5", | ||
"--max-connections", | ||
"4", | ||
"--source-version", | ||
"ES_7_10", | ||
"--doc-transformer-config", | ||
transformations, | ||
}; | ||
|
||
// Kick off the doc migration process | ||
log.atInfo().setMessage("Running RfsMigrateDocuments with args: {}") | ||
.addArgument(() -> Arrays.toString(args)) | ||
.log(); | ||
ProcessBuilder processBuilder = new ProcessBuilder( | ||
javaExecutable, | ||
|
||
"-cp", | ||
classpath, | ||
"org.opensearch.migrations.RfsMigrateDocuments" | ||
); | ||
processBuilder.command().addAll(Arrays.asList(args)); | ||
processBuilder.redirectErrorStream(true); | ||
processBuilder.redirectOutput(); | ||
return processBuilder; | ||
} | ||
|
||
private static void validateFinalClusterDocs( | ||
SearchClusterContainer esSourceContainer, | ||
SearchClusterContainer osTargetContainer, | ||
DocumentMigrationTestContext context, | ||
Map<String, Integer> expectedSourceDocs, | ||
Map<String, Integer> expectedTargetDocs | ||
) { | ||
var targetClient = new RestClient(ConnectionContextTestParams.builder() | ||
.host(osTargetContainer.getUrl()) | ||
.build() | ||
.toConnectionContext() | ||
); | ||
var sourceClient = new RestClient(ConnectionContextTestParams.builder() | ||
.host(esSourceContainer.getUrl()) | ||
.build() | ||
.toConnectionContext() | ||
); | ||
|
||
var requests = new SearchClusterRequests(context); | ||
var sourceMap = requests.getMapOfIndexAndDocCount(sourceClient); | ||
var refreshResponse = targetClient.get("_refresh", context.createUnboundRequestContext()); | ||
Assertions.assertEquals(200, refreshResponse.statusCode); | ||
var targetMap = requests.getMapOfIndexAndDocCount(targetClient); | ||
|
||
MatcherAssert.assertThat(sourceMap, Matchers.equalTo(expectedSourceDocs)); | ||
MatcherAssert.assertThat(targetMap, Matchers.equalTo(expectedTargetDocs)); | ||
} | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Non-blocking: why define this shard count again? Maybe you can use a class-level member instead of redefining.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be removed now, I've done a good amount of refactoring to remove extra logic for this test class that isn't needed