Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding subshard work items on lease expiry #1198

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,17 @@

/** Options index configuration */
public class IndexOptions {
public static final String PROP_NUMBER_OF_SHARDS = "index.number_of_shards";
public static final String PROP_NUMBER_OF_REPLICAS = "index.number_of_replicas";
public static final String PROP_QUERIES_CACHE_ENABLED = "index.queries.cache.enabled";
public static final String PROP_REQUESTS_CACHE_ENABLED = "index.requests.cache.enable";

private static final ObjectMapper mapper = new ObjectMapper();

/** Improvement to add more flexibility with these values */
public final ObjectNode indexSettings = mapper.createObjectNode()
.put("index.number_of_shards", 5)
.put("index.number_of_replicas", 0)
.put("index.queries.cache.enabled", false)
.put("index.requests.cache.enable", false);
.put(PROP_NUMBER_OF_SHARDS, 5)
.put(PROP_NUMBER_OF_REPLICAS, 0)
.put(PROP_QUERIES_CACHE_ENABLED, false)
.put(PROP_REQUESTS_CACHE_ENABLED, false);
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public void generate(WorkloadOptions options) {
// This workload creates ALL documents in memory, schedules them and waits for completion.
// If larger scale is needed remove the toList() calls and stream all data.
var allDocs = new ArrayList<CompletableFuture<?>>();
for (var workload : options.workloads) {
for (var workload : options.getWorkloads()) {
var workloadInstance = workload.getNewInstance().get();
var docs = workloadInstance
.indexNames()
Expand All @@ -43,27 +43,37 @@ public void generate(WorkloadOptions options) {

private List<CompletableFuture<?>> generateDocs(String indexName, Workload workload, WorkloadOptions options) {
// This happens inline to be sure the index exists before docs are indexed on it
var indexRequestDoc = workload.createIndex(options.index.indexSettings.deepCopy());
var indexRequestDoc = workload.createIndex(options.getIndex().indexSettings.deepCopy());
log.atInfo().setMessage("Creating index {} with {}").addArgument(indexName).addArgument(indexRequestDoc).log();
client.createIndex(indexName, indexRequestDoc, null);

var docIdCounter = new AtomicInteger(0);
var allDocs = workload.createDocs(options.totalDocs)
var allDocs = workload.createDocs(options.getTotalDocs())
.map(doc -> {
log.atTrace().setMessage("Created doc for index {}: {}")
.addArgument(indexName)
.addArgument(doc::toString).log();
return new BulkDocSection(indexName + "_" + docIdCounter.incrementAndGet(), indexName, null, doc.toString());
var docId = docIdCounter.incrementAndGet();
return new BulkDocSection(indexName + "_" + docId, indexName, null, doc.toString(), null);
})
.collect(Collectors.toList());

var bulkDocGroups = new ArrayList<List<BulkDocSection>>();
for (int i = 0; i < allDocs.size(); i += options.maxBulkBatchSize) {
bulkDocGroups.add(allDocs.subList(i, Math.min(i + options.maxBulkBatchSize, allDocs.size())));
for (int i = 0; i < allDocs.size(); i += options.getMaxBulkBatchSize()) {
bulkDocGroups.add(allDocs.subList(i, Math.min(i + options.getMaxBulkBatchSize(), allDocs.size())));
}

return bulkDocGroups.stream()
.map(docs -> client.sendBulkRequest(indexName, docs, null).toFuture())
.map(docs -> {
var sendFuture = client.sendBulkRequest(indexName, docs, null).toFuture();
if (options.isRefreshAfterEachWrite()) {
sendFuture.thenRun(() -> client.refresh(null));
// Requests will be sent in parallel unless we wait for completion
// This allows more segments to be created
sendFuture.join();
}
return sendFuture;
})
.collect(Collectors.toList());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,22 @@
import org.opensearch.migrations.data.workloads.Workloads;

import com.beust.jcommander.Parameter;
import lombok.Getter;
import lombok.Setter;

@Getter
@Setter
public class WorkloadOptions {
@Parameter(names = { "--workloads", "-w" }, description = "The list of workloads to run, defaults to all available workloads.", required = false)
public List<Workloads> workloads = Arrays.asList(Workloads.values());
private List<Workloads> workloads = Arrays.asList(Workloads.values());

@Parameter(names = { "--docs-per-workload-count" }, description = "The number of documents per workload")
public int totalDocs = 1000;
private int totalDocs = 1000;

@Parameter(names = { "--max-bulk-request-batch-count" }, description = "The maximum batch count for bulk requests")
public int maxBulkBatchSize = 50;
private int maxBulkBatchSize = 50;

public final IndexOptions index = new IndexOptions();
private final IndexOptions index = new IndexOptions();

private boolean refreshAfterEachWrite = false;
}
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ void generateData(final SearchClusterContainer targetCluster) {
assertThat(refreshResponse.body, refreshResponse.statusCode, equalTo(200));

// Confirm all indexes have the expected number of docs
var defaultCount = arguments.workloadOptions.totalDocs;
var defaultCount = arguments.workloadOptions.getTotalDocs();
var expectedIndexes = Map.of(
"geonames", defaultCount,
"logs-181998", defaultCount,
Expand Down
Loading
Loading