From 1402cedf99a0bae87a573c14a6f7b42a5f846705 Mon Sep 17 00:00:00 2001 From: PritLadani Date: Mon, 19 Dec 2022 22:44:20 +0530 Subject: [PATCH 01/34] Cancellation of in-flight search requests at coordinator level Signed-off-by: PritLadani --- .../common/settings/ClusterSettings.java | 9 +- .../SearchBackpressureService.java | 125 +++++++++++++----- .../settings/SearchBackpressureSettings.java | 6 + .../settings/SearchTaskSettings.java | 62 +++++++++ .../stats/SearchBackpressureStats.java | 16 ++- .../backpressure/stats/SearchTaskStats.java | 100 ++++++++++++++ .../trackers/CpuUsageTracker.java | 47 ++++++- .../trackers/ElapsedTimeTracker.java | 44 +++++- .../trackers/HeapUsageTracker.java | 110 +++++++++++++-- .../trackers/TaskResourceUsageTracker.java | 28 +++- .../SearchBackpressureServiceTests.java | 29 ++-- .../stats/SearchBackpressureStatsTests.java | 1 + .../stats/SearchTaskStatsTests.java | 44 ++++++ .../tasks/TaskCancellationTests.java | 24 ++-- 14 files changed, 565 insertions(+), 80 deletions(-) create mode 100644 server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java create mode 100644 server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java create mode 100644 server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 7e9c7bd3123c5..8bf979c05ef30 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -44,6 +44,7 @@ import org.opensearch.search.backpressure.settings.NodeDuressSettings; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import 
org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; @@ -601,11 +602,17 @@ public void apply(Settings value, Settings current, Settings previous) { NodeDuressSettings.SETTING_CPU_THRESHOLD, NodeDuressSettings.SETTING_HEAP_THRESHOLD, SearchShardTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, + HeapUsageTracker.SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY, HeapUsageTracker.SETTING_HEAP_PERCENT_THRESHOLD, + HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY, HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD, + HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY, HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, + CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD, - ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD + ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, + ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, + SearchTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY ) ) ); diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index fd13198b957da..2465790176daa 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -12,6 +12,7 @@ import org.apache.logging.log4j.Logger; import org.opensearch.ExceptionsHelper; import org.opensearch.action.search.SearchShardTask; +import org.opensearch.action.search.SearchTask; import org.opensearch.common.component.AbstractLifecycleComponent; import 
org.opensearch.common.util.TokenBucket; import org.opensearch.monitor.jvm.JvmStats; @@ -20,6 +21,7 @@ import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; import org.opensearch.search.backpressure.stats.SearchShardTaskStats; +import org.opensearch.search.backpressure.stats.SearchTaskStats; import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; @@ -37,7 +39,9 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Comparator; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.concurrent.atomic.AtomicReference; import java.util.function.LongSupplier; @@ -68,9 +72,12 @@ public class SearchBackpressureService extends AbstractLifecycleComponent private final AtomicReference taskCancellationRateLimiter = new AtomicReference<>(); private final AtomicReference taskCancellationRatioLimiter = new AtomicReference<>(); - // Currently, only the state of SearchShardTask is being tracked. - // This can be generalized to Map once we start supporting cancellation of SearchTasks as well. 
- private final SearchBackpressureState state = new SearchBackpressureState(); + private final Map, SearchBackpressureState> searchBackpressureStates = new HashMap<>() { + { + put(SearchTask.class, new SearchBackpressureState()); + put(SearchShardTask.class, new SearchBackpressureState()); + } + }; public SearchBackpressureService( SearchBackpressureSettings settings, @@ -116,10 +123,15 @@ public SearchBackpressureService( ); this.taskCancellationRatioLimiter.set( - new TokenBucket(state::getCompletionCount, getSettings().getCancellationRatio(), getSettings().getCancellationBurst()) + new TokenBucket(this::getTaskCompletionCount, getSettings().getCancellationRatio(), getSettings().getCancellationBurst()) ); } + private long getTaskCompletionCount() { + return searchBackpressureStates.get(SearchTask.class).getCompletionCount() + searchBackpressureStates.get(SearchShardTask.class) + .getCompletionCount(); + } + void doRun() { SearchBackpressureMode mode = getSettings().getMode(); if (mode == SearchBackpressureMode.DISABLED) { @@ -130,18 +142,29 @@ void doRun() { return; } - // We are only targeting in-flight cancellation of SearchShardTask for now. - List searchShardTasks = getSearchShardTasks(); + List searchTasks = getSearchTasks(); + List searchShardTasks = getSearchShardTasks(); + List cancellableTasks = new ArrayList<>(); // Force-refresh usage stats of these tasks before making a cancellation decision. + taskResourceTrackingService.refreshResourceStats(searchTasks.toArray(new Task[0])); taskResourceTrackingService.refreshResourceStats(searchShardTasks.toArray(new Task[0])); - // Skip cancellation if the increase in heap usage is not due to search requests. 
- if (isHeapUsageDominatedBySearch(searchShardTasks) == false) { + // Check if increase in heap usage is due to SearchTasks + if (isHeapUsageDominatedBySearch(searchTasks, getSettings().getSearchTaskSettings().getTotalHeapBytesThreshold())) { + cancellableTasks.addAll(searchTasks); + } + + // Check if increase in heap usage is due to SearchShardTasks + if (isHeapUsageDominatedBySearch(searchShardTasks, getSettings().getSearchShardTaskSettings().getTotalHeapBytesThreshold())) { + cancellableTasks.addAll(searchShardTasks); + } + + if (cancellableTasks.isEmpty()) { return; } - for (TaskCancellation taskCancellation : getTaskCancellations(searchShardTasks)) { + for (TaskCancellation taskCancellation : getTaskCancellations(cancellableTasks)) { logger.debug( "[{} mode] cancelling task [{}] due to high resource consumption [{}]", mode.getName(), @@ -160,7 +183,10 @@ void doRun() { // Stop cancelling tasks if there are no tokens in either of the two token buckets. if (rateLimitReached && ratioLimitReached) { logger.debug("task cancellation limit reached"); - state.incrementLimitReachedCount(); + SearchBackpressureState searchBackpressureState = searchBackpressureStates.get( + (taskCancellation.getTask() instanceof SearchTask) ? SearchTask.class : SearchShardTask.class + ); + searchBackpressureState.incrementLimitReachedCount(); break; } @@ -187,9 +213,8 @@ boolean isNodeInDuress() { /** * Returns true if the increase in heap usage is due to search requests. 
*/ - boolean isHeapUsageDominatedBySearch(List searchShardTasks) { - long usage = searchShardTasks.stream().mapToLong(task -> task.getTotalResourceStats().getMemoryInBytes()).sum(); - long threshold = getSettings().getSearchShardTaskSettings().getTotalHeapBytesThreshold(); + boolean isHeapUsageDominatedBySearch(List cancellableTasks, long threshold) { + long usage = cancellableTasks.stream().mapToLong(task -> task.getTotalResourceStats().getMemoryInBytes()).sum(); if (usage < threshold) { logger.debug("heap usage not dominated by search requests [{}/{}]", usage, threshold); return false; @@ -201,7 +226,7 @@ boolean isHeapUsageDominatedBySearch(List searchShardTasks) { /** * Filters and returns the list of currently running SearchShardTasks. */ - List getSearchShardTasks() { + List getSearchShardTasks() { return taskResourceTrackingService.getResourceAwareTasks() .values() .stream() @@ -210,6 +235,18 @@ List getSearchShardTasks() { .collect(Collectors.toUnmodifiableList()); } + /** + * Filters and returns the list of currently running SearchTasks. + */ + List getSearchTasks() { + return taskResourceTrackingService.getResourceAwareTasks() + .values() + .stream() + .filter(task -> task instanceof SearchTask) + .map(task -> (SearchTask) task) + .collect(Collectors.toUnmodifiableList()); + } + /** * Returns a TaskCancellation wrapper containing the list of reasons (possibly zero), along with an overall * cancellation score for the given task. 
Cancelling a task with a higher score has better chance of recovering the @@ -222,13 +259,19 @@ TaskCancellation getTaskCancellation(CancellableTask task) { for (TaskResourceUsageTracker tracker : taskResourceUsageTrackers) { Optional reason = tracker.checkAndMaybeGetCancellationReason(task); if (reason.isPresent()) { + if (task instanceof SearchTask) { + callbacks.add(tracker::incrementSearchTaskCancellations); + } else { + callbacks.add(tracker::incrementSearchShardTaskCancellations); + } reasons.add(reason.get()); - callbacks.add(tracker::incrementCancellations); } } - if (task instanceof SearchShardTask) { - callbacks.add(state::incrementCancellationCount); + if (task instanceof SearchTask) { + callbacks.add(searchBackpressureStates.get(SearchTask.class)::incrementCancellationCount); + } else { + callbacks.add(searchBackpressureStates.get(SearchShardTask.class)::incrementCancellationCount); } return new TaskCancellation(task, reasons, callbacks); @@ -249,8 +292,12 @@ SearchBackpressureSettings getSettings() { return settings; } - SearchBackpressureState getState() { - return state; + SearchBackpressureState getSearchTasksState() { + return searchBackpressureStates.get(SearchTask.class); + } + + SearchBackpressureState getSearchShardTasksState() { + return searchBackpressureStates.get(SearchShardTask.class); } @Override @@ -259,19 +306,22 @@ public void onTaskCompleted(Task task) { return; } - if (task instanceof SearchShardTask == false) { + if (task instanceof SearchTask == false && task instanceof SearchShardTask == false) { return; } - SearchShardTask searchShardTask = (SearchShardTask) task; - if (searchShardTask.isCancelled() == false) { - state.incrementCompletionCount(); + CancellableTask cancellableTask = (CancellableTask) task; + SearchBackpressureState searchBackpressureState = searchBackpressureStates.get( + (task instanceof SearchTask) ? 
SearchTask.class : SearchShardTask.class + ); + if (cancellableTask.isCancelled() == false) { + searchBackpressureState.incrementCompletionCount(); } List exceptions = new ArrayList<>(); for (TaskResourceUsageTracker tracker : taskResourceUsageTrackers) { try { - tracker.update(searchShardTask); + tracker.update(task); } catch (Exception e) { exceptions.add(e); } @@ -282,7 +332,7 @@ public void onTaskCompleted(Task task) { @Override public void onCancellationRatioChanged() { taskCancellationRatioLimiter.set( - new TokenBucket(state::getCompletionCount, getSettings().getCancellationRatio(), getSettings().getCancellationBurst()) + new TokenBucket(this::getTaskCompletionCount, getSettings().getCancellationRatio(), getSettings().getCancellationBurst()) ); } @@ -321,15 +371,30 @@ protected void doStop() { protected void doClose() throws IOException {} public SearchBackpressureStats nodeStats() { - List searchShardTasks = getSearchShardTasks(); + List searchTasks = getSearchTasks(); + List searchShardTasks = getSearchShardTasks(); + + SearchTaskStats searchTaskStats = new SearchTaskStats( + searchBackpressureStates.get(SearchTask.class).getCancellationCount(), + searchBackpressureStates.get(SearchTask.class).getLimitReachedCount(), + taskResourceUsageTrackers.stream() + .collect( + Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.searchTaskStats(searchTasks)) + ) + ); SearchShardTaskStats searchShardTaskStats = new SearchShardTaskStats( - state.getCancellationCount(), - state.getLimitReachedCount(), + searchBackpressureStates.get(SearchShardTask.class).getCancellationCount(), + searchBackpressureStates.get(SearchShardTask.class).getLimitReachedCount(), taskResourceUsageTrackers.stream() - .collect(Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.stats(searchShardTasks))) + .collect( + Collectors.toUnmodifiableMap( + t -> TaskResourceUsageTrackerType.fromName(t.name()), + t -> 
t.searchShardTaskStats(searchShardTasks) + ) + ) ); - return new SearchBackpressureStats(searchShardTaskStats, getSettings().getMode()); + return new SearchBackpressureStats(searchTaskStats, searchShardTaskStats, getSettings().getMode()); } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java index df2c04a730fbc..3906228389729 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java @@ -110,12 +110,14 @@ public interface Listener { private final Settings settings; private final ClusterSettings clusterSettings; private final NodeDuressSettings nodeDuressSettings; + private final SearchTaskSettings searchTaskSettings; private final SearchShardTaskSettings searchShardTaskSettings; public SearchBackpressureSettings(Settings settings, ClusterSettings clusterSettings) { this.settings = settings; this.clusterSettings = clusterSettings; this.nodeDuressSettings = new NodeDuressSettings(settings, clusterSettings); + this.searchTaskSettings = new SearchTaskSettings(settings, clusterSettings); this.searchShardTaskSettings = new SearchShardTaskSettings(settings, clusterSettings); interval = new TimeValue(SETTING_INTERVAL_MILLIS.get(settings)); @@ -149,6 +151,10 @@ public NodeDuressSettings getNodeDuressSettings() { return nodeDuressSettings; } + public SearchTaskSettings getSearchTaskSettings() { + return searchTaskSettings; + } + public SearchShardTaskSettings getSearchShardTaskSettings() { return searchShardTaskSettings; } diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java new file mode 100644 index 0000000000000..f28cdd17a3cff 
--- /dev/null +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java @@ -0,0 +1,62 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.backpressure.settings; + +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.monitor.jvm.JvmStats; + +/** + * Defines the settings related to the cancellation of SearchTasks. + * + * @opensearch.internal + */ + +public class SearchTaskSettings { + private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); + + private static class Defaults { + private static final double TOTAL_HEAP_PERCENT_THRESHOLD = 0.05; + } + + /** + * Defines the heap usage threshold (in percentage) for the sum of heap usages across all search tasks + * before in-flight cancellation is applied. 
+ */ + private volatile double totalHeapPercentThreshold; + public static final Setting SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY = Setting.doubleSetting( + "search_backpressure.search_task.total_heap_percent_threshold", + Defaults.TOTAL_HEAP_PERCENT_THRESHOLD, + 0.0, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + public SearchTaskSettings(Settings settings, ClusterSettings clusterSettings) { + totalHeapPercentThreshold = SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY.get(settings); + clusterSettings.addSettingsUpdateConsumer( + SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY, + this::setTotalHeapPercentThreshold + ); + } + + public double getTotalHeapPercentThreshold() { + return totalHeapPercentThreshold; + } + + public long getTotalHeapBytesThreshold() { + return (long) (HEAP_SIZE_BYTES * getTotalHeapPercentThreshold()); + } + + private void setTotalHeapPercentThreshold(double totalHeapPercentThreshold) { + this.totalHeapPercentThreshold = totalHeapPercentThreshold; + } +} diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java index 3aec0dfc579c5..92a52b62477f2 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java @@ -22,21 +22,28 @@ * Stats related to search backpressure. 
*/ public class SearchBackpressureStats implements ToXContentFragment, Writeable { + private final SearchTaskStats searchTaskStats; private final SearchShardTaskStats searchShardTaskStats; private final SearchBackpressureMode mode; - public SearchBackpressureStats(SearchShardTaskStats searchShardTaskStats, SearchBackpressureMode mode) { + public SearchBackpressureStats( + SearchTaskStats searchTaskStats, + SearchShardTaskStats searchShardTaskStats, + SearchBackpressureMode mode + ) { + this.searchTaskStats = searchTaskStats; this.searchShardTaskStats = searchShardTaskStats; this.mode = mode; } public SearchBackpressureStats(StreamInput in) throws IOException { - this(new SearchShardTaskStats(in), SearchBackpressureMode.fromName(in.readString())); + this(new SearchTaskStats(in), new SearchShardTaskStats(in), SearchBackpressureMode.fromName(in.readString())); } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { return builder.startObject("search_backpressure") + .field("search_task", searchTaskStats) .field("search_shard_task", searchShardTaskStats) .field("mode", mode.getName()) .endObject(); @@ -44,6 +51,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws @Override public void writeTo(StreamOutput out) throws IOException { + searchTaskStats.writeTo(out); searchShardTaskStats.writeTo(out); out.writeString(mode.getName()); } @@ -53,11 +61,11 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; SearchBackpressureStats that = (SearchBackpressureStats) o; - return searchShardTaskStats.equals(that.searchShardTaskStats) && mode == that.mode; + return searchTaskStats.equals(that.searchTaskStats) && searchShardTaskStats.equals(that.searchShardTaskStats) && mode == that.mode; } @Override public int hashCode() { - return Objects.hash(searchShardTaskStats, mode); + return Objects.hash(searchTaskStats, 
searchShardTaskStats, mode); } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java new file mode 100644 index 0000000000000..87318a60b46fd --- /dev/null +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java @@ -0,0 +1,100 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.backpressure.stats; + +import org.opensearch.common.collect.MapBuilder; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.common.xcontent.ToXContent; +import org.opensearch.common.xcontent.ToXContentObject; +import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.search.backpressure.trackers.CpuUsageTracker; +import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; +import org.opensearch.search.backpressure.trackers.HeapUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; + +import java.io.IOException; +import java.util.Map; +import java.util.Objects; + +/** + * Stats related to cancelled search tasks. 
+ */ + +public class SearchTaskStats implements ToXContentObject, Writeable { + private final long cancellationCount; + private final long limitReachedCount; + private final Map resourceUsageTrackerStats; + + public SearchTaskStats( + long cancellationCount, + long limitReachedCount, + Map resourceUsageTrackerStats + ) { + this.cancellationCount = cancellationCount; + this.limitReachedCount = limitReachedCount; + this.resourceUsageTrackerStats = resourceUsageTrackerStats; + } + + public SearchTaskStats(StreamInput in) throws IOException { + this.cancellationCount = in.readVLong(); + this.limitReachedCount = in.readVLong(); + + MapBuilder builder = new MapBuilder<>(); + builder.put(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, in.readOptionalWriteable(CpuUsageTracker.Stats::new)); + builder.put(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, in.readOptionalWriteable(HeapUsageTracker.Stats::new)); + builder.put(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, in.readOptionalWriteable(ElapsedTimeTracker.Stats::new)); + this.resourceUsageTrackerStats = builder.immutableMap(); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException { + builder.startObject(); + + builder.startObject("resource_tracker_stats"); + for (Map.Entry entry : resourceUsageTrackerStats.entrySet()) { + builder.field(entry.getKey().getName(), entry.getValue()); + } + builder.endObject(); + + builder.startObject("cancellation_stats") + .field("cancellation_count", cancellationCount) + .field("cancellation_limit_reached_count", limitReachedCount) + .endObject(); + + return builder.endObject(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVLong(cancellationCount); + out.writeVLong(limitReachedCount); + + out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER)); + 
out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER)); + out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER)); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + SearchTaskStats that = (SearchTaskStats) o; + return cancellationCount == that.cancellationCount + && limitReachedCount == that.limitReachedCount + && resourceUsageTrackerStats.equals(that.resourceUsageTrackerStats); + } + + @Override + public int hashCode() { + return Objects.hash(cancellationCount, limitReachedCount, resourceUsageTrackerStats); + } +} diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java index 21bb3af32ae08..1e332eca2649c 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java @@ -8,6 +8,7 @@ package org.opensearch.search.backpressure.trackers; +import org.opensearch.action.search.SearchTask; import org.opensearch.common.settings.Setting; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; @@ -32,11 +33,24 @@ */ public class CpuUsageTracker extends TaskResourceUsageTracker { private static class Defaults { + private static final long CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY = 60000; private static final long CPU_TIME_MILLIS_THRESHOLD = 15000; } /** - * Defines the CPU usage threshold (in millis) for an individual task before it is considered for cancellation. + * Defines the CPU usage threshold (in millis) for an individual search task before it is considered for cancellation. 
+ */ + private volatile long cpuTimeMillisThresholdForSearchQuery; + public static final Setting SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY = Setting.longSetting( + "search_backpressure.search_task.cpu_time_millis_threshold_for_search_query", + Defaults.CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the CPU usage threshold (in millis) for an individual search shard task before it is considered for cancellation. */ private volatile long cpuTimeMillisThreshold; public static final Setting SETTING_CPU_TIME_MILLIS_THRESHOLD = Setting.longSetting( @@ -48,7 +62,10 @@ private static class Defaults { ); public CpuUsageTracker(SearchBackpressureSettings settings) { + this.cpuTimeMillisThresholdForSearchQuery = SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); this.cpuTimeMillisThreshold = SETTING_CPU_TIME_MILLIS_THRESHOLD.get(settings.getSettings()); + settings.getClusterSettings() + .addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, this::setCpuTimeMillisThresholdForSearchQuery); settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD, this::setCpuTimeMillisThreshold); } @@ -60,7 +77,7 @@ public String name() { @Override public Optional checkAndMaybeGetCancellationReason(Task task) { long usage = task.getTotalResourceStats().getCpuTimeInNanos(); - long threshold = getCpuTimeNanosThreshold(); + long threshold = (task instanceof SearchTask) ? 
getCpuTimeNanosThresholdForSearchQuery() : getCpuTimeNanosThreshold(); if (usage < threshold) { return Optional.empty(); @@ -78,19 +95,37 @@ public Optional checkAndMaybeGetCancellationReason(Task ); } + public long getCpuTimeNanosThresholdForSearchQuery() { + return TimeUnit.MILLISECONDS.toNanos(cpuTimeMillisThresholdForSearchQuery); + } + public long getCpuTimeNanosThreshold() { return TimeUnit.MILLISECONDS.toNanos(cpuTimeMillisThreshold); } + public void setCpuTimeMillisThresholdForSearchQuery(long cpuTimeMillisThresholdForSearchQuery) { + this.cpuTimeMillisThresholdForSearchQuery = cpuTimeMillisThresholdForSearchQuery; + } + public void setCpuTimeMillisThreshold(long cpuTimeMillisThreshold) { this.cpuTimeMillisThreshold = cpuTimeMillisThreshold; } @Override - public TaskResourceUsageTracker.Stats stats(List activeTasks) { - long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); - long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).average().orElse(0); - return new Stats(getCancellations(), currentMax, currentAvg); + public TaskResourceUsageTracker.Stats searchTaskStats(List searchTasks) { + long currentMax = searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); + long currentAvg = (long) searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).average().orElse(0); + return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg); + } + + @Override + public TaskResourceUsageTracker.Stats searchShardTaskStats(List searchShardTasks) { + long currentMax = searchShardTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); + long currentAvg = (long) searchShardTasks.stream() + .mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()) + .average() + .orElse(0); + return new Stats(getSearchShardTaskCancellationCount(), 
currentMax, currentAvg); } /** diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java index 10e53e2bce5ae..eba8c4ee7afd8 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java @@ -8,6 +8,7 @@ package org.opensearch.search.backpressure.trackers; +import org.opensearch.action.search.SearchTask; import org.opensearch.common.settings.Setting; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; @@ -33,11 +34,24 @@ */ public class ElapsedTimeTracker extends TaskResourceUsageTracker { private static class Defaults { + private static final long ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY = 120000; private static final long ELAPSED_TIME_MILLIS_THRESHOLD = 30000; } /** - * Defines the elapsed time threshold (in millis) for an individual task before it is considered for cancellation. + * Defines the elapsed time threshold (in millis) for an individual search task before it is considered for cancellation. + */ + private volatile long elapsedTimeMillisThresholdForSearchQuery; + public static final Setting SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY = Setting.longSetting( + "search_backpressure.search_task.elapsed_time_millis_threshold_for_search_query", + Defaults.ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the elapsed time threshold (in millis) for an individual search shard task before it is considered for cancellation. 
*/ private volatile long elapsedTimeMillisThreshold; public static final Setting SETTING_ELAPSED_TIME_MILLIS_THRESHOLD = Setting.longSetting( @@ -52,7 +66,13 @@ private static class Defaults { public ElapsedTimeTracker(SearchBackpressureSettings settings, LongSupplier timeNanosSupplier) { this.timeNanosSupplier = timeNanosSupplier; + this.elapsedTimeMillisThresholdForSearchQuery = SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); this.elapsedTimeMillisThreshold = SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.get(settings.getSettings()); + settings.getClusterSettings() + .addSettingsUpdateConsumer( + SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, + this::setElapsedTimeMillisThresholdForSearchQuery + ); settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, this::setElapsedTimeMillisThreshold); } @@ -64,7 +84,7 @@ public String name() { @Override public Optional checkAndMaybeGetCancellationReason(Task task) { long usage = timeNanosSupplier.getAsLong() - task.getStartTimeNanos(); - long threshold = getElapsedTimeNanosThreshold(); + long threshold = (task instanceof SearchTask) ? 
getElapsedTimeNanosThresholdForSearchQuery() : getElapsedTimeNanosThreshold(); if (usage < threshold) { return Optional.empty(); @@ -82,20 +102,36 @@ public Optional checkAndMaybeGetCancellationReason(Task ); } + public long getElapsedTimeNanosThresholdForSearchQuery() { + return TimeUnit.MILLISECONDS.toNanos(elapsedTimeMillisThresholdForSearchQuery); + } + public long getElapsedTimeNanosThreshold() { return TimeUnit.MILLISECONDS.toNanos(elapsedTimeMillisThreshold); } + public void setElapsedTimeMillisThresholdForSearchQuery(long elapsedTimeMillisThresholdForSearchQuery) { + this.elapsedTimeMillisThresholdForSearchQuery = elapsedTimeMillisThresholdForSearchQuery; + } + public void setElapsedTimeMillisThreshold(long elapsedTimeMillisThreshold) { this.elapsedTimeMillisThreshold = elapsedTimeMillisThreshold; } @Override - public TaskResourceUsageTracker.Stats stats(List activeTasks) { + public TaskResourceUsageTracker.Stats searchTaskStats(List activeTasks) { + long now = timeNanosSupplier.getAsLong(); + long currentMax = activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); + long currentAvg = (long) activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); + return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg); + } + + @Override + public TaskResourceUsageTracker.Stats searchShardTaskStats(List activeTasks) { long now = timeNanosSupplier.getAsLong(); long currentMax = activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); long currentAvg = (long) activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); - return new Stats(getCancellations(), currentMax, currentAvg); + return new Stats(getSearchShardTaskCancellationCount(), currentMax, currentAvg); } /** diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java 
b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java index d1a264609e522..31f62055dbfc5 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java @@ -8,6 +8,7 @@ package org.opensearch.search.backpressure.trackers; +import org.opensearch.action.search.SearchTask; import org.opensearch.common.settings.Setting; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; @@ -37,13 +38,29 @@ public class HeapUsageTracker extends TaskResourceUsageTracker { private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); private static class Defaults { + private static final double HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY = 0.02; private static final double HEAP_PERCENT_THRESHOLD = 0.005; + private static final double HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY = 2.0; private static final double HEAP_VARIANCE_THRESHOLD = 2.0; + private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY = 100; private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE = 100; } /** - * Defines the heap usage threshold (in percentage) for an individual task before it is considered for cancellation. + * Defines the heap usage threshold (in percentage) for an individual search task before it is considered for cancellation. + */ + private volatile double heapPercentThresholdForSearchQuery; + public static final Setting SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY = Setting.doubleSetting( + "search_backpressure.search_task.heap_percent_threshold_for_search_query", + Defaults.HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY, + 0.0, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the heap usage threshold (in percentage) for an individual search shard task before it is considered for cancellation. 
*/ private volatile double heapPercentThreshold; public static final Setting SETTING_HEAP_PERCENT_THRESHOLD = Setting.doubleSetting( @@ -56,7 +73,20 @@ private static class Defaults { ); /** - * Defines the heap usage variance for an individual task before it is considered for cancellation. + * Defines the heap usage variance for an individual search task before it is considered for cancellation. + * A task is considered for cancellation when taskHeapUsage is greater than or equal to heapUsageMovingAverage * variance. + */ + private volatile double heapVarianceThresholdForSearchQuery; + public static final Setting SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY = Setting.doubleSetting( + "search_backpressure.search_task.heap_variance_for_search_query", + Defaults.HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY, + 0.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the heap usage variance for an individual search shard task before it is considered for cancellation. * A task is considered for cancellation when taskHeapUsage is greater than or equal to heapUsageMovingAverage * variance. */ private volatile double heapVarianceThreshold; @@ -69,7 +99,19 @@ private static class Defaults { ); /** - * Defines the window size to calculate the moving average of heap usage of completed tasks. + * Defines the window size to calculate the moving average of heap usage of completed search tasks. + */ + private volatile int heapMovingAverageWindowSizeForSearchQuery; + public static final Setting SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY = Setting.intSetting( + "search_backpressure.search_task.heap_moving_average_window_size_for_search_query", + Defaults.HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the window size to calculate the moving average of heap usage of completed search shard tasks. 
*/ private volatile int heapMovingAverageWindowSize; public static final Setting SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE = Setting.intSetting( @@ -80,19 +122,33 @@ private static class Defaults { Setting.Property.NodeScope ); + private final AtomicReference movingAverageReferenceForSearchQuery; private final AtomicReference movingAverageReference; public HeapUsageTracker(SearchBackpressureSettings settings) { + heapPercentThresholdForSearchQuery = SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); + settings.getClusterSettings() + .addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY, this::setHeapPercentThresholdForSearchQuery); heapPercentThreshold = SETTING_HEAP_PERCENT_THRESHOLD.get(settings.getSettings()); settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD, this::setHeapPercentThreshold); + heapVarianceThresholdForSearchQuery = SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); + settings.getClusterSettings() + .addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY, this::setHeapVarianceThresholdForSearchQuery); heapVarianceThreshold = SETTING_HEAP_VARIANCE_THRESHOLD.get(settings.getSettings()); settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD, this::setHeapVarianceThreshold); + heapMovingAverageWindowSizeForSearchQuery = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY.get(settings.getSettings()); + settings.getClusterSettings() + .addSettingsUpdateConsumer( + SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY, + this::setHeapMovingAverageWindowSizeForSearchQuery + ); heapMovingAverageWindowSize = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.get(settings.getSettings()); settings.getClusterSettings() .addSettingsUpdateConsumer(SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::setHeapMovingAverageWindowSize); + this.movingAverageReferenceForSearchQuery = new AtomicReference<>(new
MovingAverage(heapMovingAverageWindowSizeForSearchQuery)); this.movingAverageReference = new AtomicReference<>(new MovingAverage(heapMovingAverageWindowSize)); } @@ -103,12 +159,18 @@ public String name() { @Override public void update(Task task) { - movingAverageReference.get().record(task.getTotalResourceStats().getMemoryInBytes()); + if (task instanceof SearchTask) { + movingAverageReferenceForSearchQuery.get().record(task.getTotalResourceStats().getMemoryInBytes()); + } else { + movingAverageReference.get().record(task.getTotalResourceStats().getMemoryInBytes()); + } } @Override public Optional checkAndMaybeGetCancellationReason(Task task) { - MovingAverage movingAverage = movingAverageReference.get(); + MovingAverage movingAverage = (task instanceof SearchTask) + ? movingAverageReferenceForSearchQuery.get() + : movingAverageReference.get(); // There haven't been enough measurements. if (movingAverage.isReady() == false) { @@ -117,9 +179,11 @@ public Optional checkAndMaybeGetCancellationReason(Task double currentUsage = task.getTotalResourceStats().getMemoryInBytes(); double averageUsage = movingAverage.getAverage(); - double allowedUsage = averageUsage * getHeapVarianceThreshold(); + double variance = (task instanceof SearchTask) ? getHeapVarianceThresholdForSearchQuery() : getHeapVarianceThreshold(); + double allowedUsage = averageUsage * variance; + double threshold = (task instanceof SearchTask) ?
getHeapBytesThresholdForSearchQuery() : getHeapBytesThreshold(); - if (currentUsage < getHeapBytesThreshold() || currentUsage < allowedUsage) { + if (currentUsage < threshold || currentUsage < allowedUsage) { return Optional.empty(); } @@ -131,32 +195,60 @@ public Optional checkAndMaybeGetCancellationReason(Task ); } + public long getHeapBytesThresholdForSearchQuery() { + return (long) (HEAP_SIZE_BYTES * heapPercentThresholdForSearchQuery); + } + public long getHeapBytesThreshold() { return (long) (HEAP_SIZE_BYTES * heapPercentThreshold); } + public void setHeapPercentThresholdForSearchQuery(double heapPercentThresholdForSearchQuery) { + this.heapPercentThresholdForSearchQuery = heapPercentThresholdForSearchQuery; + } + public void setHeapPercentThreshold(double heapPercentThreshold) { this.heapPercentThreshold = heapPercentThreshold; } + public double getHeapVarianceThresholdForSearchQuery() { + return heapVarianceThresholdForSearchQuery; + } + public double getHeapVarianceThreshold() { return heapVarianceThreshold; } + public void setHeapVarianceThresholdForSearchQuery(double heapVarianceThresholdForSearchQuery) { + this.heapVarianceThresholdForSearchQuery = heapVarianceThresholdForSearchQuery; + } + public void setHeapVarianceThreshold(double heapVarianceThreshold) { this.heapVarianceThreshold = heapVarianceThreshold; } + public void setHeapMovingAverageWindowSizeForSearchQuery(int heapMovingAverageWindowSizeForSearchQuery) { + this.heapMovingAverageWindowSizeForSearchQuery = heapMovingAverageWindowSizeForSearchQuery; + this.movingAverageReferenceForSearchQuery.set(new MovingAverage(heapMovingAverageWindowSizeForSearchQuery)); + } + public void setHeapMovingAverageWindowSize(int heapMovingAverageWindowSize) { this.heapMovingAverageWindowSize = heapMovingAverageWindowSize; this.movingAverageReference.set(new MovingAverage(heapMovingAverageWindowSize)); } @Override - public TaskResourceUsageTracker.Stats stats(List activeTasks) { + public 
TaskResourceUsageTracker.Stats searchTaskStats(List activeTasks) { + long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); + long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); + return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg, (long) movingAverageReferenceForSearchQuery.get().getAverage()); + } + + @Override + public TaskResourceUsageTracker.Stats searchShardTaskStats(List activeTasks) { long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); - return new Stats(getCancellations(), currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); + return new Stats(getSearchShardTaskCancellationCount(), currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); } /** diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java index cbbb751b996be..a08ca34cd37bc 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java @@ -26,14 +26,23 @@ public abstract class TaskResourceUsageTracker { /** * Counts the number of cancellations made due to this tracker.
*/ - private final AtomicLong cancellations = new AtomicLong(); + private final AtomicLong searchTaskCancellationCount = new AtomicLong(); + private final AtomicLong searchShardTaskCancellationCount = new AtomicLong(); - public long incrementCancellations() { - return cancellations.incrementAndGet(); + public long incrementSearchTaskCancellations() { + return searchTaskCancellationCount.incrementAndGet(); } - public long getCancellations() { - return cancellations.get(); + public long incrementSearchShardTaskCancellations() { + return searchShardTaskCancellationCount.incrementAndGet(); + } + + public long getSearchTaskCancellationCount() { + return searchTaskCancellationCount.get(); + } + + public long getSearchShardTaskCancellationCount() { + return searchShardTaskCancellationCount.get(); } /** @@ -52,9 +61,14 @@ public void update(Task task) {} public abstract Optional checkAndMaybeGetCancellationReason(Task task); /** - * Returns the tracker's state as seen in the stats API. + * Returns the tracker's state for SearchTasks as seen in the stats API. + */ + public abstract Stats searchTaskStats(List activeTasks); + + /** + * Returns the tracker's state for SearchShardTasks as seen in the stats API. */ - public abstract Stats stats(List activeTasks); + public abstract Stats searchShardTaskStats(List activeTasks); /** * Represents the tracker's state as seen in the stats API. 
diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index 07a962c6824ca..1285131bf5da8 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -16,6 +16,7 @@ import org.opensearch.search.backpressure.settings.SearchBackpressureMode; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.stats.SearchTaskStats; import org.opensearch.search.backpressure.trackers.NodeDuressTracker; import org.opensearch.common.xcontent.XContentBuilder; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; @@ -120,7 +121,7 @@ public void testTrackerStateUpdateOnTaskCompletion() { for (int i = 0; i < 100; i++) { service.onTaskCompleted(createMockTaskWithResourceStats(SearchShardTask.class, 100, 200)); } - assertEquals(100, service.getState().getCompletionCount()); + assertEquals(100, service.getSearchShardTasksState().getCompletionCount()); verify(mockTaskResourceUsageTracker, times(100)).update(any()); } @@ -150,8 +151,13 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public Stats stats(List activeTasks) { - return new MockStats(getCancellations()); + public Stats searchTaskStats(List activeTasks) { + return new MockStats(getSearchTaskCancellationCount()); + } + + @Override + public Stats searchShardTaskStats(List activeTasks) { + return new MockStats(getSearchShardTaskCancellationCount()); } }; @@ -200,13 +206,13 @@ public Stats stats(List activeTasks) { // There are 15 tasks eligible for cancellation but only 10 will be cancelled (burst limit). 
service.doRun(); - assertEquals(10, service.getState().getCancellationCount()); - assertEquals(1, service.getState().getLimitReachedCount()); + assertEquals(10, service.getSearchShardTasksState().getCancellationCount()); + assertEquals(1, service.getSearchShardTasksState().getLimitReachedCount()); // If the clock or completed task count haven't made sufficient progress, we'll continue to be rate-limited. service.doRun(); - assertEquals(10, service.getState().getCancellationCount()); - assertEquals(2, service.getState().getLimitReachedCount()); + assertEquals(10, service.getSearchShardTasksState().getCancellationCount()); + assertEquals(2, service.getSearchShardTasksState().getLimitReachedCount()); // Simulate task completion to replenish some tokens. // This will add 2 tokens (task count delta * cancellationRatio) to 'rateLimitPerTaskCompletion'. @@ -214,18 +220,19 @@ public Stats stats(List activeTasks) { service.onTaskCompleted(createMockTaskWithResourceStats(SearchShardTask.class, 100, taskHeapUsageBytes)); } service.doRun(); - assertEquals(12, service.getState().getCancellationCount()); - assertEquals(3, service.getState().getLimitReachedCount()); + assertEquals(12, service.getSearchShardTasksState().getCancellationCount()); + assertEquals(3, service.getSearchShardTasksState().getLimitReachedCount()); // Fast-forward the clock by one second to replenish some tokens. // This will add 3 tokens (time delta * rate) to 'rateLimitPerTime'. mockTime.addAndGet(TimeUnit.SECONDS.toNanos(1)); service.doRun(); - assertEquals(15, service.getState().getCancellationCount()); - assertEquals(3, service.getState().getLimitReachedCount()); // no more tasks to cancel; limit not reached + assertEquals(15, service.getSearchShardTasksState().getCancellationCount()); + assertEquals(3, service.getSearchShardTasksState().getLimitReachedCount()); // no more tasks to cancel; limit not reached // Verify search backpressure stats. 
SearchBackpressureStats expectedStats = new SearchBackpressureStats( + new SearchTaskStats(0, 0, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(0))), new SearchShardTaskStats(15, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(15))), SearchBackpressureMode.ENFORCED ); diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java index 2665a6d5e05aa..0c86cf4b11239 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java @@ -25,6 +25,7 @@ protected SearchBackpressureStats createTestInstance() { public static SearchBackpressureStats randomInstance() { return new SearchBackpressureStats( + SearchTaskStatsTests.randomInstance(), SearchShardTaskStatsTests.randomInstance(), randomFrom(SearchBackpressureMode.DISABLED, SearchBackpressureMode.MONITOR_ONLY, SearchBackpressureMode.ENFORCED) ); diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java new file mode 100644 index 0000000000000..59375c22bb932 --- /dev/null +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java @@ -0,0 +1,44 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.search.backpressure.stats; + +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.search.backpressure.trackers.CpuUsageTracker; +import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; +import org.opensearch.search.backpressure.trackers.HeapUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; +import org.opensearch.test.AbstractWireSerializingTestCase; + +import java.util.Map; + +public class SearchTaskStatsTests extends AbstractWireSerializingTestCase { + public static SearchTaskStats randomInstance() { + Map resourceUsageTrackerStats = Map.of( + TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, + new CpuUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), + TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, + new HeapUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), + TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, + new ElapsedTimeTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()) + ); + + return new SearchTaskStats(randomNonNegativeLong(), randomNonNegativeLong(), resourceUsageTrackerStats); + } + + @Override + protected Writeable.Reader instanceReader() { + return SearchTaskStats::new; + } + + @Override + protected SearchTaskStats createTestInstance() { + return randomInstance(); + } +} diff --git a/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java b/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java index e74f89c905499..f30c15de28b90 100644 --- a/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java +++ b/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java @@ -27,16 +27,19 @@ public void testTaskCancellation() { TaskResourceUsageTracker mockTracker3 = 
createMockTaskResourceUsageTracker("mock_tracker_3"); List reasons = new ArrayList<>(); - List callbacks = List.of(mockTracker1::incrementCancellations, mockTracker2::incrementCancellations); + List callbacks = List.of( + mockTracker1::incrementSearchShardTaskCancellations, + mockTracker2::incrementSearchShardTaskCancellations + ); TaskCancellation taskCancellation = new TaskCancellation(mockTask, reasons, callbacks); // Task does not have any reason to be cancelled. assertEquals(0, taskCancellation.totalCancellationScore()); assertFalse(taskCancellation.isEligibleForCancellation()); taskCancellation.cancel(); - assertEquals(0, mockTracker1.getCancellations()); - assertEquals(0, mockTracker2.getCancellations()); - assertEquals(0, mockTracker3.getCancellations()); + assertEquals(0, mockTracker1.getSearchShardTaskCancellationCount()); + assertEquals(0, mockTracker2.getSearchShardTaskCancellationCount()); + assertEquals(0, mockTracker3.getSearchShardTaskCancellationCount()); // Task has one or more reasons to be cancelled. reasons.add(new TaskCancellation.Reason("limits exceeded 1", 10)); @@ -48,9 +51,9 @@ public void testTaskCancellation() { // Cancel the task and validate the cancellation reason and invocation of callbacks. 
taskCancellation.cancel(); assertTrue(mockTask.getReasonCancelled().contains("limits exceeded 1, limits exceeded 2, limits exceeded 3")); - assertEquals(1, mockTracker1.getCancellations()); - assertEquals(1, mockTracker2.getCancellations()); - assertEquals(0, mockTracker3.getCancellations()); + assertEquals(1, mockTracker1.getSearchShardTaskCancellationCount()); + assertEquals(1, mockTracker2.getSearchShardTaskCancellationCount()); + assertEquals(0, mockTracker3.getSearchShardTaskCancellationCount()); } private static TaskResourceUsageTracker createMockTaskResourceUsageTracker(String name) { @@ -69,7 +72,12 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public Stats stats(List activeTasks) { + public Stats searchTaskStats(List activeTasks) { + return null; + } + + @Override + public Stats searchShardTaskStats(List activeTasks) { return null; } }; From 9ccdae52ff451e2bb71685a94167eda3b8b16ae9 Mon Sep 17 00:00:00 2001 From: PritLadani Date: Mon, 19 Dec 2022 22:44:20 +0530 Subject: [PATCH 02/34] Cancellation of in-flight search requests at coordinator level Signed-off-by: PritLadani --- CHANGELOG.md | 1 + .../common/settings/ClusterSettings.java | 9 +- .../SearchBackpressureService.java | 125 +++++++++++++----- .../settings/SearchBackpressureSettings.java | 6 + .../settings/SearchTaskSettings.java | 62 +++++++++ .../stats/SearchBackpressureStats.java | 16 ++- .../backpressure/stats/SearchTaskStats.java | 100 ++++++++++++++ .../trackers/CpuUsageTracker.java | 47 ++++++- .../trackers/ElapsedTimeTracker.java | 44 +++++- .../trackers/HeapUsageTracker.java | 110 +++++++++++++-- .../trackers/TaskResourceUsageTracker.java | 28 +++- .../SearchBackpressureServiceTests.java | 29 ++-- .../stats/SearchBackpressureStatsTests.java | 1 + .../stats/SearchTaskStatsTests.java | 44 ++++++ .../tasks/TaskCancellationTests.java | 24 ++-- 15 files changed, 566 insertions(+), 80 deletions(-) create mode 100644 
server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java create mode 100644 server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java create mode 100644 server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 051b6ed149da4..925508675bfb1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Added experimental support for extensions ([#5347](https://github.com/opensearch-project/OpenSearch/pull/5347)), ([#5518](https://github.com/opensearch-project/OpenSearch/pull/5518)) - Add CI bundle pattern to distribution download ([#5348](https://github.com/opensearch-project/OpenSearch/pull/5348)) - Add support for ppc64le architecture ([#5459](https://github.com/opensearch-project/OpenSearch/pull/5459)) +- Cancellation of in-flight SearchTasks based on resource consumption ([#5606](https://github.com/opensearch-project/OpenSearch/pull/5605)) ### Dependencies diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 7e9c7bd3123c5..8bf979c05ef30 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -44,6 +44,7 @@ import org.opensearch.search.backpressure.settings.NodeDuressSettings; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; @@ -601,11 +602,17 @@ 
public void apply(Settings value, Settings current, Settings previous) { NodeDuressSettings.SETTING_CPU_THRESHOLD, NodeDuressSettings.SETTING_HEAP_THRESHOLD, SearchShardTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, + HeapUsageTracker.SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY, HeapUsageTracker.SETTING_HEAP_PERCENT_THRESHOLD, + HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY, HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD, + HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY, HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, + CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD, - ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD + ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, + ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, + SearchTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY ) ) ); diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index fd13198b957da..2465790176daa 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -12,6 +12,7 @@ import org.apache.logging.log4j.Logger; import org.opensearch.ExceptionsHelper; import org.opensearch.action.search.SearchShardTask; +import org.opensearch.action.search.SearchTask; import org.opensearch.common.component.AbstractLifecycleComponent; import org.opensearch.common.util.TokenBucket; import org.opensearch.monitor.jvm.JvmStats; @@ -20,6 +21,7 @@ import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; import 
org.opensearch.search.backpressure.stats.SearchShardTaskStats; +import org.opensearch.search.backpressure.stats.SearchTaskStats; import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; @@ -37,7 +39,9 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Comparator; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.concurrent.atomic.AtomicReference; import java.util.function.LongSupplier; @@ -68,9 +72,12 @@ public class SearchBackpressureService extends AbstractLifecycleComponent private final AtomicReference taskCancellationRateLimiter = new AtomicReference<>(); private final AtomicReference taskCancellationRatioLimiter = new AtomicReference<>(); - // Currently, only the state of SearchShardTask is being tracked. - // This can be generalized to Map once we start supporting cancellation of SearchTasks as well. 
- private final SearchBackpressureState state = new SearchBackpressureState(); + private final Map, SearchBackpressureState> searchBackpressureStates = new HashMap<>() { + { + put(SearchTask.class, new SearchBackpressureState()); + put(SearchShardTask.class, new SearchBackpressureState()); + } + }; public SearchBackpressureService( SearchBackpressureSettings settings, @@ -116,10 +123,15 @@ public SearchBackpressureService( ); this.taskCancellationRatioLimiter.set( - new TokenBucket(state::getCompletionCount, getSettings().getCancellationRatio(), getSettings().getCancellationBurst()) + new TokenBucket(this::getTaskCompletionCount, getSettings().getCancellationRatio(), getSettings().getCancellationBurst()) ); } + private long getTaskCompletionCount() { + return searchBackpressureStates.get(SearchTask.class).getCompletionCount() + searchBackpressureStates.get(SearchShardTask.class) + .getCompletionCount(); + } + void doRun() { SearchBackpressureMode mode = getSettings().getMode(); if (mode == SearchBackpressureMode.DISABLED) { @@ -130,18 +142,29 @@ void doRun() { return; } - // We are only targeting in-flight cancellation of SearchShardTask for now. - List searchShardTasks = getSearchShardTasks(); + List searchTasks = getSearchTasks(); + List searchShardTasks = getSearchShardTasks(); + List cancellableTasks = new ArrayList<>(); // Force-refresh usage stats of these tasks before making a cancellation decision. + taskResourceTrackingService.refreshResourceStats(searchTasks.toArray(new Task[0])); taskResourceTrackingService.refreshResourceStats(searchShardTasks.toArray(new Task[0])); - // Skip cancellation if the increase in heap usage is not due to search requests. 
- if (isHeapUsageDominatedBySearch(searchShardTasks) == false) { + // Check if increase in heap usage is due to SearchTasks + if (isHeapUsageDominatedBySearch(searchTasks, getSettings().getSearchTaskSettings().getTotalHeapBytesThreshold())) { + cancellableTasks.addAll(searchTasks); + } + + // Check if increase in heap usage is due to SearchShardTasks + if (isHeapUsageDominatedBySearch(searchShardTasks, getSettings().getSearchShardTaskSettings().getTotalHeapBytesThreshold())) { + cancellableTasks.addAll(searchShardTasks); + } + + if (cancellableTasks.isEmpty()) { return; } - for (TaskCancellation taskCancellation : getTaskCancellations(searchShardTasks)) { + for (TaskCancellation taskCancellation : getTaskCancellations(cancellableTasks)) { logger.debug( "[{} mode] cancelling task [{}] due to high resource consumption [{}]", mode.getName(), @@ -160,7 +183,10 @@ void doRun() { // Stop cancelling tasks if there are no tokens in either of the two token buckets. if (rateLimitReached && ratioLimitReached) { logger.debug("task cancellation limit reached"); - state.incrementLimitReachedCount(); + SearchBackpressureState searchBackpressureState = searchBackpressureStates.get( + (taskCancellation.getTask() instanceof SearchTask) ? SearchTask.class : SearchShardTask.class + ); + searchBackpressureState.incrementLimitReachedCount(); break; } @@ -187,9 +213,8 @@ boolean isNodeInDuress() { /** * Returns true if the increase in heap usage is due to search requests. 
*/ - boolean isHeapUsageDominatedBySearch(List searchShardTasks) { - long usage = searchShardTasks.stream().mapToLong(task -> task.getTotalResourceStats().getMemoryInBytes()).sum(); - long threshold = getSettings().getSearchShardTaskSettings().getTotalHeapBytesThreshold(); + boolean isHeapUsageDominatedBySearch(List cancellableTasks, long threshold) { + long usage = cancellableTasks.stream().mapToLong(task -> task.getTotalResourceStats().getMemoryInBytes()).sum(); if (usage < threshold) { logger.debug("heap usage not dominated by search requests [{}/{}]", usage, threshold); return false; @@ -201,7 +226,7 @@ boolean isHeapUsageDominatedBySearch(List searchShardTasks) { /** * Filters and returns the list of currently running SearchShardTasks. */ - List getSearchShardTasks() { + List getSearchShardTasks() { return taskResourceTrackingService.getResourceAwareTasks() .values() .stream() @@ -210,6 +235,18 @@ List getSearchShardTasks() { .collect(Collectors.toUnmodifiableList()); } + /** + * Filters and returns the list of currently running SearchTasks. + */ + List getSearchTasks() { + return taskResourceTrackingService.getResourceAwareTasks() + .values() + .stream() + .filter(task -> task instanceof SearchTask) + .map(task -> (SearchTask) task) + .collect(Collectors.toUnmodifiableList()); + } + /** * Returns a TaskCancellation wrapper containing the list of reasons (possibly zero), along with an overall * cancellation score for the given task. 
Cancelling a task with a higher score has better chance of recovering the @@ -222,13 +259,19 @@ TaskCancellation getTaskCancellation(CancellableTask task) { for (TaskResourceUsageTracker tracker : taskResourceUsageTrackers) { Optional reason = tracker.checkAndMaybeGetCancellationReason(task); if (reason.isPresent()) { + if (task instanceof SearchTask) { + callbacks.add(tracker::incrementSearchTaskCancellations); + } else { + callbacks.add(tracker::incrementSearchShardTaskCancellations); + } reasons.add(reason.get()); - callbacks.add(tracker::incrementCancellations); } } - if (task instanceof SearchShardTask) { - callbacks.add(state::incrementCancellationCount); + if (task instanceof SearchTask) { + callbacks.add(searchBackpressureStates.get(SearchTask.class)::incrementCancellationCount); + } else { + callbacks.add(searchBackpressureStates.get(SearchShardTask.class)::incrementCancellationCount); } return new TaskCancellation(task, reasons, callbacks); @@ -249,8 +292,12 @@ SearchBackpressureSettings getSettings() { return settings; } - SearchBackpressureState getState() { - return state; + SearchBackpressureState getSearchTasksState() { + return searchBackpressureStates.get(SearchTask.class); + } + + SearchBackpressureState getSearchShardTasksState() { + return searchBackpressureStates.get(SearchShardTask.class); } @Override @@ -259,19 +306,22 @@ public void onTaskCompleted(Task task) { return; } - if (task instanceof SearchShardTask == false) { + if (task instanceof SearchTask == false && task instanceof SearchShardTask == false) { return; } - SearchShardTask searchShardTask = (SearchShardTask) task; - if (searchShardTask.isCancelled() == false) { - state.incrementCompletionCount(); + CancellableTask cancellableTask = (CancellableTask) task; + SearchBackpressureState searchBackpressureState = searchBackpressureStates.get( + (task instanceof SearchTask) ? 
SearchTask.class : SearchShardTask.class + ); + if (cancellableTask.isCancelled() == false) { + searchBackpressureState.incrementCompletionCount(); } List exceptions = new ArrayList<>(); for (TaskResourceUsageTracker tracker : taskResourceUsageTrackers) { try { - tracker.update(searchShardTask); + tracker.update(task); } catch (Exception e) { exceptions.add(e); } @@ -282,7 +332,7 @@ public void onTaskCompleted(Task task) { @Override public void onCancellationRatioChanged() { taskCancellationRatioLimiter.set( - new TokenBucket(state::getCompletionCount, getSettings().getCancellationRatio(), getSettings().getCancellationBurst()) + new TokenBucket(this::getTaskCompletionCount, getSettings().getCancellationRatio(), getSettings().getCancellationBurst()) ); } @@ -321,15 +371,30 @@ protected void doStop() { protected void doClose() throws IOException {} public SearchBackpressureStats nodeStats() { - List searchShardTasks = getSearchShardTasks(); + List searchTasks = getSearchTasks(); + List searchShardTasks = getSearchShardTasks(); + + SearchTaskStats searchTaskStats = new SearchTaskStats( + searchBackpressureStates.get(SearchTask.class).getCancellationCount(), + searchBackpressureStates.get(SearchTask.class).getLimitReachedCount(), + taskResourceUsageTrackers.stream() + .collect( + Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.searchTaskStats(searchTasks)) + ) + ); SearchShardTaskStats searchShardTaskStats = new SearchShardTaskStats( - state.getCancellationCount(), - state.getLimitReachedCount(), + searchBackpressureStates.get(SearchShardTask.class).getCancellationCount(), + searchBackpressureStates.get(SearchShardTask.class).getLimitReachedCount(), taskResourceUsageTrackers.stream() - .collect(Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.stats(searchShardTasks))) + .collect( + Collectors.toUnmodifiableMap( + t -> TaskResourceUsageTrackerType.fromName(t.name()), + t -> 
t.searchShardTaskStats(searchShardTasks) + ) + ) ); - return new SearchBackpressureStats(searchShardTaskStats, getSettings().getMode()); + return new SearchBackpressureStats(searchTaskStats, searchShardTaskStats, getSettings().getMode()); } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java index df2c04a730fbc..3906228389729 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java @@ -110,12 +110,14 @@ public interface Listener { private final Settings settings; private final ClusterSettings clusterSettings; private final NodeDuressSettings nodeDuressSettings; + private final SearchTaskSettings searchTaskSettings; private final SearchShardTaskSettings searchShardTaskSettings; public SearchBackpressureSettings(Settings settings, ClusterSettings clusterSettings) { this.settings = settings; this.clusterSettings = clusterSettings; this.nodeDuressSettings = new NodeDuressSettings(settings, clusterSettings); + this.searchTaskSettings = new SearchTaskSettings(settings, clusterSettings); this.searchShardTaskSettings = new SearchShardTaskSettings(settings, clusterSettings); interval = new TimeValue(SETTING_INTERVAL_MILLIS.get(settings)); @@ -149,6 +151,10 @@ public NodeDuressSettings getNodeDuressSettings() { return nodeDuressSettings; } + public SearchTaskSettings getSearchTaskSettings() { + return searchTaskSettings; + } + public SearchShardTaskSettings getSearchShardTaskSettings() { return searchShardTaskSettings; } diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java new file mode 100644 index 0000000000000..f28cdd17a3cff 
--- /dev/null +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java @@ -0,0 +1,62 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.backpressure.settings; + +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.monitor.jvm.JvmStats; + +/** + * Defines the settings related to the cancellation of SearchTasks. + * + * @opensearch.internal + */ + +public class SearchTaskSettings { + private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); + + private static class Defaults { + private static final double TOTAL_HEAP_PERCENT_THRESHOLD = 0.05; + } + + /** + * Defines the heap usage threshold (in percentage) for the sum of heap usages across all search tasks + * before in-flight cancellation is applied. 
+ */ + private volatile double totalHeapPercentThreshold; + public static final Setting SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY = Setting.doubleSetting( + "search_backpressure.search_task.total_heap_percent_threshold", + Defaults.TOTAL_HEAP_PERCENT_THRESHOLD, + 0.0, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + public SearchTaskSettings(Settings settings, ClusterSettings clusterSettings) { + totalHeapPercentThreshold = SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY.get(settings); + clusterSettings.addSettingsUpdateConsumer( + SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY, + this::setTotalHeapPercentThreshold + ); + } + + public double getTotalHeapPercentThreshold() { + return totalHeapPercentThreshold; + } + + public long getTotalHeapBytesThreshold() { + return (long) (HEAP_SIZE_BYTES * getTotalHeapPercentThreshold()); + } + + private void setTotalHeapPercentThreshold(double totalHeapPercentThreshold) { + this.totalHeapPercentThreshold = totalHeapPercentThreshold; + } +} diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java index 3aec0dfc579c5..92a52b62477f2 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java @@ -22,21 +22,28 @@ * Stats related to search backpressure. 
*/ public class SearchBackpressureStats implements ToXContentFragment, Writeable { + private final SearchTaskStats searchTaskStats; private final SearchShardTaskStats searchShardTaskStats; private final SearchBackpressureMode mode; - public SearchBackpressureStats(SearchShardTaskStats searchShardTaskStats, SearchBackpressureMode mode) { + public SearchBackpressureStats( + SearchTaskStats searchTaskStats, + SearchShardTaskStats searchShardTaskStats, + SearchBackpressureMode mode + ) { + this.searchTaskStats = searchTaskStats; this.searchShardTaskStats = searchShardTaskStats; this.mode = mode; } public SearchBackpressureStats(StreamInput in) throws IOException { - this(new SearchShardTaskStats(in), SearchBackpressureMode.fromName(in.readString())); + this(new SearchTaskStats(in), new SearchShardTaskStats(in), SearchBackpressureMode.fromName(in.readString())); } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { return builder.startObject("search_backpressure") + .field("search_task", searchTaskStats) .field("search_shard_task", searchShardTaskStats) .field("mode", mode.getName()) .endObject(); @@ -44,6 +51,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws @Override public void writeTo(StreamOutput out) throws IOException { + searchTaskStats.writeTo(out); searchShardTaskStats.writeTo(out); out.writeString(mode.getName()); } @@ -53,11 +61,11 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; SearchBackpressureStats that = (SearchBackpressureStats) o; - return searchShardTaskStats.equals(that.searchShardTaskStats) && mode == that.mode; + return searchTaskStats.equals(that.searchTaskStats) && searchShardTaskStats.equals(that.searchShardTaskStats) && mode == that.mode; } @Override public int hashCode() { - return Objects.hash(searchShardTaskStats, mode); + return Objects.hash(searchTaskStats, 
searchShardTaskStats, mode); } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java new file mode 100644 index 0000000000000..87318a60b46fd --- /dev/null +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java @@ -0,0 +1,100 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.backpressure.stats; + +import org.opensearch.common.collect.MapBuilder; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.common.xcontent.ToXContent; +import org.opensearch.common.xcontent.ToXContentObject; +import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.search.backpressure.trackers.CpuUsageTracker; +import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; +import org.opensearch.search.backpressure.trackers.HeapUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; + +import java.io.IOException; +import java.util.Map; +import java.util.Objects; + +/** + * Stats related to cancelled search tasks. 
+ */ + +public class SearchTaskStats implements ToXContentObject, Writeable { + private final long cancellationCount; + private final long limitReachedCount; + private final Map resourceUsageTrackerStats; + + public SearchTaskStats( + long cancellationCount, + long limitReachedCount, + Map resourceUsageTrackerStats + ) { + this.cancellationCount = cancellationCount; + this.limitReachedCount = limitReachedCount; + this.resourceUsageTrackerStats = resourceUsageTrackerStats; + } + + public SearchTaskStats(StreamInput in) throws IOException { + this.cancellationCount = in.readVLong(); + this.limitReachedCount = in.readVLong(); + + MapBuilder builder = new MapBuilder<>(); + builder.put(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, in.readOptionalWriteable(CpuUsageTracker.Stats::new)); + builder.put(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, in.readOptionalWriteable(HeapUsageTracker.Stats::new)); + builder.put(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, in.readOptionalWriteable(ElapsedTimeTracker.Stats::new)); + this.resourceUsageTrackerStats = builder.immutableMap(); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException { + builder.startObject(); + + builder.startObject("resource_tracker_stats"); + for (Map.Entry entry : resourceUsageTrackerStats.entrySet()) { + builder.field(entry.getKey().getName(), entry.getValue()); + } + builder.endObject(); + + builder.startObject("cancellation_stats") + .field("cancellation_count", cancellationCount) + .field("cancellation_limit_reached_count", limitReachedCount) + .endObject(); + + return builder.endObject(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVLong(cancellationCount); + out.writeVLong(limitReachedCount); + + out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER)); + 
out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER)); + out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER)); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + SearchTaskStats that = (SearchTaskStats) o; + return cancellationCount == that.cancellationCount + && limitReachedCount == that.limitReachedCount + && resourceUsageTrackerStats.equals(that.resourceUsageTrackerStats); + } + + @Override + public int hashCode() { + return Objects.hash(cancellationCount, limitReachedCount, resourceUsageTrackerStats); + } +} diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java index 21bb3af32ae08..1e332eca2649c 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java @@ -8,6 +8,7 @@ package org.opensearch.search.backpressure.trackers; +import org.opensearch.action.search.SearchTask; import org.opensearch.common.settings.Setting; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; @@ -32,11 +33,24 @@ */ public class CpuUsageTracker extends TaskResourceUsageTracker { private static class Defaults { + private static final long CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY = 60000; private static final long CPU_TIME_MILLIS_THRESHOLD = 15000; } /** - * Defines the CPU usage threshold (in millis) for an individual task before it is considered for cancellation. + * Defines the CPU usage threshold (in millis) for an individual search task before it is considered for cancellation. 
+ */ + private volatile long cpuTimeMillisThresholdForSearchQuery; + public static final Setting SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY = Setting.longSetting( + "search_backpressure.search_task.cpu_time_millis_threshold_for_search_query", + Defaults.CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the CPU usage threshold (in millis) for an individual search shard task before it is considered for cancellation. */ private volatile long cpuTimeMillisThreshold; public static final Setting SETTING_CPU_TIME_MILLIS_THRESHOLD = Setting.longSetting( @@ -48,7 +62,10 @@ private static class Defaults { ); public CpuUsageTracker(SearchBackpressureSettings settings) { + this.cpuTimeMillisThresholdForSearchQuery = SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); this.cpuTimeMillisThreshold = SETTING_CPU_TIME_MILLIS_THRESHOLD.get(settings.getSettings()); + settings.getClusterSettings() + .addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, this::setCpuTimeMillisThresholdForSearchQuery); settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD, this::setCpuTimeMillisThreshold); } @@ -60,7 +77,7 @@ public String name() { @Override public Optional checkAndMaybeGetCancellationReason(Task task) { long usage = task.getTotalResourceStats().getCpuTimeInNanos(); - long threshold = getCpuTimeNanosThreshold(); + long threshold = (task instanceof SearchTask) ? 
getCpuTimeNanosThresholdForSearchQuery() : getCpuTimeNanosThreshold(); if (usage < threshold) { return Optional.empty(); @@ -78,19 +95,37 @@ public Optional checkAndMaybeGetCancellationReason(Task ); } + public long getCpuTimeNanosThresholdForSearchQuery() { + return TimeUnit.MILLISECONDS.toNanos(cpuTimeMillisThresholdForSearchQuery); + } + public long getCpuTimeNanosThreshold() { return TimeUnit.MILLISECONDS.toNanos(cpuTimeMillisThreshold); } + public void setCpuTimeMillisThresholdForSearchQuery(long cpuTimeMillisThresholdForSearchQuery) { + this.cpuTimeMillisThresholdForSearchQuery = cpuTimeMillisThresholdForSearchQuery; + } + public void setCpuTimeMillisThreshold(long cpuTimeMillisThreshold) { this.cpuTimeMillisThreshold = cpuTimeMillisThreshold; } @Override - public TaskResourceUsageTracker.Stats stats(List activeTasks) { - long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); - long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).average().orElse(0); - return new Stats(getCancellations(), currentMax, currentAvg); + public TaskResourceUsageTracker.Stats searchTaskStats(List searchTasks) { + long currentMax = searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); + long currentAvg = (long) searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).average().orElse(0); + return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg); + } + + @Override + public TaskResourceUsageTracker.Stats searchShardTaskStats(List searchShardTasks) { + long currentMax = searchShardTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); + long currentAvg = (long) searchShardTasks.stream() + .mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()) + .average() + .orElse(0); + return new Stats(getSearchShardTaskCancellationCount(), 
currentMax, currentAvg); } /** diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java index 10e53e2bce5ae..eba8c4ee7afd8 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java @@ -8,6 +8,7 @@ package org.opensearch.search.backpressure.trackers; +import org.opensearch.action.search.SearchTask; import org.opensearch.common.settings.Setting; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; @@ -33,11 +34,24 @@ */ public class ElapsedTimeTracker extends TaskResourceUsageTracker { private static class Defaults { + private static final long ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY = 120000; private static final long ELAPSED_TIME_MILLIS_THRESHOLD = 30000; } /** - * Defines the elapsed time threshold (in millis) for an individual task before it is considered for cancellation. + * Defines the elapsed time threshold (in millis) for an individual search task before it is considered for cancellation. + */ + private volatile long elapsedTimeMillisThresholdForSearchQuery; + public static final Setting SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY = Setting.longSetting( + "search_backpressure.search_task.elapsed_time_millis_threshold_for_search_query", + Defaults.ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the elapsed time threshold (in millis) for an individual search shard task before it is considered for cancellation. 
*/ private volatile long elapsedTimeMillisThreshold; public static final Setting SETTING_ELAPSED_TIME_MILLIS_THRESHOLD = Setting.longSetting( @@ -52,7 +66,13 @@ private static class Defaults { public ElapsedTimeTracker(SearchBackpressureSettings settings, LongSupplier timeNanosSupplier) { this.timeNanosSupplier = timeNanosSupplier; + this.elapsedTimeMillisThresholdForSearchQuery = SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); this.elapsedTimeMillisThreshold = SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.get(settings.getSettings()); + settings.getClusterSettings() + .addSettingsUpdateConsumer( + SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, + this::setElapsedTimeMillisThresholdForSearchQuery + ); settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, this::setElapsedTimeMillisThreshold); } @@ -64,7 +84,7 @@ public String name() { @Override public Optional checkAndMaybeGetCancellationReason(Task task) { long usage = timeNanosSupplier.getAsLong() - task.getStartTimeNanos(); - long threshold = getElapsedTimeNanosThreshold(); + long threshold = (task instanceof SearchTask) ? 
getElapsedTimeNanosThresholdForSearchQuery() : getElapsedTimeNanosThreshold(); if (usage < threshold) { return Optional.empty(); @@ -82,20 +102,36 @@ public Optional checkAndMaybeGetCancellationReason(Task ); } + public long getElapsedTimeNanosThresholdForSearchQuery() { + return TimeUnit.MILLISECONDS.toNanos(elapsedTimeMillisThresholdForSearchQuery); + } + public long getElapsedTimeNanosThreshold() { return TimeUnit.MILLISECONDS.toNanos(elapsedTimeMillisThreshold); } + public void setElapsedTimeMillisThresholdForSearchQuery(long elapsedTimeMillisThresholdForSearchQuery) { + this.elapsedTimeMillisThresholdForSearchQuery = elapsedTimeMillisThresholdForSearchQuery; + } + public void setElapsedTimeMillisThreshold(long elapsedTimeMillisThreshold) { this.elapsedTimeMillisThreshold = elapsedTimeMillisThreshold; } @Override - public TaskResourceUsageTracker.Stats stats(List activeTasks) { + public TaskResourceUsageTracker.Stats searchTaskStats(List activeTasks) { + long now = timeNanosSupplier.getAsLong(); + long currentMax = activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); + long currentAvg = (long) activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); + return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg); + } + + @Override + public TaskResourceUsageTracker.Stats searchShardTaskStats(List activeTasks) { long now = timeNanosSupplier.getAsLong(); long currentMax = activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); long currentAvg = (long) activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); - return new Stats(getCancellations(), currentMax, currentAvg); + return new Stats(getSearchShardTaskCancellationCount(), currentMax, currentAvg); } /** diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java 
b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java index d1a264609e522..31f62055dbfc5 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java @@ -8,6 +8,7 @@ package org.opensearch.search.backpressure.trackers; +import org.opensearch.action.search.SearchTask; import org.opensearch.common.settings.Setting; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; @@ -37,13 +38,29 @@ public class HeapUsageTracker extends TaskResourceUsageTracker { private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); private static class Defaults { + private static final double HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY = 0.02; private static final double HEAP_PERCENT_THRESHOLD = 0.005; + private static final double HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY = 2.0; private static final double HEAP_VARIANCE_THRESHOLD = 2.0; + private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY = 100; private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE = 100; } /** - * Defines the heap usage threshold (in percentage) for an individual task before it is considered for cancellation. + * Defines the heap usage threshold (in percentage) for an individual search task before it is considered for cancellation. + */ + private volatile double heapPercentThresholdForSearchQuery; + public static final Setting SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY = Setting.doubleSetting( + "search_backpressure.search_task.heap_percent_threshold_for_search_query", + Defaults.HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY, + 0.0, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the heap usage threshold (in percentage) for an individual search shard task before it is considered for cancellation. 
*/ private volatile double heapPercentThreshold; public static final Setting SETTING_HEAP_PERCENT_THRESHOLD = Setting.doubleSetting( @@ -56,7 +73,20 @@ private static class Defaults { ); /** - * Defines the heap usage variance for an individual task before it is considered for cancellation. + * Defines the heap usage variance for an individual search task before it is considered for cancellation. + * A task is considered for cancellation when taskHeapUsage is greater than or equal to heapUsageMovingAverage * variance. + */ + private volatile double heapVarianceThresholdForSearchQuery; + public static final Setting SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY = Setting.doubleSetting( + "search_backpressure.search_task.heap_variance_for_search_query", + Defaults.HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY, + 0.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the heap usage variance for an individual search shard task before it is considered for cancellation. * A task is considered for cancellation when taskHeapUsage is greater than or equal to heapUsageMovingAverage * variance. */ private volatile double heapVarianceThreshold; @@ -69,7 +99,19 @@ private static class Defaults { ); /** - * Defines the window size to calculate the moving average of heap usage of completed tasks. + * Defines the window size to calculate the moving average of heap usage of completed search tasks. + */ + private volatile int heapMovingAverageWindowSizeForSearchQuery; + public static final Setting SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY = Setting.intSetting( + "search_backpressure.search_task.heap_moving_average_window_size_for_search_query", + Defaults.HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the window size to calculate the moving average of heap usage of completed search shard tasks. 
*/ private volatile int heapMovingAverageWindowSize; public static final Setting SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE = Setting.intSetting( @@ -80,19 +122,33 @@ private static class Defaults { Setting.Property.NodeScope ); + private final AtomicReference movingAverageReferenceForSearchQuery; private final AtomicReference movingAverageReference; public HeapUsageTracker(SearchBackpressureSettings settings) { + heapPercentThresholdForSearchQuery = SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); + settings.getClusterSettings() + .addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY, this::setHeapPercentThresholdForSearchQuery); heapPercentThreshold = SETTING_HEAP_PERCENT_THRESHOLD.get(settings.getSettings()); settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD, this::setHeapPercentThreshold); + heapPercentThresholdForSearchQuery = SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); + settings.getClusterSettings() + .addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY, this::setHeapVarianceThresholdForSearchQuery); heapVarianceThreshold = SETTING_HEAP_VARIANCE_THRESHOLD.get(settings.getSettings()); settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD, this::setHeapVarianceThreshold); + heapMovingAverageWindowSizeForSearchQuery = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY.get(settings.getSettings()); + settings.getClusterSettings() + .addSettingsUpdateConsumer( + SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY, + this::setHeapMovingAverageWindowSizeForSearchQuery + ); heapMovingAverageWindowSize = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.get(settings.getSettings()); settings.getClusterSettings() .addSettingsUpdateConsumer(SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::setHeapMovingAverageWindowSize); + this.movingAverageReferenceForSearchQuery = new AtomicReference<>(new 
MovingAverage(heapMovingAverageWindowSizeForSearchQuery)); this.movingAverageReference = new AtomicReference<>(new MovingAverage(heapMovingAverageWindowSize)); } @@ -103,12 +159,18 @@ public String name() { @Override public void update(Task task) { - movingAverageReference.get().record(task.getTotalResourceStats().getMemoryInBytes()); + if (task instanceof SearchTask) { + movingAverageReferenceForSearchQuery.get().record(task.getTotalResourceStats().getMemoryInBytes()); + } else { + movingAverageReference.get().record(task.getTotalResourceStats().getMemoryInBytes()); + } } @Override public Optional checkAndMaybeGetCancellationReason(Task task) { - MovingAverage movingAverage = movingAverageReference.get(); + MovingAverage movingAverage = (task instanceof SearchTask) + ? movingAverageReferenceForSearchQuery.get() + : movingAverageReference.get(); // There haven't been enough measurements. if (movingAverage.isReady() == false) { @@ -117,9 +179,11 @@ public Optional checkAndMaybeGetCancellationReason(Task double currentUsage = task.getTotalResourceStats().getMemoryInBytes(); double averageUsage = movingAverage.getAverage(); - double allowedUsage = averageUsage * getHeapVarianceThreshold(); + double variance = (task instanceof SearchTask) ? getHeapVarianceThresholdForSearchQuery() : getHeapBytesThreshold(); + double allowedUsage = averageUsage * variance; + double threshold = (task instanceof SearchTask) ? 
getHeapBytesThresholdForSearchQuery() : getHeapBytesThreshold(); - if (currentUsage < getHeapBytesThreshold() || currentUsage < allowedUsage) { + if (currentUsage < threshold || currentUsage < allowedUsage) { return Optional.empty(); } @@ -131,32 +195,60 @@ public Optional checkAndMaybeGetCancellationReason(Task ); } + public long getHeapBytesThresholdForSearchQuery() { + return (long) (HEAP_SIZE_BYTES * heapPercentThresholdForSearchQuery); + } + public long getHeapBytesThreshold() { return (long) (HEAP_SIZE_BYTES * heapPercentThreshold); } + public void setHeapPercentThresholdForSearchQuery(double heapPercentThresholdForSearchQuery) { + this.heapPercentThresholdForSearchQuery = heapPercentThresholdForSearchQuery; + } + public void setHeapPercentThreshold(double heapPercentThreshold) { this.heapPercentThreshold = heapPercentThreshold; } + public double getHeapVarianceThresholdForSearchQuery() { + return heapVarianceThresholdForSearchQuery; + } + public double getHeapVarianceThreshold() { return heapVarianceThreshold; } + public void setHeapVarianceThresholdForSearchQuery(double heapVarianceThresholdForSearchQuery) { + this.heapVarianceThresholdForSearchQuery = heapVarianceThresholdForSearchQuery; + } + public void setHeapVarianceThreshold(double heapVarianceThreshold) { this.heapVarianceThreshold = heapVarianceThreshold; } + public void setHeapMovingAverageWindowSizeForSearchQuery(int heapMovingAverageWindowSizeForSearchQuery) { + this.heapMovingAverageWindowSizeForSearchQuery = heapMovingAverageWindowSizeForSearchQuery; + this.movingAverageReferenceForSearchQuery.set(new MovingAverage(heapMovingAverageWindowSizeForSearchQuery)); + } + public void setHeapMovingAverageWindowSize(int heapMovingAverageWindowSize) { this.heapMovingAverageWindowSize = heapMovingAverageWindowSize; this.movingAverageReference.set(new MovingAverage(heapMovingAverageWindowSize)); } @Override - public TaskResourceUsageTracker.Stats stats(List activeTasks) { + public 
TaskResourceUsageTracker.Stats searchTaskStats(List activeTasks) { + long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); + long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); + return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); + } + + @Override + public TaskResourceUsageTracker.Stats searchShardTaskStats(List activeTasks) { long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); - return new Stats(getCancellations(), currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); + return new Stats(getSearchShardTaskCancellationCount(), currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); } /** diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java index cbbb751b996be..a08ca34cd37bc 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java @@ -26,14 +26,23 @@ public abstract class TaskResourceUsageTracker { /** * Counts the number of cancellations made due to this tracker. 
*/ - private final AtomicLong cancellations = new AtomicLong(); + private final AtomicLong searchTaskCancellationCount = new AtomicLong(); + private final AtomicLong searchShardTaskCancellationCount = new AtomicLong(); - public long incrementCancellations() { - return cancellations.incrementAndGet(); + public long incrementSearchTaskCancellations() { + return searchTaskCancellationCount.incrementAndGet(); } - public long getCancellations() { - return cancellations.get(); + public long incrementSearchShardTaskCancellations() { + return searchShardTaskCancellationCount.incrementAndGet(); + } + + public long getSearchTaskCancellationCount() { + return searchTaskCancellationCount.get(); + } + + public long getSearchShardTaskCancellationCount() { + return searchShardTaskCancellationCount.get(); } /** @@ -52,9 +61,14 @@ public void update(Task task) {} public abstract Optional checkAndMaybeGetCancellationReason(Task task); /** - * Returns the tracker's state as seen in the stats API. + * Returns the tracker's state for SearchTasks as seen in the stats API. + */ + public abstract Stats searchTaskStats(List activeTasks); + + /** + * Returns the tracker's state for SearchShardTasks as seen in the stats API. */ - public abstract Stats stats(List activeTasks); + public abstract Stats searchShardTaskStats(List activeTasks); /** * Represents the tracker's state as seen in the stats API. 
diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index 07a962c6824ca..1285131bf5da8 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -16,6 +16,7 @@ import org.opensearch.search.backpressure.settings.SearchBackpressureMode; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.stats.SearchTaskStats; import org.opensearch.search.backpressure.trackers.NodeDuressTracker; import org.opensearch.common.xcontent.XContentBuilder; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; @@ -120,7 +121,7 @@ public void testTrackerStateUpdateOnTaskCompletion() { for (int i = 0; i < 100; i++) { service.onTaskCompleted(createMockTaskWithResourceStats(SearchShardTask.class, 100, 200)); } - assertEquals(100, service.getState().getCompletionCount()); + assertEquals(100, service.getSearchShardTasksState().getCompletionCount()); verify(mockTaskResourceUsageTracker, times(100)).update(any()); } @@ -150,8 +151,13 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public Stats stats(List activeTasks) { - return new MockStats(getCancellations()); + public Stats searchTaskStats(List activeTasks) { + return new MockStats(getSearchTaskCancellationCount()); + } + + @Override + public Stats searchShardTaskStats(List activeTasks) { + return new MockStats(getSearchShardTaskCancellationCount()); } }; @@ -200,13 +206,13 @@ public Stats stats(List activeTasks) { // There are 15 tasks eligible for cancellation but only 10 will be cancelled (burst limit). 
service.doRun(); - assertEquals(10, service.getState().getCancellationCount()); - assertEquals(1, service.getState().getLimitReachedCount()); + assertEquals(10, service.getSearchShardTasksState().getCancellationCount()); + assertEquals(1, service.getSearchShardTasksState().getLimitReachedCount()); // If the clock or completed task count haven't made sufficient progress, we'll continue to be rate-limited. service.doRun(); - assertEquals(10, service.getState().getCancellationCount()); - assertEquals(2, service.getState().getLimitReachedCount()); + assertEquals(10, service.getSearchShardTasksState().getCancellationCount()); + assertEquals(2, service.getSearchShardTasksState().getLimitReachedCount()); // Simulate task completion to replenish some tokens. // This will add 2 tokens (task count delta * cancellationRatio) to 'rateLimitPerTaskCompletion'. @@ -214,18 +220,19 @@ public Stats stats(List activeTasks) { service.onTaskCompleted(createMockTaskWithResourceStats(SearchShardTask.class, 100, taskHeapUsageBytes)); } service.doRun(); - assertEquals(12, service.getState().getCancellationCount()); - assertEquals(3, service.getState().getLimitReachedCount()); + assertEquals(12, service.getSearchShardTasksState().getCancellationCount()); + assertEquals(3, service.getSearchShardTasksState().getLimitReachedCount()); // Fast-forward the clock by one second to replenish some tokens. // This will add 3 tokens (time delta * rate) to 'rateLimitPerTime'. mockTime.addAndGet(TimeUnit.SECONDS.toNanos(1)); service.doRun(); - assertEquals(15, service.getState().getCancellationCount()); - assertEquals(3, service.getState().getLimitReachedCount()); // no more tasks to cancel; limit not reached + assertEquals(15, service.getSearchShardTasksState().getCancellationCount()); + assertEquals(3, service.getSearchShardTasksState().getLimitReachedCount()); // no more tasks to cancel; limit not reached // Verify search backpressure stats. 
SearchBackpressureStats expectedStats = new SearchBackpressureStats( + new SearchTaskStats(0, 0, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(0))), new SearchShardTaskStats(15, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(15))), SearchBackpressureMode.ENFORCED ); diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java index 2665a6d5e05aa..0c86cf4b11239 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java @@ -25,6 +25,7 @@ protected SearchBackpressureStats createTestInstance() { public static SearchBackpressureStats randomInstance() { return new SearchBackpressureStats( + SearchTaskStatsTests.randomInstance(), SearchShardTaskStatsTests.randomInstance(), randomFrom(SearchBackpressureMode.DISABLED, SearchBackpressureMode.MONITOR_ONLY, SearchBackpressureMode.ENFORCED) ); diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java new file mode 100644 index 0000000000000..59375c22bb932 --- /dev/null +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java @@ -0,0 +1,44 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.search.backpressure.stats; + +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.search.backpressure.trackers.CpuUsageTracker; +import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; +import org.opensearch.search.backpressure.trackers.HeapUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; +import org.opensearch.test.AbstractWireSerializingTestCase; + +import java.util.Map; + +public class SearchTaskStatsTests extends AbstractWireSerializingTestCase { + public static SearchTaskStats randomInstance() { + Map resourceUsageTrackerStats = Map.of( + TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, + new CpuUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), + TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, + new HeapUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), + TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, + new ElapsedTimeTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()) + ); + + return new SearchTaskStats(randomNonNegativeLong(), randomNonNegativeLong(), resourceUsageTrackerStats); + } + + @Override + protected Writeable.Reader instanceReader() { + return SearchTaskStats::new; + } + + @Override + protected SearchTaskStats createTestInstance() { + return randomInstance(); + } +} diff --git a/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java b/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java index e74f89c905499..f30c15de28b90 100644 --- a/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java +++ b/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java @@ -27,16 +27,19 @@ public void testTaskCancellation() { TaskResourceUsageTracker mockTracker3 = 
createMockTaskResourceUsageTracker("mock_tracker_3"); List reasons = new ArrayList<>(); - List callbacks = List.of(mockTracker1::incrementCancellations, mockTracker2::incrementCancellations); + List callbacks = List.of( + mockTracker1::incrementSearchShardTaskCancellations, + mockTracker2::incrementSearchShardTaskCancellations + ); TaskCancellation taskCancellation = new TaskCancellation(mockTask, reasons, callbacks); // Task does not have any reason to be cancelled. assertEquals(0, taskCancellation.totalCancellationScore()); assertFalse(taskCancellation.isEligibleForCancellation()); taskCancellation.cancel(); - assertEquals(0, mockTracker1.getCancellations()); - assertEquals(0, mockTracker2.getCancellations()); - assertEquals(0, mockTracker3.getCancellations()); + assertEquals(0, mockTracker1.getSearchShardTaskCancellationCount()); + assertEquals(0, mockTracker2.getSearchShardTaskCancellationCount()); + assertEquals(0, mockTracker3.getSearchShardTaskCancellationCount()); // Task has one or more reasons to be cancelled. reasons.add(new TaskCancellation.Reason("limits exceeded 1", 10)); @@ -48,9 +51,9 @@ public void testTaskCancellation() { // Cancel the task and validate the cancellation reason and invocation of callbacks. 
taskCancellation.cancel(); assertTrue(mockTask.getReasonCancelled().contains("limits exceeded 1, limits exceeded 2, limits exceeded 3")); - assertEquals(1, mockTracker1.getCancellations()); - assertEquals(1, mockTracker2.getCancellations()); - assertEquals(0, mockTracker3.getCancellations()); + assertEquals(1, mockTracker1.getSearchShardTaskCancellationCount()); + assertEquals(1, mockTracker2.getSearchShardTaskCancellationCount()); + assertEquals(0, mockTracker3.getSearchShardTaskCancellationCount()); } private static TaskResourceUsageTracker createMockTaskResourceUsageTracker(String name) { @@ -69,7 +72,12 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public Stats stats(List activeTasks) { + public Stats searchTaskStats(List activeTasks) { + return null; + } + + @Override + public Stats searchShardTaskStats(List activeTasks) { return null; } }; From 5817770d0df42edcd79a844911426e7c63f659ff Mon Sep 17 00:00:00 2001 From: PritLadani Date: Tue, 20 Dec 2022 13:01:25 +0530 Subject: [PATCH 03/34] Fixing test failures Signed-off-by: PritLadani --- .../backpressure/SearchBackpressureService.java | 4 +++- .../stats/SearchBackpressureStats.java | 13 +++++++++++-- .../trackers/ElapsedTimeTracker.java | 12 ++++++------ .../backpressure/trackers/HeapUsageTracker.java | 16 ++++++++-------- .../SearchBackpressureServiceTests.java | 4 ++-- .../opensearch/tasks/TaskCancellationTests.java | 4 ++-- 6 files changed, 32 insertions(+), 21 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index 2465790176daa..dcfdbe0e7b02b 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -379,7 +379,9 @@ public SearchBackpressureStats nodeStats() { 
searchBackpressureStates.get(SearchTask.class).getLimitReachedCount(), taskResourceUsageTrackers.stream() .collect( - Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.searchTaskStats(searchTasks)) + Collectors.toUnmodifiableMap( + t -> TaskResourceUsageTrackerType.fromName(t.name()), + t -> t.searchTaskStats(searchTasks)) ) ); diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java index 92a52b62477f2..80ed849400d8d 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java @@ -8,6 +8,7 @@ package org.opensearch.search.backpressure.stats; +import org.opensearch.Version; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; import org.opensearch.common.io.stream.Writeable; @@ -37,7 +38,13 @@ public SearchBackpressureStats( } public SearchBackpressureStats(StreamInput in) throws IOException { - this(new SearchTaskStats(in), new SearchShardTaskStats(in), SearchBackpressureMode.fromName(in.readString())); + searchShardTaskStats = new SearchShardTaskStats(in); + mode = SearchBackpressureMode.fromName(in.readString()); + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + searchTaskStats = new SearchTaskStats(in); + } else { + searchTaskStats = null; + } } @Override @@ -51,9 +58,11 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws @Override public void writeTo(StreamOutput out) throws IOException { - searchTaskStats.writeTo(out); searchShardTaskStats.writeTo(out); out.writeString(mode.getName()); + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + searchTaskStats.writeTo(out); + } } @Override diff --git 
a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java index eba8c4ee7afd8..3b1b904178b2c 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java @@ -119,18 +119,18 @@ public void setElapsedTimeMillisThreshold(long elapsedTimeMillisThreshold) { } @Override - public TaskResourceUsageTracker.Stats searchTaskStats(List activeTasks) { + public TaskResourceUsageTracker.Stats searchTaskStats(List searchTasks) { long now = timeNanosSupplier.getAsLong(); - long currentMax = activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); - long currentAvg = (long) activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); + long currentMax = searchTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); + long currentAvg = (long) searchTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg); } @Override - public TaskResourceUsageTracker.Stats searchShardTaskStats(List activeTasks) { + public TaskResourceUsageTracker.Stats searchShardTaskStats(List searchShardTasks) { long now = timeNanosSupplier.getAsLong(); - long currentMax = activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); - long currentAvg = (long) activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); + long currentMax = searchShardTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); + long currentAvg = (long) searchShardTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); return new Stats(getSearchShardTaskCancellationCount(), currentMax, currentAvg); } diff --git 
a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java index 31f62055dbfc5..840b3461e63ae 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java @@ -179,7 +179,7 @@ public Optional checkAndMaybeGetCancellationReason(Task double currentUsage = task.getTotalResourceStats().getMemoryInBytes(); double averageUsage = movingAverage.getAverage(); - double variance = (task instanceof SearchTask) ? getHeapVarianceThresholdForSearchQuery() : getHeapBytesThreshold(); + double variance = (task instanceof SearchTask) ? getHeapVarianceThresholdForSearchQuery() : getHeapVarianceThreshold(); double allowedUsage = averageUsage * variance; double threshold = (task instanceof SearchTask) ? getHeapBytesThresholdForSearchQuery() : getHeapBytesThreshold(); @@ -238,16 +238,16 @@ public void setHeapMovingAverageWindowSize(int heapMovingAverageWindowSize) { } @Override - public TaskResourceUsageTracker.Stats searchTaskStats(List activeTasks) { - long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); - long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); - return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); + public TaskResourceUsageTracker.Stats searchTaskStats(List searchTasks) { + long currentMax = searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); + long currentAvg = (long) searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); + return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg, (long) 
movingAverageReferenceForSearchQuery.get().getAverage()); } @Override - public TaskResourceUsageTracker.Stats searchShardTaskStats(List activeTasks) { - long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); - long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); + public TaskResourceUsageTracker.Stats searchShardTaskStats(List searchShardTasks) { + long currentMax = searchShardTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); + long currentAvg = (long) searchShardTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); return new Stats(getSearchShardTaskCancellationCount(), currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); } diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index 1285131bf5da8..81bb1580436ac 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -151,12 +151,12 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public Stats searchTaskStats(List activeTasks) { + public Stats searchTaskStats(List searchTasks) { return new MockStats(getSearchTaskCancellationCount()); } @Override - public Stats searchShardTaskStats(List activeTasks) { + public Stats searchShardTaskStats(List searchShardTasks) { return new MockStats(getSearchShardTaskCancellationCount()); } }; diff --git a/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java b/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java index f30c15de28b90..b8fa91f2d438b 100644 --- 
a/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java +++ b/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java @@ -72,12 +72,12 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public Stats searchTaskStats(List activeTasks) { + public Stats searchTaskStats(List searchTasks) { return null; } @Override - public Stats searchShardTaskStats(List activeTasks) { + public Stats searchShardTaskStats(List searchShardTasks) { return null; } }; From 1f188a3d0d04e46d5a25e4971cd60b5ba0bc9ee6 Mon Sep 17 00:00:00 2001 From: PritLadani Date: Tue, 20 Dec 2022 14:05:39 +0530 Subject: [PATCH 04/34] java code formatting Signed-off-by: PritLadani --- .../search/backpressure/SearchBackpressureService.java | 4 +--- .../search/backpressure/trackers/HeapUsageTracker.java | 7 ++++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index dcfdbe0e7b02b..2465790176daa 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -379,9 +379,7 @@ public SearchBackpressureStats nodeStats() { searchBackpressureStates.get(SearchTask.class).getLimitReachedCount(), taskResourceUsageTrackers.stream() .collect( - Collectors.toUnmodifiableMap( - t -> TaskResourceUsageTrackerType.fromName(t.name()), - t -> t.searchTaskStats(searchTasks)) + Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.searchTaskStats(searchTasks)) ) ); diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java index 840b3461e63ae..e9c179bc0967c 100644 --- 
a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java @@ -241,7 +241,12 @@ public void setHeapMovingAverageWindowSize(int heapMovingAverageWindowSize) { public TaskResourceUsageTracker.Stats searchTaskStats(List searchTasks) { long currentMax = searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); long currentAvg = (long) searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); - return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg, (long) movingAverageReferenceForSearchQuery.get().getAverage()); + return new Stats( + getSearchTaskCancellationCount(), + currentMax, + currentAvg, + (long) movingAverageReferenceForSearchQuery.get().getAverage() + ); } @Override From 64c6ed08ff0a14eae520e5b20599c93d30b4d33d Mon Sep 17 00:00:00 2001 From: PritLadani Date: Wed, 21 Dec 2022 17:33:00 +0530 Subject: [PATCH 05/34] Adding unit tests and integration tests Signed-off-by: PritLadani --- .../backpressure/SearchBackpressureIT.java | 161 +++++++++++++-- .../trackers/HeapUsageTracker.java | 2 +- .../SearchBackpressureServiceTests.java | 194 ++++++++++++++---- .../trackers/CpuUsageTrackerTests.java | 14 +- .../trackers/ElapsedTimeTrackerTests.java | 14 +- .../trackers/HeapUsageTrackerTests.java | 24 ++- 6 files changed, 347 insertions(+), 62 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java b/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java index f8629e2c88b07..e3ad4e2286e36 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java @@ -17,6 +17,7 @@ import 
org.opensearch.action.ActionResponse; import org.opensearch.action.ActionType; import org.opensearch.action.search.SearchShardTask; +import org.opensearch.action.search.SearchTask; import org.opensearch.action.support.ActionFilters; import org.opensearch.action.support.HandledTransportAction; import org.opensearch.common.inject.Inject; @@ -29,9 +30,11 @@ import org.opensearch.search.backpressure.settings.NodeDuressSettings; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; +import org.opensearch.tasks.CancellableTask; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancelledException; import org.opensearch.tasks.TaskId; @@ -47,6 +50,7 @@ import java.util.Map; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.instanceOf; @@ -56,6 +60,7 @@ public class SearchBackpressureIT extends OpenSearchIntegTestCase { private static final TimeValue TIMEOUT = new TimeValue(10, TimeUnit.SECONDS); + private static final int MOVING_AVERAGE_WINDOW_SIZE = 10; @Override protected Collection> nodePlugins() { @@ -70,6 +75,7 @@ public final void setupNodeSettings() { .put(NodeDuressSettings.SETTING_CPU_THRESHOLD.getKey(), 0.0) .put(NodeDuressSettings.SETTING_HEAP_THRESHOLD.getKey(), 0.0) .put(NodeDuressSettings.SETTING_NUM_SUCCESSIVE_BREACHES.getKey(), 1) + .put(SearchTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 0.0) .put(SearchShardTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD.getKey(), 0.0) .build(); 
assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); @@ -86,6 +92,37 @@ public final void cleanupNodeSettings() { ); } + public void testSearchTaskCancellationWithHighElapsedTime() throws InterruptedException { + Settings request = Settings.builder() + .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") + .put(ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 1000) + .build(); + assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); + + ExceptionCatchingListener listener = new ExceptionCatchingListener(); + client().execute( + TestTransportAction.ACTION, + new TestRequest<>( + RequestType.HIGH_ELAPSED_TIME, + (TaskFactory) (id, type, action, description, parentTaskId, headers) -> new SearchTask( + id, + type, + action, + descriptionSupplier(description), + parentTaskId, + headers + ) + ), + listener + ); + assertTrue(listener.latch.await(TIMEOUT.getSeconds(), TimeUnit.SECONDS)); + + Exception caughtException = listener.getException(); + assertNotNull("SearchTask should have been cancelled with TaskCancelledException", caughtException); + MatcherAssert.assertThat(caughtException, instanceOf(TaskCancelledException.class)); + MatcherAssert.assertThat(caughtException.getMessage(), containsString("elapsed time exceeded")); + } + public void testSearchShardTaskCancellationWithHighElapsedTime() throws InterruptedException { Settings request = Settings.builder() .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") @@ -94,7 +131,7 @@ public void testSearchShardTaskCancellationWithHighElapsedTime() throws Interrup assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); ExceptionCatchingListener listener = new ExceptionCatchingListener(); - client().execute(TestTransportAction.ACTION, new TestRequest(RequestType.HIGH_ELAPSED_TIME), listener); + 
client().execute(TestTransportAction.ACTION, new TestRequest<>(RequestType.HIGH_ELAPSED_TIME, SearchShardTask::new), listener); assertTrue(listener.latch.await(TIMEOUT.getSeconds(), TimeUnit.SECONDS)); Exception caughtException = listener.getException(); @@ -103,6 +140,37 @@ public void testSearchShardTaskCancellationWithHighElapsedTime() throws Interrup MatcherAssert.assertThat(caughtException.getMessage(), containsString("elapsed time exceeded")); } + public void testSearchTaskCancellationWithHighCpu() throws InterruptedException { + Settings request = Settings.builder() + .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") + .put(CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 1000) + .build(); + assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); + + ExceptionCatchingListener listener = new ExceptionCatchingListener(); + client().execute( + TestTransportAction.ACTION, + new TestRequest<>( + RequestType.HIGH_CPU, + (TaskFactory) (id, type, action, description, parentTaskId, headers) -> new SearchTask( + id, + type, + action, + descriptionSupplier(description), + parentTaskId, + headers + ) + ), + listener + ); + assertTrue(listener.latch.await(TIMEOUT.getSeconds(), TimeUnit.SECONDS)); + + Exception caughtException = listener.getException(); + assertNotNull("SearchTask should have been cancelled with TaskCancelledException", caughtException); + MatcherAssert.assertThat(caughtException, instanceOf(TaskCancelledException.class)); + MatcherAssert.assertThat(caughtException.getMessage(), containsString("cpu usage exceeded")); + } + public void testSearchShardTaskCancellationWithHighCpu() throws InterruptedException { Settings request = Settings.builder() .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") @@ -111,7 +179,7 @@ public void testSearchShardTaskCancellationWithHighCpu() throws InterruptedExcep 
assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); ExceptionCatchingListener listener = new ExceptionCatchingListener(); - client().execute(TestTransportAction.ACTION, new TestRequest(RequestType.HIGH_CPU), listener); + client().execute(TestTransportAction.ACTION, new TestRequest<>(RequestType.HIGH_CPU, SearchShardTask::new), listener); assertTrue(listener.latch.await(TIMEOUT.getSeconds(), TimeUnit.SECONDS)); Exception caughtException = listener.getException(); @@ -120,12 +188,67 @@ public void testSearchShardTaskCancellationWithHighCpu() throws InterruptedExcep MatcherAssert.assertThat(caughtException.getMessage(), containsString("cpu usage exceeded")); } + public void testSearchTaskCancellationWithHighHeapUsage() throws InterruptedException { + // Before SearchBackpressureService cancels a task based on its heap usage, we need to build up the heap moving average + // To build up the heap moving average, we need to hit the same node with multiple requests and then hit the same node with a + // request having higher heap usage + String node = randomFrom(internalCluster().getNodeNames()); + Settings request = Settings.builder() + .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") + .put(HeapUsageTracker.SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 0.0) + .put(HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 1.0) + .put(HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY.getKey(), MOVING_AVERAGE_WINDOW_SIZE) + .build(); + assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); + + ExceptionCatchingListener listener = new ExceptionCatchingListener(); + for (int i = 0; i < MOVING_AVERAGE_WINDOW_SIZE; i++) { + client(node).execute( + TestTransportAction.ACTION, + new TestRequest<>( + RequestType.HIGH_HEAP, + (TaskFactory) (id, type, action, description, parentTaskId, headers) -> new SearchTask( + 
id, + type, + action, + descriptionSupplier(description), + parentTaskId, + headers + ) + ), + listener + ); + } + + listener = new ExceptionCatchingListener(); + client(node).execute( + TestTransportAction.ACTION, + new TestRequest<>( + RequestType.HIGHER_HEAP, + (TaskFactory) (id, type, action, description, parentTaskId, headers) -> new SearchTask( + id, + type, + action, + descriptionSupplier(description), + parentTaskId, + headers + ) + ), + listener + ); + assertTrue(listener.latch.await(TIMEOUT.getSeconds(), TimeUnit.SECONDS)); + + Exception caughtException = listener.getException(); + assertNotNull("SearchTask should have been cancelled with TaskCancelledException", caughtException); + MatcherAssert.assertThat(caughtException, instanceOf(TaskCancelledException.class)); + MatcherAssert.assertThat(caughtException.getMessage(), containsString("heap usage exceeded")); + } + public void testSearchShardTaskCancellationWithHighHeapUsage() throws InterruptedException { // Before SearchBackpressureService cancels a task based on its heap usage, we need to build up the heap moving average // To build up the heap moving average, we need to hit the same node with multiple requests and then hit the same node with a // request having higher heap usage String node = randomFrom(internalCluster().getNodeNames()); - final int MOVING_AVERAGE_WINDOW_SIZE = 10; Settings request = Settings.builder() .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") .put(HeapUsageTracker.SETTING_HEAP_PERCENT_THRESHOLD.getKey(), 0.0) @@ -136,11 +259,11 @@ public void testSearchShardTaskCancellationWithHighHeapUsage() throws Interrupte ExceptionCatchingListener listener = new ExceptionCatchingListener(); for (int i = 0; i < MOVING_AVERAGE_WINDOW_SIZE; i++) { - client(node).execute(TestTransportAction.ACTION, new TestRequest(RequestType.HIGH_HEAP), listener); + client(node).execute(TestTransportAction.ACTION, new TestRequest<>(RequestType.HIGH_HEAP, SearchShardTask::new), listener); 
} listener = new ExceptionCatchingListener(); - client(node).execute(TestTransportAction.ACTION, new TestRequest(RequestType.HIGHER_HEAP), listener); + client(node).execute(TestTransportAction.ACTION, new TestRequest<>(RequestType.HIGHER_HEAP, SearchShardTask::new), listener); assertTrue(listener.latch.await(TIMEOUT.getSeconds(), TimeUnit.SECONDS)); Exception caughtException = listener.getException(); @@ -154,7 +277,7 @@ public void testSearchCancellationWithBackpressureDisabled() throws InterruptedE assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); ExceptionCatchingListener listener = new ExceptionCatchingListener(); - client().execute(TestTransportAction.ACTION, new TestRequest(RequestType.HIGH_ELAPSED_TIME), listener); + client().execute(TestTransportAction.ACTION, new TestRequest<>(RequestType.HIGH_ELAPSED_TIME, SearchShardTask::new), listener); // waiting for the TIMEOUT * 3 time for the request to complete and the latch to countdown. 
assertTrue( "SearchShardTask should have been completed by now and countdown the latch", @@ -196,11 +319,21 @@ enum RequestType { HIGH_ELAPSED_TIME; } - public static class TestRequest extends ActionRequest { + private Supplier descriptionSupplier(String description) { + return () -> description; + } + + interface TaskFactory { + T createTask(long id, String type, String action, String description, TaskId parentTaskId, Map headers); + } + + public static class TestRequest extends ActionRequest { private final RequestType type; + private TaskFactory taskFactory; - public TestRequest(RequestType type) { + public TestRequest(RequestType type, TaskFactory taskFactory) { this.type = type; + this.taskFactory = taskFactory; } public TestRequest(StreamInput in) throws IOException { @@ -215,7 +348,7 @@ public ActionRequestValidationException validate() { @Override public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { - return new SearchShardTask(id, type, action, "", parentTaskId, headers); + return taskFactory.createTask(id, type, action, "", parentTaskId, headers); } @Override @@ -252,7 +385,7 @@ public TestTransportAction(TransportService transportService, ThreadPool threadP protected void doExecute(Task task, TestRequest request, ActionListener listener) { threadPool.executor(ThreadPool.Names.SEARCH).execute(() -> { try { - SearchShardTask searchShardTask = (SearchShardTask) task; + CancellableTask cancellableTask = (CancellableTask) task; long startTime = System.nanoTime(); // Doing a busy-wait until task cancellation or timeout. 
@@ -260,11 +393,11 @@ protected void doExecute(Task task, TestRequest request, ActionListener request) throws InterruptedException { switch (request.getType()) { case HIGH_CPU: long i = 0, j = 1, k = 1, iterations = 1000; diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java index e9c179bc0967c..de96f13891112 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java @@ -132,7 +132,7 @@ public HeapUsageTracker(SearchBackpressureSettings settings) { heapPercentThreshold = SETTING_HEAP_PERCENT_THRESHOLD.get(settings.getSettings()); settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD, this::setHeapPercentThreshold); - heapPercentThresholdForSearchQuery = SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); + heapVarianceThresholdForSearchQuery = SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); settings.getClusterSettings() .addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY, this::setHeapVarianceThresholdForSearchQuery); heapVarianceThreshold = SETTING_HEAP_VARIANCE_THRESHOLD.get(settings.getSettings()); diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index 81bb1580436ac..df3f725e25f45 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -9,6 +9,7 @@ package org.opensearch.search.backpressure; import org.opensearch.action.search.SearchShardTask; +import org.opensearch.action.search.SearchTask; 
import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; import org.opensearch.common.settings.ClusterSettings; @@ -16,6 +17,7 @@ import org.opensearch.search.backpressure.settings.SearchBackpressureMode; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.search.backpressure.stats.SearchTaskStats; import org.opensearch.search.backpressure.trackers.NodeDuressTracker; import org.opensearch.common.xcontent.XContentBuilder; @@ -96,7 +98,7 @@ public void testIsNodeInDuress() { assertFalse(service.isNodeInDuress()); } - public void testTrackerStateUpdateOnTaskCompletion() { + public void testTrackerStateUpdateOnSearchTaskCompletion() { TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); ThreadPool mockThreadPool = mock(ThreadPool.class); LongSupplier mockTimeNanosSupplier = () -> TimeUnit.SECONDS.toNanos(1234); @@ -116,7 +118,34 @@ public void testTrackerStateUpdateOnTaskCompletion() { List.of(mockTaskResourceUsageTracker) ); - // Record task completions to update the tracker state. Tasks other than SearchShardTask are ignored. 
+ for (int i = 0; i < 100; i++) { + service.onTaskCompleted(createMockTaskWithResourceStats(SearchTask.class, 100, 200)); + } + assertEquals(100, service.getSearchTasksState().getCompletionCount()); + verify(mockTaskResourceUsageTracker, times(100)).update(any()); + } + + public void testTrackerStateUpdateOnSearchShardTaskCompletion() { + TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); + ThreadPool mockThreadPool = mock(ThreadPool.class); + LongSupplier mockTimeNanosSupplier = () -> TimeUnit.SECONDS.toNanos(1234); + TaskResourceUsageTracker mockTaskResourceUsageTracker = mock(TaskResourceUsageTracker.class); + + SearchBackpressureSettings settings = new SearchBackpressureSettings( + Settings.EMPTY, + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); + + SearchBackpressureService service = new SearchBackpressureService( + settings, + mockTaskResourceTrackingService, + mockThreadPool, + mockTimeNanosSupplier, + Collections.emptyList(), + List.of(mockTaskResourceUsageTracker) + ); + + // Record task completions to update the tracker state. Tasks other than SearchTask & SearchShardTask are ignored. 
service.onTaskCompleted(createMockTaskWithResourceStats(CancellableTask.class, 100, 200)); for (int i = 0; i < 100; i++) { service.onTaskCompleted(createMockTaskWithResourceStats(SearchShardTask.class, 100, 200)); @@ -125,54 +154,86 @@ public void testTrackerStateUpdateOnTaskCompletion() { verify(mockTaskResourceUsageTracker, times(100)).update(any()); } - public void testInFlightCancellation() { + public void testSearchTaskInFlightCancellation() { TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); ThreadPool mockThreadPool = mock(ThreadPool.class); AtomicLong mockTime = new AtomicLong(0); LongSupplier mockTimeNanosSupplier = mockTime::get; NodeDuressTracker mockNodeDuressTracker = new NodeDuressTracker(() -> true); - TaskResourceUsageTracker mockTaskResourceUsageTracker = new TaskResourceUsageTracker() { - @Override - public String name() { - return TaskResourceUsageTrackerType.CPU_USAGE_TRACKER.getName(); - } + TaskResourceUsageTracker mockTaskResourceUsageTracker = getMockedTaskResourceUsageTracker(); - @Override - public void update(Task task) {} + // Mocking 'settings' with predictable rate limiting thresholds. + SearchBackpressureSettings settings = getBackpressureSettings("enforced", 0.2, 0.005, 10.0); - @Override - public Optional checkAndMaybeGetCancellationReason(Task task) { - if (task.getTotalResourceStats().getCpuTimeInNanos() < 300) { - return Optional.empty(); - } + SearchBackpressureService service = new SearchBackpressureService( + settings, + mockTaskResourceTrackingService, + mockThreadPool, + mockTimeNanosSupplier, + List.of(mockNodeDuressTracker), + List.of(mockTaskResourceUsageTracker) + ); - return Optional.of(new TaskCancellation.Reason("limits exceeded", 5)); - } + // Run two iterations so that node is marked 'in duress' from the third iteration onwards. 
+ service.doRun(); + service.doRun(); - @Override - public Stats searchTaskStats(List searchTasks) { - return new MockStats(getSearchTaskCancellationCount()); - } + // Mocking 'settings' with predictable totalHeapBytesThreshold so that cancellation logic doesn't get skipped. + long taskHeapUsageBytes = 500; + SearchTaskSettings searchTaskSettings = mock(SearchTaskSettings.class); + when(searchTaskSettings.getTotalHeapBytesThreshold()).thenReturn(taskHeapUsageBytes); + when(settings.getSearchTaskSettings()).thenReturn(searchTaskSettings); - @Override - public Stats searchShardTaskStats(List searchShardTasks) { - return new MockStats(getSearchShardTaskCancellationCount()); + // Create a mix of low and high resource usage SearchTasks (50 low + 25 high resource usage tasks). + Map activeSearchTasks = new HashMap<>(); + for (long i = 0; i < 75; i++) { + if (i % 3 == 0) { + activeSearchTasks.put(i, createMockTaskWithResourceStats(SearchTask.class, 500, taskHeapUsageBytes)); + } else { + activeSearchTasks.put(i, createMockTaskWithResourceStats(SearchTask.class, 100, taskHeapUsageBytes)); } - }; + } + doReturn(activeSearchTasks).when(mockTaskResourceTrackingService).getResourceAwareTasks(); - // Mocking 'settings' with predictable rate limiting thresholds. - SearchBackpressureSettings settings = spy( - new SearchBackpressureSettings( - Settings.builder() - .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") - .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATIO.getKey(), 0.1) - .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATE.getKey(), 0.003) - .put(SearchBackpressureSettings.SETTING_CANCELLATION_BURST.getKey(), 10.0) - .build(), - new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) - ) + // There are 25 SearchTasks eligible for cancellation but only 10 will be cancelled (burst limit). 
+ service.doRun(); + assertEquals(10, service.getSearchTasksState().getCancellationCount()); + assertEquals(1, service.getSearchTasksState().getLimitReachedCount()); + + // If the clock or completed task count haven't made sufficient progress, we'll continue to be rate-limited. + service.doRun(); + assertEquals(10, service.getSearchTasksState().getCancellationCount()); + assertEquals(2, service.getSearchTasksState().getLimitReachedCount()); + + // Fast-forward the clock by ten second to replenish some tokens. + // This will add 50 tokens (time delta * rate) to 'rateLimitPerTime' but it will cancel only 10 tasks (burst limit). + mockTime.addAndGet(TimeUnit.SECONDS.toNanos(10)); + service.doRun(); + assertEquals(20, service.getSearchTasksState().getCancellationCount()); + assertEquals(3, service.getSearchTasksState().getLimitReachedCount()); + + // Verify search backpressure stats. + SearchBackpressureStats expectedStats = new SearchBackpressureStats( + new SearchTaskStats(20, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(20))), + new SearchShardTaskStats(0, 0, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(0))), + SearchBackpressureMode.ENFORCED ); + SearchBackpressureStats actualStats = service.nodeStats(); + assertEquals(expectedStats, actualStats); + } + + public void testSearchShardTaskInFlightCancellation() { + TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); + ThreadPool mockThreadPool = mock(ThreadPool.class); + AtomicLong mockTime = new AtomicLong(0); + LongSupplier mockTimeNanosSupplier = mockTime::get; + NodeDuressTracker mockNodeDuressTracker = new NodeDuressTracker(() -> true); + + TaskResourceUsageTracker mockTaskResourceUsageTracker = getMockedTaskResourceUsageTracker(); + + // Mocking 'settings' with predictable rate limiting thresholds. 
+ SearchBackpressureSettings settings = getBackpressureSettings("enforced", 0.1, 0.003, 10.0); SearchBackpressureService service = new SearchBackpressureService( settings, @@ -189,22 +250,22 @@ public Stats searchShardTaskStats(List searchShardTasks) { // Mocking 'settings' with predictable totalHeapBytesThreshold so that cancellation logic doesn't get skipped. long taskHeapUsageBytes = 500; - SearchShardTaskSettings shardTaskSettings = mock(SearchShardTaskSettings.class); - when(shardTaskSettings.getTotalHeapBytesThreshold()).thenReturn(taskHeapUsageBytes); - when(settings.getSearchShardTaskSettings()).thenReturn(shardTaskSettings); + SearchShardTaskSettings searchShardTaskSettings = mock(SearchShardTaskSettings.class); + when(searchShardTaskSettings.getTotalHeapBytesThreshold()).thenReturn(taskHeapUsageBytes); + when(settings.getSearchShardTaskSettings()).thenReturn(searchShardTaskSettings); // Create a mix of low and high resource usage tasks (60 low + 15 high resource usage tasks). - Map activeTasks = new HashMap<>(); + Map activeSearchShardTasks = new HashMap<>(); for (long i = 0; i < 75; i++) { if (i % 5 == 0) { - activeTasks.put(i, createMockTaskWithResourceStats(SearchShardTask.class, 500, taskHeapUsageBytes)); + activeSearchShardTasks.put(i, createMockTaskWithResourceStats(SearchShardTask.class, 500, taskHeapUsageBytes)); } else { - activeTasks.put(i, createMockTaskWithResourceStats(SearchShardTask.class, 100, taskHeapUsageBytes)); + activeSearchShardTasks.put(i, createMockTaskWithResourceStats(SearchShardTask.class, 100, taskHeapUsageBytes)); } } - doReturn(activeTasks).when(mockTaskResourceTrackingService).getResourceAwareTasks(); + doReturn(activeSearchShardTasks).when(mockTaskResourceTrackingService).getResourceAwareTasks(); - // There are 15 tasks eligible for cancellation but only 10 will be cancelled (burst limit). + // There are 15 SearchShardTasks eligible for cancellation but only 10 will be cancelled (burst limit). 
service.doRun(); assertEquals(10, service.getSearchShardTasksState().getCancellationCount()); assertEquals(1, service.getSearchShardTasksState().getLimitReachedCount()); @@ -240,6 +301,51 @@ public Stats searchShardTaskStats(List searchShardTasks) { assertEquals(expectedStats, actualStats); } + private SearchBackpressureSettings getBackpressureSettings(String mode, double ratio, double rate, double burst) { + return spy( + new SearchBackpressureSettings( + Settings.builder() + .put(SearchBackpressureSettings.SETTING_MODE.getKey(), mode) + .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATIO.getKey(), ratio) + .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATE.getKey(), rate) + .put(SearchBackpressureSettings.SETTING_CANCELLATION_BURST.getKey(), burst) + .build(), + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ) + ); + } + + private TaskResourceUsageTracker getMockedTaskResourceUsageTracker() { + return new TaskResourceUsageTracker() { + @Override + public String name() { + return TaskResourceUsageTrackerType.CPU_USAGE_TRACKER.getName(); + } + + @Override + public void update(Task task) {} + + @Override + public Optional checkAndMaybeGetCancellationReason(Task task) { + if (task.getTotalResourceStats().getCpuTimeInNanos() < 300) { + return Optional.empty(); + } + + return Optional.of(new TaskCancellation.Reason("limits exceeded", 5)); + } + + @Override + public Stats searchTaskStats(List searchTasks) { + return new MockStats(getSearchTaskCancellationCount()); + } + + @Override + public Stats searchShardTaskStats(List searchShardTasks) { + return new MockStats(getSearchShardTaskCancellationCount()); + } + }; + } + private static class MockStats implements TaskResourceUsageTracker.Stats { private final long cancellationCount; diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/CpuUsageTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/CpuUsageTrackerTests.java 
index c790fb2e60eea..62e2950189436 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/CpuUsageTrackerTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/CpuUsageTrackerTests.java @@ -9,6 +9,7 @@ package org.opensearch.search.backpressure.trackers; import org.opensearch.action.search.SearchShardTask; +import org.opensearch.action.search.SearchTask; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; @@ -24,11 +25,22 @@ public class CpuUsageTrackerTests extends OpenSearchTestCase { private static final SearchBackpressureSettings mockSettings = new SearchBackpressureSettings( Settings.builder() .put(CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD.getKey(), 15) // 15 ms + .put(CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 25) // 25 ms .build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) ); - public void testEligibleForCancellation() { + public void testSearchTaskEligibleForCancellation() { + Task task = createMockTaskWithResourceStats(SearchTask.class, 100000000, 200); + CpuUsageTracker tracker = new CpuUsageTracker(mockSettings); + + Optional reason = tracker.checkAndMaybeGetCancellationReason(task); + assertTrue(reason.isPresent()); + assertEquals(1, reason.get().getCancellationScore()); + assertEquals("cpu usage exceeded [100ms >= 25ms]", reason.get().getMessage()); + } + + public void testSearchShardTaskEligibleForCancellation() { Task task = createMockTaskWithResourceStats(SearchShardTask.class, 200000000, 200); CpuUsageTracker tracker = new CpuUsageTracker(mockSettings); diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTrackerTests.java index 67ed6059a1914..1748ce8d7c253 100644 
--- a/server/src/test/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTrackerTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTrackerTests.java @@ -9,6 +9,7 @@ package org.opensearch.search.backpressure.trackers; import org.opensearch.action.search.SearchShardTask; +import org.opensearch.action.search.SearchTask; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; @@ -25,11 +26,22 @@ public class ElapsedTimeTrackerTests extends OpenSearchTestCase { private static final SearchBackpressureSettings mockSettings = new SearchBackpressureSettings( Settings.builder() .put(ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.getKey(), 100) // 100 ms + .put(ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 150) // 150 ms .build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) ); - public void testEligibleForCancellation() { + public void testSearchTaskEligibleForCancellation() { + Task task = createMockTaskWithResourceStats(SearchTask.class, 1, 1, 0); + ElapsedTimeTracker tracker = new ElapsedTimeTracker(mockSettings, () -> 150000000); + + Optional reason = tracker.checkAndMaybeGetCancellationReason(task); + assertTrue(reason.isPresent()); + assertEquals(1, reason.get().getCancellationScore()); + assertEquals("elapsed time exceeded [150ms >= 150ms]", reason.get().getMessage()); + } + + public void testSearchShardTaskEligibleForCancellation() { Task task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 1, 0); ElapsedTimeTracker tracker = new ElapsedTimeTracker(mockSettings, () -> 200000000); diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java index 
b9967da22fbf1..74f36fe15551e 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java @@ -9,6 +9,7 @@ package org.opensearch.search.backpressure.trackers; import org.opensearch.action.search.SearchShardTask; +import org.opensearch.action.search.SearchTask; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; @@ -24,17 +25,38 @@ public class HeapUsageTrackerTests extends OpenSearchTestCase { private static final long HEAP_BYTES_THRESHOLD = 100; + private static final long HEAP_BYTES_THRESHOLD_FOR_SEARCH_QUERY = 50; private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE = 100; private static final SearchBackpressureSettings mockSettings = new SearchBackpressureSettings( Settings.builder() + .put(HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 3.0) .put(HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD.getKey(), 2.0) + .put(HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY.getKey(), HEAP_MOVING_AVERAGE_WINDOW_SIZE) .put(HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.getKey(), HEAP_MOVING_AVERAGE_WINDOW_SIZE) .build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) ); - public void testEligibleForCancellation() { + public void testSearchTaskEligibleForCancellation() { + HeapUsageTracker tracker = spy(new HeapUsageTracker(mockSettings)); + when(tracker.getHeapBytesThresholdForSearchQuery()).thenReturn(HEAP_BYTES_THRESHOLD_FOR_SEARCH_QUERY); + Task task = createMockTaskWithResourceStats(SearchTask.class, 1, 50); + + // Record enough observations to make the moving average 'ready'. 
+ for (int i = 0; i < HEAP_MOVING_AVERAGE_WINDOW_SIZE; i++) { + tracker.update(task); + } + + // Task that has heap usage >= heapBytesThreshold and (movingAverage * heapVariance). + task = createMockTaskWithResourceStats(SearchTask.class, 1, 300); + Optional reason = tracker.checkAndMaybeGetCancellationReason(task); + assertTrue(reason.isPresent()); + assertEquals(6, reason.get().getCancellationScore()); + assertEquals("heap usage exceeded [300b >= 150b]", reason.get().getMessage()); + } + + public void testSearchShardTaskEligibleForCancellation() { HeapUsageTracker tracker = spy(new HeapUsageTracker(mockSettings)); when(tracker.getHeapBytesThreshold()).thenReturn(HEAP_BYTES_THRESHOLD); Task task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 50); From 5b846a2cb5149a1dd966854dd86151efe811a4ee Mon Sep 17 00:00:00 2001 From: PritLadani Date: Thu, 5 Jan 2023 23:40:13 +0530 Subject: [PATCH 06/34] Introducing separate thresholds for different task types Signed-off-by: PritLadani --- .../backpressure/SearchBackpressureIT.java | 25 +- .../action/search/SearchShardTask.java | 3 +- .../opensearch/action/search/SearchTask.java | 3 +- .../common/settings/ClusterSettings.java | 34 +-- .../SearchBackpressureService.java | 261 ++++++++++++------ .../settings/SearchBackpressureSettings.java | 171 +++++++++--- .../settings/SearchShardTaskSettings.java | 121 +++++++- .../settings/SearchTaskSettings.java | 132 ++++++++- .../stats/SearchBackpressureStats.java | 18 +- ....java => SearchBackpressureTaskStats.java} | 25 +- .../backpressure/stats/SearchTaskStats.java | 100 ------- .../trackers/CpuUsageTracker.java | 76 +---- .../trackers/ElapsedTimeTracker.java | 78 +----- .../trackers/HeapUsageTracker.java | 212 +++----------- .../trackers/TaskResourceUsageTracker.java | 28 +- .../tasks/SearchBackpressureTask.java | 11 + .../SearchBackpressureServiceTests.java | 73 ++--- .../stats/SearchBackpressureStatsTests.java | 4 +- ... 
=> SearchBackpressureTaskStatsTests.java} | 12 +- .../stats/SearchTaskStatsTests.java | 44 --- .../trackers/CpuUsageTrackerTests.java | 12 +- .../trackers/ElapsedTimeTrackerTests.java | 21 +- .../trackers/HeapUsageTrackerTests.java | 56 +++- .../tasks/TaskCancellationTests.java | 24 +- 24 files changed, 795 insertions(+), 749 deletions(-) rename server/src/main/java/org/opensearch/search/backpressure/stats/{SearchShardTaskStats.java => SearchBackpressureTaskStats.java} (80%) delete mode 100644 server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java create mode 100644 server/src/main/java/org/opensearch/tasks/SearchBackpressureTask.java rename server/src/test/java/org/opensearch/search/backpressure/stats/{SearchShardTaskStatsTests.java => SearchBackpressureTaskStatsTests.java} (75%) delete mode 100644 server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java diff --git a/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java b/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java index e3ad4e2286e36..8d343821cfa87 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java @@ -31,9 +31,6 @@ import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; import org.opensearch.search.backpressure.settings.SearchTaskSettings; -import org.opensearch.search.backpressure.trackers.CpuUsageTracker; -import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; -import org.opensearch.search.backpressure.trackers.HeapUsageTracker; import org.opensearch.tasks.CancellableTask; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancelledException; @@ -75,7 +72,7 @@ public 
final void setupNodeSettings() { .put(NodeDuressSettings.SETTING_CPU_THRESHOLD.getKey(), 0.0) .put(NodeDuressSettings.SETTING_HEAP_THRESHOLD.getKey(), 0.0) .put(NodeDuressSettings.SETTING_NUM_SUCCESSIVE_BREACHES.getKey(), 1) - .put(SearchTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 0.0) + .put(SearchTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD.getKey(), 0.0) .put(SearchShardTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD.getKey(), 0.0) .build(); assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); @@ -95,7 +92,7 @@ public final void cleanupNodeSettings() { public void testSearchTaskCancellationWithHighElapsedTime() throws InterruptedException { Settings request = Settings.builder() .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") - .put(ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 1000) + .put(SearchTaskSettings.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.getKey(), 1000) .build(); assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); @@ -126,7 +123,7 @@ public void testSearchTaskCancellationWithHighElapsedTime() throws InterruptedEx public void testSearchShardTaskCancellationWithHighElapsedTime() throws InterruptedException { Settings request = Settings.builder() .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") - .put(ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.getKey(), 1000) + .put(SearchShardTaskSettings.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.getKey(), 1000) .build(); assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); @@ -143,7 +140,7 @@ public void testSearchShardTaskCancellationWithHighElapsedTime() throws Interrup public void testSearchTaskCancellationWithHighCpu() throws InterruptedException { Settings request = Settings.builder() .put(SearchBackpressureSettings.SETTING_MODE.getKey(), 
"enforced") - .put(CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 1000) + .put(SearchTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD.getKey(), 1000) .build(); assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); @@ -174,7 +171,7 @@ public void testSearchTaskCancellationWithHighCpu() throws InterruptedException public void testSearchShardTaskCancellationWithHighCpu() throws InterruptedException { Settings request = Settings.builder() .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") - .put(CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD.getKey(), 1000) + .put(SearchShardTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD.getKey(), 1000) .build(); assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); @@ -195,9 +192,9 @@ public void testSearchTaskCancellationWithHighHeapUsage() throws InterruptedExce String node = randomFrom(internalCluster().getNodeNames()); Settings request = Settings.builder() .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") - .put(HeapUsageTracker.SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 0.0) - .put(HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 1.0) - .put(HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY.getKey(), MOVING_AVERAGE_WINDOW_SIZE) + .put(SearchTaskSettings.SETTING_HEAP_PERCENT_THRESHOLD.getKey(), 0.0) + .put(SearchTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD.getKey(), 1.0) + .put(SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.getKey(), MOVING_AVERAGE_WINDOW_SIZE) .build(); assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); @@ -251,9 +248,9 @@ public void testSearchShardTaskCancellationWithHighHeapUsage() throws Interrupte String node = randomFrom(internalCluster().getNodeNames()); Settings request = Settings.builder() 
.put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") - .put(HeapUsageTracker.SETTING_HEAP_PERCENT_THRESHOLD.getKey(), 0.0) - .put(HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD.getKey(), 1.0) - .put(HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.getKey(), MOVING_AVERAGE_WINDOW_SIZE) + .put(SearchShardTaskSettings.SETTING_HEAP_PERCENT_THRESHOLD.getKey(), 0.0) + .put(SearchShardTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD.getKey(), 1.0) + .put(SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.getKey(), MOVING_AVERAGE_WINDOW_SIZE) .build(); assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); diff --git a/server/src/main/java/org/opensearch/action/search/SearchShardTask.java b/server/src/main/java/org/opensearch/action/search/SearchShardTask.java index c9d0d6e2d3d47..c94f02395cf38 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchShardTask.java +++ b/server/src/main/java/org/opensearch/action/search/SearchShardTask.java @@ -36,6 +36,7 @@ import org.opensearch.search.fetch.ShardFetchSearchRequest; import org.opensearch.search.internal.ShardSearchRequest; import org.opensearch.tasks.CancellableTask; +import org.opensearch.tasks.SearchBackpressureTask; import org.opensearch.tasks.TaskId; import java.util.Map; @@ -47,7 +48,7 @@ * * @opensearch.internal */ -public class SearchShardTask extends CancellableTask { +public class SearchShardTask extends CancellableTask implements SearchBackpressureTask { // generating metadata in a lazy way since source can be quite big private final MemoizedSupplier metadataSupplier; diff --git a/server/src/main/java/org/opensearch/action/search/SearchTask.java b/server/src/main/java/org/opensearch/action/search/SearchTask.java index 987485fe44c65..dad6c44da4f10 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchTask.java +++ b/server/src/main/java/org/opensearch/action/search/SearchTask.java @@ -34,6 +34,7 @@ import 
org.opensearch.common.unit.TimeValue; import org.opensearch.tasks.CancellableTask; +import org.opensearch.tasks.SearchBackpressureTask; import org.opensearch.tasks.TaskId; import java.util.Map; @@ -46,7 +47,7 @@ * * @opensearch.internal */ -public class SearchTask extends CancellableTask { +public class SearchTask extends CancellableTask implements SearchBackpressureTask { // generating description in a lazy way since source can be quite big private final Supplier descriptionSupplier; private SearchProgressListener progressListener = SearchProgressListener.NOOP; diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 8bf979c05ef30..4e3eb764e3401 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -45,9 +45,6 @@ import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; import org.opensearch.search.backpressure.settings.SearchTaskSettings; -import org.opensearch.search.backpressure.trackers.CpuUsageTracker; -import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; -import org.opensearch.search.backpressure.trackers.HeapUsageTracker; import org.opensearch.tasks.TaskManager; import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.watcher.ResourceWatcherService; @@ -595,24 +592,27 @@ public void apply(Settings value, Settings current, Settings previous) { // Settings related to search backpressure SearchBackpressureSettings.SETTING_MODE, - SearchBackpressureSettings.SETTING_CANCELLATION_RATIO, - SearchBackpressureSettings.SETTING_CANCELLATION_RATE, - SearchBackpressureSettings.SETTING_CANCELLATION_BURST, + SearchBackpressureSettings.SETTING_CANCELLATION_RATIO_SEARCH_TASK, + 
SearchBackpressureSettings.SETTING_CANCELLATION_RATE_SEARCH_TASK, + SearchBackpressureSettings.SETTING_CANCELLATION_BURST_SEARCH_TASK, + SearchBackpressureSettings.SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK, + SearchBackpressureSettings.SETTING_CANCELLATION_RATE_SEARCH_SHARD_TASK, + SearchBackpressureSettings.SETTING_CANCELLATION_BURST_SEARCH_SHARD_TASK, NodeDuressSettings.SETTING_NUM_SUCCESSIVE_BREACHES, NodeDuressSettings.SETTING_CPU_THRESHOLD, NodeDuressSettings.SETTING_HEAP_THRESHOLD, SearchShardTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, - HeapUsageTracker.SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY, - HeapUsageTracker.SETTING_HEAP_PERCENT_THRESHOLD, - HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY, - HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD, - HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY, - HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, - CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, - CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD, - ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, - ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, - SearchTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY + SearchTaskSettings.SETTING_HEAP_PERCENT_THRESHOLD, + SearchShardTaskSettings.SETTING_HEAP_PERCENT_THRESHOLD, + SearchTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD, + SearchShardTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD, + SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, + SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, + SearchTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD, + SearchShardTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD, + SearchTaskSettings.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, + SearchShardTaskSettings.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, + SearchTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD ) ) ); diff --git 
a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index 2465790176daa..928b26a835bc4 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -20,8 +20,7 @@ import org.opensearch.search.backpressure.settings.SearchBackpressureMode; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; -import org.opensearch.search.backpressure.stats.SearchShardTaskStats; -import org.opensearch.search.backpressure.stats.SearchTaskStats; +import org.opensearch.search.backpressure.stats.SearchBackpressureTaskStats; import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; @@ -29,6 +28,7 @@ import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; import org.opensearch.tasks.CancellableTask; +import org.opensearch.tasks.SearchBackpressureTask; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; import org.opensearch.tasks.TaskResourceTrackingService; @@ -39,7 +39,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Comparator; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; @@ -67,17 +66,15 @@ public class SearchBackpressureService extends AbstractLifecycleComponent private final LongSupplier timeNanosSupplier; private final List nodeDuressTrackers; - private final List taskResourceUsageTrackers; + private final List searchTaskTrackers; + private final List 
searchShardTaskTrackers; - private final AtomicReference taskCancellationRateLimiter = new AtomicReference<>(); - private final AtomicReference taskCancellationRatioLimiter = new AtomicReference<>(); + private final AtomicReference searchTaskCancellationRateLimiter = new AtomicReference<>(); + private final AtomicReference searchTaskCancellationRatioLimiter = new AtomicReference<>(); + private final AtomicReference searchShardTaskCancellationRateLimiter = new AtomicReference<>(); + private final AtomicReference searchShardTaskCancellationRatioLimiter = new AtomicReference<>(); - private final Map, SearchBackpressureState> searchBackpressureStates = new HashMap<>() { - { - put(SearchTask.class, new SearchBackpressureState()); - put(SearchShardTask.class, new SearchBackpressureState()); - } - }; + private final Map, SearchBackpressureState> searchBackpressureStates; public SearchBackpressureService( SearchBackpressureSettings settings, @@ -97,7 +94,26 @@ public SearchBackpressureService( () -> JvmStats.jvmStats().getMem().getHeapUsedPercent() / 100.0 >= settings.getNodeDuressSettings().getHeapThreshold() ) ), - List.of(new CpuUsageTracker(settings), new HeapUsageTracker(settings), new ElapsedTimeTracker(settings, System::nanoTime)) + List.of( + new CpuUsageTracker(settings.getSearchTaskSettings()::getCpuTimeNanosThreshold), + new HeapUsageTracker( + settings.getSearchTaskSettings()::getHeapVarianceThreshold, + settings.getSearchTaskSettings()::getHeapBytesThreshold, + settings.getSearchTaskSettings()::getHeapMovingAverageWindowSize, + settings.getClusterSettings() + ), + new ElapsedTimeTracker(settings.getSearchTaskSettings()::getElapsedTimeNanosThreshold, System::nanoTime) + ), + List.of( + new CpuUsageTracker(settings.getSearchShardTaskSettings()::getCpuTimeNanosThreshold), + new HeapUsageTracker( + settings.getSearchShardTaskSettings()::getHeapVarianceThreshold, + settings.getSearchShardTaskSettings()::getHeapBytesThreshold, + 
settings.getSearchShardTaskSettings()::getHeapMovingAverageWindowSize, + settings.getClusterSettings() + ), + new ElapsedTimeTracker(settings.getSearchShardTaskSettings()::getElapsedTimeNanosThreshold, System::nanoTime) + ) ); } @@ -107,7 +123,8 @@ public SearchBackpressureService( ThreadPool threadPool, LongSupplier timeNanosSupplier, List nodeDuressTrackers, - List taskResourceUsageTrackers + List searchTaskTrackers, + List searchShardTaskTrackers ) { this.settings = settings; this.settings.addListener(this); @@ -116,20 +133,55 @@ public SearchBackpressureService( this.threadPool = threadPool; this.timeNanosSupplier = timeNanosSupplier; this.nodeDuressTrackers = nodeDuressTrackers; - this.taskResourceUsageTrackers = taskResourceUsageTrackers; + this.searchTaskTrackers = searchTaskTrackers; + this.searchShardTaskTrackers = searchShardTaskTrackers; + + this.searchBackpressureStates = Map.of( + SearchTask.class, + new SearchBackpressureState(), + SearchShardTask.class, + new SearchBackpressureState() + ); + + this.searchTaskCancellationRateLimiter.set( + new TokenBucket( + timeNanosSupplier, + getSettings().getCancellationRateSearchTaskNanos(), + getSettings().getCancellationBurstSearchTask() + ) + ); - this.taskCancellationRateLimiter.set( - new TokenBucket(timeNanosSupplier, getSettings().getCancellationRateNanos(), getSettings().getCancellationBurst()) + this.searchTaskCancellationRatioLimiter.set( + new TokenBucket( + this::getSearchTaskCompletionCount, + getSettings().getCancellationRatioSearchTask(), + getSettings().getCancellationBurstSearchTask() + ) ); - this.taskCancellationRatioLimiter.set( - new TokenBucket(this::getTaskCompletionCount, getSettings().getCancellationRatio(), getSettings().getCancellationBurst()) + this.searchShardTaskCancellationRateLimiter.set( + new TokenBucket( + timeNanosSupplier, + getSettings().getCancellationRateSearchShardTaskNanos(), + getSettings().getCancellationBurstSearchShardTask() + ) ); + + 
this.searchShardTaskCancellationRatioLimiter.set( + new TokenBucket( + this::getSearchShardTaskCompletionCount, + getSettings().getCancellationRatioSearchShardTask(), + getSettings().getCancellationBurstSearchShardTask() + ) + ); + } + + private long getSearchTaskCompletionCount() { + return searchBackpressureStates.get(SearchTask.class).getCompletionCount(); } - private long getTaskCompletionCount() { - return searchBackpressureStates.get(SearchTask.class).getCompletionCount() + searchBackpressureStates.get(SearchShardTask.class) - .getCompletionCount(); + private long getSearchShardTaskCompletionCount() { + return searchBackpressureStates.get(SearchShardTask.class).getCompletionCount(); } void doRun() { @@ -142,8 +194,8 @@ void doRun() { return; } - List searchTasks = getSearchTasks(); - List searchShardTasks = getSearchShardTasks(); + List searchTasks = getTaskByType(SearchTask.class); + List searchShardTasks = getTaskByType(SearchShardTask.class); List cancellableTasks = new ArrayList<>(); // Force-refresh usage stats of these tasks before making a cancellation decision. @@ -160,6 +212,7 @@ void doRun() { cancellableTasks.addAll(searchShardTasks); } + // none of the task type is breaching the heap usage thresholds and hence we do not cancel any tasks if (cancellableTasks.isEmpty()) { return; } @@ -176,9 +229,15 @@ void doRun() { continue; } + boolean isSearchTask = taskCancellation.getTask() instanceof SearchTask; + // Independently remove tokens from both token buckets. - boolean rateLimitReached = taskCancellationRateLimiter.get().request() == false; - boolean ratioLimitReached = taskCancellationRatioLimiter.get().request() == false; + boolean rateLimitReached = isSearchTask + ? searchTaskCancellationRateLimiter.get().request() == false + : searchShardTaskCancellationRateLimiter.get().request() == false; + boolean ratioLimitReached = isSearchTask + ? 
searchTaskCancellationRatioLimiter.get().request() == false + : searchShardTaskCancellationRatioLimiter.get().request() == false; // Stop cancelling tasks if there are no tokens in either of the two token buckets. if (rateLimitReached && ratioLimitReached) { @@ -186,7 +245,9 @@ void doRun() { SearchBackpressureState searchBackpressureState = searchBackpressureStates.get( (taskCancellation.getTask() instanceof SearchTask) ? SearchTask.class : SearchShardTask.class ); - searchBackpressureState.incrementLimitReachedCount(); + if (searchBackpressureState != null) { + searchBackpressureState.incrementLimitReachedCount(); + } break; } @@ -224,26 +285,14 @@ boolean isHeapUsageDominatedBySearch(List cancellableTasks, lon } /** - * Filters and returns the list of currently running SearchShardTasks. - */ - List getSearchShardTasks() { - return taskResourceTrackingService.getResourceAwareTasks() - .values() - .stream() - .filter(task -> task instanceof SearchShardTask) - .map(task -> (SearchShardTask) task) - .collect(Collectors.toUnmodifiableList()); - } - - /** - * Filters and returns the list of currently running SearchTasks. + * Filters and returns the list of currently running tasks of specified type. */ - List getSearchTasks() { + List getTaskByType(Class type) { return taskResourceTrackingService.getResourceAwareTasks() .values() .stream() - .filter(task -> task instanceof SearchTask) - .map(task -> (SearchTask) task) + .filter(type::isInstance) + .map(type::cast) .collect(Collectors.toUnmodifiableList()); } @@ -255,23 +304,21 @@ List getSearchTasks() { TaskCancellation getTaskCancellation(CancellableTask task) { List reasons = new ArrayList<>(); List callbacks = new ArrayList<>(); - - for (TaskResourceUsageTracker tracker : taskResourceUsageTrackers) { + boolean isSearchTask = task instanceof SearchTask; + List trackers = isSearchTask ? 
searchTaskTrackers : searchShardTaskTrackers; + for (TaskResourceUsageTracker tracker : trackers) { Optional reason = tracker.checkAndMaybeGetCancellationReason(task); if (reason.isPresent()) { - if (task instanceof SearchTask) { - callbacks.add(tracker::incrementSearchTaskCancellations); - } else { - callbacks.add(tracker::incrementSearchShardTaskCancellations); - } + callbacks.add(tracker::incrementCancellations); reasons.add(reason.get()); } } - if (task instanceof SearchTask) { - callbacks.add(searchBackpressureStates.get(SearchTask.class)::incrementCancellationCount); - } else { - callbacks.add(searchBackpressureStates.get(SearchShardTask.class)::incrementCancellationCount); + SearchBackpressureState searchBackpressureState = searchBackpressureStates.get( + isSearchTask ? SearchTask.class : SearchShardTask.class + ); + if (searchBackpressureState != null) { + callbacks.add(searchBackpressureState::incrementCancellationCount); } return new TaskCancellation(task, reasons, callbacks); @@ -281,23 +328,20 @@ TaskCancellation getTaskCancellation(CancellableTask task) { * Returns a list of TaskCancellations sorted by descending order of their cancellation scores. 
*/ List getTaskCancellations(List tasks) { - return tasks.stream() + List t = tasks.stream() .map(this::getTaskCancellation) .filter(TaskCancellation::isEligibleForCancellation) .sorted(Comparator.reverseOrder()) .collect(Collectors.toUnmodifiableList()); + return t; } SearchBackpressureSettings getSettings() { return settings; } - SearchBackpressureState getSearchTasksState() { - return searchBackpressureStates.get(SearchTask.class); - } - - SearchBackpressureState getSearchShardTasksState() { - return searchBackpressureStates.get(SearchShardTask.class); + SearchBackpressureState getSearchBackpressureTaskStats(Class taskType) { + return searchBackpressureStates.get(taskType); } @Override @@ -306,47 +350,88 @@ public void onTaskCompleted(Task task) { return; } - if (task instanceof SearchTask == false && task instanceof SearchShardTask == false) { + if (task instanceof SearchBackpressureTask == false) { return; } CancellableTask cancellableTask = (CancellableTask) task; - SearchBackpressureState searchBackpressureState = searchBackpressureStates.get( - (task instanceof SearchTask) ? SearchTask.class : SearchShardTask.class - ); + boolean isSearchTask = task instanceof SearchTask; if (cancellableTask.isCancelled() == false) { - searchBackpressureState.incrementCompletionCount(); + SearchBackpressureState searchBackpressureState = searchBackpressureStates.get( + isSearchTask ? SearchTask.class : SearchShardTask.class + ); + if (searchBackpressureState != null) { + searchBackpressureState.incrementCompletionCount(); + } } List exceptions = new ArrayList<>(); - for (TaskResourceUsageTracker tracker : taskResourceUsageTrackers) { + List trackers = isSearchTask ? 
searchTaskTrackers : searchShardTaskTrackers; + for (TaskResourceUsageTracker tracker : trackers) { try { tracker.update(task); } catch (Exception e) { exceptions.add(e); } } + ExceptionsHelper.maybeThrowRuntimeAndSuppress(exceptions); } @Override - public void onCancellationRatioChanged() { - taskCancellationRatioLimiter.set( - new TokenBucket(this::getTaskCompletionCount, getSettings().getCancellationRatio(), getSettings().getCancellationBurst()) + public void onCancellationRatioSearchTaskChanged() { + searchTaskCancellationRatioLimiter.set( + new TokenBucket( + this::getSearchTaskCompletionCount, + getSettings().getCancellationRatioSearchTask(), + getSettings().getCancellationBurstSearchTask() + ) + ); + } + + @Override + public void onCancellationRateSearchTaskChanged() { + searchTaskCancellationRateLimiter.set( + new TokenBucket( + timeNanosSupplier, + getSettings().getCancellationRateSearchTaskNanos(), + getSettings().getCancellationBurstSearchTask() + ) + ); + } + + @Override + public void onCancellationBurstSearchTaskChanged() { + onCancellationRatioSearchTaskChanged(); + onCancellationRateSearchTaskChanged(); + } + + @Override + public void onCancellationRatioSearchShardTaskChanged() { + searchShardTaskCancellationRatioLimiter.set( + new TokenBucket( + this::getSearchShardTaskCompletionCount, + getSettings().getCancellationRatioSearchShardTask(), + getSettings().getCancellationBurstSearchShardTask() + ) ); } @Override - public void onCancellationRateChanged() { - taskCancellationRateLimiter.set( - new TokenBucket(timeNanosSupplier, getSettings().getCancellationRateNanos(), getSettings().getCancellationBurst()) + public void onCancellationRateSearchShardTaskChanged() { + searchShardTaskCancellationRateLimiter.set( + new TokenBucket( + timeNanosSupplier, + getSettings().getCancellationRateSearchShardTaskNanos(), + getSettings().getCancellationBurstSearchShardTask() + ) ); } @Override - public void onCancellationBurstChanged() { - 
onCancellationRatioChanged(); - onCancellationRateChanged(); + public void onCancellationBurstSearchShardTaskChanged() { + onCancellationRatioSearchShardTaskChanged(); + onCancellationRateSearchShardTaskChanged(); } @Override @@ -371,28 +456,20 @@ protected void doStop() { protected void doClose() throws IOException {} public SearchBackpressureStats nodeStats() { - List searchTasks = getSearchTasks(); - List searchShardTasks = getSearchShardTasks(); - - SearchTaskStats searchTaskStats = new SearchTaskStats( + List searchTasks = getTaskByType(SearchTask.class); + List searchShardTasks = getTaskByType(SearchShardTask.class); + SearchBackpressureTaskStats searchTaskStats = new SearchBackpressureTaskStats( searchBackpressureStates.get(SearchTask.class).getCancellationCount(), searchBackpressureStates.get(SearchTask.class).getLimitReachedCount(), - taskResourceUsageTrackers.stream() - .collect( - Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.searchTaskStats(searchTasks)) - ) + searchTaskTrackers.stream() + .collect(Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.stats(searchTasks))) ); - SearchShardTaskStats searchShardTaskStats = new SearchShardTaskStats( + SearchBackpressureTaskStats searchShardTaskStats = new SearchBackpressureTaskStats( searchBackpressureStates.get(SearchShardTask.class).getCancellationCount(), searchBackpressureStates.get(SearchShardTask.class).getLimitReachedCount(), - taskResourceUsageTrackers.stream() - .collect( - Collectors.toUnmodifiableMap( - t -> TaskResourceUsageTrackerType.fromName(t.name()), - t -> t.searchShardTaskStats(searchShardTasks) - ) - ) + searchShardTaskTrackers.stream() + .collect(Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.stats(searchShardTasks))) ); return new SearchBackpressureStats(searchTaskStats, searchShardTaskStats, getSettings().getMode()); diff --git 
a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java index 3906228389729..e0cd4efd43aac 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java @@ -29,9 +29,13 @@ private static class Defaults { private static final long INTERVAL_MILLIS = 1000; private static final String MODE = "monitor_only"; - private static final double CANCELLATION_RATIO = 0.1; - private static final double CANCELLATION_RATE = 0.003; - private static final double CANCELLATION_BURST = 10.0; + // TODO: decide on default settings for SearchTask + private static final double CANCELLATION_RATIO_SEARCH_TASK = 0.1; + private static final double CANCELLATION_RATE_SEARCH_TASK = 0.003; + private static final double CANCELLATION_BURST_SEARCH_TASK = 10.0; + private static final double CANCELLATION_RATIO_SEARCH_SHARD_TASK = 0.1; + private static final double CANCELLATION_RATE_SEARCH_SHARD_TASK = 0.003; + private static final double CANCELLATION_BURST_SEARCH_SHARD_TASK = 10.0; } /** @@ -56,14 +60,53 @@ private static class Defaults { Setting.Property.NodeScope ); + /** + * Defines the percentage of SearchTasks to cancel relative to the number of successful SearchTask completions. + * In other words, it is the number of tokens added to the bucket on each successful SearchTask completion. + */ + private volatile double cancellationRatioSearchTask; + public static final Setting SETTING_CANCELLATION_RATIO_SEARCH_TASK = Setting.doubleSetting( + "search_backpressure.cancellation_ratio_search_task", + Defaults.CANCELLATION_RATIO_SEARCH_TASK, + 0.0, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the number of tasks to cancel per unit time (in millis). 
+ * In other words, it is the number of tokens added to the bucket each millisecond. + */ + private volatile double cancellationRateSearchTask; + public static final Setting SETTING_CANCELLATION_RATE_SEARCH_TASK = Setting.doubleSetting( + "search_backpressure.cancellation_rate_search_task", + Defaults.CANCELLATION_RATE_SEARCH_TASK, + 0.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the maximum number of tasks that can be cancelled before being rate-limited. + */ + private volatile double cancellationBurstSearchTask; + public static final Setting SETTING_CANCELLATION_BURST_SEARCH_TASK = Setting.doubleSetting( + "search_backpressure.cancellation_burst_search_task", + Defaults.CANCELLATION_BURST_SEARCH_TASK, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + /** * Defines the percentage of tasks to cancel relative to the number of successful task completions. * In other words, it is the number of tokens added to the bucket on each successful task completion. */ - private volatile double cancellationRatio; - public static final Setting SETTING_CANCELLATION_RATIO = Setting.doubleSetting( - "search_backpressure.cancellation_ratio", - Defaults.CANCELLATION_RATIO, + private volatile double cancellationRatioSearchShardTask; + public static final Setting SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK = Setting.doubleSetting( + "search_backpressure.cancellation_ratio_search_shard_task", + Defaults.CANCELLATION_RATIO_SEARCH_SHARD_TASK, 0.0, 1.0, Setting.Property.Dynamic, @@ -74,10 +117,10 @@ private static class Defaults { * Defines the number of tasks to cancel per unit time (in millis). * In other words, it is the number of tokens added to the bucket each millisecond. 
*/ - private volatile double cancellationRate; - public static final Setting SETTING_CANCELLATION_RATE = Setting.doubleSetting( - "search_backpressure.cancellation_rate", - Defaults.CANCELLATION_RATE, + private volatile double cancellationRateSearchShardTask; + public static final Setting SETTING_CANCELLATION_RATE_SEARCH_SHARD_TASK = Setting.doubleSetting( + "search_backpressure.cancellation_rate_search_shard_task", + Defaults.CANCELLATION_RATE_SEARCH_SHARD_TASK, 0.0, Setting.Property.Dynamic, Setting.Property.NodeScope @@ -86,10 +129,10 @@ private static class Defaults { /** * Defines the maximum number of tasks that can be cancelled before being rate-limited. */ - private volatile double cancellationBurst; - public static final Setting SETTING_CANCELLATION_BURST = Setting.doubleSetting( - "search_backpressure.cancellation_burst", - Defaults.CANCELLATION_BURST, + private volatile double cancellationBurstSearchShardTask; + public static final Setting SETTING_CANCELLATION_BURST_SEARCH_SHARD_TASK = Setting.doubleSetting( + "search_backpressure.cancellation_burst_search_shard_task", + Defaults.CANCELLATION_BURST_SEARCH_SHARD_TASK, 1.0, Setting.Property.Dynamic, Setting.Property.NodeScope @@ -99,11 +142,17 @@ private static class Defaults { * Callback listeners. 
*/ public interface Listener { - void onCancellationRatioChanged(); + void onCancellationRatioSearchTaskChanged(); + + void onCancellationRateSearchTaskChanged(); - void onCancellationRateChanged(); + void onCancellationBurstSearchTaskChanged(); - void onCancellationBurstChanged(); + void onCancellationRatioSearchShardTaskChanged(); + + void onCancellationRateSearchShardTaskChanged(); + + void onCancellationBurstSearchShardTaskChanged(); } private final List listeners = new ArrayList<>(); @@ -125,14 +174,23 @@ public SearchBackpressureSettings(Settings settings, ClusterSettings clusterSett mode = SearchBackpressureMode.fromName(SETTING_MODE.get(settings)); clusterSettings.addSettingsUpdateConsumer(SETTING_MODE, s -> this.setMode(SearchBackpressureMode.fromName(s))); - cancellationRatio = SETTING_CANCELLATION_RATIO.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATIO, this::setCancellationRatio); + cancellationRatioSearchTask = SETTING_CANCELLATION_RATIO_SEARCH_TASK.get(settings); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATIO_SEARCH_TASK, this::setCancellationRatioSearchTask); + + cancellationRateSearchTask = SETTING_CANCELLATION_RATE_SEARCH_TASK.get(settings); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATE_SEARCH_TASK, this::setCancellationRateSearchTask); - cancellationRate = SETTING_CANCELLATION_RATE.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATE, this::setCancellationRate); + cancellationBurstSearchTask = SETTING_CANCELLATION_BURST_SEARCH_TASK.get(settings); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST_SEARCH_TASK, this::setCancellationBurstSearchTask); - cancellationBurst = SETTING_CANCELLATION_BURST.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST, this::setCancellationBurst); + cancellationRatioSearchShardTask = SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK.get(settings); + 
clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK, this::setCancellationRatioSearchShardTask); + + cancellationRateSearchShardTask = SETTING_CANCELLATION_RATE_SEARCH_SHARD_TASK.get(settings); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATE_SEARCH_SHARD_TASK, this::setCancellationRateSearchShardTask); + + cancellationBurstSearchShardTask = SETTING_CANCELLATION_BURST_SEARCH_SHARD_TASK.get(settings); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST_SEARCH_SHARD_TASK, this::setCancellationBurstSearchShardTask); } public void addListener(Listener listener) { @@ -171,35 +229,66 @@ public void setMode(SearchBackpressureMode mode) { this.mode = mode; } - public double getCancellationRatio() { - return cancellationRatio; + public double getCancellationRatioSearchTask() { + return cancellationRatioSearchTask; + } + + private void setCancellationRatioSearchTask(double cancellationRatioSearchTask) { + this.cancellationRatioSearchTask = cancellationRatioSearchTask; + notifyListeners(Listener::onCancellationRatioSearchTaskChanged); + } + + public double getCancellationRateSearchTask() { + return cancellationRateSearchTask; + } + + public double getCancellationRateSearchTaskNanos() { + return getCancellationRateSearchTask() / TimeUnit.MILLISECONDS.toNanos(1); // rate per nanoseconds + } + + private void setCancellationRateSearchTask(double cancellationRateSearchTask) { + this.cancellationRateSearchTask = cancellationRateSearchTask; + notifyListeners(Listener::onCancellationRateSearchTaskChanged); + } + + public double getCancellationBurstSearchTask() { + return cancellationBurstSearchTask; + } + + private void setCancellationBurstSearchTask(double cancellationBurstSearchTask) { + this.cancellationBurstSearchTask = cancellationBurstSearchTask; + notifyListeners(Listener::onCancellationBurstSearchTaskChanged); + } + + public double getCancellationRatioSearchShardTask() { + return 
cancellationRatioSearchShardTask; } - private void setCancellationRatio(double cancellationRatio) { - this.cancellationRatio = cancellationRatio; - notifyListeners(Listener::onCancellationRatioChanged); + private void setCancellationRatioSearchShardTask(double cancellationRatioSearchShardTask) { + this.cancellationRatioSearchShardTask = cancellationRatioSearchShardTask; + notifyListeners(Listener::onCancellationRatioSearchShardTaskChanged); } - public double getCancellationRate() { - return cancellationRate; + public double getCancellationRateSearchShardTask() { + return cancellationRateSearchShardTask; } - public double getCancellationRateNanos() { - return getCancellationRate() / TimeUnit.MILLISECONDS.toNanos(1); // rate per nanoseconds + public double getCancellationRateSearchShardTaskNanos() { + return getCancellationRateSearchShardTask() / TimeUnit.MILLISECONDS.toNanos(1); // rate per nanoseconds } - private void setCancellationRate(double cancellationRate) { - this.cancellationRate = cancellationRate; - notifyListeners(Listener::onCancellationRateChanged); + private void setCancellationRateSearchShardTask(double cancellationRateSearchShardTask) { + this.cancellationRateSearchShardTask = cancellationRateSearchShardTask; + notifyListeners(Listener::onCancellationRateSearchShardTaskChanged); } - public double getCancellationBurst() { - return cancellationBurst; + public double getCancellationBurstSearchShardTask() { + return cancellationBurstSearchShardTask; } - private void setCancellationBurst(double cancellationBurst) { - this.cancellationBurst = cancellationBurst; - notifyListeners(Listener::onCancellationBurstChanged); + private void setCancellationBurstSearchShardTask(double cancellationBurstSearchShardTask) { + this.cancellationBurstSearchShardTask = cancellationBurstSearchShardTask; + notifyListeners(Listener::onCancellationBurstSearchShardTaskChanged); } private void notifyListeners(Consumer consumer) { diff --git 
a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java index 7e40f1c0eab53..98599e9478a29 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java @@ -13,6 +13,8 @@ import org.opensearch.common.settings.Settings; import org.opensearch.monitor.jvm.JvmStats; +import java.util.concurrent.TimeUnit; + /** * Defines the settings related to the cancellation of SearchShardTasks. * @@ -23,6 +25,11 @@ public class SearchShardTaskSettings { private static class Defaults { private static final double TOTAL_HEAP_PERCENT_THRESHOLD = 0.05; + private static final long CPU_TIME_MILLIS_THRESHOLD = 15000; + private static final long ELAPSED_TIME_MILLIS_THRESHOLD = 30000; + private static final double HEAP_PERCENT_THRESHOLD = 0.005; + private static final double HEAP_VARIANCE_THRESHOLD = 2.0; + private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE = 100; } /** @@ -39,9 +46,81 @@ private static class Defaults { Setting.Property.NodeScope ); + /** + * Defines the CPU usage threshold (in millis) for an individual search shard task before it is considered for cancellation. + */ + private volatile long cpuTimeMillisThreshold; + public static final Setting SETTING_CPU_TIME_MILLIS_THRESHOLD = Setting.longSetting( + "search_backpressure.search_shard_task.cpu_time_millis_threshold", + Defaults.CPU_TIME_MILLIS_THRESHOLD, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the elapsed time threshold (in millis) for an individual search shard task before it is considered for cancellation. 
+ */ + private volatile long elapsedTimeMillisThreshold; + public static final Setting SETTING_ELAPSED_TIME_MILLIS_THRESHOLD = Setting.longSetting( + "search_backpressure.search_shard_task.elapsed_time_millis_threshold", + Defaults.ELAPSED_TIME_MILLIS_THRESHOLD, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the heap usage threshold (in percentage) for an individual search shard task before it is considered for cancellation. + */ + private volatile double heapPercentThreshold; + public static final Setting SETTING_HEAP_PERCENT_THRESHOLD = Setting.doubleSetting( + "search_backpressure.search_shard_task.heap_percent_threshold", + Defaults.HEAP_PERCENT_THRESHOLD, + 0.0, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the heap usage variance for an individual search shard task before it is considered for cancellation. + * A task is considered for cancellation when taskHeapUsage is greater than or equal to heapUsageMovingAverage * variance. + */ + private volatile double heapVarianceThreshold; + public static final Setting SETTING_HEAP_VARIANCE_THRESHOLD = Setting.doubleSetting( + "search_backpressure.search_shard_task.heap_variance", + Defaults.HEAP_VARIANCE_THRESHOLD, + 0.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the window size to calculate the moving average of heap usage of completed search shard tasks. 
+ */ + private volatile int heapMovingAverageWindowSize; + public static final Setting SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE = Setting.intSetting( + "search_backpressure.search_shard_task.heap_moving_average_window_size", + Defaults.HEAP_MOVING_AVERAGE_WINDOW_SIZE, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + public SearchShardTaskSettings(Settings settings, ClusterSettings clusterSettings) { totalHeapPercentThreshold = SETTING_TOTAL_HEAP_PERCENT_THRESHOLD.get(settings); + this.cpuTimeMillisThreshold = SETTING_CPU_TIME_MILLIS_THRESHOLD.get(settings); + this.elapsedTimeMillisThreshold = SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.get(settings); + heapPercentThreshold = SETTING_HEAP_PERCENT_THRESHOLD.get(settings); + heapVarianceThreshold = SETTING_HEAP_VARIANCE_THRESHOLD.get(settings); + heapMovingAverageWindowSize = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.get(settings); clusterSettings.addSettingsUpdateConsumer(SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, this::setTotalHeapPercentThreshold); + clusterSettings.addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD, this::setCpuTimeMillisThreshold); + clusterSettings.addSettingsUpdateConsumer(SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, this::setElapsedTimeMillisThreshold); + clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD, this::setHeapPercentThreshold); + clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD, this::setHeapVarianceThreshold); + clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::setHeapMovingAverageWindowSize); } public double getTotalHeapPercentThreshold() { @@ -52,7 +131,47 @@ public long getTotalHeapBytesThreshold() { return (long) (HEAP_SIZE_BYTES * getTotalHeapPercentThreshold()); } - private void setTotalHeapPercentThreshold(double totalHeapPercentThreshold) { + public long getCpuTimeNanosThreshold() { + return TimeUnit.MILLISECONDS.toNanos(cpuTimeMillisThreshold); + } + + public long 
getElapsedTimeNanosThreshold() { + return TimeUnit.MILLISECONDS.toNanos(elapsedTimeMillisThreshold); + } + + public long getHeapBytesThreshold() { + return (long) (HEAP_SIZE_BYTES * heapPercentThreshold); + } + + public double getHeapVarianceThreshold() { + return heapVarianceThreshold; + } + + public int getHeapMovingAverageWindowSize() { + return heapMovingAverageWindowSize; + } + + public void setTotalHeapPercentThreshold(double totalHeapPercentThreshold) { this.totalHeapPercentThreshold = totalHeapPercentThreshold; } + + public void setCpuTimeMillisThreshold(long cpuTimeMillisThreshold) { + this.cpuTimeMillisThreshold = cpuTimeMillisThreshold; + } + + public void setElapsedTimeMillisThreshold(long elapsedTimeMillisThreshold) { + this.elapsedTimeMillisThreshold = elapsedTimeMillisThreshold; + } + + public void setHeapPercentThreshold(double heapPercentThreshold) { + this.heapPercentThreshold = heapPercentThreshold; + } + + public void setHeapVarianceThreshold(double heapVarianceThreshold) { + this.heapVarianceThreshold = heapVarianceThreshold; + } + + public void setHeapMovingAverageWindowSize(int heapMovingAverageWindowSize) { + this.heapMovingAverageWindowSize = heapMovingAverageWindowSize; + } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java index f28cdd17a3cff..3b89ba7f3492d 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java @@ -8,11 +8,14 @@ package org.opensearch.search.backpressure.settings; +import org.apache.logging.log4j.LogManager; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.monitor.jvm.JvmStats; +import java.util.concurrent.TimeUnit; + 
/** * Defines the settings related to the cancellation of SearchTasks. * @@ -24,6 +27,11 @@ public class SearchTaskSettings { private static class Defaults { private static final double TOTAL_HEAP_PERCENT_THRESHOLD = 0.05; + private static final long CPU_TIME_MILLIS_THRESHOLD = 60000; + private static final long ELAPSED_TIME_MILLIS_THRESHOLD = 120000; + private static final double HEAP_PERCENT_THRESHOLD = 0.02; + private static final double HEAP_VARIANCE_THRESHOLD = 2.0; + private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE = 100; } /** @@ -31,7 +39,7 @@ private static class Defaults { * before in-flight cancellation is applied. */ private volatile double totalHeapPercentThreshold; - public static final Setting SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY = Setting.doubleSetting( + public static final Setting SETTING_TOTAL_HEAP_PERCENT_THRESHOLD = Setting.doubleSetting( "search_backpressure.search_task.total_heap_percent_threshold", Defaults.TOTAL_HEAP_PERCENT_THRESHOLD, 0.0, @@ -40,12 +48,81 @@ private static class Defaults { Setting.Property.NodeScope ); + /** + * Defines the CPU usage threshold (in millis) for an individual search task before it is considered for cancellation. + */ + private volatile long cpuTimeMillisThreshold; + public static final Setting SETTING_CPU_TIME_MILLIS_THRESHOLD = Setting.longSetting( + "search_backpressure.search_task.cpu_time_millis_threshold", + Defaults.CPU_TIME_MILLIS_THRESHOLD, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the elapsed time threshold (in millis) for an individual search task before it is considered for cancellation. 
+ */ + private volatile long elapsedTimeMillisThreshold; + public static final Setting SETTING_ELAPSED_TIME_MILLIS_THRESHOLD = Setting.longSetting( + "search_backpressure.search_task.elapsed_time_millis_threshold", + Defaults.ELAPSED_TIME_MILLIS_THRESHOLD, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the heap usage threshold (in percentage) for an individual search task before it is considered for cancellation. + */ + private volatile double heapPercentThreshold; + public static final Setting SETTING_HEAP_PERCENT_THRESHOLD = Setting.doubleSetting( + "search_backpressure.search_task.heap_percent_threshold", + Defaults.HEAP_PERCENT_THRESHOLD, + 0.0, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the heap usage variance for an individual search task before it is considered for cancellation. + * A task is considered for cancellation when taskHeapUsage is greater than or equal to heapUsageMovingAverage * variance. + */ + private volatile double heapVarianceThreshold; + public static final Setting SETTING_HEAP_VARIANCE_THRESHOLD = Setting.doubleSetting( + "search_backpressure.search_task.heap_variance", + Defaults.HEAP_VARIANCE_THRESHOLD, + 0.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the window size to calculate the moving average of heap usage of completed search tasks. 
+ */ + private volatile int heapMovingAverageWindowSize; + public static final Setting SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE = Setting.intSetting( + "search_backpressure.search_task.heap_moving_average_window_size", + Defaults.HEAP_MOVING_AVERAGE_WINDOW_SIZE, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + public SearchTaskSettings(Settings settings, ClusterSettings clusterSettings) { - totalHeapPercentThreshold = SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY.get(settings); - clusterSettings.addSettingsUpdateConsumer( - SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY, - this::setTotalHeapPercentThreshold - ); + this.totalHeapPercentThreshold = SETTING_TOTAL_HEAP_PERCENT_THRESHOLD.get(settings); + this.cpuTimeMillisThreshold = SETTING_CPU_TIME_MILLIS_THRESHOLD.get(settings); + this.elapsedTimeMillisThreshold = SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.get(settings); + this.heapPercentThreshold = SETTING_HEAP_PERCENT_THRESHOLD.get(settings); + this.heapVarianceThreshold = SETTING_HEAP_VARIANCE_THRESHOLD.get(settings); + this.heapMovingAverageWindowSize = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.get(settings); + clusterSettings.addSettingsUpdateConsumer(SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, this::setTotalHeapPercentThreshold); + clusterSettings.addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD, this::setCpuTimeMillisThreshold); + clusterSettings.addSettingsUpdateConsumer(SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, this::setElapsedTimeMillisThreshold); + clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD, this::setHeapPercentThreshold); + clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD, this::setHeapVarianceThreshold); + clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::setHeapMovingAverageWindowSize); } public double getTotalHeapPercentThreshold() { @@ -56,7 +133,48 @@ public long getTotalHeapBytesThreshold() { return (long) (HEAP_SIZE_BYTES * 
getTotalHeapPercentThreshold()); } - private void setTotalHeapPercentThreshold(double totalHeapPercentThreshold) { + public long getCpuTimeNanosThreshold() { + return TimeUnit.MILLISECONDS.toNanos(cpuTimeMillisThreshold); + } + + public long getElapsedTimeNanosThreshold() { + return TimeUnit.MILLISECONDS.toNanos(elapsedTimeMillisThreshold); + } + + public long getHeapBytesThreshold() { + return (long) (HEAP_SIZE_BYTES * heapPercentThreshold); + } + + public double getHeapVarianceThreshold() { + return heapVarianceThreshold; + } + + public int getHeapMovingAverageWindowSize() { + return heapMovingAverageWindowSize; + } + + public void setTotalHeapPercentThreshold(double totalHeapPercentThreshold) { this.totalHeapPercentThreshold = totalHeapPercentThreshold; } + + public void setCpuTimeMillisThreshold(long cpuTimeMillisThreshold) { + LogManager.getLogger(SearchTaskSettings.class).info("setCpuTimeMillisThreshold " + cpuTimeMillisThreshold); + this.cpuTimeMillisThreshold = cpuTimeMillisThreshold; + } + + public void setElapsedTimeMillisThreshold(long elapsedTimeMillisThreshold) { + this.elapsedTimeMillisThreshold = elapsedTimeMillisThreshold; + } + + public void setHeapPercentThreshold(double heapPercentThreshold) { + this.heapPercentThreshold = heapPercentThreshold; + } + + public void setHeapVarianceThreshold(double heapVarianceThreshold) { + this.heapVarianceThreshold = heapVarianceThreshold; + } + + public void setHeapMovingAverageWindowSize(int heapMovingAverageWindowSize) { + this.heapMovingAverageWindowSize = heapMovingAverageWindowSize; + } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java index 80ed849400d8d..bd5f24ef0dbee 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java +++ 
b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java @@ -23,13 +23,13 @@ * Stats related to search backpressure. */ public class SearchBackpressureStats implements ToXContentFragment, Writeable { - private final SearchTaskStats searchTaskStats; - private final SearchShardTaskStats searchShardTaskStats; + private final SearchBackpressureTaskStats searchTaskStats; + private final SearchBackpressureTaskStats searchShardTaskStats; private final SearchBackpressureMode mode; public SearchBackpressureStats( - SearchTaskStats searchTaskStats, - SearchShardTaskStats searchShardTaskStats, + SearchBackpressureTaskStats searchTaskStats, + SearchBackpressureTaskStats searchShardTaskStats, SearchBackpressureMode mode ) { this.searchTaskStats = searchTaskStats; @@ -38,10 +38,10 @@ public SearchBackpressureStats( } public SearchBackpressureStats(StreamInput in) throws IOException { - searchShardTaskStats = new SearchShardTaskStats(in); + searchShardTaskStats = new SearchBackpressureTaskStats(in); mode = SearchBackpressureMode.fromName(in.readString()); if (in.getVersion().onOrAfter(Version.V_3_0_0)) { - searchTaskStats = new SearchTaskStats(in); + searchTaskStats = new SearchBackpressureTaskStats(in); } else { searchTaskStats = null; } @@ -60,7 +60,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws public void writeTo(StreamOutput out) throws IOException { searchShardTaskStats.writeTo(out); out.writeString(mode.getName()); - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (Version.CURRENT.onOrAfter(Version.V_3_0_0) && out.getVersion().onOrAfter(Version.V_3_0_0)) { searchTaskStats.writeTo(out); } } @@ -70,7 +70,9 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; SearchBackpressureStats that = (SearchBackpressureStats) o; - return searchTaskStats.equals(that.searchTaskStats) && 
searchShardTaskStats.equals(that.searchShardTaskStats) && mode == that.mode; + return (Version.CURRENT.onOrAfter(Version.V_3_0_0) + && searchTaskStats.equals(that.searchTaskStats) + && searchShardTaskStats.equals(that.searchShardTaskStats)) && mode == that.mode; } @Override diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java similarity index 80% rename from server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java rename to server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java index 4d532cfb12f80..f6925c4c6bc8c 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java @@ -27,12 +27,12 @@ /** * Stats related to cancelled search shard tasks. 
*/ -public class SearchShardTaskStats implements ToXContentObject, Writeable { +public class SearchBackpressureTaskStats implements ToXContentObject, Writeable { private final long cancellationCount; private final long limitReachedCount; private final Map resourceUsageTrackerStats; - public SearchShardTaskStats( + public SearchBackpressureTaskStats( long cancellationCount, long limitReachedCount, Map resourceUsageTrackerStats @@ -42,7 +42,7 @@ public SearchShardTaskStats( this.resourceUsageTrackerStats = resourceUsageTrackerStats; } - public SearchShardTaskStats(StreamInput in) throws IOException { + public SearchBackpressureTaskStats(StreamInput in) throws IOException { this.cancellationCount = in.readVLong(); this.limitReachedCount = in.readVLong(); @@ -85,10 +85,25 @@ public void writeTo(StreamOutput out) throws IOException { public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - SearchShardTaskStats that = (SearchShardTaskStats) o; + SearchBackpressureTaskStats that = (SearchBackpressureTaskStats) o; return cancellationCount == that.cancellationCount && limitReachedCount == that.limitReachedCount - && resourceUsageTrackerStats.equals(that.resourceUsageTrackerStats); + && compareMaps(resourceUsageTrackerStats, that.resourceUsageTrackerStats); + } + + private boolean compareMaps( + Map trackers1, + Map trackers2 + ) { + if (trackers1.size() != trackers2.size()) { + return false; + } + for (Map.Entry e1 : trackers1.entrySet()) { + if (trackers2.containsKey(e1.getKey()) == false || trackers2.get(e1.getKey()).equals(e1.getValue()) == false) { + return false; + } + } + return true; } @Override diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java deleted file mode 100644 index 87318a60b46fd..0000000000000 --- 
a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java +++ /dev/null @@ -1,100 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.search.backpressure.stats; - -import org.opensearch.common.collect.MapBuilder; -import org.opensearch.common.io.stream.StreamInput; -import org.opensearch.common.io.stream.StreamOutput; -import org.opensearch.common.io.stream.Writeable; -import org.opensearch.common.xcontent.ToXContent; -import org.opensearch.common.xcontent.ToXContentObject; -import org.opensearch.common.xcontent.XContentBuilder; -import org.opensearch.search.backpressure.trackers.CpuUsageTracker; -import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; -import org.opensearch.search.backpressure.trackers.HeapUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; - -import java.io.IOException; -import java.util.Map; -import java.util.Objects; - -/** - * Stats related to cancelled search tasks. 
- */ - -public class SearchTaskStats implements ToXContentObject, Writeable { - private final long cancellationCount; - private final long limitReachedCount; - private final Map resourceUsageTrackerStats; - - public SearchTaskStats( - long cancellationCount, - long limitReachedCount, - Map resourceUsageTrackerStats - ) { - this.cancellationCount = cancellationCount; - this.limitReachedCount = limitReachedCount; - this.resourceUsageTrackerStats = resourceUsageTrackerStats; - } - - public SearchTaskStats(StreamInput in) throws IOException { - this.cancellationCount = in.readVLong(); - this.limitReachedCount = in.readVLong(); - - MapBuilder builder = new MapBuilder<>(); - builder.put(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, in.readOptionalWriteable(CpuUsageTracker.Stats::new)); - builder.put(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, in.readOptionalWriteable(HeapUsageTracker.Stats::new)); - builder.put(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, in.readOptionalWriteable(ElapsedTimeTracker.Stats::new)); - this.resourceUsageTrackerStats = builder.immutableMap(); - } - - @Override - public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException { - builder.startObject(); - - builder.startObject("resource_tracker_stats"); - for (Map.Entry entry : resourceUsageTrackerStats.entrySet()) { - builder.field(entry.getKey().getName(), entry.getValue()); - } - builder.endObject(); - - builder.startObject("cancellation_stats") - .field("cancellation_count", cancellationCount) - .field("cancellation_limit_reached_count", limitReachedCount) - .endObject(); - - return builder.endObject(); - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeVLong(cancellationCount); - out.writeVLong(limitReachedCount); - - out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER)); - 
out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER)); - out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER)); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - SearchTaskStats that = (SearchTaskStats) o; - return cancellationCount == that.cancellationCount - && limitReachedCount == that.limitReachedCount - && resourceUsageTrackerStats.equals(that.resourceUsageTrackerStats); - } - - @Override - public int hashCode() { - return Objects.hash(cancellationCount, limitReachedCount, resourceUsageTrackerStats); - } -} diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java index 1e332eca2649c..5215a17a61a8c 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java @@ -8,12 +8,9 @@ package org.opensearch.search.backpressure.trackers; -import org.opensearch.action.search.SearchTask; -import org.opensearch.common.settings.Setting; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; import org.opensearch.common.unit.TimeValue; -import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.common.xcontent.XContentBuilder; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; @@ -23,6 +20,7 @@ import java.util.Objects; import java.util.Optional; import java.util.concurrent.TimeUnit; +import java.util.function.LongSupplier; import static org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType.CPU_USAGE_TRACKER; @@ -32,41 +30,11 @@ * @opensearch.internal */ public class CpuUsageTracker 
extends TaskResourceUsageTracker { - private static class Defaults { - private static final long CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY = 60000; - private static final long CPU_TIME_MILLIS_THRESHOLD = 15000; - } - /** - * Defines the CPU usage threshold (in millis) for an individual search task before it is considered for cancellation. - */ - private volatile long cpuTimeMillisThresholdForSearchQuery; - public static final Setting SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY = Setting.longSetting( - "search_backpressure.search_task.cpu_time_millis_threshold_for_search_query", - Defaults.CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, - 0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); + private final LongSupplier thresholdSupplier; - /** - * Defines the CPU usage threshold (in millis) for an individual search shard task before it is considered for cancellation. - */ - private volatile long cpuTimeMillisThreshold; - public static final Setting SETTING_CPU_TIME_MILLIS_THRESHOLD = Setting.longSetting( - "search_backpressure.search_shard_task.cpu_time_millis_threshold", - Defaults.CPU_TIME_MILLIS_THRESHOLD, - 0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - public CpuUsageTracker(SearchBackpressureSettings settings) { - this.cpuTimeMillisThresholdForSearchQuery = SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); - this.cpuTimeMillisThreshold = SETTING_CPU_TIME_MILLIS_THRESHOLD.get(settings.getSettings()); - settings.getClusterSettings() - .addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, this::setCpuTimeMillisThresholdForSearchQuery); - settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD, this::setCpuTimeMillisThreshold); + public CpuUsageTracker(LongSupplier thresholdSupplier) { + this.thresholdSupplier = thresholdSupplier; } @Override @@ -77,7 +45,7 @@ public String name() { @Override public Optional 
checkAndMaybeGetCancellationReason(Task task) { long usage = task.getTotalResourceStats().getCpuTimeInNanos(); - long threshold = (task instanceof SearchTask) ? getCpuTimeNanosThresholdForSearchQuery() : getCpuTimeNanosThreshold(); + long threshold = thresholdSupplier.getAsLong(); if (usage < threshold) { return Optional.empty(); @@ -95,37 +63,11 @@ public Optional checkAndMaybeGetCancellationReason(Task ); } - public long getCpuTimeNanosThresholdForSearchQuery() { - return TimeUnit.MILLISECONDS.toNanos(cpuTimeMillisThresholdForSearchQuery); - } - - public long getCpuTimeNanosThreshold() { - return TimeUnit.MILLISECONDS.toNanos(cpuTimeMillisThreshold); - } - - public void setCpuTimeMillisThresholdForSearchQuery(long cpuTimeMillisThresholdForSearchQuery) { - this.cpuTimeMillisThresholdForSearchQuery = cpuTimeMillisThresholdForSearchQuery; - } - - public void setCpuTimeMillisThreshold(long cpuTimeMillisThreshold) { - this.cpuTimeMillisThreshold = cpuTimeMillisThreshold; - } - - @Override - public TaskResourceUsageTracker.Stats searchTaskStats(List searchTasks) { - long currentMax = searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); - long currentAvg = (long) searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).average().orElse(0); - return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg); - } - @Override - public TaskResourceUsageTracker.Stats searchShardTaskStats(List searchShardTasks) { - long currentMax = searchShardTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); - long currentAvg = (long) searchShardTasks.stream() - .mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()) - .average() - .orElse(0); - return new Stats(getSearchShardTaskCancellationCount(), currentMax, currentAvg); + public TaskResourceUsageTracker.Stats stats(List tasks) { + long currentMax = tasks.stream().mapToLong(t -> 
t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); + long currentAvg = (long) tasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).average().orElse(0); + return new Stats(getCancellations(), currentMax, currentAvg); } /** diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java index 3b1b904178b2c..d1700861476d6 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java @@ -8,12 +8,9 @@ package org.opensearch.search.backpressure.trackers; -import org.opensearch.action.search.SearchTask; -import org.opensearch.common.settings.Setting; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; import org.opensearch.common.unit.TimeValue; -import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.common.xcontent.XContentBuilder; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; @@ -33,47 +30,12 @@ * @opensearch.internal */ public class ElapsedTimeTracker extends TaskResourceUsageTracker { - private static class Defaults { - private static final long ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY = 120000; - private static final long ELAPSED_TIME_MILLIS_THRESHOLD = 30000; - } - - /** - * Defines the elapsed time threshold (in millis) for an individual search task before it is considered for cancellation. 
- */ - private volatile long elapsedTimeMillisThresholdForSearchQuery; - public static final Setting SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY = Setting.longSetting( - "search_backpressure.search_task.elapsed_time_millis_threshold_for_search_query", - Defaults.ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, - 0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the elapsed time threshold (in millis) for an individual search shard task before it is considered for cancellation. - */ - private volatile long elapsedTimeMillisThreshold; - public static final Setting SETTING_ELAPSED_TIME_MILLIS_THRESHOLD = Setting.longSetting( - "search_backpressure.search_shard_task.elapsed_time_millis_threshold", - Defaults.ELAPSED_TIME_MILLIS_THRESHOLD, - 0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - + private final LongSupplier thresholdSupplier; private final LongSupplier timeNanosSupplier; - public ElapsedTimeTracker(SearchBackpressureSettings settings, LongSupplier timeNanosSupplier) { + public ElapsedTimeTracker(LongSupplier thresholdSupplier, LongSupplier timeNanosSupplier) { + this.thresholdSupplier = thresholdSupplier; this.timeNanosSupplier = timeNanosSupplier; - this.elapsedTimeMillisThresholdForSearchQuery = SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); - this.elapsedTimeMillisThreshold = SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.get(settings.getSettings()); - settings.getClusterSettings() - .addSettingsUpdateConsumer( - SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, - this::setElapsedTimeMillisThresholdForSearchQuery - ); - settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, this::setElapsedTimeMillisThreshold); } @Override @@ -84,7 +46,7 @@ public String name() { @Override public Optional checkAndMaybeGetCancellationReason(Task task) { long usage = timeNanosSupplier.getAsLong() - task.getStartTimeNanos(); - long threshold 
= (task instanceof SearchTask) ? getElapsedTimeNanosThresholdForSearchQuery() : getElapsedTimeNanosThreshold(); + long threshold = thresholdSupplier.getAsLong(); if (usage < threshold) { return Optional.empty(); @@ -102,36 +64,12 @@ public Optional checkAndMaybeGetCancellationReason(Task ); } - public long getElapsedTimeNanosThresholdForSearchQuery() { - return TimeUnit.MILLISECONDS.toNanos(elapsedTimeMillisThresholdForSearchQuery); - } - - public long getElapsedTimeNanosThreshold() { - return TimeUnit.MILLISECONDS.toNanos(elapsedTimeMillisThreshold); - } - - public void setElapsedTimeMillisThresholdForSearchQuery(long elapsedTimeMillisThresholdForSearchQuery) { - this.elapsedTimeMillisThresholdForSearchQuery = elapsedTimeMillisThresholdForSearchQuery; - } - - public void setElapsedTimeMillisThreshold(long elapsedTimeMillisThreshold) { - this.elapsedTimeMillisThreshold = elapsedTimeMillisThreshold; - } - - @Override - public TaskResourceUsageTracker.Stats searchTaskStats(List searchTasks) { - long now = timeNanosSupplier.getAsLong(); - long currentMax = searchTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); - long currentAvg = (long) searchTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); - return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg); - } - @Override - public TaskResourceUsageTracker.Stats searchShardTaskStats(List searchShardTasks) { + public TaskResourceUsageTracker.Stats stats(List tasks) { long now = timeNanosSupplier.getAsLong(); - long currentMax = searchShardTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); - long currentAvg = (long) searchShardTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); - return new Stats(getSearchShardTaskCancellationCount(), currentMax, currentAvg); + long currentMax = tasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); + long currentAvg = (long) 
tasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); + return new Stats(getCancellations(), currentMax, currentAvg); } /** diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java index de96f13891112..915b6ed60f685 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java @@ -8,15 +8,15 @@ package org.opensearch.search.backpressure.trackers; -import org.opensearch.action.search.SearchTask; -import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; import org.opensearch.common.unit.ByteSizeValue; import org.opensearch.common.util.MovingAverage; import org.opensearch.monitor.jvm.JvmStats; -import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; @@ -25,6 +25,9 @@ import java.util.Objects; import java.util.Optional; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.DoubleSupplier; +import java.util.function.IntSupplier; +import java.util.function.LongSupplier; import static org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER; @@ -36,120 +39,25 @@ */ public class HeapUsageTracker extends TaskResourceUsageTracker { private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); - - private static class Defaults { - private static 
final double HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY = 0.02; - private static final double HEAP_PERCENT_THRESHOLD = 0.005; - private static final double HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY = 2.0; - private static final double HEAP_VARIANCE_THRESHOLD = 2.0; - private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY = 100; - private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE = 100; - } - - /** - * Defines the heap usage threshold (in percentage) for an individual search task before it is considered for cancellation. - */ - private volatile double heapPercentThresholdForSearchQuery; - public static final Setting SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY = Setting.doubleSetting( - "search_backpressure.search_task.heap_percent_threshold_for_search_query", - Defaults.HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY, - 0.0, - 1.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the heap usage threshold (in percentage) for an individual search shard task before it is considered for cancellation. - */ - private volatile double heapPercentThreshold; - public static final Setting SETTING_HEAP_PERCENT_THRESHOLD = Setting.doubleSetting( - "search_backpressure.search_shard_task.heap_percent_threshold", - Defaults.HEAP_PERCENT_THRESHOLD, - 0.0, - 1.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the heap usage variance for an individual search task before it is considered for cancellation. - * A task is considered for cancellation when taskHeapUsage is greater than or equal to heapUsageMovingAverage * variance. 
- */ - private volatile double heapVarianceThresholdForSearchQuery; - public static final Setting SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY = Setting.doubleSetting( - "search_backpressure.search_task.heap_variance_for_search_query", - Defaults.HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY, - 0.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the heap usage variance for an individual search shard task before it is considered for cancellation. - * A task is considered for cancellation when taskHeapUsage is greater than or equal to heapUsageMovingAverage * variance. - */ - private volatile double heapVarianceThreshold; - public static final Setting SETTING_HEAP_VARIANCE_THRESHOLD = Setting.doubleSetting( - "search_backpressure.search_shard_task.heap_variance", - Defaults.HEAP_VARIANCE_THRESHOLD, - 0.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the window size to calculate the moving average of heap usage of completed search tasks. - */ - private volatile int heapMovingAverageWindowSizeForSearchQuery; - public static final Setting SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY = Setting.intSetting( - "search_backpressure.search_task.heap_moving_average_window_size_for_search_query", - Defaults.HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY, - 0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the window size to calculate the moving average of heap usage of completed search shard tasks. 
- */ - private volatile int heapMovingAverageWindowSize; - public static final Setting SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE = Setting.intSetting( - "search_backpressure.search_shard_task.heap_moving_average_window_size", - Defaults.HEAP_MOVING_AVERAGE_WINDOW_SIZE, - 0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - private final AtomicReference movingAverageReferenceForSearchQuery; + private final DoubleSupplier heapVarianceSupplier; + private final LongSupplier heapBytesThresholdSupplier; + private final IntSupplier windowSizeSupplier; private final AtomicReference movingAverageReference; - public HeapUsageTracker(SearchBackpressureSettings settings) { - heapPercentThresholdForSearchQuery = SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); - settings.getClusterSettings() - .addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY, this::setHeapPercentThresholdForSearchQuery); - heapPercentThreshold = SETTING_HEAP_PERCENT_THRESHOLD.get(settings.getSettings()); - settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD, this::setHeapPercentThreshold); - - heapVarianceThresholdForSearchQuery = SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); - settings.getClusterSettings() - .addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY, this::setHeapVarianceThresholdForSearchQuery); - heapVarianceThreshold = SETTING_HEAP_VARIANCE_THRESHOLD.get(settings.getSettings()); - settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD, this::setHeapVarianceThreshold); - - heapMovingAverageWindowSizeForSearchQuery = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY.get(settings.getSettings()); - settings.getClusterSettings() - .addSettingsUpdateConsumer( - SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY, - this::setHeapMovingAverageWindowSizeForSearchQuery - ); - heapMovingAverageWindowSize = 
SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.get(settings.getSettings()); - settings.getClusterSettings() - .addSettingsUpdateConsumer(SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::setHeapMovingAverageWindowSize); - - this.movingAverageReferenceForSearchQuery = new AtomicReference<>(new MovingAverage(heapMovingAverageWindowSizeForSearchQuery)); - this.movingAverageReference = new AtomicReference<>(new MovingAverage(heapMovingAverageWindowSize)); + public HeapUsageTracker( + DoubleSupplier heapVarianceSupplier, + LongSupplier heapBytesThresholdSupplier, + IntSupplier windowSizeSupplier, + ClusterSettings clusterSettings + ) { + this.heapVarianceSupplier = heapVarianceSupplier; + this.heapBytesThresholdSupplier = heapBytesThresholdSupplier; + this.windowSizeSupplier = windowSizeSupplier; + this.movingAverageReference = new AtomicReference<>(new MovingAverage(windowSizeSupplier.getAsInt())); + // TODO: find a way to get the type of the setting SearchTaskSettings/SearchShardTaskSettings and then add consumer only for the + // required setting + clusterSettings.addSettingsUpdateConsumer(SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::updateWindowSize); + clusterSettings.addSettingsUpdateConsumer(SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::updateWindowSize); } @Override @@ -159,18 +67,12 @@ public String name() { @Override public void update(Task task) { - if (task instanceof SearchTask) { - movingAverageReferenceForSearchQuery.get().record(task.getTotalResourceStats().getMemoryInBytes()); - } else { - movingAverageReference.get().record(task.getTotalResourceStats().getMemoryInBytes()); - } + movingAverageReference.get().record(task.getTotalResourceStats().getMemoryInBytes()); } @Override public Optional checkAndMaybeGetCancellationReason(Task task) { - MovingAverage movingAverage = (task instanceof SearchTask) - ? 
movingAverageReferenceForSearchQuery.get() - : movingAverageReference.get(); + MovingAverage movingAverage = movingAverageReference.get(); // There haven't been enough measurements. if (movingAverage.isReady() == false) { @@ -179,9 +81,9 @@ public Optional checkAndMaybeGetCancellationReason(Task double currentUsage = task.getTotalResourceStats().getMemoryInBytes(); double averageUsage = movingAverage.getAverage(); - double variance = (task instanceof SearchTask) ? getHeapVarianceThresholdForSearchQuery() : getHeapVarianceThreshold(); + double variance = heapVarianceSupplier.getAsDouble(); double allowedUsage = averageUsage * variance; - double threshold = (task instanceof SearchTask) ? getHeapBytesThresholdForSearchQuery() : getHeapBytesThreshold(); + double threshold = heapBytesThresholdSupplier.getAsLong(); if (currentUsage < threshold || currentUsage < allowedUsage) { return Optional.empty(); @@ -195,65 +97,15 @@ public Optional checkAndMaybeGetCancellationReason(Task ); } - public long getHeapBytesThresholdForSearchQuery() { - return (long) (HEAP_SIZE_BYTES * heapPercentThresholdForSearchQuery); - } - - public long getHeapBytesThreshold() { - return (long) (HEAP_SIZE_BYTES * heapPercentThreshold); - } - - public void setHeapPercentThresholdForSearchQuery(double heapPercentThresholdForSearchQuery) { - this.heapPercentThresholdForSearchQuery = heapPercentThresholdForSearchQuery; - } - - public void setHeapPercentThreshold(double heapPercentThreshold) { - this.heapPercentThreshold = heapPercentThreshold; - } - - public double getHeapVarianceThresholdForSearchQuery() { - return heapVarianceThresholdForSearchQuery; - } - - public double getHeapVarianceThreshold() { - return heapVarianceThreshold; - } - - public void setHeapVarianceThresholdForSearchQuery(double heapVarianceThresholdForSearchQuery) { - this.heapVarianceThresholdForSearchQuery = heapVarianceThresholdForSearchQuery; - } - - public void setHeapVarianceThreshold(double heapVarianceThreshold) { - 
this.heapVarianceThreshold = heapVarianceThreshold; - } - - public void setHeapMovingAverageWindowSizeForSearchQuery(int heapMovingAverageWindowSizeForSearchQuery) { - this.heapMovingAverageWindowSizeForSearchQuery = heapMovingAverageWindowSizeForSearchQuery; - this.movingAverageReferenceForSearchQuery.set(new MovingAverage(heapMovingAverageWindowSizeForSearchQuery)); - } - - public void setHeapMovingAverageWindowSize(int heapMovingAverageWindowSize) { - this.heapMovingAverageWindowSize = heapMovingAverageWindowSize; + private void updateWindowSize(int heapMovingAverageWindowSize) { this.movingAverageReference.set(new MovingAverage(heapMovingAverageWindowSize)); } @Override - public TaskResourceUsageTracker.Stats searchTaskStats(List searchTasks) { - long currentMax = searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); - long currentAvg = (long) searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); - return new Stats( - getSearchTaskCancellationCount(), - currentMax, - currentAvg, - (long) movingAverageReferenceForSearchQuery.get().getAverage() - ); - } - - @Override - public TaskResourceUsageTracker.Stats searchShardTaskStats(List searchShardTasks) { - long currentMax = searchShardTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); - long currentAvg = (long) searchShardTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); - return new Stats(getSearchShardTaskCancellationCount(), currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); + public TaskResourceUsageTracker.Stats stats(List tasks) { + long currentMax = tasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); + long currentAvg = (long) tasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); + return new Stats(getCancellations(), 
currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); } /** diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java index a08ca34cd37bc..e54cfcd5d3970 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java @@ -26,23 +26,14 @@ public abstract class TaskResourceUsageTracker { /** * Counts the number of cancellations made due to this tracker. */ - private final AtomicLong searchTaskCancellationCount = new AtomicLong(); - private final AtomicLong searchShardTaskCancellationCount = new AtomicLong(); + private final AtomicLong cancellations = new AtomicLong(); - public long incrementSearchTaskCancellations() { - return searchTaskCancellationCount.incrementAndGet(); + public long incrementCancellations() { + return cancellations.incrementAndGet(); } - public long incrementSearchShardTaskCancellations() { - return searchShardTaskCancellationCount.incrementAndGet(); - } - - public long getSearchTaskCancellationCount() { - return searchTaskCancellationCount.get(); - } - - public long getSearchShardTaskCancellationCount() { - return searchShardTaskCancellationCount.get(); + public long getCancellations() { + return cancellations.get(); } /** @@ -61,14 +52,9 @@ public void update(Task task) {} public abstract Optional checkAndMaybeGetCancellationReason(Task task); /** - * Returns the tracker's state for SearchTasks as seen in the stats API. - */ - public abstract Stats searchTaskStats(List activeTasks); - - /** - * Returns the tracker's state for SearchShardTasks as seen in the stats API. + * Returns the tracker's state for tasks as seen in the stats API. 
*/ - public abstract Stats searchShardTaskStats(List activeTasks); + public abstract Stats stats(List activeTasks); /** * Represents the tracker's state as seen in the stats API. diff --git a/server/src/main/java/org/opensearch/tasks/SearchBackpressureTask.java b/server/src/main/java/org/opensearch/tasks/SearchBackpressureTask.java new file mode 100644 index 0000000000000..89dc5ef1938e2 --- /dev/null +++ b/server/src/main/java/org/opensearch/tasks/SearchBackpressureTask.java @@ -0,0 +1,11 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.tasks; + +public interface SearchBackpressureTask {} diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index df3f725e25f45..d2dce8731d141 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -8,6 +8,7 @@ package org.opensearch.search.backpressure; +import org.apache.logging.log4j.LogManager; import org.opensearch.action.search.SearchShardTask; import org.opensearch.action.search.SearchTask; import org.opensearch.common.io.stream.StreamInput; @@ -18,11 +19,10 @@ import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; import org.opensearch.search.backpressure.settings.SearchTaskSettings; -import org.opensearch.search.backpressure.stats.SearchTaskStats; import org.opensearch.search.backpressure.trackers.NodeDuressTracker; import org.opensearch.common.xcontent.XContentBuilder; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; -import 
org.opensearch.search.backpressure.stats.SearchShardTaskStats; +import org.opensearch.search.backpressure.stats.SearchBackpressureTaskStats; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; import org.opensearch.tasks.CancellableTask; @@ -75,6 +75,7 @@ public void testIsNodeInDuress() { mockThreadPool, System::nanoTime, List.of(cpuUsageTracker, heapUsageTracker), + Collections.emptyList(), Collections.emptyList() ); @@ -115,13 +116,15 @@ public void testTrackerStateUpdateOnSearchTaskCompletion() { mockThreadPool, mockTimeNanosSupplier, Collections.emptyList(), - List.of(mockTaskResourceUsageTracker) + List.of(mockTaskResourceUsageTracker), + Collections.emptyList() ); for (int i = 0; i < 100; i++) { + // service.onTaskCompleted(new SearchTask(1, "test", "test", () -> "Test", TaskId.EMPTY_TASK_ID, new HashMap<>())); service.onTaskCompleted(createMockTaskWithResourceStats(SearchTask.class, 100, 200)); } - assertEquals(100, service.getSearchTasksState().getCompletionCount()); + assertEquals(100, service.getSearchBackpressureTaskStats(SearchTask.class).getCompletionCount()); verify(mockTaskResourceUsageTracker, times(100)).update(any()); } @@ -142,6 +145,7 @@ public void testTrackerStateUpdateOnSearchShardTaskCompletion() { mockThreadPool, mockTimeNanosSupplier, Collections.emptyList(), + Collections.emptyList(), List.of(mockTaskResourceUsageTracker) ); @@ -150,7 +154,7 @@ public void testTrackerStateUpdateOnSearchShardTaskCompletion() { for (int i = 0; i < 100; i++) { service.onTaskCompleted(createMockTaskWithResourceStats(SearchShardTask.class, 100, 200)); } - assertEquals(100, service.getSearchShardTasksState().getCompletionCount()); + assertEquals(100, service.getSearchBackpressureTaskStats(SearchShardTask.class).getCompletionCount()); verify(mockTaskResourceUsageTracker, times(100)).update(any()); } @@ -172,7 +176,8 @@ public void 
testSearchTaskInFlightCancellation() { mockThreadPool, mockTimeNanosSupplier, List.of(mockNodeDuressTracker), - List.of(mockTaskResourceUsageTracker) + List.of(mockTaskResourceUsageTracker), + Collections.emptyList() ); // Run two iterations so that node is marked 'in duress' from the third iteration onwards. @@ -197,26 +202,29 @@ public void testSearchTaskInFlightCancellation() { doReturn(activeSearchTasks).when(mockTaskResourceTrackingService).getResourceAwareTasks(); // There are 25 SearchTasks eligible for cancellation but only 10 will be cancelled (burst limit). + LogManager.getLogger(SearchBackpressureServiceTests.class).info("first run"); service.doRun(); - assertEquals(10, service.getSearchTasksState().getCancellationCount()); - assertEquals(1, service.getSearchTasksState().getLimitReachedCount()); + assertEquals(10, service.getSearchBackpressureTaskStats(SearchTask.class).getCancellationCount()); + assertEquals(1, service.getSearchBackpressureTaskStats(SearchTask.class).getLimitReachedCount()); // If the clock or completed task count haven't made sufficient progress, we'll continue to be rate-limited. + LogManager.getLogger(SearchBackpressureServiceTests.class).info("second run"); service.doRun(); - assertEquals(10, service.getSearchTasksState().getCancellationCount()); - assertEquals(2, service.getSearchTasksState().getLimitReachedCount()); + assertEquals(10, service.getSearchBackpressureTaskStats(SearchTask.class).getCancellationCount()); + assertEquals(2, service.getSearchBackpressureTaskStats(SearchTask.class).getLimitReachedCount()); // Fast-forward the clock by ten second to replenish some tokens. // This will add 50 tokens (time delta * rate) to 'rateLimitPerTime' but it will cancel only 10 tasks (burst limit). 
mockTime.addAndGet(TimeUnit.SECONDS.toNanos(10)); + LogManager.getLogger(SearchBackpressureServiceTests.class).info("third run"); service.doRun(); - assertEquals(20, service.getSearchTasksState().getCancellationCount()); - assertEquals(3, service.getSearchTasksState().getLimitReachedCount()); + assertEquals(20, service.getSearchBackpressureTaskStats(SearchTask.class).getCancellationCount()); + assertEquals(3, service.getSearchBackpressureTaskStats(SearchTask.class).getLimitReachedCount()); // Verify search backpressure stats. SearchBackpressureStats expectedStats = new SearchBackpressureStats( - new SearchTaskStats(20, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(20))), - new SearchShardTaskStats(0, 0, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(0))), + new SearchBackpressureTaskStats(20, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(20))), + new SearchBackpressureTaskStats(0, 0, Collections.emptyMap()), SearchBackpressureMode.ENFORCED ); SearchBackpressureStats actualStats = service.nodeStats(); @@ -241,6 +249,7 @@ public void testSearchShardTaskInFlightCancellation() { mockThreadPool, mockTimeNanosSupplier, List.of(mockNodeDuressTracker), + Collections.emptyList(), List.of(mockTaskResourceUsageTracker) ); @@ -267,13 +276,13 @@ public void testSearchShardTaskInFlightCancellation() { // There are 15 SearchShardTasks eligible for cancellation but only 10 will be cancelled (burst limit). service.doRun(); - assertEquals(10, service.getSearchShardTasksState().getCancellationCount()); - assertEquals(1, service.getSearchShardTasksState().getLimitReachedCount()); + assertEquals(10, service.getSearchBackpressureTaskStats(SearchShardTask.class).getCancellationCount()); + assertEquals(1, service.getSearchBackpressureTaskStats(SearchShardTask.class).getLimitReachedCount()); // If the clock or completed task count haven't made sufficient progress, we'll continue to be rate-limited. 
service.doRun(); - assertEquals(10, service.getSearchShardTasksState().getCancellationCount()); - assertEquals(2, service.getSearchShardTasksState().getLimitReachedCount()); + assertEquals(10, service.getSearchBackpressureTaskStats(SearchShardTask.class).getCancellationCount()); + assertEquals(2, service.getSearchBackpressureTaskStats(SearchShardTask.class).getLimitReachedCount()); // Simulate task completion to replenish some tokens. // This will add 2 tokens (task count delta * cancellationRatio) to 'rateLimitPerTaskCompletion'. @@ -281,20 +290,21 @@ public void testSearchShardTaskInFlightCancellation() { service.onTaskCompleted(createMockTaskWithResourceStats(SearchShardTask.class, 100, taskHeapUsageBytes)); } service.doRun(); - assertEquals(12, service.getSearchShardTasksState().getCancellationCount()); - assertEquals(3, service.getSearchShardTasksState().getLimitReachedCount()); + assertEquals(12, service.getSearchBackpressureTaskStats(SearchShardTask.class).getCancellationCount()); + assertEquals(3, service.getSearchBackpressureTaskStats(SearchShardTask.class).getLimitReachedCount()); // Fast-forward the clock by one second to replenish some tokens. // This will add 3 tokens (time delta * rate) to 'rateLimitPerTime'. mockTime.addAndGet(TimeUnit.SECONDS.toNanos(1)); service.doRun(); - assertEquals(15, service.getSearchShardTasksState().getCancellationCount()); - assertEquals(3, service.getSearchShardTasksState().getLimitReachedCount()); // no more tasks to cancel; limit not reached + assertEquals(15, service.getSearchBackpressureTaskStats(SearchShardTask.class).getCancellationCount()); + assertEquals(3, service.getSearchBackpressureTaskStats(SearchShardTask.class).getLimitReachedCount()); // no more tasks to cancel; + // limit not reached // Verify search backpressure stats. 
SearchBackpressureStats expectedStats = new SearchBackpressureStats( - new SearchTaskStats(0, 0, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(0))), - new SearchShardTaskStats(15, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(15))), + new SearchBackpressureTaskStats(0, 0, Collections.emptyMap()), + new SearchBackpressureTaskStats(15, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(15))), SearchBackpressureMode.ENFORCED ); SearchBackpressureStats actualStats = service.nodeStats(); @@ -306,9 +316,9 @@ private SearchBackpressureSettings getBackpressureSettings(String mode, double r new SearchBackpressureSettings( Settings.builder() .put(SearchBackpressureSettings.SETTING_MODE.getKey(), mode) - .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATIO.getKey(), ratio) - .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATE.getKey(), rate) - .put(SearchBackpressureSettings.SETTING_CANCELLATION_BURST.getKey(), burst) + .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK.getKey(), ratio) + .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATE_SEARCH_TASK.getKey(), rate) + .put(SearchBackpressureSettings.SETTING_CANCELLATION_BURST_SEARCH_TASK.getKey(), burst) .build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) ) @@ -335,13 +345,8 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public Stats searchTaskStats(List searchTasks) { - return new MockStats(getSearchTaskCancellationCount()); - } - - @Override - public Stats searchShardTaskStats(List searchShardTasks) { - return new MockStats(getSearchShardTaskCancellationCount()); + public Stats stats(List tasks) { + return new MockStats(getCancellations()); } }; } diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java 
b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java index 0c86cf4b11239..3c301b40b5f4f 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java @@ -25,8 +25,8 @@ protected SearchBackpressureStats createTestInstance() { public static SearchBackpressureStats randomInstance() { return new SearchBackpressureStats( - SearchTaskStatsTests.randomInstance(), - SearchShardTaskStatsTests.randomInstance(), + SearchBackpressureTaskStatsTests.randomInstance(), + SearchBackpressureTaskStatsTests.randomInstance(), randomFrom(SearchBackpressureMode.DISABLED, SearchBackpressureMode.MONITOR_ONLY, SearchBackpressureMode.ENFORCED) ); } diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStatsTests.java similarity index 75% rename from server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java rename to server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStatsTests.java index d5bc9398492eb..92ff3ccee6227 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStatsTests.java @@ -18,18 +18,18 @@ import java.util.Map; -public class SearchShardTaskStatsTests extends AbstractWireSerializingTestCase { +public class SearchBackpressureTaskStatsTests extends AbstractWireSerializingTestCase { @Override - protected Writeable.Reader instanceReader() { - return SearchShardTaskStats::new; + protected Writeable.Reader instanceReader() { + return SearchBackpressureTaskStats::new; } @Override - protected SearchShardTaskStats createTestInstance() { + protected 
SearchBackpressureTaskStats createTestInstance() { return randomInstance(); } - public static SearchShardTaskStats randomInstance() { + public static SearchBackpressureTaskStats randomInstance() { Map resourceUsageTrackerStats = Map.of( TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new CpuUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), @@ -39,6 +39,6 @@ public static SearchShardTaskStats randomInstance() { new ElapsedTimeTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()) ); - return new SearchShardTaskStats(randomNonNegativeLong(), randomNonNegativeLong(), resourceUsageTrackerStats); + return new SearchBackpressureTaskStats(randomNonNegativeLong(), randomNonNegativeLong(), resourceUsageTrackerStats); } } diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java deleted file mode 100644 index 59375c22bb932..0000000000000 --- a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.search.backpressure.stats; - -import org.opensearch.common.io.stream.Writeable; -import org.opensearch.search.backpressure.trackers.CpuUsageTracker; -import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; -import org.opensearch.search.backpressure.trackers.HeapUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; -import org.opensearch.test.AbstractWireSerializingTestCase; - -import java.util.Map; - -public class SearchTaskStatsTests extends AbstractWireSerializingTestCase { - public static SearchTaskStats randomInstance() { - Map resourceUsageTrackerStats = Map.of( - TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, - new CpuUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), - TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, - new HeapUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), - TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, - new ElapsedTimeTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()) - ); - - return new SearchTaskStats(randomNonNegativeLong(), randomNonNegativeLong(), resourceUsageTrackerStats); - } - - @Override - protected Writeable.Reader instanceReader() { - return SearchTaskStats::new; - } - - @Override - protected SearchTaskStats createTestInstance() { - return randomInstance(); - } -} diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/CpuUsageTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/CpuUsageTrackerTests.java index 62e2950189436..8cdcbc7511bd2 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/CpuUsageTrackerTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/CpuUsageTrackerTests.java @@ -13,6 +13,8 @@ 
import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; +import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; import org.opensearch.test.OpenSearchTestCase; @@ -24,15 +26,15 @@ public class CpuUsageTrackerTests extends OpenSearchTestCase { private static final SearchBackpressureSettings mockSettings = new SearchBackpressureSettings( Settings.builder() - .put(CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD.getKey(), 15) // 15 ms - .put(CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 25) // 25 ms + .put(SearchShardTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD.getKey(), 15) // 15 ms + .put(SearchTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD.getKey(), 25) // 25 ms .build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) ); public void testSearchTaskEligibleForCancellation() { Task task = createMockTaskWithResourceStats(SearchTask.class, 100000000, 200); - CpuUsageTracker tracker = new CpuUsageTracker(mockSettings); + CpuUsageTracker tracker = new CpuUsageTracker(mockSettings.getSearchTaskSettings()::getCpuTimeNanosThreshold); Optional reason = tracker.checkAndMaybeGetCancellationReason(task); assertTrue(reason.isPresent()); @@ -42,7 +44,7 @@ public void testSearchTaskEligibleForCancellation() { public void testSearchShardTaskEligibleForCancellation() { Task task = createMockTaskWithResourceStats(SearchShardTask.class, 200000000, 200); - CpuUsageTracker tracker = new CpuUsageTracker(mockSettings); + CpuUsageTracker tracker = new CpuUsageTracker(mockSettings.getSearchShardTaskSettings()::getCpuTimeNanosThreshold); Optional reason = tracker.checkAndMaybeGetCancellationReason(task); assertTrue(reason.isPresent()); 
@@ -52,7 +54,7 @@ public void testSearchShardTaskEligibleForCancellation() { public void testNotEligibleForCancellation() { Task task = createMockTaskWithResourceStats(SearchShardTask.class, 5000000, 200); - CpuUsageTracker tracker = new CpuUsageTracker(mockSettings); + CpuUsageTracker tracker = new CpuUsageTracker(mockSettings.getSearchShardTaskSettings()::getCpuTimeNanosThreshold); Optional reason = tracker.checkAndMaybeGetCancellationReason(task); assertFalse(reason.isPresent()); diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTrackerTests.java index 1748ce8d7c253..921d01e7355a7 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTrackerTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTrackerTests.java @@ -13,6 +13,8 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; +import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; import org.opensearch.test.OpenSearchTestCase; @@ -25,15 +27,18 @@ public class ElapsedTimeTrackerTests extends OpenSearchTestCase { private static final SearchBackpressureSettings mockSettings = new SearchBackpressureSettings( Settings.builder() - .put(ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.getKey(), 100) // 100 ms - .put(ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 150) // 150 ms + .put(SearchShardTaskSettings.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.getKey(), 100) // 100 ms + .put(SearchTaskSettings.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.getKey(), 150) // 150 ms .build(), 
new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) ); public void testSearchTaskEligibleForCancellation() { Task task = createMockTaskWithResourceStats(SearchTask.class, 1, 1, 0); - ElapsedTimeTracker tracker = new ElapsedTimeTracker(mockSettings, () -> 150000000); + ElapsedTimeTracker tracker = new ElapsedTimeTracker( + mockSettings.getSearchTaskSettings()::getElapsedTimeNanosThreshold, + () -> 150000000 + ); Optional reason = tracker.checkAndMaybeGetCancellationReason(task); assertTrue(reason.isPresent()); @@ -43,7 +48,10 @@ public void testSearchTaskEligibleForCancellation() { public void testSearchShardTaskEligibleForCancellation() { Task task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 1, 0); - ElapsedTimeTracker tracker = new ElapsedTimeTracker(mockSettings, () -> 200000000); + ElapsedTimeTracker tracker = new ElapsedTimeTracker( + mockSettings.getSearchShardTaskSettings()::getElapsedTimeNanosThreshold, + () -> 200000000 + ); Optional reason = tracker.checkAndMaybeGetCancellationReason(task); assertTrue(reason.isPresent()); @@ -53,7 +61,10 @@ public void testSearchShardTaskEligibleForCancellation() { public void testNotEligibleForCancellation() { Task task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 1, 150000000); - ElapsedTimeTracker tracker = new ElapsedTimeTracker(mockSettings, () -> 200000000); + ElapsedTimeTracker tracker = new ElapsedTimeTracker( + mockSettings.getSearchShardTaskSettings()::getElapsedTimeNanosThreshold, + () -> 200000000 + ); Optional reason = tracker.checkAndMaybeGetCancellationReason(task); assertFalse(reason.isPresent()); diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java index 74f36fe15551e..aa9bd39fb3451 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java +++ 
b/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java @@ -13,6 +13,8 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; +import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; import org.opensearch.test.OpenSearchTestCase; @@ -24,23 +26,33 @@ import static org.opensearch.search.backpressure.SearchBackpressureTestHelpers.createMockTaskWithResourceStats; public class HeapUsageTrackerTests extends OpenSearchTestCase { - private static final long HEAP_BYTES_THRESHOLD = 100; - private static final long HEAP_BYTES_THRESHOLD_FOR_SEARCH_QUERY = 50; + private static final long HEAP_BYTES_THRESHOLD_SEARCH_SHARD_TASK = 100; + private static final long HEAP_BYTES_THRESHOLD_SEARCH_TASK = 50; private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE = 100; private static final SearchBackpressureSettings mockSettings = new SearchBackpressureSettings( Settings.builder() - .put(HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 3.0) - .put(HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD.getKey(), 2.0) - .put(HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY.getKey(), HEAP_MOVING_AVERAGE_WINDOW_SIZE) - .put(HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.getKey(), HEAP_MOVING_AVERAGE_WINDOW_SIZE) + .put(SearchTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD.getKey(), 3.0) + .put(SearchShardTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD.getKey(), 2.0) + .put(SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.getKey(), HEAP_MOVING_AVERAGE_WINDOW_SIZE) + .put(SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.getKey(), HEAP_MOVING_AVERAGE_WINDOW_SIZE) .build(), new 
ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) ); public void testSearchTaskEligibleForCancellation() { - HeapUsageTracker tracker = spy(new HeapUsageTracker(mockSettings)); - when(tracker.getHeapBytesThresholdForSearchQuery()).thenReturn(HEAP_BYTES_THRESHOLD_FOR_SEARCH_QUERY); + SearchTaskSettings mockSearchTaskSettings = spy( + new SearchTaskSettings(mockSettings.getSettings(), mockSettings.getClusterSettings()) + ); + when(mockSearchTaskSettings.getHeapBytesThreshold()).thenReturn(HEAP_BYTES_THRESHOLD_SEARCH_TASK); + HeapUsageTracker tracker = spy( + new HeapUsageTracker( + mockSearchTaskSettings::getHeapVarianceThreshold, + mockSearchTaskSettings::getHeapBytesThreshold, + mockSearchTaskSettings::getHeapMovingAverageWindowSize, + mockSettings.getClusterSettings() + ) + ); Task task = createMockTaskWithResourceStats(SearchTask.class, 1, 50); // Record enough observations to make the moving average 'ready'. @@ -57,8 +69,18 @@ public void testSearchTaskEligibleForCancellation() { } public void testSearchShardTaskEligibleForCancellation() { - HeapUsageTracker tracker = spy(new HeapUsageTracker(mockSettings)); - when(tracker.getHeapBytesThreshold()).thenReturn(HEAP_BYTES_THRESHOLD); + SearchShardTaskSettings mockSearchShardTaskSettings = spy( + new SearchShardTaskSettings(mockSettings.getSettings(), mockSettings.getClusterSettings()) + ); + when(mockSearchShardTaskSettings.getHeapBytesThreshold()).thenReturn(HEAP_BYTES_THRESHOLD_SEARCH_TASK); + HeapUsageTracker tracker = spy( + new HeapUsageTracker( + mockSearchShardTaskSettings::getHeapVarianceThreshold, + mockSearchShardTaskSettings::getHeapBytesThreshold, + mockSearchShardTaskSettings::getHeapMovingAverageWindowSize, + mockSettings.getClusterSettings() + ) + ); Task task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 50); // Record enough observations to make the moving average 'ready'. 
@@ -77,8 +99,18 @@ public void testSearchShardTaskEligibleForCancellation() { public void testNotEligibleForCancellation() { Task task; Optional reason; - HeapUsageTracker tracker = spy(new HeapUsageTracker(mockSettings)); - when(tracker.getHeapBytesThreshold()).thenReturn(HEAP_BYTES_THRESHOLD); + SearchShardTaskSettings mockSearchShardTaskSettings = spy( + new SearchShardTaskSettings(mockSettings.getSettings(), mockSettings.getClusterSettings()) + ); + when(mockSearchShardTaskSettings.getHeapBytesThreshold()).thenReturn(HEAP_BYTES_THRESHOLD_SEARCH_SHARD_TASK); + HeapUsageTracker tracker = spy( + new HeapUsageTracker( + mockSearchShardTaskSettings::getHeapVarianceThreshold, + mockSearchShardTaskSettings::getHeapBytesThreshold, + mockSearchShardTaskSettings::getHeapMovingAverageWindowSize, + mockSettings.getClusterSettings() + ) + ); // Task with heap usage < heapBytesThreshold. task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 99); diff --git a/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java b/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java index b8fa91f2d438b..bb577edd6667d 100644 --- a/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java +++ b/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java @@ -27,19 +27,16 @@ public void testTaskCancellation() { TaskResourceUsageTracker mockTracker3 = createMockTaskResourceUsageTracker("mock_tracker_3"); List reasons = new ArrayList<>(); - List callbacks = List.of( - mockTracker1::incrementSearchShardTaskCancellations, - mockTracker2::incrementSearchShardTaskCancellations - ); + List callbacks = List.of(mockTracker1::incrementCancellations, mockTracker2::incrementCancellations); TaskCancellation taskCancellation = new TaskCancellation(mockTask, reasons, callbacks); // Task does not have any reason to be cancelled. 
assertEquals(0, taskCancellation.totalCancellationScore()); assertFalse(taskCancellation.isEligibleForCancellation()); taskCancellation.cancel(); - assertEquals(0, mockTracker1.getSearchShardTaskCancellationCount()); - assertEquals(0, mockTracker2.getSearchShardTaskCancellationCount()); - assertEquals(0, mockTracker3.getSearchShardTaskCancellationCount()); + assertEquals(0, mockTracker1.getCancellations()); + assertEquals(0, mockTracker2.getCancellations()); + assertEquals(0, mockTracker3.getCancellations()); // Task has one or more reasons to be cancelled. reasons.add(new TaskCancellation.Reason("limits exceeded 1", 10)); @@ -51,9 +48,9 @@ public void testTaskCancellation() { // Cancel the task and validate the cancellation reason and invocation of callbacks. taskCancellation.cancel(); assertTrue(mockTask.getReasonCancelled().contains("limits exceeded 1, limits exceeded 2, limits exceeded 3")); - assertEquals(1, mockTracker1.getSearchShardTaskCancellationCount()); - assertEquals(1, mockTracker2.getSearchShardTaskCancellationCount()); - assertEquals(0, mockTracker3.getSearchShardTaskCancellationCount()); + assertEquals(1, mockTracker1.getCancellations()); + assertEquals(1, mockTracker2.getCancellations()); + assertEquals(0, mockTracker3.getCancellations()); } private static TaskResourceUsageTracker createMockTaskResourceUsageTracker(String name) { @@ -72,12 +69,7 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public Stats searchTaskStats(List searchTasks) { - return null; - } - - @Override - public Stats searchShardTaskStats(List searchShardTasks) { + public Stats stats(List searchShardTasks) { return null; } }; From 4778162294d80a4c4028de34e4b9a3e416d37c34 Mon Sep 17 00:00:00 2001 From: PritLadani Date: Fri, 6 Jan 2023 09:40:59 +0530 Subject: [PATCH 07/34] Adding java docs Signed-off-by: PritLadani --- .../java/org/opensearch/tasks/SearchBackpressureTask.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git 
a/server/src/main/java/org/opensearch/tasks/SearchBackpressureTask.java b/server/src/main/java/org/opensearch/tasks/SearchBackpressureTask.java index 89dc5ef1938e2..0cab67e35ab02 100644 --- a/server/src/main/java/org/opensearch/tasks/SearchBackpressureTask.java +++ b/server/src/main/java/org/opensearch/tasks/SearchBackpressureTask.java @@ -8,4 +8,9 @@ package org.opensearch.tasks; +/** + * A task related to search backpressure + * + * @opensearch.internal + */ public interface SearchBackpressureTask {} From 5b0bf1592131563be79d4d3f41b07a0346c94bcc Mon Sep 17 00:00:00 2001 From: PritLadani Date: Wed, 18 Jan 2023 20:02:17 +0530 Subject: [PATCH 08/34] Moving cancellation settings to task specific settings Signed-off-by: PritLadani --- .../common/settings/ClusterSettings.java | 25 ++- .../SearchBackpressureService.java | 150 +++++++------ .../settings/SearchBackpressureSettings.java | 210 +----------------- .../settings/SearchShardTaskSettings.java | 120 +++++++++- .../settings/SearchTaskSettings.java | 117 +++++++++- .../stats/SearchBackpressureStats.java | 18 +- .../stats/SearchBackpressureTaskStats.java | 17 +- .../trackers/CpuUsageTracker.java | 6 +- .../trackers/ElapsedTimeTracker.java | 6 +- .../trackers/HeapUsageTracker.java | 26 +-- .../SearchBackpressureServiceTests.java | 7 +- .../trackers/HeapUsageTrackerTests.java | 15 +- 12 files changed, 360 insertions(+), 357 deletions(-) diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 4e3eb764e3401..6e87cbdfff505 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -592,27 +592,28 @@ public void apply(Settings value, Settings current, Settings previous) { // Settings related to search backpressure SearchBackpressureSettings.SETTING_MODE, - 
SearchBackpressureSettings.SETTING_CANCELLATION_RATIO_SEARCH_TASK, - SearchBackpressureSettings.SETTING_CANCELLATION_RATE_SEARCH_TASK, - SearchBackpressureSettings.SETTING_CANCELLATION_BURST_SEARCH_TASK, - SearchBackpressureSettings.SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK, - SearchBackpressureSettings.SETTING_CANCELLATION_RATE_SEARCH_SHARD_TASK, - SearchBackpressureSettings.SETTING_CANCELLATION_BURST_SEARCH_SHARD_TASK, + NodeDuressSettings.SETTING_NUM_SUCCESSIVE_BREACHES, NodeDuressSettings.SETTING_CPU_THRESHOLD, NodeDuressSettings.SETTING_HEAP_THRESHOLD, - SearchShardTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, + SearchTaskSettings.SETTING_CANCELLATION_RATIO, + SearchTaskSettings.SETTING_CANCELLATION_RATE, + SearchTaskSettings.SETTING_CANCELLATION_BURST, SearchTaskSettings.SETTING_HEAP_PERCENT_THRESHOLD, - SearchShardTaskSettings.SETTING_HEAP_PERCENT_THRESHOLD, SearchTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD, - SearchShardTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD, SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, - SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, SearchTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD, - SearchShardTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD, SearchTaskSettings.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, + SearchTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, + SearchShardTaskSettings.SETTING_CANCELLATION_RATIO, + SearchShardTaskSettings.SETTING_CANCELLATION_RATE, + SearchShardTaskSettings.SETTING_CANCELLATION_BURST, + SearchShardTaskSettings.SETTING_HEAP_PERCENT_THRESHOLD, + SearchShardTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD, + SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, + SearchShardTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD, SearchShardTaskSettings.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, - SearchTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD + SearchShardTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD ) ) ); diff --git 
a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index 928b26a835bc4..7efff0fd5ffdb 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -19,6 +19,8 @@ import org.opensearch.monitor.process.ProcessProbe; import org.opensearch.search.backpressure.settings.SearchBackpressureMode; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; +import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; import org.opensearch.search.backpressure.stats.SearchBackpressureTaskStats; import org.opensearch.search.backpressure.trackers.CpuUsageTracker; @@ -55,7 +57,8 @@ public class SearchBackpressureService extends AbstractLifecycleComponent implements TaskCompletionListener, - SearchBackpressureSettings.Listener { + SearchTaskSettings.Listener, + SearchShardTaskSettings.Listener { private static final Logger logger = LogManager.getLogger(SearchBackpressureService.class); private volatile Scheduler.Cancellable scheduledFuture; @@ -69,10 +72,8 @@ public class SearchBackpressureService extends AbstractLifecycleComponent private final List searchTaskTrackers; private final List searchShardTaskTrackers; - private final AtomicReference searchTaskCancellationRateLimiter = new AtomicReference<>(); - private final AtomicReference searchTaskCancellationRatioLimiter = new AtomicReference<>(); - private final AtomicReference searchShardTaskCancellationRateLimiter = new AtomicReference<>(); - private final AtomicReference searchShardTaskCancellationRatioLimiter = new AtomicReference<>(); + private final Map, AtomicReference> rateLimiters; + 
private final Map, AtomicReference> ratioLimiters; private final Map, SearchBackpressureState> searchBackpressureStates; @@ -99,8 +100,9 @@ public SearchBackpressureService( new HeapUsageTracker( settings.getSearchTaskSettings()::getHeapVarianceThreshold, settings.getSearchTaskSettings()::getHeapBytesThreshold, - settings.getSearchTaskSettings()::getHeapMovingAverageWindowSize, - settings.getClusterSettings() + settings.getSearchTaskSettings().getHeapMovingAverageWindowSize(), + settings.getClusterSettings(), + SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE ), new ElapsedTimeTracker(settings.getSearchTaskSettings()::getElapsedTimeNanosThreshold, System::nanoTime) ), @@ -109,8 +111,9 @@ public SearchBackpressureService( new HeapUsageTracker( settings.getSearchShardTaskSettings()::getHeapVarianceThreshold, settings.getSearchShardTaskSettings()::getHeapBytesThreshold, - settings.getSearchShardTaskSettings()::getHeapMovingAverageWindowSize, - settings.getClusterSettings() + settings.getSearchShardTaskSettings().getHeapMovingAverageWindowSize(), + settings.getClusterSettings(), + SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE ), new ElapsedTimeTracker(settings.getSearchShardTaskSettings()::getElapsedTimeNanosThreshold, System::nanoTime) ) @@ -127,7 +130,8 @@ public SearchBackpressureService( List searchShardTaskTrackers ) { this.settings = settings; - this.settings.addListener(this); + this.settings.getSearchTaskSettings().addListener(this); + this.settings.getSearchShardTaskSettings().addListener(this); this.taskResourceTrackingService = taskResourceTrackingService; this.taskResourceTrackingService.addTaskCompletionListener(this); this.threadPool = threadPool; @@ -143,35 +147,41 @@ public SearchBackpressureService( new SearchBackpressureState() ); - this.searchTaskCancellationRateLimiter.set( - new TokenBucket( - timeNanosSupplier, - getSettings().getCancellationRateSearchTaskNanos(), - getSettings().getCancellationBurstSearchTask() - ) - 
); - - this.searchTaskCancellationRatioLimiter.set( - new TokenBucket( - this::getSearchTaskCompletionCount, - getSettings().getCancellationRatioSearchTask(), - getSettings().getCancellationBurstSearchTask() - ) - ); - - this.searchShardTaskCancellationRateLimiter.set( - new TokenBucket( - timeNanosSupplier, - getSettings().getCancellationRateSearchShardTaskNanos(), - getSettings().getCancellationBurstSearchShardTask() + this.rateLimiters = Map.of( + SearchTask.class, + new AtomicReference<>( + new TokenBucket( + timeNanosSupplier, + getSettings().getSearchTaskSettings().getCancellationRateNanos(), + getSettings().getSearchTaskSettings().getCancellationBurst() + ) + ), + SearchShardTask.class, + new AtomicReference<>( + new TokenBucket( + timeNanosSupplier, + getSettings().getSearchShardTaskSettings().getCancellationRateNanos(), + getSettings().getSearchShardTaskSettings().getCancellationBurst() + ) ) ); - this.searchShardTaskCancellationRatioLimiter.set( - new TokenBucket( - this::getSearchShardTaskCompletionCount, - getSettings().getCancellationRatioSearchShardTask(), - getSettings().getCancellationBurstSearchShardTask() + this.ratioLimiters = Map.of( + SearchTask.class, + new AtomicReference<>( + new TokenBucket( + this::getSearchTaskCompletionCount, + getSettings().getSearchTaskSettings().getCancellationRatio(), + getSettings().getSearchTaskSettings().getCancellationBurst() + ) + ), + SearchShardTask.class, + new AtomicReference<>( + new TokenBucket( + this::getSearchShardTaskCompletionCount, + getSettings().getSearchShardTaskSettings().getCancellationRatio(), + getSettings().getSearchShardTaskSettings().getCancellationBurst() + ) ) ); } @@ -233,11 +243,11 @@ void doRun() { // Independently remove tokens from both token buckets. boolean rateLimitReached = isSearchTask - ? searchTaskCancellationRateLimiter.get().request() == false - : searchShardTaskCancellationRateLimiter.get().request() == false; + ? 
rateLimiters.get(SearchTask.class).get().request() == false + : rateLimiters.get(SearchShardTask.class).get().request() == false; boolean ratioLimitReached = isSearchTask - ? searchTaskCancellationRatioLimiter.get().request() == false - : searchShardTaskCancellationRatioLimiter.get().request() == false; + ? ratioLimiters.get(SearchTask.class).get().request() == false + : ratioLimiters.get(SearchShardTask.class).get().request() == false; // Stop cancelling tasks if there are no tokens in either of the two token buckets. if (rateLimitReached && ratioLimitReached) { @@ -380,24 +390,26 @@ public void onTaskCompleted(Task task) { @Override public void onCancellationRatioSearchTaskChanged() { - searchTaskCancellationRatioLimiter.set( - new TokenBucket( - this::getSearchTaskCompletionCount, - getSettings().getCancellationRatioSearchTask(), - getSettings().getCancellationBurstSearchTask() - ) - ); + ratioLimiters.get(SearchTask.class) + .set( + new TokenBucket( + this::getSearchTaskCompletionCount, + getSettings().getSearchTaskSettings().getCancellationRatio(), + getSettings().getSearchTaskSettings().getCancellationBurst() + ) + ); } @Override public void onCancellationRateSearchTaskChanged() { - searchTaskCancellationRateLimiter.set( - new TokenBucket( - timeNanosSupplier, - getSettings().getCancellationRateSearchTaskNanos(), - getSettings().getCancellationBurstSearchTask() - ) - ); + rateLimiters.get(SearchTask.class) + .set( + new TokenBucket( + timeNanosSupplier, + getSettings().getSearchTaskSettings().getCancellationRateNanos(), + getSettings().getSearchTaskSettings().getCancellationBurst() + ) + ); } @Override @@ -408,24 +420,26 @@ public void onCancellationBurstSearchTaskChanged() { @Override public void onCancellationRatioSearchShardTaskChanged() { - searchShardTaskCancellationRatioLimiter.set( - new TokenBucket( - this::getSearchShardTaskCompletionCount, - getSettings().getCancellationRatioSearchShardTask(), - getSettings().getCancellationBurstSearchShardTask() - 
) - ); + ratioLimiters.get(SearchShardTask.class) + .set( + new TokenBucket( + this::getSearchShardTaskCompletionCount, + getSettings().getSearchShardTaskSettings().getCancellationRatio(), + getSettings().getSearchShardTaskSettings().getCancellationBurst() + ) + ); } @Override public void onCancellationRateSearchShardTaskChanged() { - searchShardTaskCancellationRateLimiter.set( - new TokenBucket( - timeNanosSupplier, - getSettings().getCancellationRateSearchShardTaskNanos(), - getSettings().getCancellationBurstSearchShardTask() - ) - ); + rateLimiters.get(SearchShardTask.class) + .set( + new TokenBucket( + timeNanosSupplier, + getSettings().getSearchShardTaskSettings().getCancellationRateNanos(), + getSettings().getSearchShardTaskSettings().getCancellationBurst() + ) + ); } @Override diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java index e0cd4efd43aac..13287d04886c1 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java @@ -8,19 +8,13 @@ package org.opensearch.search.backpressure.settings; -import org.opensearch.ExceptionsHelper; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.TimeUnit; -import java.util.function.Consumer; - /** - * Settings related to search backpressure and cancellation of in-flight requests. 
+ * Settings related to search backpressure mode and interval * * @opensearch.internal */ @@ -28,14 +22,6 @@ public class SearchBackpressureSettings { private static class Defaults { private static final long INTERVAL_MILLIS = 1000; private static final String MODE = "monitor_only"; - - // TODO: decide on default settings for SearchTask - private static final double CANCELLATION_RATIO_SEARCH_TASK = 0.1; - private static final double CANCELLATION_RATE_SEARCH_TASK = 0.003; - private static final double CANCELLATION_BURST_SEARCH_TASK = 10.0; - private static final double CANCELLATION_RATIO_SEARCH_SHARD_TASK = 0.1; - private static final double CANCELLATION_RATE_SEARCH_SHARD_TASK = 0.003; - private static final double CANCELLATION_BURST_SEARCH_SHARD_TASK = 10.0; } /** @@ -60,102 +46,6 @@ private static class Defaults { Setting.Property.NodeScope ); - /** - * Defines the percentage of SearchTasks to cancel relative to the number of successful SearchTask completions. - * In other words, it is the number of tokens added to the bucket on each successful SearchTask completion. - */ - private volatile double cancellationRatioSearchTask; - public static final Setting SETTING_CANCELLATION_RATIO_SEARCH_TASK = Setting.doubleSetting( - "search_backpressure.cancellation_ratio_search_task", - Defaults.CANCELLATION_RATIO_SEARCH_TASK, - 0.0, - 1.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the number of tasks to cancel per unit time (in millis). - * In other words, it is the number of tokens added to the bucket each millisecond. - */ - private volatile double cancellationRateSearchTask; - public static final Setting SETTING_CANCELLATION_RATE_SEARCH_TASK = Setting.doubleSetting( - "search_backpressure.cancellation_rate_search_task", - Defaults.CANCELLATION_RATE_SEARCH_TASK, - 0.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the maximum number of tasks that can be cancelled before being rate-limited. 
- */ - private volatile double cancellationBurstSearchTask; - public static final Setting SETTING_CANCELLATION_BURST_SEARCH_TASK = Setting.doubleSetting( - "search_backpressure.cancellation_burst_search_task", - Defaults.CANCELLATION_BURST_SEARCH_TASK, - 1.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the percentage of tasks to cancel relative to the number of successful task completions. - * In other words, it is the number of tokens added to the bucket on each successful task completion. - */ - private volatile double cancellationRatioSearchShardTask; - public static final Setting SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK = Setting.doubleSetting( - "search_backpressure.cancellation_ratio_search_shard_task", - Defaults.CANCELLATION_RATIO_SEARCH_SHARD_TASK, - 0.0, - 1.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the number of tasks to cancel per unit time (in millis). - * In other words, it is the number of tokens added to the bucket each millisecond. - */ - private volatile double cancellationRateSearchShardTask; - public static final Setting SETTING_CANCELLATION_RATE_SEARCH_SHARD_TASK = Setting.doubleSetting( - "search_backpressure.cancellation_rate_search_shard_task", - Defaults.CANCELLATION_RATE_SEARCH_SHARD_TASK, - 0.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the maximum number of tasks that can be cancelled before being rate-limited. - */ - private volatile double cancellationBurstSearchShardTask; - public static final Setting SETTING_CANCELLATION_BURST_SEARCH_SHARD_TASK = Setting.doubleSetting( - "search_backpressure.cancellation_burst_search_shard_task", - Defaults.CANCELLATION_BURST_SEARCH_SHARD_TASK, - 1.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Callback listeners. 
- */ - public interface Listener { - void onCancellationRatioSearchTaskChanged(); - - void onCancellationRateSearchTaskChanged(); - - void onCancellationBurstSearchTaskChanged(); - - void onCancellationRatioSearchShardTaskChanged(); - - void onCancellationRateSearchShardTaskChanged(); - - void onCancellationBurstSearchShardTaskChanged(); - } - - private final List listeners = new ArrayList<>(); private final Settings settings; private final ClusterSettings clusterSettings; private final NodeDuressSettings nodeDuressSettings; @@ -173,28 +63,6 @@ public SearchBackpressureSettings(Settings settings, ClusterSettings clusterSett mode = SearchBackpressureMode.fromName(SETTING_MODE.get(settings)); clusterSettings.addSettingsUpdateConsumer(SETTING_MODE, s -> this.setMode(SearchBackpressureMode.fromName(s))); - - cancellationRatioSearchTask = SETTING_CANCELLATION_RATIO_SEARCH_TASK.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATIO_SEARCH_TASK, this::setCancellationRatioSearchTask); - - cancellationRateSearchTask = SETTING_CANCELLATION_RATE_SEARCH_TASK.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATE_SEARCH_TASK, this::setCancellationRateSearchTask); - - cancellationBurstSearchTask = SETTING_CANCELLATION_BURST_SEARCH_TASK.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST_SEARCH_TASK, this::setCancellationBurstSearchTask); - - cancellationRatioSearchShardTask = SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK, this::setCancellationRatioSearchShardTask); - - cancellationRateSearchShardTask = SETTING_CANCELLATION_RATE_SEARCH_SHARD_TASK.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATE_SEARCH_SHARD_TASK, this::setCancellationRateSearchShardTask); - - cancellationBurstSearchShardTask = 
SETTING_CANCELLATION_BURST_SEARCH_SHARD_TASK.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST_SEARCH_SHARD_TASK, this::setCancellationBurstSearchShardTask); - } - - public void addListener(Listener listener) { - listeners.add(listener); } public Settings getSettings() { @@ -228,80 +96,4 @@ public SearchBackpressureMode getMode() { public void setMode(SearchBackpressureMode mode) { this.mode = mode; } - - public double getCancellationRatioSearchTask() { - return cancellationRatioSearchTask; - } - - private void setCancellationRatioSearchTask(double cancellationRatioSearchTask) { - this.cancellationRatioSearchTask = cancellationRatioSearchTask; - notifyListeners(Listener::onCancellationRatioSearchTaskChanged); - } - - public double getCancellationRateSearchTask() { - return cancellationRateSearchTask; - } - - public double getCancellationRateSearchTaskNanos() { - return getCancellationRateSearchTask() / TimeUnit.MILLISECONDS.toNanos(1); // rate per nanoseconds - } - - private void setCancellationRateSearchTask(double cancellationRateSearchTask) { - this.cancellationRateSearchTask = cancellationRateSearchTask; - notifyListeners(Listener::onCancellationRateSearchTaskChanged); - } - - public double getCancellationBurstSearchTask() { - return cancellationBurstSearchTask; - } - - private void setCancellationBurstSearchTask(double cancellationBurstSearchTask) { - this.cancellationBurstSearchTask = cancellationBurstSearchTask; - notifyListeners(Listener::onCancellationBurstSearchTaskChanged); - } - - public double getCancellationRatioSearchShardTask() { - return cancellationRatioSearchShardTask; - } - - private void setCancellationRatioSearchShardTask(double cancellationRatioSearchShardTask) { - this.cancellationRatioSearchShardTask = cancellationRatioSearchShardTask; - notifyListeners(Listener::onCancellationRatioSearchShardTaskChanged); - } - - public double getCancellationRateSearchShardTask() { - return 
cancellationRateSearchShardTask; - } - - public double getCancellationRateSearchShardTaskNanos() { - return getCancellationRateSearchShardTask() / TimeUnit.MILLISECONDS.toNanos(1); // rate per nanoseconds - } - - private void setCancellationRateSearchShardTask(double cancellationRateSearchShardTask) { - this.cancellationRateSearchShardTask = cancellationRateSearchShardTask; - notifyListeners(Listener::onCancellationRateSearchShardTaskChanged); - } - - public double getCancellationBurstSearchShardTask() { - return cancellationBurstSearchShardTask; - } - - private void setCancellationBurstSearchShardTask(double cancellationBurstSearchShardTask) { - this.cancellationBurstSearchShardTask = cancellationBurstSearchShardTask; - notifyListeners(Listener::onCancellationBurstSearchShardTaskChanged); - } - - private void notifyListeners(Consumer consumer) { - List exceptions = new ArrayList<>(); - - for (Listener listener : listeners) { - try { - consumer.accept(listener); - } catch (Exception e) { - exceptions.add(e); - } - } - - ExceptionsHelper.maybeThrowRuntimeAndSuppress(exceptions); - } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java index 98599e9478a29..1cfb2a5a350f5 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java @@ -8,12 +8,16 @@ package org.opensearch.search.backpressure.settings; +import org.opensearch.ExceptionsHelper; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.monitor.jvm.JvmStats; +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; /** * Defines the 
settings related to the cancellation of SearchShardTasks. @@ -22,8 +26,12 @@ */ public class SearchShardTaskSettings { private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); + private final List listeners = new ArrayList<>(); private static class Defaults { + private static final double CANCELLATION_RATIO = 0.1; + private static final double CANCELLATION_RATE = 0.003; + private static final double CANCELLATION_BURST = 10.0; private static final double TOTAL_HEAP_PERCENT_THRESHOLD = 0.05; private static final long CPU_TIME_MILLIS_THRESHOLD = 15000; private static final long ELAPSED_TIME_MILLIS_THRESHOLD = 30000; @@ -32,6 +40,45 @@ private static class Defaults { private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE = 100; } + /** + * Defines the percentage of SearchShardTasks to cancel relative to the number of successful SearchShardTasks completions. + * In other words, it is the number of tokens added to the bucket on each successful SearchShardTask completion. + */ + private volatile double cancellationRatio; + public static final Setting SETTING_CANCELLATION_RATIO = Setting.doubleSetting( + "search_backpressure.search_shard_task.cancellation_ratio", + Defaults.CANCELLATION_RATIO, + 0.0, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the number of SearchShardTasks to cancel per unit time (in millis). + * In other words, it is the number of tokens added to the bucket each millisecond. + */ + private volatile double cancellationRate; + public static final Setting SETTING_CANCELLATION_RATE = Setting.doubleSetting( + "search_backpressure.search_shard_task.cancellation_rate", + Defaults.CANCELLATION_RATE, + 0.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the maximum number of SearchShardTasks that can be cancelled before being rate-limited. 
+ */ + private volatile double cancellationBurst; + public static final Setting SETTING_CANCELLATION_BURST = Setting.doubleSetting( + "search_backpressure.search_shard_task.cancellation_burst", + Defaults.CANCELLATION_BURST, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + /** * Defines the heap usage threshold (in percentage) for the sum of heap usages across all search shard tasks * before in-flight cancellation is applied. @@ -112,15 +159,33 @@ public SearchShardTaskSettings(Settings settings, ClusterSettings clusterSetting totalHeapPercentThreshold = SETTING_TOTAL_HEAP_PERCENT_THRESHOLD.get(settings); this.cpuTimeMillisThreshold = SETTING_CPU_TIME_MILLIS_THRESHOLD.get(settings); this.elapsedTimeMillisThreshold = SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.get(settings); - heapPercentThreshold = SETTING_HEAP_PERCENT_THRESHOLD.get(settings); - heapVarianceThreshold = SETTING_HEAP_VARIANCE_THRESHOLD.get(settings); - heapMovingAverageWindowSize = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.get(settings); + this.heapPercentThreshold = SETTING_HEAP_PERCENT_THRESHOLD.get(settings); + this.heapVarianceThreshold = SETTING_HEAP_VARIANCE_THRESHOLD.get(settings); + this.heapMovingAverageWindowSize = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.get(settings); + this.cancellationRatio = SETTING_CANCELLATION_RATIO.get(settings); + this.cancellationRate = SETTING_CANCELLATION_RATE.get(settings); + this.cancellationBurst = SETTING_CANCELLATION_BURST.get(settings); + clusterSettings.addSettingsUpdateConsumer(SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, this::setTotalHeapPercentThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD, this::setCpuTimeMillisThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, this::setElapsedTimeMillisThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD, this::setHeapPercentThreshold); 
clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD, this::setHeapVarianceThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::setHeapMovingAverageWindowSize); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATIO, this::setCancellationRatio); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATE, this::setCancellationRate); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST, this::setCancellationBurst); + } + + /** + * Callback listeners. + */ + public interface Listener { + void onCancellationRatioSearchShardTaskChanged(); + + void onCancellationRateSearchShardTaskChanged(); + + void onCancellationBurstSearchShardTaskChanged(); } public double getTotalHeapPercentThreshold() { @@ -174,4 +239,53 @@ public void setHeapVarianceThreshold(double heapVarianceThreshold) { public void setHeapMovingAverageWindowSize(int heapMovingAverageWindowSize) { this.heapMovingAverageWindowSize = heapMovingAverageWindowSize; } + + public double getCancellationRatio() { + return cancellationRatio; + } + + private void setCancellationRatio(double cancellationRatio) { + this.cancellationRatio = cancellationRatio; + notifyListeners(Listener::onCancellationRatioSearchShardTaskChanged); + } + + public double getCancellationRate() { + return cancellationRate; + } + + public double getCancellationRateNanos() { + return getCancellationRate() / TimeUnit.MILLISECONDS.toNanos(1); // rate per nanoseconds + } + + private void setCancellationRate(double cancellationRate) { + this.cancellationRate = cancellationRate; + notifyListeners(Listener::onCancellationRateSearchShardTaskChanged); + } + + public double getCancellationBurst() { + return cancellationBurst; + } + + private void setCancellationBurst(double cancellationBurst) { + this.cancellationBurst = cancellationBurst; + notifyListeners(Listener::onCancellationBurstSearchShardTaskChanged); + } + + public void 
addListener(Listener listener) { + listeners.add(listener); + } + + private void notifyListeners(Consumer consumer) { + List exceptions = new ArrayList<>(); + + for (Listener listener : listeners) { + try { + consumer.accept(listener); + } catch (Exception e) { + exceptions.add(e); + } + } + + ExceptionsHelper.maybeThrowRuntimeAndSuppress(exceptions); + } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java index 3b89ba7f3492d..af216f84d790e 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java @@ -8,13 +8,16 @@ package org.opensearch.search.backpressure.settings; -import org.apache.logging.log4j.LogManager; +import org.opensearch.ExceptionsHelper; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.monitor.jvm.JvmStats; +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; /** * Defines the settings related to the cancellation of SearchTasks. 
@@ -24,8 +27,13 @@ public class SearchTaskSettings { private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); + private final List listeners = new ArrayList<>(); private static class Defaults { + // TODO: decide on default settings for SearchTask + private static final double CANCELLATION_RATIO = 0.1; + private static final double CANCELLATION_RATE = 0.003; + private static final double CANCELLATION_BURST = 10.0; private static final double TOTAL_HEAP_PERCENT_THRESHOLD = 0.05; private static final long CPU_TIME_MILLIS_THRESHOLD = 60000; private static final long ELAPSED_TIME_MILLIS_THRESHOLD = 120000; @@ -34,6 +42,45 @@ private static class Defaults { private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE = 100; } + /** + * Defines the percentage of SearchTasks to cancel relative to the number of successful SearchTask completions. + * In other words, it is the number of tokens added to the bucket on each successful SearchTask completion. + */ + private volatile double cancellationRatio; + public static final Setting SETTING_CANCELLATION_RATIO = Setting.doubleSetting( + "search_backpressure.search_task.cancellation_ratio", + Defaults.CANCELLATION_RATIO, + 0.0, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the number of SearchTasks to cancel per unit time (in millis). + * In other words, it is the number of tokens added to the bucket each millisecond. + */ + private volatile double cancellationRate; + public static final Setting SETTING_CANCELLATION_RATE = Setting.doubleSetting( + "search_backpressure.search_task.cancellation_rate", + Defaults.CANCELLATION_RATE, + 0.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the maximum number of SearchTasks that can be cancelled before being rate-limited. 
+ */ + private volatile double cancellationBurst; + public static final Setting SETTING_CANCELLATION_BURST = Setting.doubleSetting( + "search_backpressure.search_task.cancellation_burst", + Defaults.CANCELLATION_BURST, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + /** * Defines the heap usage threshold (in percentage) for the sum of heap usages across all search tasks * before in-flight cancellation is applied. @@ -117,12 +164,30 @@ public SearchTaskSettings(Settings settings, ClusterSettings clusterSettings) { this.heapPercentThreshold = SETTING_HEAP_PERCENT_THRESHOLD.get(settings); this.heapVarianceThreshold = SETTING_HEAP_VARIANCE_THRESHOLD.get(settings); this.heapMovingAverageWindowSize = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.get(settings); + this.cancellationRatio = SETTING_CANCELLATION_RATIO.get(settings); + this.cancellationRate = SETTING_CANCELLATION_RATE.get(settings); + this.cancellationBurst = SETTING_CANCELLATION_BURST.get(settings); + clusterSettings.addSettingsUpdateConsumer(SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, this::setTotalHeapPercentThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD, this::setCpuTimeMillisThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, this::setElapsedTimeMillisThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD, this::setHeapPercentThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD, this::setHeapVarianceThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::setHeapMovingAverageWindowSize); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATIO, this::setCancellationRatio); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATE, this::setCancellationRate); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST, this::setCancellationBurst); + } + + /** + * Callback 
listeners. + */ + public interface Listener { + void onCancellationRatioSearchTaskChanged(); + + void onCancellationRateSearchTaskChanged(); + + void onCancellationBurstSearchTaskChanged(); } public double getTotalHeapPercentThreshold() { @@ -158,7 +223,6 @@ public void setTotalHeapPercentThreshold(double totalHeapPercentThreshold) { } public void setCpuTimeMillisThreshold(long cpuTimeMillisThreshold) { - LogManager.getLogger(SearchTaskSettings.class).info("setCpuTimeMillisThreshold " + cpuTimeMillisThreshold); this.cpuTimeMillisThreshold = cpuTimeMillisThreshold; } @@ -177,4 +241,53 @@ public void setHeapVarianceThreshold(double heapVarianceThreshold) { public void setHeapMovingAverageWindowSize(int heapMovingAverageWindowSize) { this.heapMovingAverageWindowSize = heapMovingAverageWindowSize; } + + public double getCancellationRatio() { + return cancellationRatio; + } + + private void setCancellationRatio(double cancellationRatio) { + this.cancellationRatio = cancellationRatio; + notifyListeners(Listener::onCancellationRatioSearchTaskChanged); + } + + public double getCancellationRate() { + return cancellationRate; + } + + public double getCancellationRateNanos() { + return getCancellationRate() / TimeUnit.MILLISECONDS.toNanos(1); // rate per nanoseconds + } + + private void setCancellationRate(double cancellationRate) { + this.cancellationRate = cancellationRate; + notifyListeners(Listener::onCancellationRateSearchTaskChanged); + } + + public double getCancellationBurst() { + return cancellationBurst; + } + + private void setCancellationBurst(double cancellationBurst) { + this.cancellationBurst = cancellationBurst; + notifyListeners(Listener::onCancellationBurstSearchTaskChanged); + } + + public void addListener(Listener listener) { + listeners.add(listener); + } + + private void notifyListeners(Consumer consumer) { + List exceptions = new ArrayList<>(); + + for (Listener listener : listeners) { + try { + consumer.accept(listener); + } catch (Exception e) { + 
exceptions.add(e); + } + } + + ExceptionsHelper.maybeThrowRuntimeAndSuppress(exceptions); + } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java index bd5f24ef0dbee..2d2eba16aa7a5 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java @@ -8,7 +8,6 @@ package org.opensearch.search.backpressure.stats; -import org.opensearch.Version; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; import org.opensearch.common.io.stream.Writeable; @@ -40,11 +39,7 @@ public SearchBackpressureStats( public SearchBackpressureStats(StreamInput in) throws IOException { searchShardTaskStats = new SearchBackpressureTaskStats(in); mode = SearchBackpressureMode.fromName(in.readString()); - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { - searchTaskStats = new SearchBackpressureTaskStats(in); - } else { - searchTaskStats = null; - } + searchTaskStats = in.readOptionalWriteable(SearchBackpressureTaskStats::new); } @Override @@ -60,9 +55,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws public void writeTo(StreamOutput out) throws IOException { searchShardTaskStats.writeTo(out); out.writeString(mode.getName()); - if (Version.CURRENT.onOrAfter(Version.V_3_0_0) && out.getVersion().onOrAfter(Version.V_3_0_0)) { - searchTaskStats.writeTo(out); - } + // searchTaskStats.writeTo(out); + out.writeOptionalWriteable(searchTaskStats); } @Override @@ -70,9 +64,9 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; SearchBackpressureStats that = (SearchBackpressureStats) o; - return (Version.CURRENT.onOrAfter(Version.V_3_0_0) - && 
searchTaskStats.equals(that.searchTaskStats) - && searchShardTaskStats.equals(that.searchShardTaskStats)) && mode == that.mode; + return mode == that.mode + && Objects.equals(searchTaskStats, that.searchTaskStats) + && Objects.equals(searchShardTaskStats, that.searchShardTaskStats); } @Override diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java index f6925c4c6bc8c..5d7bb31ae2fbb 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java @@ -88,22 +88,7 @@ public boolean equals(Object o) { SearchBackpressureTaskStats that = (SearchBackpressureTaskStats) o; return cancellationCount == that.cancellationCount && limitReachedCount == that.limitReachedCount - && compareMaps(resourceUsageTrackerStats, that.resourceUsageTrackerStats); - } - - private boolean compareMaps( - Map trackers1, - Map trackers2 - ) { - if (trackers1.size() != trackers2.size()) { - return false; - } - for (Map.Entry e1 : trackers1.entrySet()) { - if (trackers2.containsKey(e1.getKey()) == false || trackers2.get(e1.getKey()).equals(e1.getValue()) == false) { - return false; - } - } - return true; + && resourceUsageTrackerStats.equals(that.resourceUsageTrackerStats); } @Override diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java index 5215a17a61a8c..fb4cd342de25b 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java @@ -64,9 +64,9 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public 
TaskResourceUsageTracker.Stats stats(List tasks) { - long currentMax = tasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); - long currentAvg = (long) tasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).average().orElse(0); + public TaskResourceUsageTracker.Stats stats(List activeTasks) { + long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); + long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).average().orElse(0); return new Stats(getCancellations(), currentMax, currentAvg); } diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java index d1700861476d6..1175d68fb8550 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java @@ -65,10 +65,10 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public TaskResourceUsageTracker.Stats stats(List tasks) { + public TaskResourceUsageTracker.Stats stats(List activeTasks) { long now = timeNanosSupplier.getAsLong(); - long currentMax = tasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); - long currentAvg = (long) tasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); + long currentMax = activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); + long currentAvg = (long) activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); return new Stats(getCancellations(), currentMax, currentAvg); } diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java 
b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java index 915b6ed60f685..15058bf8fe156 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java @@ -11,12 +11,10 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.settings.Setting; import org.opensearch.common.unit.ByteSizeValue; import org.opensearch.common.util.MovingAverage; -import org.opensearch.monitor.jvm.JvmStats; import org.opensearch.common.xcontent.XContentBuilder; -import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; -import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; @@ -26,7 +24,6 @@ import java.util.Optional; import java.util.concurrent.atomic.AtomicReference; import java.util.function.DoubleSupplier; -import java.util.function.IntSupplier; import java.util.function.LongSupplier; import static org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER; @@ -38,26 +35,21 @@ * @opensearch.internal */ public class HeapUsageTracker extends TaskResourceUsageTracker { - private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); private final DoubleSupplier heapVarianceSupplier; private final LongSupplier heapBytesThresholdSupplier; - private final IntSupplier windowSizeSupplier; private final AtomicReference movingAverageReference; public HeapUsageTracker( DoubleSupplier heapVarianceSupplier, LongSupplier heapBytesThresholdSupplier, - IntSupplier windowSizeSupplier, - ClusterSettings clusterSettings + int heapMovingAverageWindowSize, + ClusterSettings clusterSettings, + Setting windowSizeSetting ) { 
this.heapVarianceSupplier = heapVarianceSupplier; this.heapBytesThresholdSupplier = heapBytesThresholdSupplier; - this.windowSizeSupplier = windowSizeSupplier; - this.movingAverageReference = new AtomicReference<>(new MovingAverage(windowSizeSupplier.getAsInt())); - // TODO: find a way to get the type of the setting SearchTaskSettings/SearchShardTaskSettings and then add consumer only for the - // required setting - clusterSettings.addSettingsUpdateConsumer(SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::updateWindowSize); - clusterSettings.addSettingsUpdateConsumer(SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::updateWindowSize); + this.movingAverageReference = new AtomicReference<>(new MovingAverage(heapMovingAverageWindowSize)); + clusterSettings.addSettingsUpdateConsumer(windowSizeSetting, this::updateWindowSize); } @Override @@ -102,9 +94,9 @@ private void updateWindowSize(int heapMovingAverageWindowSize) { } @Override - public TaskResourceUsageTracker.Stats stats(List tasks) { - long currentMax = tasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); - long currentAvg = (long) tasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); + public TaskResourceUsageTracker.Stats stats(List activeTasks) { + long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); + long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); return new Stats(getCancellations(), currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); } diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index d2dce8731d141..1b61ecb9e0341 100644 --- 
a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -314,12 +314,7 @@ public void testSearchShardTaskInFlightCancellation() { private SearchBackpressureSettings getBackpressureSettings(String mode, double ratio, double rate, double burst) { return spy( new SearchBackpressureSettings( - Settings.builder() - .put(SearchBackpressureSettings.SETTING_MODE.getKey(), mode) - .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK.getKey(), ratio) - .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATE_SEARCH_TASK.getKey(), rate) - .put(SearchBackpressureSettings.SETTING_CANCELLATION_BURST_SEARCH_TASK.getKey(), burst) - .build(), + Settings.builder().put(SearchBackpressureSettings.SETTING_MODE.getKey(), mode).build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) ) ); diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java index aa9bd39fb3451..4af23be03f9f2 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java @@ -49,8 +49,9 @@ public void testSearchTaskEligibleForCancellation() { new HeapUsageTracker( mockSearchTaskSettings::getHeapVarianceThreshold, mockSearchTaskSettings::getHeapBytesThreshold, - mockSearchTaskSettings::getHeapMovingAverageWindowSize, - mockSettings.getClusterSettings() + mockSearchTaskSettings.getHeapMovingAverageWindowSize(), + mockSettings.getClusterSettings(), + SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE ) ); Task task = createMockTaskWithResourceStats(SearchTask.class, 1, 50); @@ -77,8 +78,9 @@ public void 
testSearchShardTaskEligibleForCancellation() { new HeapUsageTracker( mockSearchShardTaskSettings::getHeapVarianceThreshold, mockSearchShardTaskSettings::getHeapBytesThreshold, - mockSearchShardTaskSettings::getHeapMovingAverageWindowSize, - mockSettings.getClusterSettings() + mockSearchShardTaskSettings.getHeapMovingAverageWindowSize(), + mockSettings.getClusterSettings(), + SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE ) ); Task task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 50); @@ -107,8 +109,9 @@ public void testNotEligibleForCancellation() { new HeapUsageTracker( mockSearchShardTaskSettings::getHeapVarianceThreshold, mockSearchShardTaskSettings::getHeapBytesThreshold, - mockSearchShardTaskSettings::getHeapMovingAverageWindowSize, - mockSettings.getClusterSettings() + mockSearchShardTaskSettings.getHeapMovingAverageWindowSize(), + mockSettings.getClusterSettings(), + SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE ) ); From ac0688fdd52f9c2a26239f3c5c8f21c4a989d3f2 Mon Sep 17 00:00:00 2001 From: PritLadani Date: Wed, 18 Jan 2023 20:02:17 +0530 Subject: [PATCH 09/34] Moving cancellation settings to task specific settings Signed-off-by: PritLadani --- .../common/settings/ClusterSettings.java | 25 ++- .../SearchBackpressureService.java | 150 +++++++------ .../settings/SearchBackpressureSettings.java | 210 +----------------- .../settings/SearchShardTaskSettings.java | 120 +++++++++- .../settings/SearchTaskSettings.java | 117 +++++++++- .../stats/SearchBackpressureStats.java | 18 +- .../stats/SearchBackpressureTaskStats.java | 17 +- .../trackers/CpuUsageTracker.java | 6 +- .../trackers/ElapsedTimeTracker.java | 6 +- .../trackers/HeapUsageTracker.java | 26 +-- .../SearchBackpressureServiceTests.java | 7 +- .../trackers/HeapUsageTrackerTests.java | 15 +- 12 files changed, 360 insertions(+), 357 deletions(-) diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java 
b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 4e3eb764e3401..6e87cbdfff505 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -592,27 +592,28 @@ public void apply(Settings value, Settings current, Settings previous) { // Settings related to search backpressure SearchBackpressureSettings.SETTING_MODE, - SearchBackpressureSettings.SETTING_CANCELLATION_RATIO_SEARCH_TASK, - SearchBackpressureSettings.SETTING_CANCELLATION_RATE_SEARCH_TASK, - SearchBackpressureSettings.SETTING_CANCELLATION_BURST_SEARCH_TASK, - SearchBackpressureSettings.SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK, - SearchBackpressureSettings.SETTING_CANCELLATION_RATE_SEARCH_SHARD_TASK, - SearchBackpressureSettings.SETTING_CANCELLATION_BURST_SEARCH_SHARD_TASK, + NodeDuressSettings.SETTING_NUM_SUCCESSIVE_BREACHES, NodeDuressSettings.SETTING_CPU_THRESHOLD, NodeDuressSettings.SETTING_HEAP_THRESHOLD, - SearchShardTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, + SearchTaskSettings.SETTING_CANCELLATION_RATIO, + SearchTaskSettings.SETTING_CANCELLATION_RATE, + SearchTaskSettings.SETTING_CANCELLATION_BURST, SearchTaskSettings.SETTING_HEAP_PERCENT_THRESHOLD, - SearchShardTaskSettings.SETTING_HEAP_PERCENT_THRESHOLD, SearchTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD, - SearchShardTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD, SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, - SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, SearchTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD, - SearchShardTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD, SearchTaskSettings.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, + SearchTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, + SearchShardTaskSettings.SETTING_CANCELLATION_RATIO, + SearchShardTaskSettings.SETTING_CANCELLATION_RATE, + SearchShardTaskSettings.SETTING_CANCELLATION_BURST, + 
SearchShardTaskSettings.SETTING_HEAP_PERCENT_THRESHOLD, + SearchShardTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD, + SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, + SearchShardTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD, SearchShardTaskSettings.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, - SearchTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD + SearchShardTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD ) ) ); diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index 928b26a835bc4..7efff0fd5ffdb 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -19,6 +19,8 @@ import org.opensearch.monitor.process.ProcessProbe; import org.opensearch.search.backpressure.settings.SearchBackpressureMode; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; +import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; import org.opensearch.search.backpressure.stats.SearchBackpressureTaskStats; import org.opensearch.search.backpressure.trackers.CpuUsageTracker; @@ -55,7 +57,8 @@ public class SearchBackpressureService extends AbstractLifecycleComponent implements TaskCompletionListener, - SearchBackpressureSettings.Listener { + SearchTaskSettings.Listener, + SearchShardTaskSettings.Listener { private static final Logger logger = LogManager.getLogger(SearchBackpressureService.class); private volatile Scheduler.Cancellable scheduledFuture; @@ -69,10 +72,8 @@ public class SearchBackpressureService extends AbstractLifecycleComponent private final List searchTaskTrackers; private final List 
searchShardTaskTrackers; - private final AtomicReference searchTaskCancellationRateLimiter = new AtomicReference<>(); - private final AtomicReference searchTaskCancellationRatioLimiter = new AtomicReference<>(); - private final AtomicReference searchShardTaskCancellationRateLimiter = new AtomicReference<>(); - private final AtomicReference searchShardTaskCancellationRatioLimiter = new AtomicReference<>(); + private final Map, AtomicReference> rateLimiters; + private final Map, AtomicReference> ratioLimiters; private final Map, SearchBackpressureState> searchBackpressureStates; @@ -99,8 +100,9 @@ public SearchBackpressureService( new HeapUsageTracker( settings.getSearchTaskSettings()::getHeapVarianceThreshold, settings.getSearchTaskSettings()::getHeapBytesThreshold, - settings.getSearchTaskSettings()::getHeapMovingAverageWindowSize, - settings.getClusterSettings() + settings.getSearchTaskSettings().getHeapMovingAverageWindowSize(), + settings.getClusterSettings(), + SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE ), new ElapsedTimeTracker(settings.getSearchTaskSettings()::getElapsedTimeNanosThreshold, System::nanoTime) ), @@ -109,8 +111,9 @@ public SearchBackpressureService( new HeapUsageTracker( settings.getSearchShardTaskSettings()::getHeapVarianceThreshold, settings.getSearchShardTaskSettings()::getHeapBytesThreshold, - settings.getSearchShardTaskSettings()::getHeapMovingAverageWindowSize, - settings.getClusterSettings() + settings.getSearchShardTaskSettings().getHeapMovingAverageWindowSize(), + settings.getClusterSettings(), + SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE ), new ElapsedTimeTracker(settings.getSearchShardTaskSettings()::getElapsedTimeNanosThreshold, System::nanoTime) ) @@ -127,7 +130,8 @@ public SearchBackpressureService( List searchShardTaskTrackers ) { this.settings = settings; - this.settings.addListener(this); + this.settings.getSearchTaskSettings().addListener(this); + 
this.settings.getSearchShardTaskSettings().addListener(this); this.taskResourceTrackingService = taskResourceTrackingService; this.taskResourceTrackingService.addTaskCompletionListener(this); this.threadPool = threadPool; @@ -143,35 +147,41 @@ public SearchBackpressureService( new SearchBackpressureState() ); - this.searchTaskCancellationRateLimiter.set( - new TokenBucket( - timeNanosSupplier, - getSettings().getCancellationRateSearchTaskNanos(), - getSettings().getCancellationBurstSearchTask() - ) - ); - - this.searchTaskCancellationRatioLimiter.set( - new TokenBucket( - this::getSearchTaskCompletionCount, - getSettings().getCancellationRatioSearchTask(), - getSettings().getCancellationBurstSearchTask() - ) - ); - - this.searchShardTaskCancellationRateLimiter.set( - new TokenBucket( - timeNanosSupplier, - getSettings().getCancellationRateSearchShardTaskNanos(), - getSettings().getCancellationBurstSearchShardTask() + this.rateLimiters = Map.of( + SearchTask.class, + new AtomicReference<>( + new TokenBucket( + timeNanosSupplier, + getSettings().getSearchTaskSettings().getCancellationRateNanos(), + getSettings().getSearchTaskSettings().getCancellationBurst() + ) + ), + SearchShardTask.class, + new AtomicReference<>( + new TokenBucket( + timeNanosSupplier, + getSettings().getSearchShardTaskSettings().getCancellationRateNanos(), + getSettings().getSearchShardTaskSettings().getCancellationBurst() + ) ) ); - this.searchShardTaskCancellationRatioLimiter.set( - new TokenBucket( - this::getSearchShardTaskCompletionCount, - getSettings().getCancellationRatioSearchShardTask(), - getSettings().getCancellationBurstSearchShardTask() + this.ratioLimiters = Map.of( + SearchTask.class, + new AtomicReference<>( + new TokenBucket( + this::getSearchTaskCompletionCount, + getSettings().getSearchTaskSettings().getCancellationRatio(), + getSettings().getSearchTaskSettings().getCancellationBurst() + ) + ), + SearchShardTask.class, + new AtomicReference<>( + new TokenBucket( + 
this::getSearchShardTaskCompletionCount, + getSettings().getSearchShardTaskSettings().getCancellationRatio(), + getSettings().getSearchShardTaskSettings().getCancellationBurst() + ) ) ); } @@ -233,11 +243,11 @@ void doRun() { // Independently remove tokens from both token buckets. boolean rateLimitReached = isSearchTask - ? searchTaskCancellationRateLimiter.get().request() == false - : searchShardTaskCancellationRateLimiter.get().request() == false; + ? rateLimiters.get(SearchTask.class).get().request() == false + : rateLimiters.get(SearchShardTask.class).get().request() == false; boolean ratioLimitReached = isSearchTask - ? searchTaskCancellationRatioLimiter.get().request() == false - : searchShardTaskCancellationRatioLimiter.get().request() == false; + ? ratioLimiters.get(SearchTask.class).get().request() == false + : ratioLimiters.get(SearchShardTask.class).get().request() == false; // Stop cancelling tasks if there are no tokens in either of the two token buckets. if (rateLimitReached && ratioLimitReached) { @@ -380,24 +390,26 @@ public void onTaskCompleted(Task task) { @Override public void onCancellationRatioSearchTaskChanged() { - searchTaskCancellationRatioLimiter.set( - new TokenBucket( - this::getSearchTaskCompletionCount, - getSettings().getCancellationRatioSearchTask(), - getSettings().getCancellationBurstSearchTask() - ) - ); + ratioLimiters.get(SearchTask.class) + .set( + new TokenBucket( + this::getSearchTaskCompletionCount, + getSettings().getSearchTaskSettings().getCancellationRatio(), + getSettings().getSearchTaskSettings().getCancellationBurst() + ) + ); } @Override public void onCancellationRateSearchTaskChanged() { - searchTaskCancellationRateLimiter.set( - new TokenBucket( - timeNanosSupplier, - getSettings().getCancellationRateSearchTaskNanos(), - getSettings().getCancellationBurstSearchTask() - ) - ); + rateLimiters.get(SearchTask.class) + .set( + new TokenBucket( + timeNanosSupplier, + 
getSettings().getSearchTaskSettings().getCancellationRateNanos(), + getSettings().getSearchTaskSettings().getCancellationBurst() + ) + ); } @Override @@ -408,24 +420,26 @@ public void onCancellationBurstSearchTaskChanged() { @Override public void onCancellationRatioSearchShardTaskChanged() { - searchShardTaskCancellationRatioLimiter.set( - new TokenBucket( - this::getSearchShardTaskCompletionCount, - getSettings().getCancellationRatioSearchShardTask(), - getSettings().getCancellationBurstSearchShardTask() - ) - ); + ratioLimiters.get(SearchShardTask.class) + .set( + new TokenBucket( + this::getSearchShardTaskCompletionCount, + getSettings().getSearchShardTaskSettings().getCancellationRatio(), + getSettings().getSearchShardTaskSettings().getCancellationBurst() + ) + ); } @Override public void onCancellationRateSearchShardTaskChanged() { - searchShardTaskCancellationRateLimiter.set( - new TokenBucket( - timeNanosSupplier, - getSettings().getCancellationRateSearchShardTaskNanos(), - getSettings().getCancellationBurstSearchShardTask() - ) - ); + rateLimiters.get(SearchShardTask.class) + .set( + new TokenBucket( + timeNanosSupplier, + getSettings().getSearchShardTaskSettings().getCancellationRateNanos(), + getSettings().getSearchShardTaskSettings().getCancellationBurst() + ) + ); } @Override diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java index e0cd4efd43aac..13287d04886c1 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java @@ -8,19 +8,13 @@ package org.opensearch.search.backpressure.settings; -import org.opensearch.ExceptionsHelper; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import 
org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.TimeUnit; -import java.util.function.Consumer; - /** - * Settings related to search backpressure and cancellation of in-flight requests. + * Settings related to search backpressure mode and internal * * @opensearch.internal */ @@ -28,14 +22,6 @@ public class SearchBackpressureSettings { private static class Defaults { private static final long INTERVAL_MILLIS = 1000; private static final String MODE = "monitor_only"; - - // TODO: decide on default settings for SearchTask - private static final double CANCELLATION_RATIO_SEARCH_TASK = 0.1; - private static final double CANCELLATION_RATE_SEARCH_TASK = 0.003; - private static final double CANCELLATION_BURST_SEARCH_TASK = 10.0; - private static final double CANCELLATION_RATIO_SEARCH_SHARD_TASK = 0.1; - private static final double CANCELLATION_RATE_SEARCH_SHARD_TASK = 0.003; - private static final double CANCELLATION_BURST_SEARCH_SHARD_TASK = 10.0; } /** @@ -60,102 +46,6 @@ private static class Defaults { Setting.Property.NodeScope ); - /** - * Defines the percentage of SearchTasks to cancel relative to the number of successful SearchTask completions. - * In other words, it is the number of tokens added to the bucket on each successful SearchTask completion. - */ - private volatile double cancellationRatioSearchTask; - public static final Setting SETTING_CANCELLATION_RATIO_SEARCH_TASK = Setting.doubleSetting( - "search_backpressure.cancellation_ratio_search_task", - Defaults.CANCELLATION_RATIO_SEARCH_TASK, - 0.0, - 1.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the number of tasks to cancel per unit time (in millis). - * In other words, it is the number of tokens added to the bucket each millisecond. 
- */ - private volatile double cancellationRateSearchTask; - public static final Setting SETTING_CANCELLATION_RATE_SEARCH_TASK = Setting.doubleSetting( - "search_backpressure.cancellation_rate_search_task", - Defaults.CANCELLATION_RATE_SEARCH_TASK, - 0.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the maximum number of tasks that can be cancelled before being rate-limited. - */ - private volatile double cancellationBurstSearchTask; - public static final Setting SETTING_CANCELLATION_BURST_SEARCH_TASK = Setting.doubleSetting( - "search_backpressure.cancellation_burst_search_task", - Defaults.CANCELLATION_BURST_SEARCH_TASK, - 1.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the percentage of tasks to cancel relative to the number of successful task completions. - * In other words, it is the number of tokens added to the bucket on each successful task completion. - */ - private volatile double cancellationRatioSearchShardTask; - public static final Setting SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK = Setting.doubleSetting( - "search_backpressure.cancellation_ratio_search_shard_task", - Defaults.CANCELLATION_RATIO_SEARCH_SHARD_TASK, - 0.0, - 1.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the number of tasks to cancel per unit time (in millis). - * In other words, it is the number of tokens added to the bucket each millisecond. - */ - private volatile double cancellationRateSearchShardTask; - public static final Setting SETTING_CANCELLATION_RATE_SEARCH_SHARD_TASK = Setting.doubleSetting( - "search_backpressure.cancellation_rate_search_shard_task", - Defaults.CANCELLATION_RATE_SEARCH_SHARD_TASK, - 0.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the maximum number of tasks that can be cancelled before being rate-limited. 
- */ - private volatile double cancellationBurstSearchShardTask; - public static final Setting SETTING_CANCELLATION_BURST_SEARCH_SHARD_TASK = Setting.doubleSetting( - "search_backpressure.cancellation_burst_search_shard_task", - Defaults.CANCELLATION_BURST_SEARCH_SHARD_TASK, - 1.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Callback listeners. - */ - public interface Listener { - void onCancellationRatioSearchTaskChanged(); - - void onCancellationRateSearchTaskChanged(); - - void onCancellationBurstSearchTaskChanged(); - - void onCancellationRatioSearchShardTaskChanged(); - - void onCancellationRateSearchShardTaskChanged(); - - void onCancellationBurstSearchShardTaskChanged(); - } - - private final List listeners = new ArrayList<>(); private final Settings settings; private final ClusterSettings clusterSettings; private final NodeDuressSettings nodeDuressSettings; @@ -173,28 +63,6 @@ public SearchBackpressureSettings(Settings settings, ClusterSettings clusterSett mode = SearchBackpressureMode.fromName(SETTING_MODE.get(settings)); clusterSettings.addSettingsUpdateConsumer(SETTING_MODE, s -> this.setMode(SearchBackpressureMode.fromName(s))); - - cancellationRatioSearchTask = SETTING_CANCELLATION_RATIO_SEARCH_TASK.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATIO_SEARCH_TASK, this::setCancellationRatioSearchTask); - - cancellationRateSearchTask = SETTING_CANCELLATION_RATE_SEARCH_TASK.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATE_SEARCH_TASK, this::setCancellationRateSearchTask); - - cancellationBurstSearchTask = SETTING_CANCELLATION_BURST_SEARCH_TASK.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST_SEARCH_TASK, this::setCancellationBurstSearchTask); - - cancellationRatioSearchShardTask = SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK.get(settings); - 
clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK, this::setCancellationRatioSearchShardTask); - - cancellationRateSearchShardTask = SETTING_CANCELLATION_RATE_SEARCH_SHARD_TASK.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATE_SEARCH_SHARD_TASK, this::setCancellationRateSearchShardTask); - - cancellationBurstSearchShardTask = SETTING_CANCELLATION_BURST_SEARCH_SHARD_TASK.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST_SEARCH_SHARD_TASK, this::setCancellationBurstSearchShardTask); - } - - public void addListener(Listener listener) { - listeners.add(listener); } public Settings getSettings() { @@ -228,80 +96,4 @@ public SearchBackpressureMode getMode() { public void setMode(SearchBackpressureMode mode) { this.mode = mode; } - - public double getCancellationRatioSearchTask() { - return cancellationRatioSearchTask; - } - - private void setCancellationRatioSearchTask(double cancellationRatioSearchTask) { - this.cancellationRatioSearchTask = cancellationRatioSearchTask; - notifyListeners(Listener::onCancellationRatioSearchTaskChanged); - } - - public double getCancellationRateSearchTask() { - return cancellationRateSearchTask; - } - - public double getCancellationRateSearchTaskNanos() { - return getCancellationRateSearchTask() / TimeUnit.MILLISECONDS.toNanos(1); // rate per nanoseconds - } - - private void setCancellationRateSearchTask(double cancellationRateSearchTask) { - this.cancellationRateSearchTask = cancellationRateSearchTask; - notifyListeners(Listener::onCancellationRateSearchTaskChanged); - } - - public double getCancellationBurstSearchTask() { - return cancellationBurstSearchTask; - } - - private void setCancellationBurstSearchTask(double cancellationBurstSearchTask) { - this.cancellationBurstSearchTask = cancellationBurstSearchTask; - notifyListeners(Listener::onCancellationBurstSearchTaskChanged); - } - - public double 
getCancellationRatioSearchShardTask() { - return cancellationRatioSearchShardTask; - } - - private void setCancellationRatioSearchShardTask(double cancellationRatioSearchShardTask) { - this.cancellationRatioSearchShardTask = cancellationRatioSearchShardTask; - notifyListeners(Listener::onCancellationRatioSearchShardTaskChanged); - } - - public double getCancellationRateSearchShardTask() { - return cancellationRateSearchShardTask; - } - - public double getCancellationRateSearchShardTaskNanos() { - return getCancellationRateSearchShardTask() / TimeUnit.MILLISECONDS.toNanos(1); // rate per nanoseconds - } - - private void setCancellationRateSearchShardTask(double cancellationRateSearchShardTask) { - this.cancellationRateSearchShardTask = cancellationRateSearchShardTask; - notifyListeners(Listener::onCancellationRateSearchShardTaskChanged); - } - - public double getCancellationBurstSearchShardTask() { - return cancellationBurstSearchShardTask; - } - - private void setCancellationBurstSearchShardTask(double cancellationBurstSearchShardTask) { - this.cancellationBurstSearchShardTask = cancellationBurstSearchShardTask; - notifyListeners(Listener::onCancellationBurstSearchShardTaskChanged); - } - - private void notifyListeners(Consumer consumer) { - List exceptions = new ArrayList<>(); - - for (Listener listener : listeners) { - try { - consumer.accept(listener); - } catch (Exception e) { - exceptions.add(e); - } - } - - ExceptionsHelper.maybeThrowRuntimeAndSuppress(exceptions); - } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java index 98599e9478a29..1cfb2a5a350f5 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java @@ -8,12 +8,16 @@ package 
org.opensearch.search.backpressure.settings; +import org.opensearch.ExceptionsHelper; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.monitor.jvm.JvmStats; +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; /** * Defines the settings related to the cancellation of SearchShardTasks. @@ -22,8 +26,12 @@ */ public class SearchShardTaskSettings { private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); + private final List listeners = new ArrayList<>(); private static class Defaults { + private static final double CANCELLATION_RATIO = 0.1; + private static final double CANCELLATION_RATE = 0.003; + private static final double CANCELLATION_BURST = 10.0; private static final double TOTAL_HEAP_PERCENT_THRESHOLD = 0.05; private static final long CPU_TIME_MILLIS_THRESHOLD = 15000; private static final long ELAPSED_TIME_MILLIS_THRESHOLD = 30000; @@ -32,6 +40,45 @@ private static class Defaults { private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE = 100; } + /** + * Defines the percentage of SearchShardTasks to cancel relative to the number of successful SearchShardTasks completions. + * In other words, it is the number of tokens added to the bucket on each successful SearchShardTask completion. + */ + private volatile double cancellationRatio; + public static final Setting SETTING_CANCELLATION_RATIO = Setting.doubleSetting( + "search_backpressure.search_shard_task.cancellation_ratio", + Defaults.CANCELLATION_RATIO, + 0.0, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the number of SearchShardTasks to cancel per unit time (in millis). + * In other words, it is the number of tokens added to the bucket each millisecond. 
+ */ + private volatile double cancellationRate; + public static final Setting SETTING_CANCELLATION_RATE = Setting.doubleSetting( + "search_backpressure.search_shard_task.cancellation_rate", + Defaults.CANCELLATION_RATE, + 0.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the maximum number of SearchShardTasks that can be cancelled before being rate-limited. + */ + private volatile double cancellationBurst; + public static final Setting SETTING_CANCELLATION_BURST = Setting.doubleSetting( + "search_backpressure.search_shard_task.cancellation_burst", + Defaults.CANCELLATION_BURST, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + /** * Defines the heap usage threshold (in percentage) for the sum of heap usages across all search shard tasks * before in-flight cancellation is applied. @@ -112,15 +159,33 @@ public SearchShardTaskSettings(Settings settings, ClusterSettings clusterSetting totalHeapPercentThreshold = SETTING_TOTAL_HEAP_PERCENT_THRESHOLD.get(settings); this.cpuTimeMillisThreshold = SETTING_CPU_TIME_MILLIS_THRESHOLD.get(settings); this.elapsedTimeMillisThreshold = SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.get(settings); - heapPercentThreshold = SETTING_HEAP_PERCENT_THRESHOLD.get(settings); - heapVarianceThreshold = SETTING_HEAP_VARIANCE_THRESHOLD.get(settings); - heapMovingAverageWindowSize = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.get(settings); + this.heapPercentThreshold = SETTING_HEAP_PERCENT_THRESHOLD.get(settings); + this.heapVarianceThreshold = SETTING_HEAP_VARIANCE_THRESHOLD.get(settings); + this.heapMovingAverageWindowSize = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.get(settings); + this.cancellationRatio = SETTING_CANCELLATION_RATIO.get(settings); + this.cancellationRate = SETTING_CANCELLATION_RATE.get(settings); + this.cancellationBurst = SETTING_CANCELLATION_BURST.get(settings); + clusterSettings.addSettingsUpdateConsumer(SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, this::setTotalHeapPercentThreshold); 
clusterSettings.addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD, this::setCpuTimeMillisThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, this::setElapsedTimeMillisThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD, this::setHeapPercentThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD, this::setHeapVarianceThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::setHeapMovingAverageWindowSize); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATIO, this::setCancellationRatio); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATE, this::setCancellationRate); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST, this::setCancellationBurst); + } + + /** + * Callback listeners. + */ + public interface Listener { + void onCancellationRatioSearchShardTaskChanged(); + + void onCancellationRateSearchShardTaskChanged(); + + void onCancellationBurstSearchShardTaskChanged(); } public double getTotalHeapPercentThreshold() { @@ -174,4 +239,53 @@ public void setHeapVarianceThreshold(double heapVarianceThreshold) { public void setHeapMovingAverageWindowSize(int heapMovingAverageWindowSize) { this.heapMovingAverageWindowSize = heapMovingAverageWindowSize; } + + public double getCancellationRatio() { + return cancellationRatio; + } + + private void setCancellationRatio(double cancellationRatio) { + this.cancellationRatio = cancellationRatio; + notifyListeners(Listener::onCancellationRatioSearchShardTaskChanged); + } + + public double getCancellationRate() { + return cancellationRate; + } + + public double getCancellationRateNanos() { + return getCancellationRate() / TimeUnit.MILLISECONDS.toNanos(1); // rate per nanoseconds + } + + private void setCancellationRate(double cancellationRate) { + this.cancellationRate = cancellationRate; + 
notifyListeners(Listener::onCancellationRateSearchShardTaskChanged); + } + + public double getCancellationBurst() { + return cancellationBurst; + } + + private void setCancellationBurst(double cancellationBurst) { + this.cancellationBurst = cancellationBurst; + notifyListeners(Listener::onCancellationBurstSearchShardTaskChanged); + } + + public void addListener(Listener listener) { + listeners.add(listener); + } + + private void notifyListeners(Consumer consumer) { + List exceptions = new ArrayList<>(); + + for (Listener listener : listeners) { + try { + consumer.accept(listener); + } catch (Exception e) { + exceptions.add(e); + } + } + + ExceptionsHelper.maybeThrowRuntimeAndSuppress(exceptions); + } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java index 3b89ba7f3492d..af216f84d790e 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java @@ -8,13 +8,16 @@ package org.opensearch.search.backpressure.settings; -import org.apache.logging.log4j.LogManager; +import org.opensearch.ExceptionsHelper; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.monitor.jvm.JvmStats; +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; /** * Defines the settings related to the cancellation of SearchTasks. 
@@ -24,8 +27,13 @@ public class SearchTaskSettings { private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); + private final List listeners = new ArrayList<>(); private static class Defaults { + // TODO: decide on default settings for SearchTask + private static final double CANCELLATION_RATIO = 0.1; + private static final double CANCELLATION_RATE = 0.003; + private static final double CANCELLATION_BURST = 10.0; private static final double TOTAL_HEAP_PERCENT_THRESHOLD = 0.05; private static final long CPU_TIME_MILLIS_THRESHOLD = 60000; private static final long ELAPSED_TIME_MILLIS_THRESHOLD = 120000; @@ -34,6 +42,45 @@ private static class Defaults { private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE = 100; } + /** + * Defines the percentage of SearchTasks to cancel relative to the number of successful SearchTask completions. + * In other words, it is the number of tokens added to the bucket on each successful SearchTask completion. + */ + private volatile double cancellationRatio; + public static final Setting SETTING_CANCELLATION_RATIO = Setting.doubleSetting( + "search_backpressure.search_task.cancellation_ratio", + Defaults.CANCELLATION_RATIO, + 0.0, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the number of SearchTasks to cancel per unit time (in millis). + * In other words, it is the number of tokens added to the bucket each millisecond. + */ + private volatile double cancellationRate; + public static final Setting SETTING_CANCELLATION_RATE = Setting.doubleSetting( + "search_backpressure.search_task.cancellation_rate", + Defaults.CANCELLATION_RATE, + 0.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the maximum number of SearchTasks that can be cancelled before being rate-limited. 
+ */ + private volatile double cancellationBurst; + public static final Setting SETTING_CANCELLATION_BURST = Setting.doubleSetting( + "search_backpressure.search_task.cancellation_burst", + Defaults.CANCELLATION_BURST, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + /** * Defines the heap usage threshold (in percentage) for the sum of heap usages across all search tasks * before in-flight cancellation is applied. @@ -117,12 +164,30 @@ public SearchTaskSettings(Settings settings, ClusterSettings clusterSettings) { this.heapPercentThreshold = SETTING_HEAP_PERCENT_THRESHOLD.get(settings); this.heapVarianceThreshold = SETTING_HEAP_VARIANCE_THRESHOLD.get(settings); this.heapMovingAverageWindowSize = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.get(settings); + this.cancellationRatio = SETTING_CANCELLATION_RATIO.get(settings); + this.cancellationRate = SETTING_CANCELLATION_RATE.get(settings); + this.cancellationBurst = SETTING_CANCELLATION_BURST.get(settings); + clusterSettings.addSettingsUpdateConsumer(SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, this::setTotalHeapPercentThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD, this::setCpuTimeMillisThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, this::setElapsedTimeMillisThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD, this::setHeapPercentThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD, this::setHeapVarianceThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::setHeapMovingAverageWindowSize); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATIO, this::setCancellationRatio); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATE, this::setCancellationRate); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST, this::setCancellationBurst); + } + + /** + * Callback 
listeners. + */ + public interface Listener { + void onCancellationRatioSearchTaskChanged(); + + void onCancellationRateSearchTaskChanged(); + + void onCancellationBurstSearchTaskChanged(); } public double getTotalHeapPercentThreshold() { @@ -158,7 +223,6 @@ public void setTotalHeapPercentThreshold(double totalHeapPercentThreshold) { } public void setCpuTimeMillisThreshold(long cpuTimeMillisThreshold) { - LogManager.getLogger(SearchTaskSettings.class).info("setCpuTimeMillisThreshold " + cpuTimeMillisThreshold); this.cpuTimeMillisThreshold = cpuTimeMillisThreshold; } @@ -177,4 +241,53 @@ public void setHeapVarianceThreshold(double heapVarianceThreshold) { public void setHeapMovingAverageWindowSize(int heapMovingAverageWindowSize) { this.heapMovingAverageWindowSize = heapMovingAverageWindowSize; } + + public double getCancellationRatio() { + return cancellationRatio; + } + + private void setCancellationRatio(double cancellationRatio) { + this.cancellationRatio = cancellationRatio; + notifyListeners(Listener::onCancellationRatioSearchTaskChanged); + } + + public double getCancellationRate() { + return cancellationRate; + } + + public double getCancellationRateNanos() { + return getCancellationRate() / TimeUnit.MILLISECONDS.toNanos(1); // rate per nanoseconds + } + + private void setCancellationRate(double cancellationRate) { + this.cancellationRate = cancellationRate; + notifyListeners(Listener::onCancellationRateSearchTaskChanged); + } + + public double getCancellationBurst() { + return cancellationBurst; + } + + private void setCancellationBurst(double cancellationBurst) { + this.cancellationBurst = cancellationBurst; + notifyListeners(Listener::onCancellationBurstSearchTaskChanged); + } + + public void addListener(Listener listener) { + listeners.add(listener); + } + + private void notifyListeners(Consumer consumer) { + List exceptions = new ArrayList<>(); + + for (Listener listener : listeners) { + try { + consumer.accept(listener); + } catch (Exception e) { + 
exceptions.add(e); + } + } + + ExceptionsHelper.maybeThrowRuntimeAndSuppress(exceptions); + } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java index bd5f24ef0dbee..2d2eba16aa7a5 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java @@ -8,7 +8,6 @@ package org.opensearch.search.backpressure.stats; -import org.opensearch.Version; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; import org.opensearch.common.io.stream.Writeable; @@ -40,11 +39,7 @@ public SearchBackpressureStats( public SearchBackpressureStats(StreamInput in) throws IOException { searchShardTaskStats = new SearchBackpressureTaskStats(in); mode = SearchBackpressureMode.fromName(in.readString()); - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { - searchTaskStats = new SearchBackpressureTaskStats(in); - } else { - searchTaskStats = null; - } + searchTaskStats = in.readOptionalWriteable(SearchBackpressureTaskStats::new); } @Override @@ -60,9 +55,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws public void writeTo(StreamOutput out) throws IOException { searchShardTaskStats.writeTo(out); out.writeString(mode.getName()); - if (Version.CURRENT.onOrAfter(Version.V_3_0_0) && out.getVersion().onOrAfter(Version.V_3_0_0)) { - searchTaskStats.writeTo(out); - } + // searchTaskStats.writeTo(out); + out.writeOptionalWriteable(searchTaskStats); } @Override @@ -70,9 +64,9 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; SearchBackpressureStats that = (SearchBackpressureStats) o; - return (Version.CURRENT.onOrAfter(Version.V_3_0_0) - && 
searchTaskStats.equals(that.searchTaskStats) - && searchShardTaskStats.equals(that.searchShardTaskStats)) && mode == that.mode; + return mode == that.mode + && Objects.equals(searchTaskStats, that.searchTaskStats) + && Objects.equals(searchShardTaskStats, that.searchShardTaskStats); } @Override diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java index f6925c4c6bc8c..5d7bb31ae2fbb 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java @@ -88,22 +88,7 @@ public boolean equals(Object o) { SearchBackpressureTaskStats that = (SearchBackpressureTaskStats) o; return cancellationCount == that.cancellationCount && limitReachedCount == that.limitReachedCount - && compareMaps(resourceUsageTrackerStats, that.resourceUsageTrackerStats); - } - - private boolean compareMaps( - Map trackers1, - Map trackers2 - ) { - if (trackers1.size() != trackers2.size()) { - return false; - } - for (Map.Entry e1 : trackers1.entrySet()) { - if (trackers2.containsKey(e1.getKey()) == false || trackers2.get(e1.getKey()).equals(e1.getValue()) == false) { - return false; - } - } - return true; + && resourceUsageTrackerStats.equals(that.resourceUsageTrackerStats); } @Override diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java index 5215a17a61a8c..fb4cd342de25b 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java @@ -64,9 +64,9 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public 
TaskResourceUsageTracker.Stats stats(List tasks) { - long currentMax = tasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); - long currentAvg = (long) tasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).average().orElse(0); + public TaskResourceUsageTracker.Stats stats(List activeTasks) { + long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); + long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).average().orElse(0); return new Stats(getCancellations(), currentMax, currentAvg); } diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java index d1700861476d6..1175d68fb8550 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java @@ -65,10 +65,10 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public TaskResourceUsageTracker.Stats stats(List tasks) { + public TaskResourceUsageTracker.Stats stats(List activeTasks) { long now = timeNanosSupplier.getAsLong(); - long currentMax = tasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); - long currentAvg = (long) tasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); + long currentMax = activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); + long currentAvg = (long) activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); return new Stats(getCancellations(), currentMax, currentAvg); } diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java 
b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java index 915b6ed60f685..15058bf8fe156 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java @@ -11,12 +11,10 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.settings.Setting; import org.opensearch.common.unit.ByteSizeValue; import org.opensearch.common.util.MovingAverage; -import org.opensearch.monitor.jvm.JvmStats; import org.opensearch.common.xcontent.XContentBuilder; -import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; -import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; @@ -26,7 +24,6 @@ import java.util.Optional; import java.util.concurrent.atomic.AtomicReference; import java.util.function.DoubleSupplier; -import java.util.function.IntSupplier; import java.util.function.LongSupplier; import static org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER; @@ -38,26 +35,21 @@ * @opensearch.internal */ public class HeapUsageTracker extends TaskResourceUsageTracker { - private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); private final DoubleSupplier heapVarianceSupplier; private final LongSupplier heapBytesThresholdSupplier; - private final IntSupplier windowSizeSupplier; private final AtomicReference movingAverageReference; public HeapUsageTracker( DoubleSupplier heapVarianceSupplier, LongSupplier heapBytesThresholdSupplier, - IntSupplier windowSizeSupplier, - ClusterSettings clusterSettings + int heapMovingAverageWindowSize, + ClusterSettings clusterSettings, + Setting windowSizeSetting ) { 
this.heapVarianceSupplier = heapVarianceSupplier; this.heapBytesThresholdSupplier = heapBytesThresholdSupplier; - this.windowSizeSupplier = windowSizeSupplier; - this.movingAverageReference = new AtomicReference<>(new MovingAverage(windowSizeSupplier.getAsInt())); - // TODO: find a way to get the type of the setting SearchTaskSettings/SearchShardTaskSettings and then add consumer only for the - // required setting - clusterSettings.addSettingsUpdateConsumer(SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::updateWindowSize); - clusterSettings.addSettingsUpdateConsumer(SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::updateWindowSize); + this.movingAverageReference = new AtomicReference<>(new MovingAverage(heapMovingAverageWindowSize)); + clusterSettings.addSettingsUpdateConsumer(windowSizeSetting, this::updateWindowSize); } @Override @@ -102,9 +94,9 @@ private void updateWindowSize(int heapMovingAverageWindowSize) { } @Override - public TaskResourceUsageTracker.Stats stats(List tasks) { - long currentMax = tasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); - long currentAvg = (long) tasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); + public TaskResourceUsageTracker.Stats stats(List activeTasks) { + long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); + long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); return new Stats(getCancellations(), currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); } diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index d2dce8731d141..1b61ecb9e0341 100644 --- 
a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -314,12 +314,7 @@ public void testSearchShardTaskInFlightCancellation() { private SearchBackpressureSettings getBackpressureSettings(String mode, double ratio, double rate, double burst) { return spy( new SearchBackpressureSettings( - Settings.builder() - .put(SearchBackpressureSettings.SETTING_MODE.getKey(), mode) - .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK.getKey(), ratio) - .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATE_SEARCH_TASK.getKey(), rate) - .put(SearchBackpressureSettings.SETTING_CANCELLATION_BURST_SEARCH_TASK.getKey(), burst) - .build(), + Settings.builder().put(SearchBackpressureSettings.SETTING_MODE.getKey(), mode).build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) ) ); diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java index aa9bd39fb3451..4af23be03f9f2 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java @@ -49,8 +49,9 @@ public void testSearchTaskEligibleForCancellation() { new HeapUsageTracker( mockSearchTaskSettings::getHeapVarianceThreshold, mockSearchTaskSettings::getHeapBytesThreshold, - mockSearchTaskSettings::getHeapMovingAverageWindowSize, - mockSettings.getClusterSettings() + mockSearchTaskSettings.getHeapMovingAverageWindowSize(), + mockSettings.getClusterSettings(), + SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE ) ); Task task = createMockTaskWithResourceStats(SearchTask.class, 1, 50); @@ -77,8 +78,9 @@ public void 
testSearchShardTaskEligibleForCancellation() { new HeapUsageTracker( mockSearchShardTaskSettings::getHeapVarianceThreshold, mockSearchShardTaskSettings::getHeapBytesThreshold, - mockSearchShardTaskSettings::getHeapMovingAverageWindowSize, - mockSettings.getClusterSettings() + mockSearchShardTaskSettings.getHeapMovingAverageWindowSize(), + mockSettings.getClusterSettings(), + SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE ) ); Task task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 50); @@ -107,8 +109,9 @@ public void testNotEligibleForCancellation() { new HeapUsageTracker( mockSearchShardTaskSettings::getHeapVarianceThreshold, mockSearchShardTaskSettings::getHeapBytesThreshold, - mockSearchShardTaskSettings::getHeapMovingAverageWindowSize, - mockSettings.getClusterSettings() + mockSearchShardTaskSettings.getHeapMovingAverageWindowSize(), + mockSettings.getClusterSettings(), + SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE ) ); From 53609a739876f5d1a688534882923c4a139581b2 Mon Sep 17 00:00:00 2001 From: PritLadani Date: Thu, 19 Jan 2023 16:26:16 +0530 Subject: [PATCH 10/34] Separating SearchTaskStats and SearchShardTaskStats Signed-off-by: PritLadani --- .../SearchBackpressureService.java | 7 +-- .../stats/SearchBackpressureStats.java | 12 ++--- .../stats/SearchBackpressureTaskStats.java | 4 +- .../stats/SearchShardTaskStats.java | 35 +++++++++++++++ .../backpressure/stats/SearchTaskStats.java | 35 +++++++++++++++ .../SearchBackpressureServiceTests.java | 11 ++--- .../stats/SearchBackpressureStatsTests.java | 4 +- .../stats/SearchShardTaskStatsTests.java | 44 ++++++++++++++++++ .../stats/SearchTaskStatsTests.java | 45 +++++++++++++++++++ 9 files changed, 180 insertions(+), 17 deletions(-) create mode 100644 server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java create mode 100644 server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java create mode 100644 
server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java create mode 100644 server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index 7efff0fd5ffdb..c3a36b6ac613a 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -22,7 +22,8 @@ import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; -import org.opensearch.search.backpressure.stats.SearchBackpressureTaskStats; +import org.opensearch.search.backpressure.stats.SearchShardTaskStats; +import org.opensearch.search.backpressure.stats.SearchTaskStats; import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; @@ -472,14 +473,14 @@ protected void doClose() throws IOException {} public SearchBackpressureStats nodeStats() { List searchTasks = getTaskByType(SearchTask.class); List searchShardTasks = getTaskByType(SearchShardTask.class); - SearchBackpressureTaskStats searchTaskStats = new SearchBackpressureTaskStats( + SearchTaskStats searchTaskStats = new SearchTaskStats( searchBackpressureStates.get(SearchTask.class).getCancellationCount(), searchBackpressureStates.get(SearchTask.class).getLimitReachedCount(), searchTaskTrackers.stream() .collect(Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.stats(searchTasks))) ); - SearchBackpressureTaskStats searchShardTaskStats = new 
SearchBackpressureTaskStats( + SearchShardTaskStats searchShardTaskStats = new SearchShardTaskStats( searchBackpressureStates.get(SearchShardTask.class).getCancellationCount(), searchBackpressureStates.get(SearchShardTask.class).getLimitReachedCount(), searchShardTaskTrackers.stream() diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java index 2d2eba16aa7a5..756ce79d3a769 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java @@ -22,13 +22,13 @@ * Stats related to search backpressure. */ public class SearchBackpressureStats implements ToXContentFragment, Writeable { - private final SearchBackpressureTaskStats searchTaskStats; - private final SearchBackpressureTaskStats searchShardTaskStats; + private final SearchTaskStats searchTaskStats; + private final SearchShardTaskStats searchShardTaskStats; private final SearchBackpressureMode mode; public SearchBackpressureStats( - SearchBackpressureTaskStats searchTaskStats, - SearchBackpressureTaskStats searchShardTaskStats, + SearchTaskStats searchTaskStats, + SearchShardTaskStats searchShardTaskStats, SearchBackpressureMode mode ) { this.searchTaskStats = searchTaskStats; @@ -37,9 +37,9 @@ public SearchBackpressureStats( } public SearchBackpressureStats(StreamInput in) throws IOException { - searchShardTaskStats = new SearchBackpressureTaskStats(in); + searchShardTaskStats = new SearchShardTaskStats(in); mode = SearchBackpressureMode.fromName(in.readString()); - searchTaskStats = in.readOptionalWriteable(SearchBackpressureTaskStats::new); + searchTaskStats = in.readOptionalWriteable(SearchTaskStats::new); } @Override diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java 
b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java index 5d7bb31ae2fbb..ce517d831b2eb 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java @@ -25,7 +25,9 @@ import java.util.Objects; /** - * Stats related to cancelled search shard tasks. + * Stats related to cancelled SearchBackpressureTasks. + * Since the children of this class have exactly the same structure, we have extracted the common stats to this class. + * However, in the future, if some task stats do not have these common stats, we can remove this class. */ public class SearchBackpressureTaskStats implements ToXContentObject, Writeable { private final long cancellationCount; diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java new file mode 100644 index 0000000000000..c0db0d342d02e --- /dev/null +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.backpressure.stats; + +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; + +import java.io.IOException; +import java.util.Map; + +/** + * Stats related to cancelled SearchShardTasks.
+ */ + +public class SearchShardTaskStats extends SearchBackpressureTaskStats { + + public SearchShardTaskStats( + long cancellationCount, + long limitReachedCount, + Map resourceUsageTrackerStats + ) { + super(cancellationCount, limitReachedCount, resourceUsageTrackerStats); + } + + public SearchShardTaskStats(StreamInput in) throws IOException { + super(in); + } +} diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java new file mode 100644 index 0000000000000..023e97298b6c4 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.backpressure.stats; + +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; + +import java.io.IOException; +import java.util.Map; + +/** + * Stats related to cancelled SearchTasks. 
+ */ + +public class SearchTaskStats extends SearchBackpressureTaskStats { + + public SearchTaskStats( + long cancellationCount, + long limitReachedCount, + Map resourceUsageTrackerStats + ) { + super(cancellationCount, limitReachedCount, resourceUsageTrackerStats); + } + + public SearchTaskStats(StreamInput in) throws IOException { + super(in); + } +} diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index 1b61ecb9e0341..d43fedac047dc 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -19,10 +19,11 @@ import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; import org.opensearch.search.backpressure.settings.SearchTaskSettings; +import org.opensearch.search.backpressure.stats.SearchShardTaskStats; +import org.opensearch.search.backpressure.stats.SearchTaskStats; import org.opensearch.search.backpressure.trackers.NodeDuressTracker; import org.opensearch.common.xcontent.XContentBuilder; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; -import org.opensearch.search.backpressure.stats.SearchBackpressureTaskStats; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; import org.opensearch.tasks.CancellableTask; @@ -223,8 +224,8 @@ public void testSearchTaskInFlightCancellation() { // Verify search backpressure stats. 
SearchBackpressureStats expectedStats = new SearchBackpressureStats( - new SearchBackpressureTaskStats(20, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(20))), - new SearchBackpressureTaskStats(0, 0, Collections.emptyMap()), + new SearchTaskStats(20, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(20))), + new SearchShardTaskStats(0, 0, Collections.emptyMap()), SearchBackpressureMode.ENFORCED ); SearchBackpressureStats actualStats = service.nodeStats(); @@ -303,8 +304,8 @@ public void testSearchShardTaskInFlightCancellation() { // Verify search backpressure stats. SearchBackpressureStats expectedStats = new SearchBackpressureStats( - new SearchBackpressureTaskStats(0, 0, Collections.emptyMap()), - new SearchBackpressureTaskStats(15, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(15))), + new SearchTaskStats(0, 0, Collections.emptyMap()), + new SearchShardTaskStats(15, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(15))), SearchBackpressureMode.ENFORCED ); SearchBackpressureStats actualStats = service.nodeStats(); diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java index 3c301b40b5f4f..0c86cf4b11239 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java @@ -25,8 +25,8 @@ protected SearchBackpressureStats createTestInstance() { public static SearchBackpressureStats randomInstance() { return new SearchBackpressureStats( - SearchBackpressureTaskStatsTests.randomInstance(), - SearchBackpressureTaskStatsTests.randomInstance(), + SearchTaskStatsTests.randomInstance(), + SearchShardTaskStatsTests.randomInstance(), randomFrom(SearchBackpressureMode.DISABLED, 
SearchBackpressureMode.MONITOR_ONLY, SearchBackpressureMode.ENFORCED) ); } diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java new file mode 100644 index 0000000000000..d5bc9398492eb --- /dev/null +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java @@ -0,0 +1,44 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.backpressure.stats; + +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.search.backpressure.trackers.CpuUsageTracker; +import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; +import org.opensearch.search.backpressure.trackers.HeapUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; +import org.opensearch.test.AbstractWireSerializingTestCase; + +import java.util.Map; + +public class SearchShardTaskStatsTests extends AbstractWireSerializingTestCase { + @Override + protected Writeable.Reader instanceReader() { + return SearchShardTaskStats::new; + } + + @Override + protected SearchShardTaskStats createTestInstance() { + return randomInstance(); + } + + public static SearchShardTaskStats randomInstance() { + Map resourceUsageTrackerStats = Map.of( + TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, + new CpuUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), + TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, + new HeapUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), + 
TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, + new ElapsedTimeTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()) + ); + + return new SearchShardTaskStats(randomNonNegativeLong(), randomNonNegativeLong(), resourceUsageTrackerStats); + } +} diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java new file mode 100644 index 0000000000000..07cec723efb17 --- /dev/null +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java @@ -0,0 +1,45 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.backpressure.stats; + +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.search.backpressure.trackers.CpuUsageTracker; +import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; +import org.opensearch.search.backpressure.trackers.HeapUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; +import org.opensearch.test.AbstractWireSerializingTestCase; + +import java.util.Map; + +public class SearchTaskStatsTests extends AbstractWireSerializingTestCase { + + @Override + protected Writeable.Reader instanceReader() { + return SearchTaskStats::new; + } + + @Override + protected SearchTaskStats createTestInstance() { + return randomInstance(); + } + + public static SearchTaskStats randomInstance() { + Map resourceUsageTrackerStats = Map.of( + TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, + new CpuUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), + 
TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, + new HeapUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), + TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, + new ElapsedTimeTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()) + ); + + return new SearchTaskStats(randomNonNegativeLong(), randomNonNegativeLong(), resourceUsageTrackerStats); + } +} From 102cfd5c47308d1331b42f22b02b0ffd5f587446 Mon Sep 17 00:00:00 2001 From: PritLadani Date: Mon, 19 Dec 2022 22:44:20 +0530 Subject: [PATCH 11/34] Cancellation of in-flight search requests at coordinator level Signed-off-by: PritLadani --- CHANGELOG.md | 2 + .../common/settings/ClusterSettings.java | 9 +- .../SearchBackpressureService.java | 125 +++++++++++++----- .../settings/SearchBackpressureSettings.java | 6 + .../settings/SearchTaskSettings.java | 62 +++++++++ .../stats/SearchBackpressureStats.java | 16 ++- .../backpressure/stats/SearchTaskStats.java | 100 ++++++++++++++ .../trackers/CpuUsageTracker.java | 47 ++++++- .../trackers/ElapsedTimeTracker.java | 44 +++++- .../trackers/HeapUsageTracker.java | 110 +++++++++++++-- .../trackers/TaskResourceUsageTracker.java | 28 +++- .../SearchBackpressureServiceTests.java | 29 ++-- .../stats/SearchBackpressureStatsTests.java | 1 + .../stats/SearchTaskStatsTests.java | 44 ++++++ .../tasks/TaskCancellationTests.java | 24 ++-- 15 files changed, 567 insertions(+), 80 deletions(-) create mode 100644 server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java create mode 100644 server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java create mode 100644 server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f9075b6a4ed3..9d96c19181f84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,8 @@ The format is based on [Keep a 
Changelog](https://keepachangelog.com/en/1.0.0/), - Added experimental support for extensions ([#5347](https://github.com/opensearch-project/OpenSearch/pull/5347)), ([#5518](https://github.com/opensearch-project/OpenSearch/pull/5518), ([#5597](https://github.com/opensearch-project/OpenSearch/pull/5597)), ([#5615](https://github.com/opensearch-project/OpenSearch/pull/5615))) - Add CI bundle pattern to distribution download ([#5348](https://github.com/opensearch-project/OpenSearch/pull/5348)) - Add support for ppc64le architecture ([#5459](https://github.com/opensearch-project/OpenSearch/pull/5459)) +- Cancellation of in-flight SearchTasks based on resource consumption ([#5605](https://github.com/opensearch-project/OpenSearch/pull/5605)) + - Support versioning for Weighted routing apis([#5255](https://github.com/opensearch-project/OpenSearch/pull/5255)) - Added @gbbafna as an OpenSearch maintainer ([#5668](https://github.com/opensearch-project/OpenSearch/pull/5668)) - Add support for discovered cluster manager and remove local weights ([#5680](https://github.com/opensearch-project/OpenSearch/pull/5680)) diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 5549b4e3f26b7..a7f520c26e480 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -44,6 +44,7 @@ import org.opensearch.search.backpressure.settings.NodeDuressSettings; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import
org.opensearch.search.backpressure.trackers.HeapUsageTracker; @@ -605,11 +606,17 @@ public void apply(Settings value, Settings current, Settings previous) { NodeDuressSettings.SETTING_CPU_THRESHOLD, NodeDuressSettings.SETTING_HEAP_THRESHOLD, SearchShardTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, + HeapUsageTracker.SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY, HeapUsageTracker.SETTING_HEAP_PERCENT_THRESHOLD, + HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY, HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD, + HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY, HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, + CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD, - ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD + ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, + ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, + SearchTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY ) ) ); diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index fd13198b957da..2465790176daa 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -12,6 +12,7 @@ import org.apache.logging.log4j.Logger; import org.opensearch.ExceptionsHelper; import org.opensearch.action.search.SearchShardTask; +import org.opensearch.action.search.SearchTask; import org.opensearch.common.component.AbstractLifecycleComponent; import org.opensearch.common.util.TokenBucket; import org.opensearch.monitor.jvm.JvmStats; @@ -20,6 +21,7 @@ import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import 
org.opensearch.search.backpressure.stats.SearchBackpressureStats; import org.opensearch.search.backpressure.stats.SearchShardTaskStats; +import org.opensearch.search.backpressure.stats.SearchTaskStats; import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; @@ -37,7 +39,9 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Comparator; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.concurrent.atomic.AtomicReference; import java.util.function.LongSupplier; @@ -68,9 +72,12 @@ public class SearchBackpressureService extends AbstractLifecycleComponent private final AtomicReference taskCancellationRateLimiter = new AtomicReference<>(); private final AtomicReference taskCancellationRatioLimiter = new AtomicReference<>(); - // Currently, only the state of SearchShardTask is being tracked. - // This can be generalized to Map once we start supporting cancellation of SearchTasks as well. 
- private final SearchBackpressureState state = new SearchBackpressureState(); + private final Map, SearchBackpressureState> searchBackpressureStates = new HashMap<>() { + { + put(SearchTask.class, new SearchBackpressureState()); + put(SearchShardTask.class, new SearchBackpressureState()); + } + }; public SearchBackpressureService( SearchBackpressureSettings settings, @@ -116,10 +123,15 @@ public SearchBackpressureService( ); this.taskCancellationRatioLimiter.set( - new TokenBucket(state::getCompletionCount, getSettings().getCancellationRatio(), getSettings().getCancellationBurst()) + new TokenBucket(this::getTaskCompletionCount, getSettings().getCancellationRatio(), getSettings().getCancellationBurst()) ); } + private long getTaskCompletionCount() { + return searchBackpressureStates.get(SearchTask.class).getCompletionCount() + searchBackpressureStates.get(SearchShardTask.class) + .getCompletionCount(); + } + void doRun() { SearchBackpressureMode mode = getSettings().getMode(); if (mode == SearchBackpressureMode.DISABLED) { @@ -130,18 +142,29 @@ void doRun() { return; } - // We are only targeting in-flight cancellation of SearchShardTask for now. - List searchShardTasks = getSearchShardTasks(); + List searchTasks = getSearchTasks(); + List searchShardTasks = getSearchShardTasks(); + List cancellableTasks = new ArrayList<>(); // Force-refresh usage stats of these tasks before making a cancellation decision. + taskResourceTrackingService.refreshResourceStats(searchTasks.toArray(new Task[0])); taskResourceTrackingService.refreshResourceStats(searchShardTasks.toArray(new Task[0])); - // Skip cancellation if the increase in heap usage is not due to search requests. 
- if (isHeapUsageDominatedBySearch(searchShardTasks) == false) { + // Check if increase in heap usage is due to SearchTasks + if (isHeapUsageDominatedBySearch(searchTasks, getSettings().getSearchTaskSettings().getTotalHeapBytesThreshold())) { + cancellableTasks.addAll(searchTasks); + } + + // Check if increase in heap usage is due to SearchShardTasks + if (isHeapUsageDominatedBySearch(searchShardTasks, getSettings().getSearchShardTaskSettings().getTotalHeapBytesThreshold())) { + cancellableTasks.addAll(searchShardTasks); + } + + if (cancellableTasks.isEmpty()) { return; } - for (TaskCancellation taskCancellation : getTaskCancellations(searchShardTasks)) { + for (TaskCancellation taskCancellation : getTaskCancellations(cancellableTasks)) { logger.debug( "[{} mode] cancelling task [{}] due to high resource consumption [{}]", mode.getName(), @@ -160,7 +183,10 @@ void doRun() { // Stop cancelling tasks if there are no tokens in either of the two token buckets. if (rateLimitReached && ratioLimitReached) { logger.debug("task cancellation limit reached"); - state.incrementLimitReachedCount(); + SearchBackpressureState searchBackpressureState = searchBackpressureStates.get( + (taskCancellation.getTask() instanceof SearchTask) ? SearchTask.class : SearchShardTask.class + ); + searchBackpressureState.incrementLimitReachedCount(); break; } @@ -187,9 +213,8 @@ boolean isNodeInDuress() { /** * Returns true if the increase in heap usage is due to search requests. 
*/ - boolean isHeapUsageDominatedBySearch(List searchShardTasks) { - long usage = searchShardTasks.stream().mapToLong(task -> task.getTotalResourceStats().getMemoryInBytes()).sum(); - long threshold = getSettings().getSearchShardTaskSettings().getTotalHeapBytesThreshold(); + boolean isHeapUsageDominatedBySearch(List cancellableTasks, long threshold) { + long usage = cancellableTasks.stream().mapToLong(task -> task.getTotalResourceStats().getMemoryInBytes()).sum(); if (usage < threshold) { logger.debug("heap usage not dominated by search requests [{}/{}]", usage, threshold); return false; @@ -201,7 +226,7 @@ boolean isHeapUsageDominatedBySearch(List searchShardTasks) { /** * Filters and returns the list of currently running SearchShardTasks. */ - List getSearchShardTasks() { + List getSearchShardTasks() { return taskResourceTrackingService.getResourceAwareTasks() .values() .stream() @@ -210,6 +235,18 @@ List getSearchShardTasks() { .collect(Collectors.toUnmodifiableList()); } + /** + * Filters and returns the list of currently running SearchTasks. + */ + List getSearchTasks() { + return taskResourceTrackingService.getResourceAwareTasks() + .values() + .stream() + .filter(task -> task instanceof SearchTask) + .map(task -> (SearchTask) task) + .collect(Collectors.toUnmodifiableList()); + } + /** * Returns a TaskCancellation wrapper containing the list of reasons (possibly zero), along with an overall * cancellation score for the given task. 
Cancelling a task with a higher score has better chance of recovering the @@ -222,13 +259,19 @@ TaskCancellation getTaskCancellation(CancellableTask task) { for (TaskResourceUsageTracker tracker : taskResourceUsageTrackers) { Optional reason = tracker.checkAndMaybeGetCancellationReason(task); if (reason.isPresent()) { + if (task instanceof SearchTask) { + callbacks.add(tracker::incrementSearchTaskCancellations); + } else { + callbacks.add(tracker::incrementSearchShardTaskCancellations); + } reasons.add(reason.get()); - callbacks.add(tracker::incrementCancellations); } } - if (task instanceof SearchShardTask) { - callbacks.add(state::incrementCancellationCount); + if (task instanceof SearchTask) { + callbacks.add(searchBackpressureStates.get(SearchTask.class)::incrementCancellationCount); + } else { + callbacks.add(searchBackpressureStates.get(SearchShardTask.class)::incrementCancellationCount); } return new TaskCancellation(task, reasons, callbacks); @@ -249,8 +292,12 @@ SearchBackpressureSettings getSettings() { return settings; } - SearchBackpressureState getState() { - return state; + SearchBackpressureState getSearchTasksState() { + return searchBackpressureStates.get(SearchTask.class); + } + + SearchBackpressureState getSearchShardTasksState() { + return searchBackpressureStates.get(SearchShardTask.class); } @Override @@ -259,19 +306,22 @@ public void onTaskCompleted(Task task) { return; } - if (task instanceof SearchShardTask == false) { + if (task instanceof SearchTask == false && task instanceof SearchShardTask == false) { return; } - SearchShardTask searchShardTask = (SearchShardTask) task; - if (searchShardTask.isCancelled() == false) { - state.incrementCompletionCount(); + CancellableTask cancellableTask = (CancellableTask) task; + SearchBackpressureState searchBackpressureState = searchBackpressureStates.get( + (task instanceof SearchTask) ? 
SearchTask.class : SearchShardTask.class + ); + if (cancellableTask.isCancelled() == false) { + searchBackpressureState.incrementCompletionCount(); } List exceptions = new ArrayList<>(); for (TaskResourceUsageTracker tracker : taskResourceUsageTrackers) { try { - tracker.update(searchShardTask); + tracker.update(task); } catch (Exception e) { exceptions.add(e); } @@ -282,7 +332,7 @@ public void onTaskCompleted(Task task) { @Override public void onCancellationRatioChanged() { taskCancellationRatioLimiter.set( - new TokenBucket(state::getCompletionCount, getSettings().getCancellationRatio(), getSettings().getCancellationBurst()) + new TokenBucket(this::getTaskCompletionCount, getSettings().getCancellationRatio(), getSettings().getCancellationBurst()) ); } @@ -321,15 +371,30 @@ protected void doStop() { protected void doClose() throws IOException {} public SearchBackpressureStats nodeStats() { - List searchShardTasks = getSearchShardTasks(); + List searchTasks = getSearchTasks(); + List searchShardTasks = getSearchShardTasks(); + + SearchTaskStats searchTaskStats = new SearchTaskStats( + searchBackpressureStates.get(SearchTask.class).getCancellationCount(), + searchBackpressureStates.get(SearchTask.class).getLimitReachedCount(), + taskResourceUsageTrackers.stream() + .collect( + Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.searchTaskStats(searchTasks)) + ) + ); SearchShardTaskStats searchShardTaskStats = new SearchShardTaskStats( - state.getCancellationCount(), - state.getLimitReachedCount(), + searchBackpressureStates.get(SearchShardTask.class).getCancellationCount(), + searchBackpressureStates.get(SearchShardTask.class).getLimitReachedCount(), taskResourceUsageTrackers.stream() - .collect(Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.stats(searchShardTasks))) + .collect( + Collectors.toUnmodifiableMap( + t -> TaskResourceUsageTrackerType.fromName(t.name()), + t -> 
t.searchShardTaskStats(searchShardTasks) + ) + ) ); - return new SearchBackpressureStats(searchShardTaskStats, getSettings().getMode()); + return new SearchBackpressureStats(searchTaskStats, searchShardTaskStats, getSettings().getMode()); } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java index df2c04a730fbc..3906228389729 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java @@ -110,12 +110,14 @@ public interface Listener { private final Settings settings; private final ClusterSettings clusterSettings; private final NodeDuressSettings nodeDuressSettings; + private final SearchTaskSettings searchTaskSettings; private final SearchShardTaskSettings searchShardTaskSettings; public SearchBackpressureSettings(Settings settings, ClusterSettings clusterSettings) { this.settings = settings; this.clusterSettings = clusterSettings; this.nodeDuressSettings = new NodeDuressSettings(settings, clusterSettings); + this.searchTaskSettings = new SearchTaskSettings(settings, clusterSettings); this.searchShardTaskSettings = new SearchShardTaskSettings(settings, clusterSettings); interval = new TimeValue(SETTING_INTERVAL_MILLIS.get(settings)); @@ -149,6 +151,10 @@ public NodeDuressSettings getNodeDuressSettings() { return nodeDuressSettings; } + public SearchTaskSettings getSearchTaskSettings() { + return searchTaskSettings; + } + public SearchShardTaskSettings getSearchShardTaskSettings() { return searchShardTaskSettings; } diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java new file mode 100644 index 0000000000000..f28cdd17a3cff 
--- /dev/null +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java @@ -0,0 +1,62 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.backpressure.settings; + +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.monitor.jvm.JvmStats; + +/** + * Defines the settings related to the cancellation of SearchTasks. + * + * @opensearch.internal + */ + +public class SearchTaskSettings { + private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); + + private static class Defaults { + private static final double TOTAL_HEAP_PERCENT_THRESHOLD = 0.05; + } + + /** + * Defines the heap usage threshold (in percentage) for the sum of heap usages across all search tasks + * before in-flight cancellation is applied. 
+ */ + private volatile double totalHeapPercentThreshold; + public static final Setting SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY = Setting.doubleSetting( + "search_backpressure.search_task.total_heap_percent_threshold", + Defaults.TOTAL_HEAP_PERCENT_THRESHOLD, + 0.0, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + public SearchTaskSettings(Settings settings, ClusterSettings clusterSettings) { + totalHeapPercentThreshold = SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY.get(settings); + clusterSettings.addSettingsUpdateConsumer( + SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY, + this::setTotalHeapPercentThreshold + ); + } + + public double getTotalHeapPercentThreshold() { + return totalHeapPercentThreshold; + } + + public long getTotalHeapBytesThreshold() { + return (long) (HEAP_SIZE_BYTES * getTotalHeapPercentThreshold()); + } + + private void setTotalHeapPercentThreshold(double totalHeapPercentThreshold) { + this.totalHeapPercentThreshold = totalHeapPercentThreshold; + } +} diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java index 3aec0dfc579c5..92a52b62477f2 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java @@ -22,21 +22,28 @@ * Stats related to search backpressure. 
*/ public class SearchBackpressureStats implements ToXContentFragment, Writeable { + private final SearchTaskStats searchTaskStats; private final SearchShardTaskStats searchShardTaskStats; private final SearchBackpressureMode mode; - public SearchBackpressureStats(SearchShardTaskStats searchShardTaskStats, SearchBackpressureMode mode) { + public SearchBackpressureStats( + SearchTaskStats searchTaskStats, + SearchShardTaskStats searchShardTaskStats, + SearchBackpressureMode mode + ) { + this.searchTaskStats = searchTaskStats; this.searchShardTaskStats = searchShardTaskStats; this.mode = mode; } public SearchBackpressureStats(StreamInput in) throws IOException { - this(new SearchShardTaskStats(in), SearchBackpressureMode.fromName(in.readString())); + this(new SearchTaskStats(in), new SearchShardTaskStats(in), SearchBackpressureMode.fromName(in.readString())); } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { return builder.startObject("search_backpressure") + .field("search_task", searchTaskStats) .field("search_shard_task", searchShardTaskStats) .field("mode", mode.getName()) .endObject(); @@ -44,6 +51,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws @Override public void writeTo(StreamOutput out) throws IOException { + searchTaskStats.writeTo(out); searchShardTaskStats.writeTo(out); out.writeString(mode.getName()); } @@ -53,11 +61,11 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; SearchBackpressureStats that = (SearchBackpressureStats) o; - return searchShardTaskStats.equals(that.searchShardTaskStats) && mode == that.mode; + return searchTaskStats.equals(that.searchTaskStats) && searchShardTaskStats.equals(that.searchShardTaskStats) && mode == that.mode; } @Override public int hashCode() { - return Objects.hash(searchShardTaskStats, mode); + return Objects.hash(searchTaskStats, 
searchShardTaskStats, mode); } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java new file mode 100644 index 0000000000000..87318a60b46fd --- /dev/null +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java @@ -0,0 +1,100 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.backpressure.stats; + +import org.opensearch.common.collect.MapBuilder; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.common.xcontent.ToXContent; +import org.opensearch.common.xcontent.ToXContentObject; +import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.search.backpressure.trackers.CpuUsageTracker; +import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; +import org.opensearch.search.backpressure.trackers.HeapUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; + +import java.io.IOException; +import java.util.Map; +import java.util.Objects; + +/** + * Stats related to cancelled search tasks. 
+ */ + +public class SearchTaskStats implements ToXContentObject, Writeable { + private final long cancellationCount; + private final long limitReachedCount; + private final Map resourceUsageTrackerStats; + + public SearchTaskStats( + long cancellationCount, + long limitReachedCount, + Map resourceUsageTrackerStats + ) { + this.cancellationCount = cancellationCount; + this.limitReachedCount = limitReachedCount; + this.resourceUsageTrackerStats = resourceUsageTrackerStats; + } + + public SearchTaskStats(StreamInput in) throws IOException { + this.cancellationCount = in.readVLong(); + this.limitReachedCount = in.readVLong(); + + MapBuilder builder = new MapBuilder<>(); + builder.put(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, in.readOptionalWriteable(CpuUsageTracker.Stats::new)); + builder.put(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, in.readOptionalWriteable(HeapUsageTracker.Stats::new)); + builder.put(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, in.readOptionalWriteable(ElapsedTimeTracker.Stats::new)); + this.resourceUsageTrackerStats = builder.immutableMap(); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException { + builder.startObject(); + + builder.startObject("resource_tracker_stats"); + for (Map.Entry entry : resourceUsageTrackerStats.entrySet()) { + builder.field(entry.getKey().getName(), entry.getValue()); + } + builder.endObject(); + + builder.startObject("cancellation_stats") + .field("cancellation_count", cancellationCount) + .field("cancellation_limit_reached_count", limitReachedCount) + .endObject(); + + return builder.endObject(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVLong(cancellationCount); + out.writeVLong(limitReachedCount); + + out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER)); + 
out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER)); + out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER)); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + SearchTaskStats that = (SearchTaskStats) o; + return cancellationCount == that.cancellationCount + && limitReachedCount == that.limitReachedCount + && resourceUsageTrackerStats.equals(that.resourceUsageTrackerStats); + } + + @Override + public int hashCode() { + return Objects.hash(cancellationCount, limitReachedCount, resourceUsageTrackerStats); + } +} diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java index 21bb3af32ae08..1e332eca2649c 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java @@ -8,6 +8,7 @@ package org.opensearch.search.backpressure.trackers; +import org.opensearch.action.search.SearchTask; import org.opensearch.common.settings.Setting; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; @@ -32,11 +33,24 @@ */ public class CpuUsageTracker extends TaskResourceUsageTracker { private static class Defaults { + private static final long CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY = 60000; private static final long CPU_TIME_MILLIS_THRESHOLD = 15000; } /** - * Defines the CPU usage threshold (in millis) for an individual task before it is considered for cancellation. + * Defines the CPU usage threshold (in millis) for an individual search task before it is considered for cancellation. 
+ */ + private volatile long cpuTimeMillisThresholdForSearchQuery; + public static final Setting SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY = Setting.longSetting( + "search_backpressure.search_task.cpu_time_millis_threshold_for_search_query", + Defaults.CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the CPU usage threshold (in millis) for an individual search shard task before it is considered for cancellation. */ private volatile long cpuTimeMillisThreshold; public static final Setting SETTING_CPU_TIME_MILLIS_THRESHOLD = Setting.longSetting( @@ -48,7 +62,10 @@ private static class Defaults { ); public CpuUsageTracker(SearchBackpressureSettings settings) { + this.cpuTimeMillisThresholdForSearchQuery = SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); this.cpuTimeMillisThreshold = SETTING_CPU_TIME_MILLIS_THRESHOLD.get(settings.getSettings()); + settings.getClusterSettings() + .addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, this::setCpuTimeMillisThresholdForSearchQuery); settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD, this::setCpuTimeMillisThreshold); } @@ -60,7 +77,7 @@ public String name() { @Override public Optional checkAndMaybeGetCancellationReason(Task task) { long usage = task.getTotalResourceStats().getCpuTimeInNanos(); - long threshold = getCpuTimeNanosThreshold(); + long threshold = (task instanceof SearchTask) ? 
getCpuTimeNanosThresholdForSearchQuery() : getCpuTimeNanosThreshold(); if (usage < threshold) { return Optional.empty(); @@ -78,19 +95,37 @@ public Optional checkAndMaybeGetCancellationReason(Task ); } + public long getCpuTimeNanosThresholdForSearchQuery() { + return TimeUnit.MILLISECONDS.toNanos(cpuTimeMillisThresholdForSearchQuery); + } + public long getCpuTimeNanosThreshold() { return TimeUnit.MILLISECONDS.toNanos(cpuTimeMillisThreshold); } + public void setCpuTimeMillisThresholdForSearchQuery(long cpuTimeMillisThresholdForSearchQuery) { + this.cpuTimeMillisThresholdForSearchQuery = cpuTimeMillisThresholdForSearchQuery; + } + public void setCpuTimeMillisThreshold(long cpuTimeMillisThreshold) { this.cpuTimeMillisThreshold = cpuTimeMillisThreshold; } @Override - public TaskResourceUsageTracker.Stats stats(List activeTasks) { - long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); - long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).average().orElse(0); - return new Stats(getCancellations(), currentMax, currentAvg); + public TaskResourceUsageTracker.Stats searchTaskStats(List searchTasks) { + long currentMax = searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); + long currentAvg = (long) searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).average().orElse(0); + return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg); + } + + @Override + public TaskResourceUsageTracker.Stats searchShardTaskStats(List searchShardTasks) { + long currentMax = searchShardTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); + long currentAvg = (long) searchShardTasks.stream() + .mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()) + .average() + .orElse(0); + return new Stats(getSearchShardTaskCancellationCount(), 
currentMax, currentAvg); } /** diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java index 10e53e2bce5ae..eba8c4ee7afd8 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java @@ -8,6 +8,7 @@ package org.opensearch.search.backpressure.trackers; +import org.opensearch.action.search.SearchTask; import org.opensearch.common.settings.Setting; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; @@ -33,11 +34,24 @@ */ public class ElapsedTimeTracker extends TaskResourceUsageTracker { private static class Defaults { + private static final long ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY = 120000; private static final long ELAPSED_TIME_MILLIS_THRESHOLD = 30000; } /** - * Defines the elapsed time threshold (in millis) for an individual task before it is considered for cancellation. + * Defines the elapsed time threshold (in millis) for an individual search task before it is considered for cancellation. + */ + private volatile long elapsedTimeMillisThresholdForSearchQuery; + public static final Setting SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY = Setting.longSetting( + "search_backpressure.search_task.elapsed_time_millis_threshold_for_search_query", + Defaults.ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the elapsed time threshold (in millis) for an individual search shard task before it is considered for cancellation. 
*/ private volatile long elapsedTimeMillisThreshold; public static final Setting SETTING_ELAPSED_TIME_MILLIS_THRESHOLD = Setting.longSetting( @@ -52,7 +66,13 @@ private static class Defaults { public ElapsedTimeTracker(SearchBackpressureSettings settings, LongSupplier timeNanosSupplier) { this.timeNanosSupplier = timeNanosSupplier; + this.elapsedTimeMillisThresholdForSearchQuery = SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); this.elapsedTimeMillisThreshold = SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.get(settings.getSettings()); + settings.getClusterSettings() + .addSettingsUpdateConsumer( + SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, + this::setElapsedTimeMillisThresholdForSearchQuery + ); settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, this::setElapsedTimeMillisThreshold); } @@ -64,7 +84,7 @@ public String name() { @Override public Optional checkAndMaybeGetCancellationReason(Task task) { long usage = timeNanosSupplier.getAsLong() - task.getStartTimeNanos(); - long threshold = getElapsedTimeNanosThreshold(); + long threshold = (task instanceof SearchTask) ? 
getElapsedTimeNanosThresholdForSearchQuery() : getElapsedTimeNanosThreshold(); if (usage < threshold) { return Optional.empty(); @@ -82,20 +102,36 @@ public Optional checkAndMaybeGetCancellationReason(Task ); } + public long getElapsedTimeNanosThresholdForSearchQuery() { + return TimeUnit.MILLISECONDS.toNanos(elapsedTimeMillisThresholdForSearchQuery); + } + public long getElapsedTimeNanosThreshold() { return TimeUnit.MILLISECONDS.toNanos(elapsedTimeMillisThreshold); } + public void setElapsedTimeMillisThresholdForSearchQuery(long elapsedTimeMillisThresholdForSearchQuery) { + this.elapsedTimeMillisThresholdForSearchQuery = elapsedTimeMillisThresholdForSearchQuery; + } + public void setElapsedTimeMillisThreshold(long elapsedTimeMillisThreshold) { this.elapsedTimeMillisThreshold = elapsedTimeMillisThreshold; } @Override - public TaskResourceUsageTracker.Stats stats(List activeTasks) { + public TaskResourceUsageTracker.Stats searchTaskStats(List activeTasks) { + long now = timeNanosSupplier.getAsLong(); + long currentMax = activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); + long currentAvg = (long) activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); + return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg); + } + + @Override + public TaskResourceUsageTracker.Stats searchShardTaskStats(List activeTasks) { long now = timeNanosSupplier.getAsLong(); long currentMax = activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); long currentAvg = (long) activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); - return new Stats(getCancellations(), currentMax, currentAvg); + return new Stats(getSearchShardTaskCancellationCount(), currentMax, currentAvg); } /** diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java 
b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java index d1a264609e522..31f62055dbfc5 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java @@ -8,6 +8,7 @@ package org.opensearch.search.backpressure.trackers; +import org.opensearch.action.search.SearchTask; import org.opensearch.common.settings.Setting; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; @@ -37,13 +38,29 @@ public class HeapUsageTracker extends TaskResourceUsageTracker { private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); private static class Defaults { + private static final double HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY = 0.02; private static final double HEAP_PERCENT_THRESHOLD = 0.005; + private static final double HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY = 2.0; private static final double HEAP_VARIANCE_THRESHOLD = 2.0; + private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY = 100; private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE = 100; } /** - * Defines the heap usage threshold (in percentage) for an individual task before it is considered for cancellation. + * Defines the heap usage threshold (in percentage) for an individual search task before it is considered for cancellation. + */ + private volatile double heapPercentThresholdForSearchQuery; + public static final Setting SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY = Setting.doubleSetting( + "search_backpressure.search_task.heap_percent_threshold_for_search_query", + Defaults.HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY, + 0.0, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the heap usage threshold (in percentage) for an individual search shard task before it is considered for cancellation. 
*/ private volatile double heapPercentThreshold; public static final Setting SETTING_HEAP_PERCENT_THRESHOLD = Setting.doubleSetting( @@ -56,7 +73,20 @@ private static class Defaults { ); /** - * Defines the heap usage variance for an individual task before it is considered for cancellation. + * Defines the heap usage variance for an individual search task before it is considered for cancellation. + * A task is considered for cancellation when taskHeapUsage is greater than or equal to heapUsageMovingAverage * variance. + */ + private volatile double heapVarianceThresholdForSearchQuery; + public static final Setting SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY = Setting.doubleSetting( + "search_backpressure.search_task.heap_variance_for_search_query", + Defaults.HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY, + 0.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the heap usage variance for an individual search shard task before it is considered for cancellation. * A task is considered for cancellation when taskHeapUsage is greater than or equal to heapUsageMovingAverage * variance. */ private volatile double heapVarianceThreshold; @@ -69,7 +99,19 @@ private static class Defaults { ); /** - * Defines the window size to calculate the moving average of heap usage of completed tasks. + * Defines the window size to calculate the moving average of heap usage of completed search tasks. + */ + private volatile int heapMovingAverageWindowSizeForSearchQuery; + public static final Setting SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY = Setting.intSetting( + "search_backpressure.search_task.heap_moving_average_window_size_for_search_query", + Defaults.HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the window size to calculate the moving average of heap usage of completed search shard tasks. 
*/ private volatile int heapMovingAverageWindowSize; public static final Setting SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE = Setting.intSetting( @@ -80,19 +122,33 @@ private static class Defaults { Setting.Property.NodeScope ); + private final AtomicReference movingAverageReferenceForSearchQuery; private final AtomicReference movingAverageReference; public HeapUsageTracker(SearchBackpressureSettings settings) { + heapPercentThresholdForSearchQuery = SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); + settings.getClusterSettings() + .addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY, this::setHeapPercentThresholdForSearchQuery); heapPercentThreshold = SETTING_HEAP_PERCENT_THRESHOLD.get(settings.getSettings()); settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD, this::setHeapPercentThreshold); + heapPercentThresholdForSearchQuery = SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); + settings.getClusterSettings() + .addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY, this::setHeapVarianceThresholdForSearchQuery); heapVarianceThreshold = SETTING_HEAP_VARIANCE_THRESHOLD.get(settings.getSettings()); settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD, this::setHeapVarianceThreshold); + heapMovingAverageWindowSizeForSearchQuery = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY.get(settings.getSettings()); + settings.getClusterSettings() + .addSettingsUpdateConsumer( + SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY, + this::setHeapMovingAverageWindowSizeForSearchQuery + ); heapMovingAverageWindowSize = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.get(settings.getSettings()); settings.getClusterSettings() .addSettingsUpdateConsumer(SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::setHeapMovingAverageWindowSize); + this.movingAverageReferenceForSearchQuery = new AtomicReference<>(new 
MovingAverage(heapMovingAverageWindowSizeForSearchQuery)); this.movingAverageReference = new AtomicReference<>(new MovingAverage(heapMovingAverageWindowSize)); } @@ -103,12 +159,18 @@ public String name() { @Override public void update(Task task) { - movingAverageReference.get().record(task.getTotalResourceStats().getMemoryInBytes()); + if (task instanceof SearchTask) { + movingAverageReferenceForSearchQuery.get().record(task.getTotalResourceStats().getMemoryInBytes()); + } else { + movingAverageReference.get().record(task.getTotalResourceStats().getMemoryInBytes()); + } } @Override public Optional checkAndMaybeGetCancellationReason(Task task) { - MovingAverage movingAverage = movingAverageReference.get(); + MovingAverage movingAverage = (task instanceof SearchTask) + ? movingAverageReferenceForSearchQuery.get() + : movingAverageReference.get(); // There haven't been enough measurements. if (movingAverage.isReady() == false) { @@ -117,9 +179,11 @@ public Optional checkAndMaybeGetCancellationReason(Task double currentUsage = task.getTotalResourceStats().getMemoryInBytes(); double averageUsage = movingAverage.getAverage(); - double allowedUsage = averageUsage * getHeapVarianceThreshold(); + double variance = (task instanceof SearchTask) ? getHeapVarianceThresholdForSearchQuery() : getHeapBytesThreshold(); + double allowedUsage = averageUsage * variance; + double threshold = (task instanceof SearchTask) ? 
getHeapBytesThresholdForSearchQuery() : getHeapBytesThreshold(); - if (currentUsage < getHeapBytesThreshold() || currentUsage < allowedUsage) { + if (currentUsage < threshold || currentUsage < allowedUsage) { return Optional.empty(); } @@ -131,32 +195,60 @@ public Optional checkAndMaybeGetCancellationReason(Task ); } + public long getHeapBytesThresholdForSearchQuery() { + return (long) (HEAP_SIZE_BYTES * heapPercentThresholdForSearchQuery); + } + public long getHeapBytesThreshold() { return (long) (HEAP_SIZE_BYTES * heapPercentThreshold); } + public void setHeapPercentThresholdForSearchQuery(double heapPercentThresholdForSearchQuery) { + this.heapPercentThresholdForSearchQuery = heapPercentThresholdForSearchQuery; + } + public void setHeapPercentThreshold(double heapPercentThreshold) { this.heapPercentThreshold = heapPercentThreshold; } + public double getHeapVarianceThresholdForSearchQuery() { + return heapVarianceThresholdForSearchQuery; + } + public double getHeapVarianceThreshold() { return heapVarianceThreshold; } + public void setHeapVarianceThresholdForSearchQuery(double heapVarianceThresholdForSearchQuery) { + this.heapVarianceThresholdForSearchQuery = heapVarianceThresholdForSearchQuery; + } + public void setHeapVarianceThreshold(double heapVarianceThreshold) { this.heapVarianceThreshold = heapVarianceThreshold; } + public void setHeapMovingAverageWindowSizeForSearchQuery(int heapMovingAverageWindowSizeForSearchQuery) { + this.heapMovingAverageWindowSizeForSearchQuery = heapMovingAverageWindowSizeForSearchQuery; + this.movingAverageReferenceForSearchQuery.set(new MovingAverage(heapMovingAverageWindowSizeForSearchQuery)); + } + public void setHeapMovingAverageWindowSize(int heapMovingAverageWindowSize) { this.heapMovingAverageWindowSize = heapMovingAverageWindowSize; this.movingAverageReference.set(new MovingAverage(heapMovingAverageWindowSize)); } @Override - public TaskResourceUsageTracker.Stats stats(List activeTasks) { + public 
TaskResourceUsageTracker.Stats searchTaskStats(List activeTasks) { + long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); + long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); + return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); + } + + @Override + public TaskResourceUsageTracker.Stats searchShardTaskStats(List activeTasks) { long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); - return new Stats(getCancellations(), currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); + return new Stats(getSearchShardTaskCancellationCount(), currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); } /** diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java index cbbb751b996be..a08ca34cd37bc 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java @@ -26,14 +26,23 @@ public abstract class TaskResourceUsageTracker { /** * Counts the number of cancellations made due to this tracker. 
*/ - private final AtomicLong cancellations = new AtomicLong(); + private final AtomicLong searchTaskCancellationCount = new AtomicLong(); + private final AtomicLong searchShardTaskCancellationCount = new AtomicLong(); - public long incrementCancellations() { - return cancellations.incrementAndGet(); + public long incrementSearchTaskCancellations() { + return searchTaskCancellationCount.incrementAndGet(); } - public long getCancellations() { - return cancellations.get(); + public long incrementSearchShardTaskCancellations() { + return searchShardTaskCancellationCount.incrementAndGet(); + } + + public long getSearchTaskCancellationCount() { + return searchTaskCancellationCount.get(); + } + + public long getSearchShardTaskCancellationCount() { + return searchShardTaskCancellationCount.get(); } /** @@ -52,9 +61,14 @@ public void update(Task task) {} public abstract Optional checkAndMaybeGetCancellationReason(Task task); /** - * Returns the tracker's state as seen in the stats API. + * Returns the tracker's state for SearchTasks as seen in the stats API. + */ + public abstract Stats searchTaskStats(List activeTasks); + + /** + * Returns the tracker's state for SearchShardTasks as seen in the stats API. */ - public abstract Stats stats(List activeTasks); + public abstract Stats searchShardTaskStats(List activeTasks); /** * Represents the tracker's state as seen in the stats API. 
diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index 07a962c6824ca..1285131bf5da8 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -16,6 +16,7 @@ import org.opensearch.search.backpressure.settings.SearchBackpressureMode; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.stats.SearchTaskStats; import org.opensearch.search.backpressure.trackers.NodeDuressTracker; import org.opensearch.common.xcontent.XContentBuilder; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; @@ -120,7 +121,7 @@ public void testTrackerStateUpdateOnTaskCompletion() { for (int i = 0; i < 100; i++) { service.onTaskCompleted(createMockTaskWithResourceStats(SearchShardTask.class, 100, 200)); } - assertEquals(100, service.getState().getCompletionCount()); + assertEquals(100, service.getSearchShardTasksState().getCompletionCount()); verify(mockTaskResourceUsageTracker, times(100)).update(any()); } @@ -150,8 +151,13 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public Stats stats(List activeTasks) { - return new MockStats(getCancellations()); + public Stats searchTaskStats(List activeTasks) { + return new MockStats(getSearchTaskCancellationCount()); + } + + @Override + public Stats searchShardTaskStats(List activeTasks) { + return new MockStats(getSearchShardTaskCancellationCount()); } }; @@ -200,13 +206,13 @@ public Stats stats(List activeTasks) { // There are 15 tasks eligible for cancellation but only 10 will be cancelled (burst limit). 
service.doRun(); - assertEquals(10, service.getState().getCancellationCount()); - assertEquals(1, service.getState().getLimitReachedCount()); + assertEquals(10, service.getSearchShardTasksState().getCancellationCount()); + assertEquals(1, service.getSearchShardTasksState().getLimitReachedCount()); // If the clock or completed task count haven't made sufficient progress, we'll continue to be rate-limited. service.doRun(); - assertEquals(10, service.getState().getCancellationCount()); - assertEquals(2, service.getState().getLimitReachedCount()); + assertEquals(10, service.getSearchShardTasksState().getCancellationCount()); + assertEquals(2, service.getSearchShardTasksState().getLimitReachedCount()); // Simulate task completion to replenish some tokens. // This will add 2 tokens (task count delta * cancellationRatio) to 'rateLimitPerTaskCompletion'. @@ -214,18 +220,19 @@ public Stats stats(List activeTasks) { service.onTaskCompleted(createMockTaskWithResourceStats(SearchShardTask.class, 100, taskHeapUsageBytes)); } service.doRun(); - assertEquals(12, service.getState().getCancellationCount()); - assertEquals(3, service.getState().getLimitReachedCount()); + assertEquals(12, service.getSearchShardTasksState().getCancellationCount()); + assertEquals(3, service.getSearchShardTasksState().getLimitReachedCount()); // Fast-forward the clock by one second to replenish some tokens. // This will add 3 tokens (time delta * rate) to 'rateLimitPerTime'. mockTime.addAndGet(TimeUnit.SECONDS.toNanos(1)); service.doRun(); - assertEquals(15, service.getState().getCancellationCount()); - assertEquals(3, service.getState().getLimitReachedCount()); // no more tasks to cancel; limit not reached + assertEquals(15, service.getSearchShardTasksState().getCancellationCount()); + assertEquals(3, service.getSearchShardTasksState().getLimitReachedCount()); // no more tasks to cancel; limit not reached // Verify search backpressure stats. 
SearchBackpressureStats expectedStats = new SearchBackpressureStats( + new SearchTaskStats(0, 0, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(0))), new SearchShardTaskStats(15, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(15))), SearchBackpressureMode.ENFORCED ); diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java index 2665a6d5e05aa..0c86cf4b11239 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java @@ -25,6 +25,7 @@ protected SearchBackpressureStats createTestInstance() { public static SearchBackpressureStats randomInstance() { return new SearchBackpressureStats( + SearchTaskStatsTests.randomInstance(), SearchShardTaskStatsTests.randomInstance(), randomFrom(SearchBackpressureMode.DISABLED, SearchBackpressureMode.MONITOR_ONLY, SearchBackpressureMode.ENFORCED) ); diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java new file mode 100644 index 0000000000000..59375c22bb932 --- /dev/null +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java @@ -0,0 +1,44 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.search.backpressure.stats; + +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.search.backpressure.trackers.CpuUsageTracker; +import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; +import org.opensearch.search.backpressure.trackers.HeapUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; +import org.opensearch.test.AbstractWireSerializingTestCase; + +import java.util.Map; + +public class SearchTaskStatsTests extends AbstractWireSerializingTestCase { + public static SearchTaskStats randomInstance() { + Map resourceUsageTrackerStats = Map.of( + TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, + new CpuUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), + TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, + new HeapUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), + TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, + new ElapsedTimeTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()) + ); + + return new SearchTaskStats(randomNonNegativeLong(), randomNonNegativeLong(), resourceUsageTrackerStats); + } + + @Override + protected Writeable.Reader instanceReader() { + return SearchTaskStats::new; + } + + @Override + protected SearchTaskStats createTestInstance() { + return randomInstance(); + } +} diff --git a/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java b/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java index e74f89c905499..f30c15de28b90 100644 --- a/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java +++ b/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java @@ -27,16 +27,19 @@ public void testTaskCancellation() { TaskResourceUsageTracker mockTracker3 = 
createMockTaskResourceUsageTracker("mock_tracker_3"); List reasons = new ArrayList<>(); - List callbacks = List.of(mockTracker1::incrementCancellations, mockTracker2::incrementCancellations); + List callbacks = List.of( + mockTracker1::incrementSearchShardTaskCancellations, + mockTracker2::incrementSearchShardTaskCancellations + ); TaskCancellation taskCancellation = new TaskCancellation(mockTask, reasons, callbacks); // Task does not have any reason to be cancelled. assertEquals(0, taskCancellation.totalCancellationScore()); assertFalse(taskCancellation.isEligibleForCancellation()); taskCancellation.cancel(); - assertEquals(0, mockTracker1.getCancellations()); - assertEquals(0, mockTracker2.getCancellations()); - assertEquals(0, mockTracker3.getCancellations()); + assertEquals(0, mockTracker1.getSearchShardTaskCancellationCount()); + assertEquals(0, mockTracker2.getSearchShardTaskCancellationCount()); + assertEquals(0, mockTracker3.getSearchShardTaskCancellationCount()); // Task has one or more reasons to be cancelled. reasons.add(new TaskCancellation.Reason("limits exceeded 1", 10)); @@ -48,9 +51,9 @@ public void testTaskCancellation() { // Cancel the task and validate the cancellation reason and invocation of callbacks. 
taskCancellation.cancel(); assertTrue(mockTask.getReasonCancelled().contains("limits exceeded 1, limits exceeded 2, limits exceeded 3")); - assertEquals(1, mockTracker1.getCancellations()); - assertEquals(1, mockTracker2.getCancellations()); - assertEquals(0, mockTracker3.getCancellations()); + assertEquals(1, mockTracker1.getSearchShardTaskCancellationCount()); + assertEquals(1, mockTracker2.getSearchShardTaskCancellationCount()); + assertEquals(0, mockTracker3.getSearchShardTaskCancellationCount()); } private static TaskResourceUsageTracker createMockTaskResourceUsageTracker(String name) { @@ -69,7 +72,12 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public Stats stats(List activeTasks) { + public Stats searchTaskStats(List activeTasks) { + return null; + } + + @Override + public Stats searchShardTaskStats(List activeTasks) { return null; } }; From 474be6c3c0c4b1437b54a33612e5e0acd25bbecb Mon Sep 17 00:00:00 2001 From: PritLadani Date: Tue, 20 Dec 2022 13:01:25 +0530 Subject: [PATCH 12/34] Fixing test failures Signed-off-by: PritLadani --- .../backpressure/SearchBackpressureService.java | 4 +++- .../stats/SearchBackpressureStats.java | 13 +++++++++++-- .../trackers/ElapsedTimeTracker.java | 12 ++++++------ .../backpressure/trackers/HeapUsageTracker.java | 16 ++++++++-------- .../SearchBackpressureServiceTests.java | 4 ++-- .../opensearch/tasks/TaskCancellationTests.java | 4 ++-- 6 files changed, 32 insertions(+), 21 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index 2465790176daa..dcfdbe0e7b02b 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -379,7 +379,9 @@ public SearchBackpressureStats nodeStats() { 
searchBackpressureStates.get(SearchTask.class).getLimitReachedCount(), taskResourceUsageTrackers.stream() .collect( - Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.searchTaskStats(searchTasks)) + Collectors.toUnmodifiableMap( + t -> TaskResourceUsageTrackerType.fromName(t.name()), + t -> t.searchTaskStats(searchTasks)) ) ); diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java index 92a52b62477f2..80ed849400d8d 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java @@ -8,6 +8,7 @@ package org.opensearch.search.backpressure.stats; +import org.opensearch.Version; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; import org.opensearch.common.io.stream.Writeable; @@ -37,7 +38,13 @@ public SearchBackpressureStats( } public SearchBackpressureStats(StreamInput in) throws IOException { - this(new SearchTaskStats(in), new SearchShardTaskStats(in), SearchBackpressureMode.fromName(in.readString())); + searchShardTaskStats = new SearchShardTaskStats(in); + mode = SearchBackpressureMode.fromName(in.readString()); + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + searchTaskStats = new SearchTaskStats(in); + } else { + searchTaskStats = null; + } } @Override @@ -51,9 +58,11 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws @Override public void writeTo(StreamOutput out) throws IOException { - searchTaskStats.writeTo(out); searchShardTaskStats.writeTo(out); out.writeString(mode.getName()); + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + searchTaskStats.writeTo(out); + } } @Override diff --git 
a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java index eba8c4ee7afd8..3b1b904178b2c 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java @@ -119,18 +119,18 @@ public void setElapsedTimeMillisThreshold(long elapsedTimeMillisThreshold) { } @Override - public TaskResourceUsageTracker.Stats searchTaskStats(List activeTasks) { + public TaskResourceUsageTracker.Stats searchTaskStats(List searchTasks) { long now = timeNanosSupplier.getAsLong(); - long currentMax = activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); - long currentAvg = (long) activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); + long currentMax = searchTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); + long currentAvg = (long) searchTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg); } @Override - public TaskResourceUsageTracker.Stats searchShardTaskStats(List activeTasks) { + public TaskResourceUsageTracker.Stats searchShardTaskStats(List searchShardTasks) { long now = timeNanosSupplier.getAsLong(); - long currentMax = activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); - long currentAvg = (long) activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); + long currentMax = searchShardTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); + long currentAvg = (long) searchShardTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); return new Stats(getSearchShardTaskCancellationCount(), currentMax, currentAvg); } diff --git 
a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java index 31f62055dbfc5..840b3461e63ae 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java @@ -179,7 +179,7 @@ public Optional checkAndMaybeGetCancellationReason(Task double currentUsage = task.getTotalResourceStats().getMemoryInBytes(); double averageUsage = movingAverage.getAverage(); - double variance = (task instanceof SearchTask) ? getHeapVarianceThresholdForSearchQuery() : getHeapBytesThreshold(); + double variance = (task instanceof SearchTask) ? getHeapVarianceThresholdForSearchQuery() : getHeapVarianceThreshold(); double allowedUsage = averageUsage * variance; double threshold = (task instanceof SearchTask) ? getHeapBytesThresholdForSearchQuery() : getHeapBytesThreshold(); @@ -238,16 +238,16 @@ public void setHeapMovingAverageWindowSize(int heapMovingAverageWindowSize) { } @Override - public TaskResourceUsageTracker.Stats searchTaskStats(List activeTasks) { - long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); - long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); - return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); + public TaskResourceUsageTracker.Stats searchTaskStats(List searchTasks) { + long currentMax = searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); + long currentAvg = (long) searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); + return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg, (long) 
movingAverageReferenceForSearchQuery.get().getAverage()); } @Override - public TaskResourceUsageTracker.Stats searchShardTaskStats(List activeTasks) { - long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); - long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); + public TaskResourceUsageTracker.Stats searchShardTaskStats(List searchShardTasks) { + long currentMax = searchShardTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); + long currentAvg = (long) searchShardTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); return new Stats(getSearchShardTaskCancellationCount(), currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); } diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index 1285131bf5da8..81bb1580436ac 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -151,12 +151,12 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public Stats searchTaskStats(List activeTasks) { + public Stats searchTaskStats(List searchTasks) { return new MockStats(getSearchTaskCancellationCount()); } @Override - public Stats searchShardTaskStats(List activeTasks) { + public Stats searchShardTaskStats(List searchShardTasks) { return new MockStats(getSearchShardTaskCancellationCount()); } }; diff --git a/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java b/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java index f30c15de28b90..b8fa91f2d438b 100644 --- 
a/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java +++ b/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java @@ -72,12 +72,12 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public Stats searchTaskStats(List activeTasks) { + public Stats searchTaskStats(List searchTasks) { return null; } @Override - public Stats searchShardTaskStats(List activeTasks) { + public Stats searchShardTaskStats(List searchShardTasks) { return null; } }; From 72c5d8700e04ad292afcbe32855c2bb598366ac9 Mon Sep 17 00:00:00 2001 From: PritLadani Date: Tue, 20 Dec 2022 14:05:39 +0530 Subject: [PATCH 13/34] java code formatting Signed-off-by: PritLadani --- .../search/backpressure/SearchBackpressureService.java | 4 +--- .../search/backpressure/trackers/HeapUsageTracker.java | 7 ++++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index dcfdbe0e7b02b..2465790176daa 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -379,9 +379,7 @@ public SearchBackpressureStats nodeStats() { searchBackpressureStates.get(SearchTask.class).getLimitReachedCount(), taskResourceUsageTrackers.stream() .collect( - Collectors.toUnmodifiableMap( - t -> TaskResourceUsageTrackerType.fromName(t.name()), - t -> t.searchTaskStats(searchTasks)) + Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.searchTaskStats(searchTasks)) ) ); diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java index 840b3461e63ae..e9c179bc0967c 100644 --- 
a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java @@ -241,7 +241,12 @@ public void setHeapMovingAverageWindowSize(int heapMovingAverageWindowSize) { public TaskResourceUsageTracker.Stats searchTaskStats(List searchTasks) { long currentMax = searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); long currentAvg = (long) searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); - return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg, (long) movingAverageReferenceForSearchQuery.get().getAverage()); + return new Stats( + getSearchTaskCancellationCount(), + currentMax, + currentAvg, + (long) movingAverageReferenceForSearchQuery.get().getAverage() + ); } @Override From 0192d6c87e54ca6c410d64203a5198b453eade67 Mon Sep 17 00:00:00 2001 From: PritLadani Date: Wed, 21 Dec 2022 17:33:00 +0530 Subject: [PATCH 14/34] Adding unit tests and integration tests Signed-off-by: PritLadani --- .../backpressure/SearchBackpressureIT.java | 161 +++++++++++++-- .../trackers/HeapUsageTracker.java | 2 +- .../SearchBackpressureServiceTests.java | 194 ++++++++++++++---- .../trackers/CpuUsageTrackerTests.java | 14 +- .../trackers/ElapsedTimeTrackerTests.java | 14 +- .../trackers/HeapUsageTrackerTests.java | 24 ++- 6 files changed, 347 insertions(+), 62 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java b/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java index f8629e2c88b07..e3ad4e2286e36 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java @@ -17,6 +17,7 @@ import 
org.opensearch.action.ActionResponse; import org.opensearch.action.ActionType; import org.opensearch.action.search.SearchShardTask; +import org.opensearch.action.search.SearchTask; import org.opensearch.action.support.ActionFilters; import org.opensearch.action.support.HandledTransportAction; import org.opensearch.common.inject.Inject; @@ -29,9 +30,11 @@ import org.opensearch.search.backpressure.settings.NodeDuressSettings; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; +import org.opensearch.tasks.CancellableTask; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancelledException; import org.opensearch.tasks.TaskId; @@ -47,6 +50,7 @@ import java.util.Map; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.instanceOf; @@ -56,6 +60,7 @@ public class SearchBackpressureIT extends OpenSearchIntegTestCase { private static final TimeValue TIMEOUT = new TimeValue(10, TimeUnit.SECONDS); + private static final int MOVING_AVERAGE_WINDOW_SIZE = 10; @Override protected Collection> nodePlugins() { @@ -70,6 +75,7 @@ public final void setupNodeSettings() { .put(NodeDuressSettings.SETTING_CPU_THRESHOLD.getKey(), 0.0) .put(NodeDuressSettings.SETTING_HEAP_THRESHOLD.getKey(), 0.0) .put(NodeDuressSettings.SETTING_NUM_SUCCESSIVE_BREACHES.getKey(), 1) + .put(SearchTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 0.0) .put(SearchShardTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD.getKey(), 0.0) .build(); 
assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); @@ -86,6 +92,37 @@ public final void cleanupNodeSettings() { ); } + public void testSearchTaskCancellationWithHighElapsedTime() throws InterruptedException { + Settings request = Settings.builder() + .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") + .put(ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 1000) + .build(); + assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); + + ExceptionCatchingListener listener = new ExceptionCatchingListener(); + client().execute( + TestTransportAction.ACTION, + new TestRequest<>( + RequestType.HIGH_ELAPSED_TIME, + (TaskFactory) (id, type, action, description, parentTaskId, headers) -> new SearchTask( + id, + type, + action, + descriptionSupplier(description), + parentTaskId, + headers + ) + ), + listener + ); + assertTrue(listener.latch.await(TIMEOUT.getSeconds(), TimeUnit.SECONDS)); + + Exception caughtException = listener.getException(); + assertNotNull("SearchTask should have been cancelled with TaskCancelledException", caughtException); + MatcherAssert.assertThat(caughtException, instanceOf(TaskCancelledException.class)); + MatcherAssert.assertThat(caughtException.getMessage(), containsString("elapsed time exceeded")); + } + public void testSearchShardTaskCancellationWithHighElapsedTime() throws InterruptedException { Settings request = Settings.builder() .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") @@ -94,7 +131,7 @@ public void testSearchShardTaskCancellationWithHighElapsedTime() throws Interrup assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); ExceptionCatchingListener listener = new ExceptionCatchingListener(); - client().execute(TestTransportAction.ACTION, new TestRequest(RequestType.HIGH_ELAPSED_TIME), listener); + 
client().execute(TestTransportAction.ACTION, new TestRequest<>(RequestType.HIGH_ELAPSED_TIME, SearchShardTask::new), listener); assertTrue(listener.latch.await(TIMEOUT.getSeconds(), TimeUnit.SECONDS)); Exception caughtException = listener.getException(); @@ -103,6 +140,37 @@ public void testSearchShardTaskCancellationWithHighElapsedTime() throws Interrup MatcherAssert.assertThat(caughtException.getMessage(), containsString("elapsed time exceeded")); } + public void testSearchTaskCancellationWithHighCpu() throws InterruptedException { + Settings request = Settings.builder() + .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") + .put(CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 1000) + .build(); + assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); + + ExceptionCatchingListener listener = new ExceptionCatchingListener(); + client().execute( + TestTransportAction.ACTION, + new TestRequest<>( + RequestType.HIGH_CPU, + (TaskFactory) (id, type, action, description, parentTaskId, headers) -> new SearchTask( + id, + type, + action, + descriptionSupplier(description), + parentTaskId, + headers + ) + ), + listener + ); + assertTrue(listener.latch.await(TIMEOUT.getSeconds(), TimeUnit.SECONDS)); + + Exception caughtException = listener.getException(); + assertNotNull("SearchTask should have been cancelled with TaskCancelledException", caughtException); + MatcherAssert.assertThat(caughtException, instanceOf(TaskCancelledException.class)); + MatcherAssert.assertThat(caughtException.getMessage(), containsString("cpu usage exceeded")); + } + public void testSearchShardTaskCancellationWithHighCpu() throws InterruptedException { Settings request = Settings.builder() .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") @@ -111,7 +179,7 @@ public void testSearchShardTaskCancellationWithHighCpu() throws InterruptedExcep 
assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); ExceptionCatchingListener listener = new ExceptionCatchingListener(); - client().execute(TestTransportAction.ACTION, new TestRequest(RequestType.HIGH_CPU), listener); + client().execute(TestTransportAction.ACTION, new TestRequest<>(RequestType.HIGH_CPU, SearchShardTask::new), listener); assertTrue(listener.latch.await(TIMEOUT.getSeconds(), TimeUnit.SECONDS)); Exception caughtException = listener.getException(); @@ -120,12 +188,67 @@ public void testSearchShardTaskCancellationWithHighCpu() throws InterruptedExcep MatcherAssert.assertThat(caughtException.getMessage(), containsString("cpu usage exceeded")); } + public void testSearchTaskCancellationWithHighHeapUsage() throws InterruptedException { + // Before SearchBackpressureService cancels a task based on its heap usage, we need to build up the heap moving average + // To build up the heap moving average, we need to hit the same node with multiple requests and then hit the same node with a + // request having higher heap usage + String node = randomFrom(internalCluster().getNodeNames()); + Settings request = Settings.builder() + .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") + .put(HeapUsageTracker.SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 0.0) + .put(HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 1.0) + .put(HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY.getKey(), MOVING_AVERAGE_WINDOW_SIZE) + .build(); + assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); + + ExceptionCatchingListener listener = new ExceptionCatchingListener(); + for (int i = 0; i < MOVING_AVERAGE_WINDOW_SIZE; i++) { + client(node).execute( + TestTransportAction.ACTION, + new TestRequest<>( + RequestType.HIGH_HEAP, + (TaskFactory) (id, type, action, description, parentTaskId, headers) -> new SearchTask( + 
id, + type, + action, + descriptionSupplier(description), + parentTaskId, + headers + ) + ), + listener + ); + } + + listener = new ExceptionCatchingListener(); + client(node).execute( + TestTransportAction.ACTION, + new TestRequest<>( + RequestType.HIGHER_HEAP, + (TaskFactory) (id, type, action, description, parentTaskId, headers) -> new SearchTask( + id, + type, + action, + descriptionSupplier(description), + parentTaskId, + headers + ) + ), + listener + ); + assertTrue(listener.latch.await(TIMEOUT.getSeconds(), TimeUnit.SECONDS)); + + Exception caughtException = listener.getException(); + assertNotNull("SearchTask should have been cancelled with TaskCancelledException", caughtException); + MatcherAssert.assertThat(caughtException, instanceOf(TaskCancelledException.class)); + MatcherAssert.assertThat(caughtException.getMessage(), containsString("heap usage exceeded")); + } + public void testSearchShardTaskCancellationWithHighHeapUsage() throws InterruptedException { // Before SearchBackpressureService cancels a task based on its heap usage, we need to build up the heap moving average // To build up the heap moving average, we need to hit the same node with multiple requests and then hit the same node with a // request having higher heap usage String node = randomFrom(internalCluster().getNodeNames()); - final int MOVING_AVERAGE_WINDOW_SIZE = 10; Settings request = Settings.builder() .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") .put(HeapUsageTracker.SETTING_HEAP_PERCENT_THRESHOLD.getKey(), 0.0) @@ -136,11 +259,11 @@ public void testSearchShardTaskCancellationWithHighHeapUsage() throws Interrupte ExceptionCatchingListener listener = new ExceptionCatchingListener(); for (int i = 0; i < MOVING_AVERAGE_WINDOW_SIZE; i++) { - client(node).execute(TestTransportAction.ACTION, new TestRequest(RequestType.HIGH_HEAP), listener); + client(node).execute(TestTransportAction.ACTION, new TestRequest<>(RequestType.HIGH_HEAP, SearchShardTask::new), listener); 
} listener = new ExceptionCatchingListener(); - client(node).execute(TestTransportAction.ACTION, new TestRequest(RequestType.HIGHER_HEAP), listener); + client(node).execute(TestTransportAction.ACTION, new TestRequest<>(RequestType.HIGHER_HEAP, SearchShardTask::new), listener); assertTrue(listener.latch.await(TIMEOUT.getSeconds(), TimeUnit.SECONDS)); Exception caughtException = listener.getException(); @@ -154,7 +277,7 @@ public void testSearchCancellationWithBackpressureDisabled() throws InterruptedE assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); ExceptionCatchingListener listener = new ExceptionCatchingListener(); - client().execute(TestTransportAction.ACTION, new TestRequest(RequestType.HIGH_ELAPSED_TIME), listener); + client().execute(TestTransportAction.ACTION, new TestRequest<>(RequestType.HIGH_ELAPSED_TIME, SearchShardTask::new), listener); // waiting for the TIMEOUT * 3 time for the request to complete and the latch to countdown. 
assertTrue( "SearchShardTask should have been completed by now and countdown the latch", @@ -196,11 +319,21 @@ enum RequestType { HIGH_ELAPSED_TIME; } - public static class TestRequest extends ActionRequest { + private Supplier descriptionSupplier(String description) { + return () -> description; + } + + interface TaskFactory { + T createTask(long id, String type, String action, String description, TaskId parentTaskId, Map headers); + } + + public static class TestRequest extends ActionRequest { private final RequestType type; + private TaskFactory taskFactory; - public TestRequest(RequestType type) { + public TestRequest(RequestType type, TaskFactory taskFactory) { this.type = type; + this.taskFactory = taskFactory; } public TestRequest(StreamInput in) throws IOException { @@ -215,7 +348,7 @@ public ActionRequestValidationException validate() { @Override public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { - return new SearchShardTask(id, type, action, "", parentTaskId, headers); + return taskFactory.createTask(id, type, action, "", parentTaskId, headers); } @Override @@ -252,7 +385,7 @@ public TestTransportAction(TransportService transportService, ThreadPool threadP protected void doExecute(Task task, TestRequest request, ActionListener listener) { threadPool.executor(ThreadPool.Names.SEARCH).execute(() -> { try { - SearchShardTask searchShardTask = (SearchShardTask) task; + CancellableTask cancellableTask = (CancellableTask) task; long startTime = System.nanoTime(); // Doing a busy-wait until task cancellation or timeout. 
@@ -260,11 +393,11 @@ protected void doExecute(Task task, TestRequest request, ActionListener request) throws InterruptedException { switch (request.getType()) { case HIGH_CPU: long i = 0, j = 1, k = 1, iterations = 1000; diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java index e9c179bc0967c..de96f13891112 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java @@ -132,7 +132,7 @@ public HeapUsageTracker(SearchBackpressureSettings settings) { heapPercentThreshold = SETTING_HEAP_PERCENT_THRESHOLD.get(settings.getSettings()); settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD, this::setHeapPercentThreshold); - heapPercentThresholdForSearchQuery = SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); + heapVarianceThresholdForSearchQuery = SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); settings.getClusterSettings() .addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY, this::setHeapVarianceThresholdForSearchQuery); heapVarianceThreshold = SETTING_HEAP_VARIANCE_THRESHOLD.get(settings.getSettings()); diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index 81bb1580436ac..df3f725e25f45 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -9,6 +9,7 @@ package org.opensearch.search.backpressure; import org.opensearch.action.search.SearchShardTask; +import org.opensearch.action.search.SearchTask; 
import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; import org.opensearch.common.settings.ClusterSettings; @@ -16,6 +17,7 @@ import org.opensearch.search.backpressure.settings.SearchBackpressureMode; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.search.backpressure.stats.SearchTaskStats; import org.opensearch.search.backpressure.trackers.NodeDuressTracker; import org.opensearch.common.xcontent.XContentBuilder; @@ -96,7 +98,7 @@ public void testIsNodeInDuress() { assertFalse(service.isNodeInDuress()); } - public void testTrackerStateUpdateOnTaskCompletion() { + public void testTrackerStateUpdateOnSearchTaskCompletion() { TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); ThreadPool mockThreadPool = mock(ThreadPool.class); LongSupplier mockTimeNanosSupplier = () -> TimeUnit.SECONDS.toNanos(1234); @@ -116,7 +118,34 @@ public void testTrackerStateUpdateOnTaskCompletion() { List.of(mockTaskResourceUsageTracker) ); - // Record task completions to update the tracker state. Tasks other than SearchShardTask are ignored. 
+ for (int i = 0; i < 100; i++) { + service.onTaskCompleted(createMockTaskWithResourceStats(SearchTask.class, 100, 200)); + } + assertEquals(100, service.getSearchTasksState().getCompletionCount()); + verify(mockTaskResourceUsageTracker, times(100)).update(any()); + } + + public void testTrackerStateUpdateOnSearchShardTaskCompletion() { + TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); + ThreadPool mockThreadPool = mock(ThreadPool.class); + LongSupplier mockTimeNanosSupplier = () -> TimeUnit.SECONDS.toNanos(1234); + TaskResourceUsageTracker mockTaskResourceUsageTracker = mock(TaskResourceUsageTracker.class); + + SearchBackpressureSettings settings = new SearchBackpressureSettings( + Settings.EMPTY, + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); + + SearchBackpressureService service = new SearchBackpressureService( + settings, + mockTaskResourceTrackingService, + mockThreadPool, + mockTimeNanosSupplier, + Collections.emptyList(), + List.of(mockTaskResourceUsageTracker) + ); + + // Record task completions to update the tracker state. Tasks other than SearchTask & SearchShardTask are ignored. 
service.onTaskCompleted(createMockTaskWithResourceStats(CancellableTask.class, 100, 200)); for (int i = 0; i < 100; i++) { service.onTaskCompleted(createMockTaskWithResourceStats(SearchShardTask.class, 100, 200)); @@ -125,54 +154,86 @@ public void testTrackerStateUpdateOnTaskCompletion() { verify(mockTaskResourceUsageTracker, times(100)).update(any()); } - public void testInFlightCancellation() { + public void testSearchTaskInFlightCancellation() { TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); ThreadPool mockThreadPool = mock(ThreadPool.class); AtomicLong mockTime = new AtomicLong(0); LongSupplier mockTimeNanosSupplier = mockTime::get; NodeDuressTracker mockNodeDuressTracker = new NodeDuressTracker(() -> true); - TaskResourceUsageTracker mockTaskResourceUsageTracker = new TaskResourceUsageTracker() { - @Override - public String name() { - return TaskResourceUsageTrackerType.CPU_USAGE_TRACKER.getName(); - } + TaskResourceUsageTracker mockTaskResourceUsageTracker = getMockedTaskResourceUsageTracker(); - @Override - public void update(Task task) {} + // Mocking 'settings' with predictable rate limiting thresholds. + SearchBackpressureSettings settings = getBackpressureSettings("enforced", 0.2, 0.005, 10.0); - @Override - public Optional checkAndMaybeGetCancellationReason(Task task) { - if (task.getTotalResourceStats().getCpuTimeInNanos() < 300) { - return Optional.empty(); - } + SearchBackpressureService service = new SearchBackpressureService( + settings, + mockTaskResourceTrackingService, + mockThreadPool, + mockTimeNanosSupplier, + List.of(mockNodeDuressTracker), + List.of(mockTaskResourceUsageTracker) + ); - return Optional.of(new TaskCancellation.Reason("limits exceeded", 5)); - } + // Run two iterations so that node is marked 'in duress' from the third iteration onwards. 
+ service.doRun(); + service.doRun(); - @Override - public Stats searchTaskStats(List searchTasks) { - return new MockStats(getSearchTaskCancellationCount()); - } + // Mocking 'settings' with predictable totalHeapBytesThreshold so that cancellation logic doesn't get skipped. + long taskHeapUsageBytes = 500; + SearchTaskSettings searchTaskSettings = mock(SearchTaskSettings.class); + when(searchTaskSettings.getTotalHeapBytesThreshold()).thenReturn(taskHeapUsageBytes); + when(settings.getSearchTaskSettings()).thenReturn(searchTaskSettings); - @Override - public Stats searchShardTaskStats(List searchShardTasks) { - return new MockStats(getSearchShardTaskCancellationCount()); + // Create a mix of low and high resource usage SearchTasks (50 low + 25 high resource usage tasks). + Map activeSearchTasks = new HashMap<>(); + for (long i = 0; i < 75; i++) { + if (i % 3 == 0) { + activeSearchTasks.put(i, createMockTaskWithResourceStats(SearchTask.class, 500, taskHeapUsageBytes)); + } else { + activeSearchTasks.put(i, createMockTaskWithResourceStats(SearchTask.class, 100, taskHeapUsageBytes)); } - }; + } + doReturn(activeSearchTasks).when(mockTaskResourceTrackingService).getResourceAwareTasks(); - // Mocking 'settings' with predictable rate limiting thresholds. - SearchBackpressureSettings settings = spy( - new SearchBackpressureSettings( - Settings.builder() - .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") - .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATIO.getKey(), 0.1) - .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATE.getKey(), 0.003) - .put(SearchBackpressureSettings.SETTING_CANCELLATION_BURST.getKey(), 10.0) - .build(), - new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) - ) + // There are 25 SearchTasks eligible for cancellation but only 10 will be cancelled (burst limit). 
+ service.doRun(); + assertEquals(10, service.getSearchTasksState().getCancellationCount()); + assertEquals(1, service.getSearchTasksState().getLimitReachedCount()); + + // If the clock or completed task count haven't made sufficient progress, we'll continue to be rate-limited. + service.doRun(); + assertEquals(10, service.getSearchTasksState().getCancellationCount()); + assertEquals(2, service.getSearchTasksState().getLimitReachedCount()); + + // Fast-forward the clock by ten second to replenish some tokens. + // This will add 50 tokens (time delta * rate) to 'rateLimitPerTime' but it will cancel only 10 tasks (burst limit). + mockTime.addAndGet(TimeUnit.SECONDS.toNanos(10)); + service.doRun(); + assertEquals(20, service.getSearchTasksState().getCancellationCount()); + assertEquals(3, service.getSearchTasksState().getLimitReachedCount()); + + // Verify search backpressure stats. + SearchBackpressureStats expectedStats = new SearchBackpressureStats( + new SearchTaskStats(20, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(20))), + new SearchShardTaskStats(0, 0, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(0))), + SearchBackpressureMode.ENFORCED ); + SearchBackpressureStats actualStats = service.nodeStats(); + assertEquals(expectedStats, actualStats); + } + + public void testSearchShardTaskInFlightCancellation() { + TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); + ThreadPool mockThreadPool = mock(ThreadPool.class); + AtomicLong mockTime = new AtomicLong(0); + LongSupplier mockTimeNanosSupplier = mockTime::get; + NodeDuressTracker mockNodeDuressTracker = new NodeDuressTracker(() -> true); + + TaskResourceUsageTracker mockTaskResourceUsageTracker = getMockedTaskResourceUsageTracker(); + + // Mocking 'settings' with predictable rate limiting thresholds. 
+ SearchBackpressureSettings settings = getBackpressureSettings("enforced", 0.1, 0.003, 10.0); SearchBackpressureService service = new SearchBackpressureService( settings, @@ -189,22 +250,22 @@ public Stats searchShardTaskStats(List searchShardTasks) { // Mocking 'settings' with predictable totalHeapBytesThreshold so that cancellation logic doesn't get skipped. long taskHeapUsageBytes = 500; - SearchShardTaskSettings shardTaskSettings = mock(SearchShardTaskSettings.class); - when(shardTaskSettings.getTotalHeapBytesThreshold()).thenReturn(taskHeapUsageBytes); - when(settings.getSearchShardTaskSettings()).thenReturn(shardTaskSettings); + SearchShardTaskSettings searchShardTaskSettings = mock(SearchShardTaskSettings.class); + when(searchShardTaskSettings.getTotalHeapBytesThreshold()).thenReturn(taskHeapUsageBytes); + when(settings.getSearchShardTaskSettings()).thenReturn(searchShardTaskSettings); // Create a mix of low and high resource usage tasks (60 low + 15 high resource usage tasks). - Map activeTasks = new HashMap<>(); + Map activeSearchShardTasks = new HashMap<>(); for (long i = 0; i < 75; i++) { if (i % 5 == 0) { - activeTasks.put(i, createMockTaskWithResourceStats(SearchShardTask.class, 500, taskHeapUsageBytes)); + activeSearchShardTasks.put(i, createMockTaskWithResourceStats(SearchShardTask.class, 500, taskHeapUsageBytes)); } else { - activeTasks.put(i, createMockTaskWithResourceStats(SearchShardTask.class, 100, taskHeapUsageBytes)); + activeSearchShardTasks.put(i, createMockTaskWithResourceStats(SearchShardTask.class, 100, taskHeapUsageBytes)); } } - doReturn(activeTasks).when(mockTaskResourceTrackingService).getResourceAwareTasks(); + doReturn(activeSearchShardTasks).when(mockTaskResourceTrackingService).getResourceAwareTasks(); - // There are 15 tasks eligible for cancellation but only 10 will be cancelled (burst limit). + // There are 15 SearchShardTasks eligible for cancellation but only 10 will be cancelled (burst limit). 
service.doRun(); assertEquals(10, service.getSearchShardTasksState().getCancellationCount()); assertEquals(1, service.getSearchShardTasksState().getLimitReachedCount()); @@ -240,6 +301,51 @@ public Stats searchShardTaskStats(List searchShardTasks) { assertEquals(expectedStats, actualStats); } + private SearchBackpressureSettings getBackpressureSettings(String mode, double ratio, double rate, double burst) { + return spy( + new SearchBackpressureSettings( + Settings.builder() + .put(SearchBackpressureSettings.SETTING_MODE.getKey(), mode) + .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATIO.getKey(), ratio) + .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATE.getKey(), rate) + .put(SearchBackpressureSettings.SETTING_CANCELLATION_BURST.getKey(), burst) + .build(), + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ) + ); + } + + private TaskResourceUsageTracker getMockedTaskResourceUsageTracker() { + return new TaskResourceUsageTracker() { + @Override + public String name() { + return TaskResourceUsageTrackerType.CPU_USAGE_TRACKER.getName(); + } + + @Override + public void update(Task task) {} + + @Override + public Optional checkAndMaybeGetCancellationReason(Task task) { + if (task.getTotalResourceStats().getCpuTimeInNanos() < 300) { + return Optional.empty(); + } + + return Optional.of(new TaskCancellation.Reason("limits exceeded", 5)); + } + + @Override + public Stats searchTaskStats(List searchTasks) { + return new MockStats(getSearchTaskCancellationCount()); + } + + @Override + public Stats searchShardTaskStats(List searchShardTasks) { + return new MockStats(getSearchShardTaskCancellationCount()); + } + }; + } + private static class MockStats implements TaskResourceUsageTracker.Stats { private final long cancellationCount; diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/CpuUsageTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/CpuUsageTrackerTests.java 
index c790fb2e60eea..62e2950189436 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/CpuUsageTrackerTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/CpuUsageTrackerTests.java @@ -9,6 +9,7 @@ package org.opensearch.search.backpressure.trackers; import org.opensearch.action.search.SearchShardTask; +import org.opensearch.action.search.SearchTask; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; @@ -24,11 +25,22 @@ public class CpuUsageTrackerTests extends OpenSearchTestCase { private static final SearchBackpressureSettings mockSettings = new SearchBackpressureSettings( Settings.builder() .put(CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD.getKey(), 15) // 15 ms + .put(CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 25) // 25 ms .build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) ); - public void testEligibleForCancellation() { + public void testSearchTaskEligibleForCancellation() { + Task task = createMockTaskWithResourceStats(SearchTask.class, 100000000, 200); + CpuUsageTracker tracker = new CpuUsageTracker(mockSettings); + + Optional reason = tracker.checkAndMaybeGetCancellationReason(task); + assertTrue(reason.isPresent()); + assertEquals(1, reason.get().getCancellationScore()); + assertEquals("cpu usage exceeded [100ms >= 25ms]", reason.get().getMessage()); + } + + public void testSearchShardTaskEligibleForCancellation() { Task task = createMockTaskWithResourceStats(SearchShardTask.class, 200000000, 200); CpuUsageTracker tracker = new CpuUsageTracker(mockSettings); diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTrackerTests.java index 67ed6059a1914..1748ce8d7c253 100644 
--- a/server/src/test/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTrackerTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTrackerTests.java @@ -9,6 +9,7 @@ package org.opensearch.search.backpressure.trackers; import org.opensearch.action.search.SearchShardTask; +import org.opensearch.action.search.SearchTask; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; @@ -25,11 +26,22 @@ public class ElapsedTimeTrackerTests extends OpenSearchTestCase { private static final SearchBackpressureSettings mockSettings = new SearchBackpressureSettings( Settings.builder() .put(ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.getKey(), 100) // 100 ms + .put(ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 150) // 150 ms .build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) ); - public void testEligibleForCancellation() { + public void testSearchTaskEligibleForCancellation() { + Task task = createMockTaskWithResourceStats(SearchTask.class, 1, 1, 0); + ElapsedTimeTracker tracker = new ElapsedTimeTracker(mockSettings, () -> 150000000); + + Optional reason = tracker.checkAndMaybeGetCancellationReason(task); + assertTrue(reason.isPresent()); + assertEquals(1, reason.get().getCancellationScore()); + assertEquals("elapsed time exceeded [150ms >= 150ms]", reason.get().getMessage()); + } + + public void testSearchShardTaskEligibleForCancellation() { Task task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 1, 0); ElapsedTimeTracker tracker = new ElapsedTimeTracker(mockSettings, () -> 200000000); diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java index 
b9967da22fbf1..74f36fe15551e 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java @@ -9,6 +9,7 @@ package org.opensearch.search.backpressure.trackers; import org.opensearch.action.search.SearchShardTask; +import org.opensearch.action.search.SearchTask; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; @@ -24,17 +25,38 @@ public class HeapUsageTrackerTests extends OpenSearchTestCase { private static final long HEAP_BYTES_THRESHOLD = 100; + private static final long HEAP_BYTES_THRESHOLD_FOR_SEARCH_QUERY = 50; private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE = 100; private static final SearchBackpressureSettings mockSettings = new SearchBackpressureSettings( Settings.builder() + .put(HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 3.0) .put(HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD.getKey(), 2.0) + .put(HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY.getKey(), HEAP_MOVING_AVERAGE_WINDOW_SIZE) .put(HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.getKey(), HEAP_MOVING_AVERAGE_WINDOW_SIZE) .build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) ); - public void testEligibleForCancellation() { + public void testSearchTaskEligibleForCancellation() { + HeapUsageTracker tracker = spy(new HeapUsageTracker(mockSettings)); + when(tracker.getHeapBytesThresholdForSearchQuery()).thenReturn(HEAP_BYTES_THRESHOLD_FOR_SEARCH_QUERY); + Task task = createMockTaskWithResourceStats(SearchTask.class, 1, 50); + + // Record enough observations to make the moving average 'ready'. 
+ for (int i = 0; i < HEAP_MOVING_AVERAGE_WINDOW_SIZE; i++) { + tracker.update(task); + } + + // Task that has heap usage >= heapBytesThreshold and (movingAverage * heapVariance). + task = createMockTaskWithResourceStats(SearchTask.class, 1, 300); + Optional reason = tracker.checkAndMaybeGetCancellationReason(task); + assertTrue(reason.isPresent()); + assertEquals(6, reason.get().getCancellationScore()); + assertEquals("heap usage exceeded [300b >= 150b]", reason.get().getMessage()); + } + + public void testSearchShardTaskEligibleForCancellation() { HeapUsageTracker tracker = spy(new HeapUsageTracker(mockSettings)); when(tracker.getHeapBytesThreshold()).thenReturn(HEAP_BYTES_THRESHOLD); Task task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 50); From b31b876975af5dda4b6f449691daf3628fb4b7f6 Mon Sep 17 00:00:00 2001 From: PritLadani Date: Thu, 5 Jan 2023 23:40:13 +0530 Subject: [PATCH 15/34] Introducing separate thresholds for different task types Signed-off-by: PritLadani --- .../backpressure/SearchBackpressureIT.java | 25 +- .../action/search/SearchShardTask.java | 3 +- .../opensearch/action/search/SearchTask.java | 3 +- .../common/settings/ClusterSettings.java | 34 +-- .../SearchBackpressureService.java | 261 ++++++++++++------ .../settings/SearchBackpressureSettings.java | 171 +++++++++--- .../settings/SearchShardTaskSettings.java | 121 +++++++- .../settings/SearchTaskSettings.java | 132 ++++++++- .../stats/SearchBackpressureStats.java | 18 +- ....java => SearchBackpressureTaskStats.java} | 25 +- .../backpressure/stats/SearchTaskStats.java | 100 ------- .../trackers/CpuUsageTracker.java | 76 +---- .../trackers/ElapsedTimeTracker.java | 78 +----- .../trackers/HeapUsageTracker.java | 212 +++----------- .../trackers/TaskResourceUsageTracker.java | 28 +- .../tasks/SearchBackpressureTask.java | 11 + .../SearchBackpressureServiceTests.java | 73 ++--- .../stats/SearchBackpressureStatsTests.java | 4 +- ... 
=> SearchBackpressureTaskStatsTests.java} | 12 +- .../stats/SearchTaskStatsTests.java | 44 --- .../trackers/CpuUsageTrackerTests.java | 12 +- .../trackers/ElapsedTimeTrackerTests.java | 21 +- .../trackers/HeapUsageTrackerTests.java | 56 +++- .../tasks/TaskCancellationTests.java | 24 +- 24 files changed, 795 insertions(+), 749 deletions(-) rename server/src/main/java/org/opensearch/search/backpressure/stats/{SearchShardTaskStats.java => SearchBackpressureTaskStats.java} (80%) delete mode 100644 server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java create mode 100644 server/src/main/java/org/opensearch/tasks/SearchBackpressureTask.java rename server/src/test/java/org/opensearch/search/backpressure/stats/{SearchShardTaskStatsTests.java => SearchBackpressureTaskStatsTests.java} (75%) delete mode 100644 server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java diff --git a/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java b/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java index e3ad4e2286e36..8d343821cfa87 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java @@ -31,9 +31,6 @@ import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; import org.opensearch.search.backpressure.settings.SearchTaskSettings; -import org.opensearch.search.backpressure.trackers.CpuUsageTracker; -import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; -import org.opensearch.search.backpressure.trackers.HeapUsageTracker; import org.opensearch.tasks.CancellableTask; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancelledException; @@ -75,7 +72,7 @@ public 
final void setupNodeSettings() { .put(NodeDuressSettings.SETTING_CPU_THRESHOLD.getKey(), 0.0) .put(NodeDuressSettings.SETTING_HEAP_THRESHOLD.getKey(), 0.0) .put(NodeDuressSettings.SETTING_NUM_SUCCESSIVE_BREACHES.getKey(), 1) - .put(SearchTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 0.0) + .put(SearchTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD.getKey(), 0.0) .put(SearchShardTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD.getKey(), 0.0) .build(); assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); @@ -95,7 +92,7 @@ public final void cleanupNodeSettings() { public void testSearchTaskCancellationWithHighElapsedTime() throws InterruptedException { Settings request = Settings.builder() .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") - .put(ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 1000) + .put(SearchTaskSettings.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.getKey(), 1000) .build(); assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); @@ -126,7 +123,7 @@ public void testSearchTaskCancellationWithHighElapsedTime() throws InterruptedEx public void testSearchShardTaskCancellationWithHighElapsedTime() throws InterruptedException { Settings request = Settings.builder() .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") - .put(ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.getKey(), 1000) + .put(SearchShardTaskSettings.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.getKey(), 1000) .build(); assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); @@ -143,7 +140,7 @@ public void testSearchShardTaskCancellationWithHighElapsedTime() throws Interrup public void testSearchTaskCancellationWithHighCpu() throws InterruptedException { Settings request = Settings.builder() .put(SearchBackpressureSettings.SETTING_MODE.getKey(), 
"enforced") - .put(CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 1000) + .put(SearchTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD.getKey(), 1000) .build(); assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); @@ -174,7 +171,7 @@ public void testSearchTaskCancellationWithHighCpu() throws InterruptedException public void testSearchShardTaskCancellationWithHighCpu() throws InterruptedException { Settings request = Settings.builder() .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") - .put(CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD.getKey(), 1000) + .put(SearchShardTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD.getKey(), 1000) .build(); assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); @@ -195,9 +192,9 @@ public void testSearchTaskCancellationWithHighHeapUsage() throws InterruptedExce String node = randomFrom(internalCluster().getNodeNames()); Settings request = Settings.builder() .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") - .put(HeapUsageTracker.SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 0.0) - .put(HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 1.0) - .put(HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY.getKey(), MOVING_AVERAGE_WINDOW_SIZE) + .put(SearchTaskSettings.SETTING_HEAP_PERCENT_THRESHOLD.getKey(), 0.0) + .put(SearchTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD.getKey(), 1.0) + .put(SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.getKey(), MOVING_AVERAGE_WINDOW_SIZE) .build(); assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); @@ -251,9 +248,9 @@ public void testSearchShardTaskCancellationWithHighHeapUsage() throws Interrupte String node = randomFrom(internalCluster().getNodeNames()); Settings request = Settings.builder() 
.put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") - .put(HeapUsageTracker.SETTING_HEAP_PERCENT_THRESHOLD.getKey(), 0.0) - .put(HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD.getKey(), 1.0) - .put(HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.getKey(), MOVING_AVERAGE_WINDOW_SIZE) + .put(SearchShardTaskSettings.SETTING_HEAP_PERCENT_THRESHOLD.getKey(), 0.0) + .put(SearchShardTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD.getKey(), 1.0) + .put(SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.getKey(), MOVING_AVERAGE_WINDOW_SIZE) .build(); assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get()); diff --git a/server/src/main/java/org/opensearch/action/search/SearchShardTask.java b/server/src/main/java/org/opensearch/action/search/SearchShardTask.java index c9d0d6e2d3d47..c94f02395cf38 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchShardTask.java +++ b/server/src/main/java/org/opensearch/action/search/SearchShardTask.java @@ -36,6 +36,7 @@ import org.opensearch.search.fetch.ShardFetchSearchRequest; import org.opensearch.search.internal.ShardSearchRequest; import org.opensearch.tasks.CancellableTask; +import org.opensearch.tasks.SearchBackpressureTask; import org.opensearch.tasks.TaskId; import java.util.Map; @@ -47,7 +48,7 @@ * * @opensearch.internal */ -public class SearchShardTask extends CancellableTask { +public class SearchShardTask extends CancellableTask implements SearchBackpressureTask { // generating metadata in a lazy way since source can be quite big private final MemoizedSupplier metadataSupplier; diff --git a/server/src/main/java/org/opensearch/action/search/SearchTask.java b/server/src/main/java/org/opensearch/action/search/SearchTask.java index 987485fe44c65..dad6c44da4f10 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchTask.java +++ b/server/src/main/java/org/opensearch/action/search/SearchTask.java @@ -34,6 +34,7 @@ import 
org.opensearch.common.unit.TimeValue; import org.opensearch.tasks.CancellableTask; +import org.opensearch.tasks.SearchBackpressureTask; import org.opensearch.tasks.TaskId; import java.util.Map; @@ -46,7 +47,7 @@ * * @opensearch.internal */ -public class SearchTask extends CancellableTask { +public class SearchTask extends CancellableTask implements SearchBackpressureTask { // generating description in a lazy way since source can be quite big private final Supplier descriptionSupplier; private SearchProgressListener progressListener = SearchProgressListener.NOOP; diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index a7f520c26e480..6d2de14c5c61b 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -45,9 +45,6 @@ import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; import org.opensearch.search.backpressure.settings.SearchTaskSettings; -import org.opensearch.search.backpressure.trackers.CpuUsageTracker; -import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; -import org.opensearch.search.backpressure.trackers.HeapUsageTracker; import org.opensearch.tasks.TaskManager; import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.watcher.ResourceWatcherService; @@ -599,24 +596,27 @@ public void apply(Settings value, Settings current, Settings previous) { // Settings related to search backpressure SearchBackpressureSettings.SETTING_MODE, - SearchBackpressureSettings.SETTING_CANCELLATION_RATIO, - SearchBackpressureSettings.SETTING_CANCELLATION_RATE, - SearchBackpressureSettings.SETTING_CANCELLATION_BURST, + SearchBackpressureSettings.SETTING_CANCELLATION_RATIO_SEARCH_TASK, + 
SearchBackpressureSettings.SETTING_CANCELLATION_RATE_SEARCH_TASK, + SearchBackpressureSettings.SETTING_CANCELLATION_BURST_SEARCH_TASK, + SearchBackpressureSettings.SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK, + SearchBackpressureSettings.SETTING_CANCELLATION_RATE_SEARCH_SHARD_TASK, + SearchBackpressureSettings.SETTING_CANCELLATION_BURST_SEARCH_SHARD_TASK, NodeDuressSettings.SETTING_NUM_SUCCESSIVE_BREACHES, NodeDuressSettings.SETTING_CPU_THRESHOLD, NodeDuressSettings.SETTING_HEAP_THRESHOLD, SearchShardTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, - HeapUsageTracker.SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY, - HeapUsageTracker.SETTING_HEAP_PERCENT_THRESHOLD, - HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY, - HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD, - HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY, - HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, - CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, - CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD, - ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, - ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, - SearchTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY + SearchTaskSettings.SETTING_HEAP_PERCENT_THRESHOLD, + SearchShardTaskSettings.SETTING_HEAP_PERCENT_THRESHOLD, + SearchTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD, + SearchShardTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD, + SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, + SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, + SearchTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD, + SearchShardTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD, + SearchTaskSettings.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, + SearchShardTaskSettings.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, + SearchTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD ) ) ); diff --git 
a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index 2465790176daa..928b26a835bc4 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -20,8 +20,7 @@ import org.opensearch.search.backpressure.settings.SearchBackpressureMode; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; -import org.opensearch.search.backpressure.stats.SearchShardTaskStats; -import org.opensearch.search.backpressure.stats.SearchTaskStats; +import org.opensearch.search.backpressure.stats.SearchBackpressureTaskStats; import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; @@ -29,6 +28,7 @@ import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; import org.opensearch.tasks.CancellableTask; +import org.opensearch.tasks.SearchBackpressureTask; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; import org.opensearch.tasks.TaskResourceTrackingService; @@ -39,7 +39,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Comparator; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; @@ -67,17 +66,15 @@ public class SearchBackpressureService extends AbstractLifecycleComponent private final LongSupplier timeNanosSupplier; private final List nodeDuressTrackers; - private final List taskResourceUsageTrackers; + private final List searchTaskTrackers; + private final List 
searchShardTaskTrackers; - private final AtomicReference taskCancellationRateLimiter = new AtomicReference<>(); - private final AtomicReference taskCancellationRatioLimiter = new AtomicReference<>(); + private final AtomicReference searchTaskCancellationRateLimiter = new AtomicReference<>(); + private final AtomicReference searchTaskCancellationRatioLimiter = new AtomicReference<>(); + private final AtomicReference searchShardTaskCancellationRateLimiter = new AtomicReference<>(); + private final AtomicReference searchShardTaskCancellationRatioLimiter = new AtomicReference<>(); - private final Map, SearchBackpressureState> searchBackpressureStates = new HashMap<>() { - { - put(SearchTask.class, new SearchBackpressureState()); - put(SearchShardTask.class, new SearchBackpressureState()); - } - }; + private final Map, SearchBackpressureState> searchBackpressureStates; public SearchBackpressureService( SearchBackpressureSettings settings, @@ -97,7 +94,26 @@ public SearchBackpressureService( () -> JvmStats.jvmStats().getMem().getHeapUsedPercent() / 100.0 >= settings.getNodeDuressSettings().getHeapThreshold() ) ), - List.of(new CpuUsageTracker(settings), new HeapUsageTracker(settings), new ElapsedTimeTracker(settings, System::nanoTime)) + List.of( + new CpuUsageTracker(settings.getSearchTaskSettings()::getCpuTimeNanosThreshold), + new HeapUsageTracker( + settings.getSearchTaskSettings()::getHeapVarianceThreshold, + settings.getSearchTaskSettings()::getHeapBytesThreshold, + settings.getSearchTaskSettings()::getHeapMovingAverageWindowSize, + settings.getClusterSettings() + ), + new ElapsedTimeTracker(settings.getSearchTaskSettings()::getElapsedTimeNanosThreshold, System::nanoTime) + ), + List.of( + new CpuUsageTracker(settings.getSearchShardTaskSettings()::getCpuTimeNanosThreshold), + new HeapUsageTracker( + settings.getSearchShardTaskSettings()::getHeapVarianceThreshold, + settings.getSearchShardTaskSettings()::getHeapBytesThreshold, + 
settings.getSearchShardTaskSettings()::getHeapMovingAverageWindowSize, + settings.getClusterSettings() + ), + new ElapsedTimeTracker(settings.getSearchShardTaskSettings()::getElapsedTimeNanosThreshold, System::nanoTime) + ) ); } @@ -107,7 +123,8 @@ public SearchBackpressureService( ThreadPool threadPool, LongSupplier timeNanosSupplier, List nodeDuressTrackers, - List taskResourceUsageTrackers + List searchTaskTrackers, + List searchShardTaskTrackers ) { this.settings = settings; this.settings.addListener(this); @@ -116,20 +133,55 @@ public SearchBackpressureService( this.threadPool = threadPool; this.timeNanosSupplier = timeNanosSupplier; this.nodeDuressTrackers = nodeDuressTrackers; - this.taskResourceUsageTrackers = taskResourceUsageTrackers; + this.searchTaskTrackers = searchTaskTrackers; + this.searchShardTaskTrackers = searchShardTaskTrackers; + + this.searchBackpressureStates = Map.of( + SearchTask.class, + new SearchBackpressureState(), + SearchShardTask.class, + new SearchBackpressureState() + ); + + this.searchTaskCancellationRateLimiter.set( + new TokenBucket( + timeNanosSupplier, + getSettings().getCancellationRateSearchTaskNanos(), + getSettings().getCancellationBurstSearchTask() + ) + ); - this.taskCancellationRateLimiter.set( - new TokenBucket(timeNanosSupplier, getSettings().getCancellationRateNanos(), getSettings().getCancellationBurst()) + this.searchTaskCancellationRatioLimiter.set( + new TokenBucket( + this::getSearchTaskCompletionCount, + getSettings().getCancellationRatioSearchTask(), + getSettings().getCancellationBurstSearchTask() + ) ); - this.taskCancellationRatioLimiter.set( - new TokenBucket(this::getTaskCompletionCount, getSettings().getCancellationRatio(), getSettings().getCancellationBurst()) + this.searchShardTaskCancellationRateLimiter.set( + new TokenBucket( + timeNanosSupplier, + getSettings().getCancellationRateSearchShardTaskNanos(), + getSettings().getCancellationBurstSearchShardTask() + ) ); + + 
this.searchShardTaskCancellationRatioLimiter.set( + new TokenBucket( + this::getSearchShardTaskCompletionCount, + getSettings().getCancellationRatioSearchShardTask(), + getSettings().getCancellationBurstSearchShardTask() + ) + ); + } + + private long getSearchTaskCompletionCount() { + return searchBackpressureStates.get(SearchTask.class).getCompletionCount(); } - private long getTaskCompletionCount() { - return searchBackpressureStates.get(SearchTask.class).getCompletionCount() + searchBackpressureStates.get(SearchShardTask.class) - .getCompletionCount(); + private long getSearchShardTaskCompletionCount() { + return searchBackpressureStates.get(SearchShardTask.class).getCompletionCount(); } void doRun() { @@ -142,8 +194,8 @@ void doRun() { return; } - List searchTasks = getSearchTasks(); - List searchShardTasks = getSearchShardTasks(); + List searchTasks = getTaskByType(SearchTask.class); + List searchShardTasks = getTaskByType(SearchShardTask.class); List cancellableTasks = new ArrayList<>(); // Force-refresh usage stats of these tasks before making a cancellation decision. @@ -160,6 +212,7 @@ void doRun() { cancellableTasks.addAll(searchShardTasks); } + // none of the task type is breaching the heap usage thresholds and hence we do not cancel any tasks if (cancellableTasks.isEmpty()) { return; } @@ -176,9 +229,15 @@ void doRun() { continue; } + boolean isSearchTask = taskCancellation.getTask() instanceof SearchTask; + // Independently remove tokens from both token buckets. - boolean rateLimitReached = taskCancellationRateLimiter.get().request() == false; - boolean ratioLimitReached = taskCancellationRatioLimiter.get().request() == false; + boolean rateLimitReached = isSearchTask + ? searchTaskCancellationRateLimiter.get().request() == false + : searchShardTaskCancellationRateLimiter.get().request() == false; + boolean ratioLimitReached = isSearchTask + ? 
searchTaskCancellationRatioLimiter.get().request() == false + : searchShardTaskCancellationRatioLimiter.get().request() == false; // Stop cancelling tasks if there are no tokens in either of the two token buckets. if (rateLimitReached && ratioLimitReached) { @@ -186,7 +245,9 @@ void doRun() { SearchBackpressureState searchBackpressureState = searchBackpressureStates.get( (taskCancellation.getTask() instanceof SearchTask) ? SearchTask.class : SearchShardTask.class ); - searchBackpressureState.incrementLimitReachedCount(); + if (searchBackpressureState != null) { + searchBackpressureState.incrementLimitReachedCount(); + } break; } @@ -224,26 +285,14 @@ boolean isHeapUsageDominatedBySearch(List cancellableTasks, lon } /** - * Filters and returns the list of currently running SearchShardTasks. - */ - List getSearchShardTasks() { - return taskResourceTrackingService.getResourceAwareTasks() - .values() - .stream() - .filter(task -> task instanceof SearchShardTask) - .map(task -> (SearchShardTask) task) - .collect(Collectors.toUnmodifiableList()); - } - - /** - * Filters and returns the list of currently running SearchTasks. + * Filters and returns the list of currently running tasks of specified type. */ - List getSearchTasks() { + List getTaskByType(Class type) { return taskResourceTrackingService.getResourceAwareTasks() .values() .stream() - .filter(task -> task instanceof SearchTask) - .map(task -> (SearchTask) task) + .filter(type::isInstance) + .map(type::cast) .collect(Collectors.toUnmodifiableList()); } @@ -255,23 +304,21 @@ List getSearchTasks() { TaskCancellation getTaskCancellation(CancellableTask task) { List reasons = new ArrayList<>(); List callbacks = new ArrayList<>(); - - for (TaskResourceUsageTracker tracker : taskResourceUsageTrackers) { + boolean isSearchTask = task instanceof SearchTask; + List trackers = isSearchTask ? 
searchTaskTrackers : searchShardTaskTrackers; + for (TaskResourceUsageTracker tracker : trackers) { Optional reason = tracker.checkAndMaybeGetCancellationReason(task); if (reason.isPresent()) { - if (task instanceof SearchTask) { - callbacks.add(tracker::incrementSearchTaskCancellations); - } else { - callbacks.add(tracker::incrementSearchShardTaskCancellations); - } + callbacks.add(tracker::incrementCancellations); reasons.add(reason.get()); } } - if (task instanceof SearchTask) { - callbacks.add(searchBackpressureStates.get(SearchTask.class)::incrementCancellationCount); - } else { - callbacks.add(searchBackpressureStates.get(SearchShardTask.class)::incrementCancellationCount); + SearchBackpressureState searchBackpressureState = searchBackpressureStates.get( + isSearchTask ? SearchTask.class : SearchShardTask.class + ); + if (searchBackpressureState != null) { + callbacks.add(searchBackpressureState::incrementCancellationCount); } return new TaskCancellation(task, reasons, callbacks); @@ -281,23 +328,20 @@ TaskCancellation getTaskCancellation(CancellableTask task) { * Returns a list of TaskCancellations sorted by descending order of their cancellation scores. 
*/ List getTaskCancellations(List tasks) { - return tasks.stream() + List t = tasks.stream() .map(this::getTaskCancellation) .filter(TaskCancellation::isEligibleForCancellation) .sorted(Comparator.reverseOrder()) .collect(Collectors.toUnmodifiableList()); + return t; } SearchBackpressureSettings getSettings() { return settings; } - SearchBackpressureState getSearchTasksState() { - return searchBackpressureStates.get(SearchTask.class); - } - - SearchBackpressureState getSearchShardTasksState() { - return searchBackpressureStates.get(SearchShardTask.class); + SearchBackpressureState getSearchBackpressureTaskStats(Class taskType) { + return searchBackpressureStates.get(taskType); } @Override @@ -306,47 +350,88 @@ public void onTaskCompleted(Task task) { return; } - if (task instanceof SearchTask == false && task instanceof SearchShardTask == false) { + if (task instanceof SearchBackpressureTask == false) { return; } CancellableTask cancellableTask = (CancellableTask) task; - SearchBackpressureState searchBackpressureState = searchBackpressureStates.get( - (task instanceof SearchTask) ? SearchTask.class : SearchShardTask.class - ); + boolean isSearchTask = task instanceof SearchTask; if (cancellableTask.isCancelled() == false) { - searchBackpressureState.incrementCompletionCount(); + SearchBackpressureState searchBackpressureState = searchBackpressureStates.get( + isSearchTask ? SearchTask.class : SearchShardTask.class + ); + if (searchBackpressureState != null) { + searchBackpressureState.incrementCompletionCount(); + } } List exceptions = new ArrayList<>(); - for (TaskResourceUsageTracker tracker : taskResourceUsageTrackers) { + List trackers = isSearchTask ? 
searchTaskTrackers : searchShardTaskTrackers; + for (TaskResourceUsageTracker tracker : trackers) { try { tracker.update(task); } catch (Exception e) { exceptions.add(e); } } + ExceptionsHelper.maybeThrowRuntimeAndSuppress(exceptions); } @Override - public void onCancellationRatioChanged() { - taskCancellationRatioLimiter.set( - new TokenBucket(this::getTaskCompletionCount, getSettings().getCancellationRatio(), getSettings().getCancellationBurst()) + public void onCancellationRatioSearchTaskChanged() { + searchTaskCancellationRatioLimiter.set( + new TokenBucket( + this::getSearchTaskCompletionCount, + getSettings().getCancellationRatioSearchTask(), + getSettings().getCancellationBurstSearchTask() + ) + ); + } + + @Override + public void onCancellationRateSearchTaskChanged() { + searchTaskCancellationRateLimiter.set( + new TokenBucket( + timeNanosSupplier, + getSettings().getCancellationRateSearchTaskNanos(), + getSettings().getCancellationBurstSearchTask() + ) + ); + } + + @Override + public void onCancellationBurstSearchTaskChanged() { + onCancellationRatioSearchTaskChanged(); + onCancellationRateSearchTaskChanged(); + } + + @Override + public void onCancellationRatioSearchShardTaskChanged() { + searchShardTaskCancellationRatioLimiter.set( + new TokenBucket( + this::getSearchShardTaskCompletionCount, + getSettings().getCancellationRatioSearchShardTask(), + getSettings().getCancellationBurstSearchShardTask() + ) ); } @Override - public void onCancellationRateChanged() { - taskCancellationRateLimiter.set( - new TokenBucket(timeNanosSupplier, getSettings().getCancellationRateNanos(), getSettings().getCancellationBurst()) + public void onCancellationRateSearchShardTaskChanged() { + searchShardTaskCancellationRateLimiter.set( + new TokenBucket( + timeNanosSupplier, + getSettings().getCancellationRateSearchShardTaskNanos(), + getSettings().getCancellationBurstSearchShardTask() + ) ); } @Override - public void onCancellationBurstChanged() { - 
onCancellationRatioChanged(); - onCancellationRateChanged(); + public void onCancellationBurstSearchShardTaskChanged() { + onCancellationRatioSearchShardTaskChanged(); + onCancellationRateSearchShardTaskChanged(); } @Override @@ -371,28 +456,20 @@ protected void doStop() { protected void doClose() throws IOException {} public SearchBackpressureStats nodeStats() { - List searchTasks = getSearchTasks(); - List searchShardTasks = getSearchShardTasks(); - - SearchTaskStats searchTaskStats = new SearchTaskStats( + List searchTasks = getTaskByType(SearchTask.class); + List searchShardTasks = getTaskByType(SearchShardTask.class); + SearchBackpressureTaskStats searchTaskStats = new SearchBackpressureTaskStats( searchBackpressureStates.get(SearchTask.class).getCancellationCount(), searchBackpressureStates.get(SearchTask.class).getLimitReachedCount(), - taskResourceUsageTrackers.stream() - .collect( - Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.searchTaskStats(searchTasks)) - ) + searchTaskTrackers.stream() + .collect(Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.stats(searchTasks))) ); - SearchShardTaskStats searchShardTaskStats = new SearchShardTaskStats( + SearchBackpressureTaskStats searchShardTaskStats = new SearchBackpressureTaskStats( searchBackpressureStates.get(SearchShardTask.class).getCancellationCount(), searchBackpressureStates.get(SearchShardTask.class).getLimitReachedCount(), - taskResourceUsageTrackers.stream() - .collect( - Collectors.toUnmodifiableMap( - t -> TaskResourceUsageTrackerType.fromName(t.name()), - t -> t.searchShardTaskStats(searchShardTasks) - ) - ) + searchShardTaskTrackers.stream() + .collect(Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.stats(searchShardTasks))) ); return new SearchBackpressureStats(searchTaskStats, searchShardTaskStats, getSettings().getMode()); diff --git 
a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java index 3906228389729..e0cd4efd43aac 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java @@ -29,9 +29,13 @@ private static class Defaults { private static final long INTERVAL_MILLIS = 1000; private static final String MODE = "monitor_only"; - private static final double CANCELLATION_RATIO = 0.1; - private static final double CANCELLATION_RATE = 0.003; - private static final double CANCELLATION_BURST = 10.0; + // TODO: decide on default settings for SearchTask + private static final double CANCELLATION_RATIO_SEARCH_TASK = 0.1; + private static final double CANCELLATION_RATE_SEARCH_TASK = 0.003; + private static final double CANCELLATION_BURST_SEARCH_TASK = 10.0; + private static final double CANCELLATION_RATIO_SEARCH_SHARD_TASK = 0.1; + private static final double CANCELLATION_RATE_SEARCH_SHARD_TASK = 0.003; + private static final double CANCELLATION_BURST_SEARCH_SHARD_TASK = 10.0; } /** @@ -56,14 +60,53 @@ private static class Defaults { Setting.Property.NodeScope ); + /** + * Defines the percentage of SearchTasks to cancel relative to the number of successful SearchTask completions. + * In other words, it is the number of tokens added to the bucket on each successful SearchTask completion. + */ + private volatile double cancellationRatioSearchTask; + public static final Setting SETTING_CANCELLATION_RATIO_SEARCH_TASK = Setting.doubleSetting( + "search_backpressure.cancellation_ratio_search_task", + Defaults.CANCELLATION_RATIO_SEARCH_TASK, + 0.0, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the number of tasks to cancel per unit time (in millis). 
+ * In other words, it is the number of tokens added to the bucket each millisecond. + */ + private volatile double cancellationRateSearchTask; + public static final Setting SETTING_CANCELLATION_RATE_SEARCH_TASK = Setting.doubleSetting( + "search_backpressure.cancellation_rate_search_task", + Defaults.CANCELLATION_RATE_SEARCH_TASK, + 0.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the maximum number of tasks that can be cancelled before being rate-limited. + */ + private volatile double cancellationBurstSearchTask; + public static final Setting SETTING_CANCELLATION_BURST_SEARCH_TASK = Setting.doubleSetting( + "search_backpressure.cancellation_burst_search_task", + Defaults.CANCELLATION_BURST_SEARCH_TASK, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + /** * Defines the percentage of tasks to cancel relative to the number of successful task completions. * In other words, it is the number of tokens added to the bucket on each successful task completion. */ - private volatile double cancellationRatio; - public static final Setting SETTING_CANCELLATION_RATIO = Setting.doubleSetting( - "search_backpressure.cancellation_ratio", - Defaults.CANCELLATION_RATIO, + private volatile double cancellationRatioSearchShardTask; + public static final Setting SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK = Setting.doubleSetting( + "search_backpressure.cancellation_ratio_search_shard_task", + Defaults.CANCELLATION_RATIO_SEARCH_SHARD_TASK, 0.0, 1.0, Setting.Property.Dynamic, @@ -74,10 +117,10 @@ private static class Defaults { * Defines the number of tasks to cancel per unit time (in millis). * In other words, it is the number of tokens added to the bucket each millisecond. 
*/ - private volatile double cancellationRate; - public static final Setting SETTING_CANCELLATION_RATE = Setting.doubleSetting( - "search_backpressure.cancellation_rate", - Defaults.CANCELLATION_RATE, + private volatile double cancellationRateSearchShardTask; + public static final Setting SETTING_CANCELLATION_RATE_SEARCH_SHARD_TASK = Setting.doubleSetting( + "search_backpressure.cancellation_rate_search_shard_task", + Defaults.CANCELLATION_RATE_SEARCH_SHARD_TASK, 0.0, Setting.Property.Dynamic, Setting.Property.NodeScope @@ -86,10 +129,10 @@ private static class Defaults { /** * Defines the maximum number of tasks that can be cancelled before being rate-limited. */ - private volatile double cancellationBurst; - public static final Setting SETTING_CANCELLATION_BURST = Setting.doubleSetting( - "search_backpressure.cancellation_burst", - Defaults.CANCELLATION_BURST, + private volatile double cancellationBurstSearchShardTask; + public static final Setting SETTING_CANCELLATION_BURST_SEARCH_SHARD_TASK = Setting.doubleSetting( + "search_backpressure.cancellation_burst_search_shard_task", + Defaults.CANCELLATION_BURST_SEARCH_SHARD_TASK, 1.0, Setting.Property.Dynamic, Setting.Property.NodeScope @@ -99,11 +142,17 @@ private static class Defaults { * Callback listeners. 
*/ public interface Listener { - void onCancellationRatioChanged(); + void onCancellationRatioSearchTaskChanged(); + + void onCancellationRateSearchTaskChanged(); - void onCancellationRateChanged(); + void onCancellationBurstSearchTaskChanged(); - void onCancellationBurstChanged(); + void onCancellationRatioSearchShardTaskChanged(); + + void onCancellationRateSearchShardTaskChanged(); + + void onCancellationBurstSearchShardTaskChanged(); } private final List listeners = new ArrayList<>(); @@ -125,14 +174,23 @@ public SearchBackpressureSettings(Settings settings, ClusterSettings clusterSett mode = SearchBackpressureMode.fromName(SETTING_MODE.get(settings)); clusterSettings.addSettingsUpdateConsumer(SETTING_MODE, s -> this.setMode(SearchBackpressureMode.fromName(s))); - cancellationRatio = SETTING_CANCELLATION_RATIO.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATIO, this::setCancellationRatio); + cancellationRatioSearchTask = SETTING_CANCELLATION_RATIO_SEARCH_TASK.get(settings); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATIO_SEARCH_TASK, this::setCancellationRatioSearchTask); + + cancellationRateSearchTask = SETTING_CANCELLATION_RATE_SEARCH_TASK.get(settings); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATE_SEARCH_TASK, this::setCancellationRateSearchTask); - cancellationRate = SETTING_CANCELLATION_RATE.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATE, this::setCancellationRate); + cancellationBurstSearchTask = SETTING_CANCELLATION_BURST_SEARCH_TASK.get(settings); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST_SEARCH_TASK, this::setCancellationBurstSearchTask); - cancellationBurst = SETTING_CANCELLATION_BURST.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST, this::setCancellationBurst); + cancellationRatioSearchShardTask = SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK.get(settings); + 
clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK, this::setCancellationRatioSearchShardTask); + + cancellationRateSearchShardTask = SETTING_CANCELLATION_RATE_SEARCH_SHARD_TASK.get(settings); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATE_SEARCH_SHARD_TASK, this::setCancellationRateSearchShardTask); + + cancellationBurstSearchShardTask = SETTING_CANCELLATION_BURST_SEARCH_SHARD_TASK.get(settings); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST_SEARCH_SHARD_TASK, this::setCancellationBurstSearchShardTask); } public void addListener(Listener listener) { @@ -171,35 +229,66 @@ public void setMode(SearchBackpressureMode mode) { this.mode = mode; } - public double getCancellationRatio() { - return cancellationRatio; + public double getCancellationRatioSearchTask() { + return cancellationRatioSearchTask; + } + + private void setCancellationRatioSearchTask(double cancellationRatioSearchTask) { + this.cancellationRatioSearchTask = cancellationRatioSearchTask; + notifyListeners(Listener::onCancellationRatioSearchTaskChanged); + } + + public double getCancellationRateSearchTask() { + return cancellationRateSearchTask; + } + + public double getCancellationRateSearchTaskNanos() { + return getCancellationRateSearchTask() / TimeUnit.MILLISECONDS.toNanos(1); // rate per nanoseconds + } + + private void setCancellationRateSearchTask(double cancellationRateSearchTask) { + this.cancellationRateSearchTask = cancellationRateSearchTask; + notifyListeners(Listener::onCancellationRateSearchTaskChanged); + } + + public double getCancellationBurstSearchTask() { + return cancellationBurstSearchTask; + } + + private void setCancellationBurstSearchTask(double cancellationBurstSearchTask) { + this.cancellationBurstSearchTask = cancellationBurstSearchTask; + notifyListeners(Listener::onCancellationBurstSearchTaskChanged); + } + + public double getCancellationRatioSearchShardTask() { + return 
cancellationRatioSearchShardTask; } - private void setCancellationRatio(double cancellationRatio) { - this.cancellationRatio = cancellationRatio; - notifyListeners(Listener::onCancellationRatioChanged); + private void setCancellationRatioSearchShardTask(double cancellationRatioSearchShardTask) { + this.cancellationRatioSearchShardTask = cancellationRatioSearchShardTask; + notifyListeners(Listener::onCancellationRatioSearchShardTaskChanged); } - public double getCancellationRate() { - return cancellationRate; + public double getCancellationRateSearchShardTask() { + return cancellationRateSearchShardTask; } - public double getCancellationRateNanos() { - return getCancellationRate() / TimeUnit.MILLISECONDS.toNanos(1); // rate per nanoseconds + public double getCancellationRateSearchShardTaskNanos() { + return getCancellationRateSearchShardTask() / TimeUnit.MILLISECONDS.toNanos(1); // rate per nanoseconds } - private void setCancellationRate(double cancellationRate) { - this.cancellationRate = cancellationRate; - notifyListeners(Listener::onCancellationRateChanged); + private void setCancellationRateSearchShardTask(double cancellationRateSearchShardTask) { + this.cancellationRateSearchShardTask = cancellationRateSearchShardTask; + notifyListeners(Listener::onCancellationRateSearchShardTaskChanged); } - public double getCancellationBurst() { - return cancellationBurst; + public double getCancellationBurstSearchShardTask() { + return cancellationBurstSearchShardTask; } - private void setCancellationBurst(double cancellationBurst) { - this.cancellationBurst = cancellationBurst; - notifyListeners(Listener::onCancellationBurstChanged); + private void setCancellationBurstSearchShardTask(double cancellationBurstSearchShardTask) { + this.cancellationBurstSearchShardTask = cancellationBurstSearchShardTask; + notifyListeners(Listener::onCancellationBurstSearchShardTaskChanged); } private void notifyListeners(Consumer consumer) { diff --git 
a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java index 7e40f1c0eab53..98599e9478a29 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java @@ -13,6 +13,8 @@ import org.opensearch.common.settings.Settings; import org.opensearch.monitor.jvm.JvmStats; +import java.util.concurrent.TimeUnit; + /** * Defines the settings related to the cancellation of SearchShardTasks. * @@ -23,6 +25,11 @@ public class SearchShardTaskSettings { private static class Defaults { private static final double TOTAL_HEAP_PERCENT_THRESHOLD = 0.05; + private static final long CPU_TIME_MILLIS_THRESHOLD = 15000; + private static final long ELAPSED_TIME_MILLIS_THRESHOLD = 30000; + private static final double HEAP_PERCENT_THRESHOLD = 0.005; + private static final double HEAP_VARIANCE_THRESHOLD = 2.0; + private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE = 100; } /** @@ -39,9 +46,81 @@ private static class Defaults { Setting.Property.NodeScope ); + /** + * Defines the CPU usage threshold (in millis) for an individual search shard task before it is considered for cancellation. + */ + private volatile long cpuTimeMillisThreshold; + public static final Setting SETTING_CPU_TIME_MILLIS_THRESHOLD = Setting.longSetting( + "search_backpressure.search_shard_task.cpu_time_millis_threshold", + Defaults.CPU_TIME_MILLIS_THRESHOLD, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the elapsed time threshold (in millis) for an individual search shard task before it is considered for cancellation. 
+ */ + private volatile long elapsedTimeMillisThreshold; + public static final Setting SETTING_ELAPSED_TIME_MILLIS_THRESHOLD = Setting.longSetting( + "search_backpressure.search_shard_task.elapsed_time_millis_threshold", + Defaults.ELAPSED_TIME_MILLIS_THRESHOLD, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the heap usage threshold (in percentage) for an individual search shard task before it is considered for cancellation. + */ + private volatile double heapPercentThreshold; + public static final Setting SETTING_HEAP_PERCENT_THRESHOLD = Setting.doubleSetting( + "search_backpressure.search_shard_task.heap_percent_threshold", + Defaults.HEAP_PERCENT_THRESHOLD, + 0.0, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the heap usage variance for an individual search shard task before it is considered for cancellation. + * A task is considered for cancellation when taskHeapUsage is greater than or equal to heapUsageMovingAverage * variance. + */ + private volatile double heapVarianceThreshold; + public static final Setting SETTING_HEAP_VARIANCE_THRESHOLD = Setting.doubleSetting( + "search_backpressure.search_shard_task.heap_variance", + Defaults.HEAP_VARIANCE_THRESHOLD, + 0.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the window size to calculate the moving average of heap usage of completed search shard tasks. 
+ */ + private volatile int heapMovingAverageWindowSize; + public static final Setting SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE = Setting.intSetting( + "search_backpressure.search_shard_task.heap_moving_average_window_size", + Defaults.HEAP_MOVING_AVERAGE_WINDOW_SIZE, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + public SearchShardTaskSettings(Settings settings, ClusterSettings clusterSettings) { totalHeapPercentThreshold = SETTING_TOTAL_HEAP_PERCENT_THRESHOLD.get(settings); + this.cpuTimeMillisThreshold = SETTING_CPU_TIME_MILLIS_THRESHOLD.get(settings); + this.elapsedTimeMillisThreshold = SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.get(settings); + heapPercentThreshold = SETTING_HEAP_PERCENT_THRESHOLD.get(settings); + heapVarianceThreshold = SETTING_HEAP_VARIANCE_THRESHOLD.get(settings); + heapMovingAverageWindowSize = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.get(settings); clusterSettings.addSettingsUpdateConsumer(SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, this::setTotalHeapPercentThreshold); + clusterSettings.addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD, this::setCpuTimeMillisThreshold); + clusterSettings.addSettingsUpdateConsumer(SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, this::setElapsedTimeMillisThreshold); + clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD, this::setHeapPercentThreshold); + clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD, this::setHeapVarianceThreshold); + clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::setHeapMovingAverageWindowSize); } public double getTotalHeapPercentThreshold() { @@ -52,7 +131,47 @@ public long getTotalHeapBytesThreshold() { return (long) (HEAP_SIZE_BYTES * getTotalHeapPercentThreshold()); } - private void setTotalHeapPercentThreshold(double totalHeapPercentThreshold) { + public long getCpuTimeNanosThreshold() { + return TimeUnit.MILLISECONDS.toNanos(cpuTimeMillisThreshold); + } + + public long 
getElapsedTimeNanosThreshold() { + return TimeUnit.MILLISECONDS.toNanos(elapsedTimeMillisThreshold); + } + + public long getHeapBytesThreshold() { + return (long) (HEAP_SIZE_BYTES * heapPercentThreshold); + } + + public double getHeapVarianceThreshold() { + return heapVarianceThreshold; + } + + public int getHeapMovingAverageWindowSize() { + return heapMovingAverageWindowSize; + } + + public void setTotalHeapPercentThreshold(double totalHeapPercentThreshold) { this.totalHeapPercentThreshold = totalHeapPercentThreshold; } + + public void setCpuTimeMillisThreshold(long cpuTimeMillisThreshold) { + this.cpuTimeMillisThreshold = cpuTimeMillisThreshold; + } + + public void setElapsedTimeMillisThreshold(long elapsedTimeMillisThreshold) { + this.elapsedTimeMillisThreshold = elapsedTimeMillisThreshold; + } + + public void setHeapPercentThreshold(double heapPercentThreshold) { + this.heapPercentThreshold = heapPercentThreshold; + } + + public void setHeapVarianceThreshold(double heapVarianceThreshold) { + this.heapVarianceThreshold = heapVarianceThreshold; + } + + public void setHeapMovingAverageWindowSize(int heapMovingAverageWindowSize) { + this.heapMovingAverageWindowSize = heapMovingAverageWindowSize; + } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java index f28cdd17a3cff..3b89ba7f3492d 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java @@ -8,11 +8,14 @@ package org.opensearch.search.backpressure.settings; +import org.apache.logging.log4j.LogManager; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.monitor.jvm.JvmStats; +import java.util.concurrent.TimeUnit; + 
/** * Defines the settings related to the cancellation of SearchTasks. * @@ -24,6 +27,11 @@ public class SearchTaskSettings { private static class Defaults { private static final double TOTAL_HEAP_PERCENT_THRESHOLD = 0.05; + private static final long CPU_TIME_MILLIS_THRESHOLD = 60000; + private static final long ELAPSED_TIME_MILLIS_THRESHOLD = 120000; + private static final double HEAP_PERCENT_THRESHOLD = 0.02; + private static final double HEAP_VARIANCE_THRESHOLD = 2.0; + private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE = 100; } /** @@ -31,7 +39,7 @@ private static class Defaults { * before in-flight cancellation is applied. */ private volatile double totalHeapPercentThreshold; - public static final Setting SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY = Setting.doubleSetting( + public static final Setting SETTING_TOTAL_HEAP_PERCENT_THRESHOLD = Setting.doubleSetting( "search_backpressure.search_task.total_heap_percent_threshold", Defaults.TOTAL_HEAP_PERCENT_THRESHOLD, 0.0, @@ -40,12 +48,81 @@ private static class Defaults { Setting.Property.NodeScope ); + /** + * Defines the CPU usage threshold (in millis) for an individual search task before it is considered for cancellation. + */ + private volatile long cpuTimeMillisThreshold; + public static final Setting SETTING_CPU_TIME_MILLIS_THRESHOLD = Setting.longSetting( + "search_backpressure.search_task.cpu_time_millis_threshold", + Defaults.CPU_TIME_MILLIS_THRESHOLD, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the elapsed time threshold (in millis) for an individual search task before it is considered for cancellation. 
+ */ + private volatile long elapsedTimeMillisThreshold; + public static final Setting SETTING_ELAPSED_TIME_MILLIS_THRESHOLD = Setting.longSetting( + "search_backpressure.search_task.elapsed_time_millis_threshold", + Defaults.ELAPSED_TIME_MILLIS_THRESHOLD, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the heap usage threshold (in percentage) for an individual search task before it is considered for cancellation. + */ + private volatile double heapPercentThreshold; + public static final Setting SETTING_HEAP_PERCENT_THRESHOLD = Setting.doubleSetting( + "search_backpressure.search_task.heap_percent_threshold", + Defaults.HEAP_PERCENT_THRESHOLD, + 0.0, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the heap usage variance for an individual search task before it is considered for cancellation. + * A task is considered for cancellation when taskHeapUsage is greater than or equal to heapUsageMovingAverage * variance. + */ + private volatile double heapVarianceThreshold; + public static final Setting SETTING_HEAP_VARIANCE_THRESHOLD = Setting.doubleSetting( + "search_backpressure.search_task.heap_variance", + Defaults.HEAP_VARIANCE_THRESHOLD, + 0.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the window size to calculate the moving average of heap usage of completed search tasks. 
+ */ + private volatile int heapMovingAverageWindowSize; + public static final Setting SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE = Setting.intSetting( + "search_backpressure.search_task.heap_moving_average_window_size", + Defaults.HEAP_MOVING_AVERAGE_WINDOW_SIZE, + 0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + public SearchTaskSettings(Settings settings, ClusterSettings clusterSettings) { - totalHeapPercentThreshold = SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY.get(settings); - clusterSettings.addSettingsUpdateConsumer( - SETTING_TOTAL_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY, - this::setTotalHeapPercentThreshold - ); + this.totalHeapPercentThreshold = SETTING_TOTAL_HEAP_PERCENT_THRESHOLD.get(settings); + this.cpuTimeMillisThreshold = SETTING_CPU_TIME_MILLIS_THRESHOLD.get(settings); + this.elapsedTimeMillisThreshold = SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.get(settings); + this.heapPercentThreshold = SETTING_HEAP_PERCENT_THRESHOLD.get(settings); + this.heapVarianceThreshold = SETTING_HEAP_VARIANCE_THRESHOLD.get(settings); + this.heapMovingAverageWindowSize = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.get(settings); + clusterSettings.addSettingsUpdateConsumer(SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, this::setTotalHeapPercentThreshold); + clusterSettings.addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD, this::setCpuTimeMillisThreshold); + clusterSettings.addSettingsUpdateConsumer(SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, this::setElapsedTimeMillisThreshold); + clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD, this::setHeapPercentThreshold); + clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD, this::setHeapVarianceThreshold); + clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::setHeapMovingAverageWindowSize); } public double getTotalHeapPercentThreshold() { @@ -56,7 +133,48 @@ public long getTotalHeapBytesThreshold() { return (long) (HEAP_SIZE_BYTES * 
getTotalHeapPercentThreshold()); } - private void setTotalHeapPercentThreshold(double totalHeapPercentThreshold) { + public long getCpuTimeNanosThreshold() { + return TimeUnit.MILLISECONDS.toNanos(cpuTimeMillisThreshold); + } + + public long getElapsedTimeNanosThreshold() { + return TimeUnit.MILLISECONDS.toNanos(elapsedTimeMillisThreshold); + } + + public long getHeapBytesThreshold() { + return (long) (HEAP_SIZE_BYTES * heapPercentThreshold); + } + + public double getHeapVarianceThreshold() { + return heapVarianceThreshold; + } + + public int getHeapMovingAverageWindowSize() { + return heapMovingAverageWindowSize; + } + + public void setTotalHeapPercentThreshold(double totalHeapPercentThreshold) { this.totalHeapPercentThreshold = totalHeapPercentThreshold; } + + public void setCpuTimeMillisThreshold(long cpuTimeMillisThreshold) { + LogManager.getLogger(SearchTaskSettings.class).info("setCpuTimeMillisThreshold " + cpuTimeMillisThreshold); + this.cpuTimeMillisThreshold = cpuTimeMillisThreshold; + } + + public void setElapsedTimeMillisThreshold(long elapsedTimeMillisThreshold) { + this.elapsedTimeMillisThreshold = elapsedTimeMillisThreshold; + } + + public void setHeapPercentThreshold(double heapPercentThreshold) { + this.heapPercentThreshold = heapPercentThreshold; + } + + public void setHeapVarianceThreshold(double heapVarianceThreshold) { + this.heapVarianceThreshold = heapVarianceThreshold; + } + + public void setHeapMovingAverageWindowSize(int heapMovingAverageWindowSize) { + this.heapMovingAverageWindowSize = heapMovingAverageWindowSize; + } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java index 80ed849400d8d..bd5f24ef0dbee 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java +++ 
b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java @@ -23,13 +23,13 @@ * Stats related to search backpressure. */ public class SearchBackpressureStats implements ToXContentFragment, Writeable { - private final SearchTaskStats searchTaskStats; - private final SearchShardTaskStats searchShardTaskStats; + private final SearchBackpressureTaskStats searchTaskStats; + private final SearchBackpressureTaskStats searchShardTaskStats; private final SearchBackpressureMode mode; public SearchBackpressureStats( - SearchTaskStats searchTaskStats, - SearchShardTaskStats searchShardTaskStats, + SearchBackpressureTaskStats searchTaskStats, + SearchBackpressureTaskStats searchShardTaskStats, SearchBackpressureMode mode ) { this.searchTaskStats = searchTaskStats; @@ -38,10 +38,10 @@ public SearchBackpressureStats( } public SearchBackpressureStats(StreamInput in) throws IOException { - searchShardTaskStats = new SearchShardTaskStats(in); + searchShardTaskStats = new SearchBackpressureTaskStats(in); mode = SearchBackpressureMode.fromName(in.readString()); if (in.getVersion().onOrAfter(Version.V_3_0_0)) { - searchTaskStats = new SearchTaskStats(in); + searchTaskStats = new SearchBackpressureTaskStats(in); } else { searchTaskStats = null; } @@ -60,7 +60,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws public void writeTo(StreamOutput out) throws IOException { searchShardTaskStats.writeTo(out); out.writeString(mode.getName()); - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (Version.CURRENT.onOrAfter(Version.V_3_0_0) && out.getVersion().onOrAfter(Version.V_3_0_0)) { searchTaskStats.writeTo(out); } } @@ -70,7 +70,9 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; SearchBackpressureStats that = (SearchBackpressureStats) o; - return searchTaskStats.equals(that.searchTaskStats) && 
searchShardTaskStats.equals(that.searchShardTaskStats) && mode == that.mode; + return (Version.CURRENT.onOrAfter(Version.V_3_0_0) + && searchTaskStats.equals(that.searchTaskStats) + && searchShardTaskStats.equals(that.searchShardTaskStats)) && mode == that.mode; } @Override diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java similarity index 80% rename from server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java rename to server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java index 4d532cfb12f80..f6925c4c6bc8c 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java @@ -27,12 +27,12 @@ /** * Stats related to cancelled search shard tasks. 
*/ -public class SearchShardTaskStats implements ToXContentObject, Writeable { +public class SearchBackpressureTaskStats implements ToXContentObject, Writeable { private final long cancellationCount; private final long limitReachedCount; private final Map resourceUsageTrackerStats; - public SearchShardTaskStats( + public SearchBackpressureTaskStats( long cancellationCount, long limitReachedCount, Map resourceUsageTrackerStats @@ -42,7 +42,7 @@ public SearchShardTaskStats( this.resourceUsageTrackerStats = resourceUsageTrackerStats; } - public SearchShardTaskStats(StreamInput in) throws IOException { + public SearchBackpressureTaskStats(StreamInput in) throws IOException { this.cancellationCount = in.readVLong(); this.limitReachedCount = in.readVLong(); @@ -85,10 +85,25 @@ public void writeTo(StreamOutput out) throws IOException { public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - SearchShardTaskStats that = (SearchShardTaskStats) o; + SearchBackpressureTaskStats that = (SearchBackpressureTaskStats) o; return cancellationCount == that.cancellationCount && limitReachedCount == that.limitReachedCount - && resourceUsageTrackerStats.equals(that.resourceUsageTrackerStats); + && compareMaps(resourceUsageTrackerStats, that.resourceUsageTrackerStats); + } + + private boolean compareMaps( + Map trackers1, + Map trackers2 + ) { + if (trackers1.size() != trackers2.size()) { + return false; + } + for (Map.Entry e1 : trackers1.entrySet()) { + if (trackers2.containsKey(e1.getKey()) == false || trackers2.get(e1.getKey()).equals(e1.getValue()) == false) { + return false; + } + } + return true; } @Override diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java deleted file mode 100644 index 87318a60b46fd..0000000000000 --- 
a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java +++ /dev/null @@ -1,100 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.search.backpressure.stats; - -import org.opensearch.common.collect.MapBuilder; -import org.opensearch.common.io.stream.StreamInput; -import org.opensearch.common.io.stream.StreamOutput; -import org.opensearch.common.io.stream.Writeable; -import org.opensearch.common.xcontent.ToXContent; -import org.opensearch.common.xcontent.ToXContentObject; -import org.opensearch.common.xcontent.XContentBuilder; -import org.opensearch.search.backpressure.trackers.CpuUsageTracker; -import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; -import org.opensearch.search.backpressure.trackers.HeapUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; - -import java.io.IOException; -import java.util.Map; -import java.util.Objects; - -/** - * Stats related to cancelled search tasks. 
- */ - -public class SearchTaskStats implements ToXContentObject, Writeable { - private final long cancellationCount; - private final long limitReachedCount; - private final Map resourceUsageTrackerStats; - - public SearchTaskStats( - long cancellationCount, - long limitReachedCount, - Map resourceUsageTrackerStats - ) { - this.cancellationCount = cancellationCount; - this.limitReachedCount = limitReachedCount; - this.resourceUsageTrackerStats = resourceUsageTrackerStats; - } - - public SearchTaskStats(StreamInput in) throws IOException { - this.cancellationCount = in.readVLong(); - this.limitReachedCount = in.readVLong(); - - MapBuilder builder = new MapBuilder<>(); - builder.put(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, in.readOptionalWriteable(CpuUsageTracker.Stats::new)); - builder.put(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, in.readOptionalWriteable(HeapUsageTracker.Stats::new)); - builder.put(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, in.readOptionalWriteable(ElapsedTimeTracker.Stats::new)); - this.resourceUsageTrackerStats = builder.immutableMap(); - } - - @Override - public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException { - builder.startObject(); - - builder.startObject("resource_tracker_stats"); - for (Map.Entry entry : resourceUsageTrackerStats.entrySet()) { - builder.field(entry.getKey().getName(), entry.getValue()); - } - builder.endObject(); - - builder.startObject("cancellation_stats") - .field("cancellation_count", cancellationCount) - .field("cancellation_limit_reached_count", limitReachedCount) - .endObject(); - - return builder.endObject(); - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeVLong(cancellationCount); - out.writeVLong(limitReachedCount); - - out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER)); - 
out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER)); - out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER)); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - SearchTaskStats that = (SearchTaskStats) o; - return cancellationCount == that.cancellationCount - && limitReachedCount == that.limitReachedCount - && resourceUsageTrackerStats.equals(that.resourceUsageTrackerStats); - } - - @Override - public int hashCode() { - return Objects.hash(cancellationCount, limitReachedCount, resourceUsageTrackerStats); - } -} diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java index 1e332eca2649c..5215a17a61a8c 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java @@ -8,12 +8,9 @@ package org.opensearch.search.backpressure.trackers; -import org.opensearch.action.search.SearchTask; -import org.opensearch.common.settings.Setting; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; import org.opensearch.common.unit.TimeValue; -import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.common.xcontent.XContentBuilder; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; @@ -23,6 +20,7 @@ import java.util.Objects; import java.util.Optional; import java.util.concurrent.TimeUnit; +import java.util.function.LongSupplier; import static org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType.CPU_USAGE_TRACKER; @@ -32,41 +30,11 @@ * @opensearch.internal */ public class CpuUsageTracker 
extends TaskResourceUsageTracker { - private static class Defaults { - private static final long CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY = 60000; - private static final long CPU_TIME_MILLIS_THRESHOLD = 15000; - } - /** - * Defines the CPU usage threshold (in millis) for an individual search task before it is considered for cancellation. - */ - private volatile long cpuTimeMillisThresholdForSearchQuery; - public static final Setting SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY = Setting.longSetting( - "search_backpressure.search_task.cpu_time_millis_threshold_for_search_query", - Defaults.CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, - 0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); + private final LongSupplier thresholdSupplier; - /** - * Defines the CPU usage threshold (in millis) for an individual search shard task before it is considered for cancellation. - */ - private volatile long cpuTimeMillisThreshold; - public static final Setting SETTING_CPU_TIME_MILLIS_THRESHOLD = Setting.longSetting( - "search_backpressure.search_shard_task.cpu_time_millis_threshold", - Defaults.CPU_TIME_MILLIS_THRESHOLD, - 0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - public CpuUsageTracker(SearchBackpressureSettings settings) { - this.cpuTimeMillisThresholdForSearchQuery = SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); - this.cpuTimeMillisThreshold = SETTING_CPU_TIME_MILLIS_THRESHOLD.get(settings.getSettings()); - settings.getClusterSettings() - .addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, this::setCpuTimeMillisThresholdForSearchQuery); - settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD, this::setCpuTimeMillisThreshold); + public CpuUsageTracker(LongSupplier thresholdSupplier) { + this.thresholdSupplier = thresholdSupplier; } @Override @@ -77,7 +45,7 @@ public String name() { @Override public Optional 
checkAndMaybeGetCancellationReason(Task task) { long usage = task.getTotalResourceStats().getCpuTimeInNanos(); - long threshold = (task instanceof SearchTask) ? getCpuTimeNanosThresholdForSearchQuery() : getCpuTimeNanosThreshold(); + long threshold = thresholdSupplier.getAsLong(); if (usage < threshold) { return Optional.empty(); @@ -95,37 +63,11 @@ public Optional checkAndMaybeGetCancellationReason(Task ); } - public long getCpuTimeNanosThresholdForSearchQuery() { - return TimeUnit.MILLISECONDS.toNanos(cpuTimeMillisThresholdForSearchQuery); - } - - public long getCpuTimeNanosThreshold() { - return TimeUnit.MILLISECONDS.toNanos(cpuTimeMillisThreshold); - } - - public void setCpuTimeMillisThresholdForSearchQuery(long cpuTimeMillisThresholdForSearchQuery) { - this.cpuTimeMillisThresholdForSearchQuery = cpuTimeMillisThresholdForSearchQuery; - } - - public void setCpuTimeMillisThreshold(long cpuTimeMillisThreshold) { - this.cpuTimeMillisThreshold = cpuTimeMillisThreshold; - } - - @Override - public TaskResourceUsageTracker.Stats searchTaskStats(List searchTasks) { - long currentMax = searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); - long currentAvg = (long) searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).average().orElse(0); - return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg); - } - @Override - public TaskResourceUsageTracker.Stats searchShardTaskStats(List searchShardTasks) { - long currentMax = searchShardTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); - long currentAvg = (long) searchShardTasks.stream() - .mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()) - .average() - .orElse(0); - return new Stats(getSearchShardTaskCancellationCount(), currentMax, currentAvg); + public TaskResourceUsageTracker.Stats stats(List tasks) { + long currentMax = tasks.stream().mapToLong(t -> 
t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); + long currentAvg = (long) tasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).average().orElse(0); + return new Stats(getCancellations(), currentMax, currentAvg); } /** diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java index 3b1b904178b2c..d1700861476d6 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java @@ -8,12 +8,9 @@ package org.opensearch.search.backpressure.trackers; -import org.opensearch.action.search.SearchTask; -import org.opensearch.common.settings.Setting; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; import org.opensearch.common.unit.TimeValue; -import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.common.xcontent.XContentBuilder; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; @@ -33,47 +30,12 @@ * @opensearch.internal */ public class ElapsedTimeTracker extends TaskResourceUsageTracker { - private static class Defaults { - private static final long ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY = 120000; - private static final long ELAPSED_TIME_MILLIS_THRESHOLD = 30000; - } - - /** - * Defines the elapsed time threshold (in millis) for an individual search task before it is considered for cancellation. 
- */ - private volatile long elapsedTimeMillisThresholdForSearchQuery; - public static final Setting SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY = Setting.longSetting( - "search_backpressure.search_task.elapsed_time_millis_threshold_for_search_query", - Defaults.ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, - 0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the elapsed time threshold (in millis) for an individual search shard task before it is considered for cancellation. - */ - private volatile long elapsedTimeMillisThreshold; - public static final Setting SETTING_ELAPSED_TIME_MILLIS_THRESHOLD = Setting.longSetting( - "search_backpressure.search_shard_task.elapsed_time_millis_threshold", - Defaults.ELAPSED_TIME_MILLIS_THRESHOLD, - 0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - + private final LongSupplier thresholdSupplier; private final LongSupplier timeNanosSupplier; - public ElapsedTimeTracker(SearchBackpressureSettings settings, LongSupplier timeNanosSupplier) { + public ElapsedTimeTracker(LongSupplier thresholdSupplier, LongSupplier timeNanosSupplier) { + this.thresholdSupplier = thresholdSupplier; this.timeNanosSupplier = timeNanosSupplier; - this.elapsedTimeMillisThresholdForSearchQuery = SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); - this.elapsedTimeMillisThreshold = SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.get(settings.getSettings()); - settings.getClusterSettings() - .addSettingsUpdateConsumer( - SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY, - this::setElapsedTimeMillisThresholdForSearchQuery - ); - settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, this::setElapsedTimeMillisThreshold); } @Override @@ -84,7 +46,7 @@ public String name() { @Override public Optional checkAndMaybeGetCancellationReason(Task task) { long usage = timeNanosSupplier.getAsLong() - task.getStartTimeNanos(); - long threshold 
= (task instanceof SearchTask) ? getElapsedTimeNanosThresholdForSearchQuery() : getElapsedTimeNanosThreshold(); + long threshold = thresholdSupplier.getAsLong(); if (usage < threshold) { return Optional.empty(); @@ -102,36 +64,12 @@ public Optional checkAndMaybeGetCancellationReason(Task ); } - public long getElapsedTimeNanosThresholdForSearchQuery() { - return TimeUnit.MILLISECONDS.toNanos(elapsedTimeMillisThresholdForSearchQuery); - } - - public long getElapsedTimeNanosThreshold() { - return TimeUnit.MILLISECONDS.toNanos(elapsedTimeMillisThreshold); - } - - public void setElapsedTimeMillisThresholdForSearchQuery(long elapsedTimeMillisThresholdForSearchQuery) { - this.elapsedTimeMillisThresholdForSearchQuery = elapsedTimeMillisThresholdForSearchQuery; - } - - public void setElapsedTimeMillisThreshold(long elapsedTimeMillisThreshold) { - this.elapsedTimeMillisThreshold = elapsedTimeMillisThreshold; - } - - @Override - public TaskResourceUsageTracker.Stats searchTaskStats(List searchTasks) { - long now = timeNanosSupplier.getAsLong(); - long currentMax = searchTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); - long currentAvg = (long) searchTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); - return new Stats(getSearchTaskCancellationCount(), currentMax, currentAvg); - } - @Override - public TaskResourceUsageTracker.Stats searchShardTaskStats(List searchShardTasks) { + public TaskResourceUsageTracker.Stats stats(List tasks) { long now = timeNanosSupplier.getAsLong(); - long currentMax = searchShardTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); - long currentAvg = (long) searchShardTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); - return new Stats(getSearchShardTaskCancellationCount(), currentMax, currentAvg); + long currentMax = tasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); + long currentAvg = (long) 
tasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); + return new Stats(getCancellations(), currentMax, currentAvg); } /** diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java index de96f13891112..915b6ed60f685 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java @@ -8,15 +8,15 @@ package org.opensearch.search.backpressure.trackers; -import org.opensearch.action.search.SearchTask; -import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; import org.opensearch.common.unit.ByteSizeValue; import org.opensearch.common.util.MovingAverage; import org.opensearch.monitor.jvm.JvmStats; -import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; @@ -25,6 +25,9 @@ import java.util.Objects; import java.util.Optional; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.DoubleSupplier; +import java.util.function.IntSupplier; +import java.util.function.LongSupplier; import static org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER; @@ -36,120 +39,25 @@ */ public class HeapUsageTracker extends TaskResourceUsageTracker { private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); - - private static class Defaults { - private static 
final double HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY = 0.02; - private static final double HEAP_PERCENT_THRESHOLD = 0.005; - private static final double HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY = 2.0; - private static final double HEAP_VARIANCE_THRESHOLD = 2.0; - private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY = 100; - private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE = 100; - } - - /** - * Defines the heap usage threshold (in percentage) for an individual search task before it is considered for cancellation. - */ - private volatile double heapPercentThresholdForSearchQuery; - public static final Setting SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY = Setting.doubleSetting( - "search_backpressure.search_task.heap_percent_threshold_for_search_query", - Defaults.HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY, - 0.0, - 1.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the heap usage threshold (in percentage) for an individual search shard task before it is considered for cancellation. - */ - private volatile double heapPercentThreshold; - public static final Setting SETTING_HEAP_PERCENT_THRESHOLD = Setting.doubleSetting( - "search_backpressure.search_shard_task.heap_percent_threshold", - Defaults.HEAP_PERCENT_THRESHOLD, - 0.0, - 1.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the heap usage variance for an individual search task before it is considered for cancellation. - * A task is considered for cancellation when taskHeapUsage is greater than or equal to heapUsageMovingAverage * variance. 
- */ - private volatile double heapVarianceThresholdForSearchQuery; - public static final Setting SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY = Setting.doubleSetting( - "search_backpressure.search_task.heap_variance_for_search_query", - Defaults.HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY, - 0.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the heap usage variance for an individual search shard task before it is considered for cancellation. - * A task is considered for cancellation when taskHeapUsage is greater than or equal to heapUsageMovingAverage * variance. - */ - private volatile double heapVarianceThreshold; - public static final Setting SETTING_HEAP_VARIANCE_THRESHOLD = Setting.doubleSetting( - "search_backpressure.search_shard_task.heap_variance", - Defaults.HEAP_VARIANCE_THRESHOLD, - 0.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the window size to calculate the moving average of heap usage of completed search tasks. - */ - private volatile int heapMovingAverageWindowSizeForSearchQuery; - public static final Setting SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY = Setting.intSetting( - "search_backpressure.search_task.heap_moving_average_window_size_for_search_query", - Defaults.HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY, - 0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the window size to calculate the moving average of heap usage of completed search shard tasks. 
- */ - private volatile int heapMovingAverageWindowSize; - public static final Setting SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE = Setting.intSetting( - "search_backpressure.search_shard_task.heap_moving_average_window_size", - Defaults.HEAP_MOVING_AVERAGE_WINDOW_SIZE, - 0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - private final AtomicReference movingAverageReferenceForSearchQuery; + private final DoubleSupplier heapVarianceSupplier; + private final LongSupplier heapBytesThresholdSupplier; + private final IntSupplier windowSizeSupplier; private final AtomicReference movingAverageReference; - public HeapUsageTracker(SearchBackpressureSettings settings) { - heapPercentThresholdForSearchQuery = SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); - settings.getClusterSettings() - .addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD_FOR_SEARCH_QUERY, this::setHeapPercentThresholdForSearchQuery); - heapPercentThreshold = SETTING_HEAP_PERCENT_THRESHOLD.get(settings.getSettings()); - settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD, this::setHeapPercentThreshold); - - heapVarianceThresholdForSearchQuery = SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY.get(settings.getSettings()); - settings.getClusterSettings() - .addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY, this::setHeapVarianceThresholdForSearchQuery); - heapVarianceThreshold = SETTING_HEAP_VARIANCE_THRESHOLD.get(settings.getSettings()); - settings.getClusterSettings().addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD, this::setHeapVarianceThreshold); - - heapMovingAverageWindowSizeForSearchQuery = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY.get(settings.getSettings()); - settings.getClusterSettings() - .addSettingsUpdateConsumer( - SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY, - this::setHeapMovingAverageWindowSizeForSearchQuery - ); - heapMovingAverageWindowSize = 
SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.get(settings.getSettings()); - settings.getClusterSettings() - .addSettingsUpdateConsumer(SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::setHeapMovingAverageWindowSize); - - this.movingAverageReferenceForSearchQuery = new AtomicReference<>(new MovingAverage(heapMovingAverageWindowSizeForSearchQuery)); - this.movingAverageReference = new AtomicReference<>(new MovingAverage(heapMovingAverageWindowSize)); + public HeapUsageTracker( + DoubleSupplier heapVarianceSupplier, + LongSupplier heapBytesThresholdSupplier, + IntSupplier windowSizeSupplier, + ClusterSettings clusterSettings + ) { + this.heapVarianceSupplier = heapVarianceSupplier; + this.heapBytesThresholdSupplier = heapBytesThresholdSupplier; + this.windowSizeSupplier = windowSizeSupplier; + this.movingAverageReference = new AtomicReference<>(new MovingAverage(windowSizeSupplier.getAsInt())); + // TODO: find a way to get the type of the setting SearchTaskSettings/SearchShardTaskSettings and then add consumer only for the + // required setting + clusterSettings.addSettingsUpdateConsumer(SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::updateWindowSize); + clusterSettings.addSettingsUpdateConsumer(SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::updateWindowSize); } @Override @@ -159,18 +67,12 @@ public String name() { @Override public void update(Task task) { - if (task instanceof SearchTask) { - movingAverageReferenceForSearchQuery.get().record(task.getTotalResourceStats().getMemoryInBytes()); - } else { - movingAverageReference.get().record(task.getTotalResourceStats().getMemoryInBytes()); - } + movingAverageReference.get().record(task.getTotalResourceStats().getMemoryInBytes()); } @Override public Optional checkAndMaybeGetCancellationReason(Task task) { - MovingAverage movingAverage = (task instanceof SearchTask) - ? 
movingAverageReferenceForSearchQuery.get() - : movingAverageReference.get(); + MovingAverage movingAverage = movingAverageReference.get(); // There haven't been enough measurements. if (movingAverage.isReady() == false) { @@ -179,9 +81,9 @@ public Optional checkAndMaybeGetCancellationReason(Task double currentUsage = task.getTotalResourceStats().getMemoryInBytes(); double averageUsage = movingAverage.getAverage(); - double variance = (task instanceof SearchTask) ? getHeapVarianceThresholdForSearchQuery() : getHeapVarianceThreshold(); + double variance = heapVarianceSupplier.getAsDouble(); double allowedUsage = averageUsage * variance; - double threshold = (task instanceof SearchTask) ? getHeapBytesThresholdForSearchQuery() : getHeapBytesThreshold(); + double threshold = heapBytesThresholdSupplier.getAsLong(); if (currentUsage < threshold || currentUsage < allowedUsage) { return Optional.empty(); @@ -195,65 +97,15 @@ public Optional checkAndMaybeGetCancellationReason(Task ); } - public long getHeapBytesThresholdForSearchQuery() { - return (long) (HEAP_SIZE_BYTES * heapPercentThresholdForSearchQuery); - } - - public long getHeapBytesThreshold() { - return (long) (HEAP_SIZE_BYTES * heapPercentThreshold); - } - - public void setHeapPercentThresholdForSearchQuery(double heapPercentThresholdForSearchQuery) { - this.heapPercentThresholdForSearchQuery = heapPercentThresholdForSearchQuery; - } - - public void setHeapPercentThreshold(double heapPercentThreshold) { - this.heapPercentThreshold = heapPercentThreshold; - } - - public double getHeapVarianceThresholdForSearchQuery() { - return heapVarianceThresholdForSearchQuery; - } - - public double getHeapVarianceThreshold() { - return heapVarianceThreshold; - } - - public void setHeapVarianceThresholdForSearchQuery(double heapVarianceThresholdForSearchQuery) { - this.heapVarianceThresholdForSearchQuery = heapVarianceThresholdForSearchQuery; - } - - public void setHeapVarianceThreshold(double heapVarianceThreshold) { - 
this.heapVarianceThreshold = heapVarianceThreshold; - } - - public void setHeapMovingAverageWindowSizeForSearchQuery(int heapMovingAverageWindowSizeForSearchQuery) { - this.heapMovingAverageWindowSizeForSearchQuery = heapMovingAverageWindowSizeForSearchQuery; - this.movingAverageReferenceForSearchQuery.set(new MovingAverage(heapMovingAverageWindowSizeForSearchQuery)); - } - - public void setHeapMovingAverageWindowSize(int heapMovingAverageWindowSize) { - this.heapMovingAverageWindowSize = heapMovingAverageWindowSize; + private void updateWindowSize(int heapMovingAverageWindowSize) { this.movingAverageReference.set(new MovingAverage(heapMovingAverageWindowSize)); } @Override - public TaskResourceUsageTracker.Stats searchTaskStats(List searchTasks) { - long currentMax = searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); - long currentAvg = (long) searchTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); - return new Stats( - getSearchTaskCancellationCount(), - currentMax, - currentAvg, - (long) movingAverageReferenceForSearchQuery.get().getAverage() - ); - } - - @Override - public TaskResourceUsageTracker.Stats searchShardTaskStats(List searchShardTasks) { - long currentMax = searchShardTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); - long currentAvg = (long) searchShardTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); - return new Stats(getSearchShardTaskCancellationCount(), currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); + public TaskResourceUsageTracker.Stats stats(List tasks) { + long currentMax = tasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); + long currentAvg = (long) tasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); + return new Stats(getCancellations(), 
currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); } /** diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java index a08ca34cd37bc..e54cfcd5d3970 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java @@ -26,23 +26,14 @@ public abstract class TaskResourceUsageTracker { /** * Counts the number of cancellations made due to this tracker. */ - private final AtomicLong searchTaskCancellationCount = new AtomicLong(); - private final AtomicLong searchShardTaskCancellationCount = new AtomicLong(); + private final AtomicLong cancellations = new AtomicLong(); - public long incrementSearchTaskCancellations() { - return searchTaskCancellationCount.incrementAndGet(); + public long incrementCancellations() { + return cancellations.incrementAndGet(); } - public long incrementSearchShardTaskCancellations() { - return searchShardTaskCancellationCount.incrementAndGet(); - } - - public long getSearchTaskCancellationCount() { - return searchTaskCancellationCount.get(); - } - - public long getSearchShardTaskCancellationCount() { - return searchShardTaskCancellationCount.get(); + public long getCancellations() { + return cancellations.get(); } /** @@ -61,14 +52,9 @@ public void update(Task task) {} public abstract Optional checkAndMaybeGetCancellationReason(Task task); /** - * Returns the tracker's state for SearchTasks as seen in the stats API. - */ - public abstract Stats searchTaskStats(List activeTasks); - - /** - * Returns the tracker's state for SearchShardTasks as seen in the stats API. + * Returns the tracker's state for tasks as seen in the stats API. 
*/ - public abstract Stats searchShardTaskStats(List activeTasks); + public abstract Stats stats(List activeTasks); /** * Represents the tracker's state as seen in the stats API. diff --git a/server/src/main/java/org/opensearch/tasks/SearchBackpressureTask.java b/server/src/main/java/org/opensearch/tasks/SearchBackpressureTask.java new file mode 100644 index 0000000000000..89dc5ef1938e2 --- /dev/null +++ b/server/src/main/java/org/opensearch/tasks/SearchBackpressureTask.java @@ -0,0 +1,11 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.tasks; + +public interface SearchBackpressureTask {} diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index df3f725e25f45..d2dce8731d141 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -8,6 +8,7 @@ package org.opensearch.search.backpressure; +import org.apache.logging.log4j.LogManager; import org.opensearch.action.search.SearchShardTask; import org.opensearch.action.search.SearchTask; import org.opensearch.common.io.stream.StreamInput; @@ -18,11 +19,10 @@ import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; import org.opensearch.search.backpressure.settings.SearchTaskSettings; -import org.opensearch.search.backpressure.stats.SearchTaskStats; import org.opensearch.search.backpressure.trackers.NodeDuressTracker; import org.opensearch.common.xcontent.XContentBuilder; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; -import 
org.opensearch.search.backpressure.stats.SearchShardTaskStats; +import org.opensearch.search.backpressure.stats.SearchBackpressureTaskStats; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; import org.opensearch.tasks.CancellableTask; @@ -75,6 +75,7 @@ public void testIsNodeInDuress() { mockThreadPool, System::nanoTime, List.of(cpuUsageTracker, heapUsageTracker), + Collections.emptyList(), Collections.emptyList() ); @@ -115,13 +116,15 @@ public void testTrackerStateUpdateOnSearchTaskCompletion() { mockThreadPool, mockTimeNanosSupplier, Collections.emptyList(), - List.of(mockTaskResourceUsageTracker) + List.of(mockTaskResourceUsageTracker), + Collections.emptyList() ); for (int i = 0; i < 100; i++) { + // service.onTaskCompleted(new SearchTask(1, "test", "test", () -> "Test", TaskId.EMPTY_TASK_ID, new HashMap<>())); service.onTaskCompleted(createMockTaskWithResourceStats(SearchTask.class, 100, 200)); } - assertEquals(100, service.getSearchTasksState().getCompletionCount()); + assertEquals(100, service.getSearchBackpressureTaskStats(SearchTask.class).getCompletionCount()); verify(mockTaskResourceUsageTracker, times(100)).update(any()); } @@ -142,6 +145,7 @@ public void testTrackerStateUpdateOnSearchShardTaskCompletion() { mockThreadPool, mockTimeNanosSupplier, Collections.emptyList(), + Collections.emptyList(), List.of(mockTaskResourceUsageTracker) ); @@ -150,7 +154,7 @@ public void testTrackerStateUpdateOnSearchShardTaskCompletion() { for (int i = 0; i < 100; i++) { service.onTaskCompleted(createMockTaskWithResourceStats(SearchShardTask.class, 100, 200)); } - assertEquals(100, service.getSearchShardTasksState().getCompletionCount()); + assertEquals(100, service.getSearchBackpressureTaskStats(SearchShardTask.class).getCompletionCount()); verify(mockTaskResourceUsageTracker, times(100)).update(any()); } @@ -172,7 +176,8 @@ public void 
testSearchTaskInFlightCancellation() { mockThreadPool, mockTimeNanosSupplier, List.of(mockNodeDuressTracker), - List.of(mockTaskResourceUsageTracker) + List.of(mockTaskResourceUsageTracker), + Collections.emptyList() ); // Run two iterations so that node is marked 'in duress' from the third iteration onwards. @@ -197,26 +202,29 @@ public void testSearchTaskInFlightCancellation() { doReturn(activeSearchTasks).when(mockTaskResourceTrackingService).getResourceAwareTasks(); // There are 25 SearchTasks eligible for cancellation but only 10 will be cancelled (burst limit). + LogManager.getLogger(SearchBackpressureServiceTests.class).info("first run"); service.doRun(); - assertEquals(10, service.getSearchTasksState().getCancellationCount()); - assertEquals(1, service.getSearchTasksState().getLimitReachedCount()); + assertEquals(10, service.getSearchBackpressureTaskStats(SearchTask.class).getCancellationCount()); + assertEquals(1, service.getSearchBackpressureTaskStats(SearchTask.class).getLimitReachedCount()); // If the clock or completed task count haven't made sufficient progress, we'll continue to be rate-limited. + LogManager.getLogger(SearchBackpressureServiceTests.class).info("second run"); service.doRun(); - assertEquals(10, service.getSearchTasksState().getCancellationCount()); - assertEquals(2, service.getSearchTasksState().getLimitReachedCount()); + assertEquals(10, service.getSearchBackpressureTaskStats(SearchTask.class).getCancellationCount()); + assertEquals(2, service.getSearchBackpressureTaskStats(SearchTask.class).getLimitReachedCount()); // Fast-forward the clock by ten second to replenish some tokens. // This will add 50 tokens (time delta * rate) to 'rateLimitPerTime' but it will cancel only 10 tasks (burst limit). 
mockTime.addAndGet(TimeUnit.SECONDS.toNanos(10)); + LogManager.getLogger(SearchBackpressureServiceTests.class).info("third run"); service.doRun(); - assertEquals(20, service.getSearchTasksState().getCancellationCount()); - assertEquals(3, service.getSearchTasksState().getLimitReachedCount()); + assertEquals(20, service.getSearchBackpressureTaskStats(SearchTask.class).getCancellationCount()); + assertEquals(3, service.getSearchBackpressureTaskStats(SearchTask.class).getLimitReachedCount()); // Verify search backpressure stats. SearchBackpressureStats expectedStats = new SearchBackpressureStats( - new SearchTaskStats(20, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(20))), - new SearchShardTaskStats(0, 0, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(0))), + new SearchBackpressureTaskStats(20, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(20))), + new SearchBackpressureTaskStats(0, 0, Collections.emptyMap()), SearchBackpressureMode.ENFORCED ); SearchBackpressureStats actualStats = service.nodeStats(); @@ -241,6 +249,7 @@ public void testSearchShardTaskInFlightCancellation() { mockThreadPool, mockTimeNanosSupplier, List.of(mockNodeDuressTracker), + Collections.emptyList(), List.of(mockTaskResourceUsageTracker) ); @@ -267,13 +276,13 @@ public void testSearchShardTaskInFlightCancellation() { // There are 15 SearchShardTasks eligible for cancellation but only 10 will be cancelled (burst limit). service.doRun(); - assertEquals(10, service.getSearchShardTasksState().getCancellationCount()); - assertEquals(1, service.getSearchShardTasksState().getLimitReachedCount()); + assertEquals(10, service.getSearchBackpressureTaskStats(SearchShardTask.class).getCancellationCount()); + assertEquals(1, service.getSearchBackpressureTaskStats(SearchShardTask.class).getLimitReachedCount()); // If the clock or completed task count haven't made sufficient progress, we'll continue to be rate-limited. 
service.doRun(); - assertEquals(10, service.getSearchShardTasksState().getCancellationCount()); - assertEquals(2, service.getSearchShardTasksState().getLimitReachedCount()); + assertEquals(10, service.getSearchBackpressureTaskStats(SearchShardTask.class).getCancellationCount()); + assertEquals(2, service.getSearchBackpressureTaskStats(SearchShardTask.class).getLimitReachedCount()); // Simulate task completion to replenish some tokens. // This will add 2 tokens (task count delta * cancellationRatio) to 'rateLimitPerTaskCompletion'. @@ -281,20 +290,21 @@ public void testSearchShardTaskInFlightCancellation() { service.onTaskCompleted(createMockTaskWithResourceStats(SearchShardTask.class, 100, taskHeapUsageBytes)); } service.doRun(); - assertEquals(12, service.getSearchShardTasksState().getCancellationCount()); - assertEquals(3, service.getSearchShardTasksState().getLimitReachedCount()); + assertEquals(12, service.getSearchBackpressureTaskStats(SearchShardTask.class).getCancellationCount()); + assertEquals(3, service.getSearchBackpressureTaskStats(SearchShardTask.class).getLimitReachedCount()); // Fast-forward the clock by one second to replenish some tokens. // This will add 3 tokens (time delta * rate) to 'rateLimitPerTime'. mockTime.addAndGet(TimeUnit.SECONDS.toNanos(1)); service.doRun(); - assertEquals(15, service.getSearchShardTasksState().getCancellationCount()); - assertEquals(3, service.getSearchShardTasksState().getLimitReachedCount()); // no more tasks to cancel; limit not reached + assertEquals(15, service.getSearchBackpressureTaskStats(SearchShardTask.class).getCancellationCount()); + assertEquals(3, service.getSearchBackpressureTaskStats(SearchShardTask.class).getLimitReachedCount()); // no more tasks to cancel; + // limit not reached // Verify search backpressure stats. 
SearchBackpressureStats expectedStats = new SearchBackpressureStats( - new SearchTaskStats(0, 0, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(0))), - new SearchShardTaskStats(15, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(15))), + new SearchBackpressureTaskStats(0, 0, Collections.emptyMap()), + new SearchBackpressureTaskStats(15, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(15))), SearchBackpressureMode.ENFORCED ); SearchBackpressureStats actualStats = service.nodeStats(); @@ -306,9 +316,9 @@ private SearchBackpressureSettings getBackpressureSettings(String mode, double r new SearchBackpressureSettings( Settings.builder() .put(SearchBackpressureSettings.SETTING_MODE.getKey(), mode) - .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATIO.getKey(), ratio) - .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATE.getKey(), rate) - .put(SearchBackpressureSettings.SETTING_CANCELLATION_BURST.getKey(), burst) + .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK.getKey(), ratio) + .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATE_SEARCH_TASK.getKey(), rate) + .put(SearchBackpressureSettings.SETTING_CANCELLATION_BURST_SEARCH_TASK.getKey(), burst) .build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) ) @@ -335,13 +345,8 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public Stats searchTaskStats(List searchTasks) { - return new MockStats(getSearchTaskCancellationCount()); - } - - @Override - public Stats searchShardTaskStats(List searchShardTasks) { - return new MockStats(getSearchShardTaskCancellationCount()); + public Stats stats(List tasks) { + return new MockStats(getCancellations()); } }; } diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java 
b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java index 0c86cf4b11239..3c301b40b5f4f 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java @@ -25,8 +25,8 @@ protected SearchBackpressureStats createTestInstance() { public static SearchBackpressureStats randomInstance() { return new SearchBackpressureStats( - SearchTaskStatsTests.randomInstance(), - SearchShardTaskStatsTests.randomInstance(), + SearchBackpressureTaskStatsTests.randomInstance(), + SearchBackpressureTaskStatsTests.randomInstance(), randomFrom(SearchBackpressureMode.DISABLED, SearchBackpressureMode.MONITOR_ONLY, SearchBackpressureMode.ENFORCED) ); } diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStatsTests.java similarity index 75% rename from server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java rename to server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStatsTests.java index d5bc9398492eb..92ff3ccee6227 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStatsTests.java @@ -18,18 +18,18 @@ import java.util.Map; -public class SearchShardTaskStatsTests extends AbstractWireSerializingTestCase { +public class SearchBackpressureTaskStatsTests extends AbstractWireSerializingTestCase { @Override - protected Writeable.Reader instanceReader() { - return SearchShardTaskStats::new; + protected Writeable.Reader instanceReader() { + return SearchBackpressureTaskStats::new; } @Override - protected SearchShardTaskStats createTestInstance() { + protected 
SearchBackpressureTaskStats createTestInstance() { return randomInstance(); } - public static SearchShardTaskStats randomInstance() { + public static SearchBackpressureTaskStats randomInstance() { Map resourceUsageTrackerStats = Map.of( TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new CpuUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), @@ -39,6 +39,6 @@ public static SearchShardTaskStats randomInstance() { new ElapsedTimeTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()) ); - return new SearchShardTaskStats(randomNonNegativeLong(), randomNonNegativeLong(), resourceUsageTrackerStats); + return new SearchBackpressureTaskStats(randomNonNegativeLong(), randomNonNegativeLong(), resourceUsageTrackerStats); } } diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java deleted file mode 100644 index 59375c22bb932..0000000000000 --- a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.search.backpressure.stats; - -import org.opensearch.common.io.stream.Writeable; -import org.opensearch.search.backpressure.trackers.CpuUsageTracker; -import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; -import org.opensearch.search.backpressure.trackers.HeapUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; -import org.opensearch.test.AbstractWireSerializingTestCase; - -import java.util.Map; - -public class SearchTaskStatsTests extends AbstractWireSerializingTestCase { - public static SearchTaskStats randomInstance() { - Map resourceUsageTrackerStats = Map.of( - TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, - new CpuUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), - TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, - new HeapUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), - TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, - new ElapsedTimeTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()) - ); - - return new SearchTaskStats(randomNonNegativeLong(), randomNonNegativeLong(), resourceUsageTrackerStats); - } - - @Override - protected Writeable.Reader instanceReader() { - return SearchTaskStats::new; - } - - @Override - protected SearchTaskStats createTestInstance() { - return randomInstance(); - } -} diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/CpuUsageTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/CpuUsageTrackerTests.java index 62e2950189436..8cdcbc7511bd2 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/CpuUsageTrackerTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/CpuUsageTrackerTests.java @@ -13,6 +13,8 @@ 
import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; +import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; import org.opensearch.test.OpenSearchTestCase; @@ -24,15 +26,15 @@ public class CpuUsageTrackerTests extends OpenSearchTestCase { private static final SearchBackpressureSettings mockSettings = new SearchBackpressureSettings( Settings.builder() - .put(CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD.getKey(), 15) // 15 ms - .put(CpuUsageTracker.SETTING_CPU_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 25) // 25 ms + .put(SearchShardTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD.getKey(), 15) // 15 ms + .put(SearchTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD.getKey(), 25) // 25 ms .build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) ); public void testSearchTaskEligibleForCancellation() { Task task = createMockTaskWithResourceStats(SearchTask.class, 100000000, 200); - CpuUsageTracker tracker = new CpuUsageTracker(mockSettings); + CpuUsageTracker tracker = new CpuUsageTracker(mockSettings.getSearchTaskSettings()::getCpuTimeNanosThreshold); Optional reason = tracker.checkAndMaybeGetCancellationReason(task); assertTrue(reason.isPresent()); @@ -42,7 +44,7 @@ public void testSearchTaskEligibleForCancellation() { public void testSearchShardTaskEligibleForCancellation() { Task task = createMockTaskWithResourceStats(SearchShardTask.class, 200000000, 200); - CpuUsageTracker tracker = new CpuUsageTracker(mockSettings); + CpuUsageTracker tracker = new CpuUsageTracker(mockSettings.getSearchShardTaskSettings()::getCpuTimeNanosThreshold); Optional reason = tracker.checkAndMaybeGetCancellationReason(task); assertTrue(reason.isPresent()); 
@@ -52,7 +54,7 @@ public void testSearchShardTaskEligibleForCancellation() { public void testNotEligibleForCancellation() { Task task = createMockTaskWithResourceStats(SearchShardTask.class, 5000000, 200); - CpuUsageTracker tracker = new CpuUsageTracker(mockSettings); + CpuUsageTracker tracker = new CpuUsageTracker(mockSettings.getSearchShardTaskSettings()::getCpuTimeNanosThreshold); Optional reason = tracker.checkAndMaybeGetCancellationReason(task); assertFalse(reason.isPresent()); diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTrackerTests.java index 1748ce8d7c253..921d01e7355a7 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTrackerTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTrackerTests.java @@ -13,6 +13,8 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; +import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; import org.opensearch.test.OpenSearchTestCase; @@ -25,15 +27,18 @@ public class ElapsedTimeTrackerTests extends OpenSearchTestCase { private static final SearchBackpressureSettings mockSettings = new SearchBackpressureSettings( Settings.builder() - .put(ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.getKey(), 100) // 100 ms - .put(ElapsedTimeTracker.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 150) // 150 ms + .put(SearchShardTaskSettings.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.getKey(), 100) // 100 ms + .put(SearchTaskSettings.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.getKey(), 150) // 150 ms .build(), 
new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) ); public void testSearchTaskEligibleForCancellation() { Task task = createMockTaskWithResourceStats(SearchTask.class, 1, 1, 0); - ElapsedTimeTracker tracker = new ElapsedTimeTracker(mockSettings, () -> 150000000); + ElapsedTimeTracker tracker = new ElapsedTimeTracker( + mockSettings.getSearchTaskSettings()::getElapsedTimeNanosThreshold, + () -> 150000000 + ); Optional reason = tracker.checkAndMaybeGetCancellationReason(task); assertTrue(reason.isPresent()); @@ -43,7 +48,10 @@ public void testSearchTaskEligibleForCancellation() { public void testSearchShardTaskEligibleForCancellation() { Task task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 1, 0); - ElapsedTimeTracker tracker = new ElapsedTimeTracker(mockSettings, () -> 200000000); + ElapsedTimeTracker tracker = new ElapsedTimeTracker( + mockSettings.getSearchShardTaskSettings()::getElapsedTimeNanosThreshold, + () -> 200000000 + ); Optional reason = tracker.checkAndMaybeGetCancellationReason(task); assertTrue(reason.isPresent()); @@ -53,7 +61,10 @@ public void testSearchShardTaskEligibleForCancellation() { public void testNotEligibleForCancellation() { Task task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 1, 150000000); - ElapsedTimeTracker tracker = new ElapsedTimeTracker(mockSettings, () -> 200000000); + ElapsedTimeTracker tracker = new ElapsedTimeTracker( + mockSettings.getSearchShardTaskSettings()::getElapsedTimeNanosThreshold, + () -> 200000000 + ); Optional reason = tracker.checkAndMaybeGetCancellationReason(task); assertFalse(reason.isPresent()); diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java index 74f36fe15551e..aa9bd39fb3451 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java +++ 
b/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java @@ -13,6 +13,8 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; +import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; import org.opensearch.test.OpenSearchTestCase; @@ -24,23 +26,33 @@ import static org.opensearch.search.backpressure.SearchBackpressureTestHelpers.createMockTaskWithResourceStats; public class HeapUsageTrackerTests extends OpenSearchTestCase { - private static final long HEAP_BYTES_THRESHOLD = 100; - private static final long HEAP_BYTES_THRESHOLD_FOR_SEARCH_QUERY = 50; + private static final long HEAP_BYTES_THRESHOLD_SEARCH_SHARD_TASK = 100; + private static final long HEAP_BYTES_THRESHOLD_SEARCH_TASK = 50; private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE = 100; private static final SearchBackpressureSettings mockSettings = new SearchBackpressureSettings( Settings.builder() - .put(HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD_FOR_SEARCH_QUERY.getKey(), 3.0) - .put(HeapUsageTracker.SETTING_HEAP_VARIANCE_THRESHOLD.getKey(), 2.0) - .put(HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE_FOR_SEARCH_QUERY.getKey(), HEAP_MOVING_AVERAGE_WINDOW_SIZE) - .put(HeapUsageTracker.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.getKey(), HEAP_MOVING_AVERAGE_WINDOW_SIZE) + .put(SearchTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD.getKey(), 3.0) + .put(SearchShardTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD.getKey(), 2.0) + .put(SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.getKey(), HEAP_MOVING_AVERAGE_WINDOW_SIZE) + .put(SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.getKey(), HEAP_MOVING_AVERAGE_WINDOW_SIZE) .build(), new 
ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) ); public void testSearchTaskEligibleForCancellation() { - HeapUsageTracker tracker = spy(new HeapUsageTracker(mockSettings)); - when(tracker.getHeapBytesThresholdForSearchQuery()).thenReturn(HEAP_BYTES_THRESHOLD_FOR_SEARCH_QUERY); + SearchTaskSettings mockSearchTaskSettings = spy( + new SearchTaskSettings(mockSettings.getSettings(), mockSettings.getClusterSettings()) + ); + when(mockSearchTaskSettings.getHeapBytesThreshold()).thenReturn(HEAP_BYTES_THRESHOLD_SEARCH_TASK); + HeapUsageTracker tracker = spy( + new HeapUsageTracker( + mockSearchTaskSettings::getHeapVarianceThreshold, + mockSearchTaskSettings::getHeapBytesThreshold, + mockSearchTaskSettings::getHeapMovingAverageWindowSize, + mockSettings.getClusterSettings() + ) + ); Task task = createMockTaskWithResourceStats(SearchTask.class, 1, 50); // Record enough observations to make the moving average 'ready'. @@ -57,8 +69,18 @@ public void testSearchTaskEligibleForCancellation() { } public void testSearchShardTaskEligibleForCancellation() { - HeapUsageTracker tracker = spy(new HeapUsageTracker(mockSettings)); - when(tracker.getHeapBytesThreshold()).thenReturn(HEAP_BYTES_THRESHOLD); + SearchShardTaskSettings mockSearchShardTaskSettings = spy( + new SearchShardTaskSettings(mockSettings.getSettings(), mockSettings.getClusterSettings()) + ); + when(mockSearchShardTaskSettings.getHeapBytesThreshold()).thenReturn(HEAP_BYTES_THRESHOLD_SEARCH_TASK); + HeapUsageTracker tracker = spy( + new HeapUsageTracker( + mockSearchShardTaskSettings::getHeapVarianceThreshold, + mockSearchShardTaskSettings::getHeapBytesThreshold, + mockSearchShardTaskSettings::getHeapMovingAverageWindowSize, + mockSettings.getClusterSettings() + ) + ); Task task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 50); // Record enough observations to make the moving average 'ready'. 
@@ -77,8 +99,18 @@ public void testSearchShardTaskEligibleForCancellation() { public void testNotEligibleForCancellation() { Task task; Optional reason; - HeapUsageTracker tracker = spy(new HeapUsageTracker(mockSettings)); - when(tracker.getHeapBytesThreshold()).thenReturn(HEAP_BYTES_THRESHOLD); + SearchShardTaskSettings mockSearchShardTaskSettings = spy( + new SearchShardTaskSettings(mockSettings.getSettings(), mockSettings.getClusterSettings()) + ); + when(mockSearchShardTaskSettings.getHeapBytesThreshold()).thenReturn(HEAP_BYTES_THRESHOLD_SEARCH_SHARD_TASK); + HeapUsageTracker tracker = spy( + new HeapUsageTracker( + mockSearchShardTaskSettings::getHeapVarianceThreshold, + mockSearchShardTaskSettings::getHeapBytesThreshold, + mockSearchShardTaskSettings::getHeapMovingAverageWindowSize, + mockSettings.getClusterSettings() + ) + ); // Task with heap usage < heapBytesThreshold. task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 99); diff --git a/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java b/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java index b8fa91f2d438b..bb577edd6667d 100644 --- a/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java +++ b/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java @@ -27,19 +27,16 @@ public void testTaskCancellation() { TaskResourceUsageTracker mockTracker3 = createMockTaskResourceUsageTracker("mock_tracker_3"); List reasons = new ArrayList<>(); - List callbacks = List.of( - mockTracker1::incrementSearchShardTaskCancellations, - mockTracker2::incrementSearchShardTaskCancellations - ); + List callbacks = List.of(mockTracker1::incrementCancellations, mockTracker2::incrementCancellations); TaskCancellation taskCancellation = new TaskCancellation(mockTask, reasons, callbacks); // Task does not have any reason to be cancelled. 
assertEquals(0, taskCancellation.totalCancellationScore()); assertFalse(taskCancellation.isEligibleForCancellation()); taskCancellation.cancel(); - assertEquals(0, mockTracker1.getSearchShardTaskCancellationCount()); - assertEquals(0, mockTracker2.getSearchShardTaskCancellationCount()); - assertEquals(0, mockTracker3.getSearchShardTaskCancellationCount()); + assertEquals(0, mockTracker1.getCancellations()); + assertEquals(0, mockTracker2.getCancellations()); + assertEquals(0, mockTracker3.getCancellations()); // Task has one or more reasons to be cancelled. reasons.add(new TaskCancellation.Reason("limits exceeded 1", 10)); @@ -51,9 +48,9 @@ public void testTaskCancellation() { // Cancel the task and validate the cancellation reason and invocation of callbacks. taskCancellation.cancel(); assertTrue(mockTask.getReasonCancelled().contains("limits exceeded 1, limits exceeded 2, limits exceeded 3")); - assertEquals(1, mockTracker1.getSearchShardTaskCancellationCount()); - assertEquals(1, mockTracker2.getSearchShardTaskCancellationCount()); - assertEquals(0, mockTracker3.getSearchShardTaskCancellationCount()); + assertEquals(1, mockTracker1.getCancellations()); + assertEquals(1, mockTracker2.getCancellations()); + assertEquals(0, mockTracker3.getCancellations()); } private static TaskResourceUsageTracker createMockTaskResourceUsageTracker(String name) { @@ -72,12 +69,7 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public Stats searchTaskStats(List searchTasks) { - return null; - } - - @Override - public Stats searchShardTaskStats(List searchShardTasks) { + public Stats stats(List searchShardTasks) { return null; } }; From 6f75c23655b89708433fe8d5c91c29fe3c6a3af8 Mon Sep 17 00:00:00 2001 From: PritLadani Date: Fri, 6 Jan 2023 09:40:59 +0530 Subject: [PATCH 16/34] Adding java docs Signed-off-by: PritLadani --- .../java/org/opensearch/tasks/SearchBackpressureTask.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git 
a/server/src/main/java/org/opensearch/tasks/SearchBackpressureTask.java b/server/src/main/java/org/opensearch/tasks/SearchBackpressureTask.java index 89dc5ef1938e2..0cab67e35ab02 100644 --- a/server/src/main/java/org/opensearch/tasks/SearchBackpressureTask.java +++ b/server/src/main/java/org/opensearch/tasks/SearchBackpressureTask.java @@ -8,4 +8,9 @@ package org.opensearch.tasks; +/** + * A task related to search backpressure + * + * @opensearch.internal + */ public interface SearchBackpressureTask {} From 2f299cecb322882bb1dbb206ed44410805ff073f Mon Sep 17 00:00:00 2001 From: PritLadani Date: Wed, 18 Jan 2023 20:02:17 +0530 Subject: [PATCH 17/34] Moving cancellation settings to task specific settings Signed-off-by: PritLadani --- .../common/settings/ClusterSettings.java | 25 ++- .../SearchBackpressureService.java | 150 +++++++------ .../settings/SearchBackpressureSettings.java | 210 +----------------- .../settings/SearchShardTaskSettings.java | 120 +++++++++- .../settings/SearchTaskSettings.java | 117 +++++++++- .../stats/SearchBackpressureStats.java | 18 +- .../stats/SearchBackpressureTaskStats.java | 17 +- .../trackers/CpuUsageTracker.java | 6 +- .../trackers/ElapsedTimeTracker.java | 6 +- .../trackers/HeapUsageTracker.java | 26 +-- .../SearchBackpressureServiceTests.java | 7 +- .../trackers/HeapUsageTrackerTests.java | 15 +- 12 files changed, 360 insertions(+), 357 deletions(-) diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 6d2de14c5c61b..e476fed1540d9 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -596,27 +596,28 @@ public void apply(Settings value, Settings current, Settings previous) { // Settings related to search backpressure SearchBackpressureSettings.SETTING_MODE, - 
SearchBackpressureSettings.SETTING_CANCELLATION_RATIO_SEARCH_TASK, - SearchBackpressureSettings.SETTING_CANCELLATION_RATE_SEARCH_TASK, - SearchBackpressureSettings.SETTING_CANCELLATION_BURST_SEARCH_TASK, - SearchBackpressureSettings.SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK, - SearchBackpressureSettings.SETTING_CANCELLATION_RATE_SEARCH_SHARD_TASK, - SearchBackpressureSettings.SETTING_CANCELLATION_BURST_SEARCH_SHARD_TASK, + NodeDuressSettings.SETTING_NUM_SUCCESSIVE_BREACHES, NodeDuressSettings.SETTING_CPU_THRESHOLD, NodeDuressSettings.SETTING_HEAP_THRESHOLD, - SearchShardTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, + SearchTaskSettings.SETTING_CANCELLATION_RATIO, + SearchTaskSettings.SETTING_CANCELLATION_RATE, + SearchTaskSettings.SETTING_CANCELLATION_BURST, SearchTaskSettings.SETTING_HEAP_PERCENT_THRESHOLD, - SearchShardTaskSettings.SETTING_HEAP_PERCENT_THRESHOLD, SearchTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD, - SearchShardTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD, SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, - SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, SearchTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD, - SearchShardTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD, SearchTaskSettings.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, + SearchTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, + SearchShardTaskSettings.SETTING_CANCELLATION_RATIO, + SearchShardTaskSettings.SETTING_CANCELLATION_RATE, + SearchShardTaskSettings.SETTING_CANCELLATION_BURST, + SearchShardTaskSettings.SETTING_HEAP_PERCENT_THRESHOLD, + SearchShardTaskSettings.SETTING_HEAP_VARIANCE_THRESHOLD, + SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, + SearchShardTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD, SearchShardTaskSettings.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, - SearchTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD + SearchShardTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD ) ) ); diff --git 
a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index 928b26a835bc4..7efff0fd5ffdb 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -19,6 +19,8 @@ import org.opensearch.monitor.process.ProcessProbe; import org.opensearch.search.backpressure.settings.SearchBackpressureMode; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; +import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; import org.opensearch.search.backpressure.stats.SearchBackpressureTaskStats; import org.opensearch.search.backpressure.trackers.CpuUsageTracker; @@ -55,7 +57,8 @@ public class SearchBackpressureService extends AbstractLifecycleComponent implements TaskCompletionListener, - SearchBackpressureSettings.Listener { + SearchTaskSettings.Listener, + SearchShardTaskSettings.Listener { private static final Logger logger = LogManager.getLogger(SearchBackpressureService.class); private volatile Scheduler.Cancellable scheduledFuture; @@ -69,10 +72,8 @@ public class SearchBackpressureService extends AbstractLifecycleComponent private final List searchTaskTrackers; private final List searchShardTaskTrackers; - private final AtomicReference searchTaskCancellationRateLimiter = new AtomicReference<>(); - private final AtomicReference searchTaskCancellationRatioLimiter = new AtomicReference<>(); - private final AtomicReference searchShardTaskCancellationRateLimiter = new AtomicReference<>(); - private final AtomicReference searchShardTaskCancellationRatioLimiter = new AtomicReference<>(); + private final Map, AtomicReference> rateLimiters; + 
private final Map, AtomicReference> ratioLimiters; private final Map, SearchBackpressureState> searchBackpressureStates; @@ -99,8 +100,9 @@ public SearchBackpressureService( new HeapUsageTracker( settings.getSearchTaskSettings()::getHeapVarianceThreshold, settings.getSearchTaskSettings()::getHeapBytesThreshold, - settings.getSearchTaskSettings()::getHeapMovingAverageWindowSize, - settings.getClusterSettings() + settings.getSearchTaskSettings().getHeapMovingAverageWindowSize(), + settings.getClusterSettings(), + SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE ), new ElapsedTimeTracker(settings.getSearchTaskSettings()::getElapsedTimeNanosThreshold, System::nanoTime) ), @@ -109,8 +111,9 @@ public SearchBackpressureService( new HeapUsageTracker( settings.getSearchShardTaskSettings()::getHeapVarianceThreshold, settings.getSearchShardTaskSettings()::getHeapBytesThreshold, - settings.getSearchShardTaskSettings()::getHeapMovingAverageWindowSize, - settings.getClusterSettings() + settings.getSearchShardTaskSettings().getHeapMovingAverageWindowSize(), + settings.getClusterSettings(), + SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE ), new ElapsedTimeTracker(settings.getSearchShardTaskSettings()::getElapsedTimeNanosThreshold, System::nanoTime) ) @@ -127,7 +130,8 @@ public SearchBackpressureService( List searchShardTaskTrackers ) { this.settings = settings; - this.settings.addListener(this); + this.settings.getSearchTaskSettings().addListener(this); + this.settings.getSearchShardTaskSettings().addListener(this); this.taskResourceTrackingService = taskResourceTrackingService; this.taskResourceTrackingService.addTaskCompletionListener(this); this.threadPool = threadPool; @@ -143,35 +147,41 @@ public SearchBackpressureService( new SearchBackpressureState() ); - this.searchTaskCancellationRateLimiter.set( - new TokenBucket( - timeNanosSupplier, - getSettings().getCancellationRateSearchTaskNanos(), - getSettings().getCancellationBurstSearchTask() - ) - 
); - - this.searchTaskCancellationRatioLimiter.set( - new TokenBucket( - this::getSearchTaskCompletionCount, - getSettings().getCancellationRatioSearchTask(), - getSettings().getCancellationBurstSearchTask() - ) - ); - - this.searchShardTaskCancellationRateLimiter.set( - new TokenBucket( - timeNanosSupplier, - getSettings().getCancellationRateSearchShardTaskNanos(), - getSettings().getCancellationBurstSearchShardTask() + this.rateLimiters = Map.of( + SearchTask.class, + new AtomicReference<>( + new TokenBucket( + timeNanosSupplier, + getSettings().getSearchTaskSettings().getCancellationRateNanos(), + getSettings().getSearchTaskSettings().getCancellationBurst() + ) + ), + SearchShardTask.class, + new AtomicReference<>( + new TokenBucket( + timeNanosSupplier, + getSettings().getSearchShardTaskSettings().getCancellationRateNanos(), + getSettings().getSearchShardTaskSettings().getCancellationBurst() + ) ) ); - this.searchShardTaskCancellationRatioLimiter.set( - new TokenBucket( - this::getSearchShardTaskCompletionCount, - getSettings().getCancellationRatioSearchShardTask(), - getSettings().getCancellationBurstSearchShardTask() + this.ratioLimiters = Map.of( + SearchTask.class, + new AtomicReference<>( + new TokenBucket( + this::getSearchTaskCompletionCount, + getSettings().getSearchTaskSettings().getCancellationRatio(), + getSettings().getSearchTaskSettings().getCancellationBurst() + ) + ), + SearchShardTask.class, + new AtomicReference<>( + new TokenBucket( + this::getSearchShardTaskCompletionCount, + getSettings().getSearchShardTaskSettings().getCancellationRatio(), + getSettings().getSearchShardTaskSettings().getCancellationBurst() + ) ) ); } @@ -233,11 +243,11 @@ void doRun() { // Independently remove tokens from both token buckets. boolean rateLimitReached = isSearchTask - ? searchTaskCancellationRateLimiter.get().request() == false - : searchShardTaskCancellationRateLimiter.get().request() == false; + ? 
rateLimiters.get(SearchTask.class).get().request() == false + : rateLimiters.get(SearchShardTask.class).get().request() == false; boolean ratioLimitReached = isSearchTask - ? searchTaskCancellationRatioLimiter.get().request() == false - : searchShardTaskCancellationRatioLimiter.get().request() == false; + ? ratioLimiters.get(SearchTask.class).get().request() == false + : ratioLimiters.get(SearchShardTask.class).get().request() == false; // Stop cancelling tasks if there are no tokens in either of the two token buckets. if (rateLimitReached && ratioLimitReached) { @@ -380,24 +390,26 @@ public void onTaskCompleted(Task task) { @Override public void onCancellationRatioSearchTaskChanged() { - searchTaskCancellationRatioLimiter.set( - new TokenBucket( - this::getSearchTaskCompletionCount, - getSettings().getCancellationRatioSearchTask(), - getSettings().getCancellationBurstSearchTask() - ) - ); + ratioLimiters.get(SearchTask.class) + .set( + new TokenBucket( + this::getSearchTaskCompletionCount, + getSettings().getSearchTaskSettings().getCancellationRatio(), + getSettings().getSearchTaskSettings().getCancellationBurst() + ) + ); } @Override public void onCancellationRateSearchTaskChanged() { - searchTaskCancellationRateLimiter.set( - new TokenBucket( - timeNanosSupplier, - getSettings().getCancellationRateSearchTaskNanos(), - getSettings().getCancellationBurstSearchTask() - ) - ); + rateLimiters.get(SearchTask.class) + .set( + new TokenBucket( + timeNanosSupplier, + getSettings().getSearchTaskSettings().getCancellationRateNanos(), + getSettings().getSearchTaskSettings().getCancellationBurst() + ) + ); } @Override @@ -408,24 +420,26 @@ public void onCancellationBurstSearchTaskChanged() { @Override public void onCancellationRatioSearchShardTaskChanged() { - searchShardTaskCancellationRatioLimiter.set( - new TokenBucket( - this::getSearchShardTaskCompletionCount, - getSettings().getCancellationRatioSearchShardTask(), - getSettings().getCancellationBurstSearchShardTask() - 
) - ); + ratioLimiters.get(SearchShardTask.class) + .set( + new TokenBucket( + this::getSearchShardTaskCompletionCount, + getSettings().getSearchShardTaskSettings().getCancellationRatio(), + getSettings().getSearchShardTaskSettings().getCancellationBurst() + ) + ); } @Override public void onCancellationRateSearchShardTaskChanged() { - searchShardTaskCancellationRateLimiter.set( - new TokenBucket( - timeNanosSupplier, - getSettings().getCancellationRateSearchShardTaskNanos(), - getSettings().getCancellationBurstSearchShardTask() - ) - ); + rateLimiters.get(SearchShardTask.class) + .set( + new TokenBucket( + timeNanosSupplier, + getSettings().getSearchShardTaskSettings().getCancellationRateNanos(), + getSettings().getSearchShardTaskSettings().getCancellationBurst() + ) + ); } @Override diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java index e0cd4efd43aac..13287d04886c1 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java @@ -8,19 +8,13 @@ package org.opensearch.search.backpressure.settings; -import org.opensearch.ExceptionsHelper; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.TimeUnit; -import java.util.function.Consumer; - /** - * Settings related to search backpressure and cancellation of in-flight requests. 
+ * Settings related to search backpressure mode and internal * * @opensearch.internal */ @@ -28,14 +22,6 @@ public class SearchBackpressureSettings { private static class Defaults { private static final long INTERVAL_MILLIS = 1000; private static final String MODE = "monitor_only"; - - // TODO: decide on default settings for SearchTask - private static final double CANCELLATION_RATIO_SEARCH_TASK = 0.1; - private static final double CANCELLATION_RATE_SEARCH_TASK = 0.003; - private static final double CANCELLATION_BURST_SEARCH_TASK = 10.0; - private static final double CANCELLATION_RATIO_SEARCH_SHARD_TASK = 0.1; - private static final double CANCELLATION_RATE_SEARCH_SHARD_TASK = 0.003; - private static final double CANCELLATION_BURST_SEARCH_SHARD_TASK = 10.0; } /** @@ -60,102 +46,6 @@ private static class Defaults { Setting.Property.NodeScope ); - /** - * Defines the percentage of SearchTasks to cancel relative to the number of successful SearchTask completions. - * In other words, it is the number of tokens added to the bucket on each successful SearchTask completion. - */ - private volatile double cancellationRatioSearchTask; - public static final Setting SETTING_CANCELLATION_RATIO_SEARCH_TASK = Setting.doubleSetting( - "search_backpressure.cancellation_ratio_search_task", - Defaults.CANCELLATION_RATIO_SEARCH_TASK, - 0.0, - 1.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the number of tasks to cancel per unit time (in millis). - * In other words, it is the number of tokens added to the bucket each millisecond. - */ - private volatile double cancellationRateSearchTask; - public static final Setting SETTING_CANCELLATION_RATE_SEARCH_TASK = Setting.doubleSetting( - "search_backpressure.cancellation_rate_search_task", - Defaults.CANCELLATION_RATE_SEARCH_TASK, - 0.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the maximum number of tasks that can be cancelled before being rate-limited. 
- */ - private volatile double cancellationBurstSearchTask; - public static final Setting SETTING_CANCELLATION_BURST_SEARCH_TASK = Setting.doubleSetting( - "search_backpressure.cancellation_burst_search_task", - Defaults.CANCELLATION_BURST_SEARCH_TASK, - 1.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the percentage of tasks to cancel relative to the number of successful task completions. - * In other words, it is the number of tokens added to the bucket on each successful task completion. - */ - private volatile double cancellationRatioSearchShardTask; - public static final Setting SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK = Setting.doubleSetting( - "search_backpressure.cancellation_ratio_search_shard_task", - Defaults.CANCELLATION_RATIO_SEARCH_SHARD_TASK, - 0.0, - 1.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the number of tasks to cancel per unit time (in millis). - * In other words, it is the number of tokens added to the bucket each millisecond. - */ - private volatile double cancellationRateSearchShardTask; - public static final Setting SETTING_CANCELLATION_RATE_SEARCH_SHARD_TASK = Setting.doubleSetting( - "search_backpressure.cancellation_rate_search_shard_task", - Defaults.CANCELLATION_RATE_SEARCH_SHARD_TASK, - 0.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Defines the maximum number of tasks that can be cancelled before being rate-limited. - */ - private volatile double cancellationBurstSearchShardTask; - public static final Setting SETTING_CANCELLATION_BURST_SEARCH_SHARD_TASK = Setting.doubleSetting( - "search_backpressure.cancellation_burst_search_shard_task", - Defaults.CANCELLATION_BURST_SEARCH_SHARD_TASK, - 1.0, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - /** - * Callback listeners. 
- */ - public interface Listener { - void onCancellationRatioSearchTaskChanged(); - - void onCancellationRateSearchTaskChanged(); - - void onCancellationBurstSearchTaskChanged(); - - void onCancellationRatioSearchShardTaskChanged(); - - void onCancellationRateSearchShardTaskChanged(); - - void onCancellationBurstSearchShardTaskChanged(); - } - - private final List listeners = new ArrayList<>(); private final Settings settings; private final ClusterSettings clusterSettings; private final NodeDuressSettings nodeDuressSettings; @@ -173,28 +63,6 @@ public SearchBackpressureSettings(Settings settings, ClusterSettings clusterSett mode = SearchBackpressureMode.fromName(SETTING_MODE.get(settings)); clusterSettings.addSettingsUpdateConsumer(SETTING_MODE, s -> this.setMode(SearchBackpressureMode.fromName(s))); - - cancellationRatioSearchTask = SETTING_CANCELLATION_RATIO_SEARCH_TASK.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATIO_SEARCH_TASK, this::setCancellationRatioSearchTask); - - cancellationRateSearchTask = SETTING_CANCELLATION_RATE_SEARCH_TASK.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATE_SEARCH_TASK, this::setCancellationRateSearchTask); - - cancellationBurstSearchTask = SETTING_CANCELLATION_BURST_SEARCH_TASK.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST_SEARCH_TASK, this::setCancellationBurstSearchTask); - - cancellationRatioSearchShardTask = SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK, this::setCancellationRatioSearchShardTask); - - cancellationRateSearchShardTask = SETTING_CANCELLATION_RATE_SEARCH_SHARD_TASK.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATE_SEARCH_SHARD_TASK, this::setCancellationRateSearchShardTask); - - cancellationBurstSearchShardTask = 
SETTING_CANCELLATION_BURST_SEARCH_SHARD_TASK.get(settings); - clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST_SEARCH_SHARD_TASK, this::setCancellationBurstSearchShardTask); - } - - public void addListener(Listener listener) { - listeners.add(listener); } public Settings getSettings() { @@ -228,80 +96,4 @@ public SearchBackpressureMode getMode() { public void setMode(SearchBackpressureMode mode) { this.mode = mode; } - - public double getCancellationRatioSearchTask() { - return cancellationRatioSearchTask; - } - - private void setCancellationRatioSearchTask(double cancellationRatioSearchTask) { - this.cancellationRatioSearchTask = cancellationRatioSearchTask; - notifyListeners(Listener::onCancellationRatioSearchTaskChanged); - } - - public double getCancellationRateSearchTask() { - return cancellationRateSearchTask; - } - - public double getCancellationRateSearchTaskNanos() { - return getCancellationRateSearchTask() / TimeUnit.MILLISECONDS.toNanos(1); // rate per nanoseconds - } - - private void setCancellationRateSearchTask(double cancellationRateSearchTask) { - this.cancellationRateSearchTask = cancellationRateSearchTask; - notifyListeners(Listener::onCancellationRateSearchTaskChanged); - } - - public double getCancellationBurstSearchTask() { - return cancellationBurstSearchTask; - } - - private void setCancellationBurstSearchTask(double cancellationBurstSearchTask) { - this.cancellationBurstSearchTask = cancellationBurstSearchTask; - notifyListeners(Listener::onCancellationBurstSearchTaskChanged); - } - - public double getCancellationRatioSearchShardTask() { - return cancellationRatioSearchShardTask; - } - - private void setCancellationRatioSearchShardTask(double cancellationRatioSearchShardTask) { - this.cancellationRatioSearchShardTask = cancellationRatioSearchShardTask; - notifyListeners(Listener::onCancellationRatioSearchShardTaskChanged); - } - - public double getCancellationRateSearchShardTask() { - return 
cancellationRateSearchShardTask; - } - - public double getCancellationRateSearchShardTaskNanos() { - return getCancellationRateSearchShardTask() / TimeUnit.MILLISECONDS.toNanos(1); // rate per nanoseconds - } - - private void setCancellationRateSearchShardTask(double cancellationRateSearchShardTask) { - this.cancellationRateSearchShardTask = cancellationRateSearchShardTask; - notifyListeners(Listener::onCancellationRateSearchShardTaskChanged); - } - - public double getCancellationBurstSearchShardTask() { - return cancellationBurstSearchShardTask; - } - - private void setCancellationBurstSearchShardTask(double cancellationBurstSearchShardTask) { - this.cancellationBurstSearchShardTask = cancellationBurstSearchShardTask; - notifyListeners(Listener::onCancellationBurstSearchShardTaskChanged); - } - - private void notifyListeners(Consumer consumer) { - List exceptions = new ArrayList<>(); - - for (Listener listener : listeners) { - try { - consumer.accept(listener); - } catch (Exception e) { - exceptions.add(e); - } - } - - ExceptionsHelper.maybeThrowRuntimeAndSuppress(exceptions); - } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java index 98599e9478a29..1cfb2a5a350f5 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java @@ -8,12 +8,16 @@ package org.opensearch.search.backpressure.settings; +import org.opensearch.ExceptionsHelper; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.monitor.jvm.JvmStats; +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; /** * Defines the 
settings related to the cancellation of SearchShardTasks. @@ -22,8 +26,12 @@ */ public class SearchShardTaskSettings { private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); + private final List listeners = new ArrayList<>(); private static class Defaults { + private static final double CANCELLATION_RATIO = 0.1; + private static final double CANCELLATION_RATE = 0.003; + private static final double CANCELLATION_BURST = 10.0; private static final double TOTAL_HEAP_PERCENT_THRESHOLD = 0.05; private static final long CPU_TIME_MILLIS_THRESHOLD = 15000; private static final long ELAPSED_TIME_MILLIS_THRESHOLD = 30000; @@ -32,6 +40,45 @@ private static class Defaults { private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE = 100; } + /** + * Defines the percentage of SearchShardTasks to cancel relative to the number of successful SearchShardTasks completions. + * In other words, it is the number of tokens added to the bucket on each successful SearchShardTask completion. + */ + private volatile double cancellationRatio; + public static final Setting SETTING_CANCELLATION_RATIO = Setting.doubleSetting( + "search_backpressure.search_shard_task.cancellation_ratio", + Defaults.CANCELLATION_RATIO, + 0.0, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the number of SearchShardTasks to cancel per unit time (in millis). + * In other words, it is the number of tokens added to the bucket each millisecond. + */ + private volatile double cancellationRate; + public static final Setting SETTING_CANCELLATION_RATE = Setting.doubleSetting( + "search_backpressure.search_shard_task.cancellation_rate", + Defaults.CANCELLATION_RATE, + 0.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the maximum number of SearchShardTasks that can be cancelled before being rate-limited. 
+ */ + private volatile double cancellationBurst; + public static final Setting SETTING_CANCELLATION_BURST = Setting.doubleSetting( + "search_backpressure.search_shard_task.cancellation_burst", + Defaults.CANCELLATION_BURST, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + /** * Defines the heap usage threshold (in percentage) for the sum of heap usages across all search shard tasks * before in-flight cancellation is applied. @@ -112,15 +159,33 @@ public SearchShardTaskSettings(Settings settings, ClusterSettings clusterSetting totalHeapPercentThreshold = SETTING_TOTAL_HEAP_PERCENT_THRESHOLD.get(settings); this.cpuTimeMillisThreshold = SETTING_CPU_TIME_MILLIS_THRESHOLD.get(settings); this.elapsedTimeMillisThreshold = SETTING_ELAPSED_TIME_MILLIS_THRESHOLD.get(settings); - heapPercentThreshold = SETTING_HEAP_PERCENT_THRESHOLD.get(settings); - heapVarianceThreshold = SETTING_HEAP_VARIANCE_THRESHOLD.get(settings); - heapMovingAverageWindowSize = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.get(settings); + this.heapPercentThreshold = SETTING_HEAP_PERCENT_THRESHOLD.get(settings); + this.heapVarianceThreshold = SETTING_HEAP_VARIANCE_THRESHOLD.get(settings); + this.heapMovingAverageWindowSize = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.get(settings); + this.cancellationRatio = SETTING_CANCELLATION_RATIO.get(settings); + this.cancellationRate = SETTING_CANCELLATION_RATE.get(settings); + this.cancellationBurst = SETTING_CANCELLATION_BURST.get(settings); + clusterSettings.addSettingsUpdateConsumer(SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, this::setTotalHeapPercentThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD, this::setCpuTimeMillisThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, this::setElapsedTimeMillisThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD, this::setHeapPercentThreshold); 
clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD, this::setHeapVarianceThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::setHeapMovingAverageWindowSize); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATIO, this::setCancellationRatio); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATE, this::setCancellationRate); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST, this::setCancellationBurst); + } + + /** + * Callback listeners. + */ + public interface Listener { + void onCancellationRatioSearchShardTaskChanged(); + + void onCancellationRateSearchShardTaskChanged(); + + void onCancellationBurstSearchShardTaskChanged(); } public double getTotalHeapPercentThreshold() { @@ -174,4 +239,53 @@ public void setHeapVarianceThreshold(double heapVarianceThreshold) { public void setHeapMovingAverageWindowSize(int heapMovingAverageWindowSize) { this.heapMovingAverageWindowSize = heapMovingAverageWindowSize; } + + public double getCancellationRatio() { + return cancellationRatio; + } + + private void setCancellationRatio(double cancellationRatio) { + this.cancellationRatio = cancellationRatio; + notifyListeners(Listener::onCancellationRatioSearchShardTaskChanged); + } + + public double getCancellationRate() { + return cancellationRate; + } + + public double getCancellationRateNanos() { + return getCancellationRate() / TimeUnit.MILLISECONDS.toNanos(1); // rate per nanoseconds + } + + private void setCancellationRate(double cancellationRate) { + this.cancellationRate = cancellationRate; + notifyListeners(Listener::onCancellationRateSearchShardTaskChanged); + } + + public double getCancellationBurst() { + return cancellationBurst; + } + + private void setCancellationBurst(double cancellationBurst) { + this.cancellationBurst = cancellationBurst; + notifyListeners(Listener::onCancellationBurstSearchShardTaskChanged); + } + + public void 
addListener(Listener listener) { + listeners.add(listener); + } + + private void notifyListeners(Consumer consumer) { + List exceptions = new ArrayList<>(); + + for (Listener listener : listeners) { + try { + consumer.accept(listener); + } catch (Exception e) { + exceptions.add(e); + } + } + + ExceptionsHelper.maybeThrowRuntimeAndSuppress(exceptions); + } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java index 3b89ba7f3492d..af216f84d790e 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java @@ -8,13 +8,16 @@ package org.opensearch.search.backpressure.settings; -import org.apache.logging.log4j.LogManager; +import org.opensearch.ExceptionsHelper; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.monitor.jvm.JvmStats; +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; /** * Defines the settings related to the cancellation of SearchTasks. 
@@ -24,8 +27,13 @@ public class SearchTaskSettings { private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); + private final List listeners = new ArrayList<>(); private static class Defaults { + // TODO: decide on default settings for SearchTask + private static final double CANCELLATION_RATIO = 0.1; + private static final double CANCELLATION_RATE = 0.003; + private static final double CANCELLATION_BURST = 10.0; private static final double TOTAL_HEAP_PERCENT_THRESHOLD = 0.05; private static final long CPU_TIME_MILLIS_THRESHOLD = 60000; private static final long ELAPSED_TIME_MILLIS_THRESHOLD = 120000; @@ -34,6 +42,45 @@ private static class Defaults { private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE = 100; } + /** + * Defines the percentage of SearchTasks to cancel relative to the number of successful SearchTask completions. + * In other words, it is the number of tokens added to the bucket on each successful SearchTask completion. + */ + private volatile double cancellationRatio; + public static final Setting SETTING_CANCELLATION_RATIO = Setting.doubleSetting( + "search_backpressure.search_task.cancellation_ratio", + Defaults.CANCELLATION_RATIO, + 0.0, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the number of SearchTasks to cancel per unit time (in millis). + * In other words, it is the number of tokens added to the bucket each millisecond. + */ + private volatile double cancellationRate; + public static final Setting SETTING_CANCELLATION_RATE = Setting.doubleSetting( + "search_backpressure.search_task.cancellation_rate", + Defaults.CANCELLATION_RATE, + 0.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the maximum number of SearchTasks that can be cancelled before being rate-limited. 
+ */ + private volatile double cancellationBurst; + public static final Setting SETTING_CANCELLATION_BURST = Setting.doubleSetting( + "search_backpressure.search_task.cancellation_burst", + Defaults.CANCELLATION_BURST, + 1.0, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + /** * Defines the heap usage threshold (in percentage) for the sum of heap usages across all search tasks * before in-flight cancellation is applied. @@ -117,12 +164,30 @@ public SearchTaskSettings(Settings settings, ClusterSettings clusterSettings) { this.heapPercentThreshold = SETTING_HEAP_PERCENT_THRESHOLD.get(settings); this.heapVarianceThreshold = SETTING_HEAP_VARIANCE_THRESHOLD.get(settings); this.heapMovingAverageWindowSize = SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE.get(settings); + this.cancellationRatio = SETTING_CANCELLATION_RATIO.get(settings); + this.cancellationRate = SETTING_CANCELLATION_RATE.get(settings); + this.cancellationBurst = SETTING_CANCELLATION_BURST.get(settings); + clusterSettings.addSettingsUpdateConsumer(SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, this::setTotalHeapPercentThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD, this::setCpuTimeMillisThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, this::setElapsedTimeMillisThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_PERCENT_THRESHOLD, this::setHeapPercentThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_VARIANCE_THRESHOLD, this::setHeapVarianceThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::setHeapMovingAverageWindowSize); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATIO, this::setCancellationRatio); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATE, this::setCancellationRate); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST, this::setCancellationBurst); + } + + /** + * Callback 
listeners. + */ + public interface Listener { + void onCancellationRatioSearchTaskChanged(); + + void onCancellationRateSearchTaskChanged(); + + void onCancellationBurstSearchTaskChanged(); } public double getTotalHeapPercentThreshold() { @@ -158,7 +223,6 @@ public void setTotalHeapPercentThreshold(double totalHeapPercentThreshold) { } public void setCpuTimeMillisThreshold(long cpuTimeMillisThreshold) { - LogManager.getLogger(SearchTaskSettings.class).info("setCpuTimeMillisThreshold " + cpuTimeMillisThreshold); this.cpuTimeMillisThreshold = cpuTimeMillisThreshold; } @@ -177,4 +241,53 @@ public void setHeapVarianceThreshold(double heapVarianceThreshold) { public void setHeapMovingAverageWindowSize(int heapMovingAverageWindowSize) { this.heapMovingAverageWindowSize = heapMovingAverageWindowSize; } + + public double getCancellationRatio() { + return cancellationRatio; + } + + private void setCancellationRatio(double cancellationRatio) { + this.cancellationRatio = cancellationRatio; + notifyListeners(Listener::onCancellationRatioSearchTaskChanged); + } + + public double getCancellationRate() { + return cancellationRate; + } + + public double getCancellationRateNanos() { + return getCancellationRate() / TimeUnit.MILLISECONDS.toNanos(1); // rate per nanoseconds + } + + private void setCancellationRate(double cancellationRate) { + this.cancellationRate = cancellationRate; + notifyListeners(Listener::onCancellationRateSearchTaskChanged); + } + + public double getCancellationBurst() { + return cancellationBurst; + } + + private void setCancellationBurst(double cancellationBurst) { + this.cancellationBurst = cancellationBurst; + notifyListeners(Listener::onCancellationBurstSearchTaskChanged); + } + + public void addListener(Listener listener) { + listeners.add(listener); + } + + private void notifyListeners(Consumer consumer) { + List exceptions = new ArrayList<>(); + + for (Listener listener : listeners) { + try { + consumer.accept(listener); + } catch (Exception e) { + 
exceptions.add(e); + } + } + + ExceptionsHelper.maybeThrowRuntimeAndSuppress(exceptions); + } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java index bd5f24ef0dbee..2d2eba16aa7a5 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java @@ -8,7 +8,6 @@ package org.opensearch.search.backpressure.stats; -import org.opensearch.Version; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; import org.opensearch.common.io.stream.Writeable; @@ -40,11 +39,7 @@ public SearchBackpressureStats( public SearchBackpressureStats(StreamInput in) throws IOException { searchShardTaskStats = new SearchBackpressureTaskStats(in); mode = SearchBackpressureMode.fromName(in.readString()); - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { - searchTaskStats = new SearchBackpressureTaskStats(in); - } else { - searchTaskStats = null; - } + searchTaskStats = in.readOptionalWriteable(SearchBackpressureTaskStats::new); } @Override @@ -60,9 +55,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws public void writeTo(StreamOutput out) throws IOException { searchShardTaskStats.writeTo(out); out.writeString(mode.getName()); - if (Version.CURRENT.onOrAfter(Version.V_3_0_0) && out.getVersion().onOrAfter(Version.V_3_0_0)) { - searchTaskStats.writeTo(out); - } + // searchTaskStats.writeTo(out); + out.writeOptionalWriteable(searchTaskStats); } @Override @@ -70,9 +64,9 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; SearchBackpressureStats that = (SearchBackpressureStats) o; - return (Version.CURRENT.onOrAfter(Version.V_3_0_0) - && 
searchTaskStats.equals(that.searchTaskStats) - && searchShardTaskStats.equals(that.searchShardTaskStats)) && mode == that.mode; + return mode == that.mode + && Objects.equals(searchTaskStats, that.searchTaskStats) + && Objects.equals(searchShardTaskStats, that.searchShardTaskStats); } @Override diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java index f6925c4c6bc8c..5d7bb31ae2fbb 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java @@ -88,22 +88,7 @@ public boolean equals(Object o) { SearchBackpressureTaskStats that = (SearchBackpressureTaskStats) o; return cancellationCount == that.cancellationCount && limitReachedCount == that.limitReachedCount - && compareMaps(resourceUsageTrackerStats, that.resourceUsageTrackerStats); - } - - private boolean compareMaps( - Map trackers1, - Map trackers2 - ) { - if (trackers1.size() != trackers2.size()) { - return false; - } - for (Map.Entry e1 : trackers1.entrySet()) { - if (trackers2.containsKey(e1.getKey()) == false || trackers2.get(e1.getKey()).equals(e1.getValue()) == false) { - return false; - } - } - return true; + && resourceUsageTrackerStats.equals(that.resourceUsageTrackerStats); } @Override diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java index 5215a17a61a8c..fb4cd342de25b 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java @@ -64,9 +64,9 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public 
TaskResourceUsageTracker.Stats stats(List tasks) { - long currentMax = tasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); - long currentAvg = (long) tasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).average().orElse(0); + public TaskResourceUsageTracker.Stats stats(List activeTasks) { + long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); + long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).average().orElse(0); return new Stats(getCancellations(), currentMax, currentAvg); } diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java index d1700861476d6..1175d68fb8550 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java @@ -65,10 +65,10 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public TaskResourceUsageTracker.Stats stats(List tasks) { + public TaskResourceUsageTracker.Stats stats(List activeTasks) { long now = timeNanosSupplier.getAsLong(); - long currentMax = tasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); - long currentAvg = (long) tasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); + long currentMax = activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); + long currentAvg = (long) activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); return new Stats(getCancellations(), currentMax, currentAvg); } diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java 
b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java index 915b6ed60f685..15058bf8fe156 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java @@ -11,12 +11,10 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.settings.Setting; import org.opensearch.common.unit.ByteSizeValue; import org.opensearch.common.util.MovingAverage; -import org.opensearch.monitor.jvm.JvmStats; import org.opensearch.common.xcontent.XContentBuilder; -import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; -import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; @@ -26,7 +24,6 @@ import java.util.Optional; import java.util.concurrent.atomic.AtomicReference; import java.util.function.DoubleSupplier; -import java.util.function.IntSupplier; import java.util.function.LongSupplier; import static org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER; @@ -38,26 +35,21 @@ * @opensearch.internal */ public class HeapUsageTracker extends TaskResourceUsageTracker { - private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); private final DoubleSupplier heapVarianceSupplier; private final LongSupplier heapBytesThresholdSupplier; - private final IntSupplier windowSizeSupplier; private final AtomicReference movingAverageReference; public HeapUsageTracker( DoubleSupplier heapVarianceSupplier, LongSupplier heapBytesThresholdSupplier, - IntSupplier windowSizeSupplier, - ClusterSettings clusterSettings + int heapMovingAverageWindowSize, + ClusterSettings clusterSettings, + Setting windowSizeSetting ) { 
this.heapVarianceSupplier = heapVarianceSupplier; this.heapBytesThresholdSupplier = heapBytesThresholdSupplier; - this.windowSizeSupplier = windowSizeSupplier; - this.movingAverageReference = new AtomicReference<>(new MovingAverage(windowSizeSupplier.getAsInt())); - // TODO: find a way to get the type of the setting SearchTaskSettings/SearchShardTaskSettings and then add consumer only for the - // required setting - clusterSettings.addSettingsUpdateConsumer(SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::updateWindowSize); - clusterSettings.addSettingsUpdateConsumer(SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, this::updateWindowSize); + this.movingAverageReference = new AtomicReference<>(new MovingAverage(heapMovingAverageWindowSize)); + clusterSettings.addSettingsUpdateConsumer(windowSizeSetting, this::updateWindowSize); } @Override @@ -102,9 +94,9 @@ private void updateWindowSize(int heapMovingAverageWindowSize) { } @Override - public TaskResourceUsageTracker.Stats stats(List tasks) { - long currentMax = tasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); - long currentAvg = (long) tasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); + public TaskResourceUsageTracker.Stats stats(List activeTasks) { + long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); + long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); return new Stats(getCancellations(), currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); } diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index d2dce8731d141..1b61ecb9e0341 100644 --- 
a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -314,12 +314,7 @@ public void testSearchShardTaskInFlightCancellation() { private SearchBackpressureSettings getBackpressureSettings(String mode, double ratio, double rate, double burst) { return spy( new SearchBackpressureSettings( - Settings.builder() - .put(SearchBackpressureSettings.SETTING_MODE.getKey(), mode) - .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATIO_SEARCH_SHARD_TASK.getKey(), ratio) - .put(SearchBackpressureSettings.SETTING_CANCELLATION_RATE_SEARCH_TASK.getKey(), rate) - .put(SearchBackpressureSettings.SETTING_CANCELLATION_BURST_SEARCH_TASK.getKey(), burst) - .build(), + Settings.builder().put(SearchBackpressureSettings.SETTING_MODE.getKey(), mode).build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) ) ); diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java index aa9bd39fb3451..4af23be03f9f2 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java @@ -49,8 +49,9 @@ public void testSearchTaskEligibleForCancellation() { new HeapUsageTracker( mockSearchTaskSettings::getHeapVarianceThreshold, mockSearchTaskSettings::getHeapBytesThreshold, - mockSearchTaskSettings::getHeapMovingAverageWindowSize, - mockSettings.getClusterSettings() + mockSearchTaskSettings.getHeapMovingAverageWindowSize(), + mockSettings.getClusterSettings(), + SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE ) ); Task task = createMockTaskWithResourceStats(SearchTask.class, 1, 50); @@ -77,8 +78,9 @@ public void 
testSearchShardTaskEligibleForCancellation() { new HeapUsageTracker( mockSearchShardTaskSettings::getHeapVarianceThreshold, mockSearchShardTaskSettings::getHeapBytesThreshold, - mockSearchShardTaskSettings::getHeapMovingAverageWindowSize, - mockSettings.getClusterSettings() + mockSearchShardTaskSettings.getHeapMovingAverageWindowSize(), + mockSettings.getClusterSettings(), + SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE ) ); Task task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 50); @@ -107,8 +109,9 @@ public void testNotEligibleForCancellation() { new HeapUsageTracker( mockSearchShardTaskSettings::getHeapVarianceThreshold, mockSearchShardTaskSettings::getHeapBytesThreshold, - mockSearchShardTaskSettings::getHeapMovingAverageWindowSize, - mockSettings.getClusterSettings() + mockSearchShardTaskSettings.getHeapMovingAverageWindowSize(), + mockSettings.getClusterSettings(), + SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE ) ); From 339a7466fd9c85d32198310747ff161addf1cffe Mon Sep 17 00:00:00 2001 From: PritLadani Date: Thu, 19 Jan 2023 16:26:16 +0530 Subject: [PATCH 18/34] Separating SearchTaskStats and SearchShardTaskStats Signed-off-by: PritLadani --- .../SearchBackpressureService.java | 7 +-- .../stats/SearchBackpressureStats.java | 12 ++--- .../stats/SearchBackpressureTaskStats.java | 4 +- .../stats/SearchShardTaskStats.java | 35 +++++++++++++++ .../backpressure/stats/SearchTaskStats.java | 35 +++++++++++++++ .../SearchBackpressureServiceTests.java | 11 ++--- .../stats/SearchBackpressureStatsTests.java | 4 +- .../stats/SearchShardTaskStatsTests.java | 44 ++++++++++++++++++ .../stats/SearchTaskStatsTests.java | 45 +++++++++++++++++++ 9 files changed, 180 insertions(+), 17 deletions(-) create mode 100644 server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java create mode 100644 server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java create mode 100644 
server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java create mode 100644 server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index 7efff0fd5ffdb..c3a36b6ac613a 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -22,7 +22,8 @@ import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; -import org.opensearch.search.backpressure.stats.SearchBackpressureTaskStats; +import org.opensearch.search.backpressure.stats.SearchShardTaskStats; +import org.opensearch.search.backpressure.stats.SearchTaskStats; import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; @@ -472,14 +473,14 @@ protected void doClose() throws IOException {} public SearchBackpressureStats nodeStats() { List searchTasks = getTaskByType(SearchTask.class); List searchShardTasks = getTaskByType(SearchShardTask.class); - SearchBackpressureTaskStats searchTaskStats = new SearchBackpressureTaskStats( + SearchTaskStats searchTaskStats = new SearchTaskStats( searchBackpressureStates.get(SearchTask.class).getCancellationCount(), searchBackpressureStates.get(SearchTask.class).getLimitReachedCount(), searchTaskTrackers.stream() .collect(Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.stats(searchTasks))) ); - SearchBackpressureTaskStats searchShardTaskStats = new 
SearchBackpressureTaskStats( + SearchShardTaskStats searchShardTaskStats = new SearchShardTaskStats( searchBackpressureStates.get(SearchShardTask.class).getCancellationCount(), searchBackpressureStates.get(SearchShardTask.class).getLimitReachedCount(), searchShardTaskTrackers.stream() diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java index 2d2eba16aa7a5..756ce79d3a769 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java @@ -22,13 +22,13 @@ * Stats related to search backpressure. */ public class SearchBackpressureStats implements ToXContentFragment, Writeable { - private final SearchBackpressureTaskStats searchTaskStats; - private final SearchBackpressureTaskStats searchShardTaskStats; + private final SearchTaskStats searchTaskStats; + private final SearchShardTaskStats searchShardTaskStats; private final SearchBackpressureMode mode; public SearchBackpressureStats( - SearchBackpressureTaskStats searchTaskStats, - SearchBackpressureTaskStats searchShardTaskStats, + SearchTaskStats searchTaskStats, + SearchShardTaskStats searchShardTaskStats, SearchBackpressureMode mode ) { this.searchTaskStats = searchTaskStats; @@ -37,9 +37,9 @@ public SearchBackpressureStats( } public SearchBackpressureStats(StreamInput in) throws IOException { - searchShardTaskStats = new SearchBackpressureTaskStats(in); + searchShardTaskStats = new SearchShardTaskStats(in); mode = SearchBackpressureMode.fromName(in.readString()); - searchTaskStats = in.readOptionalWriteable(SearchBackpressureTaskStats::new); + searchTaskStats = in.readOptionalWriteable(SearchTaskStats::new); } @Override diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java 
b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java index 5d7bb31ae2fbb..ce517d831b2eb 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java @@ -25,7 +25,9 @@ import java.util.Objects; /** - * Stats related to cancelled search shard tasks. + * Stats related to cancelled SearchBackpressureTasks. + * Since the children of this class has exact same structures, we have extracted the common stats to this class. + * However, in the future, if some task stats does not have this common stats, we can remove this class. */ public class SearchBackpressureTaskStats implements ToXContentObject, Writeable { private final long cancellationCount; diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java new file mode 100644 index 0000000000000..c0db0d342d02e --- /dev/null +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.backpressure.stats; + +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; + +import java.io.IOException; +import java.util.Map; + +/** + * Stats related to cancelled SearchShardTasks. 
+ */ + +public class SearchShardTaskStats extends SearchBackpressureTaskStats { + + public SearchShardTaskStats( + long cancellationCount, + long limitReachedCount, + Map resourceUsageTrackerStats + ) { + super(cancellationCount, limitReachedCount, resourceUsageTrackerStats); + } + + public SearchShardTaskStats(StreamInput in) throws IOException { + super(in); + } +} diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java new file mode 100644 index 0000000000000..023e97298b6c4 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.backpressure.stats; + +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; + +import java.io.IOException; +import java.util.Map; + +/** + * Stats related to cancelled SearchTasks. 
+ */ + +public class SearchTaskStats extends SearchBackpressureTaskStats { + + public SearchTaskStats( + long cancellationCount, + long limitReachedCount, + Map resourceUsageTrackerStats + ) { + super(cancellationCount, limitReachedCount, resourceUsageTrackerStats); + } + + public SearchTaskStats(StreamInput in) throws IOException { + super(in); + } +} diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index 1b61ecb9e0341..d43fedac047dc 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -19,10 +19,11 @@ import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; import org.opensearch.search.backpressure.settings.SearchTaskSettings; +import org.opensearch.search.backpressure.stats.SearchShardTaskStats; +import org.opensearch.search.backpressure.stats.SearchTaskStats; import org.opensearch.search.backpressure.trackers.NodeDuressTracker; import org.opensearch.common.xcontent.XContentBuilder; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; -import org.opensearch.search.backpressure.stats.SearchBackpressureTaskStats; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; import org.opensearch.tasks.CancellableTask; @@ -223,8 +224,8 @@ public void testSearchTaskInFlightCancellation() { // Verify search backpressure stats. 
SearchBackpressureStats expectedStats = new SearchBackpressureStats( - new SearchBackpressureTaskStats(20, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(20))), - new SearchBackpressureTaskStats(0, 0, Collections.emptyMap()), + new SearchTaskStats(20, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(20))), + new SearchShardTaskStats(0, 0, Collections.emptyMap()), SearchBackpressureMode.ENFORCED ); SearchBackpressureStats actualStats = service.nodeStats(); @@ -303,8 +304,8 @@ public void testSearchShardTaskInFlightCancellation() { // Verify search backpressure stats. SearchBackpressureStats expectedStats = new SearchBackpressureStats( - new SearchBackpressureTaskStats(0, 0, Collections.emptyMap()), - new SearchBackpressureTaskStats(15, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(15))), + new SearchTaskStats(0, 0, Collections.emptyMap()), + new SearchShardTaskStats(15, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(15))), SearchBackpressureMode.ENFORCED ); SearchBackpressureStats actualStats = service.nodeStats(); diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java index 3c301b40b5f4f..0c86cf4b11239 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureStatsTests.java @@ -25,8 +25,8 @@ protected SearchBackpressureStats createTestInstance() { public static SearchBackpressureStats randomInstance() { return new SearchBackpressureStats( - SearchBackpressureTaskStatsTests.randomInstance(), - SearchBackpressureTaskStatsTests.randomInstance(), + SearchTaskStatsTests.randomInstance(), + SearchShardTaskStatsTests.randomInstance(), randomFrom(SearchBackpressureMode.DISABLED, 
SearchBackpressureMode.MONITOR_ONLY, SearchBackpressureMode.ENFORCED) ); } diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java new file mode 100644 index 0000000000000..d5bc9398492eb --- /dev/null +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java @@ -0,0 +1,44 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.backpressure.stats; + +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.search.backpressure.trackers.CpuUsageTracker; +import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; +import org.opensearch.search.backpressure.trackers.HeapUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; +import org.opensearch.test.AbstractWireSerializingTestCase; + +import java.util.Map; + +public class SearchShardTaskStatsTests extends AbstractWireSerializingTestCase { + @Override + protected Writeable.Reader instanceReader() { + return SearchShardTaskStats::new; + } + + @Override + protected SearchShardTaskStats createTestInstance() { + return randomInstance(); + } + + public static SearchShardTaskStats randomInstance() { + Map resourceUsageTrackerStats = Map.of( + TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, + new CpuUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), + TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, + new HeapUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), + 
TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, + new ElapsedTimeTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()) + ); + + return new SearchShardTaskStats(randomNonNegativeLong(), randomNonNegativeLong(), resourceUsageTrackerStats); + } +} diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java new file mode 100644 index 0000000000000..07cec723efb17 --- /dev/null +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java @@ -0,0 +1,45 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.backpressure.stats; + +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.search.backpressure.trackers.CpuUsageTracker; +import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; +import org.opensearch.search.backpressure.trackers.HeapUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; +import org.opensearch.test.AbstractWireSerializingTestCase; + +import java.util.Map; + +public class SearchTaskStatsTests extends AbstractWireSerializingTestCase { + + @Override + protected Writeable.Reader instanceReader() { + return SearchTaskStats::new; + } + + @Override + protected SearchTaskStats createTestInstance() { + return randomInstance(); + } + + public static SearchTaskStats randomInstance() { + Map resourceUsageTrackerStats = Map.of( + TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, + new CpuUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), + 
TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, + new HeapUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), + TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, + new ElapsedTimeTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()) + ); + + return new SearchTaskStats(randomNonNegativeLong(), randomNonNegativeLong(), resourceUsageTrackerStats); + } +} From 7d52b5292bcd881092a0c3c9c48ffd92668d03f0 Mon Sep 17 00:00:00 2001 From: PritLadani Date: Fri, 20 Jan 2023 16:49:27 +0530 Subject: [PATCH 19/34] Changing default values for SearchTaskSettings Signed-off-by: PritLadani --- .../SearchBackpressureService.java | 56 ++++++++++--------- .../settings/SearchTaskSettings.java | 6 +- .../SearchBackpressureServiceTests.java | 14 ++--- 3 files changed, 40 insertions(+), 36 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index c3a36b6ac613a..c7bd4033417fa 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -70,8 +70,7 @@ public class SearchBackpressureService extends AbstractLifecycleComponent private final LongSupplier timeNanosSupplier; private final List nodeDuressTrackers; - private final List searchTaskTrackers; - private final List searchShardTaskTrackers; + private final Map, List> taskTrackers; private final Map, AtomicReference> rateLimiters; private final Map, AtomicReference> ratioLimiters; @@ -138,8 +137,6 @@ public SearchBackpressureService( this.threadPool = threadPool; this.timeNanosSupplier = timeNanosSupplier; this.nodeDuressTrackers = nodeDuressTrackers; - this.searchTaskTrackers = searchTaskTrackers; - this.searchShardTaskTrackers = searchShardTaskTrackers; 
this.searchBackpressureStates = Map.of( SearchTask.class, @@ -148,6 +145,8 @@ public SearchBackpressureService( new SearchBackpressureState() ); + this.taskTrackers = Map.of(SearchTask.class, searchTaskTrackers, SearchShardTask.class, searchShardTaskTrackers); + this.rateLimiters = Map.of( SearchTask.class, new AtomicReference<>( @@ -240,22 +239,16 @@ void doRun() { continue; } - boolean isSearchTask = taskCancellation.getTask() instanceof SearchTask; + Class taskType = getTaskType(taskCancellation.getTask()); // Independently remove tokens from both token buckets. - boolean rateLimitReached = isSearchTask - ? rateLimiters.get(SearchTask.class).get().request() == false - : rateLimiters.get(SearchShardTask.class).get().request() == false; - boolean ratioLimitReached = isSearchTask - ? ratioLimiters.get(SearchTask.class).get().request() == false - : ratioLimiters.get(SearchShardTask.class).get().request() == false; + boolean rateLimitReached = rateLimiters.get(taskType).get().request() == false; + boolean ratioLimitReached = ratioLimiters.get(taskType).get().request() == false; // Stop cancelling tasks if there are no tokens in either of the two token buckets. if (rateLimitReached && ratioLimitReached) { logger.debug("task cancellation limit reached"); - SearchBackpressureState searchBackpressureState = searchBackpressureStates.get( - (taskCancellation.getTask() instanceof SearchTask) ? 
SearchTask.class : SearchShardTask.class - ); + SearchBackpressureState searchBackpressureState = searchBackpressureStates.get(taskType); if (searchBackpressureState != null) { searchBackpressureState.incrementLimitReachedCount(); } @@ -266,6 +259,19 @@ void doRun() { } } + /** + * Given a task, returns the type of the task + */ + Class getTaskType(Task task) { + if (task instanceof SearchTask) { + return SearchTask.class; + } else if (task instanceof SearchShardTask) { + return SearchShardTask.class; + } else { + throw new IllegalArgumentException(""); + } + } + /** * Returns true if the node is in duress consecutively for the past 'n' observations. */ @@ -315,8 +321,8 @@ List getTa TaskCancellation getTaskCancellation(CancellableTask task) { List reasons = new ArrayList<>(); List callbacks = new ArrayList<>(); - boolean isSearchTask = task instanceof SearchTask; - List trackers = isSearchTask ? searchTaskTrackers : searchShardTaskTrackers; + Class taskType = getTaskType(task); + List trackers = taskTrackers.get(taskType); for (TaskResourceUsageTracker tracker : trackers) { Optional reason = tracker.checkAndMaybeGetCancellationReason(task); if (reason.isPresent()) { @@ -325,9 +331,7 @@ TaskCancellation getTaskCancellation(CancellableTask task) { } } - SearchBackpressureState searchBackpressureState = searchBackpressureStates.get( - isSearchTask ? SearchTask.class : SearchShardTask.class - ); + SearchBackpressureState searchBackpressureState = searchBackpressureStates.get(taskType); if (searchBackpressureState != null) { callbacks.add(searchBackpressureState::incrementCancellationCount); } @@ -366,18 +370,16 @@ public void onTaskCompleted(Task task) { } CancellableTask cancellableTask = (CancellableTask) task; - boolean isSearchTask = task instanceof SearchTask; + Class taskType = getTaskType(task); if (cancellableTask.isCancelled() == false) { - SearchBackpressureState searchBackpressureState = searchBackpressureStates.get( - isSearchTask ? 
SearchTask.class : SearchShardTask.class - ); + SearchBackpressureState searchBackpressureState = searchBackpressureStates.get(taskType); if (searchBackpressureState != null) { searchBackpressureState.incrementCompletionCount(); } } List exceptions = new ArrayList<>(); - List trackers = isSearchTask ? searchTaskTrackers : searchShardTaskTrackers; + List trackers = taskTrackers.get(taskType); for (TaskResourceUsageTracker tracker : trackers) { try { tracker.update(task); @@ -476,14 +478,16 @@ public SearchBackpressureStats nodeStats() { SearchTaskStats searchTaskStats = new SearchTaskStats( searchBackpressureStates.get(SearchTask.class).getCancellationCount(), searchBackpressureStates.get(SearchTask.class).getLimitReachedCount(), - searchTaskTrackers.stream() + taskTrackers.get(SearchTask.class) + .stream() .collect(Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.stats(searchTasks))) ); SearchShardTaskStats searchShardTaskStats = new SearchShardTaskStats( searchBackpressureStates.get(SearchShardTask.class).getCancellationCount(), searchBackpressureStates.get(SearchShardTask.class).getLimitReachedCount(), - searchShardTaskTrackers.stream() + taskTrackers.get(SearchShardTask.class) + .stream() .collect(Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.stats(searchShardTasks))) ); diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java index af216f84d790e..911b3d91c3609 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java @@ -33,10 +33,10 @@ private static class Defaults { // TODO: decide on default settings for SearchTask private static final double CANCELLATION_RATIO = 0.1; private static final double 
CANCELLATION_RATE = 0.003; - private static final double CANCELLATION_BURST = 10.0; + private static final double CANCELLATION_BURST = 5.0; private static final double TOTAL_HEAP_PERCENT_THRESHOLD = 0.05; - private static final long CPU_TIME_MILLIS_THRESHOLD = 60000; - private static final long ELAPSED_TIME_MILLIS_THRESHOLD = 120000; + private static final long CPU_TIME_MILLIS_THRESHOLD = 30000; + private static final long ELAPSED_TIME_MILLIS_THRESHOLD = 45000; private static final double HEAP_PERCENT_THRESHOLD = 0.02; private static final double HEAP_VARIANCE_THRESHOLD = 2.0; private static final int HEAP_MOVING_AVERAGE_WINDOW_SIZE = 100; diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index d43fedac047dc..e76490cc4019b 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -169,7 +169,7 @@ public void testSearchTaskInFlightCancellation() { TaskResourceUsageTracker mockTaskResourceUsageTracker = getMockedTaskResourceUsageTracker(); // Mocking 'settings' with predictable rate limiting thresholds. - SearchBackpressureSettings settings = getBackpressureSettings("enforced", 0.2, 0.005, 10.0); + SearchBackpressureSettings settings = getBackpressureSettings("enforced", 0.1, 0.003, 5.0); SearchBackpressureService service = new SearchBackpressureService( settings, @@ -202,29 +202,29 @@ public void testSearchTaskInFlightCancellation() { } doReturn(activeSearchTasks).when(mockTaskResourceTrackingService).getResourceAwareTasks(); - // There are 25 SearchTasks eligible for cancellation but only 10 will be cancelled (burst limit). + // There are 25 SearchTasks eligible for cancellation but only 5 will be cancelled (burst limit). 
LogManager.getLogger(SearchBackpressureServiceTests.class).info("first run"); service.doRun(); - assertEquals(10, service.getSearchBackpressureTaskStats(SearchTask.class).getCancellationCount()); + assertEquals(5, service.getSearchBackpressureTaskStats(SearchTask.class).getCancellationCount()); assertEquals(1, service.getSearchBackpressureTaskStats(SearchTask.class).getLimitReachedCount()); // If the clock or completed task count haven't made sufficient progress, we'll continue to be rate-limited. LogManager.getLogger(SearchBackpressureServiceTests.class).info("second run"); service.doRun(); - assertEquals(10, service.getSearchBackpressureTaskStats(SearchTask.class).getCancellationCount()); + assertEquals(5, service.getSearchBackpressureTaskStats(SearchTask.class).getCancellationCount()); assertEquals(2, service.getSearchBackpressureTaskStats(SearchTask.class).getLimitReachedCount()); // Fast-forward the clock by ten second to replenish some tokens. - // This will add 50 tokens (time delta * rate) to 'rateLimitPerTime' but it will cancel only 10 tasks (burst limit). + // This will add 50 tokens (time delta * rate) to 'rateLimitPerTime' but it will cancel only 5 tasks (burst limit). mockTime.addAndGet(TimeUnit.SECONDS.toNanos(10)); LogManager.getLogger(SearchBackpressureServiceTests.class).info("third run"); service.doRun(); - assertEquals(20, service.getSearchBackpressureTaskStats(SearchTask.class).getCancellationCount()); + assertEquals(10, service.getSearchBackpressureTaskStats(SearchTask.class).getCancellationCount()); assertEquals(3, service.getSearchBackpressureTaskStats(SearchTask.class).getLimitReachedCount()); // Verify search backpressure stats. 
SearchBackpressureStats expectedStats = new SearchBackpressureStats( - new SearchTaskStats(20, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(20))), + new SearchTaskStats(10, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(10))), new SearchShardTaskStats(0, 0, Collections.emptyMap()), SearchBackpressureMode.ENFORCED ); From 5933c2d1d8600b643d7519c0f0ffc384209d9cf1 Mon Sep 17 00:00:00 2001 From: PritLadani Date: Mon, 23 Jan 2023 17:33:09 +0530 Subject: [PATCH 20/34] Adding version checks for SearchTaskStats Signed-off-by: PritLadani --- .../backpressure/stats/SearchBackpressureStats.java | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java index 756ce79d3a769..848e133701467 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java @@ -8,6 +8,7 @@ package org.opensearch.search.backpressure.stats; +import org.opensearch.Version; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; import org.opensearch.common.io.stream.Writeable; @@ -39,7 +40,11 @@ public SearchBackpressureStats( public SearchBackpressureStats(StreamInput in) throws IOException { searchShardTaskStats = new SearchShardTaskStats(in); mode = SearchBackpressureMode.fromName(in.readString()); - searchTaskStats = in.readOptionalWriteable(SearchTaskStats::new); + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + searchTaskStats = in.readOptionalWriteable(SearchTaskStats::new); + } else { + searchTaskStats = null; + } } @Override @@ -55,8 +60,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws public void writeTo(StreamOutput 
out) throws IOException { searchShardTaskStats.writeTo(out); out.writeString(mode.getName()); - // searchTaskStats.writeTo(out); - out.writeOptionalWriteable(searchTaskStats); + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeOptionalWriteable(searchTaskStats); + } } @Override From d933936a3851695ccd1eb4c0a0171e7d34f351c8 Mon Sep 17 00:00:00 2001 From: PritLadani Date: Sun, 29 Jan 2023 11:56:18 +0530 Subject: [PATCH 21/34] Avoiding heap usage cancellation in case of undefined maximum heap memory Signed-off-by: PritLadani --- .../settings/SearchShardTaskSettings.java | 4 ++-- .../settings/SearchTaskSettings.java | 5 ++--- .../stats/SearchBackpressureStats.java | 16 +++++++++------- .../backpressure/trackers/HeapUsageTracker.java | 4 +++- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java index 1cfb2a5a350f5..1bb7a36858c77 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java @@ -12,20 +12,20 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; -import org.opensearch.monitor.jvm.JvmStats; import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; +import static org.opensearch.search.backpressure.trackers.HeapUsageTracker.HEAP_SIZE_BYTES; + /** * Defines the settings related to the cancellation of SearchShardTasks. 
* * @opensearch.internal */ public class SearchShardTaskSettings { - private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); private final List listeners = new ArrayList<>(); private static class Defaults { diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java index 911b3d91c3609..af2c3389dbbef 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java @@ -12,13 +12,14 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; -import org.opensearch.monitor.jvm.JvmStats; import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; +import static org.opensearch.search.backpressure.trackers.HeapUsageTracker.HEAP_SIZE_BYTES; + /** * Defines the settings related to the cancellation of SearchTasks. 
* @@ -26,11 +27,9 @@ */ public class SearchTaskSettings { - private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); private final List listeners = new ArrayList<>(); private static class Defaults { - // TODO: decide on default settings for SearchTask private static final double CANCELLATION_RATIO = 0.1; private static final double CANCELLATION_RATE = 0.003; private static final double CANCELLATION_BURST = 5.0; diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java index 848e133701467..b1aa164fe66ec 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java @@ -23,18 +23,18 @@ * Stats related to search backpressure. */ public class SearchBackpressureStats implements ToXContentFragment, Writeable { - private final SearchTaskStats searchTaskStats; private final SearchShardTaskStats searchShardTaskStats; private final SearchBackpressureMode mode; + private final SearchTaskStats searchTaskStats; public SearchBackpressureStats( SearchTaskStats searchTaskStats, SearchShardTaskStats searchShardTaskStats, SearchBackpressureMode mode ) { - this.searchTaskStats = searchTaskStats; this.searchShardTaskStats = searchShardTaskStats; this.mode = mode; + this.searchTaskStats = searchTaskStats; } public SearchBackpressureStats(StreamInput in) throws IOException { @@ -49,11 +49,13 @@ public SearchBackpressureStats(StreamInput in) throws IOException { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - return builder.startObject("search_backpressure") - .field("search_task", searchTaskStats) - .field("search_shard_task", searchShardTaskStats) - .field("mode", mode.getName()) - .endObject(); + 
builder.startObject("search_backpressure"); + if (searchTaskStats != null) { + builder.field("search_task", searchTaskStats); + } + builder.field("search_shard_task", searchShardTaskStats); + builder.field("mode", mode.getName()); + return builder.endObject(); } @Override diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java index 15058bf8fe156..4e38b6583e803 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java @@ -15,6 +15,7 @@ import org.opensearch.common.unit.ByteSizeValue; import org.opensearch.common.util.MovingAverage; import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.monitor.jvm.JvmStats; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; @@ -35,6 +36,7 @@ * @opensearch.internal */ public class HeapUsageTracker extends TaskResourceUsageTracker { + public static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); private final DoubleSupplier heapVarianceSupplier; private final LongSupplier heapBytesThresholdSupplier; private final AtomicReference movingAverageReference; @@ -77,7 +79,7 @@ public Optional checkAndMaybeGetCancellationReason(Task double allowedUsage = averageUsage * variance; double threshold = heapBytesThresholdSupplier.getAsLong(); - if (currentUsage < threshold || currentUsage < allowedUsage) { + if (currentUsage < threshold || currentUsage < allowedUsage || HEAP_SIZE_BYTES == 0) { return Optional.empty(); } From 31f3f07ef6ec9d6367a1a52cce4bd4018577efdb Mon Sep 17 00:00:00 2001 From: PritLadani Date: Mon, 30 Jan 2023 00:06:38 +0530 Subject: [PATCH 22/34] Moving limiters in SearchBackpressureState Signed-off-by: PritLadani --- CHANGELOG.md | 8 - .../SearchBackpressureService.java | 137 
++++-------------- .../backpressure/SearchBackpressureState.java | 44 +++++- .../SearchBackpressureServiceTests.java | 32 ++-- 4 files changed, 89 insertions(+), 132 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 538f4fd5baae9..8d38562cf87e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,14 +12,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add support for ppc64le architecture ([#5459](https://github.com/opensearch-project/OpenSearch/pull/5459)) - Cancellation of in-flight SearchTasks based on resource consumption ([#5606](https://github.com/opensearch-project/OpenSearch/pull/5605)) -- Support versioning for Weighted routing apis([#5255](https://github.com/opensearch-project/OpenSearch/pull/5255)) -- Added @gbbafna as an OpenSearch maintainer ([#5668](https://github.com/opensearch-project/OpenSearch/pull/5668)) -- Add support for discovered cluster manager and remove local weights ([#5680](https://github.com/opensearch-project/OpenSearch/pull/5680)) -- Added support for feature flags in opensearch.yml ([#4959](https://github.com/opensearch-project/OpenSearch/pull/4959)) -- Add query for initialized extensions ([#5658](https://github.com/opensearch-project/OpenSearch/pull/5658)) -- Revert 'Added jackson dependency to server' and change extension reading ([#5768](https://github.com/opensearch-project/OpenSearch/pull/5768)) -- Add support to disallow search request with preference parameter with strict weighted shard routing([#5874](https://github.com/opensearch-project/OpenSearch/pull/5874)) - ### Dependencies - Bumps `log4j-core` from 2.18.0 to 2.19.0 - Bumps `reactor-netty-http` from 1.0.18 to 1.0.23 diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index c7bd4033417fa..dd23995cc720a 100644 --- 
a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -72,9 +72,6 @@ public class SearchBackpressureService extends AbstractLifecycleComponent private final List nodeDuressTrackers; private final Map, List> taskTrackers; - private final Map, AtomicReference> rateLimiters; - private final Map, AtomicReference> ratioLimiters; - private final Map, SearchBackpressureState> searchBackpressureStates; public SearchBackpressureService( @@ -140,58 +137,20 @@ public SearchBackpressureService( this.searchBackpressureStates = Map.of( SearchTask.class, - new SearchBackpressureState(), + new SearchBackpressureState( + timeNanosSupplier, + getSettings().getSearchTaskSettings().getCancellationRateNanos(), + getSettings().getSearchTaskSettings().getCancellationBurst(), + getSettings().getSearchTaskSettings().getCancellationRatio()), SearchShardTask.class, - new SearchBackpressureState() + new SearchBackpressureState( + timeNanosSupplier, + getSettings().getSearchShardTaskSettings().getCancellationRateNanos(), + getSettings().getSearchShardTaskSettings().getCancellationBurst(), + getSettings().getSearchShardTaskSettings().getCancellationRatio()) ); this.taskTrackers = Map.of(SearchTask.class, searchTaskTrackers, SearchShardTask.class, searchShardTaskTrackers); - - this.rateLimiters = Map.of( - SearchTask.class, - new AtomicReference<>( - new TokenBucket( - timeNanosSupplier, - getSettings().getSearchTaskSettings().getCancellationRateNanos(), - getSettings().getSearchTaskSettings().getCancellationBurst() - ) - ), - SearchShardTask.class, - new AtomicReference<>( - new TokenBucket( - timeNanosSupplier, - getSettings().getSearchShardTaskSettings().getCancellationRateNanos(), - getSettings().getSearchShardTaskSettings().getCancellationBurst() - ) - ) - ); - - this.ratioLimiters = Map.of( - SearchTask.class, - new AtomicReference<>( - new TokenBucket( - 
this::getSearchTaskCompletionCount, - getSettings().getSearchTaskSettings().getCancellationRatio(), - getSettings().getSearchTaskSettings().getCancellationBurst() - ) - ), - SearchShardTask.class, - new AtomicReference<>( - new TokenBucket( - this::getSearchShardTaskCompletionCount, - getSettings().getSearchShardTaskSettings().getCancellationRatio(), - getSettings().getSearchShardTaskSettings().getCancellationBurst() - ) - ) - ); - } - - private long getSearchTaskCompletionCount() { - return searchBackpressureStates.get(SearchTask.class).getCompletionCount(); - } - - private long getSearchShardTaskCompletionCount() { - return searchBackpressureStates.get(SearchShardTask.class).getCompletionCount(); } void doRun() { @@ -242,16 +201,14 @@ void doRun() { Class taskType = getTaskType(taskCancellation.getTask()); // Independently remove tokens from both token buckets. - boolean rateLimitReached = rateLimiters.get(taskType).get().request() == false; - boolean ratioLimitReached = ratioLimiters.get(taskType).get().request() == false; + SearchBackpressureState searchBackpressureState = searchBackpressureStates.get(taskType); + boolean rateLimitReached = searchBackpressureState.getRateLimiter().get().request() == false; + boolean ratioLimitReached = searchBackpressureState.getRatioLimiter().get().request() == false; // Stop cancelling tasks if there are no tokens in either of the two token buckets. 
if (rateLimitReached && ratioLimitReached) { logger.debug("task cancellation limit reached"); - SearchBackpressureState searchBackpressureState = searchBackpressureStates.get(taskType); - if (searchBackpressureState != null) { - searchBackpressureState.incrementLimitReachedCount(); - } + searchBackpressureState.incrementLimitReachedCount(); break; } @@ -268,7 +225,7 @@ Class getTaskType(Task task) { } else if (task instanceof SearchShardTask) { return SearchShardTask.class; } else { - throw new IllegalArgumentException(""); + throw new IllegalArgumentException("task must be instance of either SearchTask or SearchShardTask"); } } @@ -330,11 +287,7 @@ TaskCancellation getTaskCancellation(CancellableTask task) { reasons.add(reason.get()); } } - - SearchBackpressureState searchBackpressureState = searchBackpressureStates.get(taskType); - if (searchBackpressureState != null) { - callbacks.add(searchBackpressureState::incrementCancellationCount); - } + callbacks.add(searchBackpressureStates.get(taskType)::incrementCancellationCount); return new TaskCancellation(task, reasons, callbacks); } @@ -343,19 +296,18 @@ TaskCancellation getTaskCancellation(CancellableTask task) { * Returns a list of TaskCancellations sorted by descending order of their cancellation scores. 
*/ List getTaskCancellations(List tasks) { - List t = tasks.stream() + return tasks.stream() .map(this::getTaskCancellation) .filter(TaskCancellation::isEligibleForCancellation) .sorted(Comparator.reverseOrder()) .collect(Collectors.toUnmodifiableList()); - return t; } SearchBackpressureSettings getSettings() { return settings; } - SearchBackpressureState getSearchBackpressureTaskStats(Class taskType) { + SearchBackpressureState getSearchBackpressureStats(Class taskType) { return searchBackpressureStates.get(taskType); } @@ -372,10 +324,7 @@ public void onTaskCompleted(Task task) { CancellableTask cancellableTask = (CancellableTask) task; Class taskType = getTaskType(task); if (cancellableTask.isCancelled() == false) { - SearchBackpressureState searchBackpressureState = searchBackpressureStates.get(taskType); - if (searchBackpressureState != null) { - searchBackpressureState.incrementCompletionCount(); - } + searchBackpressureStates.get(taskType).incrementCompletionCount(); } List exceptions = new ArrayList<>(); @@ -393,62 +342,38 @@ public void onTaskCompleted(Task task) { @Override public void onCancellationRatioSearchTaskChanged() { - ratioLimiters.get(SearchTask.class) - .set( - new TokenBucket( - this::getSearchTaskCompletionCount, - getSettings().getSearchTaskSettings().getCancellationRatio(), - getSettings().getSearchTaskSettings().getCancellationBurst() - ) - ); + searchBackpressureStates.get(SearchTask.class) + .onCancellationRatioChanged(getSettings().getSearchTaskSettings().getCancellationRatio()); } @Override public void onCancellationRateSearchTaskChanged() { - rateLimiters.get(SearchTask.class) - .set( - new TokenBucket( - timeNanosSupplier, - getSettings().getSearchTaskSettings().getCancellationRateNanos(), - getSettings().getSearchTaskSettings().getCancellationBurst() - ) - ); + searchBackpressureStates.get(SearchTask.class) + .onCancellationRateChanged(getSettings().getSearchTaskSettings().getCancellationRate()); } @Override public void 
onCancellationBurstSearchTaskChanged() { - onCancellationRatioSearchTaskChanged(); - onCancellationRateSearchTaskChanged(); + searchBackpressureStates.get(SearchTask.class) + .onCancellationBurstChanged(getSettings().getSearchTaskSettings().getCancellationBurst()); } @Override public void onCancellationRatioSearchShardTaskChanged() { - ratioLimiters.get(SearchShardTask.class) - .set( - new TokenBucket( - this::getSearchShardTaskCompletionCount, - getSettings().getSearchShardTaskSettings().getCancellationRatio(), - getSettings().getSearchShardTaskSettings().getCancellationBurst() - ) - ); + searchBackpressureStates.get(SearchShardTask.class) + .onCancellationRatioChanged(getSettings().getSearchShardTaskSettings().getCancellationRatio()); } @Override public void onCancellationRateSearchShardTaskChanged() { - rateLimiters.get(SearchShardTask.class) - .set( - new TokenBucket( - timeNanosSupplier, - getSettings().getSearchShardTaskSettings().getCancellationRateNanos(), - getSettings().getSearchShardTaskSettings().getCancellationBurst() - ) - ); + searchBackpressureStates.get(SearchShardTask.class) + .onCancellationRateChanged(getSettings().getSearchShardTaskSettings().getCancellationRate()); } @Override public void onCancellationBurstSearchShardTaskChanged() { - onCancellationRatioSearchShardTaskChanged(); - onCancellationRateSearchShardTaskChanged(); + searchBackpressureStates.get(SearchShardTask.class) + .onCancellationBurstChanged(getSettings().getSearchShardTaskSettings().getCancellationBurst()); } @Override diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureState.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureState.java index a62231ec29ede..723f96ddda9c2 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureState.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureState.java @@ -8,7 +8,11 @@ package 
org.opensearch.search.backpressure; +import org.opensearch.common.util.TokenBucket; + import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.LongSupplier; /** * Tracks the current state of task completions and cancellations. @@ -16,20 +20,32 @@ * @opensearch.internal */ public class SearchBackpressureState { + private final AtomicReference rateLimiter, ratioLimiter; + private final LongSupplier timeNanosSupplier; /** * The number of successful task completions. */ private final AtomicLong completionCount = new AtomicLong(); - /** * The number of task cancellations due to limit breaches. */ private final AtomicLong cancellationCount = new AtomicLong(); - /** * The number of times task cancellation limit was reached. */ private final AtomicLong limitReachedCount = new AtomicLong(); + private double cancellationBurst, cancellationRate, cancellationRatio; + + SearchBackpressureState( + LongSupplier timeNanosSupplier, + double cancellationRateNanos, + double cancellationBurst, + double cancellationRatio) { + rateLimiter = new AtomicReference<>(new TokenBucket(timeNanosSupplier, cancellationRateNanos, cancellationBurst)); + ratioLimiter = new AtomicReference<>(new TokenBucket(this::getCompletionCount, cancellationRatio, cancellationBurst)); + this.timeNanosSupplier = timeNanosSupplier; + this.cancellationBurst = cancellationBurst; + } public long getCompletionCount() { return completionCount.get(); @@ -54,4 +70,28 @@ public long getLimitReachedCount() { long incrementLimitReachedCount() { return limitReachedCount.incrementAndGet(); } + + public AtomicReference getRateLimiter() { + return rateLimiter; + } + + public AtomicReference getRatioLimiter() { + return ratioLimiter; + } + + void onCancellationBurstChanged(double cancellationBurst) { + this.cancellationBurst = cancellationBurst; + onCancellationRateChanged(cancellationRate); + onCancellationRatioChanged(cancellationRatio); + } + + void 
onCancellationRateChanged(double cancellationRate) { + this.cancellationRate = cancellationRate; + rateLimiter.set(new TokenBucket(timeNanosSupplier, cancellationRate, cancellationBurst)); + } + + void onCancellationRatioChanged(double cancellationRatio) { + this.cancellationRatio = cancellationRatio; + ratioLimiter.set(new TokenBucket(this::getCompletionCount, cancellationRatio, cancellationBurst)); + } } diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index e76490cc4019b..fb7a3e3a14c0f 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -125,7 +125,7 @@ public void testTrackerStateUpdateOnSearchTaskCompletion() { // service.onTaskCompleted(new SearchTask(1, "test", "test", () -> "Test", TaskId.EMPTY_TASK_ID, new HashMap<>())); service.onTaskCompleted(createMockTaskWithResourceStats(SearchTask.class, 100, 200)); } - assertEquals(100, service.getSearchBackpressureTaskStats(SearchTask.class).getCompletionCount()); + assertEquals(100, service.getSearchBackpressureStats(SearchTask.class).getCompletionCount()); verify(mockTaskResourceUsageTracker, times(100)).update(any()); } @@ -155,7 +155,7 @@ public void testTrackerStateUpdateOnSearchShardTaskCompletion() { for (int i = 0; i < 100; i++) { service.onTaskCompleted(createMockTaskWithResourceStats(SearchShardTask.class, 100, 200)); } - assertEquals(100, service.getSearchBackpressureTaskStats(SearchShardTask.class).getCompletionCount()); + assertEquals(100, service.getSearchBackpressureStats(SearchShardTask.class).getCompletionCount()); verify(mockTaskResourceUsageTracker, times(100)).update(any()); } @@ -205,22 +205,22 @@ public void testSearchTaskInFlightCancellation() { // There are 25 SearchTasks eligible for 
cancellation but only 5 will be cancelled (burst limit). LogManager.getLogger(SearchBackpressureServiceTests.class).info("first run"); service.doRun(); - assertEquals(5, service.getSearchBackpressureTaskStats(SearchTask.class).getCancellationCount()); - assertEquals(1, service.getSearchBackpressureTaskStats(SearchTask.class).getLimitReachedCount()); + assertEquals(5, service.getSearchBackpressureStats(SearchTask.class).getCancellationCount()); + assertEquals(1, service.getSearchBackpressureStats(SearchTask.class).getLimitReachedCount()); // If the clock or completed task count haven't made sufficient progress, we'll continue to be rate-limited. LogManager.getLogger(SearchBackpressureServiceTests.class).info("second run"); service.doRun(); - assertEquals(5, service.getSearchBackpressureTaskStats(SearchTask.class).getCancellationCount()); - assertEquals(2, service.getSearchBackpressureTaskStats(SearchTask.class).getLimitReachedCount()); + assertEquals(5, service.getSearchBackpressureStats(SearchTask.class).getCancellationCount()); + assertEquals(2, service.getSearchBackpressureStats(SearchTask.class).getLimitReachedCount()); // Fast-forward the clock by ten second to replenish some tokens. // This will add 50 tokens (time delta * rate) to 'rateLimitPerTime' but it will cancel only 5 tasks (burst limit). mockTime.addAndGet(TimeUnit.SECONDS.toNanos(10)); LogManager.getLogger(SearchBackpressureServiceTests.class).info("third run"); service.doRun(); - assertEquals(10, service.getSearchBackpressureTaskStats(SearchTask.class).getCancellationCount()); - assertEquals(3, service.getSearchBackpressureTaskStats(SearchTask.class).getLimitReachedCount()); + assertEquals(10, service.getSearchBackpressureStats(SearchTask.class).getCancellationCount()); + assertEquals(3, service.getSearchBackpressureStats(SearchTask.class).getLimitReachedCount()); // Verify search backpressure stats. 
SearchBackpressureStats expectedStats = new SearchBackpressureStats( @@ -277,13 +277,13 @@ public void testSearchShardTaskInFlightCancellation() { // There are 15 SearchShardTasks eligible for cancellation but only 10 will be cancelled (burst limit). service.doRun(); - assertEquals(10, service.getSearchBackpressureTaskStats(SearchShardTask.class).getCancellationCount()); - assertEquals(1, service.getSearchBackpressureTaskStats(SearchShardTask.class).getLimitReachedCount()); + assertEquals(10, service.getSearchBackpressureStats(SearchShardTask.class).getCancellationCount()); + assertEquals(1, service.getSearchBackpressureStats(SearchShardTask.class).getLimitReachedCount()); // If the clock or completed task count haven't made sufficient progress, we'll continue to be rate-limited. service.doRun(); - assertEquals(10, service.getSearchBackpressureTaskStats(SearchShardTask.class).getCancellationCount()); - assertEquals(2, service.getSearchBackpressureTaskStats(SearchShardTask.class).getLimitReachedCount()); + assertEquals(10, service.getSearchBackpressureStats(SearchShardTask.class).getCancellationCount()); + assertEquals(2, service.getSearchBackpressureStats(SearchShardTask.class).getLimitReachedCount()); // Simulate task completion to replenish some tokens. // This will add 2 tokens (task count delta * cancellationRatio) to 'rateLimitPerTaskCompletion'. 
@@ -291,15 +291,15 @@ public void testSearchShardTaskInFlightCancellation() { service.onTaskCompleted(createMockTaskWithResourceStats(SearchShardTask.class, 100, taskHeapUsageBytes)); } service.doRun(); - assertEquals(12, service.getSearchBackpressureTaskStats(SearchShardTask.class).getCancellationCount()); - assertEquals(3, service.getSearchBackpressureTaskStats(SearchShardTask.class).getLimitReachedCount()); + assertEquals(12, service.getSearchBackpressureStats(SearchShardTask.class).getCancellationCount()); + assertEquals(3, service.getSearchBackpressureStats(SearchShardTask.class).getLimitReachedCount()); // Fast-forward the clock by one second to replenish some tokens. // This will add 3 tokens (time delta * rate) to 'rateLimitPerTime'. mockTime.addAndGet(TimeUnit.SECONDS.toNanos(1)); service.doRun(); - assertEquals(15, service.getSearchBackpressureTaskStats(SearchShardTask.class).getCancellationCount()); - assertEquals(3, service.getSearchBackpressureTaskStats(SearchShardTask.class).getLimitReachedCount()); // no more tasks to cancel; + assertEquals(15, service.getSearchBackpressureStats(SearchShardTask.class).getCancellationCount()); + assertEquals(3, service.getSearchBackpressureStats(SearchShardTask.class).getLimitReachedCount()); // no more tasks to cancel; // limit not reached // Verify search backpressure stats. 
From 26d19a09facd3cccb9ff35d0c713824088c84007 Mon Sep 17 00:00:00 2001 From: PritLadani Date: Mon, 30 Jan 2023 00:43:43 +0530 Subject: [PATCH 23/34] code formatting Signed-off-by: PritLadani --- .../search/backpressure/SearchBackpressureService.java | 8 ++++---- .../search/backpressure/SearchBackpressureState.java | 3 ++- .../backpressure/SearchBackpressureServiceTests.java | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index dd23995cc720a..09d48723284a3 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -14,7 +14,6 @@ import org.opensearch.action.search.SearchShardTask; import org.opensearch.action.search.SearchTask; import org.opensearch.common.component.AbstractLifecycleComponent; -import org.opensearch.common.util.TokenBucket; import org.opensearch.monitor.jvm.JvmStats; import org.opensearch.monitor.process.ProcessProbe; import org.opensearch.search.backpressure.settings.SearchBackpressureMode; @@ -45,7 +44,6 @@ import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.concurrent.atomic.AtomicReference; import java.util.function.LongSupplier; import java.util.stream.Collectors; @@ -141,13 +139,15 @@ public SearchBackpressureService( timeNanosSupplier, getSettings().getSearchTaskSettings().getCancellationRateNanos(), getSettings().getSearchTaskSettings().getCancellationBurst(), - getSettings().getSearchTaskSettings().getCancellationRatio()), + getSettings().getSearchTaskSettings().getCancellationRatio() + ), SearchShardTask.class, new SearchBackpressureState( timeNanosSupplier, getSettings().getSearchShardTaskSettings().getCancellationRateNanos(), 
getSettings().getSearchShardTaskSettings().getCancellationBurst(), - getSettings().getSearchShardTaskSettings().getCancellationRatio()) + getSettings().getSearchShardTaskSettings().getCancellationRatio() + ) ); this.taskTrackers = Map.of(SearchTask.class, searchTaskTrackers, SearchShardTask.class, searchShardTaskTrackers); diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureState.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureState.java index 723f96ddda9c2..38608238467e9 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureState.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureState.java @@ -40,7 +40,8 @@ public class SearchBackpressureState { LongSupplier timeNanosSupplier, double cancellationRateNanos, double cancellationBurst, - double cancellationRatio) { + double cancellationRatio + ) { rateLimiter = new AtomicReference<>(new TokenBucket(timeNanosSupplier, cancellationRateNanos, cancellationBurst)); ratioLimiter = new AtomicReference<>(new TokenBucket(this::getCompletionCount, cancellationRatio, cancellationBurst)); this.timeNanosSupplier = timeNanosSupplier; diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index fb7a3e3a14c0f..15ae522526319 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -300,7 +300,7 @@ public void testSearchShardTaskInFlightCancellation() { service.doRun(); assertEquals(15, service.getSearchBackpressureStats(SearchShardTask.class).getCancellationCount()); assertEquals(3, service.getSearchBackpressureStats(SearchShardTask.class).getLimitReachedCount()); // no more tasks to cancel; - // limit not reached 
+ // limit not reached // Verify search backpressure stats. SearchBackpressureStats expectedStats = new SearchBackpressureStats( From dd10073656d8dc46823de5645a3eaeae986fc21d Mon Sep 17 00:00:00 2001 From: PritLadani Date: Tue, 31 Jan 2023 01:25:01 +0530 Subject: [PATCH 24/34] Adding TaskManager#cancelTaskAndDescendants for task cancellation Signed-off-by: PritLadani --- .../main/java/org/opensearch/node/Node.java | 3 +- .../SearchBackpressureService.java | 14 +++-- .../opensearch/tasks/TaskCancellation.java | 20 +++++++ .../SearchBackpressureServiceTests.java | 56 ++++++++++++++----- 4 files changed, 73 insertions(+), 20 deletions(-) diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 662a44a81ef85..f60b241b0ee70 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -867,7 +867,8 @@ protected Node( final SearchBackpressureService searchBackpressureService = new SearchBackpressureService( searchBackpressureSettings, taskResourceTrackingService, - threadPool + threadPool, + transportService.getTaskManager() ); final RecoverySettings recoverySettings = new RecoverySettings(settings, settingsModule.getClusterSettings()); diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index 09d48723284a3..bf12fc3e1eac4 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -33,6 +33,7 @@ import org.opensearch.tasks.SearchBackpressureTask; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; +import org.opensearch.tasks.TaskManager; import org.opensearch.tasks.TaskResourceTrackingService; import 
org.opensearch.tasks.TaskResourceTrackingService.TaskCompletionListener; import org.opensearch.threadpool.Scheduler; @@ -71,11 +72,13 @@ public class SearchBackpressureService extends AbstractLifecycleComponent private final Map, List> taskTrackers; private final Map, SearchBackpressureState> searchBackpressureStates; + private final TaskManager taskManager; public SearchBackpressureService( SearchBackpressureSettings settings, TaskResourceTrackingService taskResourceTrackingService, - ThreadPool threadPool + ThreadPool threadPool, + TaskManager taskManager ) { this( settings, @@ -111,7 +114,8 @@ public SearchBackpressureService( SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE ), new ElapsedTimeTracker(settings.getSearchShardTaskSettings()::getElapsedTimeNanosThreshold, System::nanoTime) - ) + ), + taskManager ); } @@ -122,7 +126,8 @@ public SearchBackpressureService( LongSupplier timeNanosSupplier, List nodeDuressTrackers, List searchTaskTrackers, - List searchShardTaskTrackers + List searchShardTaskTrackers, + TaskManager taskManager ) { this.settings = settings; this.settings.getSearchTaskSettings().addListener(this); @@ -132,6 +137,7 @@ public SearchBackpressureService( this.threadPool = threadPool; this.timeNanosSupplier = timeNanosSupplier; this.nodeDuressTrackers = nodeDuressTrackers; + this.taskManager = taskManager; this.searchBackpressureStates = Map.of( SearchTask.class, @@ -212,7 +218,7 @@ void doRun() { break; } - taskCancellation.cancel(); + taskCancellation.cancelTaskAndDescendants(taskManager); } } diff --git a/server/src/main/java/org/opensearch/tasks/TaskCancellation.java b/server/src/main/java/org/opensearch/tasks/TaskCancellation.java index d09312f38e3eb..710c6a5bf8e94 100644 --- a/server/src/main/java/org/opensearch/tasks/TaskCancellation.java +++ b/server/src/main/java/org/opensearch/tasks/TaskCancellation.java @@ -9,6 +9,7 @@ package org.opensearch.tasks; import org.opensearch.ExceptionsHelper; +import 
org.opensearch.action.ActionListener; import java.util.ArrayList; import java.util.List; @@ -54,7 +55,26 @@ public void cancel() { } task.cancel(getReasonString()); + runOnCancelCallbacks(); + } + + /** + * Cancels the task and its descendants and invokes all onCancelCallbacks. + */ + public void cancelTaskAndDescendants(TaskManager taskManager) { + if (isEligibleForCancellation() == false) { + return; + } + + taskManager.cancelTaskAndDescendants(task, getReasonString(), false, ActionListener.wrap(() -> {})); + task.cancel(getReasonString()); + runOnCancelCallbacks(); + } + /** + * invokes all onCancelCallbacks. + */ + private void runOnCancelCallbacks() { List exceptions = new ArrayList<>(); for (Runnable callback : onCancelCallbacks) { try { diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index 15ae522526319..b2a4bcb9248fc 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -9,6 +9,9 @@ package org.opensearch.search.backpressure; import org.apache.logging.log4j.LogManager; +import org.junit.After; +import org.junit.Before; +import org.opensearch.Version; import org.opensearch.action.search.SearchShardTask; import org.opensearch.action.search.SearchTask; import org.opensearch.common.io.stream.StreamInput; @@ -29,8 +32,12 @@ import org.opensearch.tasks.CancellableTask; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; +import org.opensearch.tasks.TaskCancellationService; +import org.opensearch.tasks.TaskManager; import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.test.transport.MockTransportService; +import org.opensearch.threadpool.TestThreadPool; import 
org.opensearch.threadpool.ThreadPool; import java.io.IOException; @@ -55,10 +62,28 @@ import static org.opensearch.search.backpressure.SearchBackpressureTestHelpers.createMockTaskWithResourceStats; public class SearchBackpressureServiceTests extends OpenSearchTestCase { + MockTransportService transportService; + TaskManager taskManager; + ThreadPool threadPool; + + @Before + public void setup() { + threadPool = new TestThreadPool(getClass().getName()); + transportService = MockTransportService.createNewService(Settings.EMPTY, Version.CURRENT, threadPool); + transportService.start(); + transportService.acceptIncomingRequests(); + taskManager = transportService.getTaskManager(); + taskManager.setTaskCancellationService(new TaskCancellationService(transportService)); + } + + @After + public void cleanup() { + transportService.close(); + ThreadPool.terminate(threadPool, 5, TimeUnit.SECONDS); + } public void testIsNodeInDuress() { TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); - ThreadPool mockThreadPool = mock(ThreadPool.class); AtomicReference cpuUsage = new AtomicReference<>(); AtomicReference heapUsage = new AtomicReference<>(); @@ -73,11 +98,12 @@ public void testIsNodeInDuress() { SearchBackpressureService service = new SearchBackpressureService( settings, mockTaskResourceTrackingService, - mockThreadPool, + threadPool, System::nanoTime, List.of(cpuUsageTracker, heapUsageTracker), Collections.emptyList(), - Collections.emptyList() + Collections.emptyList(), + taskManager ); // Node not in duress. 
@@ -102,7 +128,6 @@ public void testIsNodeInDuress() { public void testTrackerStateUpdateOnSearchTaskCompletion() { TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); - ThreadPool mockThreadPool = mock(ThreadPool.class); LongSupplier mockTimeNanosSupplier = () -> TimeUnit.SECONDS.toNanos(1234); TaskResourceUsageTracker mockTaskResourceUsageTracker = mock(TaskResourceUsageTracker.class); @@ -114,11 +139,12 @@ public void testTrackerStateUpdateOnSearchTaskCompletion() { SearchBackpressureService service = new SearchBackpressureService( settings, mockTaskResourceTrackingService, - mockThreadPool, + threadPool, mockTimeNanosSupplier, Collections.emptyList(), List.of(mockTaskResourceUsageTracker), - Collections.emptyList() + Collections.emptyList(), + taskManager ); for (int i = 0; i < 100; i++) { @@ -131,7 +157,6 @@ public void testTrackerStateUpdateOnSearchTaskCompletion() { public void testTrackerStateUpdateOnSearchShardTaskCompletion() { TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); - ThreadPool mockThreadPool = mock(ThreadPool.class); LongSupplier mockTimeNanosSupplier = () -> TimeUnit.SECONDS.toNanos(1234); TaskResourceUsageTracker mockTaskResourceUsageTracker = mock(TaskResourceUsageTracker.class); @@ -143,11 +168,12 @@ public void testTrackerStateUpdateOnSearchShardTaskCompletion() { SearchBackpressureService service = new SearchBackpressureService( settings, mockTaskResourceTrackingService, - mockThreadPool, + threadPool, mockTimeNanosSupplier, Collections.emptyList(), Collections.emptyList(), - List.of(mockTaskResourceUsageTracker) + List.of(mockTaskResourceUsageTracker), + taskManager ); // Record task completions to update the tracker state. Tasks other than SearchTask & SearchShardTask are ignored. 
@@ -161,7 +187,6 @@ public void testTrackerStateUpdateOnSearchShardTaskCompletion() { public void testSearchTaskInFlightCancellation() { TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); - ThreadPool mockThreadPool = mock(ThreadPool.class); AtomicLong mockTime = new AtomicLong(0); LongSupplier mockTimeNanosSupplier = mockTime::get; NodeDuressTracker mockNodeDuressTracker = new NodeDuressTracker(() -> true); @@ -174,11 +199,12 @@ public void testSearchTaskInFlightCancellation() { SearchBackpressureService service = new SearchBackpressureService( settings, mockTaskResourceTrackingService, - mockThreadPool, + threadPool, mockTimeNanosSupplier, List.of(mockNodeDuressTracker), List.of(mockTaskResourceUsageTracker), - Collections.emptyList() + Collections.emptyList(), + taskManager ); // Run two iterations so that node is marked 'in duress' from the third iteration onwards. @@ -234,7 +260,6 @@ public void testSearchTaskInFlightCancellation() { public void testSearchShardTaskInFlightCancellation() { TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); - ThreadPool mockThreadPool = mock(ThreadPool.class); AtomicLong mockTime = new AtomicLong(0); LongSupplier mockTimeNanosSupplier = mockTime::get; NodeDuressTracker mockNodeDuressTracker = new NodeDuressTracker(() -> true); @@ -247,11 +272,12 @@ public void testSearchShardTaskInFlightCancellation() { SearchBackpressureService service = new SearchBackpressureService( settings, mockTaskResourceTrackingService, - mockThreadPool, + threadPool, mockTimeNanosSupplier, List.of(mockNodeDuressTracker), Collections.emptyList(), - List.of(mockTaskResourceUsageTracker) + List.of(mockTaskResourceUsageTracker), + taskManager ); // Run two iterations so that node is marked 'in duress' from the third iteration onwards. 
From 9e0fa78d7b57c4360f3efe950c537eb075fc3343 Mon Sep 17 00:00:00 2001 From: PritLadani Date: Tue, 31 Jan 2023 01:52:55 +0530 Subject: [PATCH 25/34] Removing heap usage tracker if heap size is undefined Signed-off-by: PritLadani --- .../SearchBackpressureService.java | 27 ++----------------- .../settings/SearchShardTaskSettings.java | 25 +++++++++++++++++ .../settings/SearchTaskSettings.java | 25 +++++++++++++++++ .../trackers/HeapUsageTracker.java | 2 +- 4 files changed, 53 insertions(+), 26 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index bf12fc3e1eac4..f0c9fa8268485 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -23,9 +23,6 @@ import org.opensearch.search.backpressure.stats.SearchBackpressureStats; import org.opensearch.search.backpressure.stats.SearchShardTaskStats; import org.opensearch.search.backpressure.stats.SearchTaskStats; -import org.opensearch.search.backpressure.trackers.CpuUsageTracker; -import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; -import org.opensearch.search.backpressure.trackers.HeapUsageTracker; import org.opensearch.search.backpressure.trackers.NodeDuressTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; @@ -93,28 +90,8 @@ public SearchBackpressureService( () -> JvmStats.jvmStats().getMem().getHeapUsedPercent() / 100.0 >= settings.getNodeDuressSettings().getHeapThreshold() ) ), - List.of( - new CpuUsageTracker(settings.getSearchTaskSettings()::getCpuTimeNanosThreshold), - new HeapUsageTracker( - settings.getSearchTaskSettings()::getHeapVarianceThreshold, - 
settings.getSearchTaskSettings()::getHeapBytesThreshold, - settings.getSearchTaskSettings().getHeapMovingAverageWindowSize(), - settings.getClusterSettings(), - SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE - ), - new ElapsedTimeTracker(settings.getSearchTaskSettings()::getElapsedTimeNanosThreshold, System::nanoTime) - ), - List.of( - new CpuUsageTracker(settings.getSearchShardTaskSettings()::getCpuTimeNanosThreshold), - new HeapUsageTracker( - settings.getSearchShardTaskSettings()::getHeapVarianceThreshold, - settings.getSearchShardTaskSettings()::getHeapBytesThreshold, - settings.getSearchShardTaskSettings().getHeapMovingAverageWindowSize(), - settings.getClusterSettings(), - SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE - ), - new ElapsedTimeTracker(settings.getSearchShardTaskSettings()::getElapsedTimeNanosThreshold, System::nanoTime) - ), + settings.getSearchTaskSettings().getTrackers(), + settings.getSearchShardTaskSettings().getTrackers(), taskManager ); } diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java index 1bb7a36858c77..18478653192fc 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java @@ -12,8 +12,13 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; +import org.opensearch.search.backpressure.trackers.CpuUsageTracker; +import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; +import org.opensearch.search.backpressure.trackers.HeapUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import 
java.util.concurrent.TimeUnit; import java.util.function.Consumer; @@ -27,6 +32,7 @@ */ public class SearchShardTaskSettings { private final List listeners = new ArrayList<>(); + private final ClusterSettings clusterSettings; private static class Defaults { private static final double CANCELLATION_RATIO = 0.1; @@ -165,6 +171,7 @@ public SearchShardTaskSettings(Settings settings, ClusterSettings clusterSetting this.cancellationRatio = SETTING_CANCELLATION_RATIO.get(settings); this.cancellationRate = SETTING_CANCELLATION_RATE.get(settings); this.cancellationBurst = SETTING_CANCELLATION_BURST.get(settings); + this.clusterSettings = clusterSettings; clusterSettings.addSettingsUpdateConsumer(SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, this::setTotalHeapPercentThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD, this::setCpuTimeMillisThreshold); @@ -271,6 +278,24 @@ private void setCancellationBurst(double cancellationBurst) { notifyListeners(Listener::onCancellationBurstSearchShardTaskChanged); } + public List getTrackers() { + List trackers = new ArrayList<>(); + trackers.add(new CpuUsageTracker(this::getCpuTimeNanosThreshold)); + if (HEAP_SIZE_BYTES > 0) { + trackers.add( + new HeapUsageTracker( + this::getHeapVarianceThreshold, + this::getHeapBytesThreshold, + this.getHeapMovingAverageWindowSize(), + clusterSettings, + SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE + ) + ); + } + trackers.add(new ElapsedTimeTracker(this::getElapsedTimeNanosThreshold, System::nanoTime)); + return Collections.unmodifiableList(trackers); + } + public void addListener(Listener listener) { listeners.add(listener); } diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java index af2c3389dbbef..4ce40f7bdc756 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java +++ 
b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java @@ -12,8 +12,13 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; +import org.opensearch.search.backpressure.trackers.CpuUsageTracker; +import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; +import org.opensearch.search.backpressure.trackers.HeapUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; @@ -28,6 +33,7 @@ public class SearchTaskSettings { private final List listeners = new ArrayList<>(); + private final ClusterSettings clusterSettings; private static class Defaults { private static final double CANCELLATION_RATIO = 0.1; @@ -166,6 +172,7 @@ public SearchTaskSettings(Settings settings, ClusterSettings clusterSettings) { this.cancellationRatio = SETTING_CANCELLATION_RATIO.get(settings); this.cancellationRate = SETTING_CANCELLATION_RATE.get(settings); this.cancellationBurst = SETTING_CANCELLATION_BURST.get(settings); + this.clusterSettings = clusterSettings; clusterSettings.addSettingsUpdateConsumer(SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, this::setTotalHeapPercentThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD, this::setCpuTimeMillisThreshold); @@ -272,6 +279,24 @@ private void setCancellationBurst(double cancellationBurst) { notifyListeners(Listener::onCancellationBurstSearchTaskChanged); } + public List getTrackers() { + List trackers = new ArrayList<>(); + trackers.add(new CpuUsageTracker(this::getCpuTimeNanosThreshold)); + if (HEAP_SIZE_BYTES > 0) { + trackers.add( + new HeapUsageTracker( + this::getHeapVarianceThreshold, + this::getHeapBytesThreshold, + this.getHeapMovingAverageWindowSize(), + clusterSettings, + 
SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE + ) + ); + } + trackers.add(new ElapsedTimeTracker(this::getElapsedTimeNanosThreshold, System::nanoTime)); + return Collections.unmodifiableList(trackers); + } + public void addListener(Listener listener) { listeners.add(listener); } diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java index 4e38b6583e803..7d3ac80be9bbb 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java @@ -79,7 +79,7 @@ public Optional checkAndMaybeGetCancellationReason(Task double allowedUsage = averageUsage * variance; double threshold = heapBytesThresholdSupplier.getAsLong(); - if (currentUsage < threshold || currentUsage < allowedUsage || HEAP_SIZE_BYTES == 0) { + if (currentUsage < threshold || currentUsage < allowedUsage) { return Optional.empty(); } From 1f17964718c3626c3e7dc71ca29cd7b235f2d936 Mon Sep 17 00:00:00 2001 From: PritLadani Date: Tue, 31 Jan 2023 14:33:21 +0530 Subject: [PATCH 26/34] Extracting CancellationListener Signed-off-by: PritLadani --- .../backpressure/SearchBackpressureIT.java | 11 +++++ .../backpressure/CancellationListener.java | 21 ++++++++ .../SearchBackpressureService.java | 48 ++----------------- .../backpressure/SearchBackpressureState.java | 30 +++++++----- .../settings/SearchShardTaskSettings.java | 26 ++++------ .../settings/SearchTaskSettings.java | 22 ++++----- 6 files changed, 69 insertions(+), 89 deletions(-) create mode 100644 server/src/main/java/org/opensearch/search/backpressure/CancellationListener.java diff --git a/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java b/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java index 
8d343821cfa87..a63c3287ea124 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java @@ -16,6 +16,7 @@ import org.opensearch.action.ActionRequestValidationException; import org.opensearch.action.ActionResponse; import org.opensearch.action.ActionType; +import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsResponse; import org.opensearch.action.search.SearchShardTask; import org.opensearch.action.search.SearchTask; import org.opensearch.action.support.ActionFilters; @@ -89,6 +90,16 @@ public final void cleanupNodeSettings() { ); } + public void testCancellationSettingsChanged() { + Settings request = Settings.builder().put(SearchTaskSettings.SETTING_CANCELLATION_RATE.getKey(), "0.05").build(); + ClusterUpdateSettingsResponse response = client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get(); + assertEquals(response.getPersistentSettings().get(SearchTaskSettings.SETTING_CANCELLATION_RATE.getKey()), "0.05"); + + request = Settings.builder().put(SearchShardTaskSettings.SETTING_CANCELLATION_RATIO.getKey(), "0.7").build(); + response = client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get(); + assertEquals(response.getPersistentSettings().get(SearchShardTaskSettings.SETTING_CANCELLATION_RATIO.getKey()), "0.7"); + } + public void testSearchTaskCancellationWithHighElapsedTime() throws InterruptedException { Settings request = Settings.builder() .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") diff --git a/server/src/main/java/org/opensearch/search/backpressure/CancellationListener.java b/server/src/main/java/org/opensearch/search/backpressure/CancellationListener.java new file mode 100644 index 0000000000000..127f1b7983f92 --- /dev/null +++ 
b/server/src/main/java/org/opensearch/search/backpressure/CancellationListener.java @@ -0,0 +1,21 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.backpressure; + +/** + * Listener for callbacks related to cancellation settings + */ +public interface CancellationListener { + + void onRatioChanged(double ratio); + + void onRateChanged(double rate); + + void onBurstChanged(double burst); +} diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index f0c9fa8268485..2ebb34d5ecdc5 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -18,8 +18,6 @@ import org.opensearch.monitor.process.ProcessProbe; import org.opensearch.search.backpressure.settings.SearchBackpressureMode; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; -import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; -import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; import org.opensearch.search.backpressure.stats.SearchShardTaskStats; import org.opensearch.search.backpressure.stats.SearchTaskStats; @@ -51,11 +49,7 @@ * * @opensearch.internal */ -public class SearchBackpressureService extends AbstractLifecycleComponent - implements - TaskCompletionListener, - SearchTaskSettings.Listener, - SearchShardTaskSettings.Listener { +public class SearchBackpressureService extends AbstractLifecycleComponent implements TaskCompletionListener { private static final Logger logger = 
LogManager.getLogger(SearchBackpressureService.class); private volatile Scheduler.Cancellable scheduledFuture; @@ -107,8 +101,6 @@ public SearchBackpressureService( TaskManager taskManager ) { this.settings = settings; - this.settings.getSearchTaskSettings().addListener(this); - this.settings.getSearchShardTaskSettings().addListener(this); this.taskResourceTrackingService = taskResourceTrackingService; this.taskResourceTrackingService.addTaskCompletionListener(this); this.threadPool = threadPool; @@ -132,6 +124,8 @@ public SearchBackpressureService( getSettings().getSearchShardTaskSettings().getCancellationRatio() ) ); + this.settings.getSearchTaskSettings().addListener(searchBackpressureStates.get(SearchTask.class)); + this.settings.getSearchShardTaskSettings().addListener(searchBackpressureStates.get(SearchShardTask.class)); this.taskTrackers = Map.of(SearchTask.class, searchTaskTrackers, SearchShardTask.class, searchShardTaskTrackers); } @@ -323,42 +317,6 @@ public void onTaskCompleted(Task task) { ExceptionsHelper.maybeThrowRuntimeAndSuppress(exceptions); } - @Override - public void onCancellationRatioSearchTaskChanged() { - searchBackpressureStates.get(SearchTask.class) - .onCancellationRatioChanged(getSettings().getSearchTaskSettings().getCancellationRatio()); - } - - @Override - public void onCancellationRateSearchTaskChanged() { - searchBackpressureStates.get(SearchTask.class) - .onCancellationRateChanged(getSettings().getSearchTaskSettings().getCancellationRate()); - } - - @Override - public void onCancellationBurstSearchTaskChanged() { - searchBackpressureStates.get(SearchTask.class) - .onCancellationBurstChanged(getSettings().getSearchTaskSettings().getCancellationBurst()); - } - - @Override - public void onCancellationRatioSearchShardTaskChanged() { - searchBackpressureStates.get(SearchShardTask.class) - .onCancellationRatioChanged(getSettings().getSearchShardTaskSettings().getCancellationRatio()); - } - - @Override - public void 
onCancellationRateSearchShardTaskChanged() { - searchBackpressureStates.get(SearchShardTask.class) - .onCancellationRateChanged(getSettings().getSearchShardTaskSettings().getCancellationRate()); - } - - @Override - public void onCancellationBurstSearchShardTaskChanged() { - searchBackpressureStates.get(SearchShardTask.class) - .onCancellationBurstChanged(getSettings().getSearchShardTaskSettings().getCancellationBurst()); - } - @Override protected void doStart() { scheduledFuture = threadPool.scheduleWithFixedDelay(() -> { diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureState.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureState.java index 38608238467e9..de759d8425a8d 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureState.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureState.java @@ -19,8 +19,9 @@ * * @opensearch.internal */ -public class SearchBackpressureState { - private final AtomicReference rateLimiter, ratioLimiter; +public class SearchBackpressureState implements CancellationListener { + private final AtomicReference rateLimiter; + private final AtomicReference ratioLimiter; private final LongSupplier timeNanosSupplier; /** * The number of successful task completions. @@ -34,7 +35,9 @@ public class SearchBackpressureState { * The number of times task cancellation limit was reached. 
*/ private final AtomicLong limitReachedCount = new AtomicLong(); - private double cancellationBurst, cancellationRate, cancellationRatio; + private double cancellationBurst; + private double cancellationRate; + private double cancellationRatio; SearchBackpressureState( LongSupplier timeNanosSupplier, @@ -80,19 +83,22 @@ public AtomicReference getRatioLimiter() { return ratioLimiter; } - void onCancellationBurstChanged(double cancellationBurst) { - this.cancellationBurst = cancellationBurst; - onCancellationRateChanged(cancellationRate); - onCancellationRatioChanged(cancellationRatio); + @Override + public void onRatioChanged(double ratio) { + this.cancellationRatio = ratio; + ratioLimiter.set(new TokenBucket(this::getCompletionCount, cancellationRatio, cancellationBurst)); } - void onCancellationRateChanged(double cancellationRate) { - this.cancellationRate = cancellationRate; + @Override + public void onRateChanged(double rate) { + this.cancellationRate = rate; rateLimiter.set(new TokenBucket(timeNanosSupplier, cancellationRate, cancellationBurst)); } - void onCancellationRatioChanged(double cancellationRatio) { - this.cancellationRatio = cancellationRatio; - ratioLimiter.set(new TokenBucket(this::getCompletionCount, cancellationRatio, cancellationBurst)); + @Override + public void onBurstChanged(double burst) { + this.cancellationBurst = burst; + onRateChanged(cancellationRate); + onRatioChanged(cancellationRatio); } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java index 18478653192fc..3eb51d9a80cc3 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java @@ -12,6 +12,7 @@ import org.opensearch.common.settings.ClusterSettings; import 
org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; +import org.opensearch.search.backpressure.CancellationListener; import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; @@ -31,7 +32,7 @@ * @opensearch.internal */ public class SearchShardTaskSettings { - private final List listeners = new ArrayList<>(); + private final List listeners = new ArrayList<>(); private final ClusterSettings clusterSettings; private static class Defaults { @@ -184,17 +185,6 @@ public SearchShardTaskSettings(Settings settings, ClusterSettings clusterSetting clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST, this::setCancellationBurst); } - /** - * Callback listeners. - */ - public interface Listener { - void onCancellationRatioSearchShardTaskChanged(); - - void onCancellationRateSearchShardTaskChanged(); - - void onCancellationBurstSearchShardTaskChanged(); - } - public double getTotalHeapPercentThreshold() { return totalHeapPercentThreshold; } @@ -253,7 +243,7 @@ public double getCancellationRatio() { private void setCancellationRatio(double cancellationRatio) { this.cancellationRatio = cancellationRatio; - notifyListeners(Listener::onCancellationRatioSearchShardTaskChanged); + notifyListeners(listener -> listener.onRatioChanged(cancellationRatio)); } public double getCancellationRate() { @@ -266,7 +256,7 @@ public double getCancellationRateNanos() { private void setCancellationRate(double cancellationRate) { this.cancellationRate = cancellationRate; - notifyListeners(Listener::onCancellationRateSearchShardTaskChanged); + notifyListeners(listener -> listener.onRateChanged(cancellationRate)); } public double getCancellationBurst() { @@ -275,7 +265,7 @@ public double getCancellationBurst() { private void setCancellationBurst(double cancellationBurst) { this.cancellationBurst = 
cancellationBurst; - notifyListeners(Listener::onCancellationBurstSearchShardTaskChanged); + notifyListeners(listener -> listener.onBurstChanged(cancellationBurst)); } public List getTrackers() { @@ -296,14 +286,14 @@ public List getTrackers() { return Collections.unmodifiableList(trackers); } - public void addListener(Listener listener) { + public void addListener(CancellationListener listener) { listeners.add(listener); } - private void notifyListeners(Consumer consumer) { + private void notifyListeners(Consumer consumer) { List exceptions = new ArrayList<>(); - for (Listener listener : listeners) { + for (CancellationListener listener : listeners) { try { consumer.accept(listener); } catch (Exception e) { diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java index 4ce40f7bdc756..e05ca325d89a5 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java @@ -12,6 +12,7 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; +import org.opensearch.search.backpressure.CancellationListener; import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; @@ -32,7 +33,7 @@ */ public class SearchTaskSettings { - private final List listeners = new ArrayList<>(); + private final List listeners = new ArrayList<>(); private final ClusterSettings clusterSettings; private static class Defaults { @@ -188,13 +189,6 @@ public SearchTaskSettings(Settings settings, ClusterSettings clusterSettings) { /** * Callback listeners. 
*/ - public interface Listener { - void onCancellationRatioSearchTaskChanged(); - - void onCancellationRateSearchTaskChanged(); - - void onCancellationBurstSearchTaskChanged(); - } public double getTotalHeapPercentThreshold() { return totalHeapPercentThreshold; @@ -254,7 +248,7 @@ public double getCancellationRatio() { private void setCancellationRatio(double cancellationRatio) { this.cancellationRatio = cancellationRatio; - notifyListeners(Listener::onCancellationRatioSearchTaskChanged); + notifyListeners(listener -> listener.onRatioChanged(cancellationRatio)); } public double getCancellationRate() { @@ -267,7 +261,7 @@ public double getCancellationRateNanos() { private void setCancellationRate(double cancellationRate) { this.cancellationRate = cancellationRate; - notifyListeners(Listener::onCancellationRateSearchTaskChanged); + notifyListeners(listener -> listener.onRateChanged(cancellationRate)); } public double getCancellationBurst() { @@ -276,7 +270,7 @@ public double getCancellationBurst() { private void setCancellationBurst(double cancellationBurst) { this.cancellationBurst = cancellationBurst; - notifyListeners(Listener::onCancellationBurstSearchTaskChanged); + notifyListeners(listener -> listener.onBurstChanged(cancellationBurst)); } public List getTrackers() { @@ -297,14 +291,14 @@ public List getTrackers() { return Collections.unmodifiableList(trackers); } - public void addListener(Listener listener) { + public void addListener(CancellationListener listener) { listeners.add(listener); } - private void notifyListeners(Consumer consumer) { + private void notifyListeners(Consumer consumer) { List exceptions = new ArrayList<>(); - for (Listener listener : listeners) { + for (CancellationListener listener : listeners) { try { consumer.accept(listener); } catch (Exception e) { From 7d06d9194f98af969514c2b1ddc6842cd5649d3f Mon Sep 17 00:00:00 2001 From: PritLadani Date: Tue, 31 Jan 2023 14:33:21 +0530 Subject: [PATCH 27/34] Extracting CancellationListener 
Signed-off-by: PritLadani --- .../backpressure/SearchBackpressureIT.java | 11 +++++ .../backpressure/CancellationListener.java | 21 ++++++++ .../SearchBackpressureService.java | 48 ++----------------- .../backpressure/SearchBackpressureState.java | 30 +++++++----- .../settings/SearchShardTaskSettings.java | 26 ++++------ .../settings/SearchTaskSettings.java | 22 ++++----- 6 files changed, 69 insertions(+), 89 deletions(-) create mode 100644 server/src/main/java/org/opensearch/search/backpressure/CancellationListener.java diff --git a/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java b/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java index 8d343821cfa87..a63c3287ea124 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java @@ -16,6 +16,7 @@ import org.opensearch.action.ActionRequestValidationException; import org.opensearch.action.ActionResponse; import org.opensearch.action.ActionType; +import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsResponse; import org.opensearch.action.search.SearchShardTask; import org.opensearch.action.search.SearchTask; import org.opensearch.action.support.ActionFilters; @@ -89,6 +90,16 @@ public final void cleanupNodeSettings() { ); } + public void testCancellationSettingsChanged() { + Settings request = Settings.builder().put(SearchTaskSettings.SETTING_CANCELLATION_RATE.getKey(), "0.05").build(); + ClusterUpdateSettingsResponse response = client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get(); + assertEquals(response.getPersistentSettings().get(SearchTaskSettings.SETTING_CANCELLATION_RATE.getKey()), "0.05"); + + request = Settings.builder().put(SearchShardTaskSettings.SETTING_CANCELLATION_RATIO.getKey(), "0.7").build(); + response = 
client().admin().cluster().prepareUpdateSettings().setPersistentSettings(request).get(); + assertEquals(response.getPersistentSettings().get(SearchShardTaskSettings.SETTING_CANCELLATION_RATIO.getKey()), "0.7"); + } + public void testSearchTaskCancellationWithHighElapsedTime() throws InterruptedException { Settings request = Settings.builder() .put(SearchBackpressureSettings.SETTING_MODE.getKey(), "enforced") diff --git a/server/src/main/java/org/opensearch/search/backpressure/CancellationListener.java b/server/src/main/java/org/opensearch/search/backpressure/CancellationListener.java new file mode 100644 index 0000000000000..127f1b7983f92 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/backpressure/CancellationListener.java @@ -0,0 +1,21 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.backpressure; + +/** + * Listener for callbacks related to cancellation settings + */ +public interface CancellationListener { + + void onRatioChanged(double ratio); + + void onRateChanged(double rate); + + void onBurstChanged(double burst); +} diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index f0c9fa8268485..2ebb34d5ecdc5 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -18,8 +18,6 @@ import org.opensearch.monitor.process.ProcessProbe; import org.opensearch.search.backpressure.settings.SearchBackpressureMode; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; -import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; -import 
org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; import org.opensearch.search.backpressure.stats.SearchShardTaskStats; import org.opensearch.search.backpressure.stats.SearchTaskStats; @@ -51,11 +49,7 @@ * * @opensearch.internal */ -public class SearchBackpressureService extends AbstractLifecycleComponent - implements - TaskCompletionListener, - SearchTaskSettings.Listener, - SearchShardTaskSettings.Listener { +public class SearchBackpressureService extends AbstractLifecycleComponent implements TaskCompletionListener { private static final Logger logger = LogManager.getLogger(SearchBackpressureService.class); private volatile Scheduler.Cancellable scheduledFuture; @@ -107,8 +101,6 @@ public SearchBackpressureService( TaskManager taskManager ) { this.settings = settings; - this.settings.getSearchTaskSettings().addListener(this); - this.settings.getSearchShardTaskSettings().addListener(this); this.taskResourceTrackingService = taskResourceTrackingService; this.taskResourceTrackingService.addTaskCompletionListener(this); this.threadPool = threadPool; @@ -132,6 +124,8 @@ public SearchBackpressureService( getSettings().getSearchShardTaskSettings().getCancellationRatio() ) ); + this.settings.getSearchTaskSettings().addListener(searchBackpressureStates.get(SearchTask.class)); + this.settings.getSearchShardTaskSettings().addListener(searchBackpressureStates.get(SearchShardTask.class)); this.taskTrackers = Map.of(SearchTask.class, searchTaskTrackers, SearchShardTask.class, searchShardTaskTrackers); } @@ -323,42 +317,6 @@ public void onTaskCompleted(Task task) { ExceptionsHelper.maybeThrowRuntimeAndSuppress(exceptions); } - @Override - public void onCancellationRatioSearchTaskChanged() { - searchBackpressureStates.get(SearchTask.class) - .onCancellationRatioChanged(getSettings().getSearchTaskSettings().getCancellationRatio()); - } - - @Override - public void 
onCancellationRateSearchTaskChanged() { - searchBackpressureStates.get(SearchTask.class) - .onCancellationRateChanged(getSettings().getSearchTaskSettings().getCancellationRate()); - } - - @Override - public void onCancellationBurstSearchTaskChanged() { - searchBackpressureStates.get(SearchTask.class) - .onCancellationBurstChanged(getSettings().getSearchTaskSettings().getCancellationBurst()); - } - - @Override - public void onCancellationRatioSearchShardTaskChanged() { - searchBackpressureStates.get(SearchShardTask.class) - .onCancellationRatioChanged(getSettings().getSearchShardTaskSettings().getCancellationRatio()); - } - - @Override - public void onCancellationRateSearchShardTaskChanged() { - searchBackpressureStates.get(SearchShardTask.class) - .onCancellationRateChanged(getSettings().getSearchShardTaskSettings().getCancellationRate()); - } - - @Override - public void onCancellationBurstSearchShardTaskChanged() { - searchBackpressureStates.get(SearchShardTask.class) - .onCancellationBurstChanged(getSettings().getSearchShardTaskSettings().getCancellationBurst()); - } - @Override protected void doStart() { scheduledFuture = threadPool.scheduleWithFixedDelay(() -> { diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureState.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureState.java index 38608238467e9..e40a5dbc679c7 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureState.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureState.java @@ -19,8 +19,9 @@ * * @opensearch.internal */ -public class SearchBackpressureState { - private final AtomicReference rateLimiter, ratioLimiter; +public class SearchBackpressureState implements CancellationListener { + private final AtomicReference rateLimiter; + private final AtomicReference ratioLimiter; private final LongSupplier timeNanosSupplier; /** * The number of successful task completions. 
@@ -34,7 +35,9 @@ public class SearchBackpressureState { * The number of times task cancellation limit was reached. */ private final AtomicLong limitReachedCount = new AtomicLong(); - private double cancellationBurst, cancellationRate, cancellationRatio; + private volatile double cancellationBurst; + private volatile double cancellationRate; + private volatile double cancellationRatio; SearchBackpressureState( LongSupplier timeNanosSupplier, @@ -80,19 +83,22 @@ public AtomicReference getRatioLimiter() { return ratioLimiter; } - void onCancellationBurstChanged(double cancellationBurst) { - this.cancellationBurst = cancellationBurst; - onCancellationRateChanged(cancellationRate); - onCancellationRatioChanged(cancellationRatio); + @Override + public void onRatioChanged(double ratio) { + this.cancellationRatio = ratio; + ratioLimiter.set(new TokenBucket(this::getCompletionCount, cancellationRatio, cancellationBurst)); } - void onCancellationRateChanged(double cancellationRate) { - this.cancellationRate = cancellationRate; + @Override + public void onRateChanged(double rate) { + this.cancellationRate = rate; rateLimiter.set(new TokenBucket(timeNanosSupplier, cancellationRate, cancellationBurst)); } - void onCancellationRatioChanged(double cancellationRatio) { - this.cancellationRatio = cancellationRatio; - ratioLimiter.set(new TokenBucket(this::getCompletionCount, cancellationRatio, cancellationBurst)); + @Override + public void onBurstChanged(double burst) { + this.cancellationBurst = burst; + onRateChanged(cancellationRate); + onRatioChanged(cancellationRatio); } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java index 18478653192fc..3eb51d9a80cc3 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java +++ 
b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java @@ -12,6 +12,7 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; +import org.opensearch.search.backpressure.CancellationListener; import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; @@ -31,7 +32,7 @@ * @opensearch.internal */ public class SearchShardTaskSettings { - private final List listeners = new ArrayList<>(); + private final List listeners = new ArrayList<>(); private final ClusterSettings clusterSettings; private static class Defaults { @@ -184,17 +185,6 @@ public SearchShardTaskSettings(Settings settings, ClusterSettings clusterSetting clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST, this::setCancellationBurst); } - /** - * Callback listeners. 
- */ - public interface Listener { - void onCancellationRatioSearchShardTaskChanged(); - - void onCancellationRateSearchShardTaskChanged(); - - void onCancellationBurstSearchShardTaskChanged(); - } - public double getTotalHeapPercentThreshold() { return totalHeapPercentThreshold; } @@ -253,7 +243,7 @@ public double getCancellationRatio() { private void setCancellationRatio(double cancellationRatio) { this.cancellationRatio = cancellationRatio; - notifyListeners(Listener::onCancellationRatioSearchShardTaskChanged); + notifyListeners(listener -> listener.onRatioChanged(cancellationRatio)); } public double getCancellationRate() { @@ -266,7 +256,7 @@ public double getCancellationRateNanos() { private void setCancellationRate(double cancellationRate) { this.cancellationRate = cancellationRate; - notifyListeners(Listener::onCancellationRateSearchShardTaskChanged); + notifyListeners(listener -> listener.onRateChanged(cancellationRate)); } public double getCancellationBurst() { @@ -275,7 +265,7 @@ public double getCancellationBurst() { private void setCancellationBurst(double cancellationBurst) { this.cancellationBurst = cancellationBurst; - notifyListeners(Listener::onCancellationBurstSearchShardTaskChanged); + notifyListeners(listener -> listener.onBurstChanged(cancellationBurst)); } public List getTrackers() { @@ -296,14 +286,14 @@ public List getTrackers() { return Collections.unmodifiableList(trackers); } - public void addListener(Listener listener) { + public void addListener(CancellationListener listener) { listeners.add(listener); } - private void notifyListeners(Consumer consumer) { + private void notifyListeners(Consumer consumer) { List exceptions = new ArrayList<>(); - for (Listener listener : listeners) { + for (CancellationListener listener : listeners) { try { consumer.accept(listener); } catch (Exception e) { diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java 
b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java index 4ce40f7bdc756..e05ca325d89a5 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java @@ -12,6 +12,7 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; +import org.opensearch.search.backpressure.CancellationListener; import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; @@ -32,7 +33,7 @@ */ public class SearchTaskSettings { - private final List listeners = new ArrayList<>(); + private final List listeners = new ArrayList<>(); private final ClusterSettings clusterSettings; private static class Defaults { @@ -188,13 +189,6 @@ public SearchTaskSettings(Settings settings, ClusterSettings clusterSettings) { /** * Callback listeners. 
*/ - public interface Listener { - void onCancellationRatioSearchTaskChanged(); - - void onCancellationRateSearchTaskChanged(); - - void onCancellationBurstSearchTaskChanged(); - } public double getTotalHeapPercentThreshold() { return totalHeapPercentThreshold; @@ -254,7 +248,7 @@ public double getCancellationRatio() { private void setCancellationRatio(double cancellationRatio) { this.cancellationRatio = cancellationRatio; - notifyListeners(Listener::onCancellationRatioSearchTaskChanged); + notifyListeners(listener -> listener.onRatioChanged(cancellationRatio)); } public double getCancellationRate() { @@ -267,7 +261,7 @@ public double getCancellationRateNanos() { private void setCancellationRate(double cancellationRate) { this.cancellationRate = cancellationRate; - notifyListeners(Listener::onCancellationRateSearchTaskChanged); + notifyListeners(listener -> listener.onRateChanged(cancellationRate)); } public double getCancellationBurst() { @@ -276,7 +270,7 @@ public double getCancellationBurst() { private void setCancellationBurst(double cancellationBurst) { this.cancellationBurst = cancellationBurst; - notifyListeners(Listener::onCancellationBurstSearchTaskChanged); + notifyListeners(listener -> listener.onBurstChanged(cancellationBurst)); } public List getTrackers() { @@ -297,14 +291,14 @@ public List getTrackers() { return Collections.unmodifiableList(trackers); } - public void addListener(Listener listener) { + public void addListener(CancellationListener listener) { listeners.add(listener); } - private void notifyListeners(Consumer consumer) { + private void notifyListeners(Consumer consumer) { List exceptions = new ArrayList<>(); - for (Listener listener : listeners) { + for (CancellationListener listener : listeners) { try { consumer.accept(listener); } catch (Exception e) { From d9c113d7c67cdf919fc83fd5e2d8d4d79ec8fc6c Mon Sep 17 00:00:00 2001 From: PritLadani Date: Wed, 1 Feb 2023 13:23:55 +0530 Subject: [PATCH 28/34] Separating SearchTaskStats and 
SearchShardTaskStats to avoid BWC issues Signed-off-by: PritLadani --- .../stats/SearchBackpressureStats.java | 2 + .../stats/SearchBackpressureTaskStats.java | 100 ------------------ .../stats/SearchShardTaskStats.java | 71 ++++++++++++- .../backpressure/stats/SearchTaskStats.java | 70 +++++++++++- .../SearchBackpressureTaskStatsTests.java | 44 -------- 5 files changed, 137 insertions(+), 150 deletions(-) delete mode 100644 server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java delete mode 100644 server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStatsTests.java diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java index b1aa164fe66ec..8f8f41b583c42 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureStats.java @@ -9,6 +9,7 @@ package org.opensearch.search.backpressure.stats; import org.opensearch.Version; +import org.opensearch.common.Nullable; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; import org.opensearch.common.io.stream.Writeable; @@ -25,6 +26,7 @@ public class SearchBackpressureStats implements ToXContentFragment, Writeable { private final SearchShardTaskStats searchShardTaskStats; private final SearchBackpressureMode mode; + @Nullable private final SearchTaskStats searchTaskStats; public SearchBackpressureStats( diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java deleted file mode 100644 index ce517d831b2eb..0000000000000 --- 
a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStats.java +++ /dev/null @@ -1,100 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.search.backpressure.stats; - -import org.opensearch.common.collect.MapBuilder; -import org.opensearch.common.io.stream.StreamInput; -import org.opensearch.common.io.stream.StreamOutput; -import org.opensearch.common.io.stream.Writeable; -import org.opensearch.common.xcontent.ToXContentObject; -import org.opensearch.common.xcontent.XContentBuilder; -import org.opensearch.search.backpressure.trackers.CpuUsageTracker; -import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; -import org.opensearch.search.backpressure.trackers.HeapUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; - -import java.io.IOException; -import java.util.Map; -import java.util.Objects; - -/** - * Stats related to cancelled SearchBackpressureTasks. - * Since the children of this class has exact same structures, we have extracted the common stats to this class. - * However, in the future, if some task stats does not have this common stats, we can remove this class. 
- */ -public class SearchBackpressureTaskStats implements ToXContentObject, Writeable { - private final long cancellationCount; - private final long limitReachedCount; - private final Map resourceUsageTrackerStats; - - public SearchBackpressureTaskStats( - long cancellationCount, - long limitReachedCount, - Map resourceUsageTrackerStats - ) { - this.cancellationCount = cancellationCount; - this.limitReachedCount = limitReachedCount; - this.resourceUsageTrackerStats = resourceUsageTrackerStats; - } - - public SearchBackpressureTaskStats(StreamInput in) throws IOException { - this.cancellationCount = in.readVLong(); - this.limitReachedCount = in.readVLong(); - - MapBuilder builder = new MapBuilder<>(); - builder.put(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, in.readOptionalWriteable(CpuUsageTracker.Stats::new)); - builder.put(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, in.readOptionalWriteable(HeapUsageTracker.Stats::new)); - builder.put(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, in.readOptionalWriteable(ElapsedTimeTracker.Stats::new)); - this.resourceUsageTrackerStats = builder.immutableMap(); - } - - @Override - public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(); - - builder.startObject("resource_tracker_stats"); - for (Map.Entry entry : resourceUsageTrackerStats.entrySet()) { - builder.field(entry.getKey().getName(), entry.getValue()); - } - builder.endObject(); - - builder.startObject("cancellation_stats") - .field("cancellation_count", cancellationCount) - .field("cancellation_limit_reached_count", limitReachedCount) - .endObject(); - - return builder.endObject(); - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeVLong(cancellationCount); - out.writeVLong(limitReachedCount); - - out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER)); - 
out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER)); - out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER)); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - SearchBackpressureTaskStats that = (SearchBackpressureTaskStats) o; - return cancellationCount == that.cancellationCount - && limitReachedCount == that.limitReachedCount - && resourceUsageTrackerStats.equals(that.resourceUsageTrackerStats); - } - - @Override - public int hashCode() { - return Objects.hash(cancellationCount, limitReachedCount, resourceUsageTrackerStats); - } -} diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java index c0db0d342d02e..00eb21b7f3d57 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java @@ -8,28 +8,93 @@ package org.opensearch.search.backpressure.stats; +import org.opensearch.common.collect.MapBuilder; import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.common.xcontent.ToXContent; +import org.opensearch.common.xcontent.ToXContentObject; +import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.search.backpressure.trackers.CpuUsageTracker; +import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; +import org.opensearch.search.backpressure.trackers.HeapUsageTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; import 
java.io.IOException; import java.util.Map; +import java.util.Objects; /** * Stats related to cancelled SearchShardTasks. */ -public class SearchShardTaskStats extends SearchBackpressureTaskStats { +public class SearchShardTaskStats implements ToXContentObject, Writeable { + private final long cancellationCount; + private final long limitReachedCount; + private final Map resourceUsageTrackerStats; public SearchShardTaskStats( long cancellationCount, long limitReachedCount, Map resourceUsageTrackerStats ) { - super(cancellationCount, limitReachedCount, resourceUsageTrackerStats); + this.cancellationCount = cancellationCount; + this.limitReachedCount = limitReachedCount; + this.resourceUsageTrackerStats = resourceUsageTrackerStats; } public SearchShardTaskStats(StreamInput in) throws IOException { - super(in); + this.cancellationCount = in.readVLong(); + this.limitReachedCount = in.readVLong(); + + MapBuilder builder = new MapBuilder<>(); + builder.put(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, in.readOptionalWriteable(CpuUsageTracker.Stats::new)); + builder.put(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, in.readOptionalWriteable(HeapUsageTracker.Stats::new)); + builder.put(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, in.readOptionalWriteable(ElapsedTimeTracker.Stats::new)); + this.resourceUsageTrackerStats = builder.immutableMap(); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException { + builder.startObject(); + + builder.startObject("resource_tracker_stats"); + for (Map.Entry entry : resourceUsageTrackerStats.entrySet()) { + builder.field(entry.getKey().getName(), entry.getValue()); + } + builder.endObject(); + + builder.startObject("cancellation_stats") + .field("cancellation_count", cancellationCount) + .field("cancellation_limit_reached_count", limitReachedCount) + .endObject(); + + return builder.endObject(); + } + + @Override + public void writeTo(StreamOutput out) throws 
IOException { + out.writeVLong(cancellationCount); + out.writeVLong(limitReachedCount); + + out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER)); + out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER)); + out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER)); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + SearchShardTaskStats that = (SearchShardTaskStats) o; + return cancellationCount == that.cancellationCount + && limitReachedCount == that.limitReachedCount + && resourceUsageTrackerStats.equals(that.resourceUsageTrackerStats); + } + + @Override + public int hashCode() { + return Objects.hash(cancellationCount, limitReachedCount, resourceUsageTrackerStats); } } diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java index 023e97298b6c4..08b7f39f8aeff 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java @@ -8,28 +8,92 @@ package org.opensearch.search.backpressure.stats; +import org.opensearch.common.collect.MapBuilder; import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.common.xcontent.ToXContentObject; +import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.search.backpressure.trackers.CpuUsageTracker; +import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; +import org.opensearch.search.backpressure.trackers.HeapUsageTracker; import 
org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; import java.io.IOException; import java.util.Map; +import java.util.Objects; /** * Stats related to cancelled SearchTasks. */ -public class SearchTaskStats extends SearchBackpressureTaskStats { +public class SearchTaskStats implements ToXContentObject, Writeable { + private final long cancellationCount; + private final long limitReachedCount; + private final Map resourceUsageTrackerStats; public SearchTaskStats( long cancellationCount, long limitReachedCount, Map resourceUsageTrackerStats ) { - super(cancellationCount, limitReachedCount, resourceUsageTrackerStats); + this.cancellationCount = cancellationCount; + this.limitReachedCount = limitReachedCount; + this.resourceUsageTrackerStats = resourceUsageTrackerStats; } public SearchTaskStats(StreamInput in) throws IOException { - super(in); + this.cancellationCount = in.readVLong(); + this.limitReachedCount = in.readVLong(); + + MapBuilder builder = new MapBuilder<>(); + builder.put(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, in.readOptionalWriteable(CpuUsageTracker.Stats::new)); + builder.put(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, in.readOptionalWriteable(HeapUsageTracker.Stats::new)); + builder.put(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, in.readOptionalWriteable(ElapsedTimeTracker.Stats::new)); + this.resourceUsageTrackerStats = builder.immutableMap(); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + + builder.startObject("resource_tracker_stats"); + for (Map.Entry entry : resourceUsageTrackerStats.entrySet()) { + builder.field(entry.getKey().getName(), entry.getValue()); + } + builder.endObject(); + + builder.startObject("cancellation_stats") + .field("cancellation_count", cancellationCount) + .field("cancellation_limit_reached_count", 
limitReachedCount) + .endObject(); + + return builder.endObject(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVLong(cancellationCount); + out.writeVLong(limitReachedCount); + + out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER)); + out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER)); + out.writeOptionalWriteable(resourceUsageTrackerStats.get(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER)); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + SearchTaskStats that = (SearchTaskStats) o; + return cancellationCount == that.cancellationCount + && limitReachedCount == that.limitReachedCount + && resourceUsageTrackerStats.equals(that.resourceUsageTrackerStats); + } + + @Override + public int hashCode() { + return Objects.hash(cancellationCount, limitReachedCount, resourceUsageTrackerStats); } } diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStatsTests.java deleted file mode 100644 index 92ff3ccee6227..0000000000000 --- a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchBackpressureTaskStatsTests.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.search.backpressure.stats; - -import org.opensearch.common.io.stream.Writeable; -import org.opensearch.search.backpressure.trackers.CpuUsageTracker; -import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; -import org.opensearch.search.backpressure.trackers.HeapUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; -import org.opensearch.test.AbstractWireSerializingTestCase; - -import java.util.Map; - -public class SearchBackpressureTaskStatsTests extends AbstractWireSerializingTestCase { - @Override - protected Writeable.Reader instanceReader() { - return SearchBackpressureTaskStats::new; - } - - @Override - protected SearchBackpressureTaskStats createTestInstance() { - return randomInstance(); - } - - public static SearchBackpressureTaskStats randomInstance() { - Map resourceUsageTrackerStats = Map.of( - TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, - new CpuUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), - TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, - new HeapUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), - TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, - new ElapsedTimeTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()) - ); - - return new SearchBackpressureTaskStats(randomNonNegativeLong(), randomNonNegativeLong(), resourceUsageTrackerStats); - } -} From 8e09787a5553f60629d5ac5be02b038482d8b25c Mon Sep 17 00:00:00 2001 From: PritLadani Date: Wed, 1 Feb 2023 23:06:10 +0530 Subject: [PATCH 29/34] Moving getTrackers to SearchBackpressureService Signed-off-by: PritLadani --- ...java => CancellationSettingsListener.java} | 2 +- .../SearchBackpressureService.java | 66 +++++++++++++++++-- .../backpressure/SearchBackpressureState.java | 10 
+-- .../settings/SearchShardTaskSettings.java | 33 ++-------- .../settings/SearchTaskSettings.java | 33 ++-------- .../opensearch/tasks/TaskCancellation.java | 1 - .../tasks/TaskCancellationService.java | 2 +- .../org/opensearch/tasks/TaskManager.java | 2 +- .../SearchBackpressureServiceTests.java | 25 ++++--- .../opensearch/tasks/TaskManagerTests.java | 14 +++- 10 files changed, 109 insertions(+), 79 deletions(-) rename server/src/main/java/org/opensearch/search/backpressure/{CancellationListener.java => CancellationSettingsListener.java} (90%) diff --git a/server/src/main/java/org/opensearch/search/backpressure/CancellationListener.java b/server/src/main/java/org/opensearch/search/backpressure/CancellationSettingsListener.java similarity index 90% rename from server/src/main/java/org/opensearch/search/backpressure/CancellationListener.java rename to server/src/main/java/org/opensearch/search/backpressure/CancellationSettingsListener.java index 127f1b7983f92..f630b3b8ed987 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/CancellationListener.java +++ b/server/src/main/java/org/opensearch/search/backpressure/CancellationSettingsListener.java @@ -11,7 +11,7 @@ /** * Listener for callbacks related to cancellation settings */ -public interface CancellationListener { +public interface CancellationSettingsListener { void onRatioChanged(double ratio); diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index 2ebb34d5ecdc5..0afebb509341e 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -14,13 +14,20 @@ import org.opensearch.action.search.SearchShardTask; import org.opensearch.action.search.SearchTask; import org.opensearch.common.component.AbstractLifecycleComponent; 
+import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; import org.opensearch.monitor.jvm.JvmStats; import org.opensearch.monitor.process.ProcessProbe; import org.opensearch.search.backpressure.settings.SearchBackpressureMode; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; +import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; +import org.opensearch.search.backpressure.settings.SearchTaskSettings; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; import org.opensearch.search.backpressure.stats.SearchShardTaskStats; import org.opensearch.search.backpressure.stats.SearchTaskStats; +import org.opensearch.search.backpressure.trackers.CpuUsageTracker; +import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; +import org.opensearch.search.backpressure.trackers.HeapUsageTracker; import org.opensearch.search.backpressure.trackers.NodeDuressTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; @@ -36,13 +43,17 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.function.DoubleSupplier; import java.util.function.LongSupplier; import java.util.stream.Collectors; +import static org.opensearch.search.backpressure.trackers.HeapUsageTracker.HEAP_SIZE_BYTES; + /** * SearchBackpressureService is responsible for monitoring and cancelling in-flight search tasks if they are * breaching resource usage limits when the node is in duress. 
@@ -84,8 +95,24 @@ public SearchBackpressureService( () -> JvmStats.jvmStats().getMem().getHeapUsedPercent() / 100.0 >= settings.getNodeDuressSettings().getHeapThreshold() ) ), - settings.getSearchTaskSettings().getTrackers(), - settings.getSearchShardTaskSettings().getTrackers(), + getTrackers( + settings.getSearchTaskSettings()::getCpuTimeNanosThreshold, + settings.getSearchTaskSettings()::getHeapVarianceThreshold, + settings.getSearchTaskSettings()::getHeapBytesThreshold, + settings.getSearchTaskSettings().getHeapMovingAverageWindowSize(), + settings.getSearchTaskSettings()::getElapsedTimeNanosThreshold, + settings.getClusterSettings(), + SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE + ), + getTrackers( + settings.getSearchShardTaskSettings()::getCpuTimeNanosThreshold, + settings.getSearchShardTaskSettings()::getHeapVarianceThreshold, + settings.getSearchShardTaskSettings()::getHeapBytesThreshold, + settings.getSearchShardTaskSettings().getHeapMovingAverageWindowSize(), + settings.getSearchShardTaskSettings()::getElapsedTimeNanosThreshold, + settings.getClusterSettings(), + SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE + ), taskManager ); } @@ -179,8 +206,8 @@ void doRun() { // Independently remove tokens from both token buckets. SearchBackpressureState searchBackpressureState = searchBackpressureStates.get(taskType); - boolean rateLimitReached = searchBackpressureState.getRateLimiter().get().request() == false; - boolean ratioLimitReached = searchBackpressureState.getRatioLimiter().get().request() == false; + boolean rateLimitReached = searchBackpressureState.getRateLimiter().request() == false; + boolean ratioLimitReached = searchBackpressureState.getRatioLimiter().request() == false; // Stop cancelling tasks if there are no tokens in either of the two token buckets. 
if (rateLimitReached && ratioLimitReached) { @@ -288,6 +315,37 @@ SearchBackpressureState getSearchBackpressureStats(Class getTrackers( + LongSupplier cpuThresholdSupplier, + DoubleSupplier heapVarianceSupplier, + LongSupplier heapBytesThresholdSupplier, + int heapMovingAverageWindowSize, + LongSupplier ElapsedTimeNanosSupplier, + ClusterSettings clusterSettings, + Setting windowSizeSetting + ) { + List trackers = new ArrayList<>(); + trackers.add(new CpuUsageTracker(cpuThresholdSupplier)); + if (HEAP_SIZE_BYTES > 0) { + trackers.add( + new HeapUsageTracker( + heapVarianceSupplier, + heapBytesThresholdSupplier, + heapMovingAverageWindowSize, + clusterSettings, + windowSizeSetting + ) + ); + } else { + logger.warn("heap size couldn't be determined"); + } + trackers.add(new ElapsedTimeTracker(ElapsedTimeNanosSupplier, System::nanoTime)); + return Collections.unmodifiableList(trackers); + } + @Override public void onTaskCompleted(Task task) { if (getSettings().getMode() == SearchBackpressureMode.DISABLED) { diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureState.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureState.java index e40a5dbc679c7..5f086bd498036 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureState.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureState.java @@ -19,7 +19,7 @@ * * @opensearch.internal */ -public class SearchBackpressureState implements CancellationListener { +public class SearchBackpressureState implements CancellationSettingsListener { private final AtomicReference rateLimiter; private final AtomicReference ratioLimiter; private final LongSupplier timeNanosSupplier; @@ -75,12 +75,12 @@ long incrementLimitReachedCount() { return limitReachedCount.incrementAndGet(); } - public AtomicReference getRateLimiter() { - return rateLimiter; + public TokenBucket getRateLimiter() { + return rateLimiter.get(); } - 
public AtomicReference getRatioLimiter() { - return ratioLimiter; + public TokenBucket getRatioLimiter() { + return ratioLimiter.get(); } @Override diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java index 3eb51d9a80cc3..f7fb95f08f0c2 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java @@ -12,14 +12,9 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; -import org.opensearch.search.backpressure.CancellationListener; -import org.opensearch.search.backpressure.trackers.CpuUsageTracker; -import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; -import org.opensearch.search.backpressure.trackers.HeapUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.CancellationSettingsListener; import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; @@ -32,7 +27,7 @@ * @opensearch.internal */ public class SearchShardTaskSettings { - private final List listeners = new ArrayList<>(); + private final List listeners = new ArrayList<>(); private final ClusterSettings clusterSettings; private static class Defaults { @@ -268,32 +263,14 @@ private void setCancellationBurst(double cancellationBurst) { notifyListeners(listener -> listener.onBurstChanged(cancellationBurst)); } - public List getTrackers() { - List trackers = new ArrayList<>(); - trackers.add(new CpuUsageTracker(this::getCpuTimeNanosThreshold)); - if (HEAP_SIZE_BYTES > 0) { - trackers.add( - new HeapUsageTracker( - 
this::getHeapVarianceThreshold, - this::getHeapBytesThreshold, - this.getHeapMovingAverageWindowSize(), - clusterSettings, - SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE - ) - ); - } - trackers.add(new ElapsedTimeTracker(this::getElapsedTimeNanosThreshold, System::nanoTime)); - return Collections.unmodifiableList(trackers); - } - - public void addListener(CancellationListener listener) { + public void addListener(CancellationSettingsListener listener) { listeners.add(listener); } - private void notifyListeners(Consumer consumer) { + private void notifyListeners(Consumer consumer) { List exceptions = new ArrayList<>(); - for (CancellationListener listener : listeners) { + for (CancellationSettingsListener listener : listeners) { try { consumer.accept(listener); } catch (Exception e) { diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java index e05ca325d89a5..ce300410a9cbe 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java @@ -12,14 +12,9 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; -import org.opensearch.search.backpressure.CancellationListener; -import org.opensearch.search.backpressure.trackers.CpuUsageTracker; -import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; -import org.opensearch.search.backpressure.trackers.HeapUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.CancellationSettingsListener; import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; @@ -33,7 +28,7 @@ */ public 
class SearchTaskSettings { - private final List listeners = new ArrayList<>(); + private final List listeners = new ArrayList<>(); private final ClusterSettings clusterSettings; private static class Defaults { @@ -273,32 +268,14 @@ private void setCancellationBurst(double cancellationBurst) { notifyListeners(listener -> listener.onBurstChanged(cancellationBurst)); } - public List getTrackers() { - List trackers = new ArrayList<>(); - trackers.add(new CpuUsageTracker(this::getCpuTimeNanosThreshold)); - if (HEAP_SIZE_BYTES > 0) { - trackers.add( - new HeapUsageTracker( - this::getHeapVarianceThreshold, - this::getHeapBytesThreshold, - this.getHeapMovingAverageWindowSize(), - clusterSettings, - SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE - ) - ); - } - trackers.add(new ElapsedTimeTracker(this::getElapsedTimeNanosThreshold, System::nanoTime)); - return Collections.unmodifiableList(trackers); - } - - public void addListener(CancellationListener listener) { + public void addListener(CancellationSettingsListener listener) { listeners.add(listener); } - private void notifyListeners(Consumer consumer) { + private void notifyListeners(Consumer consumer) { List exceptions = new ArrayList<>(); - for (CancellationListener listener : listeners) { + for (CancellationSettingsListener listener : listeners) { try { consumer.accept(listener); } catch (Exception e) { diff --git a/server/src/main/java/org/opensearch/tasks/TaskCancellation.java b/server/src/main/java/org/opensearch/tasks/TaskCancellation.java index 710c6a5bf8e94..b718bd2395cc5 100644 --- a/server/src/main/java/org/opensearch/tasks/TaskCancellation.java +++ b/server/src/main/java/org/opensearch/tasks/TaskCancellation.java @@ -67,7 +67,6 @@ public void cancelTaskAndDescendants(TaskManager taskManager) { } taskManager.cancelTaskAndDescendants(task, getReasonString(), false, ActionListener.wrap(() -> {})); - task.cancel(getReasonString()); runOnCancelCallbacks(); } diff --git 
a/server/src/main/java/org/opensearch/tasks/TaskCancellationService.java b/server/src/main/java/org/opensearch/tasks/TaskCancellationService.java index ebce26c4bbfbc..1b6b810a3c1f2 100644 --- a/server/src/main/java/org/opensearch/tasks/TaskCancellationService.java +++ b/server/src/main/java/org/opensearch/tasks/TaskCancellationService.java @@ -82,7 +82,7 @@ private String localNodeId() { return transportService.getLocalNode().getId(); } - void cancelTaskAndDescendants(CancellableTask task, String reason, boolean waitForCompletion, ActionListener listener) { + public void cancelTaskAndDescendants(CancellableTask task, String reason, boolean waitForCompletion, ActionListener listener) { final TaskId taskId = task.taskInfo(localNodeId(), false).getTaskId(); if (task.shouldCancelChildrenOnCancellation()) { logger.trace("cancelling task [{}] and its descendants", taskId); diff --git a/server/src/main/java/org/opensearch/tasks/TaskManager.java b/server/src/main/java/org/opensearch/tasks/TaskManager.java index ff760219716e6..cc7733a552356 100644 --- a/server/src/main/java/org/opensearch/tasks/TaskManager.java +++ b/server/src/main/java/org/opensearch/tasks/TaskManager.java @@ -226,7 +226,7 @@ protected void innerOnFailure(Exception e) { return task; } - private void registerCancellableTask(Task task) { + public void registerCancellableTask(Task task) { CancellableTask cancellableTask = (CancellableTask) task; CancellableTaskHolder holder = new CancellableTaskHolder(cancellableTask); CancellableTaskHolder oldHolder = cancellableTasks.put(task.getId(), holder); diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index b2a4bcb9248fc..170e7594c862a 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ 
b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -8,7 +8,6 @@ package org.opensearch.search.backpressure; -import org.apache.logging.log4j.LogManager; import org.junit.After; import org.junit.Before; import org.opensearch.Version; @@ -33,6 +32,7 @@ import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; import org.opensearch.tasks.TaskCancellationService; +import org.opensearch.tasks.TaskId; import org.opensearch.tasks.TaskManager; import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.test.OpenSearchTestCase; @@ -220,22 +220,26 @@ public void testSearchTaskInFlightCancellation() { // Create a mix of low and high resource usage SearchTasks (50 low + 25 high resource usage tasks). Map activeSearchTasks = new HashMap<>(); for (long i = 0; i < 75; i++) { + Task task; if (i % 3 == 0) { - activeSearchTasks.put(i, createMockTaskWithResourceStats(SearchTask.class, 500, taskHeapUsageBytes)); + task = createMockTaskWithResourceStats(SearchTask.class, 500, taskHeapUsageBytes); + activeSearchTasks.put(i, task); } else { - activeSearchTasks.put(i, createMockTaskWithResourceStats(SearchTask.class, 100, taskHeapUsageBytes)); + task = createMockTaskWithResourceStats(SearchTask.class, 100, taskHeapUsageBytes); + activeSearchTasks.put(i, task); } + doReturn(new TaskId("test", 123)).when(task).getParentTaskId(); + doReturn(i).when(task).getId(); + taskManager.registerCancellableTask(task); } doReturn(activeSearchTasks).when(mockTaskResourceTrackingService).getResourceAwareTasks(); // There are 25 SearchTasks eligible for cancellation but only 5 will be cancelled (burst limit). 
- LogManager.getLogger(SearchBackpressureServiceTests.class).info("first run"); service.doRun(); assertEquals(5, service.getSearchBackpressureStats(SearchTask.class).getCancellationCount()); assertEquals(1, service.getSearchBackpressureStats(SearchTask.class).getLimitReachedCount()); // If the clock or completed task count haven't made sufficient progress, we'll continue to be rate-limited. - LogManager.getLogger(SearchBackpressureServiceTests.class).info("second run"); service.doRun(); assertEquals(5, service.getSearchBackpressureStats(SearchTask.class).getCancellationCount()); assertEquals(2, service.getSearchBackpressureStats(SearchTask.class).getLimitReachedCount()); @@ -243,7 +247,6 @@ public void testSearchTaskInFlightCancellation() { // Fast-forward the clock by ten second to replenish some tokens. // This will add 50 tokens (time delta * rate) to 'rateLimitPerTime' but it will cancel only 5 tasks (burst limit). mockTime.addAndGet(TimeUnit.SECONDS.toNanos(10)); - LogManager.getLogger(SearchBackpressureServiceTests.class).info("third run"); service.doRun(); assertEquals(10, service.getSearchBackpressureStats(SearchTask.class).getCancellationCount()); assertEquals(3, service.getSearchBackpressureStats(SearchTask.class).getLimitReachedCount()); @@ -293,11 +296,17 @@ public void testSearchShardTaskInFlightCancellation() { // Create a mix of low and high resource usage tasks (60 low + 15 high resource usage tasks). 
Map activeSearchShardTasks = new HashMap<>(); for (long i = 0; i < 75; i++) { + Task task; if (i % 5 == 0) { - activeSearchShardTasks.put(i, createMockTaskWithResourceStats(SearchShardTask.class, 500, taskHeapUsageBytes)); + task = createMockTaskWithResourceStats(SearchShardTask.class, 500, taskHeapUsageBytes); + activeSearchShardTasks.put(i, task); } else { - activeSearchShardTasks.put(i, createMockTaskWithResourceStats(SearchShardTask.class, 100, taskHeapUsageBytes)); + task = createMockTaskWithResourceStats(SearchShardTask.class, 100, taskHeapUsageBytes); + activeSearchShardTasks.put(i, task); } + doReturn(new TaskId("test", 123)).when(task).getParentTaskId(); + doReturn(i).when(task).getId(); + taskManager.registerCancellableTask(task); } doReturn(activeSearchShardTasks).when(mockTaskResourceTrackingService).getResourceAwareTasks(); diff --git a/server/src/test/java/org/opensearch/tasks/TaskManagerTests.java b/server/src/test/java/org/opensearch/tasks/TaskManagerTests.java index ab49109eb8247..636afe08f3d20 100644 --- a/server/src/test/java/org/opensearch/tasks/TaskManagerTests.java +++ b/server/src/test/java/org/opensearch/tasks/TaskManagerTests.java @@ -100,7 +100,12 @@ public void testTrackingChannelTask() throws Exception { Set cancelledTasks = ConcurrentCollections.newConcurrentSet(); taskManager.setTaskCancellationService(new TaskCancellationService(mock(TransportService.class)) { @Override - void cancelTaskAndDescendants(CancellableTask task, String reason, boolean waitForCompletion, ActionListener listener) { + public void cancelTaskAndDescendants( + CancellableTask task, + String reason, + boolean waitForCompletion, + ActionListener listener + ) { assertThat(reason, equalTo("channel was closed")); assertFalse(waitForCompletion); assertTrue("task [" + task + "] was cancelled already", cancelledTasks.add(task)); @@ -148,7 +153,12 @@ public void testTrackingTaskAndCloseChannelConcurrently() throws Exception { Set cancelledTasks = 
ConcurrentCollections.newConcurrentSet(); taskManager.setTaskCancellationService(new TaskCancellationService(mock(TransportService.class)) { @Override - void cancelTaskAndDescendants(CancellableTask task, String reason, boolean waitForCompletion, ActionListener listener) { + public void cancelTaskAndDescendants( + CancellableTask task, + String reason, + boolean waitForCompletion, + ActionListener listener + ) { assertTrue("task [" + task + "] was cancelled already", cancelledTasks.add(task)); } }); From 0732c741320378a464eb1a0fb7926a7512fa995d Mon Sep 17 00:00:00 2001 From: PritLadani Date: Thu, 2 Feb 2023 17:20:12 +0530 Subject: [PATCH 30/34] Updating UTs Signed-off-by: PritLadani --- .../tasks/TaskCancellationService.java | 2 +- .../org/opensearch/tasks/TaskManager.java | 2 +- .../SearchBackpressureServiceTests.java | 51 +++++++------------ .../opensearch/tasks/TaskManagerTests.java | 14 +---- 4 files changed, 21 insertions(+), 48 deletions(-) diff --git a/server/src/main/java/org/opensearch/tasks/TaskCancellationService.java b/server/src/main/java/org/opensearch/tasks/TaskCancellationService.java index 1b6b810a3c1f2..ebce26c4bbfbc 100644 --- a/server/src/main/java/org/opensearch/tasks/TaskCancellationService.java +++ b/server/src/main/java/org/opensearch/tasks/TaskCancellationService.java @@ -82,7 +82,7 @@ private String localNodeId() { return transportService.getLocalNode().getId(); } - public void cancelTaskAndDescendants(CancellableTask task, String reason, boolean waitForCompletion, ActionListener listener) { + void cancelTaskAndDescendants(CancellableTask task, String reason, boolean waitForCompletion, ActionListener listener) { final TaskId taskId = task.taskInfo(localNodeId(), false).getTaskId(); if (task.shouldCancelChildrenOnCancellation()) { logger.trace("cancelling task [{}] and its descendants", taskId); diff --git a/server/src/main/java/org/opensearch/tasks/TaskManager.java b/server/src/main/java/org/opensearch/tasks/TaskManager.java index 
cc7733a552356..ff760219716e6 100644 --- a/server/src/main/java/org/opensearch/tasks/TaskManager.java +++ b/server/src/main/java/org/opensearch/tasks/TaskManager.java @@ -226,7 +226,7 @@ protected void innerOnFailure(Exception e) { return task; } - public void registerCancellableTask(Task task) { + private void registerCancellableTask(Task task) { CancellableTask cancellableTask = (CancellableTask) task; CancellableTaskHolder holder = new CancellableTaskHolder(cancellableTask); CancellableTaskHolder oldHolder = cancellableTasks.put(task.getId(), holder); diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index 170e7594c862a..25bd27a6fd480 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -32,7 +32,6 @@ import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; import org.opensearch.tasks.TaskCancellationService; -import org.opensearch.tasks.TaskId; import org.opensearch.tasks.TaskManager; import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.test.OpenSearchTestCase; @@ -52,6 +51,8 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.function.LongSupplier; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.any; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; @@ -186,6 +187,7 @@ public void testTrackerStateUpdateOnSearchShardTaskCompletion() { } public void testSearchTaskInFlightCancellation() { + TaskManager mockTaskManager = spy(taskManager); TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); AtomicLong mockTime = new 
AtomicLong(0); LongSupplier mockTimeNanosSupplier = mockTime::get; @@ -204,7 +206,7 @@ public void testSearchTaskInFlightCancellation() { List.of(mockNodeDuressTracker), List.of(mockTaskResourceUsageTracker), Collections.emptyList(), - taskManager + mockTaskManager ); // Run two iterations so that node is marked 'in duress' from the third iteration onwards. @@ -220,35 +222,29 @@ public void testSearchTaskInFlightCancellation() { // Create a mix of low and high resource usage SearchTasks (50 low + 25 high resource usage tasks). Map activeSearchTasks = new HashMap<>(); for (long i = 0; i < 75; i++) { - Task task; if (i % 3 == 0) { - task = createMockTaskWithResourceStats(SearchTask.class, 500, taskHeapUsageBytes); - activeSearchTasks.put(i, task); + activeSearchTasks.put(i, createMockTaskWithResourceStats(SearchTask.class, 500, taskHeapUsageBytes)); } else { - task = createMockTaskWithResourceStats(SearchTask.class, 100, taskHeapUsageBytes); - activeSearchTasks.put(i, task); + activeSearchTasks.put(i, createMockTaskWithResourceStats(SearchTask.class, 100, taskHeapUsageBytes)); } - doReturn(new TaskId("test", 123)).when(task).getParentTaskId(); - doReturn(i).when(task).getId(); - taskManager.registerCancellableTask(task); } doReturn(activeSearchTasks).when(mockTaskResourceTrackingService).getResourceAwareTasks(); // There are 25 SearchTasks eligible for cancellation but only 5 will be cancelled (burst limit). service.doRun(); - assertEquals(5, service.getSearchBackpressureStats(SearchTask.class).getCancellationCount()); + verify(mockTaskManager, times(5)).cancelTaskAndDescendants(any(), anyString(), anyBoolean(), any()); assertEquals(1, service.getSearchBackpressureStats(SearchTask.class).getLimitReachedCount()); // If the clock or completed task count haven't made sufficient progress, we'll continue to be rate-limited. 
service.doRun(); - assertEquals(5, service.getSearchBackpressureStats(SearchTask.class).getCancellationCount()); + verify(mockTaskManager, times(5)).cancelTaskAndDescendants(any(), anyString(), anyBoolean(), any()); assertEquals(2, service.getSearchBackpressureStats(SearchTask.class).getLimitReachedCount()); // Fast-forward the clock by ten second to replenish some tokens. // This will add 50 tokens (time delta * rate) to 'rateLimitPerTime' but it will cancel only 5 tasks (burst limit). mockTime.addAndGet(TimeUnit.SECONDS.toNanos(10)); service.doRun(); - assertEquals(10, service.getSearchBackpressureStats(SearchTask.class).getCancellationCount()); + verify(mockTaskManager, times(10)).cancelTaskAndDescendants(any(), anyString(), anyBoolean(), any()); assertEquals(3, service.getSearchBackpressureStats(SearchTask.class).getLimitReachedCount()); // Verify search backpressure stats. @@ -262,6 +258,7 @@ public void testSearchTaskInFlightCancellation() { } public void testSearchShardTaskInFlightCancellation() { + TaskManager mockTaskManager = spy(taskManager); TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); AtomicLong mockTime = new AtomicLong(0); LongSupplier mockTimeNanosSupplier = mockTime::get; @@ -280,7 +277,7 @@ public void testSearchShardTaskInFlightCancellation() { List.of(mockNodeDuressTracker), Collections.emptyList(), List.of(mockTaskResourceUsageTracker), - taskManager + mockTaskManager ); // Run two iterations so that node is marked 'in duress' from the third iteration onwards. @@ -296,28 +293,22 @@ public void testSearchShardTaskInFlightCancellation() { // Create a mix of low and high resource usage tasks (60 low + 15 high resource usage tasks). 
Map activeSearchShardTasks = new HashMap<>(); for (long i = 0; i < 75; i++) { - Task task; if (i % 5 == 0) { - task = createMockTaskWithResourceStats(SearchShardTask.class, 500, taskHeapUsageBytes); - activeSearchShardTasks.put(i, task); + activeSearchShardTasks.put(i, createMockTaskWithResourceStats(SearchShardTask.class, 500, taskHeapUsageBytes)); } else { - task = createMockTaskWithResourceStats(SearchShardTask.class, 100, taskHeapUsageBytes); - activeSearchShardTasks.put(i, task); + activeSearchShardTasks.put(i, createMockTaskWithResourceStats(SearchShardTask.class, 100, taskHeapUsageBytes)); } - doReturn(new TaskId("test", 123)).when(task).getParentTaskId(); - doReturn(i).when(task).getId(); - taskManager.registerCancellableTask(task); } doReturn(activeSearchShardTasks).when(mockTaskResourceTrackingService).getResourceAwareTasks(); // There are 15 SearchShardTasks eligible for cancellation but only 10 will be cancelled (burst limit). service.doRun(); - assertEquals(10, service.getSearchBackpressureStats(SearchShardTask.class).getCancellationCount()); + verify(mockTaskManager, times(10)).cancelTaskAndDescendants(any(), anyString(), anyBoolean(), any()); assertEquals(1, service.getSearchBackpressureStats(SearchShardTask.class).getLimitReachedCount()); // If the clock or completed task count haven't made sufficient progress, we'll continue to be rate-limited. service.doRun(); - assertEquals(10, service.getSearchBackpressureStats(SearchShardTask.class).getCancellationCount()); + verify(mockTaskManager, times(10)).cancelTaskAndDescendants(any(), anyString(), anyBoolean(), any()); assertEquals(2, service.getSearchBackpressureStats(SearchShardTask.class).getLimitReachedCount()); // Simulate task completion to replenish some tokens. 
@@ -326,21 +317,13 @@ public void testSearchShardTaskInFlightCancellation() { service.onTaskCompleted(createMockTaskWithResourceStats(SearchShardTask.class, 100, taskHeapUsageBytes)); } service.doRun(); - assertEquals(12, service.getSearchBackpressureStats(SearchShardTask.class).getCancellationCount()); + verify(mockTaskManager, times(12)).cancelTaskAndDescendants(any(), anyString(), anyBoolean(), any()); assertEquals(3, service.getSearchBackpressureStats(SearchShardTask.class).getLimitReachedCount()); - // Fast-forward the clock by one second to replenish some tokens. - // This will add 3 tokens (time delta * rate) to 'rateLimitPerTime'. - mockTime.addAndGet(TimeUnit.SECONDS.toNanos(1)); - service.doRun(); - assertEquals(15, service.getSearchBackpressureStats(SearchShardTask.class).getCancellationCount()); - assertEquals(3, service.getSearchBackpressureStats(SearchShardTask.class).getLimitReachedCount()); // no more tasks to cancel; - // limit not reached - // Verify search backpressure stats. 
SearchBackpressureStats expectedStats = new SearchBackpressureStats( new SearchTaskStats(0, 0, Collections.emptyMap()), - new SearchShardTaskStats(15, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(15))), + new SearchShardTaskStats(12, 3, Map.of(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new MockStats(12))), SearchBackpressureMode.ENFORCED ); SearchBackpressureStats actualStats = service.nodeStats(); diff --git a/server/src/test/java/org/opensearch/tasks/TaskManagerTests.java b/server/src/test/java/org/opensearch/tasks/TaskManagerTests.java index 636afe08f3d20..ab49109eb8247 100644 --- a/server/src/test/java/org/opensearch/tasks/TaskManagerTests.java +++ b/server/src/test/java/org/opensearch/tasks/TaskManagerTests.java @@ -100,12 +100,7 @@ public void testTrackingChannelTask() throws Exception { Set cancelledTasks = ConcurrentCollections.newConcurrentSet(); taskManager.setTaskCancellationService(new TaskCancellationService(mock(TransportService.class)) { @Override - public void cancelTaskAndDescendants( - CancellableTask task, - String reason, - boolean waitForCompletion, - ActionListener listener - ) { + void cancelTaskAndDescendants(CancellableTask task, String reason, boolean waitForCompletion, ActionListener listener) { assertThat(reason, equalTo("channel was closed")); assertFalse(waitForCompletion); assertTrue("task [" + task + "] was cancelled already", cancelledTasks.add(task)); @@ -153,12 +148,7 @@ public void testTrackingTaskAndCloseChannelConcurrently() throws Exception { Set cancelledTasks = ConcurrentCollections.newConcurrentSet(); taskManager.setTaskCancellationService(new TaskCancellationService(mock(TransportService.class)) { @Override - public void cancelTaskAndDescendants( - CancellableTask task, - String reason, - boolean waitForCompletion, - ActionListener listener - ) { + void cancelTaskAndDescendants(CancellableTask task, String reason, boolean waitForCompletion, ActionListener listener) { assertTrue("task [" + 
task + "] was cancelled already", cancelledTasks.add(task)); } }); From 4ee781c828188d02ba849fc3846f516c8b2e77e1 Mon Sep 17 00:00:00 2001 From: PritLadani Date: Fri, 3 Feb 2023 14:08:57 +0530 Subject: [PATCH 31/34] Re-adding deprecated settings to address BWC issues Signed-off-by: PritLadani --- .../common/settings/ClusterSettings.java | 5 +- .../settings/SearchBackpressureSettings.java | 55 ++++++++++ .../settings/SearchShardTaskSettings.java | 12 +-- ...earchBackpressureRenamedSettingsTests.java | 101 ++++++++++++++++++ 4 files changed, 166 insertions(+), 7 deletions(-) create mode 100644 server/src/test/java/org/opensearch/search/backpressure/settings/SearchBackpressureRenamedSettingsTests.java diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index e476fed1540d9..e616521cdf4f3 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -617,7 +617,10 @@ public void apply(Settings value, Settings current, Settings previous) { SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE, SearchShardTaskSettings.SETTING_CPU_TIME_MILLIS_THRESHOLD, SearchShardTaskSettings.SETTING_ELAPSED_TIME_MILLIS_THRESHOLD, - SearchShardTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD + SearchShardTaskSettings.SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, + SearchBackpressureSettings.SETTING_CANCELLATION_RATIO, // deprecated + SearchBackpressureSettings.SETTING_CANCELLATION_RATE, // deprecated + SearchBackpressureSettings.SETTING_CANCELLATION_BURST // deprecated ) ) ); diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java index 13287d04886c1..f06acb4c952a5 100644 --- 
a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchBackpressureSettings.java @@ -22,6 +22,10 @@ public class SearchBackpressureSettings { private static class Defaults { private static final long INTERVAL_MILLIS = 1000; private static final String MODE = "monitor_only"; + + private static final double CANCELLATION_RATIO = 0.1; + private static final double CANCELLATION_RATE = 0.003; + private static final double CANCELLATION_BURST = 10.0; } /** @@ -46,6 +50,54 @@ private static class Defaults { Setting.Property.NodeScope ); + /** + * Defines the percentage of tasks to cancel relative to the number of successful task completions. + * In other words, it is the number of tokens added to the bucket on each successful task completion. + * + * The setting below is deprecated. + * To keep backwards compatibility, the old setting is retained, and it also serves as the fallback for the new setting. + */ + public static final Setting SETTING_CANCELLATION_RATIO = Setting.doubleSetting( + "search_backpressure.cancellation_ratio", + Defaults.CANCELLATION_RATIO, + 0.0, + 1.0, + Setting.Property.Deprecated, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the number of tasks to cancel per unit time (in millis). + * In other words, it is the number of tokens added to the bucket each millisecond. + * + * The setting below is deprecated. + * To keep backwards compatibility, the old setting is retained, and it also serves as the fallback for the new setting. + */ + public static final Setting SETTING_CANCELLATION_RATE = Setting.doubleSetting( + "search_backpressure.cancellation_rate", + Defaults.CANCELLATION_RATE, + 0.0, + Setting.Property.Deprecated, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Defines the maximum number of tasks that can be cancelled before being rate-limited.
+ * + * The setting below is deprecated. + * To keep backwards compatibility, the old setting is retained, and it also serves as the fallback for the new setting. + */ + public static final Setting SETTING_CANCELLATION_BURST = Setting.doubleSetting( + "search_backpressure.cancellation_burst", + Defaults.CANCELLATION_BURST, + 1.0, + Setting.Property.Deprecated, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + private final Settings settings; private final ClusterSettings clusterSettings; private final NodeDuressSettings nodeDuressSettings; @@ -63,6 +115,9 @@ public SearchBackpressureSettings(Settings settings, ClusterSettings clusterSett mode = SearchBackpressureMode.fromName(SETTING_MODE.get(settings)); clusterSettings.addSettingsUpdateConsumer(SETTING_MODE, s -> this.setMode(SearchBackpressureMode.fromName(s))); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATIO, searchShardTaskSettings::setCancellationRatio); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_RATE, searchShardTaskSettings::setCancellationRate); + clusterSettings.addSettingsUpdateConsumer(SETTING_CANCELLATION_BURST, searchShardTaskSettings::setCancellationBurst); } public Settings getSettings() { diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java index f7fb95f08f0c2..3c6fd3d25c5d3 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java @@ -49,7 +49,7 @@ private static class Defaults { private volatile double cancellationRatio; public static final Setting SETTING_CANCELLATION_RATIO = Setting.doubleSetting( "search_backpressure.search_shard_task.cancellation_ratio", - Defaults.CANCELLATION_RATIO, + SearchBackpressureSettings.SETTING_CANCELLATION_RATIO, 0.0,
1.0, Setting.Property.Dynamic, @@ -63,7 +63,7 @@ private static class Defaults { private volatile double cancellationRate; public static final Setting SETTING_CANCELLATION_RATE = Setting.doubleSetting( "search_backpressure.search_shard_task.cancellation_rate", - Defaults.CANCELLATION_RATE, + SearchBackpressureSettings.SETTING_CANCELLATION_RATE, 0.0, Setting.Property.Dynamic, Setting.Property.NodeScope @@ -75,7 +75,7 @@ private static class Defaults { private volatile double cancellationBurst; public static final Setting SETTING_CANCELLATION_BURST = Setting.doubleSetting( "search_backpressure.search_shard_task.cancellation_burst", - Defaults.CANCELLATION_BURST, + SearchBackpressureSettings.SETTING_CANCELLATION_BURST, 1.0, Setting.Property.Dynamic, Setting.Property.NodeScope @@ -236,7 +236,7 @@ public double getCancellationRatio() { return cancellationRatio; } - private void setCancellationRatio(double cancellationRatio) { + void setCancellationRatio(double cancellationRatio) { this.cancellationRatio = cancellationRatio; notifyListeners(listener -> listener.onRatioChanged(cancellationRatio)); } @@ -249,7 +249,7 @@ public double getCancellationRateNanos() { return getCancellationRate() / TimeUnit.MILLISECONDS.toNanos(1); // rate per nanoseconds } - private void setCancellationRate(double cancellationRate) { + void setCancellationRate(double cancellationRate) { this.cancellationRate = cancellationRate; notifyListeners(listener -> listener.onRateChanged(cancellationRate)); } @@ -258,7 +258,7 @@ public double getCancellationBurst() { return cancellationBurst; } - private void setCancellationBurst(double cancellationBurst) { + void setCancellationBurst(double cancellationBurst) { this.cancellationBurst = cancellationBurst; notifyListeners(listener -> listener.onBurstChanged(cancellationBurst)); } diff --git a/server/src/test/java/org/opensearch/search/backpressure/settings/SearchBackpressureRenamedSettingsTests.java 
b/server/src/test/java/org/opensearch/search/backpressure/settings/SearchBackpressureRenamedSettingsTests.java new file mode 100644 index 0000000000000..a0eb05ecaa91d --- /dev/null +++ b/server/src/test/java/org/opensearch/search/backpressure/settings/SearchBackpressureRenamedSettingsTests.java @@ -0,0 +1,101 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.backpressure.settings; + +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.Arrays; +import java.util.Set; + +public class SearchBackpressureRenamedSettingsTests extends OpenSearchTestCase { + + /** + * Validate that both settings are known and supported. + */ + public void testOldSettingsExist() { + Set> settings = ClusterSettings.BUILT_IN_CLUSTER_SETTINGS; + assertTrue( + "Both 'search_backpressure.search_shard_task.cancellation_ratio' and its predecessor should be supported built-in settings", + settings.containsAll( + Arrays.asList(SearchBackpressureSettings.SETTING_CANCELLATION_RATIO, SearchShardTaskSettings.SETTING_CANCELLATION_RATIO) + ) + ); + + assertTrue( + "Both 'search_backpressure.search_shard_task.cancellation_rate' and its predecessor should be supported built-in settings", + settings.containsAll( + Arrays.asList(SearchBackpressureSettings.SETTING_CANCELLATION_RATE, SearchShardTaskSettings.SETTING_CANCELLATION_RATE) + ) + ); + + assertTrue( + "Both 'search_backpressure.search_shard_task.cancellation_burst' and its predecessor should be supported built-in settings", + settings.containsAll( + Arrays.asList(SearchBackpressureSettings.SETTING_CANCELLATION_BURST, SearchShardTaskSettings.SETTING_CANCELLATION_BURST) + ) + ); + } + + /** + *
Validate that the default value of both settings is the same. + */ + public void testSettingFallback() { + assertEquals( + SearchBackpressureSettings.SETTING_CANCELLATION_RATIO.get(Settings.EMPTY), + SearchShardTaskSettings.SETTING_CANCELLATION_RATIO.get(Settings.EMPTY) + ); + + assertEquals( + SearchBackpressureSettings.SETTING_CANCELLATION_RATE.get(Settings.EMPTY), + SearchShardTaskSettings.SETTING_CANCELLATION_RATE.get(Settings.EMPTY) + ); + + assertEquals( + SearchBackpressureSettings.SETTING_CANCELLATION_BURST.get(Settings.EMPTY), + SearchShardTaskSettings.SETTING_CANCELLATION_BURST.get(Settings.EMPTY) + ); + } + + /** + * Validate the new setting can be configured correctly, and it doesn't impact the old setting. + */ + public void testSettingGetValue() { + Settings settings = Settings.builder().put("search_backpressure.search_shard_task.cancellation_ratio", "0.5").build(); + assertEquals(Double.valueOf(0.5), SearchShardTaskSettings.SETTING_CANCELLATION_RATIO.get(settings)); + assertEquals( + SearchBackpressureSettings.SETTING_CANCELLATION_RATIO.getDefault(Settings.EMPTY), + SearchBackpressureSettings.SETTING_CANCELLATION_RATIO.get(settings) + ); + } + + /** + * Validate the value of the old setting will be applied to the new setting, if the new setting is not configured. + */ + public void testSettingGetValueWithFallback() { + Settings settings = Settings.builder().put("search_backpressure.cancellation_ratio", "0.3").build(); + assertEquals(Double.valueOf(0.3), SearchShardTaskSettings.SETTING_CANCELLATION_RATIO.get(settings)); + assertSettingDeprecationsAndWarnings(new Setting[] { SearchBackpressureSettings.SETTING_CANCELLATION_RATIO }); + } + + /** + * Validate the value of the old setting will be ignored, if the new setting is configured.
+ */ + public void testSettingGetValueWhenBothAreConfigured() { + Settings settings = Settings.builder() + .put("search_backpressure.search_shard_task.cancellation_ratio", "0.2") + .put("search_backpressure.cancellation_ratio", "0.4") + .build(); + assertEquals(Double.valueOf(0.2), SearchShardTaskSettings.SETTING_CANCELLATION_RATIO.get(settings)); + assertEquals(Double.valueOf(0.4), SearchBackpressureSettings.SETTING_CANCELLATION_RATIO.get(settings)); + assertSettingDeprecationsAndWarnings(new Setting[] { SearchBackpressureSettings.SETTING_CANCELLATION_RATIO }); + } +} From 874cd976ca3716ab37ee2b5569fcc8596e7beffd Mon Sep 17 00:00:00 2001 From: PritLadani Date: Fri, 3 Feb 2023 20:30:19 +0530 Subject: [PATCH 32/34] Adding method to check if heap usage tracking is enabled Signed-off-by: PritLadani --- .../search/backpressure/SearchBackpressureService.java | 4 ++-- .../search/backpressure/trackers/HeapUsageTracker.java | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index 0afebb509341e..f1856a21a3fc8 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -52,7 +52,7 @@ import java.util.function.LongSupplier; import java.util.stream.Collectors; -import static org.opensearch.search.backpressure.trackers.HeapUsageTracker.HEAP_SIZE_BYTES; +import static org.opensearch.search.backpressure.trackers.HeapUsageTracker.Stats.isHeapTrackingSupported; /** * SearchBackpressureService is responsible for monitoring and cancelling in-flight search tasks if they are @@ -329,7 +329,7 @@ public static List getTrackers( ) { List trackers = new ArrayList<>(); trackers.add(new CpuUsageTracker(cpuThresholdSupplier)); - if (HEAP_SIZE_BYTES > 0) { + if 
(isHeapTrackingSupported()) { trackers.add( new HeapUsageTracker( heapVarianceSupplier, diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java index 7d3ac80be9bbb..0236cbf28415f 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java @@ -122,6 +122,10 @@ public Stats(StreamInput in) throws IOException { this(in.readVLong(), in.readVLong(), in.readVLong(), in.readVLong()); } + public static boolean isHeapTrackingSupported() { + return HEAP_SIZE_BYTES > 0; + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { return builder.startObject() From 881bdee7d889641df1677d0f755406d57e83dfbd Mon Sep 17 00:00:00 2001 From: PritLadani Date: Fri, 3 Feb 2023 23:46:26 +0530 Subject: [PATCH 33/34] Isolating HEAP_SIZE_BYTES value Signed-off-by: PritLadani --- .../SearchBackpressureService.java | 29 +++++++------- .../settings/SearchShardTaskSettings.java | 10 +---- .../settings/SearchTaskSettings.java | 10 +---- .../trackers/HeapUsageTracker.java | 39 +++++++++++++------ .../SearchBackpressureServiceTests.java | 6 ++- .../trackers/HeapUsageTrackerTests.java | 15 ++++--- 6 files changed, 58 insertions(+), 51 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index f1856a21a3fc8..765f2c5b6b228 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -52,7 +52,7 @@ import java.util.function.LongSupplier; import java.util.stream.Collectors; -import static 
org.opensearch.search.backpressure.trackers.HeapUsageTracker.Stats.isHeapTrackingSupported; +import static org.opensearch.search.backpressure.trackers.HeapUsageTracker.isHeapTrackingSupported; /** * SearchBackpressureService is responsible for monitoring and cancelling in-flight search tasks if they are @@ -98,7 +98,7 @@ public SearchBackpressureService( getTrackers( settings.getSearchTaskSettings()::getCpuTimeNanosThreshold, settings.getSearchTaskSettings()::getHeapVarianceThreshold, - settings.getSearchTaskSettings()::getHeapBytesThreshold, + settings.getSearchTaskSettings()::getHeapPercentThreshold, settings.getSearchTaskSettings().getHeapMovingAverageWindowSize(), settings.getSearchTaskSettings()::getElapsedTimeNanosThreshold, settings.getClusterSettings(), @@ -107,7 +107,7 @@ public SearchBackpressureService( getTrackers( settings.getSearchShardTaskSettings()::getCpuTimeNanosThreshold, settings.getSearchShardTaskSettings()::getHeapVarianceThreshold, - settings.getSearchShardTaskSettings()::getHeapBytesThreshold, + settings.getSearchShardTaskSettings()::getHeapPercentThreshold, settings.getSearchShardTaskSettings().getHeapMovingAverageWindowSize(), settings.getSearchShardTaskSettings()::getElapsedTimeNanosThreshold, settings.getClusterSettings(), @@ -176,12 +176,18 @@ void doRun() { taskResourceTrackingService.refreshResourceStats(searchShardTasks.toArray(new Task[0])); // Check if increase in heap usage is due to SearchTasks - if (isHeapUsageDominatedBySearch(searchTasks, getSettings().getSearchTaskSettings().getTotalHeapBytesThreshold())) { + if (HeapUsageTracker.isHeapUsageDominatedBySearch( + searchTasks, + getSettings().getSearchTaskSettings().getTotalHeapPercentThreshold() + )) { cancellableTasks.addAll(searchTasks); } // Check if increase in heap usage is due to SearchShardTasks - if (isHeapUsageDominatedBySearch(searchShardTasks, getSettings().getSearchShardTaskSettings().getTotalHeapBytesThreshold())) { + if 
(HeapUsageTracker.isHeapUsageDominatedBySearch( + searchShardTasks, + getSettings().getSearchShardTaskSettings().getTotalHeapPercentThreshold() + )) { cancellableTasks.addAll(searchShardTasks); } @@ -252,15 +258,6 @@ boolean isNodeInDuress() { /** * Returns true if the increase in heap usage is due to search requests. */ - boolean isHeapUsageDominatedBySearch(List cancellableTasks, long threshold) { - long usage = cancellableTasks.stream().mapToLong(task -> task.getTotalResourceStats().getMemoryInBytes()).sum(); - if (usage < threshold) { - logger.debug("heap usage not dominated by search requests [{}/{}]", usage, threshold); - return false; - } - - return true; - } /** * Filters and returns the list of currently running tasks of specified type. @@ -321,7 +318,7 @@ SearchBackpressureState getSearchBackpressureStats(Class getTrackers( LongSupplier cpuThresholdSupplier, DoubleSupplier heapVarianceSupplier, - LongSupplier heapBytesThresholdSupplier, + DoubleSupplier heapPercentThresholdSupplier, int heapMovingAverageWindowSize, LongSupplier ElapsedTimeNanosSupplier, ClusterSettings clusterSettings, @@ -333,7 +330,7 @@ public static List getTrackers( trackers.add( new HeapUsageTracker( heapVarianceSupplier, - heapBytesThresholdSupplier, + heapPercentThresholdSupplier, heapMovingAverageWindowSize, clusterSettings, windowSizeSetting diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java index 3c6fd3d25c5d3..105023a20173f 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java @@ -19,8 +19,6 @@ import java.util.concurrent.TimeUnit; import java.util.function.Consumer; -import static org.opensearch.search.backpressure.trackers.HeapUsageTracker.HEAP_SIZE_BYTES; - /** * Defines the 
settings related to the cancellation of SearchShardTasks. * @@ -184,10 +182,6 @@ public double getTotalHeapPercentThreshold() { return totalHeapPercentThreshold; } - public long getTotalHeapBytesThreshold() { - return (long) (HEAP_SIZE_BYTES * getTotalHeapPercentThreshold()); - } - public long getCpuTimeNanosThreshold() { return TimeUnit.MILLISECONDS.toNanos(cpuTimeMillisThreshold); } @@ -196,8 +190,8 @@ public long getElapsedTimeNanosThreshold() { return TimeUnit.MILLISECONDS.toNanos(elapsedTimeMillisThreshold); } - public long getHeapBytesThreshold() { - return (long) (HEAP_SIZE_BYTES * heapPercentThreshold); + public double getHeapPercentThreshold() { + return heapPercentThreshold; } public double getHeapVarianceThreshold() { diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java index ce300410a9cbe..74f41f286de6c 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java @@ -19,8 +19,6 @@ import java.util.concurrent.TimeUnit; import java.util.function.Consumer; -import static org.opensearch.search.backpressure.trackers.HeapUsageTracker.HEAP_SIZE_BYTES; - /** * Defines the settings related to the cancellation of SearchTasks. 
* @@ -189,10 +187,6 @@ public double getTotalHeapPercentThreshold() { return totalHeapPercentThreshold; } - public long getTotalHeapBytesThreshold() { - return (long) (HEAP_SIZE_BYTES * getTotalHeapPercentThreshold()); - } - public long getCpuTimeNanosThreshold() { return TimeUnit.MILLISECONDS.toNanos(cpuTimeMillisThreshold); } @@ -201,8 +195,8 @@ public long getElapsedTimeNanosThreshold() { return TimeUnit.MILLISECONDS.toNanos(elapsedTimeMillisThreshold); } - public long getHeapBytesThreshold() { - return (long) (HEAP_SIZE_BYTES * heapPercentThreshold); + public double getHeapPercentThreshold() { + return heapPercentThreshold; } public double getHeapVarianceThreshold() { diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java index 0236cbf28415f..d6a38c8797174 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java @@ -8,6 +8,8 @@ package org.opensearch.search.backpressure.trackers; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; @@ -16,6 +18,7 @@ import org.opensearch.common.util.MovingAverage; import org.opensearch.common.xcontent.XContentBuilder; import org.opensearch.monitor.jvm.JvmStats; +import org.opensearch.tasks.CancellableTask; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; @@ -25,7 +28,6 @@ import java.util.Optional; import java.util.concurrent.atomic.AtomicReference; import java.util.function.DoubleSupplier; -import java.util.function.LongSupplier; import static org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER; @@ 
-36,20 +38,21 @@ * @opensearch.internal */ public class HeapUsageTracker extends TaskResourceUsageTracker { - public static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); + private static final Logger logger = LogManager.getLogger(HeapUsageTracker.class); + private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); private final DoubleSupplier heapVarianceSupplier; - private final LongSupplier heapBytesThresholdSupplier; + private final DoubleSupplier heapPercentThresholdSupplier; private final AtomicReference movingAverageReference; public HeapUsageTracker( DoubleSupplier heapVarianceSupplier, - LongSupplier heapBytesThresholdSupplier, + DoubleSupplier heapPercentThresholdSupplier, int heapMovingAverageWindowSize, ClusterSettings clusterSettings, Setting windowSizeSetting ) { this.heapVarianceSupplier = heapVarianceSupplier; - this.heapBytesThresholdSupplier = heapBytesThresholdSupplier; + this.heapPercentThresholdSupplier = heapPercentThresholdSupplier; this.movingAverageReference = new AtomicReference<>(new MovingAverage(heapMovingAverageWindowSize)); clusterSettings.addSettingsUpdateConsumer(windowSizeSetting, this::updateWindowSize); } @@ -77,9 +80,9 @@ public Optional checkAndMaybeGetCancellationReason(Task double averageUsage = movingAverage.getAverage(); double variance = heapVarianceSupplier.getAsDouble(); double allowedUsage = averageUsage * variance; - double threshold = heapBytesThresholdSupplier.getAsLong(); + double threshold = heapPercentThresholdSupplier.getAsDouble() * HEAP_SIZE_BYTES; - if (currentUsage < threshold || currentUsage < allowedUsage) { + if (isHeapTrackingSupported() == false || currentUsage < threshold || currentUsage < allowedUsage) { return Optional.empty(); } @@ -95,6 +98,24 @@ private void updateWindowSize(int heapMovingAverageWindowSize) { this.movingAverageReference.set(new MovingAverage(heapMovingAverageWindowSize)); } + public static boolean 
isHeapTrackingSupported() { + return HEAP_SIZE_BYTES > 0; + } + + /** + * Returns true if search tasks use at least the given fraction of max heap, or if heap tracking is unsupported. + */ + public static boolean isHeapUsageDominatedBySearch(List cancellableTasks, double heapPercentThreshold) { + long usage = cancellableTasks.stream().mapToLong(task -> task.getTotalResourceStats().getMemoryInBytes()).sum(); + long threshold = (long) (heapPercentThreshold * HEAP_SIZE_BYTES); + if (isHeapTrackingSupported() && usage < threshold) { + logger.debug("heap usage not dominated by search requests [{}/{}]", usage, threshold); + return false; + } + + return true; + } + @Override public TaskResourceUsageTracker.Stats stats(List activeTasks) { long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); @@ -122,10 +143,6 @@ public Stats(StreamInput in) throws IOException { this(in.readVLong(), in.readVLong(), in.readVLong(), in.readVLong()); } - public static boolean isHeapTrackingSupported() { - return HEAP_SIZE_BYTES > 0; - } - @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { return builder.startObject() diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index 25bd27a6fd480..3515f02ff13d7 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -216,7 +216,8 @@ public void testSearchTaskInFlightCancellation() { // Mocking 'settings' with predictable totalHeapBytesThreshold so that cancellation logic doesn't get skipped.
long taskHeapUsageBytes = 500; SearchTaskSettings searchTaskSettings = mock(SearchTaskSettings.class); - when(searchTaskSettings.getTotalHeapBytesThreshold()).thenReturn(taskHeapUsageBytes); + // setting the total heap percent threshold to minimum so that circuit does not break in SearchBackpressureService + when(searchTaskSettings.getTotalHeapPercentThreshold()).thenReturn(0.0); when(settings.getSearchTaskSettings()).thenReturn(searchTaskSettings); // Create a mix of low and high resource usage SearchTasks (50 low + 25 high resource usage tasks). @@ -287,7 +288,8 @@ public void testSearchShardTaskInFlightCancellation() { // Mocking 'settings' with predictable totalHeapBytesThreshold so that cancellation logic doesn't get skipped. long taskHeapUsageBytes = 500; SearchShardTaskSettings searchShardTaskSettings = mock(SearchShardTaskSettings.class); - when(searchShardTaskSettings.getTotalHeapBytesThreshold()).thenReturn(taskHeapUsageBytes); + // setting the total heap percent threshold to minimum so that circuit does not break in SearchBackpressureService + when(searchShardTaskSettings.getTotalHeapPercentThreshold()).thenReturn(0.0); when(settings.getSearchShardTaskSettings()).thenReturn(searchShardTaskSettings); // Create a mix of low and high resource usage tasks (60 low + 15 high resource usage tasks). 
diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java index 4af23be03f9f2..2acb23641667a 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java @@ -44,11 +44,12 @@ public void testSearchTaskEligibleForCancellation() { SearchTaskSettings mockSearchTaskSettings = spy( new SearchTaskSettings(mockSettings.getSettings(), mockSettings.getClusterSettings()) ); - when(mockSearchTaskSettings.getHeapBytesThreshold()).thenReturn(HEAP_BYTES_THRESHOLD_SEARCH_TASK); + // setting the heap percent threshold to minimum + when(mockSearchTaskSettings.getHeapPercentThreshold()).thenReturn(0.0); HeapUsageTracker tracker = spy( new HeapUsageTracker( mockSearchTaskSettings::getHeapVarianceThreshold, - mockSearchTaskSettings::getHeapBytesThreshold, + mockSearchTaskSettings::getHeapPercentThreshold, mockSearchTaskSettings.getHeapMovingAverageWindowSize(), mockSettings.getClusterSettings(), SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE @@ -73,11 +74,12 @@ public void testSearchShardTaskEligibleForCancellation() { SearchShardTaskSettings mockSearchShardTaskSettings = spy( new SearchShardTaskSettings(mockSettings.getSettings(), mockSettings.getClusterSettings()) ); - when(mockSearchShardTaskSettings.getHeapBytesThreshold()).thenReturn(HEAP_BYTES_THRESHOLD_SEARCH_TASK); + // setting the heap percent threshold to minimum + when(mockSearchShardTaskSettings.getHeapPercentThreshold()).thenReturn(0.0); HeapUsageTracker tracker = spy( new HeapUsageTracker( mockSearchShardTaskSettings::getHeapVarianceThreshold, - mockSearchShardTaskSettings::getHeapBytesThreshold, + mockSearchShardTaskSettings::getHeapPercentThreshold, mockSearchShardTaskSettings.getHeapMovingAverageWindowSize(), 
mockSettings.getClusterSettings(), SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE @@ -104,11 +106,12 @@ public void testNotEligibleForCancellation() { SearchShardTaskSettings mockSearchShardTaskSettings = spy( new SearchShardTaskSettings(mockSettings.getSettings(), mockSettings.getClusterSettings()) ); - when(mockSearchShardTaskSettings.getHeapBytesThreshold()).thenReturn(HEAP_BYTES_THRESHOLD_SEARCH_SHARD_TASK); + // setting the heap percent threshold to minimum + when(mockSearchShardTaskSettings.getHeapPercentThreshold()).thenReturn(0.0); HeapUsageTracker tracker = spy( new HeapUsageTracker( mockSearchShardTaskSettings::getHeapVarianceThreshold, - mockSearchShardTaskSettings::getHeapBytesThreshold, + mockSearchShardTaskSettings::getHeapPercentThreshold, mockSearchShardTaskSettings.getHeapMovingAverageWindowSize(), mockSettings.getClusterSettings(), SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE From 5da58173363a492392264411bf58d427b02f9baa Mon Sep 17 00:00:00 2001 From: PritLadani Date: Sat, 4 Feb 2023 00:37:08 +0530 Subject: [PATCH 34/34] Moving changelog to unreleased 2.x section Signed-off-by: PritLadani --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f7825bc08b5e3..adf01cef0382d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add getter for path field in NestedQueryBuilder ([#4636](https://github.com/opensearch-project/OpenSearch/pull/4636)) - Allow mmap to use new JDK-19 preview APIs in Apache Lucene 9.4+ ([#5151](https://github.com/opensearch-project/OpenSearch/pull/5151)) - Add support for ppc64le architecture ([#5459](https://github.com/opensearch-project/OpenSearch/pull/5459)) -- Cancellation of in-flight SearchTasks based on resource consumption ([#5606](https://github.com/opensearch-project/OpenSearch/pull/5605)) ### Dependencies - Bumps `log4j-core` from 
2.18.0 to 2.19.0 @@ -85,6 +84,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add support to disallow search request with preference parameter with strict weighted shard routing([#5874](https://github.com/opensearch-project/OpenSearch/pull/5874)) - Changing ExtensionActionRequest streaminput constructor to be public ([#6094](https://github.com/opensearch-project/OpenSearch/pull/6094)) - Adds support for minimum compatible version for extensions ([#6003](https://github.com/opensearch-project/OpenSearch/pull/6003)) +- Cancellation of in-flight SearchTasks based on resource consumption ([#5605](https://github.com/opensearch-project/OpenSearch/pull/5605)) ### Dependencies - Update nebula-publishing-plugin to 19.2.0 ([#5704](https://github.com/opensearch-project/OpenSearch/pull/5704))