From e901da09bc4a1bc44a8f026da1394ef3888ceaec Mon Sep 17 00:00:00 2001
From: Benjamin Trent
Date: Thu, 1 Jul 2021 07:08:45 -0400
Subject: [PATCH] [7.14] [ML] prevent accidentally asking for more resources
 when scaling down and improve scaling size estimations (#74691) (#74781)

* [ML] prevent accidentally asking for more resources when scaling down and improve scaling size estimations (#74691)

This commit addresses two problems:

- Our memory estimations are not very exact. Consequently, it is possible
  to request too much or too little by a handful of KBs. While this is not
  a large issue in ESS, it may be for custom tier sizes.
- When scaling down, it was possible for part of the scale down to actually
  be a scale up! This was due to floating point rounding errors and poor
  estimations. Even though our estimations are now better, it is best to
  NOT request higher resources in a scale down, no matter what.

One of the ways we improve the calculation is in the JVM size calculations.
Instead of the knot point being `2gb`, it has been moved to `1.2gb`. This
accounts for the "window of uncertainty" around JVM sizes.

closes: #74709
---
 .../xpack/ml/integration/AutoscalingIT.java   | 27 ++++--
 .../MlAutoscalingDeciderService.java          | 56 +++++++++--
 .../ml/autoscaling/NativeMemoryCapacity.java  | 15 ++-
 .../ml/utils/NativeMemoryCalculator.java      | 51 +++++-----
 .../MlAutoscalingDeciderServiceTests.java     | 97 +++++++++++++++++--
 .../NativeMemoryCapacityTests.java            |  8 +-
 .../ml/utils/NativeMemoryCalculatorTests.java | 86 +++++++++++-----
 7 files changed, 259 insertions(+), 81 deletions(-)

diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/AutoscalingIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/AutoscalingIT.java
index 9f432a9b02af4..8df550debe5ad 100644
--- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/AutoscalingIT.java
+++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/AutoscalingIT.java
@@ -26,6 +26,7 @@
 import org.elasticsearch.xpack.ml.autoscaling.MlAutoscalingDeciderService;
 import org.elasticsearch.xpack.ml.autoscaling.NativeMemoryCapacity;
 
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.SortedMap;
@@ -35,7 +36,7 @@
 
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
 import static org.hamcrest.Matchers.containsString;
-import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 import static org.hamcrest.Matchers.hasKey;
 
 public class AutoscalingIT extends MlNativeAutodetectIntegTestCase {
@@ -46,25 +47,37 @@ public class AutoscalingIT extends MlNativeAutodetectIntegTestCase {
 
     // This test assumes that xpack.ml.max_machine_memory_percent is 30
     // and that xpack.ml.use_auto_machine_memory_percent is false
-    public void testMLAutoscalingCapacity() {
+    public void testMLAutoscalingCapacity() throws Exception {
         SortedMap<String, Settings> deciders = new TreeMap<>();
         deciders.put(MlAutoscalingDeciderService.NAME,
             Settings.builder().put(MlAutoscalingDeciderService.DOWN_SCALE_DELAY.getKey(), TimeValue.ZERO).build());
         final PutAutoscalingPolicyAction.Request request = new PutAutoscalingPolicyAction.Request(
             "ml_test",
-            new TreeSet<>(Set.of("ml")),
+            new TreeSet<>(Arrays.asList(
+                "transform",
+                "data_frozen",
+                "master",
+                "remote_cluster_client",
"data", + "ml", + "data_content", + "data_hot", + "data_warm", + "data_cold", + "ingest" + )), deciders ); assertAcked(client().execute(PutAutoscalingPolicyAction.INSTANCE, request).actionGet()); - assertMlCapacity( + assertBusy(() -> assertMlCapacity( client().execute( GetAutoscalingCapacityAction.INSTANCE, new GetAutoscalingCapacityAction.Request() ).actionGet(), "Requesting scale down as tier and/or node size could be smaller", 0L, - 0L); + 0L)); putJob("job1", 100); putJob("job2", 200); @@ -151,8 +164,8 @@ private void assertMlCapacity(GetAutoscalingCapacityAction.Response capacity, St AutoscalingDeciderResult autoscalingDeciderResult = autoscalingDeciderResults.results().get("ml"); assertThat(autoscalingDeciderResult.reason().summary(), containsString(reason)); - assertThat(autoscalingDeciderResult.requiredCapacity().total().memory().getBytes(), equalTo(tierBytes)); - assertThat(autoscalingDeciderResult.requiredCapacity().node().memory().getBytes(), equalTo(nodeBytes)); + assertThat(autoscalingDeciderResult.requiredCapacity().total().memory().getBytes(), greaterThanOrEqualTo(tierBytes - 1L)); + assertThat(autoscalingDeciderResult.requiredCapacity().node().memory().getBytes(), greaterThanOrEqualTo(nodeBytes - 1L)); } private void putJob(String jobId, long limitMb) { diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderService.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderService.java index 2f0d04a3664a6..b133da32d218b 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderService.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderService.java @@ -71,6 +71,8 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService, private static final Duration DEFAULT_MEMORY_REFRESH_RATE = Duration.ofMinutes(15); private static final String MEMORY_STALE = "unable to make scaling decision as job memory requirements are stale"; private static final long NO_SCALE_DOWN_POSSIBLE = -1L; + // If ensureScaleDown changes the calculation by more than this much, log the error + private static final long ACCEPTABLE_DIFFERENCE = ByteSizeValue.ofMb(1).getBytes(); public static final String NAME = "ml"; public static final Setting NUM_ANOMALY_JOBS_IN_QUEUE = Setting.intSetting("num_anomaly_jobs_in_queue", 0, 0); @@ -359,6 +361,7 @@ public AutoscalingDeciderResult scale(Settings configuration, AutoscalingDecider final List nodes = getNodes(clusterState); final NativeMemoryCapacity currentScale = currentScale(nodes); + final MlScalingReason.Builder reasonBuilder = MlScalingReason.builder() .setWaitingAnomalyJobs(waitingAnomalyJobs) .setWaitingAnalyticsJobs(waitingAnalyticsJobs) @@ -527,9 +530,20 @@ public AutoscalingDeciderResult scale(Settings configuration, AutoscalingDecider .build())); } - final Optional scaleDownDecision = checkForScaleDown(nodeLoads, largestJob, currentScale, reasonBuilder); + final Optional maybeScaleDown = checkForScaleDown(nodeLoads, largestJob, currentScale, reasonBuilder) + // Due to weird rounding errors, it may be that a scale down result COULD cause a scale up + // Ensuring the scaleDown here forces the scale down result to always be lower than the current capacity. 
+            // This is safe as we know that ALL jobs are assigned at the current capacity
+            .map(result -> {
+                AutoscalingCapacity capacity = ensureScaleDown(result.requiredCapacity(), context.currentCapacity());
+                if (capacity == null) {
+                    return null;
+                }
+                return new AutoscalingDeciderResult(capacity, result.reason());
+            });
 
-        if (scaleDownDecision.isPresent()) {
+        if (maybeScaleDown.isPresent()) {
+            final AutoscalingDeciderResult scaleDownDecisionResult = maybeScaleDown.get();
             // Given maxOpenJobs, could we scale down to just one node?
             // We have no way of saying "we need X nodes"
             if (nodeLoads.size() > 1) {
@@ -546,14 +560,14 @@ public AutoscalingDeciderResult scale(Settings configuration, AutoscalingDecider
                     MAX_OPEN_JOBS_PER_NODE.getKey());
                 logger.info(() -> new ParameterizedMessage("{} Calculated potential scaled down capacity [{}] ",
                     msg,
-                    scaleDownDecision.get().requiredCapacity()));
+                    scaleDownDecisionResult.requiredCapacity()));
                 return new AutoscalingDeciderResult(context.currentCapacity(), reasonBuilder.setSimpleReason(msg).build());
             }
         }
 
         long msLeftToScale = msLeftToDownScale(configuration);
         if (msLeftToScale <= 0) {
-            return scaleDownDecision.get();
+            return scaleDownDecisionResult;
         }
         TimeValue downScaleDelay = DOWN_SCALE_DELAY.get(configuration);
         logger.debug(() -> new ParameterizedMessage(
@@ -561,7 +575,7 @@ public AutoscalingDeciderResult scale(Settings configuration, AutoscalingDecider
             " The last time scale down was detected [{}]. Calculated scaled down capacity [{}] ",
             downScaleDelay.getStringRep(),
             XContentElasticsearchExtension.DEFAULT_DATE_PRINTER.print(scaleDownDetected),
-            scaleDownDecision.get().requiredCapacity()));
+            scaleDownDecisionResult.requiredCapacity()));
         return new AutoscalingDeciderResult(
             context.currentCapacity(),
             reasonBuilder
@@ -586,6 +600,31 @@ public AutoscalingDeciderResult scale(Settings configuration, AutoscalingDecider
                 .build()));
     }
 
+    static AutoscalingCapacity ensureScaleDown(AutoscalingCapacity scaleDownResult, AutoscalingCapacity currentCapacity) {
+        if (currentCapacity == null || scaleDownResult == null) {
+            return null;
+        }
+        AutoscalingCapacity newCapacity = new AutoscalingCapacity(
+            new AutoscalingCapacity.AutoscalingResources(
+                currentCapacity.total().storage(),
+                ByteSizeValue.ofBytes(Math.min(scaleDownResult.total().memory().getBytes(), currentCapacity.total().memory().getBytes()))
+            ),
+            new AutoscalingCapacity.AutoscalingResources(
+                currentCapacity.node().storage(),
+                ByteSizeValue.ofBytes(Math.min(scaleDownResult.node().memory().getBytes(), currentCapacity.node().memory().getBytes()))
+            )
+        );
+        if (scaleDownResult.node().memory().getBytes() - newCapacity.node().memory().getBytes() > ACCEPTABLE_DIFFERENCE
+            || scaleDownResult.total().memory().getBytes() - newCapacity.total().memory().getBytes() > ACCEPTABLE_DIFFERENCE) {
+            logger.warn(
+                "scale down accidentally requested a scale up, auto-corrected; initial scaling [{}], corrected [{}]",
+                scaleDownResult,
+                newCapacity
+            );
+        }
+        return newCapacity;
+    }
+
     AutoscalingDeciderResult noScaleResultOrRefresh(MlScalingReason.Builder reasonBuilder,
                                                     boolean memoryTrackingStale,
                                                     AutoscalingDeciderResult potentialResult) {
@@ -842,8 +881,11 @@ Optional<AutoscalingDeciderResult> checkForScaleDown(List<NodeLoad> nodeLoads,
         // Or our largest job could be on a smaller node (meaning the same size tier but smaller nodes are possible).
if (currentlyNecessaryTier < currentCapacity.getTier() || currentlyNecessaryNode < currentCapacity.getNode()) { NativeMemoryCapacity nativeMemoryCapacity = new NativeMemoryCapacity( - currentlyNecessaryTier, - currentlyNecessaryNode, + // Since we are in the `scaleDown` branch, we know jobs are running and we could be smaller + // If we have some weird rounding errors, it may be that the `currentlyNecessary` values are larger than + // current capacity. We never want to accidentally say "scale up" via a scale down. + Math.min(currentlyNecessaryTier, currentCapacity.getTier()), + Math.min(currentlyNecessaryNode, currentCapacity.getNode()), // If our newly suggested native capacity is the same, we can use the previously stored jvm size currentlyNecessaryNode == currentCapacity.getNode() ? currentCapacity.getJvmSize() : null); AutoscalingCapacity requiredCapacity = nativeMemoryCapacity.autoscalingCapacity(maxMachineMemoryPercent, useAuto); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/NativeMemoryCapacity.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/NativeMemoryCapacity.java index 96ac77546e8bb..f79e6e709b17a 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/NativeMemoryCapacity.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/NativeMemoryCapacity.java @@ -12,6 +12,9 @@ import org.elasticsearch.xpack.ml.utils.NativeMemoryCalculator; import java.util.Objects; +import java.util.Optional; + +import static org.elasticsearch.xpack.ml.utils.NativeMemoryCalculator.dynamicallyCalculateJvmSizeFromNativeMemorySize; // Used for storing native memory capacity and then transforming it into an autoscaling capacity // which takes into account the whole node size @@ -49,7 +52,11 @@ NativeMemoryCapacity merge(NativeMemoryCapacity nativeMemoryCapacity) { return this; } - AutoscalingCapacity autoscalingCapacity(int maxMemoryPercent, boolean useAuto) { + public AutoscalingCapacity autoscalingCapacity(int maxMemoryPercent, boolean useAuto) { + // We calculate the JVM size here first to ensure it stays the same given the rest of the calculations + final Long jvmSize = useAuto ? + Optional.ofNullable(this.jvmSize).orElse(dynamicallyCalculateJvmSizeFromNativeMemorySize(node)) : + null; // We first need to calculate the actual node size given the current native memory size. // This way we can accurately determine the required node size AND what the overall memory percentage will be long actualNodeSize = NativeMemoryCalculator.calculateApproxNecessaryNodeSize(node, jvmSize, maxMemoryPercent, useAuto); @@ -57,14 +64,14 @@ AutoscalingCapacity autoscalingCapacity(int maxMemoryPercent, boolean useAuto) { // This simplifies calculating the tier as it means that each node in the tier // will have the same dynamic memory calculation. And thus the tier is simply the sum of the memory necessary // times that scaling factor. - int memoryPercentForMl = (int)Math.floor(NativeMemoryCalculator.modelMemoryPercent( + double memoryPercentForMl = NativeMemoryCalculator.modelMemoryPercent( actualNodeSize, jvmSize, maxMemoryPercent, useAuto - )); + ); double inverseScale = memoryPercentForMl <= 0 ? 0 : 100.0 / memoryPercentForMl; - long actualTier = (long)Math.ceil(tier * inverseScale); + long actualTier = Math.round(tier * inverseScale); return new AutoscalingCapacity( // Tier should always be AT LEAST the largest node size. // This Math.max catches any strange rounding errors or weird input. 
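A worked example of the inverse-scale arithmetic in the autoscalingCapacity hunt above may help. This is a sketch, not part of the patch: the 41% figure is an assumed result of NativeMemoryCalculator.modelMemoryPercent(...), and the class name is hypothetical.

    // Converting required native ML tier memory into total node memory for the tier.
    // Keeping memoryPercentForMl as a double and using Math.round (instead of the old
    // floor-to-int then Math.ceil) keeps the result within a byte of the exact value,
    // rather than overshooting by KBs.
    public final class TierInverseScaleSketch {
        public static void main(String[] args) {
            long tier = 4294967296L;          // 4GB of native ML memory required across the tier
            double memoryPercentForMl = 41.0; // assumed: % of each node's memory usable by ML
            double inverseScale = memoryPercentForMl <= 0 ? 0 : 100.0 / memoryPercentForMl;
            long actualTier = Math.round(tier * inverseScale);
            System.out.println(actualTier);   // 10475529990 bytes, ~9.76GB of total node memory
        }
    }
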
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/utils/NativeMemoryCalculator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/utils/NativeMemoryCalculator.java index b4e32c16a15a9..e4183e981a5c3 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/utils/NativeMemoryCalculator.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/utils/NativeMemoryCalculator.java @@ -23,6 +23,7 @@ public final class NativeMemoryCalculator { + private static final long STATIC_JVM_UPPER_THRESHOLD = ByteSizeValue.ofGb(2).getBytes(); static final long MINIMUM_AUTOMATIC_NODE_SIZE = ByteSizeValue.ofGb(1).getBytes(); private static final long OS_OVERHEAD = ByteSizeValue.ofMb(200L).getBytes(); @@ -80,15 +81,11 @@ public static long calculateApproxNecessaryNodeSize(long nativeMachineMemory, Lo if (useAuto) { // TODO utilize official ergonomic JVM size calculations when available. jvmSize = jvmSize == null ? dynamicallyCalculateJvmSizeFromNativeMemorySize(nativeMachineMemory) : jvmSize; - // We use a Math.floor here to ensure we have AT LEAST enough memory given rounding. - int modelMemoryPercent = (int)Math.floor(modelMemoryPercent( - nativeMachineMemory + jvmSize + OS_OVERHEAD, - jvmSize, - maxMemoryPercent, - true)); - // We calculate the inverse percentage of `nativeMachineMemory + OS_OVERHEAD` as `OS_OVERHEAD` is always present - // on the native memory side and we need to account for it when we invert the model memory percentage - return Math.max((long)Math.ceil((100.0/modelMemoryPercent) * (nativeMachineMemory + OS_OVERHEAD)), MINIMUM_AUTOMATIC_NODE_SIZE); + // We haven't reached our 90% threshold, so, simply summing up the values is adequate + if ((jvmSize + OS_OVERHEAD)/(double)nativeMachineMemory > 0.1) { + return Math.max(nativeMachineMemory + jvmSize + OS_OVERHEAD, MINIMUM_AUTOMATIC_NODE_SIZE); + } + return Math.round((nativeMachineMemory/0.9)); } return (long) ((100.0/maxMemoryPercent) * nativeMachineMemory); } @@ -118,18 +115,11 @@ public static double modelMemoryPercent(long machineMemory, Long jvmSize, int ma return maxMemoryPercent; } - public static int modelMemoryPercent(long machineMemory, int maxMemoryPercent, boolean useAuto) { - return (int)Math.ceil(modelMemoryPercent(machineMemory, - null, - maxMemoryPercent, - useAuto)); - } - - private static long allowedBytesForMl(long machineMemory, Long jvmSize, int maxMemoryPercent, boolean useAuto) { + static long allowedBytesForMl(long machineMemory, Long jvmSize, int maxMemoryPercent, boolean useAuto) { if (useAuto && jvmSize != null) { // It is conceivable that there is a machine smaller than 200MB. // If the administrator wants to use the auto configuration, the node should be larger. 
- if (machineMemory - jvmSize < OS_OVERHEAD || machineMemory == 0) { + if (machineMemory - jvmSize <= OS_OVERHEAD || machineMemory == 0) { return machineMemory / 100; } // This calculation is dynamic and designed to maximally take advantage of the underlying machine for machine learning @@ -139,8 +129,8 @@ private static long allowedBytesForMl(long machineMemory, Long jvmSize, int maxM // 2GB node -> 66% // 16GB node -> 87% // 64GB node -> 90% - long memoryPercent = Math.min(90, (int)Math.ceil(((machineMemory - jvmSize - OS_OVERHEAD) / (double)machineMemory) * 100.0D)); - return (long)(machineMemory * (memoryPercent / 100.0)); + double memoryProportion = Math.min(0.90, (machineMemory - jvmSize - OS_OVERHEAD) / (double)machineMemory); + return Math.round(machineMemory * memoryProportion); } return (long)(machineMemory * (maxMemoryPercent / 100.0)); @@ -154,17 +144,20 @@ public static long allowedBytesForMl(long machineMemory, int maxMemoryPercent, b } // TODO replace with official ergonomic calculation - private static long dynamicallyCalculateJvmSizeFromNodeSize(long nodeSize) { - if (nodeSize < ByteSizeValue.ofGb(2).getBytes()) { - return (long) (nodeSize * 0.40); + public static long dynamicallyCalculateJvmSizeFromNodeSize(long nodeSize) { + // While the original idea here was to predicate on 2Gb, it has been found that the knot points of + // 2GB and 8GB cause weird issues where the JVM size will "jump the gap" from one to the other when + // considering true tier sizes in elastic cloud. + if (nodeSize < ByteSizeValue.ofMb(1280).getBytes()) { + return (long)(nodeSize * 0.40); } if (nodeSize < ByteSizeValue.ofGb(8).getBytes()) { - return (long) (nodeSize * 0.25); + return (long)(nodeSize * 0.25); } - return ByteSizeValue.ofGb(2).getBytes(); + return STATIC_JVM_UPPER_THRESHOLD; } - private static long dynamicallyCalculateJvmSizeFromNativeMemorySize(long nativeMachineMemory) { + public static long dynamicallyCalculateJvmSizeFromNativeMemorySize(long nativeMachineMemory) { // See dynamicallyCalculateJvm the following JVM calculations are arithmetic inverses of JVM calculation // // Example: For < 2GB node, the JVM is 0.4 * total_node_size. This means, the rest is 0.6 the node size. 
@@ -172,12 +165,12 @@ private static long dynamicallyCalculateJvmSizeFromNativeMemorySize(long nativeM
     //    Consequently jvmSize = (nativeAndOverHead / 0.6)*0.4 = nativeAndOverHead * 2/3
         long nativeAndOverhead = nativeMachineMemory + OS_OVERHEAD;
         if (nativeAndOverhead < (ByteSizeValue.ofGb(2).getBytes() * 0.60)) {
-            return (long) Math.ceil(nativeAndOverhead * (2.0 / 3.0));
+            return Math.round((nativeAndOverhead / 0.6) * 0.4);
         }
         if (nativeAndOverhead < (ByteSizeValue.ofGb(8).getBytes() * 0.75)) {
-            return (long) Math.ceil(nativeAndOverhead / 3.0);
+            return Math.round((nativeAndOverhead / 0.75) * 0.25);
         }
-        return ByteSizeValue.ofGb(2).getBytes();
+        return STATIC_JVM_UPPER_THRESHOLD;
     }
 }
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java
index a543900812fa6..5ff66cdf35416 100644
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java
@@ -153,8 +153,19 @@ public void testScaleUp_withWaitingJobsAndAutoMemoryAndNoRoomInNodes() {
                 NativeMemoryCapacity.ZERO,
                 reasonBuilder);
             assertTrue(decision.isPresent());
-            assertThat(decision.get().requiredCapacity().node().memory().getBytes(), equalTo(3512729601L));
-            assertThat(decision.get().requiredCapacity().total().memory().getBytes(), equalTo(9382475687L));
+            AutoscalingDeciderResult result = decision.get();
+            long allowedBytesForMlNode = NativeMemoryCalculator.allowedBytesForMl(
+                result.requiredCapacity().node().memory().getBytes(),
+                30,
+                true
+            );
+            long allowedBytesForMlTier = NativeMemoryCalculator.allowedBytesForMl(
+                result.requiredCapacity().total().memory().getBytes(),
+                30,
+                true
+            );
+            assertThat(allowedBytesForMlNode, greaterThanOrEqualTo(ByteSizeValue.ofGb(2).getBytes() + OVERHEAD));
+            assertThat(allowedBytesForMlTier, greaterThanOrEqualTo(ByteSizeValue.ofGb(2).getBytes() * 3 + OVERHEAD));
         }
         { // we allow one job in the analytics queue
             Optional<AutoscalingDeciderResult> decision = service.checkForScaleUp(0, 1,
@@ -165,8 +176,19 @@ public void testScaleUp_withWaitingJobsAndAutoMemoryAndNoRoomInNodes() {
                 NativeMemoryCapacity.ZERO,
                 reasonBuilder);
             assertTrue(decision.isPresent());
-            assertThat(decision.get().requiredCapacity().node().memory().getBytes(), equalTo(3512729601L));
-            assertThat(decision.get().requiredCapacity().total().memory().getBytes(), equalTo(6270180545L));
+            AutoscalingDeciderResult result = decision.get();
+            long allowedBytesForMlNode = NativeMemoryCalculator.allowedBytesForMl(
+                result.requiredCapacity().node().memory().getBytes(),
+                30,
+                true
+            );
+            long allowedBytesForMlTier = NativeMemoryCalculator.allowedBytesForMl(
+                result.requiredCapacity().total().memory().getBytes(),
+                30,
+                true
+            );
+            assertThat(allowedBytesForMlNode, greaterThanOrEqualTo(ByteSizeValue.ofGb(2).getBytes() + OVERHEAD));
+            assertThat(allowedBytesForMlTier, greaterThanOrEqualTo(ByteSizeValue.ofGb(2).getBytes() * 2 + OVERHEAD));
         }
         { // we allow one job in the anomaly queue and analytics queue
             Optional<AutoscalingDeciderResult> decision = service.checkForScaleUp(1, 1,
@@ -177,8 +199,19 @@ public void testScaleUp_withWaitingJobsAndAutoMemoryAndNoRoomInNodes() {
                 NativeMemoryCapacity.ZERO,
                 reasonBuilder);
             assertTrue(decision.isPresent());
-            assertThat(decision.get().requiredCapacity().node().memory().getBytes(), equalTo(3512729601L));
-            assertThat(decision.get().requiredCapacity().total().memory().getBytes(), equalTo(3512729601L));
+            AutoscalingDeciderResult result = decision.get();
+            long allowedBytesForMlNode = NativeMemoryCalculator.allowedBytesForMl(
+                result.requiredCapacity().node().memory().getBytes(),
+                30,
+                true
+            );
+            long allowedBytesForMlTier = NativeMemoryCalculator.allowedBytesForMl(
+                result.requiredCapacity().total().memory().getBytes(),
+                30,
+                true
+            );
+            assertThat(allowedBytesForMlNode, greaterThanOrEqualTo(ByteSizeValue.ofGb(2).getBytes() + OVERHEAD));
+            assertThat(allowedBytesForMlTier, greaterThanOrEqualTo(ByteSizeValue.ofGb(2).getBytes() + OVERHEAD));
         }
     }
 
@@ -362,7 +395,7 @@ public void testScaleDown() {
         {// Current capacity allows for smaller tier
             Optional<AutoscalingDeciderResult> result = service.checkForScaleDown(nodeLoads,
                 ByteSizeValue.ofMb(100).getBytes(),
-                new NativeMemoryCapacity(ByteSizeValue.ofGb(4).getBytes(), ByteSizeValue.ofMb(100).getBytes()),
+                new NativeMemoryCapacity(ByteSizeValue.ofGb(4).getBytes(), ByteSizeValue.ofGb(1).getBytes()),
                 reasonBuilder);
             assertThat(result.isPresent(), is(true));
             AutoscalingDeciderResult autoscalingDeciderResult = result.get();
@@ -380,6 +413,56 @@ public void testScaleDown() {
         }
     }
 
+    public void testEnsureScaleDown() {
+        assertThat(
+            MlAutoscalingDeciderService.ensureScaleDown(
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(8)),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(1))
+                ),
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4)),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(2))
+                )
+            ), equalTo(new AutoscalingCapacity(
+                new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4)),
+                new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(1))
+            ))
+        );
+
+        assertThat(
+            MlAutoscalingDeciderService.ensureScaleDown(
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(8)),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(3))
+                ),
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4)),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(2))
+                )
+            ), equalTo(new AutoscalingCapacity(
+                new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4)),
+                new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(2))
+            ))
+        );
+
+        assertThat(
+            MlAutoscalingDeciderService.ensureScaleDown(
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4)),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(3))
+                ),
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(3)),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(2))
+                )
+            ), equalTo(new AutoscalingCapacity(
+                new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(3)),
+                new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(2))
+            ))
+        );
+    }
+
     public void testFutureAvailableCapacity() {
         nodeLoadDetector = new NodeLoadDetector(mlMemoryTracker);
         MlAutoscalingDeciderService service = buildService();
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/NativeMemoryCapacityTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/NativeMemoryCapacityTests.java
index 2d483b950e3f8..63c72ae414531 100644
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/NativeMemoryCapacityTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/NativeMemoryCapacityTests.java
@@ -59,8 +59,8 @@ public void testAutoscalingCapacity() {
         }
         { // auto is true
             AutoscalingCapacity autoscalingCapacity = capacity.autoscalingCapacity(25, true);
-            assertThat(autoscalingCapacity.node().memory().getBytes(), equalTo(1604321280L));
-            assertThat(autoscalingCapacity.total().memory().getBytes(), equalTo(5174659393L));
+            assertThat(autoscalingCapacity.node().memory().getBytes(), equalTo(1335885824L));
+            assertThat(autoscalingCapacity.total().memory().getBytes(), equalTo(5343543296L));
         }
         { // auto is true with unknown jvm size
             capacity = new NativeMemoryCapacity(
@@ -68,8 +68,8 @@ public void testAutoscalingCapacity() {
                 ByteSizeValue.ofGb(1).getBytes()
             );
             AutoscalingCapacity autoscalingCapacity = capacity.autoscalingCapacity(25, true);
-            assertThat(autoscalingCapacity.node().memory().getBytes(), equalTo(2566914048L));
-            assertThat(autoscalingCapacity.total().memory().getBytes(), equalTo(6507526207L));
+            assertThat(autoscalingCapacity.node().memory().getBytes(), equalTo(2139095040L));
+            assertThat(autoscalingCapacity.total().memory().getBytes(), equalTo(8556380160L));
         }
     }
 
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/utils/NativeMemoryCalculatorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/utils/NativeMemoryCalculatorTests.java
index 3163a05dadbf5..aa17cffc58199 100644
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/utils/NativeMemoryCalculatorTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/utils/NativeMemoryCalculatorTests.java
@@ -15,9 +15,14 @@
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.util.set.Sets;
+import org.elasticsearch.core.Tuple;
 import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xpack.autoscaling.capacity.AutoscalingCapacity;
+import org.elasticsearch.xpack.ml.autoscaling.NativeMemoryCapacity;
 
+import java.util.Arrays;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.OptionalLong;
 import java.util.function.BiConsumer;
@@ -27,6 +32,7 @@
 import static org.elasticsearch.xpack.ml.MachineLearning.MAX_MACHINE_MEMORY_PERCENT;
 import static org.elasticsearch.xpack.ml.MachineLearning.USE_AUTO_MACHINE_MEMORY_PERCENT;
 import static org.elasticsearch.xpack.ml.utils.NativeMemoryCalculator.MINIMUM_AUTOMATIC_NODE_SIZE;
+import static org.elasticsearch.xpack.ml.utils.NativeMemoryCalculator.dynamicallyCalculateJvmSizeFromNodeSize;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 
@@ -49,6 +55,54 @@ public void testAllowedBytesForMLWhenAutoIsFalse() {
         }
     }
 
+    public void testConsistencyInAutoCalculation() {
+        for (Tuple<Long, Long> nodeAndJvmSize : Arrays.asList(
+            Tuple.tuple(1073741824L, 432013312L), // 1GB and true JVM size
+            Tuple.tuple(2147483648L, 536870912L), // 2GB ...
+            Tuple.tuple(4294967296L, 1073741824L), // 4GB ...
+            Tuple.tuple(8589934592L, 2147483648L), // 8GB ...
+            Tuple.tuple(17179869184L, 2147483648L), // 16GB ...
+            Tuple.tuple(34359738368L, 2147483648L), // 32GB ...
+            Tuple.tuple(68719476736L, 2147483648L), // 64GB ...
+            Tuple.tuple(16106127360L, 2147483648L), // 15GB ...
+            Tuple.tuple(32212254720L, 2147483648L), // 30GB ...
+            Tuple.tuple(64424509440L, 2147483648L) // 60GB ...
+        )) {
+            final long trueJvmSize = nodeAndJvmSize.v2();
+            final long trueNodeSize = nodeAndJvmSize.v1();
+            List<Long> nodeSizes = Arrays.asList(
+                trueNodeSize + ByteSizeValue.ofMb(10).getBytes(),
+                trueNodeSize - ByteSizeValue.ofMb(10).getBytes(),
+                trueNodeSize
+            );
+            for (long nodeSize : nodeSizes) {
+                // Simulate having a true size that already exists from the node vs. us dynamically calculating it
+                long jvmSize = randomBoolean() ? dynamicallyCalculateJvmSizeFromNodeSize(nodeSize) : trueJvmSize;
+                DiscoveryNode node = newNode(jvmSize, nodeSize);
+                Settings settings = newSettings(30, true);
+                ClusterSettings clusterSettings = newClusterSettings(30, true);
+
+                long bytesForML = randomBoolean() ?
+                    NativeMemoryCalculator.allowedBytesForMl(node, settings).getAsLong() :
+                    NativeMemoryCalculator.allowedBytesForMl(node, clusterSettings).getAsLong();
+
+                NativeMemoryCapacity nativeMemoryCapacity = new NativeMemoryCapacity(
+                    bytesForML,
+                    bytesForML,
+                    jvmSize
+                );
+
+                AutoscalingCapacity capacity = nativeMemoryCapacity.autoscalingCapacity(30, true);
+                // We don't allow node sizes below 1GB, so we will always be at least that large
+                // Also, allow 1 byte off for weird rounding issues
+                assertThat(capacity.node().memory().getBytes(), greaterThanOrEqualTo(
+                    Math.max(nodeSize, ByteSizeValue.ofGb(1).getBytes()) - 1L));
+                assertThat(capacity.total().memory().getBytes(), greaterThanOrEqualTo(
+                    Math.max(nodeSize, ByteSizeValue.ofGb(1).getBytes()) - 1L));
+            }
+        }
+    }
+
     public void testAllowedBytesForMlWhenAutoIsTrue() {
         for (int i = 0; i < NUM_TEST_RUNS; i++) {
             long nodeSize = randomLongBetween(ByteSizeValue.ofMb(500).getBytes(), ByteSizeValue.ofGb(64).getBytes());
@@ -58,10 +112,10 @@ public void testAllowedBytesForMlWhenAutoIsTrue() {
             Settings settings = newSettings(percent, true);
             ClusterSettings clusterSettings = newClusterSettings(percent, true);
 
-            int truePercent = Math.min(
+            double truePercent = Math.min(
                 90,
-                (int)Math.ceil(((nodeSize - jvmSize - ByteSizeValue.ofMb(200).getBytes()) / (double)nodeSize) * 100.0D));
-            long expected = (long)(nodeSize * (truePercent / 100.0));
+                ((nodeSize - jvmSize - ByteSizeValue.ofMb(200).getBytes()) / (double)nodeSize) * 100.0D);
+            long expected = Math.round(nodeSize * (truePercent / 100.0));
 
             assertThat(NativeMemoryCalculator.allowedBytesForMl(node, settings).getAsLong(), equalTo(expected));
             assertThat(NativeMemoryCalculator.allowedBytesForMl(node, clusterSettings).getAsLong(), equalTo(expected));
@@ -69,20 +123,6 @@ public void testAllowedBytesForMlWhenAutoIsTrue() {
         }
     }
 
-    public void testAllowedBytesForMlWhenAutoIsTrueButJVMSizeIsUnknown() {
-        long nodeSize = randomLongBetween(ByteSizeValue.ofMb(500).getBytes(), ByteSizeValue.ofGb(64).getBytes());
-        int percent = randomIntBetween(5, 200);
-        DiscoveryNode node = newNode(null, nodeSize);
-        Settings settings = newSettings(percent, true);
-        ClusterSettings clusterSettings = newClusterSettings(percent, true);
-
-        long expected = (long)(nodeSize * (percent / 100.0));
-
-        assertThat(NativeMemoryCalculator.allowedBytesForMl(node, settings).getAsLong(), equalTo(expected));
-        assertThat(NativeMemoryCalculator.allowedBytesForMl(node, clusterSettings).getAsLong(), equalTo(expected));
-        assertThat(NativeMemoryCalculator.allowedBytesForMl(node, percent, false).getAsLong(), equalTo(expected));
-    }
-
     public void testAllowedBytesForMlWhenBothJVMAndNodeSizeAreUnknown() {
         int percent = randomIntBetween(5, 200);
         DiscoveryNode node = newNode(null, null);
@@ -110,7 +150,6 @@ public void testTinyNode() {
     }
 
     public void testActualNodeSizeCalculationConsistency() {
-
         final TriConsumer<Long, Integer, Long> consistentAutoAssertions = (nativeMemory, memoryPercentage, delta) -> {
             long autoNodeSize = NativeMemoryCalculator.calculateApproxNecessaryNodeSize(nativeMemory, null, memoryPercentage, true);
             // It should always be greater than the minimum supported node size
             assertThat(autoNodeSize,
@@ -119,12 +158,13 @@ public void testActualNodeSizeCalculationConsistency() {
                 greaterThanOrEqualTo(MINIMUM_AUTOMATIC_NODE_SIZE));
             // Our approximate real node size should always return a usable native memory size that is at least the original native memory
             // size. Rounding errors may cause it to be non-exact.
+            long allowedBytesForMl = NativeMemoryCalculator.allowedBytesForMl(autoNodeSize, memoryPercentage, true);
             assertThat("native memory ["
-                + NativeMemoryCalculator.allowedBytesForMl(autoNodeSize, memoryPercentage, true)
+                + allowedBytesForMl
                 + "] smaller than original native memory ["
                 + nativeMemory
                 + "]",
-                NativeMemoryCalculator.allowedBytesForMl(autoNodeSize, memoryPercentage, true),
+                allowedBytesForMl,
                 greaterThanOrEqualTo(nativeMemory - delta));
         };
 
@@ -155,18 +195,18 @@ public void testActualNodeSizeCalculationConsistency() {
         int memoryPercentage = randomIntBetween(5, 200);
         { // tiny memory
             long nodeMemory = randomLongBetween(ByteSizeValue.ofKb(100).getBytes(), ByteSizeValue.ofMb(500).getBytes());
-            consistentAutoAssertions.apply(nodeMemory, memoryPercentage, 0L);
+            consistentAutoAssertions.apply(nodeMemory, memoryPercentage, 1L);
             consistentManualAssertions.accept(nodeMemory, memoryPercentage);
         }
         { // normal-ish memory
             long nodeMemory = randomLongBetween(ByteSizeValue.ofMb(500).getBytes(), ByteSizeValue.ofGb(4).getBytes());
             // periodically, the calculated assertions end up being about 6% off, allowing this small delta to account for flakiness
-            consistentAutoAssertions.apply(nodeMemory, memoryPercentage, (long) (0.06 * nodeMemory));
+            consistentAutoAssertions.apply(nodeMemory, memoryPercentage, 1L);
             consistentManualAssertions.accept(nodeMemory, memoryPercentage);
         }
         { // huge memory
             long nodeMemory = randomLongBetween(ByteSizeValue.ofGb(30).getBytes(), ByteSizeValue.ofGb(60).getBytes());
-            consistentAutoAssertions.apply(nodeMemory, memoryPercentage, 0L);
+            consistentAutoAssertions.apply(nodeMemory, memoryPercentage, 1L);
             consistentManualAssertions.accept(nodeMemory, memoryPercentage);
         }
     }
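The knot-point change in NativeMemoryCalculator can be sanity-checked in isolation: the node-size-to-JVM-size mapping and its arithmetic inverse should round-trip to within a byte or two, which is what the new testConsistencyInAutoCalculation exercises end to end. Below is a minimal standalone sketch mirroring the patch's thresholds (1280MB and 8GB knot points, 200MB OS overhead); the class and method names are illustrative, not from the patch.

    // Standalone sketch of the JVM sizing functions changed above; mirrors the patch's
    // 1280MB/8GB knot points and 200MB OS overhead. Names are illustrative.
    public final class JvmSizeRoundTripSketch {
        static final long MB = 1024L * 1024;
        static final long GB = 1024 * MB;
        static final long OS_OVERHEAD = 200 * MB;
        static final long STATIC_JVM_UPPER_THRESHOLD = 2 * GB;

        // Forward: node size -> JVM size (knot point at 1.2GB rather than 2GB).
        static long jvmFromNode(long nodeSize) {
            if (nodeSize < 1280 * MB) {
                return (long) (nodeSize * 0.40);
            }
            if (nodeSize < 8 * GB) {
                return (long) (nodeSize * 0.25);
            }
            return STATIC_JVM_UPPER_THRESHOLD;
        }

        // Inverse: native ML memory -> JVM size, undoing the proportions above.
        static long jvmFromNative(long nativeMemory) {
            long nativeAndOverhead = nativeMemory + OS_OVERHEAD;
            if (nativeAndOverhead < 2 * GB * 0.60) {
                return Math.round((nativeAndOverhead / 0.6) * 0.4);
            }
            if (nativeAndOverhead < 8 * GB * 0.75) {
                return Math.round((nativeAndOverhead / 0.75) * 0.25);
            }
            return STATIC_JVM_UPPER_THRESHOLD;
        }

        public static void main(String[] args) {
            for (long node : new long[] { 1 * GB, 2 * GB, 4 * GB, 16 * GB }) {
                long jvm = jvmFromNode(node);
                long nativeMem = node - jvm - OS_OVERHEAD;
                // The inverse applied to the remaining native memory should land within
                // a byte or two of the original JVM size.
                System.out.printf("node=%d jvm=%d inverse=%d%n", node, jvm, jvmFromNative(nativeMem));
            }
        }
    }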