From e901da09bc4a1bc44a8f026da1394ef3888ceaec Mon Sep 17 00:00:00 2001
From: Benjamin Trent
Date: Thu, 1 Jul 2021 07:08:45 -0400
Subject: [PATCH] [7.14] [ML] prevent accidentally asking for more resources
 when scaling down and improve scaling size estimations (#74691) (#74781)

* [ML] prevent accidentally asking for more resources when scaling down and improve scaling size estimations (#74691)

This commit addresses two problems:

- Our memory estimations are not very exact. Consequently, it is possible
  to request too much or too little by a handful of KBs. While this is not
  a large issue in ESS, it may be for custom tier sizes.
- When scaling down, it was possible for part of the scale down to actually
  be a scale up! This was due to floating point rounding errors and poor
  estimations. Even though our estimations are now better, it is best to
  NOT request higher resources in a scale down, no matter what.

One of the ways we improve the calculation is in the JVM size calculations.
Instead of the knot point being `2gb`, it has been moved to `1.2gb`. This
accounts for the "window of uncertainty" around JVM sizes.

closes: #74709
---
 .../xpack/ml/integration/AutoscalingIT.java   | 27 ++++--
 .../MlAutoscalingDeciderService.java          | 56 +++++++++--
 .../ml/autoscaling/NativeMemoryCapacity.java  | 15 ++-
 .../ml/utils/NativeMemoryCalculator.java      | 51 +++++-----
 .../MlAutoscalingDeciderServiceTests.java     | 97 +++++++++++++++++--
 .../NativeMemoryCapacityTests.java            |  8 +-
 .../ml/utils/NativeMemoryCalculatorTests.java | 86 +++++++++++-----
 7 files changed, 259 insertions(+), 81 deletions(-)

diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/AutoscalingIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/AutoscalingIT.java
index 9f432a9b02af4..8df550debe5ad 100644
--- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/AutoscalingIT.java
+++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/AutoscalingIT.java
@@ -26,6 +26,7 @@
 import org.elasticsearch.xpack.ml.autoscaling.MlAutoscalingDeciderService;
 import org.elasticsearch.xpack.ml.autoscaling.NativeMemoryCapacity;
 
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.SortedMap;
@@ -35,7 +36,7 @@
 
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
 import static org.hamcrest.Matchers.containsString;
-import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 import static org.hamcrest.Matchers.hasKey;
 
 public class AutoscalingIT extends MlNativeAutodetectIntegTestCase {
@@ -46,25 +47,37 @@ public class AutoscalingIT extends MlNativeAutodetectIntegTestCase {
 
     // This test assumes that xpack.ml.max_machine_memory_percent is 30
     // and that xpack.ml.use_auto_machine_memory_percent is false
-    public void testMLAutoscalingCapacity() {
+    public void testMLAutoscalingCapacity() throws Exception {
         SortedMap<String, Settings> deciders = new TreeMap<>();
         deciders.put(MlAutoscalingDeciderService.NAME,
             Settings.builder().put(MlAutoscalingDeciderService.DOWN_SCALE_DELAY.getKey(), TimeValue.ZERO).build());
         final PutAutoscalingPolicyAction.Request request = new PutAutoscalingPolicyAction.Request(
             "ml_test",
-            new TreeSet<>(Set.of("ml")),
+            new TreeSet<>(Arrays.asList(
+                "transform",
+                "data_frozen",
+                "master",
+                "remote_cluster_client",
"data", + "ml", + "data_content", + "data_hot", + "data_warm", + "data_cold", + "ingest" + )), deciders ); assertAcked(client().execute(PutAutoscalingPolicyAction.INSTANCE, request).actionGet()); - assertMlCapacity( + assertBusy(() -> assertMlCapacity( client().execute( GetAutoscalingCapacityAction.INSTANCE, new GetAutoscalingCapacityAction.Request() ).actionGet(), "Requesting scale down as tier and/or node size could be smaller", 0L, - 0L); + 0L)); putJob("job1", 100); putJob("job2", 200); @@ -151,8 +164,8 @@ private void assertMlCapacity(GetAutoscalingCapacityAction.Response capacity, St AutoscalingDeciderResult autoscalingDeciderResult = autoscalingDeciderResults.results().get("ml"); assertThat(autoscalingDeciderResult.reason().summary(), containsString(reason)); - assertThat(autoscalingDeciderResult.requiredCapacity().total().memory().getBytes(), equalTo(tierBytes)); - assertThat(autoscalingDeciderResult.requiredCapacity().node().memory().getBytes(), equalTo(nodeBytes)); + assertThat(autoscalingDeciderResult.requiredCapacity().total().memory().getBytes(), greaterThanOrEqualTo(tierBytes - 1L)); + assertThat(autoscalingDeciderResult.requiredCapacity().node().memory().getBytes(), greaterThanOrEqualTo(nodeBytes - 1L)); } private void putJob(String jobId, long limitMb) { diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderService.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderService.java index 2f0d04a3664a6..b133da32d218b 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderService.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderService.java @@ -71,6 +71,8 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService, private static final Duration DEFAULT_MEMORY_REFRESH_RATE = Duration.ofMinutes(15); private static final String MEMORY_STALE = "unable to make scaling decision as job memory requirements are stale"; private static final long NO_SCALE_DOWN_POSSIBLE = -1L; + // If ensureScaleDown changes the calculation by more than this much, log the error + private static final long ACCEPTABLE_DIFFERENCE = ByteSizeValue.ofMb(1).getBytes(); public static final String NAME = "ml"; public static final Setting NUM_ANOMALY_JOBS_IN_QUEUE = Setting.intSetting("num_anomaly_jobs_in_queue", 0, 0); @@ -359,6 +361,7 @@ public AutoscalingDeciderResult scale(Settings configuration, AutoscalingDecider final List nodes = getNodes(clusterState); final NativeMemoryCapacity currentScale = currentScale(nodes); + final MlScalingReason.Builder reasonBuilder = MlScalingReason.builder() .setWaitingAnomalyJobs(waitingAnomalyJobs) .setWaitingAnalyticsJobs(waitingAnalyticsJobs) @@ -527,9 +530,20 @@ public AutoscalingDeciderResult scale(Settings configuration, AutoscalingDecider .build())); } - final Optional scaleDownDecision = checkForScaleDown(nodeLoads, largestJob, currentScale, reasonBuilder); + final Optional maybeScaleDown = checkForScaleDown(nodeLoads, largestJob, currentScale, reasonBuilder) + // Due to weird rounding errors, it may be that a scale down result COULD cause a scale up + // Ensuring the scaleDown here forces the scale down result to always be lower than the current capacity. 
+            // This is safe as we know that ALL jobs are assigned at the current capacity
+            .map(result -> {
+                AutoscalingCapacity capacity = ensureScaleDown(result.requiredCapacity(), context.currentCapacity());
+                if (capacity == null) {
+                    return null;
+                }
+                return new AutoscalingDeciderResult(capacity, result.reason());
+            });
 
-        if (scaleDownDecision.isPresent()) {
+        if (maybeScaleDown.isPresent()) {
+            final AutoscalingDeciderResult scaleDownDecisionResult = maybeScaleDown.get();
             // Given maxOpenJobs, could we scale down to just one node?
             // We have no way of saying "we need X nodes"
             if (nodeLoads.size() > 1) {
@@ -546,14 +560,14 @@ public AutoscalingDeciderResult scale(Settings configuration, AutoscalingDecider
                     MAX_OPEN_JOBS_PER_NODE.getKey());
                 logger.info(() -> new ParameterizedMessage("{} Calculated potential scaled down capacity [{}] ",
                     msg,
-                    scaleDownDecision.get().requiredCapacity()));
+                    scaleDownDecisionResult.requiredCapacity()));
                 return new AutoscalingDeciderResult(context.currentCapacity(), reasonBuilder.setSimpleReason(msg).build());
             }
         }
 
         long msLeftToScale = msLeftToDownScale(configuration);
         if (msLeftToScale <= 0) {
-            return scaleDownDecision.get();
+            return scaleDownDecisionResult;
         }
         TimeValue downScaleDelay = DOWN_SCALE_DELAY.get(configuration);
         logger.debug(() -> new ParameterizedMessage(
@@ -561,7 +575,7 @@ public AutoscalingDeciderResult scale(Settings configuration, AutoscalingDecider
             " The last time scale down was detected [{}]. Calculated scaled down capacity [{}] ",
             downScaleDelay.getStringRep(),
             XContentElasticsearchExtension.DEFAULT_DATE_PRINTER.print(scaleDownDetected),
-            scaleDownDecision.get().requiredCapacity()));
+            scaleDownDecisionResult.requiredCapacity()));
         return new AutoscalingDeciderResult(
             context.currentCapacity(),
             reasonBuilder
@@ -586,6 +600,31 @@ public AutoscalingDeciderResult scale(Settings configuration, AutoscalingDecider
                 .build()));
     }
 
+    static AutoscalingCapacity ensureScaleDown(AutoscalingCapacity scaleDownResult, AutoscalingCapacity currentCapacity) {
+        if (currentCapacity == null || scaleDownResult == null) {
+            return null;
+        }
+        AutoscalingCapacity newCapacity = new AutoscalingCapacity(
+            new AutoscalingCapacity.AutoscalingResources(
+                currentCapacity.total().storage(),
+                ByteSizeValue.ofBytes(Math.min(scaleDownResult.total().memory().getBytes(), currentCapacity.total().memory().getBytes()))
+            ),
+            new AutoscalingCapacity.AutoscalingResources(
+                currentCapacity.node().storage(),
+                ByteSizeValue.ofBytes(Math.min(scaleDownResult.node().memory().getBytes(), currentCapacity.node().memory().getBytes()))
+            )
+        );
+        if (scaleDownResult.node().memory().getBytes() - newCapacity.node().memory().getBytes() > ACCEPTABLE_DIFFERENCE
+            || scaleDownResult.total().memory().getBytes() - newCapacity.total().memory().getBytes() > ACCEPTABLE_DIFFERENCE) {
+            logger.warn(
+                "scale down accidentally requested a scale up, auto-corrected; initial scaling [{}], corrected [{}]",
+                scaleDownResult,
+                newCapacity
+            );
+        }
+        return newCapacity;
+    }
+
     AutoscalingDeciderResult noScaleResultOrRefresh(MlScalingReason.Builder reasonBuilder,
                                                     boolean memoryTrackingStale,
                                                     AutoscalingDeciderResult potentialResult) {
@@ -842,8 +881,11 @@ Optional<AutoscalingDeciderResult> checkForScaleDown(List<NodeLoad> nodeLoads,
         // Or our largest job could be on a smaller node (meaning the same size tier but smaller nodes are possible).
if (currentlyNecessaryTier < currentCapacity.getTier() || currentlyNecessaryNode < currentCapacity.getNode()) { NativeMemoryCapacity nativeMemoryCapacity = new NativeMemoryCapacity( - currentlyNecessaryTier, - currentlyNecessaryNode, + // Since we are in the `scaleDown` branch, we know jobs are running and we could be smaller + // If we have some weird rounding errors, it may be that the `currentlyNecessary` values are larger than + // current capacity. We never want to accidentally say "scale up" via a scale down. + Math.min(currentlyNecessaryTier, currentCapacity.getTier()), + Math.min(currentlyNecessaryNode, currentCapacity.getNode()), // If our newly suggested native capacity is the same, we can use the previously stored jvm size currentlyNecessaryNode == currentCapacity.getNode() ? currentCapacity.getJvmSize() : null); AutoscalingCapacity requiredCapacity = nativeMemoryCapacity.autoscalingCapacity(maxMachineMemoryPercent, useAuto); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/NativeMemoryCapacity.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/NativeMemoryCapacity.java index 96ac77546e8bb..f79e6e709b17a 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/NativeMemoryCapacity.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/NativeMemoryCapacity.java @@ -12,6 +12,9 @@ import org.elasticsearch.xpack.ml.utils.NativeMemoryCalculator; import java.util.Objects; +import java.util.Optional; + +import static org.elasticsearch.xpack.ml.utils.NativeMemoryCalculator.dynamicallyCalculateJvmSizeFromNativeMemorySize; // Used for storing native memory capacity and then transforming it into an autoscaling capacity // which takes into account the whole node size @@ -49,7 +52,11 @@ NativeMemoryCapacity merge(NativeMemoryCapacity nativeMemoryCapacity) { return this; } - AutoscalingCapacity autoscalingCapacity(int maxMemoryPercent, boolean useAuto) { + public AutoscalingCapacity autoscalingCapacity(int maxMemoryPercent, boolean useAuto) { + // We calculate the JVM size here first to ensure it stays the same given the rest of the calculations + final Long jvmSize = useAuto ? + Optional.ofNullable(this.jvmSize).orElse(dynamicallyCalculateJvmSizeFromNativeMemorySize(node)) : + null; // We first need to calculate the actual node size given the current native memory size. // This way we can accurately determine the required node size AND what the overall memory percentage will be long actualNodeSize = NativeMemoryCalculator.calculateApproxNecessaryNodeSize(node, jvmSize, maxMemoryPercent, useAuto); @@ -57,14 +64,14 @@ AutoscalingCapacity autoscalingCapacity(int maxMemoryPercent, boolean useAuto) { // This simplifies calculating the tier as it means that each node in the tier // will have the same dynamic memory calculation. And thus the tier is simply the sum of the memory necessary // times that scaling factor. - int memoryPercentForMl = (int)Math.floor(NativeMemoryCalculator.modelMemoryPercent( + double memoryPercentForMl = NativeMemoryCalculator.modelMemoryPercent( actualNodeSize, jvmSize, maxMemoryPercent, useAuto - )); + ); double inverseScale = memoryPercentForMl <= 0 ? 0 : 100.0 / memoryPercentForMl; - long actualTier = (long)Math.ceil(tier * inverseScale); + long actualTier = Math.round(tier * inverseScale); return new AutoscalingCapacity( // Tier should always be AT LEAST the largest node size. // This Math.max catches any strange rounding errors or weird input. 
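A worked example of the inverse-scale arithmetic in the autoscalingCapacity hunt above may help. This is a sketch, not part of the patch: the 41% figure is an assumed result of NativeMemoryCalculator.modelMemoryPercent(...), and the class name is hypothetical.

    // Converting required native ML tier memory into total node memory for the tier.
    // Keeping memoryPercentForMl as a double and using Math.round (instead of the old
    // floor-to-int then Math.ceil) keeps the result within a byte of the exact value,
    // rather than overshooting by KBs.
    public final class TierInverseScaleSketch {
        public static void main(String[] args) {
            long tier = 4294967296L;          // 4GB of native ML memory required across the tier
            double memoryPercentForMl = 41.0; // assumed: % of each node's memory usable by ML
            double inverseScale = memoryPercentForMl <= 0 ? 0 : 100.0 / memoryPercentForMl;
            long actualTier = Math.round(tier * inverseScale);
            System.out.println(actualTier);   // 10475529990 bytes, ~9.76GB of total node memory
        }
    }
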
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/utils/NativeMemoryCalculator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/utils/NativeMemoryCalculator.java index b4e32c16a15a9..e4183e981a5c3 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/utils/NativeMemoryCalculator.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/utils/NativeMemoryCalculator.java @@ -23,6 +23,7 @@ public final class NativeMemoryCalculator { + private static final long STATIC_JVM_UPPER_THRESHOLD = ByteSizeValue.ofGb(2).getBytes(); static final long MINIMUM_AUTOMATIC_NODE_SIZE = ByteSizeValue.ofGb(1).getBytes(); private static final long OS_OVERHEAD = ByteSizeValue.ofMb(200L).getBytes(); @@ -80,15 +81,11 @@ public static long calculateApproxNecessaryNodeSize(long nativeMachineMemory, Lo if (useAuto) { // TODO utilize official ergonomic JVM size calculations when available. jvmSize = jvmSize == null ? dynamicallyCalculateJvmSizeFromNativeMemorySize(nativeMachineMemory) : jvmSize; - // We use a Math.floor here to ensure we have AT LEAST enough memory given rounding. - int modelMemoryPercent = (int)Math.floor(modelMemoryPercent( - nativeMachineMemory + jvmSize + OS_OVERHEAD, - jvmSize, - maxMemoryPercent, - true)); - // We calculate the inverse percentage of `nativeMachineMemory + OS_OVERHEAD` as `OS_OVERHEAD` is always present - // on the native memory side and we need to account for it when we invert the model memory percentage - return Math.max((long)Math.ceil((100.0/modelMemoryPercent) * (nativeMachineMemory + OS_OVERHEAD)), MINIMUM_AUTOMATIC_NODE_SIZE); + // We haven't reached our 90% threshold, so, simply summing up the values is adequate + if ((jvmSize + OS_OVERHEAD)/(double)nativeMachineMemory > 0.1) { + return Math.max(nativeMachineMemory + jvmSize + OS_OVERHEAD, MINIMUM_AUTOMATIC_NODE_SIZE); + } + return Math.round((nativeMachineMemory/0.9)); } return (long) ((100.0/maxMemoryPercent) * nativeMachineMemory); } @@ -118,18 +115,11 @@ public static double modelMemoryPercent(long machineMemory, Long jvmSize, int ma return maxMemoryPercent; } - public static int modelMemoryPercent(long machineMemory, int maxMemoryPercent, boolean useAuto) { - return (int)Math.ceil(modelMemoryPercent(machineMemory, - null, - maxMemoryPercent, - useAuto)); - } - - private static long allowedBytesForMl(long machineMemory, Long jvmSize, int maxMemoryPercent, boolean useAuto) { + static long allowedBytesForMl(long machineMemory, Long jvmSize, int maxMemoryPercent, boolean useAuto) { if (useAuto && jvmSize != null) { // It is conceivable that there is a machine smaller than 200MB. // If the administrator wants to use the auto configuration, the node should be larger. 
- if (machineMemory - jvmSize < OS_OVERHEAD || machineMemory == 0) { + if (machineMemory - jvmSize <= OS_OVERHEAD || machineMemory == 0) { return machineMemory / 100; } // This calculation is dynamic and designed to maximally take advantage of the underlying machine for machine learning @@ -139,8 +129,8 @@ private static long allowedBytesForMl(long machineMemory, Long jvmSize, int maxM // 2GB node -> 66% // 16GB node -> 87% // 64GB node -> 90% - long memoryPercent = Math.min(90, (int)Math.ceil(((machineMemory - jvmSize - OS_OVERHEAD) / (double)machineMemory) * 100.0D)); - return (long)(machineMemory * (memoryPercent / 100.0)); + double memoryProportion = Math.min(0.90, (machineMemory - jvmSize - OS_OVERHEAD) / (double)machineMemory); + return Math.round(machineMemory * memoryProportion); } return (long)(machineMemory * (maxMemoryPercent / 100.0)); @@ -154,17 +144,20 @@ public static long allowedBytesForMl(long machineMemory, int maxMemoryPercent, b } // TODO replace with official ergonomic calculation - private static long dynamicallyCalculateJvmSizeFromNodeSize(long nodeSize) { - if (nodeSize < ByteSizeValue.ofGb(2).getBytes()) { - return (long) (nodeSize * 0.40); + public static long dynamicallyCalculateJvmSizeFromNodeSize(long nodeSize) { + // While the original idea here was to predicate on 2Gb, it has been found that the knot points of + // 2GB and 8GB cause weird issues where the JVM size will "jump the gap" from one to the other when + // considering true tier sizes in elastic cloud. + if (nodeSize < ByteSizeValue.ofMb(1280).getBytes()) { + return (long)(nodeSize * 0.40); } if (nodeSize < ByteSizeValue.ofGb(8).getBytes()) { - return (long) (nodeSize * 0.25); + return (long)(nodeSize * 0.25); } - return ByteSizeValue.ofGb(2).getBytes(); + return STATIC_JVM_UPPER_THRESHOLD; } - private static long dynamicallyCalculateJvmSizeFromNativeMemorySize(long nativeMachineMemory) { + public static long dynamicallyCalculateJvmSizeFromNativeMemorySize(long nativeMachineMemory) { // See dynamicallyCalculateJvm the following JVM calculations are arithmetic inverses of JVM calculation // // Example: For < 2GB node, the JVM is 0.4 * total_node_size. This means, the rest is 0.6 the node size. 
@@ -172,12 +165,12 @@ private static long dynamicallyCalculateJvmSizeFromNativeMemorySize(long nativeM
     //    Consequently jvmSize = (nativeAndOverHead / 0.6)*0.4 = nativeAndOverHead * 2/3
         long nativeAndOverhead = nativeMachineMemory + OS_OVERHEAD;
         if (nativeAndOverhead < (ByteSizeValue.ofGb(2).getBytes() * 0.60)) {
-            return (long) Math.ceil(nativeAndOverhead * (2.0 / 3.0));
+            return Math.round((nativeAndOverhead / 0.6) * 0.4);
         }
         if (nativeAndOverhead < (ByteSizeValue.ofGb(8).getBytes() * 0.75)) {
-            return (long) Math.ceil(nativeAndOverhead / 3.0);
+            return Math.round((nativeAndOverhead / 0.75) * 0.25);
         }
-        return ByteSizeValue.ofGb(2).getBytes();
+        return STATIC_JVM_UPPER_THRESHOLD;
     }
 }
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java
index a543900812fa6..5ff66cdf35416 100644
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java
@@ -153,8 +153,19 @@ public void testScaleUp_withWaitingJobsAndAutoMemoryAndNoRoomInNodes() {
                 NativeMemoryCapacity.ZERO,
                 reasonBuilder);
             assertTrue(decision.isPresent());
-            assertThat(decision.get().requiredCapacity().node().memory().getBytes(), equalTo(3512729601L));
-            assertThat(decision.get().requiredCapacity().total().memory().getBytes(), equalTo(9382475687L));
+            AutoscalingDeciderResult result = decision.get();
+            long allowedBytesForMlNode = NativeMemoryCalculator.allowedBytesForMl(
+                result.requiredCapacity().node().memory().getBytes(),
+                30,
+                true
+            );
+            long allowedBytesForMlTier = NativeMemoryCalculator.allowedBytesForMl(
+                result.requiredCapacity().total().memory().getBytes(),
+                30,
+                true
+            );
+            assertThat(allowedBytesForMlNode, greaterThanOrEqualTo(ByteSizeValue.ofGb(2).getBytes() + OVERHEAD));
+            assertThat(allowedBytesForMlTier, greaterThanOrEqualTo(ByteSizeValue.ofGb(2).getBytes() * 3 + OVERHEAD));
         }
         { // we allow one job in the analytics queue
             Optional<AutoscalingDeciderResult> decision = service.checkForScaleUp(0, 1,
@@ -165,8 +176,19 @@ public void testScaleUp_withWaitingJobsAndAutoMemoryAndNoRoomInNodes() {
                 NativeMemoryCapacity.ZERO,
                 reasonBuilder);
             assertTrue(decision.isPresent());
-            assertThat(decision.get().requiredCapacity().node().memory().getBytes(), equalTo(3512729601L));
-            assertThat(decision.get().requiredCapacity().total().memory().getBytes(), equalTo(6270180545L));
+            AutoscalingDeciderResult result = decision.get();
+            long allowedBytesForMlNode = NativeMemoryCalculator.allowedBytesForMl(
+                result.requiredCapacity().node().memory().getBytes(),
+                30,
+                true
+            );
+            long allowedBytesForMlTier = NativeMemoryCalculator.allowedBytesForMl(
+                result.requiredCapacity().total().memory().getBytes(),
+                30,
+                true
+            );
+            assertThat(allowedBytesForMlNode, greaterThanOrEqualTo(ByteSizeValue.ofGb(2).getBytes() + OVERHEAD));
+            assertThat(allowedBytesForMlTier, greaterThanOrEqualTo(ByteSizeValue.ofGb(2).getBytes() * 2 + OVERHEAD));
         }
         { // we allow one job in the anomaly queue and analytics queue
             Optional<AutoscalingDeciderResult> decision = service.checkForScaleUp(1, 1,
@@ -177,8 +199,19 @@ public void testScaleUp_withWaitingJobsAndAutoMemoryAndNoRoomInNodes() {
                 NativeMemoryCapacity.ZERO,
                 reasonBuilder);
             assertTrue(decision.isPresent());
-            assertThat(decision.get().requiredCapacity().node().memory().getBytes(), equalTo(3512729601L));
-            assertThat(decision.get().requiredCapacity().total().memory().getBytes(), equalTo(3512729601L));
+            AutoscalingDeciderResult result = decision.get();
+            long allowedBytesForMlNode = NativeMemoryCalculator.allowedBytesForMl(
+                result.requiredCapacity().node().memory().getBytes(),
+                30,
+                true
+            );
+            long allowedBytesForMlTier = NativeMemoryCalculator.allowedBytesForMl(
+                result.requiredCapacity().total().memory().getBytes(),
+                30,
+                true
+            );
+            assertThat(allowedBytesForMlNode, greaterThanOrEqualTo(ByteSizeValue.ofGb(2).getBytes() + OVERHEAD));
+            assertThat(allowedBytesForMlTier, greaterThanOrEqualTo(ByteSizeValue.ofGb(2).getBytes() + OVERHEAD));
         }
     }
 
@@ -362,7 +395,7 @@ public void testScaleDown() {
         {// Current capacity allows for smaller tier
             Optional<AutoscalingDeciderResult> result = service.checkForScaleDown(nodeLoads,
                 ByteSizeValue.ofMb(100).getBytes(),
-                new NativeMemoryCapacity(ByteSizeValue.ofGb(4).getBytes(), ByteSizeValue.ofMb(100).getBytes()),
+                new NativeMemoryCapacity(ByteSizeValue.ofGb(4).getBytes(), ByteSizeValue.ofGb(1).getBytes()),
                 reasonBuilder);
             assertThat(result.isPresent(), is(true));
             AutoscalingDeciderResult autoscalingDeciderResult = result.get();
@@ -380,6 +413,56 @@ public void testScaleDown() {
         }
     }
 
+    public void testEnsureScaleDown() {
+        assertThat(
+            MlAutoscalingDeciderService.ensureScaleDown(
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(8)),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(1))
+                ),
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4)),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(2))
+                )
+            ), equalTo(new AutoscalingCapacity(
+                new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4)),
+                new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(1))
+            ))
+        );
+
+        assertThat(
+            MlAutoscalingDeciderService.ensureScaleDown(
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(8)),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(3))
+                ),
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4)),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(2))
+                )
+            ), equalTo(new AutoscalingCapacity(
+                new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4)),
+                new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(2))
+            ))
+        );
+
+        assertThat(
+            MlAutoscalingDeciderService.ensureScaleDown(
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4)),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(3))
+                ),
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(3)),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(2))
+                )
+            ), equalTo(new AutoscalingCapacity(
+                new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(3)),
+                new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(2))
+            ))
+        );
+    }
+
     public void testFutureAvailableCapacity() {
         nodeLoadDetector = new NodeLoadDetector(mlMemoryTracker);
         MlAutoscalingDeciderService service = buildService();
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/NativeMemoryCapacityTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/NativeMemoryCapacityTests.java
index 2d483b950e3f8..63c72ae414531 100644
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/NativeMemoryCapacityTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/NativeMemoryCapacityTests.java
@@ -59,8 +59,8 @@ public void testAutoscalingCapacity() {
         }
         { // auto is true
             AutoscalingCapacity autoscalingCapacity = capacity.autoscalingCapacity(25, true);
-            assertThat(autoscalingCapacity.node().memory().getBytes(), equalTo(1604321280L));
-            assertThat(autoscalingCapacity.total().memory().getBytes(), equalTo(5174659393L));
+            assertThat(autoscalingCapacity.node().memory().getBytes(), equalTo(1335885824L));
+            assertThat(autoscalingCapacity.total().memory().getBytes(), equalTo(5343543296L));
         }
         { // auto is true with unknown jvm size
             capacity = new NativeMemoryCapacity(
@@ -68,8 +68,8 @@ public void testAutoscalingCapacity() {
                 ByteSizeValue.ofGb(1).getBytes()
             );
             AutoscalingCapacity autoscalingCapacity = capacity.autoscalingCapacity(25, true);
-            assertThat(autoscalingCapacity.node().memory().getBytes(), equalTo(2566914048L));
-            assertThat(autoscalingCapacity.total().memory().getBytes(), equalTo(6507526207L));
+            assertThat(autoscalingCapacity.node().memory().getBytes(), equalTo(2139095040L));
+            assertThat(autoscalingCapacity.total().memory().getBytes(), equalTo(8556380160L));
         }
     }
 
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/utils/NativeMemoryCalculatorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/utils/NativeMemoryCalculatorTests.java
index 3163a05dadbf5..aa17cffc58199 100644
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/utils/NativeMemoryCalculatorTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/utils/NativeMemoryCalculatorTests.java
@@ -15,9 +15,14 @@
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.util.set.Sets;
+import org.elasticsearch.core.Tuple;
 import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xpack.autoscaling.capacity.AutoscalingCapacity;
+import org.elasticsearch.xpack.ml.autoscaling.NativeMemoryCapacity;
 
+import java.util.Arrays;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.OptionalLong;
 import java.util.function.BiConsumer;
@@ -27,6 +32,7 @@
 import static org.elasticsearch.xpack.ml.MachineLearning.MAX_MACHINE_MEMORY_PERCENT;
 import static org.elasticsearch.xpack.ml.MachineLearning.USE_AUTO_MACHINE_MEMORY_PERCENT;
 import static org.elasticsearch.xpack.ml.utils.NativeMemoryCalculator.MINIMUM_AUTOMATIC_NODE_SIZE;
+import static org.elasticsearch.xpack.ml.utils.NativeMemoryCalculator.dynamicallyCalculateJvmSizeFromNodeSize;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 
@@ -49,6 +55,54 @@ public void testAllowedBytesForMLWhenAutoIsFalse() {
         }
     }
 
+    public void testConsistencyInAutoCalculation() {
+        for (Tuple<Long, Long> nodeAndJvmSize : Arrays.asList(
+            Tuple.tuple(1073741824L, 432013312L), // 1GB and true JVM size
+            Tuple.tuple(2147483648L, 536870912L), // 2GB ...
+            Tuple.tuple(4294967296L, 1073741824L), // 4GB ...
+            Tuple.tuple(8589934592L, 2147483648L), // 8GB ...
+            Tuple.tuple(17179869184L, 2147483648L), // 16GB ...
+            Tuple.tuple(34359738368L, 2147483648L), // 32GB ...
+            Tuple.tuple(68719476736L, 2147483648L), // 64GB ...
+            Tuple.tuple(16106127360L, 2147483648L), // 15GB ...
+            Tuple.tuple(32212254720L, 2147483648L), // 30GB ...
+            Tuple.tuple(64424509440L, 2147483648L) // 60GB ...
+        )) {
+            final long trueJvmSize = nodeAndJvmSize.v2();
+            final long trueNodeSize = nodeAndJvmSize.v1();
+            List<Long> nodeSizes = Arrays.asList(
+                trueNodeSize + ByteSizeValue.ofMb(10).getBytes(),
+                trueNodeSize - ByteSizeValue.ofMb(10).getBytes(),
+                trueNodeSize
+            );
+            for (long nodeSize : nodeSizes) {
+                // Simulate having a true size that already exists from the node vs. us dynamically calculating it
+                long jvmSize = randomBoolean() ? dynamicallyCalculateJvmSizeFromNodeSize(nodeSize) : trueJvmSize;
+                DiscoveryNode node = newNode(jvmSize, nodeSize);
+                Settings settings = newSettings(30, true);
+                ClusterSettings clusterSettings = newClusterSettings(30, true);
+
+                long bytesForML = randomBoolean() ?
+                    NativeMemoryCalculator.allowedBytesForMl(node, settings).getAsLong() :
+                    NativeMemoryCalculator.allowedBytesForMl(node, clusterSettings).getAsLong();
+
+                NativeMemoryCapacity nativeMemoryCapacity = new NativeMemoryCapacity(
+                    bytesForML,
+                    bytesForML,
+                    jvmSize
+                );
+
+                AutoscalingCapacity capacity = nativeMemoryCapacity.autoscalingCapacity(30, true);
+                // We don't allow node sizes below 1GB, so we will always be at least that large
+                // Also, allow 1 byte off for weird rounding issues
+                assertThat(capacity.node().memory().getBytes(), greaterThanOrEqualTo(
+                    Math.max(nodeSize, ByteSizeValue.ofGb(1).getBytes()) - 1L));
+                assertThat(capacity.total().memory().getBytes(), greaterThanOrEqualTo(
+                    Math.max(nodeSize, ByteSizeValue.ofGb(1).getBytes()) - 1L));
+            }
+        }
+    }
+
     public void testAllowedBytesForMlWhenAutoIsTrue() {
         for (int i = 0; i < NUM_TEST_RUNS; i++) {
             long nodeSize = randomLongBetween(ByteSizeValue.ofMb(500).getBytes(), ByteSizeValue.ofGb(64).getBytes());
@@ -58,10 +112,10 @@ public void testAllowedBytesForMlWhenAutoIsTrue() {
             Settings settings = newSettings(percent, true);
             ClusterSettings clusterSettings = newClusterSettings(percent, true);
 
-            int truePercent = Math.min(
+            double truePercent = Math.min(
                 90,
-                (int)Math.ceil(((nodeSize - jvmSize - ByteSizeValue.ofMb(200).getBytes()) / (double)nodeSize) * 100.0D));
-            long expected = (long)(nodeSize * (truePercent / 100.0));
+                ((nodeSize - jvmSize - ByteSizeValue.ofMb(200).getBytes()) / (double)nodeSize) * 100.0D);
+            long expected = Math.round(nodeSize * (truePercent / 100.0));
 
             assertThat(NativeMemoryCalculator.allowedBytesForMl(node, settings).getAsLong(), equalTo(expected));
             assertThat(NativeMemoryCalculator.allowedBytesForMl(node, clusterSettings).getAsLong(), equalTo(expected));
@@ -69,20 +123,6 @@ public void testAllowedBytesForMlWhenAutoIsTrue() {
         }
     }
 
-    public void testAllowedBytesForMlWhenAutoIsTrueButJVMSizeIsUnknown() {
-        long nodeSize = randomLongBetween(ByteSizeValue.ofMb(500).getBytes(), ByteSizeValue.ofGb(64).getBytes());
-        int percent = randomIntBetween(5, 200);
-        DiscoveryNode node = newNode(null, nodeSize);
-        Settings settings = newSettings(percent, true);
-        ClusterSettings clusterSettings = newClusterSettings(percent, true);
-
-        long expected = (long)(nodeSize * (percent / 100.0));
-
-        assertThat(NativeMemoryCalculator.allowedBytesForMl(node, settings).getAsLong(), equalTo(expected));
-        assertThat(NativeMemoryCalculator.allowedBytesForMl(node, clusterSettings).getAsLong(), equalTo(expected));
-        assertThat(NativeMemoryCalculator.allowedBytesForMl(node, percent, false).getAsLong(), equalTo(expected));
-    }
-
     public void testAllowedBytesForMlWhenBothJVMAndNodeSizeAreUnknown() {
         int percent = randomIntBetween(5, 200);
         DiscoveryNode node = newNode(null, null);
@@ -110,7 +150,6 @@ public void testTinyNode() {
     }
 
     public void testActualNodeSizeCalculationConsistency() {
-
         final TriConsumer<Long, Integer, Long> consistentAutoAssertions = (nativeMemory, memoryPercentage, delta) -> {
             long autoNodeSize = NativeMemoryCalculator.calculateApproxNecessaryNodeSize(nativeMemory, null, memoryPercentage, true);
             // It should always be greater than the minimum supported node size
             assertThat(autoNodeSize,
@@ -119,12 +158,13 @@ public void testActualNodeSizeCalculationConsistency() {
                 greaterThanOrEqualTo(MINIMUM_AUTOMATIC_NODE_SIZE));
             // Our approximate real node size should always return a usable native memory size that is at least the original native memory
             // size. Rounding errors may cause it to be non-exact.
+            long allowedBytesForMl = NativeMemoryCalculator.allowedBytesForMl(autoNodeSize, memoryPercentage, true);
             assertThat("native memory ["
-                + NativeMemoryCalculator.allowedBytesForMl(autoNodeSize, memoryPercentage, true)
+                + allowedBytesForMl
                 + "] smaller than original native memory ["
                 + nativeMemory
                 + "]",
-                NativeMemoryCalculator.allowedBytesForMl(autoNodeSize, memoryPercentage, true),
+                allowedBytesForMl,
                 greaterThanOrEqualTo(nativeMemory - delta));
         };
 
@@ -155,18 +195,18 @@ public void testActualNodeSizeCalculationConsistency() {
         int memoryPercentage = randomIntBetween(5, 200);
         { // tiny memory
             long nodeMemory = randomLongBetween(ByteSizeValue.ofKb(100).getBytes(), ByteSizeValue.ofMb(500).getBytes());
-            consistentAutoAssertions.apply(nodeMemory, memoryPercentage, 0L);
+            consistentAutoAssertions.apply(nodeMemory, memoryPercentage, 1L);
             consistentManualAssertions.accept(nodeMemory, memoryPercentage);
         }
         { // normal-ish memory
             long nodeMemory = randomLongBetween(ByteSizeValue.ofMb(500).getBytes(), ByteSizeValue.ofGb(4).getBytes());
             // periodically, the calculated assertions end up being about 6% off, allowing this small delta to account for flakiness
-            consistentAutoAssertions.apply(nodeMemory, memoryPercentage, (long) (0.06 * nodeMemory));
+            consistentAutoAssertions.apply(nodeMemory, memoryPercentage, 1L);
             consistentManualAssertions.accept(nodeMemory, memoryPercentage);
         }
         { // huge memory
             long nodeMemory = randomLongBetween(ByteSizeValue.ofGb(30).getBytes(), ByteSizeValue.ofGb(60).getBytes());
-            consistentAutoAssertions.apply(nodeMemory, memoryPercentage, 0L);
+            consistentAutoAssertions.apply(nodeMemory, memoryPercentage, 1L);
             consistentManualAssertions.accept(nodeMemory, memoryPercentage);
         }
     }
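The knot-point change in NativeMemoryCalculator can be sanity-checked in isolation: the node-size-to-JVM-size mapping and its arithmetic inverse should round-trip to within a byte or two, which is what the new testConsistencyInAutoCalculation exercises end to end. Below is a minimal standalone sketch mirroring the patch's thresholds (1280MB and 8GB knot points, 200MB OS overhead); the class and method names are illustrative, not from the patch.

    // Standalone sketch of the JVM sizing functions changed above; mirrors the patch's
    // 1280MB/8GB knot points and 200MB OS overhead. Names are illustrative.
    public final class JvmSizeRoundTripSketch {
        static final long MB = 1024L * 1024;
        static final long GB = 1024 * MB;
        static final long OS_OVERHEAD = 200 * MB;
        static final long STATIC_JVM_UPPER_THRESHOLD = 2 * GB;

        // Forward: node size -> JVM size (knot point at 1.2GB rather than 2GB).
        static long jvmFromNode(long nodeSize) {
            if (nodeSize < 1280 * MB) {
                return (long) (nodeSize * 0.40);
            }
            if (nodeSize < 8 * GB) {
                return (long) (nodeSize * 0.25);
            }
            return STATIC_JVM_UPPER_THRESHOLD;
        }

        // Inverse: native ML memory -> JVM size, undoing the proportions above.
        static long jvmFromNative(long nativeMemory) {
            long nativeAndOverhead = nativeMemory + OS_OVERHEAD;
            if (nativeAndOverhead < 2 * GB * 0.60) {
                return Math.round((nativeAndOverhead / 0.6) * 0.4);
            }
            if (nativeAndOverhead < 8 * GB * 0.75) {
                return Math.round((nativeAndOverhead / 0.75) * 0.25);
            }
            return STATIC_JVM_UPPER_THRESHOLD;
        }

        public static void main(String[] args) {
            for (long node : new long[] { 1 * GB, 2 * GB, 4 * GB, 16 * GB }) {
                long jvm = jvmFromNode(node);
                long nativeMem = node - jvm - OS_OVERHEAD;
                // The inverse applied to the remaining native memory should land within
                // a byte or two of the original JVM size.
                System.out.printf("node=%d jvm=%d inverse=%d%n", node, jvm, jvmFromNative(nativeMem));
            }
        }
    }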