
[ML] Fail model deployment if all allocations cannot be provided #88656

Merged
@@ -773,6 +773,26 @@ public void testNotifications() throws IOException {
         assertSystemNotificationsContain("Rebalanced trained model allocations because [model deployment started]");
     }
 
+    public void testStartDeployment_TooManyAllocations() throws IOException {
+        String modelId = "test_start_deployment_too_many_allocations";
+        createTrainedModel(modelId);
+        putModelDefinition(modelId);
+        putVocabulary(List.of("these", "are", "my", "words"), modelId);
+
+        ResponseException ex = expectThrows(
+            ResponseException.class,
+            () -> startDeployment(modelId, AllocationStatus.State.STARTED.toString(), 100, 1)
+        );
+        assertThat(ex.getResponse().getStatusLine().getStatusCode(), equalTo(429));
+        assertThat(
+            EntityUtils.toString(ex.getResponse().getEntity()),
+            containsString("Could not start deployment because there are not enough resources to provide all requested allocations")
+        );
+
+        Response response = getTrainedModelStats(modelId);
+        assertThat(EntityUtils.toString(response.getEntity()), not(containsString("deployment_stats")));
+    }
+
     @SuppressWarnings("unchecked")
     private void assertAllocationCount(String modelId, int expectedAllocationCount) throws IOException {
         Response response = getTrainedModelStats(modelId);
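For reference, a minimal sketch of the request this test drives through the `startDeployment` helper, assuming the 8.x `_start` deployment endpoint and its `number_of_allocations` / `threads_per_allocation` parameters; the helper's exact behavior is an assumption, not shown in this diff:

    // Hedged sketch of what startDeployment(modelId, "started", 100, 1) presumably issues.
    // Endpoint path and parameter names are assumptions based on the 8.x REST API.
    Request request = new Request("POST", "/_ml/trained_models/" + modelId + "/deployment/_start");
    request.addParameter("wait_for", "started");
    request.addParameter("number_of_allocations", "100");
    request.addParameter("threads_per_allocation", "1");
    // On a cluster that cannot provide 100 allocations and cannot scale further,
    // this now fails fast with a 429 instead of waiting for the timeout.
    client().performRequest(request);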
@@ -487,15 +487,11 @@ public boolean test(ClusterState clusterState) {
                 .stream()
                 .filter(d -> nodesShuttingDown.contains(d.getId()) == false)
                 .filter(TaskParams::mayAssignToNode)
-                .collect(Collectors.toList());
-            OptionalLong smallestMLNode = nodes.stream().map(NodeLoadDetector::getNodeSize).flatMapToLong(OptionalLong::stream).min();
+                .toList();
+            boolean isScalingPossible = isScalingPossible(nodes);
 
             // No nodes allocated at all!
-            if (nodeIdsAndRouting.isEmpty()
-                // We cannot scale horizontally
-                && maxLazyMLNodes <= nodes.size()
-                // We cannot scale vertically
-                && (smallestMLNode.isEmpty() || smallestMLNode.getAsLong() >= maxMLNodeSize)) {
+            if (nodeIdsAndRouting.isEmpty() && isScalingPossible == false) {
                 String msg = "Could not start deployment because no suitable nodes were found, allocation explanation ["
                     + trainedModelAssignment.getReason()
                     + "]";
@@ -509,6 +505,15 @@ public boolean test(ClusterState clusterState) {
                 return true;
             }
 
+            // We cannot add more nodes and the assignment is not satisfied
+            if (isScalingPossible == false
+                && trainedModelAssignment.isSatisfied(nodes.stream().map(DiscoveryNode::getId).collect(Collectors.toSet())) == false) {
+                String msg = "Could not start deployment because there are not enough resources to provide all requested allocations";
+                logger.debug(() -> format("[%s] %s", modelId, msg));
+                exception = new ElasticsearchStatusException(msg, RestStatus.TOO_MANY_REQUESTS);
+                return true;
+            }
+
             AllocationStatus allocationStatus = trainedModelAssignment.calculateAllocationStatus().orElse(null);
             if (allocationStatus == null || allocationStatus.calculateState().compareTo(waitForState) >= 0) {
                 return true;
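To make the new branch above concrete: the idea is that `isSatisfied` answers whether the allocations already routed to the assignable nodes cover everything the deployment requested. A hypothetical sketch only; the real `TrainedModelAssignment.isSatisfied` and the `getNodeRoutingTable` / `getCurrentAllocations` / `getTaskParams` accessors used here are assumed, not shown in this diff:

    // Hypothetical sketch, not the actual implementation: the assignment is
    // satisfied when allocations routed to assignable nodes cover the request.
    boolean isSatisfied(Set<String> assignableNodeIds) {
        int routedAllocations = getNodeRoutingTable().entrySet()
            .stream()
            .filter(entry -> assignableNodeIds.contains(entry.getKey()))
            .mapToInt(entry -> entry.getValue().getCurrentAllocations())
            .sum();
        return routedAllocations >= getTaskParams().getNumberOfAllocations();
    }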
@@ -527,6 +532,16 @@ public boolean test(ClusterState clusterState) {
             );
             return false;
         }
+
+        private boolean isScalingPossible(List<DiscoveryNode> nodes) {
Contributor:
I think you should add a comment that this only considers memory, and in the future it would be nice to consider CPU too.

It means there's a discrepancy in how we handle different situations:

  • If autoscaling is enabled, a cluster is scaled to its maximum size, there are 2 free CPUs, and you ask to start a deployment that needs 3 CPUs, then you get told you cannot.
  • If autoscaling is enabled, a cluster is scaled to one step below its maximum size, there are no free CPUs, and you ask to start a deployment that needs 100000 CPUs, then that's fine: we start it, the cluster scales to its maximum size, and the deployment goes ahead but with far fewer than 100000 CPUs allocated to it.

While autoscaling doesn't understand CPU, there's not a lot we can do about this, but it's worth at least adding comments to acknowledge where we are today.

Contributor Author:
I have added a TODO comment.

+            OptionalLong smallestMLNode = nodes.stream().map(NodeLoadDetector::getNodeSize).flatMapToLong(OptionalLong::stream).min();
+
+            // We can scale horizontally
+            return maxLazyMLNodes > nodes.size()
+                // We can scale vertically
+                // TODO this currently only considers memory. We should also consider CPU when autoscaling by CPU is possible.
+                || (smallestMLNode.isEmpty() == false && smallestMLNode.getAsLong() < maxMLNodeSize);
+        }
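Following up on the review thread above: a purely illustrative sketch of what a CPU-aware variant of this check might look like once autoscaling can scale by processor count. The `maxMLNodeProcessors` limit and `getProcessorCount(...)` helper are invented names for illustration, not part of this PR:

    // Hypothetical, per the TODO above: also allow vertical scaling by CPU.
    // maxMLNodeProcessors and getProcessorCount(...) are invented names.
    private boolean isScalingPossible(List<DiscoveryNode> nodes) {
        OptionalLong smallestMLNode = nodes.stream().map(NodeLoadDetector::getNodeSize).flatMapToLong(OptionalLong::stream).min();
        OptionalInt smallestProcessorCount = nodes.stream().mapToInt(this::getProcessorCount).min();
        return maxLazyMLNodes > nodes.size()
            || (smallestMLNode.isEmpty() == false && smallestMLNode.getAsLong() < maxMLNodeSize)
            // CPU: the smallest node could still be swapped for one with more processors.
            || (smallestProcessorCount.isPresent() && smallestProcessorCount.getAsInt() < maxMLNodeProcessors);
    }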
     }
 
     static Set<String> nodesShuttingDown(final ClusterState state) {