
[ML] Fail model deployment if all allocations cannot be provided #88656

Merged
@@ -773,6 +773,26 @@ public void testNotifications() throws IOException {
         assertSystemNotificationsContain("Rebalanced trained model allocations because [model deployment started]");
     }
 
+    public void testStartDeployment_TooManyAllocations() throws IOException {
+        String modelId = "test_start_deployment_too_many_allocations";
+        createTrainedModel(modelId);
+        putModelDefinition(modelId);
+        putVocabulary(List.of("these", "are", "my", "words"), modelId);
+
+        ResponseException ex = expectThrows(
+            ResponseException.class,
+            () -> startDeployment(modelId, AllocationStatus.State.STARTED.toString(), 100, 1)
+        );
+        assertThat(ex.getResponse().getStatusLine().getStatusCode(), equalTo(429));
+        assertThat(
+            EntityUtils.toString(ex.getResponse().getEntity()),
+            containsString("Could not start deployment because there are not enough resources to provide all requested allocations")
+        );
+
+        Response response = getTrainedModelStats(modelId);
+        assertThat(EntityUtils.toString(response.getEntity()), not(containsString("deployment_stats")));
+    }
+
     @SuppressWarnings("unchecked")
     private void assertAllocationCount(String modelId, int expectedAllocationCount) throws IOException {
         Response response = getTrainedModelStats(modelId);
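For reference, a minimal sketch of the request this test drives through the `startDeployment` helper, assuming the 8.x `_start` deployment endpoint and its `number_of_allocations` / `threads_per_allocation` parameters; the helper's exact behavior is an assumption, not shown in this diff:

    // Hedged sketch of what startDeployment(modelId, "started", 100, 1) presumably issues.
    // Endpoint path and parameter names are assumptions based on the 8.x REST API.
    Request request = new Request("POST", "/_ml/trained_models/" + modelId + "/deployment/_start");
    request.addParameter("wait_for", "started");
    request.addParameter("number_of_allocations", "100");
    request.addParameter("threads_per_allocation", "1");
    // On a cluster that cannot provide 100 allocations and cannot scale further,
    // this now fails fast with a 429 instead of waiting for the timeout.
    client().performRequest(request);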
@@ -487,15 +487,11 @@ public boolean test(ClusterState clusterState) {
                 .stream()
                 .filter(d -> nodesShuttingDown.contains(d.getId()) == false)
                 .filter(TaskParams::mayAssignToNode)
-                .collect(Collectors.toList());
-            OptionalLong smallestMLNode = nodes.stream().map(NodeLoadDetector::getNodeSize).flatMapToLong(OptionalLong::stream).min();
+                .toList();
+            boolean isScalingPossible = isScalingPossible(nodes);
 
             // No nodes allocated at all!
-            if (nodeIdsAndRouting.isEmpty()
-                // We cannot scale horizontally
-                && maxLazyMLNodes <= nodes.size()
-                // We cannot scale vertically
-                && (smallestMLNode.isEmpty() || smallestMLNode.getAsLong() >= maxMLNodeSize)) {
+            if (nodeIdsAndRouting.isEmpty() && isScalingPossible == false) {
                 String msg = "Could not start deployment because no suitable nodes were found, allocation explanation ["
                     + trainedModelAssignment.getReason()
                     + "]";
@@ -509,6 +505,15 @@ public boolean test(ClusterState clusterState) {
                 return true;
             }
 
+            // We cannot add more nodes and the assignment is not satisfied
+            if (isScalingPossible == false
+                && trainedModelAssignment.isSatisfied(nodes.stream().map(DiscoveryNode::getId).collect(Collectors.toSet())) == false) {
+                String msg = "Could not start deployment because there are not enough resources to provide all requested allocations";
+                logger.debug(() -> format("[%s] %s", modelId, msg));
+                exception = new ElasticsearchStatusException(msg, RestStatus.TOO_MANY_REQUESTS);
+                return true;
+            }
+
             AllocationStatus allocationStatus = trainedModelAssignment.calculateAllocationStatus().orElse(null);
             if (allocationStatus == null || allocationStatus.calculateState().compareTo(waitForState) >= 0) {
                 return true;
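To make the new branch above concrete: the idea is that `isSatisfied` answers whether the allocations already routed to the assignable nodes cover everything the deployment requested. A hypothetical sketch only; the real `TrainedModelAssignment.isSatisfied` and the `getNodeRoutingTable` / `getCurrentAllocations` / `getTaskParams` accessors used here are assumed, not shown in this diff:

    // Hypothetical sketch, not the actual implementation: the assignment is
    // satisfied when allocations routed to assignable nodes cover the request.
    boolean isSatisfied(Set<String> assignableNodeIds) {
        int routedAllocations = getNodeRoutingTable().entrySet()
            .stream()
            .filter(entry -> assignableNodeIds.contains(entry.getKey()))
            .mapToInt(entry -> entry.getValue().getCurrentAllocations())
            .sum();
        return routedAllocations >= getTaskParams().getNumberOfAllocations();
    }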
@@ -527,6 +532,16 @@ public boolean test(ClusterState clusterState) {
             );
             return false;
         }
+
+        private boolean isScalingPossible(List<DiscoveryNode> nodes) {
Contributor:
I think you should add a comment that this only considers memory, and in the future it would be nice to consider CPU too.

It means there's a discrepancy in how we handle different situations:

  • If autoscaling is enabled, a cluster is scaled to its maximum size, there are 2 free CPUs, and you ask to start a deployment that needs 3 CPUs, then you get told you cannot.
  • If autoscaling is enabled, a cluster is scaled to one step below its maximum size, there are no free CPUs, and you ask to start a deployment that needs 100000 CPUs, then that's fine: we start it, the cluster scales to its maximum size, and the deployment goes ahead but with far fewer than 100000 CPUs allocated to it.

While autoscaling doesn't understand CPU, there's not a lot we can do about this, but it's worth at least adding comments to acknowledge where we are today.

Contributor Author:
I have added a TODO comment.

+            OptionalLong smallestMLNode = nodes.stream().map(NodeLoadDetector::getNodeSize).flatMapToLong(OptionalLong::stream).min();
+
+            // We can scale horizontally
+            return maxLazyMLNodes > nodes.size()
+                // We can scale vertically
+                // TODO this currently only considers memory. We should also consider CPU when autoscaling by CPU is possible.
+                || (smallestMLNode.isEmpty() == false && smallestMLNode.getAsLong() < maxMLNodeSize);
+        }
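Following up on the review thread above: a purely illustrative sketch of what a CPU-aware variant of this check might look like once autoscaling can scale by processor count. The `maxMLNodeProcessors` limit and `getProcessorCount(...)` helper are invented names for illustration, not part of this PR:

    // Hypothetical, per the TODO above: also allow vertical scaling by CPU.
    // maxMLNodeProcessors and getProcessorCount(...) are invented names.
    private boolean isScalingPossible(List<DiscoveryNode> nodes) {
        OptionalLong smallestMLNode = nodes.stream().map(NodeLoadDetector::getNodeSize).flatMapToLong(OptionalLong::stream).min();
        OptionalInt smallestProcessorCount = nodes.stream().mapToInt(this::getProcessorCount).min();
        return maxLazyMLNodes > nodes.size()
            || (smallestMLNode.isEmpty() == false && smallestMLNode.getAsLong() < maxMLNodeSize)
            // CPU: the smallest node could still be swapped for one with more processors.
            || (smallestProcessorCount.isPresent() && smallestProcessorCount.getAsInt() < maxMLNodeProcessors);
    }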
     }
 
     static Set<String> nodesShuttingDown(final ClusterState state) {