diff --git a/server/src/main/java/org/elasticsearch/indices/ShardLimitValidator.java b/server/src/main/java/org/elasticsearch/indices/ShardLimitValidator.java
index 8f6a091851fa5..aa106aaeebdd5 100644
--- a/server/src/main/java/org/elasticsearch/indices/ShardLimitValidator.java
+++ b/server/src/main/java/org/elasticsearch/indices/ShardLimitValidator.java
@@ -192,6 +192,29 @@ private Optional checkShardLimit(int newShards, int newFrozenShards, Clu
             : checkShardLimit(newFrozenShards, state, shardLimitPerNodeFrozen.get(), frozenNodeCount, "frozen");
     }
 
+    /**
+     * This method decides whether there is enough room in the cluster to add the given number of shards with the given number of replicas
+     * without exceeding the "cluster.max_shards_per_node.frozen" setting if the shards are going on frozen nodes or the
+     * "cluster.max_shards_per_node" setting if the shards are going on normal nodes. This check does not guarantee that the number of
+     * shards can be added, just that there is theoretically room to add them without exceeding the shards per node configuration.
+     * @param numberOfNewShards The number of primary shards that we want to be able to add to the cluster
+     * @param replicas The number of replicas of the primary shards that we want to be able to add to the cluster
+     * @param state The cluster state, used to get cluster settings and to get the number of open shards already in the cluster
+     * @param frozenNodes If true, check whether there is room to put these shards onto frozen nodes. If false, check whether there is room
+     *                    to put these shards onto normal nodes.
+     * @return True if there is room to add the requested number of shards to the cluster, and false if there is not
+     */
+    public static boolean canAddShardsToCluster(int numberOfNewShards, int replicas, ClusterState state, boolean frozenNodes) {
+        Settings clusterSettings = state.getMetadata().settings();
+        int maxShardsPerNode = frozenNodes
+            ? SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN.get(clusterSettings)
+            : SETTING_CLUSTER_MAX_SHARDS_PER_NODE.get(clusterSettings);
+        int nodeCount = nodeCount(state, frozenNodes ? ShardLimitValidator::hasFrozen : ShardLimitValidator::hasNonFrozen);
+        String nodeGroup = frozenNodes ? FROZEN_GROUP : "normal";
+        Optional<String> errorMessage = checkShardLimit(numberOfNewShards * (1 + replicas), state, maxShardsPerNode, nodeCount, nodeGroup);
+        return errorMessage.isPresent() == false;
+    }
+
     // package-private for testing
     static Optional<String> checkShardLimit(int newShards, ClusterState state, int maxShardsPerNode, int nodeCount, String group) {
         // Only enforce the shard limit if we have at least one data node, so that we don't block
diff --git a/server/src/test/java/org/elasticsearch/indices/ShardLimitValidatorTests.java b/server/src/test/java/org/elasticsearch/indices/ShardLimitValidatorTests.java
index b80a3a844c807..37de0bec0e5a4 100644
--- a/server/src/test/java/org/elasticsearch/indices/ShardLimitValidatorTests.java
+++ b/server/src/test/java/org/elasticsearch/indices/ShardLimitValidatorTests.java
@@ -48,6 +48,7 @@ public void testOverShardLimit() {
             nodesInCluster,
             counts.getFirstIndexShards(),
             counts.getFirstIndexReplicas(),
+            counts.getShardsPerNode(),
             group
         );
 
@@ -76,6 +77,14 @@ public void testOverShardLimit() {
                 + " shards open",
             errorMessage.get()
         );
+        assertFalse(
+            ShardLimitValidator.canAddShardsToCluster(
+                counts.getFailingIndexShards(),
+                counts.getFailingIndexReplicas(),
+                state,
+                ShardLimitValidator.FROZEN_GROUP.equals(group)
+            )
+        );
     }
 
     public void testUnderShardLimit() {
@@ -88,6 +97,7 @@ public void testUnderShardLimit() {
             nodesInCluster,
             counts.getFirstIndexShards(),
             counts.getFirstIndexReplicas(),
+            counts.getShardsPerNode(),
             group
         );
 
@@ -102,6 +112,7 @@ public void testUnderShardLimit() {
         );
 
         assertFalse(errorMessage.isPresent());
+        assertTrue(ShardLimitValidator.canAddShardsToCluster(shardsToAdd, 0, state, ShardLimitValidator.FROZEN_GROUP.equals(group)));
     }
 
     public void testValidateShardLimitOpenIndices() {
@@ -189,7 +200,13 @@ private ClusterState createClusterStateForReplicaUpdate(int nodesInCluster, int
         return state;
     }
 
-    public static ClusterState createClusterForShardLimitTest(int nodesInCluster, int shardsInIndex, int replicas, String group) {
+    public static ClusterState createClusterForShardLimitTest(
+        int nodesInCluster,
+        int shardsInIndex,
+        int replicas,
+        int maxShardsPerNode,
+        String group
+    ) {
         DiscoveryNodes nodes = createDiscoveryNodes(nodesInCluster, group);
 
         Settings.Builder settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT);
@@ -202,10 +219,15 @@ public static ClusterState createClusterForShardLimitTest(int nodesInCluster, in
             .numberOfShards(shardsInIndex)
             .numberOfReplicas(replicas);
         Metadata.Builder metadata = Metadata.builder().put(indexMetadata);
+        Settings.Builder clusterSettings = Settings.builder()
+            .put(ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN.getKey(), maxShardsPerNode)
+            .put(ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), maxShardsPerNode);
         if (randomBoolean()) {
             metadata.transientSettings(Settings.EMPTY);
+            metadata.persistentSettings(clusterSettings.build());
         } else {
             metadata.persistentSettings(Settings.EMPTY);
+            metadata.transientSettings(clusterSettings.build());
         }
 
         return ClusterState.builder(ClusterName.DEFAULT).metadata(metadata).nodes(nodes).build();
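The new ShardLimitValidator.canAddShardsToCluster helper above is intended for callers that want to know, before creating anything, whether a handful of extra shards would still fit under the per-node limits. A minimal sketch of such a caller, assuming the Elasticsearch server classpath; the ShardRoomCheck class and hasRoomForSmallIndex method are illustrative names, not part of this change:

    import org.elasticsearch.cluster.ClusterState;
    import org.elasticsearch.indices.ShardLimitValidator;

    final class ShardRoomCheck {
        /**
         * Returns true if numberOfPrimaries * (1 + replicas) additional shards would still fit under
         * cluster.max_shards_per_node across the non-frozen data nodes in the given cluster state.
         */
        static boolean hasRoomForSmallIndex(ClusterState state, int numberOfPrimaries, int replicas) {
            // false selects the "normal" node group; true would check frozen nodes against
            // cluster.max_shards_per_node.frozen instead.
            return ShardLimitValidator.canAddShardsToCluster(numberOfPrimaries, replicas, state, false);
        }
    }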
diff --git a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecks.java b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecks.java
index 5f8ec3e9e187e..5230221c7ae00 100644
--- a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecks.java
+++ b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecks.java
@@ -25,6 +25,7 @@
 import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
 import org.elasticsearch.index.mapper.GeoShapeFieldMapper;
 import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.indices.ShardLimitValidator;
 import org.elasticsearch.ingest.IngestService;
 import org.elasticsearch.ingest.PipelineConfiguration;
 import org.elasticsearch.xcontent.XContentType;
@@ -990,6 +991,37 @@ static DeprecationIssue checkTransientSettingsExistence(ClusterState state) {
         return null;
     }
 
+    /**
+     * Upgrading can require the addition of one or more small indices. This method checks that, based on the cluster configuration, there
+     * is room to add a small number of additional shards to the cluster. The goal is to prevent a failure during upgrade.
+     * @param clusterState The cluster state, used to get settings and information about nodes
+     * @return A deprecation issue if there is not enough room in this cluster to add a few more shards, or null otherwise
+     */
+    static DeprecationIssue checkShards(ClusterState clusterState) {
+        // Make sure we have room to add a small non-frozen index if needed
+        final int shardsInFutureNewSmallIndex = 5;
+        final int replicasForFutureIndex = 1;
+        if (ShardLimitValidator.canAddShardsToCluster(shardsInFutureNewSmallIndex, replicasForFutureIndex, clusterState, false)) {
+            return null;
+        } else {
+            final int totalShardsToAdd = shardsInFutureNewSmallIndex * (1 + replicasForFutureIndex);
+            return new DeprecationIssue(
+                DeprecationIssue.Level.WARNING,
+                "The cluster has too many shards to be able to upgrade",
+                "https://ela.st/es-deprecation-7-shard-limit",
+                String.format(
+                    Locale.ROOT,
+                    "Upgrading requires adding a small number of new shards. There is not enough room for %d more "
+                        + "shards. Increase the cluster.max_shards_per_node setting, or remove indices "
+                        + "to clear up resources.",
+                    totalShardsToAdd
+                ),
+                false,
+                null
+            );
+        }
+    }
+
     static DeprecationIssue emptyDataTierPreferenceCheck(ClusterState clusterState) {
         if (DataTier.dataNodesWithoutAllDataRoles(clusterState).isEmpty() == false) {
             List<String> indices = new ArrayList<>();
diff --git a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationChecks.java b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationChecks.java
index 8e471b0767886..ffaf14b4c2e31 100644
--- a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationChecks.java
+++ b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationChecks.java
@@ -60,7 +60,8 @@ private DeprecationChecks() {}
             ClusterDeprecationChecks::checkGeoShapeTemplates,
             ClusterDeprecationChecks::checkSparseVectorTemplates,
             ClusterDeprecationChecks::checkILMFreezeActions,
-            ClusterDeprecationChecks::emptyDataTierPreferenceCheck
+            ClusterDeprecationChecks::emptyDataTierPreferenceCheck,
+            ClusterDeprecationChecks::checkShards
         )
     );
 
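checkShards asks for room for 5 primaries with 1 replica each, i.e. 5 * (1 + 1) = 10 shards. Written out, the headroom test that ShardLimitValidator applies per node group boils down to the comparison below; this is a simplified sketch of the described behaviour (the hasRoom method is illustrative, and in the real code the open-shard count comes from the cluster state and the check is skipped when there are no data nodes):

    // Simplified sketch of the shard-headroom arithmetic behind canAddShardsToCluster.
    static boolean hasRoom(int newPrimaries, int replicas, int maxShardsPerNode, int nodeCount, int currentOpenShards) {
        int requested = newPrimaries * (1 + replicas);        // 5 * (1 + 1) = 10 for the upgrade check
        long capacity = (long) maxShardsPerNode * nodeCount;  // per-node limit scaled by matching data nodes
        return currentOpenShards + requested <= capacity;
    }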
diff --git a/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecksTests.java b/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecksTests.java
index a55b6d3db3e82..e2d18f725016f 100644
--- a/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecksTests.java
+++ b/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecksTests.java
@@ -28,6 +28,7 @@
 import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
+import org.elasticsearch.indices.ShardLimitValidator;
 import org.elasticsearch.ingest.IngestService;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.xcontent.XContentBuilder;
@@ -47,6 +48,7 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.UUID;
 
 import static java.util.Collections.singletonList;
 import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
@@ -1649,6 +1651,62 @@ public void testEmptyDataTierPreference() {
         }
     }
 
+    public void testCheckShards() {
+        /*
+         * This test sets the number of allowed shards per node to 5 and creates 2 nodes, so there is room for the 10 shards that
+         * checkShards() requires. The first cluster state has no indices, so the check passes. The second state lowers the limit to 4
+         * shards per node and adds an index with one shard and one replica, leaving room for only 6 more shards, so the check fails.
+         */
+        final ClusterState state = ClusterState.builder(new ClusterName(randomAlphaOfLength(5)))
+            .metadata(
+                Metadata.builder()
+                    .persistentSettings(Settings.builder().put(ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), 5).build())
+                    .build()
+            )
+            .nodes(
+                DiscoveryNodes.builder()
+                    .add(new DiscoveryNode(UUID.randomUUID().toString(), buildNewFakeTransportAddress(), Version.CURRENT))
+                    .add(new DiscoveryNode(UUID.randomUUID().toString(), buildNewFakeTransportAddress(), Version.CURRENT))
+            )
+            .build();
+        List<DeprecationIssue> issues = DeprecationChecks.filterChecks(CLUSTER_SETTINGS_CHECKS, c -> c.apply(state));
+        assertThat(0, equalTo(issues.size()));
+
+        final ClusterState stateWithProblems = ClusterState.builder(new ClusterName(randomAlphaOfLength(5)))
+            .metadata(
+                Metadata.builder()
+                    .persistentSettings(Settings.builder().put(ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), 4).build())
+                    .put(
+                        IndexMetadata.builder(randomAlphaOfLength(10))
+                            .settings(settings(Version.CURRENT).put(DataTier.TIER_PREFERENCE_SETTING.getKey(), " "))
+                            .numberOfShards(1)
+                            .numberOfReplicas(1)
+                            .build(),
+                        false
+                    )
+                    .build()
+            )
+            .nodes(
+                DiscoveryNodes.builder()
+                    .add(new DiscoveryNode(UUID.randomUUID().toString(), buildNewFakeTransportAddress(), Version.CURRENT))
+                    .add(new DiscoveryNode(UUID.randomUUID().toString(), buildNewFakeTransportAddress(), Version.CURRENT))
+            )
+            .build();
+
+        issues = DeprecationChecks.filterChecks(CLUSTER_SETTINGS_CHECKS, c -> c.apply(stateWithProblems));
+
+        DeprecationIssue expected = new DeprecationIssue(
+            DeprecationIssue.Level.WARNING,
+            "The cluster has too many shards to be able to upgrade",
+            "https://ela.st/es-deprecation-7-shard-limit",
+            "Upgrading requires adding a small number of new shards. There is not enough room for 10 more shards. Increase the cluster"
+                + ".max_shards_per_node setting, or remove indices to clear up resources.",
+            false,
+            null
+        );
+        assertEquals(singletonList(expected), issues);
+    }
+
     private static ClusterState clusterStateWithoutAllDataRoles() {
         DiscoveryNodes.Builder discoBuilder = DiscoveryNodes.builder();
         List<DiscoveryNode> nodesList = org.elasticsearch.core.List.of(
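For the two cluster states built in testCheckShards above, the arithmetic works out as follows (a worked example of the check, not additional test code):

    state:             2 nodes * 5 shards/node = 10 allowed, 0 shards open  ->  0 + 10 <= 10, no issue
    stateWithProblems: 2 nodes * 4 shards/node =  8 allowed, 2 shards open  ->  2 + 10 >   8, issue reported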