Skip to content

Commit

Permalink
[7.x] Use query param instead of a system property for opting in for …
Browse files Browse the repository at this point in the history
…new cluster health response code (elastic#79397)

Backport elastic#79351 to 7.x:

The original change was implemented in elastic#78940, but we have decided to move from a system property
to a request parameter, so Cloud users/clients have an easier way to opt-in for the new status code.

Relates elastic#70849
  • Loading branch information
arteam committed Oct 19, 2021
1 parent 4e89777 commit 7cad75f
Show file tree
Hide file tree
Showing 14 changed files with 113 additions and 59 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.indices.recovery.RecoverySettings;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.transport.RemoteClusterService;
import org.elasticsearch.transport.SniffConnectionStrategy;
import org.elasticsearch.xcontent.XContentType;

import java.io.IOException;
import java.util.Collections;
Expand Down Expand Up @@ -317,7 +317,7 @@ public void testClusterHealthNotFoundIndex() throws IOException {
assertThat(response.getStatus(), equalTo(ClusterHealthStatus.RED));
assertNoIndices(response);
assertWarnings("The HTTP status code for a cluster health timeout will be changed from 408 to 200 in a " +
"future version. Set the [es.cluster_health.request_timeout_200] system property to [true] to suppress this message and " +
"future version. Set the [return_200_for_cluster_health_timeout] query parameter to [true] to suppress this message and " +
"opt in to the future behaviour now.");
}

Expand Down
5 changes: 5 additions & 0 deletions docs/reference/cluster/health.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms]
provided or better, i.e. `green` > `yellow` > `red`. By default, will not
wait for any status.

`return_200_for_cluster_health_timeout`::
(Optional, Boolean) A boolean value which controls whether to return HTTP 200
status code instead of HTTP 408 in case of a cluster health timeout from
the server side. Defaults to false.

[[cluster-health-api-response-body]]
==== {api-response-body-title}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@
"red"
],
"description":"Wait until cluster is in a specific state"
},
"return_200_for_cluster_health_timeout":{
"type":"boolean",
"description":"Whether to return HTTP 200 instead of 408 in case of a cluster health timeout from the server side"
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,25 @@
- match: { initializing_shards: 0 }
- match: { unassigned_shards: 0 }
- gte: { number_of_pending_tasks: 0 }

---
"cluster health request timeout with 200 response code":
- skip:
version: " - 7.15.99"
reason: "return_200_for_cluster_health_timeout was added in 7.16"
- do:
cluster.health:
timeout: 1ms
wait_for_active_shards: 5
return_200_for_cluster_health_timeout: true

- is_true: cluster_name
- is_true: timed_out
- gte: { number_of_nodes: 1 }
- gte: { number_of_data_nodes: 1 }
- match: { active_primary_shards: 0 }
- match: { active_shards: 0 }
- match: { relocating_shards: 0 }
- match: { initializing_shards: 0 }
- match: { unassigned_shards: 0 }
- gte: { number_of_pending_tasks: 0 }
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ public class ClusterHealthRequest extends MasterNodeReadRequest<ClusterHealthReq
private ActiveShardCount waitForActiveShards = ActiveShardCount.NONE;
private String waitForNodes = "";
private Priority waitForEvents = null;
private boolean return200ForClusterHealthTimeout;

/**
* Only used by the high-level REST Client. Controls the details level of the health information returned.
* The default value is 'cluster'.
Expand Down Expand Up @@ -69,6 +71,9 @@ public ClusterHealthRequest(StreamInput in) throws IOException {
} else {
indicesOptions = IndicesOptions.lenientExpandOpen();
}
if (in.getVersion().onOrAfter(Version.V_7_16_0)) {
return200ForClusterHealthTimeout = in.readBoolean();
}
}

@Override
Expand Down Expand Up @@ -101,6 +106,11 @@ public void writeTo(StreamOutput out) throws IOException {
if (out.getVersion().onOrAfter(Version.V_7_2_0)) {
indicesOptions.writeIndicesOptions(out);
}
if (out.getVersion().onOrAfter(Version.V_7_16_0)) {
out.writeBoolean(return200ForClusterHealthTimeout);
} else if (return200ForClusterHealthTimeout) {
throw new IllegalArgumentException("Can't fix response code in a cluster involving nodes with version " + out.getVersion());
}
}

@Override
Expand Down Expand Up @@ -244,6 +254,18 @@ public Priority waitForEvents() {
return this.waitForEvents;
}

public boolean doesReturn200ForClusterHealthTimeout() {
return return200ForClusterHealthTimeout;
}

/**
* Sets whether to return HTTP 200 status code instead of HTTP 408 in case of a
* cluster health timeout from the server side.
*/
public void return200ForClusterHealthTimeout(boolean return200ForClusterHealthTimeout) {
this.return200ForClusterHealthTimeout = return200ForClusterHealthTimeout;
}

/**
* Set the level of detail for the health information to be returned.
* Only used by the high-level REST Client.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,26 @@

package org.elasticsearch.action.admin.cluster.health;

import org.elasticsearch.Version;
import org.elasticsearch.action.ActionResponse;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.health.ClusterHealthStatus;
import org.elasticsearch.cluster.health.ClusterIndexHealth;
import org.elasticsearch.cluster.health.ClusterStateHealth;
import org.elasticsearch.common.logging.DeprecationCategory;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.logging.DeprecationCategory;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.xcontent.StatusToXContentObject;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.rest.action.search.RestSearchAction;
import org.elasticsearch.xcontent.ConstructingObjectParser;
import org.elasticsearch.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.StatusToXContentObject;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.rest.action.search.RestSearchAction;

import java.io.IOException;
import java.util.HashMap;
Expand Down Expand Up @@ -102,10 +103,10 @@ public class ClusterHealthResponse extends ActionResponse implements StatusToXCo

private static final ObjectParser.NamedObjectParser<ClusterIndexHealth, Void> INDEX_PARSER =
(XContentParser parser, Void context, String index) -> ClusterIndexHealth.innerFromXContent(parser, index);
private static final String ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY = "es.cluster_health.request_timeout_200";
static final String ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY = "return_200_for_cluster_health_timeout";
static final String CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG = "The HTTP status code for a cluster health timeout " +
"will be changed from 408 to 200 in a future version. Set the [" + ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY + "] " +
"system property to [true] to suppress this message and opt in to the future behaviour now.";
"query parameter to [true] to suppress this message and opt in to the future behaviour now.";

static {
// ClusterStateHealth fields
Expand Down Expand Up @@ -138,15 +139,7 @@ public class ClusterHealthResponse extends ActionResponse implements StatusToXCo
private boolean timedOut = false;
private ClusterStateHealth clusterStateHealth;
private ClusterHealthStatus clusterHealthStatus;
private boolean esClusterHealthRequestTimeout200 = readEsClusterHealthRequestTimeout200FromProperty();

public ClusterHealthResponse() {
}

/** For the testing of opting in for the 200 status code without setting a system property */
ClusterHealthResponse(boolean esClusterHealthRequestTimeout200) {
this.esClusterHealthRequestTimeout200 = esClusterHealthRequestTimeout200;
}
private boolean return200ForClusterHealthTimeout;

public ClusterHealthResponse(StreamInput in) throws IOException {
super(in);
Expand All @@ -158,22 +151,29 @@ public ClusterHealthResponse(StreamInput in) throws IOException {
numberOfInFlightFetch = in.readInt();
delayedUnassignedShards= in.readInt();
taskMaxWaitingTime = in.readTimeValue();
if (in.getVersion().onOrAfter(Version.V_7_16_0)) {
return200ForClusterHealthTimeout = in.readBoolean();
}
}

/** needed for plugins BWC */
public ClusterHealthResponse(String clusterName, String[] concreteIndices, ClusterState clusterState) {
this(clusterName, concreteIndices, clusterState, -1, -1, -1, TimeValue.timeValueHours(0));
public ClusterHealthResponse(String clusterName, String[] concreteIndices, ClusterState clusterState,
boolean return200ForServerTimeout) {
this(clusterName, concreteIndices, clusterState, -1, -1, -1, TimeValue.timeValueHours(0),
return200ForServerTimeout);
}

public ClusterHealthResponse(String clusterName, String[] concreteIndices, ClusterState clusterState, int numberOfPendingTasks,
int numberOfInFlightFetch, int delayedUnassignedShards, TimeValue taskMaxWaitingTime) {
int numberOfInFlightFetch, int delayedUnassignedShards, TimeValue taskMaxWaitingTime,
boolean return200ForServerTimeout) {
this.clusterName = clusterName;
this.numberOfPendingTasks = numberOfPendingTasks;
this.numberOfInFlightFetch = numberOfInFlightFetch;
this.delayedUnassignedShards = delayedUnassignedShards;
this.taskMaxWaitingTime = taskMaxWaitingTime;
this.clusterStateHealth = new ClusterStateHealth(clusterState, concreteIndices);
this.clusterHealthStatus = clusterStateHealth.getStatus();
this.return200ForClusterHealthTimeout = return200ForServerTimeout;
}

/**
Expand Down Expand Up @@ -305,6 +305,11 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeInt(numberOfInFlightFetch);
out.writeInt(delayedUnassignedShards);
out.writeTimeValue(taskMaxWaitingTime);
if (out.getVersion().onOrAfter(Version.V_7_16_0)) {
out.writeBoolean(return200ForClusterHealthTimeout);
} else if (return200ForClusterHealthTimeout) {
throw new IllegalArgumentException("Can't fix response code in a cluster involving nodes with version " + out.getVersion());
}
}

@Override
Expand All @@ -317,7 +322,7 @@ public RestStatus status() {
if (isTimedOut() == false) {
return RestStatus.OK;
}
if (esClusterHealthRequestTimeout200) {
if (return200ForClusterHealthTimeout) {
return RestStatus.OK;
} else {
deprecationLogger.critical(DeprecationCategory.API,"cluster_health_request_timeout",
Expand Down Expand Up @@ -383,17 +388,4 @@ public int hashCode() {
return Objects.hash(clusterName, numberOfPendingTasks, numberOfInFlightFetch, delayedUnassignedShards, taskMaxWaitingTime,
timedOut, clusterStateHealth, clusterHealthStatus);
}

private static boolean readEsClusterHealthRequestTimeout200FromProperty() {
String property = System.getProperty(ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY);
if (property == null) {
return false;
}
if (Boolean.parseBoolean(property)) {
return true;
} else {
throw new IllegalArgumentException(ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY + " can only be unset or [true] but was ["
+ property + "]");
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.IndexNotFoundException;
import org.elasticsearch.node.NodeClosedException;
import org.elasticsearch.tasks.Task;
Expand Down Expand Up @@ -232,7 +232,8 @@ private enum TimeoutState {

private ClusterHealthResponse getResponse(final ClusterHealthRequest request, ClusterState clusterState,
final int waitFor, TimeoutState timeoutState) {
ClusterHealthResponse response = clusterHealth(request, clusterState, clusterService.getMasterService().numberOfPendingTasks(),
ClusterHealthResponse response = clusterHealth(request, clusterState,
clusterService.getMasterService().numberOfPendingTasks(),
allocationService.getNumberOfInFlightFetches(), clusterService.getMasterService().getMaxTaskWaitTime());
int readyCounter = prepareResponse(request, response, clusterState, indexNameExpressionResolver);
boolean valid = (readyCounter == waitFor);
Expand Down Expand Up @@ -331,8 +332,8 @@ static int prepareResponse(final ClusterHealthRequest request, final ClusterHeal
}


private ClusterHealthResponse clusterHealth(ClusterHealthRequest request, ClusterState clusterState, int numberOfPendingTasks,
int numberOfInFlightFetch, TimeValue pendingTaskTimeInQueue) {
private ClusterHealthResponse clusterHealth(ClusterHealthRequest request, ClusterState clusterState,
int numberOfPendingTasks, int numberOfInFlightFetch, TimeValue pendingTaskTimeInQueue) {
if (logger.isTraceEnabled()) {
logger.trace("Calculating health based on state version [{}]", clusterState.version());
}
Expand All @@ -344,12 +345,13 @@ private ClusterHealthResponse clusterHealth(ClusterHealthRequest request, Cluste
// one of the specified indices is not there - treat it as RED.
ClusterHealthResponse response = new ClusterHealthResponse(clusterState.getClusterName().value(), Strings.EMPTY_ARRAY,
clusterState, numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(clusterState),
pendingTaskTimeInQueue);
pendingTaskTimeInQueue, request.doesReturn200ForClusterHealthTimeout());
response.setStatus(ClusterHealthStatus.RED);
return response;
}

return new ClusterHealthResponse(clusterState.getClusterName().value(), concreteIndices, clusterState, numberOfPendingTasks,
numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(clusterState), pendingTaskTimeInQueue);
return new ClusterHealthResponse(clusterState.getClusterName().value(), concreteIndices, clusterState,
numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(clusterState), pendingTaskTimeInQueue,
request.doesReturn200ForClusterHealthTimeout());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ public static ClusterHealthRequest fromRequest(final RestRequest request) {
if (request.param("wait_for_events") != null) {
clusterHealthRequest.waitForEvents(Priority.valueOf(request.param("wait_for_events").toUpperCase(Locale.ROOT)));
}
clusterHealthRequest.return200ForClusterHealthTimeout(request.paramAsBoolean(
"return_200_for_cluster_health_timeout",
clusterHealthRequest.doesReturn200ForClusterHealthTimeout()));
return clusterHealthRequest;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ public void testSerialize() throws Exception {
assertThat(cloneRequest.waitForEvents(), equalTo(originalRequest.waitForEvents()));
assertIndicesEquals(cloneRequest.indices(), originalRequest.indices());
assertThat(cloneRequest.indicesOptions(), equalTo(originalRequest.indicesOptions()));
assertThat(cloneRequest.doesReturn200ForClusterHealthTimeout(), equalTo(originalRequest.doesReturn200ForClusterHealthTimeout()));
}

public void testRequestReturnsHiddenIndicesByDefault() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.test.AbstractSerializingTestCase;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.XContentParser;
import org.hamcrest.Matchers;

import java.io.IOException;
Expand All @@ -43,7 +43,7 @@ public class ClusterHealthResponsesTests extends AbstractSerializingTestCase<Clu
private final ClusterHealthRequest.Level level = randomFrom(ClusterHealthRequest.Level.values());

public void testIsTimeout() {
ClusterHealthResponse res = new ClusterHealthResponse();
ClusterHealthResponse res = new ClusterHealthResponse("", new String[]{}, ClusterState.EMPTY_STATE, false);
for (int i = 0; i < 5; i++) {
res.setTimedOut(randomBoolean());
if (res.isTimedOut()) {
Expand All @@ -56,7 +56,7 @@ public void testIsTimeout() {
}

public void testTimeoutReturns200IfOptedIn() {
ClusterHealthResponse res = new ClusterHealthResponse(true);
ClusterHealthResponse res = new ClusterHealthResponse("", new String[]{}, ClusterState.EMPTY_STATE, true);
for (int i = 0; i < 5; i++) {
res.setTimedOut(randomBoolean());
assertEquals(RestStatus.OK, res.status());
Expand All @@ -70,7 +70,7 @@ public void testClusterHealth() throws IOException {
int delayedUnassigned = randomIntBetween(0, 200);
TimeValue pendingTaskInQueueTime = TimeValue.timeValueMillis(randomIntBetween(1000, 100000));
ClusterHealthResponse clusterHealth = new ClusterHealthResponse("bla", new String[] {Metadata.ALL},
clusterState, pendingTasks, inFlight, delayedUnassigned, pendingTaskInQueueTime);
clusterState, pendingTasks, inFlight, delayedUnassigned, pendingTaskInQueueTime, false);
clusterHealth = maybeSerialize(clusterHealth);
assertClusterHealth(clusterHealth);
assertThat(clusterHealth.getNumberOfPendingTasks(), Matchers.equalTo(pendingTasks));
Expand Down
Loading

0 comments on commit 7cad75f

Please sign in to comment.