Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Stacktraces Taking Memory in ILM Error Step Serialization #84266

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/84266.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 84266
summary: Fix Stacktraces Taking Memory in ILM Error Step Serialization
area: ILM+SLM
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ public void handleFailure(Exception e) {
logger.warn(new ParameterizedMessage("policy [{}] for index [{}] failed on step [{}].", policy, index, startStep.getKey()), e);
}

private ClusterState moveToErrorStep(final ClusterState state, Step.StepKey currentStepKey, Exception cause) throws IOException {
private ClusterState moveToErrorStep(final ClusterState state, Step.StepKey currentStepKey, Exception cause) {
this.failure = cause;
logger.warn(
"policy [{}] for index [{}] failed on cluster state step [{}]. Moving to ERROR step",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,10 @@
import org.elasticsearch.cluster.metadata.LifecycleExecutionState;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.license.XPackLicenseState;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.ToXContentObject;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.json.JsonXContent;
import org.elasticsearch.xpack.core.ilm.ErrorStep;
import org.elasticsearch.xpack.core.ilm.IndexLifecycleMetadata;
import org.elasticsearch.xpack.core.ilm.InitializePolicyContextStep;
Expand All @@ -38,8 +34,6 @@
import org.elasticsearch.xpack.core.ilm.Step;
import org.elasticsearch.xpack.core.ilm.TerminalPolicyStep;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
Expand All @@ -48,8 +42,8 @@
import java.util.function.BiFunction;
import java.util.function.LongSupplier;

import static org.elasticsearch.ElasticsearchException.REST_EXCEPTION_SKIP_STACK_TRACE;
import static org.elasticsearch.cluster.metadata.LifecycleExecutionState.ILM_CUSTOM_METADATA_KEY;
import static org.elasticsearch.xcontent.ToXContent.EMPTY_PARAMS;

/**
* The {@link IndexLifecycleTransition} class handles cluster state transitions
Expand All @@ -61,9 +55,6 @@
*/
public final class IndexLifecycleTransition {
private static final Logger logger = LogManager.getLogger(IndexLifecycleTransition.class);
private static final ToXContent.Params STACKTRACE_PARAMS = new ToXContent.MapParams(
Collections.singletonMap(REST_EXCEPTION_SKIP_STACK_TRACE, "false")
);

/**
* Validates that the given transition from {@code currentStepKey} to {@code newStepKey} can be accomplished
Expand Down Expand Up @@ -158,14 +149,10 @@ static ClusterState moveClusterStateToErrorStep(
Exception cause,
LongSupplier nowSupplier,
BiFunction<IndexMetadata, Step.StepKey, Step> stepLookupFunction
) throws IOException {
) {
IndexMetadata idxMeta = clusterState.getMetadata().index(index);
IndexLifecycleMetadata ilmMeta = clusterState.metadata().custom(IndexLifecycleMetadata.TYPE);
LifecyclePolicyMetadata policyMetadata = ilmMeta.getPolicyMetadatas().get(idxMeta.getLifecyclePolicyName());
XContentBuilder causeXContentBuilder = JsonXContent.contentBuilder();
causeXContentBuilder.startObject();
ElasticsearchException.generateThrowableXContent(causeXContentBuilder, STACKTRACE_PARAMS, cause);
causeXContentBuilder.endObject();
LifecycleExecutionState currentState = idxMeta.getLifecycleExecutionState();
Step.StepKey currentStep;
// if an error is encountered while initialising the policy the lifecycle execution state will not yet contain any step information
Expand All @@ -189,7 +176,10 @@ static ClusterState moveClusterStateToErrorStep(

LifecycleExecutionState.Builder failedState = LifecycleExecutionState.builder(nextStepState);
failedState.setFailedStep(currentStep.getName());
failedState.setStepInfo(BytesReference.bytes(causeXContentBuilder).utf8ToString());
failedState.setStepInfo(Strings.toString(((builder, params) -> {
ElasticsearchException.generateThrowableXContent(builder, EMPTY_PARAMS, cause);
return builder;
})));
Step failedStep = stepLookupFunction.apply(idxMeta, currentStep);

if (failedStep != null) {
Expand Down Expand Up @@ -444,20 +434,15 @@ public static ClusterState.Builder newClusterStateWithLifecycleState(
* @param stepInfo the new step info to update
* @return Updated cluster state with <code>stepInfo</code> if changed, otherwise the same cluster state
* if no changes to step info exist
* @throws IOException if parsing step info fails
*/
static ClusterState addStepInfoToClusterState(Index index, ClusterState clusterState, ToXContentObject stepInfo) throws IOException {
static ClusterState addStepInfoToClusterState(Index index, ClusterState clusterState, ToXContentObject stepInfo) {
IndexMetadata indexMetadata = clusterState.getMetadata().index(index);
if (indexMetadata == null) {
// This index doesn't exist anymore, we can't do anything
return clusterState;
}
LifecycleExecutionState lifecycleState = indexMetadata.getLifecycleExecutionState();
final String stepInfoString;
try (XContentBuilder infoXContentBuilder = JsonXContent.contentBuilder()) {
stepInfo.toXContent(infoXContentBuilder, ToXContent.EMPTY_PARAMS);
stepInfoString = BytesReference.bytes(infoXContentBuilder).utf8ToString();
}
final String stepInfoString = Strings.toString(stepInfo);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Admittedly slightly unrelated but I figured I'd fix it here since I fixed the other spot.

if (stepInfoString.equals(lifecycleState.stepInfo())) {
return clusterState;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import org.elasticsearch.index.Index;
import org.elasticsearch.xpack.core.ilm.Step;

import java.io.IOException;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.LongSupplier;
Expand Down Expand Up @@ -56,7 +55,7 @@ public MoveToErrorStepUpdateTask(
}

@Override
public ClusterState execute(ClusterState currentState) throws IOException {
public ClusterState execute(ClusterState currentState) {
IndexMetadata idxMeta = currentState.getMetadata().index(index);
if (idxMeta == null) {
// Index must have been since deleted, ignore it
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ public void testClusterActionStepThrowsException() throws Exception {
assertThat(lifecycleState.phaseTime(), nullValue());
assertThat(lifecycleState.actionTime(), nullValue());
assertThat(lifecycleState.stepInfo(), containsString("""
{"type":"runtime_exception","reason":"error","stack_trace":\""""));
{"type":"runtime_exception","reason":"error\""""));
}

public void testClusterWaitStepThrowsException() throws Exception {
Expand All @@ -322,7 +322,7 @@ public void testClusterWaitStepThrowsException() throws Exception {
assertThat(lifecycleState.phaseTime(), nullValue());
assertThat(lifecycleState.actionTime(), nullValue());
assertThat(lifecycleState.stepInfo(), containsString("""
{"type":"runtime_exception","reason":"error","stack_trace":\""""));
{"type":"runtime_exception","reason":"error\""""));
}

private void setStateToKey(StepKey stepKey) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ public void testMoveClusterStateToErrorStep() throws IOException {
(idxMeta, stepKey) -> new MockStep(stepKey, nextStepKey)
);
assertClusterStateOnErrorStep(clusterState, index, currentStep, newClusterState, now, """
{"type":"illegal_argument_exception","reason":"non elasticsearch-exception","stack_trace":\"""");
{"type":"illegal_argument_exception","reason":"non elasticsearch-exception\"""");
}

public void testAddStepInfoToClusterState() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,9 @@
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.LifecycleExecutionState;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.json.JsonXContent;
import org.elasticsearch.xpack.core.ilm.ErrorStep;
import org.elasticsearch.xpack.core.ilm.IndexLifecycleMetadata;
import org.elasticsearch.xpack.core.ilm.LifecyclePolicy;
Expand Down Expand Up @@ -98,13 +94,8 @@ public void testExecuteSuccessfullyMoved() throws IOException {
assertThat(lifecycleState.actionTime(), nullValue());
assertThat(lifecycleState.stepTime(), equalTo(now));

XContentBuilder causeXContentBuilder = JsonXContent.contentBuilder();
causeXContentBuilder.startObject();
ElasticsearchException.generateThrowableXContent(causeXContentBuilder, ToXContent.EMPTY_PARAMS, cause);
causeXContentBuilder.endObject();
String expectedCauseValue = BytesReference.bytes(causeXContentBuilder).utf8ToString();
assertThat(lifecycleState.stepInfo(), containsString("""
{"type":"exception","reason":"THIS IS AN EXPECTED CAUSE","stack_trace":\""""));
{"type":"exception","reason":"THIS IS AN EXPECTED CAUSE\""""));
}

public void testExecuteNoopDifferentStep() throws IOException {
Expand Down