From d63b1cca83141ec61f00d0dfbe07d6aaf9629bba Mon Sep 17 00:00:00 2001
From: Jonas Kunz
Date: Tue, 23 Apr 2024 13:41:34 +0200
Subject: [PATCH] Prevent cycles in inferred spans (#3588)
---
CHANGELOG.asciidoc | 4 ++
.../agent/impl/transaction/TraceContext.java | 17 ++++-
.../elastic/apm/agent/profiler/CallTree.java | 56 ++++++++++------
.../elastic/apm/agent/profiler/ChildList.java | 64 +++++++++++++++++++
.../apm/agent/profiler/SamplingProfiler.java | 2 +-
.../agent/profiler/CallTreeSpanifyTest.java | 59 ++++++++++++++++-
.../apm/agent/profiler/CallTreeTest.java | 2 +-
7 files changed, 180 insertions(+), 24 deletions(-)
create mode 100644 apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/ChildList.java
diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc
index 44b9c2f8e5..c04a3f9a2c 100644
--- a/CHANGELOG.asciidoc
+++ b/CHANGELOG.asciidoc
@@ -31,6 +31,10 @@ Use subheadings with the "=====" level for adding notes for unreleased changes:
=== Unreleased
+[float]
+===== Bug fixes
+* Fixed edge case where inferred spans could cause cycles in the trace parent-child relationships, subsequently resulting in the UI crashing - {pull}3588[#3588]
+
[[release-notes-1.x]]
=== Java Agent version 1.x
diff --git a/apm-agent-core/src/main/java/co/elastic/apm/agent/impl/transaction/TraceContext.java b/apm-agent-core/src/main/java/co/elastic/apm/agent/impl/transaction/TraceContext.java
index f8854468f0..cc3df527cb 100644
--- a/apm-agent-core/src/main/java/co/elastic/apm/agent/impl/transaction/TraceContext.java
+++ b/apm-agent-core/src/main/java/co/elastic/apm/agent/impl/transaction/TraceContext.java
@@ -64,7 +64,7 @@ public class TraceContext implements Recyclable, co.elastic.apm.agent.tracer.Tra
public static final String ELASTIC_TRACE_PARENT_TEXTUAL_HEADER_NAME = "elastic-apm-traceparent";
public static final String W3C_TRACE_PARENT_TEXTUAL_HEADER_NAME = "traceparent";
public static final String TRACESTATE_HEADER_NAME = "tracestate";
- public static final int SERIALIZED_LENGTH = 42;
+ public static final int SERIALIZED_LENGTH = 51;
private static final int TEXT_HEADER_EXPECTED_LENGTH = 55;
private static final int TEXT_HEADER_TRACE_ID_OFFSET = 3;
private static final int TEXT_HEADER_PARENT_ID_OFFSET = 36;
@@ -227,6 +227,7 @@ private TraceContext(ElasticApmTracer tracer, Id id) {
*
* Note: the {@link #traceId} will still be 128 bit
*
+ *
* @param tracer a valid tracer
*/
public static TraceContext with64BitId(ElasticApmTracer tracer) {
@@ -621,7 +622,7 @@ public boolean equals(Object o) {
if (o == null || getClass() != o.getClass()) return false;
TraceContext that = (TraceContext) o;
return id.equals(that.id) &&
- traceId.equals(that.traceId);
+ traceId.equals(that.traceId);
}
public boolean idEquals(@Nullable TraceContext o) {
@@ -650,6 +651,8 @@ public void serialize(byte[] buffer) {
offset = traceId.toBytes(buffer, offset);
offset = id.toBytes(buffer, offset);
offset = transactionId.toBytes(buffer, offset);
+ buffer[offset++] = parentId.isEmpty() ? (byte) 0 : (byte) 1;
+ offset = parentId.toBytes(buffer, offset);
buffer[offset++] = flags;
buffer[offset++] = (byte) (discardable ? 1 : 0);
ByteUtils.putLong(buffer, offset, clock.getOffset());
@@ -660,6 +663,12 @@ public void deserialize(byte[] buffer, @Nullable String serviceName, @Nullable S
offset += traceId.fromBytes(buffer, offset);
offset += id.fromBytes(buffer, offset);
offset += transactionId.fromBytes(buffer, offset);
+ if (buffer[offset++] != 0) {
+ offset += parentId.fromBytes(buffer, offset);
+ } else {
+ parentId.resetState();
+ offset += 8;
+ }
flags = buffer[offset++];
discardable = buffer[offset++] == (byte) 1;
clock.init(ByteUtils.getLong(buffer, offset));
@@ -672,6 +681,10 @@ public static long getSpanId(byte[] serializedTraceContext) {
return ByteUtils.getLong(serializedTraceContext, 16);
}
+ public static long getParentId(byte[] serializedTraceContext) {
+ return ByteUtils.getLong(serializedTraceContext, 33);
+ }
+
public boolean traceIdAndIdEquals(byte[] serialized) {
return id.dataEquals(serialized, traceId.getLength()) && traceId.dataEquals(serialized, 0);
}
diff --git a/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/CallTree.java b/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/CallTree.java
index 551a74b8be..3042fee406 100644
--- a/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/CallTree.java
+++ b/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/CallTree.java
@@ -79,9 +79,9 @@ public class CallTree implements Recyclable {
* @see co.elastic.apm.agent.impl.transaction.AbstractSpan#childIds
*/
@Nullable
- private LongList childIds;
+ private ChildList childIds;
@Nullable
- private LongList maybeChildIds;
+ private ChildList maybeChildIds;
public CallTree() {
}
@@ -375,20 +375,21 @@ private void toString(Appendable out, int level) throws IOException {
}
}
- int spanify(CallTree.Root root, TraceContext parentContext) {
+ int spanify(CallTree.Root root, TraceContext parentContext, TraceContext nonInferredParentContext) {
int createdSpans = 0;
if (activeContextOfDirectParent != null) {
parentContext = activeContextOfDirectParent;
+ nonInferredParentContext = activeContextOfDirectParent;
}
Span span = null;
if (!isPillar() || isLeaf()) {
createdSpans++;
- span = asSpan(root, parentContext);
+ span = asSpan(root, parentContext, nonInferredParentContext);
this.isSpan = true;
}
List children = getChildren();
for (int i = 0, size = children.size(); i < size; i++) {
- createdSpans += children.get(i).spanify(root, span != null ? span.getTraceContext() : parentContext);
+ createdSpans += children.get(i).spanify(root, span != null ? span.getTraceContext() : parentContext, nonInferredParentContext);
}
if (span != null) {
span.end(span.getTimestamp() + getDurationUs());
@@ -396,7 +397,7 @@ int spanify(CallTree.Root root, TraceContext parentContext) {
return createdSpans;
}
- protected Span asSpan(Root root, TraceContext parentContext) {
+ protected Span asSpan(Root root, TraceContext parentContext, TraceContext nonInferredParentContext) {
transferMaybeChildIdsToChildIds();
Span span = parentContext.createSpan(root.getEpochMicros(this.start))
.withType("app")
@@ -410,7 +411,21 @@ protected Span asSpan(Root root, TraceContext parentContext) {
}
span.appendToName("#");
span.appendToName(frame.getMethodName());
- span.withChildIds(childIds);
+
+ LongList childSpanIds = null;
+ if (childIds != null) {
+ long expectedParent = nonInferredParentContext.getId().readLong(0);
+ childSpanIds = new LongList(childIds.getSize());
+ for (int i = 0; i < childIds.getSize(); i++) {
+ // to avoid cycles, we only insert child-ids if the parent of the child is also
+ // the parent of the stack of inferred spans inserted
+ if (childIds.getParentId(i) == expectedParent) {
+ childSpanIds.add(childIds.getId(i));
+ }
+ }
+ }
+
+ span.withChildIds(childSpanIds);
// we're not interested in the very bottom of the stack which contains things like accepting and handling connections
if (!root.rootContext.idEquals(parentContext)) {
@@ -498,19 +513,20 @@ public void resetState() {
*
*
* @param id the child span id to add to this call tree element
+ * @param parentId the parent id of the span represented by the id parameter
*/
- public void addMaybeChildId(long id) {
+ public void addMaybeChildId(long id, long parentId) {
if (maybeChildIds == null) {
- maybeChildIds = new LongList();
+ maybeChildIds = new ChildList();
}
- maybeChildIds.add(id);
+ maybeChildIds.add(id, parentId);
}
- public void addChildId(long id) {
+ public void addChildId(long id, long parentId) {
if (childIds == null) {
- childIds = new LongList();
+ childIds = new ChildList();
}
- childIds.add(id);
+ childIds.add(id, parentId);
}
public boolean hasChildIds() {
@@ -541,7 +557,11 @@ void giveChildIdsTo(CallTree giveTo) {
void giveLastChildIdTo(CallTree giveTo) {
if (childIds != null && !childIds.isEmpty()) {
- giveTo.addChildId(childIds.remove(childIds.getSize() - 1));
+ int size = childIds.getSize();
+ long id = childIds.getId(size - 1);
+ long parentId = childIds.getParentId(size - 1);
+ giveTo.addChildId(id, parentId);
+ childIds.removeLast();
}
}
@@ -619,7 +639,7 @@ public void onActivation(byte[] active, long timestamp) {
long spanId = TraceContext.getSpanId(active);
activeSet.add(spanId);
if (!isNestedActivation(topOfStack)) {
- topOfStack.addMaybeChildId(spanId);
+ topOfStack.addMaybeChildId(spanId, TraceContext.getParentId(active));
}
}
}
@@ -628,12 +648,12 @@ private boolean isNestedActivation(CallTree topOfStack) {
return isAnyActive(topOfStack.childIds) || isAnyActive(topOfStack.maybeChildIds);
}
- private boolean isAnyActive(@Nullable LongList spanIds) {
+ private boolean isAnyActive(@Nullable ChildList spanIds) {
if (spanIds == null) {
return false;
}
for (int i = 0, size = spanIds.getSize(); i < size; i++) {
- if (activeSet.contains(spanIds.get(i))) {
+ if (activeSet.contains(spanIds.getId(i))) {
return true;
}
}
@@ -719,7 +739,7 @@ public int spanify() {
int createdSpans = 0;
List callTrees = getChildren();
for (int i = 0, size = callTrees.size(); i < size; i++) {
- createdSpans += callTrees.get(i).spanify(this, rootContext);
+ createdSpans += callTrees.get(i).spanify(this, rootContext, rootContext);
}
return createdSpans;
}
diff --git a/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/ChildList.java b/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/ChildList.java
new file mode 100644
index 0000000000..c4b079f9f8
--- /dev/null
+++ b/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/ChildList.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package co.elastic.apm.agent.profiler;
+
+
+import co.elastic.apm.agent.sdk.internal.collections.LongList;
+
+/** List for maintaining pairs of (spanId,parentIds) both represented as longs. */
+public class ChildList {
+
+ // this list contains the (spanId,parentIds) flattened
+ private final LongList idsWithParentIds = new LongList();
+
+ public void add(long id, long parentId) {
+ idsWithParentIds.add(id);
+ idsWithParentIds.add(parentId);
+ }
+
+ public long getId(int index) {
+ return idsWithParentIds.get(index * 2);
+ }
+
+ public long getParentId(int index) {
+ return idsWithParentIds.get(index * 2 + 1);
+ }
+
+ public int getSize() {
+ return idsWithParentIds.getSize() / 2;
+ }
+
+ public void addAll(ChildList other) {
+ idsWithParentIds.addAll(other.idsWithParentIds);
+ }
+
+ public void clear() {
+ idsWithParentIds.clear();
+ }
+
+ public boolean isEmpty() {
+ return getSize() == 0;
+ }
+
+ public void removeLast() {
+ int size = idsWithParentIds.getSize();
+ idsWithParentIds.remove(size - 1);
+ idsWithParentIds.remove(size - 2);
+ }
+}
diff --git a/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/SamplingProfiler.java b/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/SamplingProfiler.java
index 0c8cb3f674..860a0896de 100644
--- a/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/SamplingProfiler.java
+++ b/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/SamplingProfiler.java
@@ -106,7 +106,7 @@
* and at least one stack trace.
* Once {@linkplain ActivationEvent#handleDeactivationEvent(SamplingProfiler) handling the deactivation event} of the root span in a thread
* (after which {@link ElasticApmTracer#getActive()} would return {@code null}),
- * the {@link CallTree} is {@linkplain CallTree#spanify(CallTree.Root, TraceContext) converted into regular spans}.
+ * the {@link CallTree} is {@linkplain CallTree#spanify(CallTree.Root, TraceContext, TraceContext) converted into regular spans}.
*
*
* Overall, the allocation rate does not depend on the number of {@link ActivationEvent}s but only on
diff --git a/apm-agent-plugins/apm-profiling-plugin/src/test/java/co/elastic/apm/agent/profiler/CallTreeSpanifyTest.java b/apm-agent-plugins/apm-profiling-plugin/src/test/java/co/elastic/apm/agent/profiler/CallTreeSpanifyTest.java
index aea204c257..738712e825 100644
--- a/apm-agent-plugins/apm-profiling-plugin/src/test/java/co/elastic/apm/agent/profiler/CallTreeSpanifyTest.java
+++ b/apm-agent-plugins/apm-profiling-plugin/src/test/java/co/elastic/apm/agent/profiler/CallTreeSpanifyTest.java
@@ -35,6 +35,7 @@
import org.stagemonitor.configuration.ConfigurationRegistry;
import java.io.IOException;
+import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
@@ -141,8 +142,62 @@ void testCallTreeWithActiveSpan() {
root.spanify();
assertThat(reporter.getSpans()).hasSize(2);
- assertThat(reporter.getSpans().get(1).getTraceContext().isChildOf(spanContext));
- assertThat(reporter.getSpans().get(0).getTraceContext().isChildOf(rootContext));
+ assertThat(reporter.getSpans().get(0).getTraceContext().isChildOf(spanContext)).isTrue();
+ assertThat(reporter.getSpans().get(1).getTraceContext().isChildOf(rootContext)).isTrue();
+ }
+
+
+ @Test
+ void testSpanWithInvertedActivation() {
+ TraceContext rootContext = CallTreeTest.rootTraceContext(tracer);
+
+ TraceContext childSpanContext = TraceContext.with64BitId(tracer);
+ TraceContext.fromParentContext().asChildOf(childSpanContext, rootContext);
+
+ NoopObjectPool rootPool = NoopObjectPool.ofRecyclable(() -> new CallTree.Root(tracer));
+ NoopObjectPool childPool = NoopObjectPool.ofRecyclable(CallTree::new);
+
+ CallTree.Root root = CallTree.createRoot(rootPool, childSpanContext.serialize(), rootContext.getServiceName(), rootContext.getServiceVersion(), 0);
+ root.addStackTrace(tracer, Collections.singletonList(StackFrame.of("A", "a")), 10_000, childPool, 0);
+
+ root.onActivation(rootContext.serialize(), 20_000);
+ root.onDeactivation(rootContext.serialize(), childSpanContext.serialize(), 30_000);
+
+ root.addStackTrace(tracer, Collections.singletonList(StackFrame.of("A", "a")), 40_000, childPool, 0);
+ root.end(childPool, 0);
+
+ root.spanify();
+
+ assertThat(reporter.getSpans()).hasSize(1);
+ assertThat(reporter.getSpans().get(0).getTraceContext().isChildOf(childSpanContext)).isTrue();
+ // the inferred span should not have any span links because this
+ // span link would cause a cycle in the trace
+ assertThat(reporter.getSpans().get(0).getChildIds().getSize()).isEqualTo(0L);
+ }
+
+ @Test
+ void testSpanWithNestedActivation() {
+ TraceContext rootContext = CallTreeTest.rootTraceContext(tracer);
+
+ NoopObjectPool rootPool = NoopObjectPool.ofRecyclable(() -> new CallTree.Root(tracer));
+ NoopObjectPool childPool = NoopObjectPool.ofRecyclable(CallTree::new);
+
+ CallTree.Root root = CallTree.createRoot(rootPool, rootContext.serialize(), rootContext.getServiceName(), rootContext.getServiceVersion(), 0);
+ root.addStackTrace(tracer, Collections.singletonList(StackFrame.of("A", "a")), 10_000, childPool, 0);
+
+ root.onActivation(rootContext.serialize(), 20_000);
+ root.onDeactivation(rootContext.serialize(), rootContext.serialize(), 30_000);
+
+ root.addStackTrace(tracer, Collections.singletonList(StackFrame.of("A", "a")), 40_000, childPool, 0);
+ root.end(childPool, 0);
+
+ root.spanify();
+
+ assertThat(reporter.getSpans()).hasSize(1);
+ assertThat(reporter.getSpans().get(0).getTraceContext().isChildOf(rootContext)).isTrue();
+ // the inferred span should not have any span links because this
+ // span link would cause a cycle in the trace
+ assertThat(reporter.getSpans().get(0).getChildIds().getSize()).isEqualTo(0L);
}
}
diff --git a/apm-agent-plugins/apm-profiling-plugin/src/test/java/co/elastic/apm/agent/profiler/CallTreeTest.java b/apm-agent-plugins/apm-profiling-plugin/src/test/java/co/elastic/apm/agent/profiler/CallTreeTest.java
index b18d86ecdd..ae64d3387a 100644
--- a/apm-agent-plugins/apm-profiling-plugin/src/test/java/co/elastic/apm/agent/profiler/CallTreeTest.java
+++ b/apm-agent-plugins/apm-profiling-plugin/src/test/java/co/elastic/apm/agent/profiler/CallTreeTest.java
@@ -120,7 +120,7 @@ void testCallTree() {
@Test
void testGiveEmptyChildIdsTo() {
CallTree rich = new CallTree();
- rich.addChildId(42);
+ rich.addChildId(42, 0L);
CallTree robinHood = new CallTree();
CallTree poor = new CallTree();