Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add stall detection #1536

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -350,4 +350,7 @@ public interface AgentConfig extends com.newrelic.api.agent.Config, DataSenderCo
CommandParserConfig getCommandParserConfig();

InfiniteTracingConfig getInfiniteTracingConfig();

SlowTransactionsConfig getSlowTransactionsConfig();

}
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ public class AgentConfigImpl extends BaseConfig implements AgentConfig {
public static final String TRANSACTION_EVENTS = "transaction_events"; // replaces analytics_events
public static final String TRANSACTION_SEGMENTS = "transaction_segments";
public static final String TRANSACTION_TRACER = "transaction_tracer";
public static final String SLOW_TRANSACTIONS = "slow_transactions";

// defaults (alphabetized)
public static final double DEFAULT_APDEX_T = 1.0; // 1 second
Expand Down Expand Up @@ -265,6 +266,7 @@ public class AgentConfigImpl extends BaseConfig implements AgentConfig {
private final OpenTracingConfig openTracingConfig;
private final ReinstrumentConfig reinstrumentConfig;
private final TransactionTracerConfigImpl requestTransactionTracerConfig;
private final SlowTransactionsConfig slowTransactionsConfig;
private final SpanEventsConfig spanEventsConfig;
private final SqlTraceConfig sqlTraceConfig;
private final StripExceptionConfig stripExceptionConfig;
Expand Down Expand Up @@ -367,6 +369,7 @@ private AgentConfigImpl(Map<String, Object> props) {
transactionEventsConfig = initTransactionEvents();
commandParserConfig = initCommandParserConfig();
normalizationRuleConfig = new NormalizationRuleConfig(props);
slowTransactionsConfig = initSlowTransactionsConfig();

Map<String, Object> flattenedProps = new HashMap<>();
flatten("", props, flattenedProps);
Expand Down Expand Up @@ -856,6 +859,11 @@ private CommandParserConfig initCommandParserConfig() {
return new CommandParserConfigImpl(nestedProps(CommandParserConfigImpl.ROOT));
}

private SlowTransactionsConfig initSlowTransactionsConfig() {
Map<String, Object> props = nestedProps(SLOW_TRANSACTIONS);
return new SlowTransactionsConfigImpl(props);
}

@Override
public long getApdexTInMillis() {
return apdexTInMillis;
Expand Down Expand Up @@ -1067,6 +1075,11 @@ public InfiniteTracingConfig getInfiniteTracingConfig() {
return infiniteTracingConfig;
}

@Override
public SlowTransactionsConfig getSlowTransactionsConfig() {
return slowTransactionsConfig;
}

private Object findPropertyInMap(String[] property, Map<String, Object> map) {
Object result = map;
for (String component : property) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
*
* * Copyright 2020 New Relic Corporation. All rights reserved.
* * SPDX-License-Identifier: Apache-2.0
*
*/

package com.newrelic.agent.config;

import com.newrelic.agent.service.slowtransactions.SlowTransactionService;

public interface SlowTransactionsConfig {

/**
* True if the {@link SlowTransactionService} is enabled, else false.
*/
boolean isEnabled();

/**
* The minimum number of milliseconds a transaction must be running to be
* reported as slow.
*/
long getThresholdMillis();

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
*
* * Copyright 2020 New Relic Corporation. All rights reserved.
* * SPDX-License-Identifier: Apache-2.0
*
*/

package com.newrelic.agent.config;

import java.util.Map;

public class SlowTransactionsConfigImpl extends BaseConfig implements SlowTransactionsConfig {

public static final String ROOT = "slow_transactions";
public static final String SYSTEM_PROPERTY_ROOT = "newrelic.config." + ROOT + ".";
public static final String ENABLED = "enabled";
public static final String THRESHOLD = "threshold";

public static final boolean DEFAULT_ENABLED = false;
public static final int DEFAULT_THRESHOLD_MILLIS = 1000;

private final boolean isEnabled;
private final int thresholdMillis;

public SlowTransactionsConfigImpl(Map<String, Object> pProps) {
super(pProps, SYSTEM_PROPERTY_ROOT);
isEnabled = getProperty(ENABLED, DEFAULT_ENABLED);
thresholdMillis = getIntProperty(THRESHOLD, DEFAULT_THRESHOLD_MILLIS);
}

@Override
public boolean isEnabled() {
return isEnabled;
}

@Override
public long getThresholdMillis() {
return thresholdMillis;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
import com.newrelic.agent.service.module.JarCollectorServiceProcessor;
import com.newrelic.agent.service.module.JarData;
import com.newrelic.agent.service.module.TrackedAddSet;
import com.newrelic.agent.service.slowtransactions.SlowTransactionService;
import com.newrelic.agent.sql.SqlTraceService;
import com.newrelic.agent.sql.SqlTraceServiceImpl;
import com.newrelic.agent.stats.StatsEngine;
Expand Down Expand Up @@ -147,6 +148,7 @@ public class ServiceManagerImpl extends AbstractService implements ServiceManage
private volatile SpanEventsService spanEventsService;
private volatile SourceLanguageService sourceLanguageService;
private volatile ExpirationService expirationService;
private volatile SlowTransactionService slowTransactionService;

public ServiceManagerImpl(CoreService coreService, ConfigService configService) {
super(ServiceManagerImpl.class.getSimpleName());
Expand Down Expand Up @@ -231,6 +233,7 @@ protected synchronized void doStart() throws Exception {
rpmConnectionService = new RPMConnectionServiceImpl();
transactionService = new TransactionService();


InfiniteTracing infiniteTracing = buildInfiniteTracing(configService);
InfiniteTracingEnabledCheck infiniteTracingEnabledCheck = new InfiniteTracingEnabledCheck(configService);
SpanEventCreationDecider spanEventCreationDecider = new SpanEventCreationDecider(configService);
Expand Down Expand Up @@ -277,6 +280,8 @@ protected synchronized void doStart() throws Exception {
harvestService.addHarvestListener(extensionService);
harvestService.addHarvestListener(jarCollectorHarvestListener);

slowTransactionService = new SlowTransactionService(config);

asyncTxService.start();
threadService.start();
statsService.start();
Expand Down Expand Up @@ -309,6 +314,7 @@ protected synchronized void doStart() throws Exception {
circuitBreakerService.start();
distributedTraceService.start();
spanEventsService.start();
slowTransactionService.start();

startServices();

Expand Down Expand Up @@ -379,6 +385,7 @@ protected synchronized void doStop() throws Exception {
gcService.stop();
distributedTraceService.stop();
spanEventsService.stop();
slowTransactionService.stop();
stopServices();
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
package com.newrelic.agent.service.slowtransactions;

import com.newrelic.agent.ExtendedTransactionListener;
import com.newrelic.agent.HarvestListener;
import com.newrelic.agent.Transaction;
import com.newrelic.agent.TransactionData;
import com.newrelic.agent.config.AgentConfig;
import com.newrelic.agent.config.SlowTransactionsConfig;
import com.newrelic.agent.model.CustomInsightsEvent;
import com.newrelic.agent.service.AbstractService;
import com.newrelic.agent.service.ServiceFactory;
import com.newrelic.agent.service.analytics.InsightsService;
import com.newrelic.agent.stats.StatsEngine;
import com.newrelic.agent.stats.TransactionStats;
import com.newrelic.agent.tracing.DistributedTraceServiceImpl;
import com.newrelic.agent.util.StackTraces;

import javax.annotation.Nullable;
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.logging.Level;

public class SlowTransactionService extends AbstractService implements ExtendedTransactionListener, HarvestListener {

private final ConcurrentHashMap<String, Transaction> openTransactions = new ConcurrentHashMap<>();
private final ThreadMXBean threadMXBean;

private final boolean isEnabled;
private final long thresholdMillis;
private final int maxStackTraceLines;

@Nullable
private InsightsService insightsService;

public SlowTransactionService(AgentConfig agentConfig) {
this(agentConfig, ManagementFactory.getThreadMXBean());
}

// Visible for testing
SlowTransactionService(AgentConfig agentConfig, ThreadMXBean threadMXBean) {
super(SlowTransactionService.class.getSimpleName());
SlowTransactionsConfig slowTransactionsConfig = agentConfig.getSlowTransactionsConfig();
this.isEnabled = slowTransactionsConfig.isEnabled();
this.thresholdMillis = slowTransactionsConfig.getThresholdMillis();
this.maxStackTraceLines = agentConfig.getMaxStackTraceLines();
this.threadMXBean = threadMXBean;
}

@Override
protected void doStart() throws Exception {
// Short circuit if disabled
if (!isEnabled) {
return;
}
ServiceFactory.getTransactionService().addTransactionListener(this);
ServiceFactory.getHarvestService().addHarvestListener(this);
insightsService = ServiceFactory.getServiceManager().getInsights();
}

@Override
protected void doStop() throws Exception {
// Short circuit if disabled
if (!isEnabled) {
return;
}
ServiceFactory.getTransactionService().removeTransactionListener(this);
ServiceFactory.getHarvestService().removeHarvestListener(this);
}

@Override
public boolean isEnabled() {
return isEnabled;
}

@Override
public void dispatcherTransactionStarted(Transaction transaction) {
if (getLogger().isLoggable(Level.FINEST)) {
getLogger().finest("Transaction started with id " + transaction.getGuid());
}
openTransactions.put(transaction.getGuid(), transaction);
}

@Override
public void dispatcherTransactionCancelled(Transaction transaction) {
if (getLogger().isLoggable(Level.FINEST)) {
getLogger().finest("Transaction cancelled with guid " + transaction.getGuid());
}
openTransactions.remove(transaction.getGuid());
}

@Override
public void dispatcherTransactionFinished(TransactionData transactionData, TransactionStats transactionStats) {
if (getLogger().isLoggable(Level.FINEST)) {
getLogger().finest("Transaction finished with guid " + transactionData.getGuid());
}
openTransactions.remove(transactionData.getGuid());
}

// Visible for testing
Map<String, Transaction> getOpenTransactions() {
return Collections.unmodifiableMap(openTransactions);
}

@Override
public void beforeHarvest(String appName, StatsEngine statsEngine) {
run();
}

@Override
public void afterHarvest(String appName) {
}

// Visible for testing
void run() {
if (getLogger().isLoggable(Level.FINE)) {
getLogger().fine("Identifying slow threads. Open transactions: " + openTransactions.size());
}
Transaction slowestOpen = null;
long slowestOpenMillis = thresholdMillis;

// Identify the slowest open transaction we haven't yet reported
for (Transaction transaction : openTransactions.values()) {
long openMs = System.currentTimeMillis() - transaction.getWallClockStartTimeMs();
if (openMs > slowestOpenMillis) {
slowestOpen = transaction;
slowestOpenMillis = openMs;
}
}

if (slowestOpen == null) {
getLogger().fine("No new slow transactions identified.");
return;
}

// Construct and record SlowTransaction event
Map<String, Object> attributes = extractMetadata(slowestOpen, slowestOpenMillis);
String guid = slowestOpen.getGuid();
if (getLogger().isLoggable(Level.FINE)) {
getLogger().fine("Slowest open transaction has guid "
+ guid + " has been open for " + slowestOpenMillis + "ms, attributes: " + attributes);
}
if (insightsService != null) {
logger.fine("Sending slow transaction");
insightsService.storeEvent(
ServiceFactory.getRPMService().getApplicationName(),
new CustomInsightsEvent(
"SlowTransaction",
System.currentTimeMillis(),
attributes,
DistributedTraceServiceImpl.nextTruncatedFloat()));
//insightsService.recordCustomEvent("SlowTransaction", attributes);
}
// Remove from openTransactions to ensure we don't report the same Transaction
// multiple times
openTransactions.remove(guid);
}

// Visible for testing
Map<String, Object> extractMetadata(Transaction transaction, long openMillis) {
Map<String, Object> attributes = new HashMap<>();

// Attributes
// Write attributes first so hardcoded attributes are prioritized
attributes.putAll(transaction.getUserAttributes());
attributes.putAll(transaction.getErrorAttributes());
attributes.putAll(transaction.getAgentAttributes());
attributes.putAll(transaction.getIntrinsicAttributes());

// General
attributes.put("guid", transaction.getGuid());
attributes.put("name", transaction.getPriorityTransactionName().getName());
attributes.put("transactionType", transaction.getPriorityTransactionName().getCategory());
attributes.put("timestamp", transaction.getWallClockStartTimeMs());
attributes.put("elapsed_ms", openMillis);

// Initiating thread info
long initiatingThreadId = transaction.getInitiatingThreadId();
attributes.put("thread.id", initiatingThreadId);
ThreadInfo threadInfo = threadMXBean.getThreadInfo(initiatingThreadId, maxStackTraceLines);
if (threadInfo != null) {
attributes.put("thread.name", threadInfo.getThreadName());
attributes.put("thread.state", threadInfo.getThreadState().name());
List<StackTraceElement> scrubbedStackTraceElements = StackTraces.scrubAndTruncate(threadInfo.getStackTrace());
attributes.put("code.stacktrace", stackTraceString(scrubbedStackTraceElements));
}

new IllegalArgumentException().printStackTrace();
return attributes;
}

private static String stackTraceString(List<StackTraceElement> stackTrace) {
if (stackTrace.isEmpty()) {
return "";
}
StringBuilder stringBuilder = new StringBuilder();
stringBuilder.append(stackTrace.get(0)).append("\n");
for (int i = 1; i < stackTrace.size(); i++) {
stringBuilder.append("\tat ").append(stackTrace.get(i)).append("\n");
}
return stringBuilder.toString();
}
}
Loading
Loading