Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix agent connection #1678

Merged
merged 21 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions gradle/libs.versions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ orgSpringframework = "5.3.39"
# @pin 2.7.18 is the latest release which runs on Java 8, this version marks the en of open source support for Sb 2.x
orgSpringframeworkBoot = "2.7.18"
orgTestcontainers = "1.20.3"
# @pin resilience4j 2.* uses Java 17 APIs
ioGithubResilience4j = "1.7.1"

[libraries]
# @pin 1.3.* are the latest versions, which support java 8
Expand All @@ -42,8 +44,8 @@ commonsBeanutils = "commons-beanutils:commons-beanutils:1.9.4"
commonsIo = "commons-io:commons-io:2.17.0"
ioGithubNetmikeyLogunitLogunitCore = { module = "io.github.netmikey.logunit:logunit-core", version.ref = "ioGithubNetmikeyLogunit" }
ioGithubNetmikeyLogunitLogunitLogback = { module = "io.github.netmikey.logunit:logunit-logback", version.ref = "ioGithubNetmikeyLogunit" }
# @pin resilience4j 2.* uses Java 17 APIs
ioGithubResilience4jResilience4jRetry = "io.github.resilience4j:resilience4j-retry:1.7.1"
ioGithubResilience4jResilience4jRetry = { module = "io.github.resilience4j:resilience4j-retry", version.ref = "ioGithubResilience4j" }
ioGithubResilience4jResilience4jTimelimiter = { module = "io.github.resilience4j:resilience4j-timelimiter", version.ref = "ioGithubResilience4j" }
ioGrpcGrpcNettyShaded = { module = "io.grpc:grpc-netty-shaded", version.ref = "ioGrpc" }
ioGrpcGrpcStub = { module = "io.grpc:grpc-stub", version.ref = "ioGrpc" }
# The following dependency is required for the OC-exporter to work correctly and must be matched against the grpc version
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import lombok.NoArgsConstructor;
import rocks.inspectit.ocelot.config.model.config.RetrySettings;

import javax.validation.Valid;
import javax.validation.constraints.AssertFalse;
import java.net.URL;
import java.time.Duration;

Expand Down Expand Up @@ -32,16 +34,41 @@ public class AgentCommandSettings {
private String agentCommandPath;

/**
* The timeout duration used for requests when the agent is in discovery mode. Defining how long the agent will wait for
* The timeout duration used to establish the connection with the remote host in discovery mode.
*/
private Duration liveConnectionTimeout;

/**
* The timeout duration the client will wait to acquire a connection from the connection pool in discovery mode.
*/
private Duration liveConnectionRequestTimeout;

/**
* The timeout duration used for requests when the agent is in discovery mode. Defining how long the agent will wait for
* new commands.
*/
private Duration liveSocketTimeout;

/**
* The timeout duration used to establish the connection with the remote host in normal mode.
*/
private Duration connectionTimeout;

/**
* The timeout duration the client will wait to acquire a connection from the connection pool in normal mode.
*/
private Duration connectionRequestTimeout;

/**
* The timeout duration used for requests when the agent is in normal mode.
*/
private Duration socketTimeout;

/**
* The TTL - the time to keep an HTTP connection alive
*/
private Duration timeToLive;

/**
* The used interval for polling commands.
*/
Expand All @@ -55,5 +82,21 @@ public class AgentCommandSettings {
/**
* Settings how retries are handled regarding fetching an agent command.
*/
@Valid
private RetrySettings retry;

@AssertFalse(message = "The specified time values should not be negative!")
public boolean isNegativeTimeout() {
boolean negativeLiveConnectionTimeout = liveConnectionTimeout != null && liveConnectionTimeout.isNegative();
boolean negativeConnectionTimeout = connectionTimeout != null && connectionTimeout.isNegative();
boolean negativeLiveConnectionRequestTimeout = liveConnectionRequestTimeout != null && liveConnectionRequestTimeout.isNegative();
boolean negativeConnectionRequestTimeout = connectionRequestTimeout != null && connectionRequestTimeout.isNegative();
boolean negativeLiveSocketTimeout = liveSocketTimeout != null && liveSocketTimeout.isNegative();
boolean negativeSocketTimeout = socketTimeout != null && socketTimeout.isNegative();
boolean negativeTTL = timeToLive != null && timeToLive.isNegative();
return negativeLiveConnectionTimeout || negativeConnectionTimeout ||
negativeLiveConnectionRequestTimeout || negativeConnectionRequestTimeout ||
negativeLiveSocketTimeout || negativeSocketTimeout
|| negativeTTL;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
public class HttpConfigSettings {

/**
* Whether a HTTP property source should be used.
* Whether an HTTP property source should be used.
*/
private boolean enabled;

Expand Down Expand Up @@ -53,21 +53,34 @@ public class HttpConfigSettings {
*/
private Duration connectionTimeout;

/**
* The connection-request timeout to use - the time the client will wait to acquire a connection from the connection pool
*/
private Duration connectionRequestTimeout;

/**
* The socket timeout to use - the time waiting for data after establishing the connection; maximum time of inactivity between two data packets.
*/
private Duration socketTimeout;

/**
* The TTL - the time to keep an HTTP connection alive
*/
private Duration timeToLive;

/**
* Settings how retries are handled regarding fetching an HTTP property source.
*/
@Valid
private RetrySettings retry;

@AssertFalse(message = "The specified timeout values should not be negative!")
@AssertFalse(message = "The specified time values should not be negative!")
public boolean isNegativeTimeout() {
boolean negativeConnectionTimeout = connectionTimeout != null && connectionTimeout.isNegative();
boolean negativeConnectionRequestTimeout = connectionRequestTimeout != null && connectionRequestTimeout.isNegative();
boolean negativeReadTimeout = socketTimeout != null && socketTimeout.isNegative();
return negativeConnectionTimeout || negativeReadTimeout;
boolean negativeTTL = timeToLive != null && timeToLive.isNegative();
return negativeConnectionTimeout || negativeConnectionRequestTimeout || negativeReadTimeout ||
negativeTTL;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,10 @@ public class RetrySettings {
@NotNull
// We use a BigDecimal as there is no support for double in hibernate validator
private BigDecimal randomizationFactor;

/**
* The maximum amount of time one retry is allowed to take. May not be lower than 1
*/
@DurationMin(millis = 0, inclusive = false)
private Duration timeLimit;
}
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ inspectit:
include-service-name: true

# defines how many threads inspectIT may start for its internal tasks
thread-pool-size: 2
thread-pool-size: 4

# settings for the agent commands and communication
agent-commands:
Expand All @@ -136,7 +136,7 @@ inspectit:
socket-timeout: 5s
# the used interval for polling commands
polling-interval: 15s
# how long the agent will staying in the live mode, before falling back to the normal mode
# how long the agent will stay in the live mode, before falling back to the normal mode
live-mode-duration: 2m
retry:
# true if retries are enabled, false otherwise
Expand All @@ -149,6 +149,8 @@ inspectit:
multiplier: 2
# This factor introduces randomness to what the actual wait interval will be. This prevents that a lot of agents will issue requests towards the configuration server at the same time.
randomization-factor: 0.1
# The maximum duration one retry may take
time-limit: 32m

log-preloading:
enabled: false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,12 @@ inspectit:
# true if retries are enabled, false otherwise
enabled: true
# The maximum number of attempts to try to fetch the configuration
max-attempts: 7
max-attempts: 6
# The initial interval to wait after the first failed attempt.
initial-interval: 30s
# For each retry the last interval to wait is multiplied with this number to calculate the next interval to wait
multiplier: 2
# This factor introduces randomness to what the actual wait interval will be. This prevents that a lot of agents will issue requests towards the configuration server at the same time.
randomization-factor: 0.1
# The maximum duration one retry may take
time-limit: 32m
1 change: 1 addition & 0 deletions inspectit-ocelot-core/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ dependencies {
libs.piccolo,
libs.comFasterxmlJacksonCoreJacksonDatabind,
libs.ioGithubResilience4jResilience4jRetry,
libs.ioGithubResilience4jResilience4jTimelimiter,

libs.orgJavassist,

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.annotations.VisibleForTesting;
import io.github.resilience4j.retry.Retry;
import io.github.resilience4j.timelimiter.TimeLimiter;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
Expand All @@ -17,6 +18,9 @@

import java.io.IOException;
import java.io.InputStream;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
* Component which handles the fetching of new agent commands and execution of it.
Expand Down Expand Up @@ -53,10 +57,15 @@ public class CommandHandler {
*/
private boolean liveMode = false;

/**
* Executor to cancel one command fetch after a time limit was exceeded.
*/
private final ExecutorService timeLimitExecutor = Executors.newCachedThreadPool();

/**
* Tries fetching and executing a new agent command from the server.
*/
public void nextCommand() {
public void nextCommand() throws Exception {
nextCommand(null);
}

Expand All @@ -67,7 +76,7 @@ public void nextCommand() {
*
* @param payload a {@link CommandResponse} to send with the next request
*/
private void nextCommand(CommandResponse payload) {
private void nextCommand(CommandResponse payload) throws Exception {
CommandResponse commandResponse = payload;

do {
Expand Down Expand Up @@ -101,10 +110,23 @@ private boolean isLiveModeExpired() {
return System.currentTimeMillis() >= liveModeStart + settings.getLiveModeDuration().toMillis();
}

private Command getCommandWithRetry(CommandResponse commandResponse) {
private Command getCommandWithRetry(CommandResponse commandResponse) throws Exception {
Retry retry = buildRetry();
if (retry != null) {
return retry.executeSupplier(() -> getCommand(commandResponse));
log.debug("Using Retries...");
Callable<Command> getCommand;

TimeLimiter timeLimiter = buildTimeLimiter();
if(timeLimiter != null) {
log.debug("Using TimeLimiter...");
// Use time limiter for every function call
getCommand = timeLimiter.decorateFutureSupplier(() -> timeLimitExecutor.submit(() -> getCommand(commandResponse)));
}
else getCommand = () -> getCommand(commandResponse);

Command command = retry.executeCallable(getCommand);
return command;

} else {
return getCommand(commandResponse);
}
Expand All @@ -116,6 +138,12 @@ private Retry buildRetry() {
.getRetry(), "agent-commands");
}

private TimeLimiter buildTimeLimiter() {
return RetryUtils.buildTimeLimiter(environment.getCurrentConfig()
.getAgentCommands()
.getRetry(), "agent-commands");
}

/**
* Fetches a command and processes the response.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,15 @@
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import rocks.inspectit.ocelot.commons.models.command.Command;
import rocks.inspectit.ocelot.commons.models.command.CommandResponse;
import rocks.inspectit.ocelot.config.model.command.AgentCommandSettings;
import rocks.inspectit.ocelot.core.command.http.CommandHttpClientHolder;
import rocks.inspectit.ocelot.core.config.InspectitEnvironment;

import java.io.IOException;
Expand Down Expand Up @@ -43,50 +42,16 @@ public class HttpCommandFetcher {
private static final String META_HEADER_PREFIX = "X-OCELOT-";

/**
* Http client used in the normal mode.
* The holder of the HTTP clients for agent commands.
*/
private HttpClient normalHttpClient;

/**
* Http client used in the live mode (longer timeouts).
*/
private HttpClient liveHttpClient;
private final CommandHttpClientHolder clientHolder = new CommandHttpClientHolder();

/**
* The URI for fetching commands.
*/
@Setter
private URI commandUri;

/**
* Returns the {@link HttpClient} which is used for fetching commands.
*
* @return A new {@link HttpClient} instance.
*/
private HttpClient getHttpClient(boolean liveClient) {
if (normalHttpClient == null || liveHttpClient == null) {
updateHttpClients();
}

return liveClient ? liveHttpClient : normalHttpClient;
}

/**
* Updating the http clients.
*/
private void updateHttpClients() {
AgentCommandSettings settings = environment.getCurrentConfig().getAgentCommands();
int timeout = (int) settings.getSocketTimeout().toMillis();
int liveTimeout = (int) settings.getLiveSocketTimeout().toMillis();

RequestConfig normalConfig = RequestConfig.custom().setSocketTimeout(timeout).build();

RequestConfig liveConfig = RequestConfig.custom().setSocketTimeout(liveTimeout).build();

normalHttpClient = HttpClientBuilder.create().setDefaultRequestConfig(normalConfig).build();
liveHttpClient = HttpClientBuilder.create().setDefaultRequestConfig(liveConfig).build();
}

/**
* Fetches a {@link Command} by sending the given {@link CommandResponse} as payload and uses the given timeout-int as timeout.
*
Expand Down Expand Up @@ -148,4 +113,17 @@ private void setAgentMetaHeaders(HttpPost httpPost) {

httpPost.setHeader(META_HEADER_PREFIX + "AGENT-ID", runtime.getName());
}

/**
* Returns the {@link HttpClient} which is used for fetching commands.
*
* @param liveClient true, if live-mode is active
* @return A {@link HttpClient} instance.
*/
private HttpClient getHttpClient(boolean liveClient) throws IOException {
AgentCommandSettings currentSettings = environment.getCurrentConfig().getAgentCommands();

if(liveClient) return clientHolder.getLiveHttpClient(currentSettings);
else return clientHolder.getDiscoveryHttpClient(currentSettings);
}
}
Loading
Loading