Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closes #1285 - Agents regularly report status to config server #1377

Merged
merged 27 commits into from
Apr 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
a921ef3
draft idea of sending an agent status
Henning-Schulz Mar 16, 2022
30ac3be
merge LogMetricsAppender and LogPreloadingAppender into InternalProce…
Henning-Schulz Mar 17, 2022
b888f09
move AgentStatusChangedEvent to separate package
Henning-Schulz Mar 17, 2022
4c473e4
introduce agent status manager
Henning-Schulz Mar 18, 2022
cc06308
add configuration and trigger notification when general status drops …
Henning-Schulz Mar 18, 2022
986d563
reset the agent status when the instrumentation config changed
Henning-Schulz Mar 18, 2022
07c8d89
adjust and fix test cases
Henning-Schulz Mar 21, 2022
87e1364
categorize events into instrumentation and general
Henning-Schulz Mar 21, 2022
f7d3a8a
add AgentStatusManagerTest and fix bug
Henning-Schulz Mar 21, 2022
e5d1447
add javadoc
Henning-Schulz Mar 29, 2022
3d23b08
export status metric
Henning-Schulz Mar 25, 2022
ab07daa
rename agent status to agent health due to naming conflicts
Henning-Schulz Mar 25, 2022
9f1390a
fix test case
Henning-Schulz Mar 25, 2022
fbd8c56
handle agent health in config server
Henning-Schulz Mar 25, 2022
96a72a0
bug fixes
Henning-Schulz Mar 25, 2022
17eaf13
allow multiple different log invalidating events
Henning-Schulz Mar 29, 2022
a68d222
adjust log preloader
Henning-Schulz Mar 29, 2022
8ce32e5
fix test case
Henning-Schulz Mar 29, 2022
3e913a7
store only one log observer per type
Henning-Schulz Mar 31, 2022
1c49eef
make agent health event triggering thread-safe and cleanup log observers
Henning-Schulz Mar 31, 2022
c6cfdc6
fix event listening
Henning-Schulz Mar 31, 2022
91f8214
add documentation
Henning-Schulz Apr 1, 2022
51e93d6
rename Observer to LogEventConsumer
Henning-Schulz Apr 11, 2022
6fa34c3
add documentation as requested in review
Henning-Schulz Apr 11, 2022
58c1d3b
do not set default agent health in config server
Henning-Schulz Apr 11, 2022
40852ed
cosmetic improvements of test cases
Henning-Schulz Apr 12, 2022
91cf578
Merge branch 'master' into iss1285
Henning-Schulz Apr 13, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import rocks.inspectit.ocelot.commons.models.health.AgentHealth;

import java.util.Date;
import java.util.Map;
Expand Down Expand Up @@ -42,4 +43,9 @@ public class AgentStatus {
* The branch of which the mapping delivered to the agent originates.
*/
private String sourceBranch;

/**
* The health status of the agent.
*/
private AgentHealth health;
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import rocks.inspectit.ocelot.agentconfiguration.AgentConfiguration;
import rocks.inspectit.ocelot.commons.models.health.AgentHealth;
import rocks.inspectit.ocelot.config.model.InspectitServerSettings;

import javax.annotation.PostConstruct;
Expand All @@ -19,8 +21,14 @@
* This is useful for detecting which agents are active.
*/
@Component
@Slf4j
public class AgentStatusManager {

/**
* Name of the agent health header.
*/
private static final String HEADER_AGENT_HEALTH = "x-ocelot-health";

@Autowired
@VisibleForTesting
InspectitServerSettings config;
Expand Down Expand Up @@ -70,9 +78,23 @@ public void notifyAgentConfigurationFetched(Map<String, String> agentAttributes,
statusKey = agentAttributes;
}

if (headers.containsKey(HEADER_AGENT_HEALTH)) {
AgentHealth agentHealth = AgentHealth.valueOf(headers.get(HEADER_AGENT_HEALTH));
agentStatus.setHealth(agentHealth);
logHealthIfChanged(statusKey, agentHealth);
}

attributesToAgentStatusCache.put(statusKey, agentStatus);
}

/**
 * Writes an info log entry when the reported health differs from the health stored in the
 * currently cached status for the given key, or when no status is cached for that key yet.
 *
 * @param statusKey   the cache key identifying the agent
 * @param agentHealth the health that was just reported by the agent
 */
private void logHealthIfChanged(Object statusKey, AgentHealth agentHealth) {
    AgentStatus previous = attributesToAgentStatusCache.getIfPresent(statusKey);

    // Nothing to report if a status is cached and it already carries the same health.
    if (previous != null && previous.getHealth() == agentHealth) {
        return;
    }

    log.info("Health of agent {} changed to {}.", statusKey, agentHealth);
}

/**
* @return a collection of all agent statuses since {@link #reset()} was called.
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package rocks.inspectit.ocelot.commons.models.health;

import ch.qos.logback.classic.Level;
import lombok.RequiredArgsConstructor;

/**
 * Health state of a single agent. The constants are declared in ascending order of severity,
 * which is what the severity comparisons in this enum rely on.
 */
@RequiredArgsConstructor
public enum AgentHealth {

    OK("the agent is working properly"),

    WARNING("the agent has warning messages"),

    ERROR("the agent has encountered errors");

    /**
     * Human-readable explanation of the state; appended to the name in {@link #toString()}.
     */
    private final String description;

    /**
     * Checks whether this health is at least as severe as the given one.
     * A {@code null} argument is treated as less severe than any health.
     *
     * @param other The health to compare with (may be {@code null})
     *
     * @return {@code true} if {@code other} is {@code null}, equal to this health, or less severe than it;
     * {@code false} otherwise.
     */
    public boolean isMoreSevereOrEqualTo(AgentHealth other) {
        if (other == null) {
            return true;
        }
        return compareTo(other) >= 0;
    }

    /**
     * Picks the most severe health out of the passed ones, skipping {@code null} entries.
     *
     * @param status The health values to compare (may contain {@code null})
     *
     * @return The most severe of the passed health values, or {@code null} if there is no non-null entry.
     */
    public static AgentHealth mostSevere(AgentHealth... status) {
        AgentHealth worst = null;

        for (AgentHealth candidate : status) {
            if (candidate == null) {
                continue;
            }
            if (candidate.isMoreSevereOrEqualTo(worst)) {
                worst = candidate;
            }
        }

        return worst;
    }

    /**
     * Maps the level of a log event to an agent health: {@code ERROR} or above yields {@link #ERROR},
     * {@code WARN} or above yields {@link #WARNING}, everything else yields {@link #OK}.
     *
     * @param logLevel The log level that occurred
     *
     * @return The agent health that corresponds to the log level.
     */
    public static AgentHealth fromLogLevel(Level logLevel) {
        if (logLevel.isGreaterOrEqual(Level.ERROR)) {
            return ERROR;
        }
        if (logLevel.isGreaterOrEqual(Level.WARN)) {
            return WARNING;
        }
        return OK;
    }

    /**
     * @return the constant name followed by its description in parentheses.
     */
    @Override
    public String toString() {
        return String.format("%s (%s)", name(), description);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package rocks.inspectit.ocelot.config.model.selfmonitoring;

import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.NonNull;

import javax.validation.constraints.AssertFalse;
import java.time.Duration;

/**
 * Defines the settings for the agent health.
 */
@Data
@NoArgsConstructor
public class AgentHealthSettings {

    /**
     * How long a health status that is neither OK nor caused by instrumentation stays valid.
     * A status caused by instrumentation errors instead stays valid until the next re-instrumentation.
     */
    @NonNull
    private Duration validityPeriod;

    /**
     * Validation hook: the constraint is violated when this method returns {@code true}.
     *
     * @return {@code true} only if a validity period is set and it is negative.
     */
    @AssertFalse(message = "The specified period should not be negative!")
    public boolean isNegativeDuration() {
        if (validityPeriod == null) {
            return false;
        }
        return validityPeriod.isNegative();
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,10 @@ public class SelfMonitoringSettings {
@Valid
private ActionMetricsSettings actionMetrics;

/**
* Settings for {@link rocks.inspectit.ocelot.core.selfmonitoring.AgentStatusManager}
*/
@Valid
private AgentHealthSettings agentHealth;

}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ inspectit:
# if true, the execution time (duration in ms) per action will be recorded
enabled: false

# settings regarding the update and notification frequency of the agent health
agent-health:
# defines how long a non-ok and non-instrumentation-related health status is valid
# health changes due to instrumentation errors are valid until the next re-instrumentation
validity-period: 1h

# definitions of existing self-monitoring metrics
metrics:
definitions:
Expand Down Expand Up @@ -43,6 +49,15 @@ inspectit:
aggregation: SUM
tags: {"level": true}

'[inspectit/self/health]':
enabled: ${inspectit.self-monitoring.enabled}
type: LONG
unit: health
description: "current health status of the agent (0 = OK, 1 = WARNING, 2 = ERROR)"
views:
'[inspectit/self/health]':
aggregation: LAST_VALUE

'[inspectit/self/action/execution-time]':
enabled: ${inspectit.self-monitoring.action-metrics.enabled}
type: LONG
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ inspectit:

self-monitoring:
enabled: false
# key must be 'agent-health' to bind to the 'agentHealth' property of the self-monitoring settings
agent-health:
validity-period: 1h

log-preloading:
enabled: false
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ public InspectitEnvironment(ConfigurableApplicationContext ctx, Optional<String>
* neither the property sources nor {@link #currentConfig} will change during the execution of propertiesUpdater.
*/
public synchronized void updatePropertySources(Consumer<MutablePropertySources> propertiesUpdater) {
eventDrain.publishEvent(new PropertySourcesReloadEvent(this));
propertiesUpdater.accept(getPropertySources());
InspectitConfig oldConfig = currentConfig;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package rocks.inspectit.ocelot.core.config;

import org.springframework.context.ApplicationEvent;

/**
* This event is fired whenever the set of {@link org.springframework.core.env.PropertySource}s is to be reloaded.
* In contrast to {@link PropertySourcesChangedEvent}, this event fires <b>before</b> the reload process.
*/
public class PropertySourcesReloadEvent extends ApplicationEvent {

/**
* Creates the event. The constructor is package-private.
*
* @param source the object passed on to {@link ApplicationEvent} as the event source
*/
PropertySourcesReloadEvent(Object source) {
super(source);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.event.EventListener;
import org.springframework.stereotype.Service;
import rocks.inspectit.ocelot.config.model.InspectitConfig;
import rocks.inspectit.ocelot.config.model.config.HttpConfigSettings;
import rocks.inspectit.ocelot.core.config.InspectitEnvironment;
import rocks.inspectit.ocelot.core.selfmonitoring.event.AgentHealthChangedEvent;
import rocks.inspectit.ocelot.core.service.DynamicallyActivatableService;

import java.util.concurrent.ScheduledExecutorService;
Expand Down Expand Up @@ -80,4 +82,11 @@ public void run() {
});
}
}

/**
 * Forwards the new health carried by the event to the current state.
 * The event is ignored while no current state is set.
 *
 * @param event the event carrying the new agent health
 */
@EventListener
void agentHealthChanged(AgentHealthChangedEvent event) {
    if (currentState == null) {
        return;
    }
    currentState.updateAgentHealth(event.getNewHealth());
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package rocks.inspectit.ocelot.core.config.propertysources.http;

import lombok.Getter;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.Header;
Expand All @@ -16,6 +17,7 @@
import org.springframework.core.env.PropertiesPropertySource;
import org.springframework.core.env.PropertySource;
import rocks.inspectit.ocelot.bootstrap.AgentManager;
import rocks.inspectit.ocelot.commons.models.health.AgentHealth;
import rocks.inspectit.ocelot.config.model.config.HttpConfigSettings;
import rocks.inspectit.ocelot.core.config.util.InvalidPropertiesException;
import rocks.inspectit.ocelot.core.config.util.PropertyUtils;
Expand Down Expand Up @@ -95,6 +97,8 @@ public class HttpPropertySourceState {
@Getter
private boolean firstFileWriteAttemptSuccessful = true;

private AgentHealth agentHealth = AgentHealth.OK;

/**
* Constructor.
*
Expand Down Expand Up @@ -132,6 +136,15 @@ public boolean update(boolean fallBackToFile) {
return false;
}

/**
 * Stores the agent health that is sent along with the request for the agent configuration.
 *
 * @param newHealth The agent health to store
 */
public void updateAgentHealth(@NonNull AgentHealth newHealth) {
    this.agentHealth = newHealth;
}

/**
* Parse the given properties string into an instance of {@link Properties}. The string can be represented as a JSON
* or YAML document.
Expand Down Expand Up @@ -242,6 +255,7 @@ private void setAgentMetaHeaders(HttpGet httpGet) {
httpGet.setHeader(META_HEADER_PREFIX + "VM-NAME", runtime.getVmName());
httpGet.setHeader(META_HEADER_PREFIX + "VM-VENDOR", runtime.getVmVendor());
httpGet.setHeader(META_HEADER_PREFIX + "START-TIME", String.valueOf(runtime.getStartTime()));
httpGet.setHeader(META_HEADER_PREFIX + "HEALTH", agentHealth.name());
}

/**
Expand Down
Loading