diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java
index d95b02d36b7..b7386dbd49a 100644
--- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java
+++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java
@@ -54,6 +54,12 @@ public class VeniceMetricsConfig {
*/
public static final String OTEL_VENICE_METRICS_EXPORT_TO_ENDPOINT = "otel.venice.metrics.export.to.endpoint";
+ /**
+ * Export interval in seconds for OpenTelemetry metrics
+ */
+ public static final String OTEL_VENICE_METRICS_EXPORT_INTERVAL_IN_SECONDS =
+ "otel.venice.metrics.export.interval.in.seconds";
+
/**
* Config Map to add custom dimensions to the metrics: Can be used for system dimensions
* amongst other custom dimensions
@@ -130,6 +136,7 @@ public class VeniceMetricsConfig {
* 2. {@link VeniceOpenTelemetryMetricsRepository.LogBasedMetricExporter} for debug purposes
*/
private final boolean exportOtelMetricsToEndpoint;
+ private final int exportOtelMetricsIntervalInSeconds;
private final boolean exportOtelMetricsToLog;
/** Custom dimensions */
@@ -165,6 +172,7 @@ private VeniceMetricsConfig(Builder builder) {
this.metricEntities = builder.metricEntities;
this.emitOTelMetrics = builder.emitOtelMetrics;
this.exportOtelMetricsToEndpoint = builder.exportOtelMetricsToEndpoint;
+ this.exportOtelMetricsIntervalInSeconds = builder.exportOtelMetricsIntervalInSeconds;
this.otelCustomDimensionsMap = builder.otelCustomDimensionsMap;
this.otelExportProtocol = builder.otelExportProtocol;
this.otelEndpoint = builder.otelEndpoint;
@@ -184,6 +192,7 @@ public static class Builder {
private Collection metricEntities = new ArrayList<>();
private boolean emitOtelMetrics = false;
private boolean exportOtelMetricsToEndpoint = false;
+ private int exportOtelMetricsIntervalInSeconds = 60;
private Map otelCustomDimensionsMap = new HashMap<>();
private String otelExportProtocol = OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF;
private String otelEndpoint = null;
@@ -222,6 +231,11 @@ public Builder setExportOtelMetricsToEndpoint(boolean exportOtelMetricsToEndpoin
return this;
}
+ public Builder setExportOtelMetricsIntervalInSeconds(int exportOtelMetricsIntervalInSeconds) {
+ this.exportOtelMetricsIntervalInSeconds = exportOtelMetricsIntervalInSeconds;
+ return this;
+ }
+
public Builder setOtelExportProtocol(String otelExportProtocol) {
this.otelExportProtocol = otelExportProtocol;
return this;
@@ -289,6 +303,10 @@ public Builder extractAndSetOtelConfigs(Map configs) {
setExportOtelMetricsToEndpoint(Boolean.parseBoolean(configValue));
}
+ if ((configValue = configs.get(OTEL_VENICE_METRICS_EXPORT_INTERVAL_IN_SECONDS)) != null) {
+ setExportOtelMetricsIntervalInSeconds(Integer.parseInt(configValue));
+ }
+
/**
* custom dimensions are passed as key=value pairs separated by '='
* Multiple dimensions are separated by ','
@@ -426,6 +444,10 @@ public boolean exportOtelMetricsToEndpoint() {
return exportOtelMetricsToEndpoint;
}
+ public int getExportOtelMetricsIntervalInSeconds() {
+ return exportOtelMetricsIntervalInSeconds;
+ }
+
public Map getOtelCustomDimensionsMap() {
return otelCustomDimensionsMap;
}
diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java
index 711d41cdb56..d329f4d56ed 100644
--- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java
+++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java
@@ -19,7 +19,6 @@
import io.opentelemetry.sdk.common.CompletableResultCode;
import io.opentelemetry.sdk.metrics.Aggregation;
import io.opentelemetry.sdk.metrics.InstrumentSelector;
-import io.opentelemetry.sdk.metrics.InstrumentSelectorBuilder;
import io.opentelemetry.sdk.metrics.InstrumentType;
import io.opentelemetry.sdk.metrics.SdkMeterProvider;
import io.opentelemetry.sdk.metrics.SdkMeterProviderBuilder;
@@ -34,6 +33,7 @@
import java.util.Collection;
import java.util.List;
import java.util.Map;
+import java.util.concurrent.TimeUnit;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -97,19 +97,20 @@ private void setExponentialHistogramAggregation(SdkMeterProviderBuilder builder,
}
}
- // Build an InstrumentSelector with multiple setName calls for all Exponential Histogram metrics
- InstrumentSelectorBuilder selectorBuilder = InstrumentSelector.builder().setType(InstrumentType.HISTOGRAM);
- metricNames.forEach(selectorBuilder::setName);
-
- // Register a single view with all metric names included in the InstrumentSelector
- builder.registerView(
- selectorBuilder.build(),
- View.builder()
- .setAggregation(
- Aggregation.base2ExponentialBucketHistogram(
- metricsConfig.getOtelExponentialHistogramMaxBuckets(),
- metricsConfig.getOtelExponentialHistogramMaxScale()))
- .build());
+ // Register views for all MetricType.HISTOGRAM metrics to be aggregated/exported as exponential histograms
+ for (String metricName: metricNames) {
+ InstrumentSelector selector =
+ InstrumentSelector.builder().setType(InstrumentType.HISTOGRAM).setName(metricName).build();
+
+ builder.registerView(
+ selector,
+ View.builder()
+ .setAggregation(
+ Aggregation.base2ExponentialBucketHistogram(
+ metricsConfig.getOtelExponentialHistogramMaxBuckets(),
+ metricsConfig.getOtelExponentialHistogramMaxScale()))
+ .build());
+ }
}
public VeniceOpenTelemetryMetricsRepository(VeniceMetricsConfig metricsConfig) {
@@ -129,11 +130,17 @@ public VeniceOpenTelemetryMetricsRepository(VeniceMetricsConfig metricsConfig) {
SdkMeterProviderBuilder builder = SdkMeterProvider.builder();
if (metricsConfig.exportOtelMetricsToEndpoint()) {
MetricExporter httpExporter = getOtlpHttpMetricExporter(metricsConfig);
- builder.registerMetricReader(PeriodicMetricReader.builder(httpExporter).build());
+ builder.registerMetricReader(
+ PeriodicMetricReader.builder(httpExporter)
+ .setInterval(metricsConfig.getExportOtelMetricsIntervalInSeconds(), TimeUnit.SECONDS)
+ .build());
}
if (metricsConfig.exportOtelMetricsToLog()) {
// internal to test: Disabled by default
- builder.registerMetricReader(PeriodicMetricReader.builder(new LogBasedMetricExporter(metricsConfig)).build());
+ builder.registerMetricReader(
+ PeriodicMetricReader.builder(new LogBasedMetricExporter(metricsConfig))
+ .setInterval(metricsConfig.getExportOtelMetricsIntervalInSeconds(), TimeUnit.SECONDS)
+ .build());
}
if (metricsConfig.useOtelExponentialHistogram()) {
diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestValidationOutcome.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestValidationOutcome.java
deleted file mode 100644
index 84b6ce30c5c..00000000000
--- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestValidationOutcome.java
+++ /dev/null
@@ -1,15 +0,0 @@
-package com.linkedin.venice.stats.dimensions;
-
-public enum RequestValidationOutcome {
- VALID, INVALID_KEY_COUNT_LIMIT_EXCEEDED;
-
- private final String outcome;
-
- RequestValidationOutcome() {
- this.outcome = name().toLowerCase();
- }
-
- public String getOutcome() {
- return this.outcome;
- }
-}
diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java
index 54737cc5342..c9f361d8f71 100644
--- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java
+++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java
@@ -21,9 +21,6 @@ public enum VeniceMetricsDimensions {
/** {@link HttpResponseStatusCodeCategory} ie. 1xx, 2xx, etc */
HTTP_RESPONSE_STATUS_CODE_CATEGORY("http.response.status_code_category"),
- /** {@link RequestValidationOutcome#outcome} */
- VENICE_REQUEST_VALIDATION_OUTCOME("venice.request.validation_outcome"),
-
/** {@link VeniceResponseStatusCategory} */
VENICE_RESPONSE_STATUS_CODE_CATEGORY("venice.response.status_code_category"),
diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java
index 761c30cfdfe..b493e41c905 100644
--- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java
+++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java
@@ -8,7 +8,7 @@
* to account for all healthy requests. This dimensions makes it easier to make Venice specific aggregations.
*/
public enum VeniceResponseStatusCategory {
- HEALTHY, UNHEALTHY, TARDY, THROTTLED, BAD_REQUEST;
+ SUCCESS, FAIL;
private final String category;
diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntityState.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntityState.java
index 69aabb428e3..117430f30e7 100644
--- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntityState.java
+++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntityState.java
@@ -6,57 +6,48 @@
import io.opentelemetry.api.metrics.LongCounter;
import io.tehuti.metrics.MeasurableStat;
import io.tehuti.metrics.Sensor;
-import io.tehuti.utils.RedundantLogFilter;
-import java.util.HashMap;
+import java.util.Collections;
import java.util.List;
-import java.util.Map;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
/**
- * Operational state of a metric. It holds
- * 1. {@link MetricEntity}
- * 2. 1 Otel Instrument and
- * 3. multiple tehuti Sensors for this Otel Metric
+ * Operational state of a metric. It holds:
+ * 1. A {@link MetricEntity}
+ * 2. One OpenTelemetry (Otel) Instrument
+ * 3. Zero or one (out of zero for new metrics or more for existing metrics) Tehuti sensors for this Otel Metric.
+ *
+ * One Otel instrument can cover multiple Tehuti sensors through the use of dimensions. Ideally, this class should represent a one-to-many
+ * mapping between an Otel instrument and Tehuti sensors. However, to simplify lookup during runtime, this class holds one Otel instrument
+ * and one Tehuti sensor. If an Otel instrument corresponds to multiple Tehuti sensors, there will be multiple {@link MetricEntityState}
+ * objects, each containing the same Otel instrument but different Tehuti sensors.
*/
public class MetricEntityState {
- private static final Logger LOGGER = LogManager.getLogger(MetricEntityState.class);
- private static final RedundantLogFilter REDUNDANT_LOG_FILTER = RedundantLogFilter.getRedundantLogFilter();
private final MetricEntity metricEntity;
/** Otel metric */
private Object otelMetric = null;
- /** Map of tehuti names and sensors: 1 Otel metric can cover multiple Tehuti sensors */
- private Map tehutiSensors = null;
+ /** Respective tehuti metric */
+ private Sensor tehutiSensor = null;
public MetricEntityState(MetricEntity metricEntity, VeniceOpenTelemetryMetricsRepository otelRepository) {
- this.metricEntity = metricEntity;
- setOtelMetric(otelRepository.createInstrument(this.metricEntity));
+ this(metricEntity, otelRepository, null, null, Collections.EMPTY_LIST);
}
public MetricEntityState(
MetricEntity metricEntity,
VeniceOpenTelemetryMetricsRepository otelRepository,
- TehutiSensorRegistrationFunction registerTehutiSensor,
- Map> tehutiMetricInput) {
+ TehutiSensorRegistrationFunction registerTehutiSensorFn,
+ TehutiMetricNameEnum tehutiMetricNameEnum,
+ List tehutiMetricStats) {
this.metricEntity = metricEntity;
- createMetric(otelRepository, tehutiMetricInput, registerTehutiSensor);
+ createMetric(otelRepository, tehutiMetricNameEnum, tehutiMetricStats, registerTehutiSensorFn);
}
public void setOtelMetric(Object otelMetric) {
this.otelMetric = otelMetric;
}
- /**
- * Add Tehuti {@link Sensor} to tehutiSensors map and throw exception if sensor with same name already exists
- */
- public void addTehutiSensors(TehutiMetricNameEnum name, Sensor tehutiSensor) {
- if (tehutiSensors == null) {
- tehutiSensors = new HashMap<>();
- }
- if (tehutiSensors.put(name, tehutiSensor) != null) {
- throw new IllegalArgumentException("Sensor with name '" + name + "' already exists.");
- }
+ public void setTehutiSensor(Sensor tehutiSensor) {
+ this.tehutiSensor = tehutiSensor;
}
/**
@@ -69,18 +60,18 @@ public interface TehutiSensorRegistrationFunction {
public void createMetric(
VeniceOpenTelemetryMetricsRepository otelRepository,
- Map> tehutiMetricInput,
- TehutiSensorRegistrationFunction registerTehutiSensor) {
+ TehutiMetricNameEnum tehutiMetricNameEnum,
+ List tehutiMetricStats,
+ TehutiSensorRegistrationFunction registerTehutiSensorFn) {
// Otel metric: otelRepository will be null if otel is not enabled
if (otelRepository != null) {
setOtelMetric(otelRepository.createInstrument(this.metricEntity));
}
// tehuti metric
- for (Map.Entry> entry: tehutiMetricInput.entrySet()) {
- addTehutiSensors(
- entry.getKey(),
- registerTehutiSensor
- .register(entry.getKey().getMetricName(), entry.getValue().toArray(new MeasurableStat[0])));
+ if (tehutiMetricStats != null && !tehutiMetricStats.isEmpty()) {
+ setTehutiSensor(
+ registerTehutiSensorFn
+ .register(tehutiMetricNameEnum.getMetricName(), tehutiMetricStats.toArray(new MeasurableStat[0])));
}
}
@@ -105,38 +96,29 @@ void recordOtelMetric(double value, Attributes otelDimensions) {
}
}
- void recordTehutiMetric(TehutiMetricNameEnum tehutiMetricNameEnum, double value) {
- if (tehutiSensors != null) {
- Sensor sensor = tehutiSensors.get(tehutiMetricNameEnum);
- if (sensor != null) {
- sensor.record(value);
- } else {
- // Log using Redundant log filters to catch any bad tehutiMetricNameEnum is passed in
- String errorLog = "Tehuti Sensor with name '" + tehutiMetricNameEnum + "' not found.";
- if (!REDUNDANT_LOG_FILTER.isRedundantLog(errorLog)) {
- LOGGER.error(errorLog);
- }
- }
+ void recordTehutiMetric(double value) {
+ if (tehutiSensor != null) {
+ tehutiSensor.record(value);
}
}
- public void record(TehutiMetricNameEnum tehutiMetricNameEnum, long value, Attributes otelDimensions) {
+ public void record(long value, Attributes otelDimensions) {
recordOtelMetric(value, otelDimensions);
- recordTehutiMetric(tehutiMetricNameEnum, value);
+ recordTehutiMetric(value);
}
- public void record(TehutiMetricNameEnum tehutiMetricNameEnum, double value, Attributes otelDimensions) {
+ public void record(double value, Attributes otelDimensions) {
recordOtelMetric(value, otelDimensions);
- recordTehutiMetric(tehutiMetricNameEnum, value);
+ recordTehutiMetric(value);
}
/** used only for testing */
- Map getTehutiSensors() {
- return tehutiSensors;
+ Sensor getTehutiSensor() {
+ return tehutiSensor;
}
/** used only for testing */
- static RedundantLogFilter getRedundantLogFilter() {
- return REDUNDANT_LOG_FILTER;
+ Object getOtelMetric() {
+ return otelMetric;
}
}
diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java
index 31906b070da..7f5199976a0 100644
--- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java
+++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java
@@ -39,6 +39,7 @@ public void testDefaultValuesWithBasicConfig() {
assertEquals(config.getMetricPrefix(), "service");
assertFalse(config.emitOtelMetrics());
assertFalse(config.exportOtelMetricsToEndpoint());
+ assertEquals(config.getExportOtelMetricsIntervalInSeconds(), 60);
assertEquals(config.getOtelExportProtocol(), OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF);
assertEquals(config.getOtelEndpoint(), null);
assertTrue(config.getOtelHeaders().isEmpty());
@@ -63,6 +64,7 @@ public void testCustomValues() {
.setMetricPrefix("TestPrefix")
.setTehutiMetricConfig(metricConfig)
.extractAndSetOtelConfigs(otelConfigs)
+ .setExportOtelMetricsIntervalInSeconds(10)
.build();
assertEquals(config.getServiceName(), "TestService");
@@ -70,6 +72,7 @@ public void testCustomValues() {
assertTrue(config.emitOtelMetrics());
assertTrue(config.exportOtelMetricsToLog());
assertEquals(config.getTehutiMetricConfig(), metricConfig);
+ assertEquals(config.getExportOtelMetricsIntervalInSeconds(), 10);
}
@Test(expectedExceptions = IllegalArgumentException.class)
diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java
index 1512a5db8e7..a4349f438a9 100644
--- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java
+++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java
@@ -38,6 +38,7 @@ public void setUp() {
when(mockMetricsConfig.getServiceName()).thenReturn("test_service");
when(mockMetricsConfig.exportOtelMetricsToEndpoint()).thenReturn(true);
when(mockMetricsConfig.getOtelEndpoint()).thenReturn("http://localhost:4318");
+ when(mockMetricsConfig.getExportOtelMetricsIntervalInSeconds()).thenReturn(60);
metricsRepository = new VeniceOpenTelemetryMetricsRepository(mockMetricsConfig);
}
diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/RequestValidationOutcomeTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/RequestValidationOutcomeTest.java
deleted file mode 100644
index f144850be73..00000000000
--- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/RequestValidationOutcomeTest.java
+++ /dev/null
@@ -1,24 +0,0 @@
-package com.linkedin.venice.stats.dimensions;
-
-import static org.testng.Assert.assertEquals;
-
-import org.testng.annotations.Test;
-
-
-public class RequestValidationOutcomeTest {
- @Test
- public void testVeniceRequestValidationOutcome() {
- for (RequestValidationOutcome outcome: RequestValidationOutcome.values()) {
- switch (outcome) {
- case VALID:
- assertEquals(outcome.getOutcome(), "valid");
- break;
- case INVALID_KEY_COUNT_LIMIT_EXCEEDED:
- assertEquals(outcome.getOutcome(), "invalid_key_count_limit_exceeded");
- break;
- default:
- throw new IllegalArgumentException("Unknown outcome: " + outcome);
- }
- }
- }
-}
diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java
index 13a9eb97aa3..56a5ab48f6c 100644
--- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java
+++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java
@@ -28,9 +28,6 @@ public void testGetDimensionNameInSnakeCase() {
case HTTP_RESPONSE_STATUS_CODE_CATEGORY:
assertEquals(dimension.getDimensionName(format), "http.response.status_code_category");
break;
- case VENICE_REQUEST_VALIDATION_OUTCOME:
- assertEquals(dimension.getDimensionName(format), "venice.request.validation_outcome");
- break;
case VENICE_RESPONSE_STATUS_CODE_CATEGORY:
assertEquals(dimension.getDimensionName(format), "venice.response.status_code_category");
break;
@@ -66,9 +63,6 @@ public void testGetDimensionNameInCamelCase() {
case HTTP_RESPONSE_STATUS_CODE_CATEGORY:
assertEquals(dimension.getDimensionName(format), "http.response.statusCodeCategory");
break;
- case VENICE_REQUEST_VALIDATION_OUTCOME:
- assertEquals(dimension.getDimensionName(format), "venice.request.validationOutcome");
- break;
case VENICE_RESPONSE_STATUS_CODE_CATEGORY:
assertEquals(dimension.getDimensionName(format), "venice.response.statusCodeCategory");
break;
@@ -104,9 +98,6 @@ public void testGetDimensionNameInPascalCase() {
case HTTP_RESPONSE_STATUS_CODE_CATEGORY:
assertEquals(dimension.getDimensionName(format), "Http.Response.StatusCodeCategory");
break;
- case VENICE_REQUEST_VALIDATION_OUTCOME:
- assertEquals(dimension.getDimensionName(format), "Venice.Request.ValidationOutcome");
- break;
case VENICE_RESPONSE_STATUS_CODE_CATEGORY:
assertEquals(dimension.getDimensionName(format), "Venice.Response.StatusCodeCategory");
break;
diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategoryTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategoryTest.java
index 22272d35763..940b8eaeced 100644
--- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategoryTest.java
+++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategoryTest.java
@@ -10,20 +10,11 @@ public class VeniceResponseStatusCategoryTest {
public void testVeniceResponseStatusCategory() {
for (VeniceResponseStatusCategory responseStatusCategory: VeniceResponseStatusCategory.values()) {
switch (responseStatusCategory) {
- case HEALTHY:
- assertEquals(responseStatusCategory.getCategory(), "healthy");
+ case SUCCESS:
+ assertEquals(responseStatusCategory.getCategory(), "success");
break;
- case UNHEALTHY:
- assertEquals(responseStatusCategory.getCategory(), "unhealthy");
- break;
- case TARDY:
- assertEquals(responseStatusCategory.getCategory(), "tardy");
- break;
- case THROTTLED:
- assertEquals(responseStatusCategory.getCategory(), "throttled");
- break;
- case BAD_REQUEST:
- assertEquals(responseStatusCategory.getCategory(), "bad_request");
+ case FAIL:
+ assertEquals(responseStatusCategory.getCategory(), "fail");
break;
default:
throw new IllegalArgumentException("Unknown response status category: " + responseStatusCategory);
diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/metrics/MetricEntityStateTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/metrics/MetricEntityStateTest.java
index 93aeea3749e..955f64edffe 100644
--- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/metrics/MetricEntityStateTest.java
+++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/metrics/MetricEntityStateTest.java
@@ -1,22 +1,19 @@
package com.linkedin.venice.stats.metrics;
+import static com.linkedin.venice.stats.metrics.MetricType.HISTOGRAM;
+import static java.util.Collections.singletonList;
+import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
-import static org.testng.Assert.assertFalse;
-import static org.testng.Assert.assertTrue;
import com.linkedin.venice.stats.VeniceOpenTelemetryMetricsRepository;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.metrics.DoubleHistogram;
import io.opentelemetry.api.metrics.LongCounter;
-import io.tehuti.metrics.MeasurableStat;
import io.tehuti.metrics.Sensor;
-import io.tehuti.utils.RedundantLogFilter;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import io.tehuti.metrics.stats.Count;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
@@ -29,7 +26,7 @@ public class MetricEntityStateTest {
private Sensor mockSensor;
private enum TestTehutiMetricNameEnum implements TehutiMetricNameEnum {
- TEST_METRIC_1, TEST_METRIC_2;
+ TEST_METRIC;
private final String metricName;
@@ -47,6 +44,7 @@ public String getMetricName() {
public void setUp() {
mockOtelRepository = mock(VeniceOpenTelemetryMetricsRepository.class);
mockMetricEntity = mock(MetricEntity.class);
+ doReturn(HISTOGRAM).when(mockMetricEntity).getMetricType();
sensorRegistrationFunction = (name, stats) -> mock(Sensor.class);
mockSensor = mock(Sensor.class);
}
@@ -57,36 +55,28 @@ public void testCreateMetricWithOtelEnabled() {
LongCounter longCounter = mock(LongCounter.class);
when(mockOtelRepository.createInstrument(mockMetricEntity)).thenReturn(longCounter);
- Map> tehutiMetricInput = new HashMap<>();
- MetricEntityState metricEntityState =
- new MetricEntityState(mockMetricEntity, mockOtelRepository, sensorRegistrationFunction, tehutiMetricInput);
-
- Assert.assertNotNull(metricEntityState);
- Assert.assertNull(metricEntityState.getTehutiSensors()); // No Tehuti sensors added
- }
-
- @Test
- public void testAddTehutiSensorsSuccessfully() {
- MetricEntityState metricEntityState = new MetricEntityState(mockMetricEntity, mockOtelRepository);
- metricEntityState.addTehutiSensors(TestTehutiMetricNameEnum.TEST_METRIC_1, mockSensor);
-
- Assert.assertNotNull(metricEntityState.getTehutiSensors());
- assertTrue(metricEntityState.getTehutiSensors().containsKey(TestTehutiMetricNameEnum.TEST_METRIC_1));
- }
-
- @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = ".*Sensor with name 'TEST_METRIC_1' already exists.*")
- public void testAddTehutiSensorThrowsExceptionOnDuplicate() {
+ // without tehuti sensor
MetricEntityState metricEntityState = new MetricEntityState(mockMetricEntity, mockOtelRepository);
- metricEntityState.addTehutiSensors(TestTehutiMetricNameEnum.TEST_METRIC_1, mockSensor);
-
- // Adding the same sensor name again should throw an exception
- metricEntityState.addTehutiSensors(TestTehutiMetricNameEnum.TEST_METRIC_1, mockSensor);
+ Assert.assertNotNull(metricEntityState);
+ Assert.assertNotNull(metricEntityState.getOtelMetric());
+ Assert.assertNull(metricEntityState.getTehutiSensor()); // No Tehuti sensors added
+
+ // with tehuti sensor
+ metricEntityState = new MetricEntityState(
+ mockMetricEntity,
+ mockOtelRepository,
+ sensorRegistrationFunction,
+ TestTehutiMetricNameEnum.TEST_METRIC,
+ singletonList(new Count()));
+ Assert.assertNotNull(metricEntityState);
+ Assert.assertNotNull(metricEntityState.getOtelMetric());
+ Assert.assertNotNull(metricEntityState.getTehutiSensor());
}
@Test
public void testRecordOtelMetricHistogram() {
DoubleHistogram doubleHistogram = mock(DoubleHistogram.class);
- when(mockMetricEntity.getMetricType()).thenReturn(MetricType.HISTOGRAM);
+ when(mockMetricEntity.getMetricType()).thenReturn(HISTOGRAM);
MetricEntityState metricEntityState = new MetricEntityState(mockMetricEntity, mockOtelRepository);
metricEntityState.setOtelMetric(doubleHistogram);
@@ -114,40 +104,34 @@ public void testRecordOtelMetricCounter() {
@Test
public void testRecordTehutiMetric() {
MetricEntityState metricEntityState = new MetricEntityState(mockMetricEntity, mockOtelRepository);
- metricEntityState.addTehutiSensors(TestTehutiMetricNameEnum.TEST_METRIC_1, mockSensor);
-
- metricEntityState.recordTehutiMetric(TestTehutiMetricNameEnum.TEST_METRIC_1, 15.0);
-
+ metricEntityState.setTehutiSensor(mockSensor);
+ metricEntityState.recordTehutiMetric(15.0);
verify(mockSensor, times(1)).record(15.0);
}
@Test
public void testRecordMetricsWithBothOtelAndTehuti() {
DoubleHistogram doubleHistogram = mock(DoubleHistogram.class);
- when(mockMetricEntity.getMetricType()).thenReturn(MetricType.HISTOGRAM);
+ when(mockMetricEntity.getMetricType()).thenReturn(HISTOGRAM);
MetricEntityState metricEntityState = new MetricEntityState(mockMetricEntity, mockOtelRepository);
- RedundantLogFilter logFilter = metricEntityState.getRedundantLogFilter();
metricEntityState.setOtelMetric(doubleHistogram);
- metricEntityState.addTehutiSensors(TestTehutiMetricNameEnum.TEST_METRIC_1, mockSensor);
+ metricEntityState.setTehutiSensor(mockSensor);
Attributes attributes = Attributes.builder().put("key", "value").build();
- // case 1: check using valid Tehuti metric that was added to metricEntityState
- metricEntityState.record(TestTehutiMetricNameEnum.TEST_METRIC_1, 20.0, attributes);
+ // called 0 times
+ verify(doubleHistogram, times(0)).record(20.0, attributes);
+ verify(mockSensor, times(0)).record(20.0);
+
+ // called 1 time
+ metricEntityState.record(20.0, attributes);
verify(doubleHistogram, times(1)).record(20.0, attributes);
verify(mockSensor, times(1)).record(20.0);
- assertFalse(logFilter.isRedundantLog("Tehuti Sensor with name 'TEST_METRIC_1' not found.", false));
- // case 2: check using a Tehuti metric that was not added to metricEntityState and verify it called
- // REDUNDANT_LOG_FILTER
- metricEntityState.record(TestTehutiMetricNameEnum.TEST_METRIC_2, 20.0, attributes);
- // otel metric should be called for the second time
+ // called 2 times
+ metricEntityState.record(20.0, attributes);
verify(doubleHistogram, times(2)).record(20.0, attributes);
- // Tehuti metric should be not called for the second time as we passed in an invalid metric name
- verify(mockSensor, times(1)).record(20.0);
- // This should have invoked the log filter for TEST_METRIC_2
- assertFalse(logFilter.isRedundantLog("Tehuti Sensor with name 'TEST_METRIC_1' not found.", false));
- assertTrue(logFilter.isRedundantLog("Tehuti Sensor with name 'TEST_METRIC_2' not found.", false));
+ verify(mockSensor, times(2)).record(20.0);
}
}
diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VenicePathParser.java b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VenicePathParser.java
index 7fc3deddf06..b5665a041bf 100644
--- a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VenicePathParser.java
+++ b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VenicePathParser.java
@@ -320,6 +320,7 @@ public VenicePath parseResourceUri(String uri, BasicFullHttpRequest fullHttpRequ
}
AggRouterHttpRequestStats aggRouterHttpRequestStats = routerStats.getStatsByType(requestType);
+
if (!requestType.equals(SINGLE_GET)) {
/**
* Here we only track key num for non single-get request, since single-get request will be always 1.
@@ -344,6 +345,7 @@ public VenicePath parseResourceUri(String uri, BasicFullHttpRequest fullHttpRequ
routerStats.getStatsByType(keyCountLimitException.getRequestType())
.recordBadRequestKeyCount(
keyCountLimitException.getStoreName(),
+ responseStatus,
keyCountLimitException.getRequestKeyCount());
}
/**
diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceResponseAggregator.java b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceResponseAggregator.java
index 40dce6958c1..c3369fcfeb3 100644
--- a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceResponseAggregator.java
+++ b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceResponseAggregator.java
@@ -250,20 +250,20 @@ public FullHttpResponse buildResponse(
// here...
double latency = LatencyUtils.convertNSToMS(timeValue.getRawValue(TimeUnit.NANOSECONDS));
stats.recordLatency(storeName, latency);
+ int keyNum = venicePath.getPartitionKeys().size();
if (HEALTHY_STATUSES.contains(httpResponseStatus)) {
- routerStats.getStatsByType(RequestType.SINGLE_GET)
- .recordReadQuotaUsage(storeName, venicePath.getPartitionKeys().size());
+ routerStats.getStatsByType(RequestType.SINGLE_GET).recordReadQuotaUsage(storeName, keyNum);
if (isFastRequest(latency, requestType)) {
- stats.recordHealthyRequest(storeName, latency, httpResponseStatus);
+ stats.recordHealthyRequest(storeName, latency, httpResponseStatus, keyNum);
} else {
- stats.recordTardyRequest(storeName, latency, httpResponseStatus);
+ stats.recordTardyRequest(storeName, latency, httpResponseStatus, keyNum);
}
} else if (httpResponseStatus.equals(TOO_MANY_REQUESTS)) {
LOGGER.debug("request is rejected by storage node because quota is exceeded");
- stats.recordThrottledRequest(storeName, latency, httpResponseStatus);
+ stats.recordThrottledRequest(storeName, latency, httpResponseStatus, keyNum);
} else {
LOGGER.debug("Unhealthy request detected, latency: {}ms, response status: {}", latency, httpResponseStatus);
- stats.recordUnhealthyRequest(storeName, latency, httpResponseStatus);
+ stats.recordUnhealthyRequest(storeName, latency, httpResponseStatus, keyNum);
}
}
timeValue = allMetrics.get(ROUTER_RESPONSE_WAIT_TIME);
diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStats.java
index 3f9dc822871..81c4d3ad0e0 100644
--- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStats.java
+++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStats.java
@@ -80,9 +80,9 @@ public void recordRequest(String storeName) {
recordStoreStats(storeName, RouterHttpRequestStats::recordIncomingRequest);
}
- public void recordHealthyRequest(String storeName, double latency, HttpResponseStatus responseStatus) {
- totalStats.recordHealthyRequest(latency, responseStatus);
- recordStoreStats(storeName, stats -> stats.recordHealthyRequest(latency, responseStatus));
+ public void recordHealthyRequest(String storeName, double latency, HttpResponseStatus responseStatus, int keyNum) {
+ totalStats.recordHealthyRequest(latency, responseStatus, keyNum);
+ recordStoreStats(storeName, stats -> stats.recordHealthyRequest(latency, responseStatus, keyNum));
}
public void recordUnhealthyRequest(String storeName, HttpResponseStatus responseStatus) {
@@ -97,10 +97,10 @@ public void recordUnavailableReplicaStreamingRequest(String storeName) {
recordStoreStats(storeName, RouterHttpRequestStats::recordUnavailableReplicaStreamingRequest);
}
- public void recordUnhealthyRequest(String storeName, double latency, HttpResponseStatus responseStatus) {
- totalStats.recordUnhealthyRequest(latency, responseStatus);
+ public void recordUnhealthyRequest(String storeName, double latency, HttpResponseStatus responseStatus, int keyNum) {
+ totalStats.recordUnhealthyRequest(latency, responseStatus, keyNum);
if (storeName != null) {
- recordStoreStats(storeName, stats -> stats.recordUnhealthyRequest(latency, responseStatus));
+ recordStoreStats(storeName, stats -> stats.recordUnhealthyRequest(latency, responseStatus, keyNum));
}
}
@@ -115,9 +115,9 @@ public void recordReadQuotaUsage(String storeName, int quotaUsage) {
recordStoreStats(storeName, stats -> stats.recordReadQuotaUsage(quotaUsage));
}
- public void recordTardyRequest(String storeName, double latency, HttpResponseStatus responseStatus) {
- totalStats.recordTardyRequest(latency, responseStatus);
- recordStoreStats(storeName, stats -> stats.recordTardyRequest(latency, responseStatus));
+ public void recordTardyRequest(String storeName, double latency, HttpResponseStatus responseStatus, int keyNum) {
+ totalStats.recordTardyRequest(latency, responseStatus, keyNum);
+ recordStoreStats(storeName, stats -> stats.recordTardyRequest(latency, responseStatus, keyNum));
}
/**
@@ -132,9 +132,13 @@ public void recordThrottledRequest(String storeName, HttpResponseStatus httpResp
recordStoreStats(storeName, stats -> stats.recordThrottledRequest(httpResponseStatus));
}
- public void recordThrottledRequest(String storeName, double latency, HttpResponseStatus httpResponseStatus) {
- totalStats.recordThrottledRequest(latency, httpResponseStatus);
- recordStoreStats(storeName, stats -> stats.recordThrottledRequest(latency, httpResponseStatus));
+ public void recordThrottledRequest(
+ String storeName,
+ double latency,
+ HttpResponseStatus httpResponseStatus,
+ int keyNum) {
+ totalStats.recordThrottledRequest(latency, httpResponseStatus, keyNum);
+ recordStoreStats(storeName, stats -> stats.recordThrottledRequest(latency, httpResponseStatus, keyNum));
}
public void recordBadRequest(String storeName, HttpResponseStatus responseStatus) {
@@ -144,10 +148,10 @@ public void recordBadRequest(String storeName, HttpResponseStatus responseStatus
}
}
- public void recordBadRequestKeyCount(String storeName, int keyCount) {
- totalStats.recordBadRequestKeyCount(keyCount);
+ public void recordBadRequestKeyCount(String storeName, HttpResponseStatus responseStatus, int keyNum) {
+ totalStats.recordIncomingBadRequestKeyCountMetric(responseStatus, keyNum);
if (storeName != null) {
- recordStoreStats(storeName, stats -> stats.recordBadRequestKeyCount(keyCount));
+ recordStoreStats(storeName, stats -> stats.recordIncomingBadRequestKeyCountMetric(responseStatus, keyNum));
}
}
@@ -261,8 +265,8 @@ public long getTotalRetriesError() {
}
public void recordKeyNum(String storeName, int keyNum) {
- totalStats.recordKeyNum(keyNum);
- recordStoreStats(storeName, stats -> stats.recordKeyNum(keyNum));
+ totalStats.recordIncomingKeyCountMetric(keyNum);
+ recordStoreStats(storeName, stats -> stats.recordIncomingKeyCountMetric(keyNum));
}
public void recordRequestUsage(String storeName, int usage) {
@@ -291,18 +295,18 @@ public void recordUnavailableRequest(String storeName) {
}
public void recordDelayConstraintAbortedRetryRequest(String storeName) {
- totalStats.recordDelayConstraintAbortedRetryRequest();
- recordStoreStats(storeName, RouterHttpRequestStats::recordDelayConstraintAbortedRetryRequest);
+ totalStats.recordDelayConstraintAbortedRetryCountMetric();
+ recordStoreStats(storeName, RouterHttpRequestStats::recordDelayConstraintAbortedRetryCountMetric);
}
public void recordSlowRouteAbortedRetryRequest(String storeName) {
- totalStats.recordSlowRouteAbortedRetryRequest();
- recordStoreStats(storeName, RouterHttpRequestStats::recordSlowRouteAbortedRetryRequest);
+ totalStats.recordSlowRouteAbortedRetryCountMetric();
+ recordStoreStats(storeName, RouterHttpRequestStats::recordSlowRouteAbortedRetryCountMetric);
}
public void recordRetryRouteLimitAbortedRetryRequest(String storeName) {
- totalStats.recordRetryRouteLimitAbortedRetryRequest();
- recordStoreStats(storeName, RouterHttpRequestStats::recordRetryRouteLimitAbortedRetryRequest);
+ totalStats.recordRetryRouteLimitAbortedRetryCountMetric();
+ recordStoreStats(storeName, RouterHttpRequestStats::recordRetryRouteLimitAbortedRetryCountMetric);
}
public void recordKeySize(long keySize) {
@@ -320,8 +324,8 @@ public void recordDisallowedRetryRequest(String storeName) {
}
public void recordNoAvailableReplicaAbortedRetryRequest(String storeName) {
- totalStats.recordNoAvailableReplicaAbortedRetryRequest();
- recordStoreStats(storeName, RouterHttpRequestStats::recordNoAvailableReplicaAbortedRetryRequest);
+ totalStats.recordNoAvailableReplicaAbortedRetryCountMetric();
+ recordStoreStats(storeName, RouterHttpRequestStats::recordNoAvailableReplicaAbortedRetryCountMetric);
}
public void recordErrorRetryAttemptTriggeredByPendingRequestCheck(String storeName) {
diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java
index ed9debd11ce..cdade9e5e6c 100644
--- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java
+++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java
@@ -5,21 +5,23 @@
import static com.linkedin.venice.router.stats.RouterMetricEntity.ABORTED_RETRY_COUNT;
import static com.linkedin.venice.router.stats.RouterMetricEntity.ALLOWED_RETRY_COUNT;
import static com.linkedin.venice.router.stats.RouterMetricEntity.CALL_COUNT;
-import static com.linkedin.venice.router.stats.RouterMetricEntity.CALL_KEY_COUNT;
import static com.linkedin.venice.router.stats.RouterMetricEntity.CALL_TIME;
import static com.linkedin.venice.router.stats.RouterMetricEntity.DISALLOWED_RETRY_COUNT;
-import static com.linkedin.venice.router.stats.RouterMetricEntity.INCOMING_CALL_COUNT;
+import static com.linkedin.venice.router.stats.RouterMetricEntity.KEY_COUNT;
import static com.linkedin.venice.router.stats.RouterMetricEntity.RETRY_COUNT;
import static com.linkedin.venice.router.stats.RouterMetricEntity.RETRY_DELAY;
import static com.linkedin.venice.stats.AbstractVeniceAggStats.STORE_NAME_FOR_TOTAL_STAT;
import static com.linkedin.venice.stats.dimensions.HttpResponseStatusCodeCategory.getVeniceHttpResponseStatusCodeCategory;
+import static com.linkedin.venice.stats.dimensions.RequestRetryAbortReason.DELAY_CONSTRAINT;
+import static com.linkedin.venice.stats.dimensions.RequestRetryAbortReason.MAX_RETRY_ROUTE_LIMIT;
+import static com.linkedin.venice.stats.dimensions.RequestRetryAbortReason.NO_AVAILABLE_REPLICA;
+import static com.linkedin.venice.stats.dimensions.RequestRetryAbortReason.SLOW_ROUTE;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE_CATEGORY;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_CLUSTER_NAME;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_METHOD;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_RETRY_ABORT_REASON;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_RETRY_TYPE;
-import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_VALIDATION_OUTCOME;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_STORE_NAME;
import static java.util.Collections.singletonList;
@@ -36,12 +38,10 @@
import com.linkedin.venice.stats.VeniceOpenTelemetryMetricsRepository;
import com.linkedin.venice.stats.dimensions.RequestRetryAbortReason;
import com.linkedin.venice.stats.dimensions.RequestRetryType;
-import com.linkedin.venice.stats.dimensions.RequestValidationOutcome;
import com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions;
import com.linkedin.venice.stats.dimensions.VeniceResponseStatusCategory;
import com.linkedin.venice.stats.metrics.MetricEntityState;
import com.linkedin.venice.stats.metrics.TehutiMetricNameEnum;
-import com.linkedin.venice.utils.CollectionUtils;
import io.netty.handler.codec.http.HttpResponseStatus;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.common.AttributesBuilder;
@@ -59,7 +59,6 @@
import io.tehuti.metrics.stats.Rate;
import io.tehuti.metrics.stats.Total;
import java.util.Arrays;
-import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
@@ -79,16 +78,24 @@ public class RouterHttpRequestStats extends AbstractVeniceHttpStats {
}
/** metrics to track incoming requests */
- private final MetricEntityState incomingRequestMetric;
+ private final Sensor requestSensor;
/** metrics to track response handling */
- private final MetricEntityState requestMetric;
+ private final MetricEntityState healthyRequestMetric;
+ private final MetricEntityState unhealthyRequestMetric;
+ private final MetricEntityState tardyRequestMetric;
+ private final MetricEntityState throttledRequestMetric;
+ private final MetricEntityState badRequestMetric;
+
private final Sensor healthyRequestRateSensor;
private final Sensor tardyRequestRatioSensor;
/** latency metrics */
private final Sensor latencyTehutiSensor; // This can be removed while removing tehuti
- private final MetricEntityState latencyMetric;
+ private final MetricEntityState healthyLatencyMetric;
+ private final MetricEntityState unhealthyLatencyMetric;
+ private final MetricEntityState tardyLatencyMetric;
+ private final MetricEntityState throttledLatencyMetric;
/** retry metrics */
private final MetricEntityState retryCountMetric;
@@ -97,10 +104,15 @@ public class RouterHttpRequestStats extends AbstractVeniceHttpStats {
private final MetricEntityState retryDelayMetric;
/** retry aborted metrics */
- private final MetricEntityState abortedRetryCountMetric;
+ private final MetricEntityState delayConstraintAbortedRetryCountMetric;
+ private final MetricEntityState slowRouteAbortedRetryCountMetric;
+ private final MetricEntityState retryRouteLimitAbortedRetryCountMetric;
+ private final MetricEntityState noAvailableReplicaAbortedRetryCountMetric;
/** key count metrics */
private final MetricEntityState keyCountMetric;
+ private final Sensor keyNumSensor;
+ private final Sensor badRequestKeyCountSensor;
/** OTel metrics yet to be added */
private final Sensor requestSizeSensor;
@@ -146,7 +158,8 @@ public RouterHttpRequestStats(
if (metricsRepository instanceof VeniceMetricsRepository) {
VeniceMetricsRepository veniceMetricsRepository = (VeniceMetricsRepository) metricsRepository;
VeniceMetricsConfig veniceMetricsConfig = veniceMetricsRepository.getVeniceMetricsConfig();
- emitOpenTelemetryMetrics = veniceMetricsConfig.emitOtelMetrics();
+ // total stats won't be emitted by OTel
+ emitOpenTelemetryMetrics = veniceMetricsConfig.emitOtelMetrics() && !isTotalStats();
openTelemetryMetricFormat = veniceMetricsConfig.getMetricNamingFormat();
if (emitOpenTelemetryMetrics) {
otelRepository = veniceMetricsRepository.getOpenTelemetryMetricsRepository();
@@ -172,104 +185,141 @@ public RouterHttpRequestStats(
Rate requestRate = new OccurrenceRate();
Rate healthyRequestRate = new OccurrenceRate();
Rate tardyRequestRate = new OccurrenceRate();
-
- incomingRequestMetric = new MetricEntityState(
- INCOMING_CALL_COUNT.getMetricEntity(),
- otelRepository,
- this::registerSensorFinal,
- CollectionUtils.>mapBuilder()
- .put(RouterTehutiMetricNameEnum.REQUEST, Arrays.asList(new Count(), requestRate))
- .build());
+ requestSensor = registerSensor("request", new Count(), requestRate);
healthyRequestRateSensor =
registerSensor(new TehutiUtils.SimpleRatioStat(healthyRequestRate, requestRate, "healthy_request_ratio"));
tardyRequestRatioSensor =
registerSensor(new TehutiUtils.SimpleRatioStat(tardyRequestRate, requestRate, "tardy_request_ratio"));
+ keyNumSensor = registerSensor("key_num", new Avg(), new Max(0));
+ badRequestKeyCountSensor = registerSensor("bad_request_key_count", new OccurrenceRate(), new Avg(), new Max());
+
+ healthyRequestMetric = new MetricEntityState(
+ CALL_COUNT.getMetricEntity(),
+ otelRepository,
+ this::registerSensorFinal,
+ RouterTehutiMetricNameEnum.HEALTHY_REQUEST,
+ Arrays.asList(new Count(), healthyRequestRate));
- requestMetric = new MetricEntityState(
+ unhealthyRequestMetric = new MetricEntityState(
CALL_COUNT.getMetricEntity(),
otelRepository,
this::registerSensorFinal,
- CollectionUtils.>mapBuilder()
- .put(RouterTehutiMetricNameEnum.HEALTHY_REQUEST, Arrays.asList(new Count(), healthyRequestRate))
- .put(RouterTehutiMetricNameEnum.UNHEALTHY_REQUEST, singletonList(new Count()))
- .put(RouterTehutiMetricNameEnum.TARDY_REQUEST, Arrays.asList(new Count(), tardyRequestRate))
- .put(RouterTehutiMetricNameEnum.THROTTLED_REQUEST, singletonList(new Count()))
- .put(RouterTehutiMetricNameEnum.BAD_REQUEST, singletonList(new Count()))
- .build());
+ RouterTehutiMetricNameEnum.UNHEALTHY_REQUEST,
+ singletonList(new Count()));
+
+ tardyRequestMetric = new MetricEntityState(
+ CALL_COUNT.getMetricEntity(),
+ otelRepository,
+ this::registerSensorFinal,
+ RouterTehutiMetricNameEnum.TARDY_REQUEST,
+ Arrays.asList(new Count(), tardyRequestRate));
+
+ throttledRequestMetric = new MetricEntityState(
+ CALL_COUNT.getMetricEntity(),
+ otelRepository,
+ this::registerSensorFinal,
+ RouterTehutiMetricNameEnum.THROTTLED_REQUEST,
+ singletonList(new Count()));
+
+ badRequestMetric = new MetricEntityState(
+ CALL_COUNT.getMetricEntity(),
+ otelRepository,
+ this::registerSensorFinal,
+ RouterTehutiMetricNameEnum.BAD_REQUEST,
+ singletonList(new Count()));
latencyTehutiSensor = registerSensorWithDetailedPercentiles("latency", new Avg(), new Max(0));
- latencyMetric = new MetricEntityState(
+ healthyLatencyMetric = new MetricEntityState(
+ CALL_TIME.getMetricEntity(),
+ otelRepository,
+ this::registerSensorFinal,
+ RouterTehutiMetricNameEnum.HEALTHY_REQUEST_LATENCY,
+ Arrays.asList(
+ new Avg(),
+ new Max(0),
+ TehutiUtils.getPercentileStatForNetworkLatency(
+ getName(),
+ getFullMetricName(RouterTehutiMetricNameEnum.HEALTHY_REQUEST_LATENCY.getMetricName()))));
+
+ unhealthyLatencyMetric = new MetricEntityState(
CALL_TIME.getMetricEntity(),
otelRepository,
this::registerSensorFinal,
- CollectionUtils.>mapBuilder()
- .put(
- RouterTehutiMetricNameEnum.HEALTHY_REQUEST_LATENCY,
- Arrays.asList(
- new Avg(),
- new Max(0),
- TehutiUtils.getPercentileStatForNetworkLatency(
- getName(),
- getFullMetricName(RouterTehutiMetricNameEnum.HEALTHY_REQUEST_LATENCY.getMetricName()))))
- .put(RouterTehutiMetricNameEnum.UNHEALTHY_REQUEST_LATENCY, Arrays.asList(new Avg(), new Max(0)))
- .put(RouterTehutiMetricNameEnum.TARDY_REQUEST_LATENCY, Arrays.asList(new Avg(), new Max(0)))
- .put(RouterTehutiMetricNameEnum.THROTTLED_REQUEST_LATENCY, Arrays.asList(new Avg(), new Max(0)))
- .build());
+ RouterTehutiMetricNameEnum.UNHEALTHY_REQUEST_LATENCY,
+ Arrays.asList(new Avg(), new Max(0)));
+
+ tardyLatencyMetric = new MetricEntityState(
+ CALL_TIME.getMetricEntity(),
+ otelRepository,
+ this::registerSensorFinal,
+ RouterTehutiMetricNameEnum.TARDY_REQUEST_LATENCY,
+ Arrays.asList(new Avg(), new Max(0)));
+
+ throttledLatencyMetric = new MetricEntityState(
+ CALL_TIME.getMetricEntity(),
+ otelRepository,
+ this::registerSensorFinal,
+ RouterTehutiMetricNameEnum.THROTTLED_REQUEST_LATENCY,
+ Arrays.asList(new Avg(), new Max(0)));
retryCountMetric = new MetricEntityState(
RETRY_COUNT.getMetricEntity(),
otelRepository,
this::registerSensorFinal,
- CollectionUtils.>mapBuilder()
- .put(RouterTehutiMetricNameEnum.ERROR_RETRY, singletonList(new Count()))
- .build());
+ RouterTehutiMetricNameEnum.ERROR_RETRY,
+ singletonList(new Count()));
allowedRetryCountMetric = new MetricEntityState(
ALLOWED_RETRY_COUNT.getMetricEntity(),
otelRepository,
this::registerSensorFinal,
- CollectionUtils.>mapBuilder()
- .put(RouterTehutiMetricNameEnum.ALLOWED_RETRY_REQUEST_COUNT, singletonList(new OccurrenceRate()))
- .build());
+ RouterTehutiMetricNameEnum.ALLOWED_RETRY_REQUEST_COUNT,
+ singletonList(new OccurrenceRate()));
disallowedRetryCountMetric = new MetricEntityState(
DISALLOWED_RETRY_COUNT.getMetricEntity(),
otelRepository,
this::registerSensorFinal,
- CollectionUtils.>mapBuilder()
- .put(RouterTehutiMetricNameEnum.DISALLOWED_RETRY_REQUEST_COUNT, singletonList(new OccurrenceRate()))
- .build());
+ RouterTehutiMetricNameEnum.DISALLOWED_RETRY_REQUEST_COUNT,
+ singletonList(new OccurrenceRate()));
retryDelayMetric = new MetricEntityState(
RETRY_DELAY.getMetricEntity(),
otelRepository,
this::registerSensorFinal,
- CollectionUtils.>mapBuilder()
- .put(RouterTehutiMetricNameEnum.RETRY_DELAY, Arrays.asList(new Avg(), new Max()))
- .build());
+ RouterTehutiMetricNameEnum.RETRY_DELAY,
+ Arrays.asList(new Avg(), new Max()));
+
+ delayConstraintAbortedRetryCountMetric = new MetricEntityState(
+ ABORTED_RETRY_COUNT.getMetricEntity(),
+ otelRepository,
+ this::registerSensorFinal,
+ RouterTehutiMetricNameEnum.DELAY_CONSTRAINT_ABORTED_RETRY_REQUEST,
+ singletonList(new Count()));
- abortedRetryCountMetric = new MetricEntityState(
+ slowRouteAbortedRetryCountMetric = new MetricEntityState(
ABORTED_RETRY_COUNT.getMetricEntity(),
otelRepository,
this::registerSensorFinal,
- CollectionUtils.>mapBuilder()
- .put(RouterTehutiMetricNameEnum.DELAY_CONSTRAINT_ABORTED_RETRY_REQUEST, singletonList(new Count()))
- .put(RouterTehutiMetricNameEnum.SLOW_ROUTE_ABORTED_RETRY_REQUEST, singletonList(new Count()))
- .put(RouterTehutiMetricNameEnum.RETRY_ROUTE_LIMIT_ABORTED_RETRY_REQUEST, singletonList(new Count()))
- .put(RouterTehutiMetricNameEnum.NO_AVAILABLE_REPLICA_ABORTED_RETRY_REQUEST, singletonList(new Count()))
- .build());
-
- keyCountMetric = new MetricEntityState(
- CALL_KEY_COUNT.getMetricEntity(),
+ RouterTehutiMetricNameEnum.SLOW_ROUTE_ABORTED_RETRY_REQUEST,
+ singletonList(new Count()));
+
+ retryRouteLimitAbortedRetryCountMetric = new MetricEntityState(
+ ABORTED_RETRY_COUNT.getMetricEntity(),
otelRepository,
this::registerSensorFinal,
- CollectionUtils.>mapBuilder()
- .put(RouterTehutiMetricNameEnum.KEY_NUM, Arrays.asList(new OccurrenceRate(), new Avg(), new Max(0)))
- .put(
- RouterTehutiMetricNameEnum.BAD_REQUEST_KEY_COUNT,
- Arrays.asList(new OccurrenceRate(), new Avg(), new Max(0)))
- .build());
+ RouterTehutiMetricNameEnum.RETRY_ROUTE_LIMIT_ABORTED_RETRY_REQUEST,
+ singletonList(new Count()));
+
+ noAvailableReplicaAbortedRetryCountMetric = new MetricEntityState(
+ ABORTED_RETRY_COUNT.getMetricEntity(),
+ otelRepository,
+ this::registerSensorFinal,
+ RouterTehutiMetricNameEnum.NO_AVAILABLE_REPLICA_ABORTED_RETRY_REQUEST,
+ singletonList(new Count()));
+
+ keyCountMetric = new MetricEntityState(KEY_COUNT.getMetricEntity(), otelRepository);
errorRetryAttemptTriggeredByPendingRequestCheckSensor =
registerSensor("error_retry_attempt_triggered_by_pending_request_check", new OccurrenceRate());
@@ -360,37 +410,43 @@ private String getDimensionName(VeniceMetricsDimensions dimension) {
* types of requests also have their latencies logged at the same time.
*/
public void recordIncomingRequest() {
- incomingRequestMetric.record(RouterTehutiMetricNameEnum.REQUEST, 1, commonMetricDimensions);
+ requestSensor.record(1);
inFlightRequestSensor.record(currentInFlightRequest.incrementAndGet());
totalInflightRequestSensor.record();
}
- public void recordHealthyRequest(Double latency, HttpResponseStatus responseStatus) {
- VeniceResponseStatusCategory veniceResponseStatusCategory = VeniceResponseStatusCategory.HEALTHY;
- recordRequestMetric(RouterTehutiMetricNameEnum.HEALTHY_REQUEST, responseStatus, veniceResponseStatusCategory);
+ Attributes getRequestMetricDimensions(
+ HttpResponseStatus responseStatus,
+ VeniceResponseStatusCategory veniceResponseStatusCategory) {
+ return Attributes.builder()
+ .putAll(commonMetricDimensions)
+ .put(getDimensionName(HTTP_RESPONSE_STATUS_CODE), responseStatus.codeAsText().toString())
+ .put(
+ getDimensionName(HTTP_RESPONSE_STATUS_CODE_CATEGORY),
+ getVeniceHttpResponseStatusCodeCategory(responseStatus))
+ .put(getDimensionName(VENICE_RESPONSE_STATUS_CODE_CATEGORY), veniceResponseStatusCategory.getCategory())
+ .build();
+ }
+
+ public void recordHealthyRequest(Double latency, HttpResponseStatus responseStatus, int keyNum) {
+ Attributes dimensions = getRequestMetricDimensions(responseStatus, VeniceResponseStatusCategory.SUCCESS);
+ healthyRequestMetric.record(1, dimensions);
+ keyCountMetric.record(keyNum, dimensions);
if (latency != null) {
- recordLatencyMetric(
- RouterTehutiMetricNameEnum.HEALTHY_REQUEST_LATENCY,
- latency,
- responseStatus,
- veniceResponseStatusCategory);
+ healthyLatencyMetric.record(latency, dimensions);
}
}
public void recordUnhealthyRequest(HttpResponseStatus responseStatus) {
- recordRequestMetric(
- RouterTehutiMetricNameEnum.UNHEALTHY_REQUEST,
- responseStatus,
- VeniceResponseStatusCategory.UNHEALTHY);
+ Attributes dimensions = getRequestMetricDimensions(responseStatus, VeniceResponseStatusCategory.FAIL);
+ unhealthyRequestMetric.record(1, dimensions);
}
- public void recordUnhealthyRequest(double latency, HttpResponseStatus responseStatus) {
- recordUnhealthyRequest(responseStatus);
- recordLatencyMetric(
- RouterTehutiMetricNameEnum.UNHEALTHY_REQUEST_LATENCY,
- latency,
- responseStatus,
- VeniceResponseStatusCategory.UNHEALTHY);
+ public void recordUnhealthyRequest(double latency, HttpResponseStatus responseStatus, int keyNum) {
+ Attributes dimensions = getRequestMetricDimensions(responseStatus, VeniceResponseStatusCategory.FAIL);
+ unhealthyRequestMetric.record(1, dimensions);
+ keyCountMetric.record(keyNum, dimensions);
+ unhealthyLatencyMetric.record(latency, dimensions);
}
public void recordUnavailableReplicaStreamingRequest() {
@@ -405,23 +461,18 @@ public void recordReadQuotaUsage(int quotaUsage) {
readQuotaUsageSensor.record(quotaUsage);
}
- public void recordTardyRequest(double latency, HttpResponseStatus responseStatus) {
- VeniceResponseStatusCategory veniceResponseStatusCategory = VeniceResponseStatusCategory.TARDY;
- recordRequestMetric(RouterTehutiMetricNameEnum.TARDY_REQUEST, responseStatus, veniceResponseStatusCategory);
- recordLatencyMetric(
- RouterTehutiMetricNameEnum.TARDY_REQUEST_LATENCY,
- latency,
- responseStatus,
- veniceResponseStatusCategory);
+ public void recordTardyRequest(double latency, HttpResponseStatus responseStatus, int keyNum) {
+ Attributes dimensions = getRequestMetricDimensions(responseStatus, VeniceResponseStatusCategory.SUCCESS);
+ tardyRequestMetric.record(1, dimensions);
+ keyCountMetric.record(keyNum, dimensions);
+ tardyLatencyMetric.record(latency, dimensions);
}
- public void recordThrottledRequest(double latency, HttpResponseStatus responseStatus) {
- recordThrottledRequest(responseStatus);
- recordLatencyMetric(
- RouterTehutiMetricNameEnum.THROTTLED_REQUEST_LATENCY,
- latency,
- responseStatus,
- VeniceResponseStatusCategory.THROTTLED);
+ public void recordThrottledRequest(double latency, HttpResponseStatus responseStatus, int keyNum) {
+ Attributes dimensions = getRequestMetricDimensions(responseStatus, VeniceResponseStatusCategory.FAIL);
+ throttledRequestMetric.record(1, dimensions);
+ keyCountMetric.record(keyNum, dimensions);
+ throttledLatencyMetric.record(latency, dimensions);
}
/**
@@ -432,10 +483,8 @@ public void recordThrottledRequest(double latency, HttpResponseStatus responseSt
* TODO: Remove this overload after fixing the above.
*/
public void recordThrottledRequest(HttpResponseStatus responseStatus) {
- recordRequestMetric(
- RouterTehutiMetricNameEnum.THROTTLED_REQUEST,
- responseStatus,
- VeniceResponseStatusCategory.THROTTLED);
+ Attributes dimensions = getRequestMetricDimensions(responseStatus, VeniceResponseStatusCategory.FAIL);
+ throttledRequestMetric.record(1, dimensions);
}
public void recordErrorRetryCount() {
@@ -450,12 +499,10 @@ public void recordRetryTriggeredSensorOtel(RequestRetryType retryType) {
.put(getDimensionName(VENICE_REQUEST_RETRY_TYPE), retryType.getRetryType())
.build();
}
- retryCountMetric.record(RouterTehutiMetricNameEnum.ERROR_RETRY, 1, dimensions);
+ retryCountMetric.record(1, dimensions);
}
- public void recordAbortedRetrySensorOtel(
- TehutiMetricNameEnum tehutiMetricNameEnum,
- RequestRetryAbortReason abortReason) {
+ private Attributes getRetryRequestAbortDimensions(RequestRetryAbortReason abortReason) {
Attributes dimensions = null;
if (emitOpenTelemetryMetrics) {
dimensions = Attributes.builder()
@@ -463,21 +510,32 @@ public void recordAbortedRetrySensorOtel(
.put(getDimensionName(VENICE_REQUEST_RETRY_ABORT_REASON), abortReason.getAbortReason())
.build();
}
- abortedRetryCountMetric.record(tehutiMetricNameEnum, 1, dimensions);
+ return dimensions;
}
- public void recordBadRequest(HttpResponseStatus responseStatus) {
- recordRequestMetric(
- RouterTehutiMetricNameEnum.BAD_REQUEST,
- responseStatus,
- VeniceResponseStatusCategory.BAD_REQUEST);
+ public void recordDelayConstraintAbortedRetryCountMetric() {
+ Attributes dimensions = getRetryRequestAbortDimensions(DELAY_CONSTRAINT);
+ delayConstraintAbortedRetryCountMetric.record(1, dimensions);
+ }
+
+ public void recordSlowRouteAbortedRetryCountMetric() {
+ Attributes dimensions = getRetryRequestAbortDimensions(SLOW_ROUTE);
+ slowRouteAbortedRetryCountMetric.record(1, dimensions);
}
- public void recordBadRequestKeyCount(int keyCount) {
- recordKeyCountMetric(
- RouterTehutiMetricNameEnum.BAD_REQUEST_KEY_COUNT,
- keyCount,
- RequestValidationOutcome.INVALID_KEY_COUNT_LIMIT_EXCEEDED);
+ public void recordRetryRouteLimitAbortedRetryCountMetric() {
+ Attributes dimensions = getRetryRequestAbortDimensions(MAX_RETRY_ROUTE_LIMIT);
+ retryRouteLimitAbortedRetryCountMetric.record(1, dimensions);
+ }
+
+ public void recordNoAvailableReplicaAbortedRetryCountMetric() {
+ Attributes dimensions = getRetryRequestAbortDimensions(NO_AVAILABLE_REPLICA);
+ noAvailableReplicaAbortedRetryCountMetric.record(1, dimensions);
+ }
+
+ public void recordBadRequest(HttpResponseStatus responseStatus) {
+ Attributes dimensions = getRequestMetricDimensions(responseStatus, VeniceResponseStatusCategory.FAIL);
+ badRequestMetric.record(1, dimensions);
}
public void recordRequestThrottledByRouterCapacity() {
@@ -494,43 +552,6 @@ public void recordLatency(double latency) {
latencyTehutiSensor.record(latency);
}
- public void recordLatencyMetric(
- TehutiMetricNameEnum tehutiMetricNameEnum,
- double latency,
- HttpResponseStatus responseStatus,
- VeniceResponseStatusCategory veniceResponseStatusCategory) {
- Attributes dimensions = null;
- if (emitOpenTelemetryMetrics) {
- dimensions = Attributes.builder()
- .putAll(commonMetricDimensions)
- // Don't add HTTP_RESPONSE_STATUS_CODE to reduce the cardinality for histogram
- .put(
- getDimensionName(HTTP_RESPONSE_STATUS_CODE_CATEGORY),
- getVeniceHttpResponseStatusCodeCategory(responseStatus))
- .put(getDimensionName(VENICE_RESPONSE_STATUS_CODE_CATEGORY), veniceResponseStatusCategory.getCategory())
- .build();
- }
- latencyMetric.record(tehutiMetricNameEnum, latency, dimensions);
- }
-
- public void recordRequestMetric(
- TehutiMetricNameEnum tehutiMetricNameEnum,
- HttpResponseStatus responseStatus,
- VeniceResponseStatusCategory veniceResponseStatusCategory) {
- Attributes dimensions = null;
- if (emitOpenTelemetryMetrics) {
- dimensions = Attributes.builder()
- .putAll(commonMetricDimensions)
- .put(
- getDimensionName(HTTP_RESPONSE_STATUS_CODE_CATEGORY),
- getVeniceHttpResponseStatusCodeCategory(responseStatus))
- .put(getDimensionName(VENICE_RESPONSE_STATUS_CODE_CATEGORY), veniceResponseStatusCategory.getCategory())
- .put(getDimensionName(HTTP_RESPONSE_STATUS_CODE), responseStatus.codeAsText().toString())
- .build();
- }
- requestMetric.record(tehutiMetricNameEnum, 1, dimensions);
- }
-
public void recordResponseWaitingTime(double waitingTime) {
routerResponseWaitingTimeSensor.record(waitingTime);
}
@@ -559,22 +580,13 @@ public void recordFindUnhealthyHostRequest() {
findUnhealthyHostRequestSensor.record();
}
- public void recordKeyNum(int keyNum) {
- recordKeyCountMetric(RouterTehutiMetricNameEnum.KEY_NUM, keyNum, RequestValidationOutcome.VALID);
+ public void recordIncomingKeyCountMetric(int keyNum) {
+ keyNumSensor.record(keyNum);
}
- public void recordKeyCountMetric(
- TehutiMetricNameEnum tehutiMetricNameEnum,
- int keyNum,
- RequestValidationOutcome outcome) {
- Attributes dimensions = null;
- if (emitOpenTelemetryMetrics) {
- dimensions = Attributes.builder()
- .putAll(commonMetricDimensions)
- .put(getDimensionName(VENICE_REQUEST_VALIDATION_OUTCOME), outcome.getOutcome())
- .build();
- }
- keyCountMetric.record(tehutiMetricNameEnum, keyNum, dimensions);
+ public void recordIncomingBadRequestKeyCountMetric(HttpResponseStatus responseStatus, int keyNum) {
+ badRequestKeyCountSensor.record(keyNum);
+ keyCountMetric.record(keyNum, getRequestMetricDimensions(responseStatus, VeniceResponseStatusCategory.FAIL));
}
public void recordRequestUsage(int usage) {
@@ -597,30 +609,6 @@ public void recordUnavailableRequest() {
unAvailableRequestSensor.record();
}
- public void recordDelayConstraintAbortedRetryRequest() {
- recordAbortedRetrySensorOtel(
- RouterTehutiMetricNameEnum.DELAY_CONSTRAINT_ABORTED_RETRY_REQUEST,
- RequestRetryAbortReason.DELAY_CONSTRAINT);
- }
-
- public void recordSlowRouteAbortedRetryRequest() {
- recordAbortedRetrySensorOtel(
- RouterTehutiMetricNameEnum.SLOW_ROUTE_ABORTED_RETRY_REQUEST,
- RequestRetryAbortReason.SLOW_ROUTE);
- }
-
- public void recordRetryRouteLimitAbortedRetryRequest() {
- recordAbortedRetrySensorOtel(
- RouterTehutiMetricNameEnum.RETRY_ROUTE_LIMIT_ABORTED_RETRY_REQUEST,
- RequestRetryAbortReason.MAX_RETRY_ROUTE_LIMIT);
- }
-
- public void recordNoAvailableReplicaAbortedRetryRequest() {
- recordAbortedRetrySensorOtel(
- RouterTehutiMetricNameEnum.NO_AVAILABLE_REPLICA_ABORTED_RETRY_REQUEST,
- RequestRetryAbortReason.NO_AVAILABLE_REPLICA);
- }
-
public void recordKeySizeInByte(long keySize) {
if (keySizeSensor != null) {
keySizeSensor.record(keySize);
@@ -637,12 +625,11 @@ public void recordResponse() {
}
public void recordAllowedRetryRequest() {
- allowedRetryCountMetric.record(RouterTehutiMetricNameEnum.ALLOWED_RETRY_REQUEST_COUNT, 1, commonMetricDimensions);
+ allowedRetryCountMetric.record(1, commonMetricDimensions);
}
public void recordDisallowedRetryRequest() {
- disallowedRetryCountMetric
- .record(RouterTehutiMetricNameEnum.DISALLOWED_RETRY_REQUEST_COUNT, 1, commonMetricDimensions);
+ disallowedRetryCountMetric.record(1, commonMetricDimensions);
}
public void recordErrorRetryAttemptTriggeredByPendingRequestCheck() {
@@ -650,7 +637,7 @@ public void recordErrorRetryAttemptTriggeredByPendingRequestCheck() {
}
public void recordRetryDelay(double delay) {
- retryDelayMetric.record(RouterTehutiMetricNameEnum.RETRY_DELAY, delay, commonMetricDimensions);
+ retryDelayMetric.record(delay, commonMetricDimensions);
}
public void recordMetaStoreShadowRead() {
@@ -695,8 +682,6 @@ Attributes getCommonMetricDimensions() {
* Metric names for tehuti metrics used in this class
*/
enum RouterTehutiMetricNameEnum implements TehutiMetricNameEnum {
- /** for {@link RouterMetricEntity#INCOMING_CALL_COUNT} */
- REQUEST,
/** for {@link RouterMetricEntity#CALL_COUNT} */
HEALTHY_REQUEST, UNHEALTHY_REQUEST, TARDY_REQUEST, THROTTLED_REQUEST, BAD_REQUEST,
/** for {@link RouterMetricEntity#CALL_TIME} */
@@ -711,9 +696,7 @@ enum RouterTehutiMetricNameEnum implements TehutiMetricNameEnum {
RETRY_DELAY,
/** for {@link RouterMetricEntity#ABORTED_RETRY_COUNT} */
DELAY_CONSTRAINT_ABORTED_RETRY_REQUEST, SLOW_ROUTE_ABORTED_RETRY_REQUEST, RETRY_ROUTE_LIMIT_ABORTED_RETRY_REQUEST,
- NO_AVAILABLE_REPLICA_ABORTED_RETRY_REQUEST,
- /** for {@link RouterMetricEntity#CALL_KEY_COUNT} */
- KEY_NUM, BAD_REQUEST_KEY_COUNT;
+ NO_AVAILABLE_REPLICA_ABORTED_RETRY_REQUEST;
private final String metricName;
diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntity.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntity.java
index df0d5094071..3cda3c1ecea 100644
--- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntity.java
+++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntity.java
@@ -6,7 +6,6 @@
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_METHOD;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_RETRY_ABORT_REASON;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_RETRY_TYPE;
-import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_VALIDATION_OUTCOME;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_STORE_NAME;
import static com.linkedin.venice.utils.Utils.setOf;
@@ -22,12 +21,11 @@
* List all Metric entities for router
*/
public enum RouterMetricEntity {
- INCOMING_CALL_COUNT(
- MetricType.COUNTER, MetricUnit.NUMBER, "Count of all incoming requests",
- setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD)
- ),
+ /**
+ * Count of all requests during response handling along with response codes
+ */
CALL_COUNT(
- MetricType.COUNTER, MetricUnit.NUMBER, "Count of all requests with response details",
+ MetricType.COUNTER, MetricUnit.NUMBER, "Count of all requests during response handling along with response codes",
setOf(
VENICE_STORE_NAME,
VENICE_CLUSTER_NAME,
@@ -36,38 +34,66 @@ public enum RouterMetricEntity {
HTTP_RESPONSE_STATUS_CODE_CATEGORY,
VENICE_RESPONSE_STATUS_CODE_CATEGORY)
),
+ /**
+ * Latency based on all responses
+ */
CALL_TIME(
MetricType.HISTOGRAM, MetricUnit.MILLISECOND, "Latency based on all responses",
setOf(
VENICE_STORE_NAME,
VENICE_CLUSTER_NAME,
VENICE_REQUEST_METHOD,
+ HTTP_RESPONSE_STATUS_CODE,
HTTP_RESPONSE_STATUS_CODE_CATEGORY,
VENICE_RESPONSE_STATUS_CODE_CATEGORY)
),
- CALL_KEY_COUNT(
- MetricType.MIN_MAX_COUNT_SUM_AGGREGATIONS, MetricUnit.NUMBER, "Count of keys in multi key requests",
- setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD, VENICE_REQUEST_VALIDATION_OUTCOME)
+ /**
+ * Count of keys during response handling along with response codes
+ */
+ KEY_COUNT(
+ MetricType.HISTOGRAM, MetricUnit.NUMBER, "Count of keys during response handling along with response codes",
+ setOf(
+ VENICE_STORE_NAME,
+ VENICE_CLUSTER_NAME,
+ VENICE_REQUEST_METHOD,
+ HTTP_RESPONSE_STATUS_CODE,
+ HTTP_RESPONSE_STATUS_CODE_CATEGORY,
+ VENICE_RESPONSE_STATUS_CODE_CATEGORY)
),
+ /**
+ * Count of retries triggered
+ */
RETRY_COUNT(
MetricType.COUNTER, MetricUnit.NUMBER, "Count of retries triggered",
setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD, VENICE_REQUEST_RETRY_TYPE)
),
+ /**
+ * Count of allowed retry requests
+ */
ALLOWED_RETRY_COUNT(
MetricType.COUNTER, MetricUnit.NUMBER, "Count of allowed retry requests",
setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD)
),
+ /**
+ * Count of disallowed retry requests
+ */
DISALLOWED_RETRY_COUNT(
MetricType.COUNTER, MetricUnit.NUMBER, "Count of disallowed retry requests",
setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD)
),
- RETRY_DELAY(
- MetricType.MIN_MAX_COUNT_SUM_AGGREGATIONS, MetricUnit.MILLISECOND, "Retry delay time",
- setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD)
- ),
+ /**
+ * Count of aborted retry requests
+ */
ABORTED_RETRY_COUNT(
MetricType.COUNTER, MetricUnit.NUMBER, "Count of aborted retry requests",
setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD, VENICE_REQUEST_RETRY_ABORT_REASON)
+ ),
+ /**
+ * Retry delay time: Time in milliseconds between the original request and the retry request
+ */
+ RETRY_DELAY(
+ MetricType.MIN_MAX_COUNT_SUM_AGGREGATIONS, MetricUnit.MILLISECOND, "Retry delay time",
+ setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD)
);
private final MetricEntity metricEntity;
diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java
index 3ba47ad8f5b..88ed517bedf 100644
--- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java
+++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java
@@ -46,6 +46,7 @@
import io.netty.handler.codec.http.EmptyHttpHeaders;
import io.netty.handler.codec.http.HttpHeaders;
import io.netty.handler.codec.http.HttpMethod;
+import io.netty.handler.codec.http.HttpResponseStatus;
import io.netty.handler.codec.http.HttpVersion;
import java.nio.ByteBuffer;
import java.util.ArrayList;
@@ -306,7 +307,8 @@ public void parseRequestWithBatchSizeViolation() throws RouterException {
fail("A RouterException should be thrown here");
} catch (RouterException e) {
// expected and validate bad request metric
- verify(multiGetStats, times(1)).recordBadRequestKeyCount(storeName, maxKeyCount + 1);
+ verify(multiGetStats, times(1))
+ .recordBadRequestKeyCount(storeName, HttpResponseStatus.REQUEST_ENTITY_TOO_LARGE, maxKeyCount + 1);
} catch (Throwable t) {
t.printStackTrace();
fail("Only RouterException is expected, but got: " + t.getClass());
diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceResponseAggregator.java b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceResponseAggregator.java
index bb97fca491a..883dbeb6737 100644
--- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceResponseAggregator.java
+++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceResponseAggregator.java
@@ -41,6 +41,7 @@
import io.netty.handler.codec.http.HttpMethod;
import io.netty.handler.codec.http.HttpResponseStatus;
import io.netty.handler.codec.http.HttpVersion;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -168,6 +169,11 @@ public void testBuildResponseForMultiGet() {
VenicePath path = getPath(storeName, RequestType.MULTI_GET, mockRouterStat, request, compressorFactory);
when(path.getChunkedResponse()).thenReturn(null); // non-streaming
metrics.setPath(path);
+ List partitionKeys = new ArrayList<>();
+ partitionKeys.add(new RouterKey("key1".getBytes(StandardCharsets.UTF_8)));
+ partitionKeys.add(new RouterKey("key2".getBytes(StandardCharsets.UTF_8)));
+ partitionKeys.add(new RouterKey("key3".getBytes(StandardCharsets.UTF_8)));
+ doReturn(partitionKeys).when(path).getPartitionKeys();
VeniceResponseAggregator responseAggregator = new VeniceResponseAggregator(mockRouterStat, Optional.empty());
FullHttpResponse finalResponse = responseAggregator.buildResponse(request, metrics, gatheredResponses);
@@ -221,7 +227,7 @@ public void testBuildResponseForMultiGet() {
FullHttpResponse response5 = buildFullHttpResponse(TOO_MANY_REQUESTS, new byte[0], headers);
metrics.setMetric(MetricNames.ROUTER_SERVER_TIME, new TimeValue(1, TimeUnit.MILLISECONDS));
responseAggregator.buildResponse(request, metrics, Collections.singletonList(response5));
- verify(mockStatsForMultiGet).recordThrottledRequest(storeName, 1.0, TOO_MANY_REQUESTS);
+ verify(mockStatsForMultiGet).recordThrottledRequest(storeName, 1.0, TOO_MANY_REQUESTS, 3);
}
@Test
diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStatsTest.java b/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStatsTest.java
index cebfcc14424..f4a84d5622b 100644
--- a/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStatsTest.java
+++ b/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStatsTest.java
@@ -45,8 +45,8 @@ public void testAggRouterMetrics() {
Assert.assertNotNull(metricsRepository.getMetric(".store1--request.Count"));
Assert.assertEquals(reporter.query(".store1--request.Count").value(), 1d);
- stats.recordThrottledRequest("store1", 1.0, TOO_MANY_REQUESTS);
- stats.recordThrottledRequest("store2", 1.0, TOO_MANY_REQUESTS);
+ stats.recordThrottledRequest("store1", 1.0, TOO_MANY_REQUESTS, 1);
+ stats.recordThrottledRequest("store2", 1.0, TOO_MANY_REQUESTS, 1);
stats.recordErrorRetryCount("store1");
Assert.assertEquals(reporter.query(".total--request.Count").value(), 2d);
Assert.assertEquals(reporter.query(".store1--request.Count").value(), 1d);
@@ -108,8 +108,8 @@ public void testDisableMultiGetStoreMetrics() {
String storeName = Utils.getUniqueString("test-store");
multiGetStats.recordRequest(storeName);
streamingMultiGetStats.recordRequest(storeName);
- multiGetStats.recordHealthyRequest(storeName, 10, HttpResponseStatus.OK);
- streamingMultiGetStats.recordHealthyRequest(storeName, 10, HttpResponseStatus.OK);
+ multiGetStats.recordHealthyRequest(storeName, 10, HttpResponseStatus.OK, 1);
+ streamingMultiGetStats.recordHealthyRequest(storeName, 10, HttpResponseStatus.OK, 1);
// Total stats should exist for streaming and non-streaming multi-get
Assert.assertEquals((int) reporter.query(".total--multiget_request.Count").value(), 1);
Assert.assertEquals((int) reporter.query(".total--multiget_streaming_request.Count").value(), 1);
diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/stats/RouterHttpRequestStatsTest.java b/services/venice-router/src/test/java/com/linkedin/venice/router/stats/RouterHttpRequestStatsTest.java
index 363fef8416e..7f711dcff23 100644
--- a/services/venice-router/src/test/java/com/linkedin/venice/router/stats/RouterHttpRequestStatsTest.java
+++ b/services/venice-router/src/test/java/com/linkedin/venice/router/stats/RouterHttpRequestStatsTest.java
@@ -78,7 +78,7 @@ public void routerMetricsTest(boolean useVeniceMetricRepository, boolean isOtelE
assertNull(routerHttpRequestStats.getCommonMetricDimensions());
}
- routerHttpRequestStats.recordHealthyRequest(1.0, HttpResponseStatus.OK);
+ routerHttpRequestStats.recordHealthyRequest(1.0, HttpResponseStatus.OK, 1);
assertEquals(
metricsRepository.getMetric("." + storeName + "--" + HEALTHY_REQUEST.getMetricName() + ".Count").value(),
1.0);
diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/stats/RouterMetricEntityTest.java b/services/venice-router/src/test/java/com/linkedin/venice/router/stats/RouterMetricEntityTest.java
index 8755ba2aeb7..c3ac31c6c9e 100644
--- a/services/venice-router/src/test/java/com/linkedin/venice/router/stats/RouterMetricEntityTest.java
+++ b/services/venice-router/src/test/java/com/linkedin/venice/router/stats/RouterMetricEntityTest.java
@@ -21,25 +21,13 @@ public class RouterMetricEntityTest {
@Test
public void testRouterMetricEntities() {
Map expectedMetrics = new HashMap<>();
-
- expectedMetrics.put(
- RouterMetricEntity.INCOMING_CALL_COUNT,
- new MetricEntity(
- "incoming_call_count",
- MetricType.COUNTER,
- MetricUnit.NUMBER,
- "Count of all incoming requests",
- Utils.setOf(
- VeniceMetricsDimensions.VENICE_STORE_NAME,
- VeniceMetricsDimensions.VENICE_CLUSTER_NAME,
- VeniceMetricsDimensions.VENICE_REQUEST_METHOD)));
expectedMetrics.put(
RouterMetricEntity.CALL_COUNT,
new MetricEntity(
"call_count",
MetricType.COUNTER,
MetricUnit.NUMBER,
- "Count of all requests with response details",
+ "Count of all requests during response handling along with response codes",
Utils.setOf(
VeniceMetricsDimensions.VENICE_STORE_NAME,
VeniceMetricsDimensions.VENICE_CLUSTER_NAME,
@@ -58,20 +46,23 @@ public void testRouterMetricEntities() {
VeniceMetricsDimensions.VENICE_STORE_NAME,
VeniceMetricsDimensions.VENICE_CLUSTER_NAME,
VeniceMetricsDimensions.VENICE_REQUEST_METHOD,
+ VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE,
VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE_CATEGORY,
VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY)));
expectedMetrics.put(
- RouterMetricEntity.CALL_KEY_COUNT,
+ RouterMetricEntity.KEY_COUNT,
new MetricEntity(
- "call_key_count",
- MetricType.MIN_MAX_COUNT_SUM_AGGREGATIONS,
+ "key_count",
+ MetricType.HISTOGRAM,
MetricUnit.NUMBER,
- "Count of keys in multi key requests",
+ "Count of keys during response handling along with response codes",
Utils.setOf(
VeniceMetricsDimensions.VENICE_STORE_NAME,
VeniceMetricsDimensions.VENICE_CLUSTER_NAME,
VeniceMetricsDimensions.VENICE_REQUEST_METHOD,
- VeniceMetricsDimensions.VENICE_REQUEST_VALIDATION_OUTCOME)));
+ VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE,
+ VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE_CATEGORY,
+ VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY)));
expectedMetrics.put(
RouterMetricEntity.RETRY_COUNT,
new MetricEntity(