diff --git a/.chloggen/k8sclusterreceiver-pod-status-reason.yaml b/.chloggen/k8sclusterreceiver-pod-status-reason.yaml new file mode 100755 index 000000000000..7b22578cae6e --- /dev/null +++ b/.chloggen/k8sclusterreceiver-pod-status-reason.yaml @@ -0,0 +1,20 @@ +# Use this changelog template to create an entry for release notes. +# If your change doesn't affect end users, such as a test fix or a tooling change, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: k8sclusterreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: "Add optional k8s.pod.status_reason metric" + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [24034] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: diff --git a/receiver/k8sclusterreceiver/documentation.md b/receiver/k8sclusterreceiver/documentation.md index 87b52fd36a9f..a5d86f596433 100644 --- a/receiver/k8sclusterreceiver/documentation.md +++ b/receiver/k8sclusterreceiver/documentation.md @@ -386,6 +386,24 @@ The usage for a particular resource with a configured limit. | ---- | ----------- | ------ | | resource | the name of the resource on which the quota is applied | Any Str | +## Optional Metrics + +The following metrics are not emitted by default. 
Each of them can be enabled by applying the following configuration: + +```yaml +metrics: + <metric_name>: + enabled: true +``` + +### k8s.pod.status_reason + +Current status reason of the pod (1 - Evicted, 2 - NodeAffinity, 3 - NodeLost, 4 - Shutdown, 5 - UnexpectedAdmissionError, 6 - Unknown) + +| Unit | Metric Type | Value Type | +| ---- | ----------- | ---------- | +| 1 | Gauge | Int | + ## Resource Attributes | Name | Description | Values | Enabled | diff --git a/receiver/k8sclusterreceiver/internal/metadata/generated_config.go b/receiver/k8sclusterreceiver/internal/metadata/generated_config.go index d9cd05d91c50..71410bc4ce9f 100644 --- a/receiver/k8sclusterreceiver/internal/metadata/generated_config.go +++ b/receiver/k8sclusterreceiver/internal/metadata/generated_config.go @@ -53,6 +53,7 @@ type MetricsConfig struct { K8sJobSuccessfulPods MetricConfig `mapstructure:"k8s.job.successful_pods"` K8sNamespacePhase MetricConfig `mapstructure:"k8s.namespace.phase"` K8sPodPhase MetricConfig `mapstructure:"k8s.pod.phase"` + K8sPodStatusReason MetricConfig `mapstructure:"k8s.pod.status_reason"` K8sReplicasetAvailable MetricConfig `mapstructure:"k8s.replicaset.available"` K8sReplicasetDesired MetricConfig `mapstructure:"k8s.replicaset.desired"` K8sReplicationControllerAvailable MetricConfig `mapstructure:"k8s.replication_controller.available"` @@ -155,6 +156,9 @@ func DefaultMetricsConfig() MetricsConfig { K8sPodPhase: MetricConfig{ Enabled: true, }, + K8sPodStatusReason: MetricConfig{ + Enabled: false, + }, K8sReplicasetAvailable: MetricConfig{ Enabled: true, }, diff --git a/receiver/k8sclusterreceiver/internal/metadata/generated_config_test.go b/receiver/k8sclusterreceiver/internal/metadata/generated_config_test.go index 4dde07183eb4..b7e273b492ff 100644 --- a/receiver/k8sclusterreceiver/internal/metadata/generated_config_test.go +++ b/receiver/k8sclusterreceiver/internal/metadata/generated_config_test.go @@ -54,6 +54,7 @@ func TestMetricsBuilderConfig(t *testing.T) { 
K8sJobSuccessfulPods: MetricConfig{Enabled: true}, K8sNamespacePhase: MetricConfig{Enabled: true}, K8sPodPhase: MetricConfig{Enabled: true}, + K8sPodStatusReason: MetricConfig{Enabled: true}, K8sReplicasetAvailable: MetricConfig{Enabled: true}, K8sReplicasetDesired: MetricConfig{Enabled: true}, K8sReplicationControllerAvailable: MetricConfig{Enabled: true}, @@ -136,6 +137,7 @@ func TestMetricsBuilderConfig(t *testing.T) { K8sJobSuccessfulPods: MetricConfig{Enabled: false}, K8sNamespacePhase: MetricConfig{Enabled: false}, K8sPodPhase: MetricConfig{Enabled: false}, + K8sPodStatusReason: MetricConfig{Enabled: false}, K8sReplicasetAvailable: MetricConfig{Enabled: false}, K8sReplicasetDesired: MetricConfig{Enabled: false}, K8sReplicationControllerAvailable: MetricConfig{Enabled: false}, diff --git a/receiver/k8sclusterreceiver/internal/metadata/generated_metrics.go b/receiver/k8sclusterreceiver/internal/metadata/generated_metrics.go index cb5f1ba3cd20..d903567ddec4 100644 --- a/receiver/k8sclusterreceiver/internal/metadata/generated_metrics.go +++ b/receiver/k8sclusterreceiver/internal/metadata/generated_metrics.go @@ -1384,6 +1384,55 @@ func newMetricK8sPodPhase(cfg MetricConfig) metricK8sPodPhase { return m } +type metricK8sPodStatusReason struct { + data pmetric.Metric // data buffer for generated metric. + config MetricConfig // metric config provided by user. + capacity int // max observed number of data points added to the metric. +} + +// init fills k8s.pod.status_reason metric with initial data. 
+func (m *metricK8sPodStatusReason) init() { + m.data.SetName("k8s.pod.status_reason") + m.data.SetDescription("Current status reason of the pod (1 - Evicted, 2 - NodeAffinity, 3 - NodeLost, 4 - Shutdown, 5 - UnexpectedAdmissionError, 6 - Unknown)") + m.data.SetUnit("1") + m.data.SetEmptyGauge() +} + +func (m *metricK8sPodStatusReason) recordDataPoint(start pcommon.Timestamp, ts pcommon.Timestamp, val int64) { + if !m.config.Enabled { + return + } + dp := m.data.Gauge().DataPoints().AppendEmpty() + dp.SetStartTimestamp(start) + dp.SetTimestamp(ts) + dp.SetIntValue(val) +} + +// updateCapacity saves max length of data point slices that will be used for the slice capacity. +func (m *metricK8sPodStatusReason) updateCapacity() { + if m.data.Gauge().DataPoints().Len() > m.capacity { + m.capacity = m.data.Gauge().DataPoints().Len() + } +} + +// emit appends recorded metric data to a metrics slice and prepares it for recording another set of data points. +func (m *metricK8sPodStatusReason) emit(metrics pmetric.MetricSlice) { + if m.config.Enabled && m.data.Gauge().DataPoints().Len() > 0 { + m.updateCapacity() + m.data.MoveTo(metrics.AppendEmpty()) + m.init() + } +} + +func newMetricK8sPodStatusReason(cfg MetricConfig) metricK8sPodStatusReason { + m := metricK8sPodStatusReason{config: cfg} + if cfg.Enabled { + m.data = pmetric.NewMetric() + m.init() + } + return m +} + type metricK8sReplicasetAvailable struct { data pmetric.Metric // data buffer for generated metric. config MetricConfig // metric config provided by user. 
@@ -2120,6 +2169,7 @@ type MetricsBuilder struct { metricK8sJobSuccessfulPods metricK8sJobSuccessfulPods metricK8sNamespacePhase metricK8sNamespacePhase metricK8sPodPhase metricK8sPodPhase + metricK8sPodStatusReason metricK8sPodStatusReason metricK8sReplicasetAvailable metricK8sReplicasetAvailable metricK8sReplicasetDesired metricK8sReplicasetDesired metricK8sReplicationControllerAvailable metricK8sReplicationControllerAvailable @@ -2180,6 +2230,7 @@ func NewMetricsBuilder(mbc MetricsBuilderConfig, settings receiver.CreateSetting metricK8sJobSuccessfulPods: newMetricK8sJobSuccessfulPods(mbc.Metrics.K8sJobSuccessfulPods), metricK8sNamespacePhase: newMetricK8sNamespacePhase(mbc.Metrics.K8sNamespacePhase), metricK8sPodPhase: newMetricK8sPodPhase(mbc.Metrics.K8sPodPhase), + metricK8sPodStatusReason: newMetricK8sPodStatusReason(mbc.Metrics.K8sPodStatusReason), metricK8sReplicasetAvailable: newMetricK8sReplicasetAvailable(mbc.Metrics.K8sReplicasetAvailable), metricK8sReplicasetDesired: newMetricK8sReplicasetDesired(mbc.Metrics.K8sReplicasetDesired), metricK8sReplicationControllerAvailable: newMetricK8sReplicationControllerAvailable(mbc.Metrics.K8sReplicationControllerAvailable), @@ -2284,6 +2335,7 @@ func (mb *MetricsBuilder) EmitForResource(rmo ...ResourceMetricsOption) { mb.metricK8sJobSuccessfulPods.emit(ils.Metrics()) mb.metricK8sNamespacePhase.emit(ils.Metrics()) mb.metricK8sPodPhase.emit(ils.Metrics()) + mb.metricK8sPodStatusReason.emit(ils.Metrics()) mb.metricK8sReplicasetAvailable.emit(ils.Metrics()) mb.metricK8sReplicasetDesired.emit(ils.Metrics()) mb.metricK8sReplicationControllerAvailable.emit(ils.Metrics()) @@ -2458,6 +2510,11 @@ func (mb *MetricsBuilder) RecordK8sPodPhaseDataPoint(ts pcommon.Timestamp, val i mb.metricK8sPodPhase.recordDataPoint(mb.startTime, ts, val) } +// RecordK8sPodStatusReasonDataPoint adds a data point to k8s.pod.status_reason metric. 
+func (mb *MetricsBuilder) RecordK8sPodStatusReasonDataPoint(ts pcommon.Timestamp, val int64) { + mb.metricK8sPodStatusReason.recordDataPoint(mb.startTime, ts, val) +} + // RecordK8sReplicasetAvailableDataPoint adds a data point to k8s.replicaset.available metric. func (mb *MetricsBuilder) RecordK8sReplicasetAvailableDataPoint(ts pcommon.Timestamp, val int64) { mb.metricK8sReplicasetAvailable.recordDataPoint(mb.startTime, ts, val) diff --git a/receiver/k8sclusterreceiver/internal/metadata/generated_metrics_test.go b/receiver/k8sclusterreceiver/internal/metadata/generated_metrics_test.go index 5521b7db9d45..ed55b0b9d5a6 100644 --- a/receiver/k8sclusterreceiver/internal/metadata/generated_metrics_test.go +++ b/receiver/k8sclusterreceiver/internal/metadata/generated_metrics_test.go @@ -166,6 +166,9 @@ func TestMetricsBuilder(t *testing.T) { allMetricsCount++ mb.RecordK8sPodPhaseDataPoint(ts, 1) + allMetricsCount++ + mb.RecordK8sPodStatusReasonDataPoint(ts, 1) + defaultMetricsCount++ allMetricsCount++ mb.RecordK8sReplicasetAvailableDataPoint(ts, 1) @@ -612,6 +615,18 @@ func TestMetricsBuilder(t *testing.T) { assert.Equal(t, ts, dp.Timestamp()) assert.Equal(t, pmetric.NumberDataPointValueTypeInt, dp.ValueType()) assert.Equal(t, int64(1), dp.IntValue()) + case "k8s.pod.status_reason": + assert.False(t, validatedMetrics["k8s.pod.status_reason"], "Found a duplicate in the metrics slice: k8s.pod.status_reason") + validatedMetrics["k8s.pod.status_reason"] = true + assert.Equal(t, pmetric.MetricTypeGauge, ms.At(i).Type()) + assert.Equal(t, 1, ms.At(i).Gauge().DataPoints().Len()) + assert.Equal(t, "Current status reason of the pod (1 - Evicted, 2 - NodeAffinity, 3 - NodeLost, 4 - Shutdown, 5 - UnexpectedAdmissionError, 6 - Unknown)", ms.At(i).Description()) + assert.Equal(t, "1", ms.At(i).Unit()) + dp := ms.At(i).Gauge().DataPoints().At(0) + assert.Equal(t, start, dp.StartTimestamp()) + assert.Equal(t, ts, dp.Timestamp()) + assert.Equal(t, pmetric.NumberDataPointValueTypeInt, 
dp.ValueType()) + assert.Equal(t, int64(1), dp.IntValue()) case "k8s.replicaset.available": assert.False(t, validatedMetrics["k8s.replicaset.available"], "Found a duplicate in the metrics slice: k8s.replicaset.available") validatedMetrics["k8s.replicaset.available"] = true diff --git a/receiver/k8sclusterreceiver/internal/metadata/testdata/config.yaml b/receiver/k8sclusterreceiver/internal/metadata/testdata/config.yaml index 5e7149058ed5..afca964cf6b4 100644 --- a/receiver/k8sclusterreceiver/internal/metadata/testdata/config.yaml +++ b/receiver/k8sclusterreceiver/internal/metadata/testdata/config.yaml @@ -57,6 +57,8 @@ all_set: enabled: true k8s.pod.phase: enabled: true + k8s.pod.status_reason: + enabled: true k8s.replicaset.available: enabled: true k8s.replicaset.desired: @@ -206,6 +208,8 @@ none_set: enabled: false k8s.pod.phase: enabled: false + k8s.pod.status_reason: + enabled: false k8s.replicaset.available: enabled: false k8s.replicaset.desired: diff --git a/receiver/k8sclusterreceiver/internal/pod/pods.go b/receiver/k8sclusterreceiver/internal/pod/pods.go index 70754e415eb3..67f05951bae8 100644 --- a/receiver/k8sclusterreceiver/internal/pod/pods.go +++ b/receiver/k8sclusterreceiver/internal/pod/pods.go @@ -70,6 +70,7 @@ func Transform(pod *corev1.Pod) *corev1.Pod { func RecordMetrics(logger *zap.Logger, mb *metadata.MetricsBuilder, pod *corev1.Pod, ts pcommon.Timestamp) { mb.RecordK8sPodPhaseDataPoint(ts, int64(phaseToInt(pod.Status.Phase))) + mb.RecordK8sPodStatusReasonDataPoint(ts, int64(reasonToInt(pod.Status.Reason))) rb := mb.NewResourceBuilder() rb.SetK8sNamespaceName(pod.Namespace) rb.SetK8sNodeName(pod.Spec.NodeName) @@ -83,6 +84,23 @@ func RecordMetrics(logger *zap.Logger, mb *metadata.MetricsBuilder, pod *corev1. 
} } +func reasonToInt(reason string) int32 { + switch reason { + case "Evicted": + return 1 + case "NodeAffinity": + return 2 + case "NodeLost": + return 3 + case "Shutdown": + return 4 + case "UnexpectedAdmissionError": + return 5 + default: + return 6 + } +} + func phaseToInt(phase corev1.PodPhase) int32 { switch phase { case corev1.PodPending: diff --git a/receiver/k8sclusterreceiver/internal/pod/pods_test.go b/receiver/k8sclusterreceiver/internal/pod/pods_test.go index 943bdc97adc5..7c5e40f8ed85 100644 --- a/receiver/k8sclusterreceiver/internal/pod/pods_test.go +++ b/receiver/k8sclusterreceiver/internal/pod/pods_test.go @@ -59,6 +59,32 @@ func TestPodAndContainerMetricsReportCPUMetrics(t *testing.T) { ) } +func TestPodStatusReasonAndContainerMetricsReportCPUMetrics(t *testing.T) { + pod := testutils.NewPodWithContainer( + "1", + testutils.NewPodSpecWithContainer("container-name"), + testutils.NewEvictedTerminatedPodStatusWithContainer("container-name", containerIDWithPreifx("container-id")), + ) + + mbc := metadata.DefaultMetricsBuilderConfig() + mbc.Metrics.K8sPodStatusReason.Enabled = true + ts := pcommon.Timestamp(time.Now().UnixNano()) + mb := metadata.NewMetricsBuilder(mbc, receivertest.NewNopCreateSettings()) + RecordMetrics(zap.NewNop(), mb, pod, ts) + m := mb.Emit() + + expected, err := golden.ReadMetrics(filepath.Join("testdata", "expected_evicted.yaml")) + require.NoError(t, err) + require.NoError(t, pmetrictest.CompareMetrics(expected, m, + pmetrictest.IgnoreTimestamp(), + pmetrictest.IgnoreStartTimestamp(), + pmetrictest.IgnoreResourceMetricsOrder(), + pmetrictest.IgnoreMetricsOrder(), + pmetrictest.IgnoreScopeMetricsOrder(), + ), + ) +} + var containerIDWithPreifx = func(containerID string) string { return "docker://" + containerID } diff --git a/receiver/k8sclusterreceiver/internal/pod/testdata/expected_evicted.yaml b/receiver/k8sclusterreceiver/internal/pod/testdata/expected_evicted.yaml new file mode 100644 index 000000000000..15e5ec516dae --- 
/dev/null +++ b/receiver/k8sclusterreceiver/internal/pod/testdata/expected_evicted.yaml @@ -0,0 +1,95 @@ +resourceMetrics: + - resource: + attributes: + - key: k8s.namespace.name + value: + stringValue: test-namespace + - key: k8s.node.name + value: + stringValue: test-node + - key: k8s.pod.name + value: + stringValue: test-pod-1 + - key: k8s.pod.uid + value: + stringValue: test-pod-1-uid + - key: opencensus.resourcetype + value: + stringValue: k8s + schemaUrl: https://opentelemetry.io/schemas/1.18.0 + scopeMetrics: + - metrics: + - description: Current phase of the pod (1 - Pending, 2 - Running, 3 - Succeeded, 4 - Failed, 5 - Unknown) + gauge: + dataPoints: + - asInt: "4" + name: k8s.pod.phase + unit: "1" + - description: Current status reason of the pod (1 - Evicted, 2 - NodeAffinity, 3 - NodeLost, 4 - Shutdown, 5 - UnexpectedAdmissionError, 6 - Unknown) + gauge: + dataPoints: + - asInt: "1" + name: k8s.pod.status_reason + unit: "1" + scope: + name: otelcol/k8sclusterreceiver + version: latest + - resource: + attributes: + - key: container.id + value: + stringValue: container-id + - key: container.image.name + value: + stringValue: container-image-name + - key: container.image.tag + value: + stringValue: latest + - key: k8s.container.name + value: + stringValue: container-name + - key: k8s.namespace.name + value: + stringValue: test-namespace + - key: k8s.node.name + value: + stringValue: test-node + - key: k8s.pod.name + value: + stringValue: test-pod-1 + - key: k8s.pod.uid + value: + stringValue: test-pod-1-uid + - key: opencensus.resourcetype + value: + stringValue: container + schemaUrl: https://opentelemetry.io/schemas/1.18.0 + scopeMetrics: + - metrics: + - description: How many times the container has restarted in the recent past. This value is pulled directly from the K8s API and the value can go indefinitely high and be reset to 0 at any time depending on how your kubelet is configured to prune dead containers. 
It is best to not depend too much on the exact value but rather look at it as either == 0, in which case you can conclude there were no restarts in the recent past, or > 0, in which case you can conclude there were restarts in the recent past, and not try and analyze the value beyond that. + gauge: + dataPoints: + - asInt: "3" + name: k8s.container.restarts + unit: "1" + - description: Whether a container has passed its readiness probe (0 for no, 1 for yes) + gauge: + dataPoints: + - asInt: "1" + name: k8s.container.ready + unit: "1" + - description: Resource requested for the container. See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#resourcerequirements-v1-core for details + gauge: + dataPoints: + - asDouble: 10 + name: k8s.container.cpu_request + unit: "{cpu}" + - description: Maximum resource limit set for the container. See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#resourcerequirements-v1-core for details + gauge: + dataPoints: + - asDouble: 20 + name: k8s.container.cpu_limit + unit: "{cpu}" + scope: + name: otelcol/k8sclusterreceiver + version: latest diff --git a/receiver/k8sclusterreceiver/internal/testutils/objects.go b/receiver/k8sclusterreceiver/internal/testutils/objects.go index 8c5ef60f22e6..97d72021e6a2 100644 --- a/receiver/k8sclusterreceiver/internal/testutils/objects.go +++ b/receiver/k8sclusterreceiver/internal/testutils/objects.go @@ -278,6 +278,24 @@ func NewPodStatusWithContainer(containerName, containerID string) *corev1.PodSta } } +func NewEvictedTerminatedPodStatusWithContainer(containerName, containerID string) *corev1.PodStatus { + return &corev1.PodStatus{ + Phase: corev1.PodFailed, + Reason: "Evicted", + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: containerName, + Ready: true, + RestartCount: 3, + Image: "container-image-name", + ContainerID: containerID, + State: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{}, + }, + }, + }, + } +} func 
WithOwnerReferences(or []v1.OwnerReference, obj interface{}) interface{} { switch o := obj.(type) { case *corev1.Pod: diff --git a/receiver/k8sclusterreceiver/metadata.yaml b/receiver/k8sclusterreceiver/metadata.yaml index 81261b1ee773..4d21b48c4313 100644 --- a/receiver/k8sclusterreceiver/metadata.yaml +++ b/receiver/k8sclusterreceiver/metadata.yaml @@ -245,6 +245,12 @@ metrics: unit: 1 gauge: value_type: int + k8s.pod.status_reason: + enabled: false + description: Current status reason of the pod (1 - Evicted, 2 - NodeAffinity, 3 - NodeLost, 4 - Shutdown, 5 - UnexpectedAdmissionError, 6 - Unknown) + unit: 1 + gauge: + value_type: int k8s.deployment.desired: enabled: true