Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: big memory overflow #2540

Merged
merged 7 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions internal/store/horizontalpodautoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,10 +217,10 @@ func createHPASpecTargetMetric() generator.FamilyGenerator {
}

if metricTarget.Value != nil {
metricMap[value] = float64(metricTarget.Value.MilliValue()) / 1000
metricMap[value] = convertValueToFloat64(metricTarget.Value)
}
if metricTarget.AverageValue != nil {
metricMap[average] = float64(metricTarget.AverageValue.MilliValue()) / 1000
metricMap[average] = convertValueToFloat64(metricTarget.AverageValue)
}
if metricTarget.AverageUtilization != nil {
metricMap[utilization] = float64(*metricTarget.AverageUtilization)
Expand Down Expand Up @@ -276,10 +276,10 @@ func createHPAStatusTargetMetric() generator.FamilyGenerator {
}

if currentMetric.Value != nil {
metricMap[value] = float64(currentMetric.Value.MilliValue()) / 1000
metricMap[value] = convertValueToFloat64(currentMetric.Value)
}
if currentMetric.AverageValue != nil {
metricMap[average] = float64(currentMetric.AverageValue.MilliValue()) / 1000
metricMap[average] = convertValueToFloat64(currentMetric.AverageValue)
}
if currentMetric.AverageUtilization != nil {
metricMap[utilization] = float64(*currentMetric.AverageUtilization)
Expand Down
10 changes: 5 additions & 5 deletions internal/store/limitrange.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,35 +49,35 @@ var (
for resource, min := range rawLimitRange.Min {
ms = append(ms, &metric.Metric{
LabelValues: []string{string(resource), string(rawLimitRange.Type), "min"},
Value: float64(min.MilliValue()) / 1000,
Value: convertValueToFloat64(&min),
})
}

for resource, max := range rawLimitRange.Max {
ms = append(ms, &metric.Metric{
LabelValues: []string{string(resource), string(rawLimitRange.Type), "max"},
Value: float64(max.MilliValue()) / 1000,
Value: convertValueToFloat64(&max),
})
}

for resource, df := range rawLimitRange.Default {
ms = append(ms, &metric.Metric{
LabelValues: []string{string(resource), string(rawLimitRange.Type), "default"},
Value: float64(df.MilliValue()) / 1000,
Value: convertValueToFloat64(&df),
})
}

for resource, dfR := range rawLimitRange.DefaultRequest {
ms = append(ms, &metric.Metric{
LabelValues: []string{string(resource), string(rawLimitRange.Type), "defaultRequest"},
Value: float64(dfR.MilliValue()) / 1000,
Value: convertValueToFloat64(&dfR),
})
}

for resource, mLR := range rawLimitRange.MaxLimitRequestRatio {
ms = append(ms, &metric.Metric{
LabelValues: []string{string(resource), string(rawLimitRange.Type), "maxLimitRequestRatio"},
Value: float64(mLR.MilliValue()) / 1000,
Value: convertValueToFloat64(&mLR),
})
}
}
Expand Down
24 changes: 12 additions & 12 deletions internal/store/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ func createNodeStatusAllocatableFamilyGenerator() generator.FamilyGenerator {
SanitizeLabelName(string(resourceName)),
string(constant.UnitCore),
},
Value: float64(val.MilliValue()) / 1000,
Value: convertValueToFloat64(&val),
})
case v1.ResourceStorage:
fallthrough
Expand All @@ -336,15 +336,15 @@ func createNodeStatusAllocatableFamilyGenerator() generator.FamilyGenerator {
SanitizeLabelName(string(resourceName)),
string(constant.UnitByte),
},
Value: float64(val.MilliValue()) / 1000,
Value: convertValueToFloat64(&val),
})
case v1.ResourcePods:
ms = append(ms, &metric.Metric{
LabelValues: []string{
SanitizeLabelName(string(resourceName)),
string(constant.UnitInteger),
},
Value: float64(val.MilliValue()) / 1000,
Value: convertValueToFloat64(&val),
})
default:
if isHugePageResourceName(resourceName) {
Expand All @@ -353,7 +353,7 @@ func createNodeStatusAllocatableFamilyGenerator() generator.FamilyGenerator {
SanitizeLabelName(string(resourceName)),
string(constant.UnitByte),
},
Value: float64(val.MilliValue()) / 1000,
Value: convertValueToFloat64(&val),
})
}
if isAttachableVolumeResourceName(resourceName) {
Expand All @@ -362,7 +362,7 @@ func createNodeStatusAllocatableFamilyGenerator() generator.FamilyGenerator {
SanitizeLabelName(string(resourceName)),
string(constant.UnitByte),
},
Value: float64(val.MilliValue()) / 1000,
Value: convertValueToFloat64(&val),
})
}
if isExtendedResourceName(resourceName) {
Expand All @@ -371,7 +371,7 @@ func createNodeStatusAllocatableFamilyGenerator() generator.FamilyGenerator {
SanitizeLabelName(string(resourceName)),
string(constant.UnitInteger),
},
Value: float64(val.MilliValue()) / 1000,
Value: convertValueToFloat64(&val),
})
}
}
Expand Down Expand Up @@ -407,7 +407,7 @@ func createNodeStatusCapacityFamilyGenerator() generator.FamilyGenerator {
SanitizeLabelName(string(resourceName)),
string(constant.UnitCore),
},
Value: float64(val.MilliValue()) / 1000,
Value: convertValueToFloat64(&val),
})
case v1.ResourceStorage:
fallthrough
Expand All @@ -419,15 +419,15 @@ func createNodeStatusCapacityFamilyGenerator() generator.FamilyGenerator {
SanitizeLabelName(string(resourceName)),
string(constant.UnitByte),
},
Value: float64(val.MilliValue()) / 1000,
Value: convertValueToFloat64(&val),
})
case v1.ResourcePods:
ms = append(ms, &metric.Metric{
LabelValues: []string{
SanitizeLabelName(string(resourceName)),
string(constant.UnitInteger),
},
Value: float64(val.MilliValue()) / 1000,
Value: convertValueToFloat64(&val),
})
default:
if isHugePageResourceName(resourceName) {
Expand All @@ -436,7 +436,7 @@ func createNodeStatusCapacityFamilyGenerator() generator.FamilyGenerator {
SanitizeLabelName(string(resourceName)),
string(constant.UnitByte),
},
Value: float64(val.MilliValue()) / 1000,
Value: convertValueToFloat64(&val),
})
}
if isAttachableVolumeResourceName(resourceName) {
Expand All @@ -445,7 +445,7 @@ func createNodeStatusCapacityFamilyGenerator() generator.FamilyGenerator {
SanitizeLabelName(string(resourceName)),
string(constant.UnitByte),
},
Value: float64(val.MilliValue()) / 1000,
Value: convertValueToFloat64(&val),
})
}
if isExtendedResourceName(resourceName) {
Expand All @@ -454,7 +454,7 @@ func createNodeStatusCapacityFamilyGenerator() generator.FamilyGenerator {
SanitizeLabelName(string(resourceName)),
string(constant.UnitInteger),
},
Value: float64(val.MilliValue()) / 1000,
Value: convertValueToFloat64(&val),
})
}
}
Expand Down
88 changes: 88 additions & 0 deletions internal/store/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,94 @@ func TestNodeStore(t *testing.T) {
`,
MetricNames: []string{"kube_node_status_addresses"},
},
// memory overflow test
{
Obj: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "127.0.0.1",
CreationTimestamp: metav1.Time{Time: time.Unix(1500000000, 0)},
Labels: map[string]string{
"node-role.kubernetes.io/master": "",
},
},
Spec: v1.NodeSpec{
Unschedulable: true,
ProviderID: "provider://i-randomidentifier",
PodCIDR: "172.24.10.0/24",
},
Status: v1.NodeStatus{
NodeInfo: v1.NodeSystemInfo{
KernelVersion: "kernel",
KubeletVersion: "kubelet",
KubeProxyVersion: "kubeproxy",
OSImage: "osimage",
ContainerRuntimeVersion: "rkt",
SystemUUID: "6a934e21-5207-4a84-baea-3a952d926c80",
},
Addresses: []v1.NodeAddress{
{Type: "InternalIP", Address: "1.2.3.4"},
},
Capacity: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("4.3"),
// overflow test
v1.ResourceMemory: resource.MustParse("10000T"),
mrueg marked this conversation as resolved.
Show resolved Hide resolved
v1.ResourcePods: resource.MustParse("1000"),
v1.ResourceStorage: resource.MustParse("3G"),
v1.ResourceEphemeralStorage: resource.MustParse("4G"),
v1.ResourceName("nvidia.com/gpu"): resource.MustParse("4"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("3"),
v1.ResourceMemory: resource.MustParse("1G"),
v1.ResourcePods: resource.MustParse("555"),
v1.ResourceStorage: resource.MustParse("2G"),
v1.ResourceEphemeralStorage: resource.MustParse("3G"),
v1.ResourceName("nvidia.com/gpu"): resource.MustParse("1"),
},
},
},
Want: `
# HELP kube_node_created [STABLE] Unix creation timestamp
# HELP kube_node_info [STABLE] Information about a cluster node.
# HELP kube_node_labels [STABLE] Kubernetes labels converted to Prometheus labels.
# HELP kube_node_role The role of a cluster node.
# HELP kube_node_spec_unschedulable [STABLE] Whether a node can schedule new pods.
# HELP kube_node_status_allocatable [STABLE] The allocatable for different resources of a node that are available for scheduling.
# HELP kube_node_status_capacity [STABLE] The capacity for different resources of a node.
# TYPE kube_node_created gauge
# TYPE kube_node_info gauge
# TYPE kube_node_labels gauge
# TYPE kube_node_role gauge
# TYPE kube_node_spec_unschedulable gauge
# TYPE kube_node_status_allocatable gauge
# TYPE kube_node_status_capacity gauge
kube_node_created{node="127.0.0.1"} 1.5e+09
kube_node_info{container_runtime_version="rkt",kernel_version="kernel",kubelet_version="kubelet",kubeproxy_version="deprecated",node="127.0.0.1",os_image="osimage",pod_cidr="172.24.10.0/24",provider_id="provider://i-randomidentifier",internal_ip="1.2.3.4",system_uuid="6a934e21-5207-4a84-baea-3a952d926c80"} 1
kube_node_role{node="127.0.0.1",role="master"} 1
kube_node_spec_unschedulable{node="127.0.0.1"} 1
kube_node_status_allocatable{node="127.0.0.1",resource="cpu",unit="core"} 3
kube_node_status_allocatable{node="127.0.0.1",resource="ephemeral_storage",unit="byte"} 3e+09
kube_node_status_allocatable{node="127.0.0.1",resource="memory",unit="byte"} 1e+09
kube_node_status_allocatable{node="127.0.0.1",resource="nvidia_com_gpu",unit="integer"} 1
kube_node_status_allocatable{node="127.0.0.1",resource="pods",unit="integer"} 555
kube_node_status_allocatable{node="127.0.0.1",resource="storage",unit="byte"} 2e+09
kube_node_status_capacity{node="127.0.0.1",resource="cpu",unit="core"} 4.3
kube_node_status_capacity{node="127.0.0.1",resource="ephemeral_storage",unit="byte"} 4e+09
kube_node_status_capacity{node="127.0.0.1",resource="memory",unit="byte"} 1e+16
kube_node_status_capacity{node="127.0.0.1",resource="nvidia_com_gpu",unit="integer"} 4
kube_node_status_capacity{node="127.0.0.1",resource="pods",unit="integer"} 1000
kube_node_status_capacity{node="127.0.0.1",resource="storage",unit="byte"} 3e+09
`,
MetricNames: []string{
"kube_node_status_capacity",
"kube_node_status_allocatable",
"kube_node_spec_unschedulable",
"kube_node_labels",
"kube_node_role",
"kube_node_info",
"kube_node_created",
},
},
}
for i, c := range cases {
c.Func = generator.ComposeMetricGenFuncs(nodeMetricFamilies(nil, nil))
Expand Down
10 changes: 5 additions & 5 deletions internal/store/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ func createPodContainerResourceLimitsFamilyGenerator() generator.FamilyGenerator
case v1.ResourceCPU:
ms = append(ms, &metric.Metric{
LabelValues: []string{c.Name, p.Spec.NodeName, SanitizeLabelName(string(resourceName)), string(constant.UnitCore)},
Value: float64(val.MilliValue()) / 1000,
Value: convertValueToFloat64(&val),
})
case v1.ResourceStorage:
fallthrough
Expand Down Expand Up @@ -246,7 +246,7 @@ func createPodContainerResourceRequestsFamilyGenerator() generator.FamilyGenerat
case v1.ResourceCPU:
ms = append(ms, &metric.Metric{
LabelValues: []string{c.Name, p.Spec.NodeName, SanitizeLabelName(string(resourceName)), string(constant.UnitCore)},
Value: float64(val.MilliValue()) / 1000,
Value: convertValueToFloat64(&val),
})
case v1.ResourceStorage:
fallthrough
Expand Down Expand Up @@ -749,7 +749,7 @@ func createPodInitContainerResourceLimitsFamilyGenerator() generator.FamilyGener
case v1.ResourceCPU:
ms = append(ms, &metric.Metric{
LabelValues: []string{c.Name, p.Spec.NodeName, SanitizeLabelName(string(resourceName)), string(constant.UnitCore)},
Value: float64(val.MilliValue()) / 1000,
Value: convertValueToFloat64(&val),
})
case v1.ResourceStorage:
fallthrough
Expand Down Expand Up @@ -813,7 +813,7 @@ func createPodInitContainerResourceRequestsFamilyGenerator() generator.FamilyGen
case v1.ResourceCPU:
ms = append(ms, &metric.Metric{
LabelValues: []string{c.Name, p.Spec.NodeName, SanitizeLabelName(string(resourceName)), string(constant.UnitCore)},
Value: float64(val.MilliValue()) / 1000,
Value: convertValueToFloat64(&val),
})
case v1.ResourceStorage:
fallthrough
Expand Down Expand Up @@ -1122,7 +1122,7 @@ func createPodOverheadCPUCoresFamilyGenerator() generator.FamilyGenerator {
for resourceName, val := range p.Spec.Overhead {
if resourceName == v1.ResourceCPU {
ms = append(ms, &metric.Metric{
Value: float64(val.MilliValue()) / 1000,
Value: convertValueToFloat64(&val),
})
}
}
Expand Down
4 changes: 2 additions & 2 deletions internal/store/resourcequota.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,13 @@ func resourceQuotaMetricFamilies(allowAnnotationsList, allowLabelsList []string)
for res, qty := range r.Status.Hard {
ms = append(ms, &metric.Metric{
LabelValues: []string{string(res), "hard"},
Value: float64(qty.MilliValue()) / 1000,
Value: convertValueToFloat64(&qty),
})
}
for res, qty := range r.Status.Used {
ms = append(ms, &metric.Metric{
LabelValues: []string{string(res), "used"},
Value: float64(qty.MilliValue()) / 1000,
Value: convertValueToFloat64(&qty),
})
}

Expand Down
10 changes: 10 additions & 0 deletions internal/store/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (
"strconv"
"strings"

"k8s.io/apimachinery/pkg/api/resource"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/validation"

Expand Down Expand Up @@ -215,3 +217,11 @@ func mergeKeyValues(keyValues ...[]string) (keys, values []string) {

return keys, values
}

// convertValueToFloat64 converts a resource.Quantity to a float64.
mrueg marked this conversation as resolved.
Show resolved Hide resolved
func convertValueToFloat64(q *resource.Quantity) float64 {
if q.Value() > resource.MaxMilliValue {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we remove if-else and use float64(q.MilliValue() / 1000)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

may be not. q.MilliValue() will cause overflow when q.Value() is larger than maxInt64/1000. for example, 1000TB will return a negative number.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ack.

return float64(q.Value())
}
return float64(q.MilliValue()) / 1000
}