Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[podresapi 3/3] topology-aware: use Pod Resource API hints if present. #420

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cmd/plugins/topology-aware/policy/cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,11 @@ func TestAllocationMarshalling(t *testing.T) {
}{
{
name: "non-zero Exclusive",
data: []byte(`{"key1":{"Exclusive":"1","Part":1,"CPUType":0,"Container":"1","Pool":"testnode","MemoryPool":0,"MemType":"DRAM,PMEM,HBM","MemSize":0,"ColdStart":0}}`),
data: []byte(`{"key1":{"PrettyName":"","Exclusive":"1","Part":1,"CPUType":0,"Container":"1","Pool":"testnode","MemoryPool":0,"MemType":"DRAM,PMEM,HBM","MemSize":0,"ColdStart":0}}`),
},
{
name: "zero Exclusive",
data: []byte(`{"key1":{"Exclusive":"","Part":1,"CPUType":0,"Container":"1","Pool":"testnode","MemoryPool":0,"MemType":"DRAM,PMEM,HBM","MemSize":0,"ColdStart":0}}`),
data: []byte(`{"key1":{"PrettyName":"","Exclusive":"","Part":1,"CPUType":0,"Container":"1","Pool":"testnode","MemoryPool":0,"MemType":"DRAM,PMEM,HBM","MemSize":0,"ColdStart":0}}`),
},
}
for _, tc := range tcases {
Expand Down
14 changes: 12 additions & 2 deletions cmd/plugins/topology-aware/policy/mocks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"time"

nri "github.com/containerd/nri/pkg/api"
"github.com/containers/nri-plugins/pkg/agent/podresapi"
resmgr "github.com/containers/nri-plugins/pkg/apis/resmgr/v1alpha1"
"github.com/containers/nri-plugins/pkg/cpuallocator"
"github.com/containers/nri-plugins/pkg/resmgr/cache"
Expand Down Expand Up @@ -324,6 +325,9 @@ func (fake *mockSystem) SetCpusOnline(online bool, cpus idset.IDSet) (idset.IDSe
// NodeDistance is a test stub: it ignores both node IDs and always reports
// a fixed distance of 10.
func (fake *mockSystem) NodeDistance(idset.ID, idset.ID) int {
return 10
}
// NodeHintToCPUs is a test stub: it ignores its argument and always returns
// an empty string.
func (fake *mockSystem) NodeHintToCPUs(string) string {
return ""
}

type mockContainer struct {
name string
Expand Down Expand Up @@ -538,6 +542,9 @@ func (m *mockContainer) PreserveMemoryResources() bool {
// MemoryTypes is a test stub: it always reports libmem.TypeMaskDRAM together
// with a nil error.
func (m *mockContainer) MemoryTypes() (libmem.TypeMask, error) {
return libmem.TypeMaskDRAM, nil
}
// GetPodResources is a test stub: the mock container never carries any
// container resources, so it always returns nil.
func (m *mockContainer) GetPodResources() *podresapi.ContainerResources {
return nil
}

type mockPod struct {
name string
Expand Down Expand Up @@ -625,14 +632,17 @@ func (m *mockPod) GetTasks(bool) ([]string, error) {
// GetCtime is not implemented by the mock pod; calling it panics with
// "unimplemented".
func (m *mockPod) GetCtime() time.Time {
panic("unimplemented")
}
// GetPodResources is a test stub: the mock pod never carries any pod
// resources, so it always returns nil.
func (m *mockPod) GetPodResources() *podresapi.PodResources {
return nil
}

// mockCache is a test double for the resource manager cache. Its fields hold
// canned results for individual methods.
// NOTE(review): the methods consuming these fields are not all visible here;
// the per-field descriptions below are inferred from the field names — confirm.
type mockCache struct {
// presumably the value returned by GetPolicyEntry
returnValueForGetPolicyEntry bool
// presumably the first (container) result of LookupContainer
returnValue1ForLookupContainer cache.Container
// presumably the second (found) result of LookupContainer
returnValue2ForLookupContainer bool
}

func (m *mockCache) InsertPod(*nri.PodSandbox) (cache.Pod, error) {
func (m *mockCache) InsertPod(*nri.PodSandbox, <-chan *podresapi.PodResources) cache.Pod {
panic("unimplemented")
}
func (m *mockCache) DeletePod(string) cache.Pod {
Expand Down Expand Up @@ -695,7 +705,7 @@ func (m *mockCache) GetPolicyEntry(string, interface{}) bool {
// Save is a test stub: it does nothing and always returns a nil error.
func (m *mockCache) Save() error {
return nil
}
func (m *mockCache) RefreshPods([]*nri.PodSandbox) ([]cache.Pod, []cache.Pod, []cache.Container) {
func (m *mockCache) RefreshPods([]*nri.PodSandbox, <-chan podresapi.PodResourcesList) ([]cache.Pod, []cache.Pod, []cache.Container) {
panic("unimplemented")
}
func (m *mockCache) RefreshContainers([]*nri.Container) ([]cache.Container, []cache.Container) {
Expand Down
1 change: 1 addition & 0 deletions cmd/plugins/topology-aware/policy/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,7 @@ func (cs *supply) GetScore(req Request) Score {

// calculate real hint scores
hints := cr.container.GetTopologyHints()
hints.ResolvePartialHints(cs.GetNode().System().NodeHintToCPUs)
klihub marked this conversation as resolved.
Show resolved Hide resolved
score.hints = make(map[string]float64, len(hints))

for provider, hint := range cr.container.GetTopologyHints() {
Expand Down
16 changes: 16 additions & 0 deletions config/crd/bases/config.nri_balloonspolicies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,22 @@ spec:
spec:
description: BalloonsPolicySpec describes a balloons policy.
properties:
agent:
default:
nodeResourceTopology: true
description: AgentConfig provides access to configuration data for
the agent.
properties:
nodeResourceTopology:
description: |-
NodeResourceTopology enables support for exporting resource usage using
NodeResourceTopology Custom Resources.
type: boolean
podResourceAPI:
description: PodResourceAPI enables support for querying kubelet
Pod Resource API.
type: boolean
type: object
allocatorTopologyBalancing:
description: |-
If AllocatorTopologyBalancing is true, balloons are
Expand Down
16 changes: 16 additions & 0 deletions config/crd/bases/config.nri_templatepolicies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,22 @@ spec:
spec:
description: TemplatePolicySpec describes a template policy.
properties:
agent:
default:
nodeResourceTopology: true
description: AgentConfig provides access to configuration data for
the agent.
properties:
nodeResourceTopology:
description: |-
NodeResourceTopology enables support for exporting resource usage using
NodeResourceTopology Custom Resources.
type: boolean
podResourceAPI:
description: PodResourceAPI enables support for querying kubelet
Pod Resource API.
type: boolean
type: object
availableResources:
additionalProperties:
type: string
Expand Down
16 changes: 16 additions & 0 deletions config/crd/bases/config.nri_topologyawarepolicies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,22 @@ spec:
spec:
description: TopologyAwarePolicySpec describes a topology-aware policy.
properties:
agent:
default:
nodeResourceTopology: true
description: AgentConfig provides access to configuration data for
the agent.
properties:
nodeResourceTopology:
description: |-
NodeResourceTopology enables support for exporting resource usage using
NodeResourceTopology Custom Resources.
type: boolean
podResourceAPI:
description: PodResourceAPI enables support for querying kubelet
Pod Resource API.
type: boolean
type: object
availableResources:
additionalProperties:
type: string
Expand Down
16 changes: 16 additions & 0 deletions deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,22 @@ spec:
spec:
description: BalloonsPolicySpec describes a balloons policy.
properties:
agent:
default:
nodeResourceTopology: true
description: AgentConfig provides access to configuration data for
the agent.
properties:
nodeResourceTopology:
description: |-
NodeResourceTopology enables support for exporting resource usage using
NodeResourceTopology Custom Resources.
type: boolean
podResourceAPI:
description: PodResourceAPI enables support for querying kubelet
Pod Resource API.
type: boolean
type: object
allocatorTopologyBalancing:
description: |-
If AllocatorTopologyBalancing is true, balloons are
Expand Down
7 changes: 7 additions & 0 deletions deployment/helm/balloons/templates/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ spec:
mountPath: /var/run/nri-resource-policy
- name: nrisockets
mountPath: /var/run/nri
- name: pod-resources-socket
mountPath: /var/lib/kubelet/pod-resources
readOnly: true
{{- if .Values.podPriorityClassNodeCritical }}
priorityClassName: system-node-critical
{{- end }}
Expand All @@ -136,6 +139,10 @@ spec:
hostPath:
path: /var/run/nri
type: DirectoryOrCreate
- name: pod-resources-socket
hostPath:
path: /var/lib/kubelet/pod-resources
type: DirectoryOrCreate
{{- if .Values.nri.runtime.patchConfig }}
- name: containerd-config
hostPath:
Expand Down
16 changes: 16 additions & 0 deletions deployment/helm/template/crds/config.nri_templatepolicies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,22 @@ spec:
spec:
description: TemplatePolicySpec describes a template policy.
properties:
agent:
default:
nodeResourceTopology: true
description: AgentConfig provides access to configuration data for
the agent.
properties:
nodeResourceTopology:
description: |-
NodeResourceTopology enables support for exporting resource usage using
NodeResourceTopology Custom Resources.
type: boolean
podResourceAPI:
description: PodResourceAPI enables support for querying kubelet
Pod Resource API.
type: boolean
type: object
availableResources:
additionalProperties:
type: string
Expand Down
7 changes: 7 additions & 0 deletions deployment/helm/template/templates/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ spec:
mountPath: /var/run/nri-resource-policy
- name: nrisockets
mountPath: /var/run/nri
- name: pod-resources-socket
mountPath: /var/lib/kubelet/pod-resources
readOnly: true
{{- if .Values.podPriorityClassNodeCritical }}
priorityClassName: system-node-critical
{{- end }}
Expand All @@ -129,6 +132,10 @@ spec:
hostPath:
path: /var/run/nri
type: DirectoryOrCreate
- name: pod-resources-socket
hostPath:
path: /var/lib/kubelet/pod-resources
type: DirectoryOrCreate
{{- if .Values.nri.runtime.patchConfig }}
- name: containerd-config
hostPath:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,22 @@ spec:
spec:
description: TopologyAwarePolicySpec describes a topology-aware policy.
properties:
agent:
default:
nodeResourceTopology: true
description: AgentConfig provides access to configuration data for
the agent.
properties:
nodeResourceTopology:
description: |-
NodeResourceTopology enables support for exporting resource usage using
NodeResourceTopology Custom Resources.
type: boolean
podResourceAPI:
description: PodResourceAPI enables support for querying kubelet
Pod Resource API.
type: boolean
type: object
availableResources:
additionalProperties:
type: string
Expand Down
7 changes: 7 additions & 0 deletions deployment/helm/topology-aware/templates/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ spec:
mountPath: /var/run/nri-resource-policy
- name: nrisockets
mountPath: /var/run/nri
- name: pod-resources-socket
mountPath: /var/lib/kubelet/pod-resources
readOnly: true
{{- if .Values.podPriorityClassNodeCritical }}
priorityClassName: system-node-critical
{{- end }}
Expand All @@ -136,6 +139,10 @@ spec:
hostPath:
path: /var/run/nri
type: DirectoryOrCreate
- name: pod-resources-socket
hostPath:
path: /var/lib/kubelet/pod-resources
type: DirectoryOrCreate
{{- if .Values.nri.runtime.patchConfig }}
- name: containerd-config
hostPath:
Expand Down
12 changes: 11 additions & 1 deletion docs/resource-policy/policy/topology-aware.md
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ metadata:
prefer-reserved-cpus.resource-policy.nri.io/container.special: "false"
```

## Allowing or denying mount/device paths via annotations
## Controlling Topology Hints Via Annotations

User is able to mark certain pods and containers to have allowed or denied
paths for mounts or devices. What this means is that when the system
Expand Down Expand Up @@ -486,6 +486,16 @@ metadata:
- /xy-zy/another-blacklisted-path5
```

## Using Pod Resource API for Extra Topology Hints

If access to the `kubelet`'s `Pod Resource API` is enabled in the
[Node Agent's](../developers-guide/architecture.md#node-agent) configuration,
it is automatically used to generate per-container topology hints when a
device with locality to a NUMA node is advertised by the API. Annotated allow
and deny lists can be used to selectively disable or enable per-resource hints,
using `podresapi:$RESOURCE_NAME` as the path.


## Container Affinity and Anti-Affinity

### Introduction
Expand Down
20 changes: 10 additions & 10 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ require (
github.com/onsi/ginkgo/v2 v2.19.0
github.com/onsi/gomega v1.33.1
github.com/pelletier/go-toml/v2 v2.1.0
github.com/prometheus/client_golang v1.17.0
github.com/prometheus/client_model v0.5.0
github.com/prometheus/client_golang v1.19.1
github.com/prometheus/client_model v0.6.1
github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.9.0
go.opentelemetry.io/otel v1.19.0
Expand All @@ -27,11 +27,13 @@ require (
go.opentelemetry.io/otel/trace v1.19.0
golang.org/x/sys v0.21.0
golang.org/x/time v0.3.0
google.golang.org/grpc v1.65.0
k8s.io/api v0.31.2
k8s.io/apimachinery v0.31.2
k8s.io/client-go v0.31.2
k8s.io/code-generator v0.31.2
k8s.io/klog/v2 v2.130.1
k8s.io/kubelet v0.31.2
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8
sigs.k8s.io/controller-runtime v0.16.2
sigs.k8s.io/yaml v1.4.0
Expand All @@ -40,7 +42,7 @@ require (
require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cenkalti/backoff/v4 v4.2.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
Expand All @@ -63,15 +65,14 @@ require (
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/opencontainers/runtime-spec v1.1.0 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/common v0.44.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
github.com/prometheus/common v0.55.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/x448/float16 v0.8.4 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 // indirect
Expand All @@ -84,15 +85,14 @@ require (
golang.org/x/term v0.21.0 // indirect
golang.org/x/text v0.16.0 // indirect
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20231009173412-8bfb1ae86b6c // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20231009173412-8bfb1ae86b6c // indirect
google.golang.org/grpc v1.58.3 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 // indirect
google.golang.org/protobuf v1.34.2 // indirect
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/cri-api v0.25.4 // indirect
k8s.io/cri-api v0.31.2 // indirect
k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70 // indirect
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
Expand Down
Loading
Loading