From 1d397a96129b00c6a4465c4eac88e74e05af058d Mon Sep 17 00:00:00 2001 From: Krisztian Litkey Date: Fri, 29 Nov 2024 12:36:46 +0200 Subject: [PATCH 1/7] agent,config,helm: make agent runtime configurable. Add configuration for the agent itself. Currently the only control enables access to Node Resource Topology Custom Resources and it is on by default. Update the agent to honor its configuration. Signed-off-by: Krisztian Litkey --- .../bases/config.nri_balloonspolicies.yaml | 12 +++ .../bases/config.nri_templatepolicies.yaml | 12 +++ .../config.nri_topologyawarepolicies.yaml | 12 +++ .../crds/config.nri_balloonspolicies.yaml | 12 +++ .../crds/config.nri_templatepolicies.yaml | 12 +++ .../config.nri_topologyawarepolicies.yaml | 12 +++ pkg/agent/agent.go | 94 ++++++++++++++----- pkg/apis/config/v1alpha1/agent.go | 31 ++++++ pkg/apis/config/v1alpha1/balloons-policy.go | 10 ++ pkg/apis/config/v1alpha1/resmgr.go | 1 + pkg/apis/config/v1alpha1/template-policy.go | 10 ++ .../config/v1alpha1/topology-aware-policy.go | 10 ++ pkg/apis/config/v1alpha1/types.go | 9 ++ .../config/v1alpha1/zz_generated.deepcopy.go | 18 ++++ 14 files changed, 232 insertions(+), 23 deletions(-) create mode 100644 pkg/apis/config/v1alpha1/agent.go diff --git a/config/crd/bases/config.nri_balloonspolicies.yaml b/config/crd/bases/config.nri_balloonspolicies.yaml index a49c020aa..c0601b9f0 100644 --- a/config/crd/bases/config.nri_balloonspolicies.yaml +++ b/config/crd/bases/config.nri_balloonspolicies.yaml @@ -40,6 +40,18 @@ spec: spec: description: BalloonsPolicySpec describes a balloons policy. properties: + agent: + default: + nodeResourceTopology: true + description: AgentConfig provides access to configuration data for + the agent. + properties: + nodeResourceTopology: + description: |- + NodeResourceTopology enables support for exporting resource usage using + NodeResourceTopology Custom Resources. + type: boolean + type: object allocatorTopologyBalancing: description: |- If AllocatorTopologyBalancing is true, balloons are diff --git a/config/crd/bases/config.nri_templatepolicies.yaml b/config/crd/bases/config.nri_templatepolicies.yaml index cea85dc1d..125e5ccb7 100644 --- a/config/crd/bases/config.nri_templatepolicies.yaml +++ b/config/crd/bases/config.nri_templatepolicies.yaml @@ -40,6 +40,18 @@ spec: spec: description: TemplatePolicySpec describes a template policy. properties: + agent: + default: + nodeResourceTopology: true + description: AgentConfig provides access to configuration data for + the agent. + properties: + nodeResourceTopology: + description: |- + NodeResourceTopology enables support for exporting resource usage using + NodeResourceTopology Custom Resources. + type: boolean + type: object availableResources: additionalProperties: type: string diff --git a/config/crd/bases/config.nri_topologyawarepolicies.yaml b/config/crd/bases/config.nri_topologyawarepolicies.yaml index a7c215f1d..5d2e6e100 100644 --- a/config/crd/bases/config.nri_topologyawarepolicies.yaml +++ b/config/crd/bases/config.nri_topologyawarepolicies.yaml @@ -40,6 +40,18 @@ spec: spec: description: TopologyAwarePolicySpec describes a topology-aware policy. properties: + agent: + default: + nodeResourceTopology: true + description: AgentConfig provides access to configuration data for + the agent. + properties: + nodeResourceTopology: + description: |- + NodeResourceTopology enables support for exporting resource usage using + NodeResourceTopology Custom Resources. + type: boolean + type: object availableResources: additionalProperties: type: string diff --git a/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml b/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml index a49c020aa..c0601b9f0 100644 --- a/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml +++ b/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml @@ -40,6 +40,18 @@ spec: spec: description: BalloonsPolicySpec describes a balloons policy. properties: + agent: + default: + nodeResourceTopology: true + description: AgentConfig provides access to configuration data for + the agent. + properties: + nodeResourceTopology: + description: |- + NodeResourceTopology enables support for exporting resource usage using + NodeResourceTopology Custom Resources. + type: boolean + type: object allocatorTopologyBalancing: description: |- If AllocatorTopologyBalancing is true, balloons are diff --git a/deployment/helm/template/crds/config.nri_templatepolicies.yaml b/deployment/helm/template/crds/config.nri_templatepolicies.yaml index cea85dc1d..125e5ccb7 100644 --- a/deployment/helm/template/crds/config.nri_templatepolicies.yaml +++ b/deployment/helm/template/crds/config.nri_templatepolicies.yaml @@ -40,6 +40,18 @@ spec: spec: description: TemplatePolicySpec describes a template policy. properties: + agent: + default: + nodeResourceTopology: true + description: AgentConfig provides access to configuration data for + the agent. + properties: + nodeResourceTopology: + description: |- + NodeResourceTopology enables support for exporting resource usage using + NodeResourceTopology Custom Resources. + type: boolean + type: object availableResources: additionalProperties: type: string diff --git a/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml b/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml index a7c215f1d..5d2e6e100 100644 --- a/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml +++ b/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml @@ -40,6 +40,18 @@ spec: spec: description: TopologyAwarePolicySpec describes a topology-aware policy. properties: + agent: + default: + nodeResourceTopology: true + description: AgentConfig provides access to configuration data for + the agent. + properties: + nodeResourceTopology: + description: |- + NodeResourceTopology enables support for exporting resource usage using + NodeResourceTopology Custom Resources. + type: boolean + type: object availableResources: additionalProperties: type: string diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index 334b298b2..f0f6b9cd5 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -268,37 +268,39 @@ func (a *Agent) hasLocalConfig() bool { func (a *Agent) setupClients() error { if a.hasLocalConfig() { + log.Warn("running with local configuration, skipping cluster access client setup...") return nil } - cfg, err := a.getRESTConfig() - if err != nil { - return err - } + // Create HTTP/REST client and K8s client on initial startup. Any failure + // to create these is a failure start up. + if a.httpCli == nil { + log.Info("setting up HTTP/REST client...") + restCfg, err := a.getRESTConfig() + if err != nil { + return err + } - a.httpCli, err = rest.HTTPClientFor(cfg) - if err != nil { - return fmt.Errorf("failed to setup kubernetes HTTP client: %w", err) - } + a.httpCli, err = rest.HTTPClientFor(restCfg) + if err != nil { + return fmt.Errorf("failed to setup kubernetes HTTP client: %w", err) + } - a.k8sCli, err = k8sclient.NewForConfigAndClient(cfg, a.httpCli) - if err != nil { - a.cleanupClients() - return fmt.Errorf("failed to setup kubernetes client: %w", err) - } + log.Info("setting up K8s client...") + a.k8sCli, err = k8sclient.NewForConfigAndClient(restCfg, a.httpCli) + if err != nil { + a.cleanupClients() + return fmt.Errorf("failed to setup kubernetes client: %w", err) + } - restCfg := *cfg - a.nrtCli, err = nrtapi.NewForConfigAndClient(&restCfg, a.httpCli) - if err != nil { - a.cleanupClients() - return fmt.Errorf("failed to setup NRT client: %w", err) + kubeCfg := *restCfg + err = a.cfgIf.SetKubeClient(a.httpCli, &kubeCfg) + if err != nil { + return fmt.Errorf("failed to setup kubernetes config resource client: %w", err) + } } - restCfg = *cfg - err = a.cfgIf.SetKubeClient(a.httpCli, &restCfg) - if err != nil { - return fmt.Errorf("failed to setup kubernetes config resource client: %w", err) - } + a.configure(a.currentCfg) return nil } @@ -312,6 +314,51 @@ func (a *Agent) cleanupClients() { a.nrtCli = nil } +var ( + defaultConfig = &cfgapi.AgentConfig{ + NodeResourceTopology: true, + } +) + +func getAgentConfig(newConfig metav1.Object) *cfgapi.AgentConfig { + cfg := cfgapi.GetAgentConfig(newConfig) + if cfg == nil { + return defaultConfig + } + return cfg +} + +func (a *Agent) configure(newConfig metav1.Object) { + if a.hasLocalConfig() { + log.Warn("running with local configuration, skipping client setup...") + return + } + + cfg := getAgentConfig(newConfig) + + // Reconfigure NRT client, both on initial startup and reconfiguration. + // Failure to create a client is not a fatal error. + switch { + case cfg.NodeResourceTopology && a.nrtCli == nil: + log.Info("enabling NRT client") + cfg, err := a.getRESTConfig() + if err != nil { + log.Error("failed to setup NRT client: %w", err) + break + } + cli, err := nrtapi.NewForConfigAndClient(cfg, a.httpCli) + if err != nil { + log.Error("failed to setup NRT client: %w", err) + break + } + a.nrtCli = cli + + case !cfg.NodeResourceTopology && a.nrtCli != nil: + log.Info("disabling NRT client") + a.nrtCli = nil + } +} + func (a *Agent) getRESTConfig() (*rest.Config, error) { var ( cfg *rest.Config @@ -556,6 +603,7 @@ func (a *Agent) updateConfig(cfg metav1.Object) { } a.currentCfg = cfg + a.configure(cfg) } func (a *Agent) patchConfigStatus(prev, curr metav1.Object, errors error) { diff --git a/pkg/apis/config/v1alpha1/agent.go b/pkg/apis/config/v1alpha1/agent.go new file mode 100644 index 000000000..c01b9305a --- /dev/null +++ b/pkg/apis/config/v1alpha1/agent.go @@ -0,0 +1,31 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1alpha1 + +// AgentConfig provides access to configuration data for the agent. +type AgentConfig struct { + // NodeResourceTopology enables support for exporting resource usage using + // NodeResourceTopology Custom Resources. + // +optional + NodeResourceTopology bool `json:"nodeResourceTopology,omitempty"` +} + +// GetAgentConfig returns the agent-specific configuration if we have one. +func GetAgentConfig(cfg interface{}) *AgentConfig { + if ac, ok := cfg.(interface{ AgentConfig() *AgentConfig }); ok { + return ac.AgentConfig() + } + return nil +} diff --git a/pkg/apis/config/v1alpha1/balloons-policy.go b/pkg/apis/config/v1alpha1/balloons-policy.go index 9622b6b43..70fcb1423 100644 --- a/pkg/apis/config/v1alpha1/balloons-policy.go +++ b/pkg/apis/config/v1alpha1/balloons-policy.go @@ -18,6 +18,16 @@ var ( _ ResmgrConfig = &BalloonsPolicy{} ) +func (c *BalloonsPolicy) AgentConfig() *AgentConfig { + if c == nil { + return nil + } + + a := c.Spec.Agent + + return &a +} + func (c *BalloonsPolicy) CommonConfig() *CommonConfig { if c == nil { return nil diff --git a/pkg/apis/config/v1alpha1/resmgr.go b/pkg/apis/config/v1alpha1/resmgr.go index e89d4d2c9..d026f3a0c 100644 --- a/pkg/apis/config/v1alpha1/resmgr.go +++ b/pkg/apis/config/v1alpha1/resmgr.go @@ -29,6 +29,7 @@ import ( // +kubebuilder:object:generate=false type ResmgrConfig interface { metav1.ObjectMetaAccessor + AgentConfig() *AgentConfig CommonConfig() *CommonConfig PolicyConfig() interface{} } diff --git a/pkg/apis/config/v1alpha1/template-policy.go b/pkg/apis/config/v1alpha1/template-policy.go index 97258c947..3a598a742 100644 --- a/pkg/apis/config/v1alpha1/template-policy.go +++ b/pkg/apis/config/v1alpha1/template-policy.go @@ -18,6 +18,16 @@ var ( _ ResmgrConfig = &TemplatePolicy{} ) +func (c *TemplatePolicy) AgentConfig() *AgentConfig { + if c == nil { + return nil + } + + a := c.Spec.Agent + + return &a +} + func (c *TemplatePolicy) CommonConfig() *CommonConfig { if c == nil { return nil diff --git a/pkg/apis/config/v1alpha1/topology-aware-policy.go b/pkg/apis/config/v1alpha1/topology-aware-policy.go index 75df70498..2d4984649 100644 --- a/pkg/apis/config/v1alpha1/topology-aware-policy.go +++ b/pkg/apis/config/v1alpha1/topology-aware-policy.go @@ -18,6 +18,16 @@ var ( _ ResmgrConfig = &TopologyAwarePolicy{} ) +func (c *TopologyAwarePolicy) AgentConfig() *AgentConfig { + if c == nil { + return nil + } + + a := c.Spec.Agent + + return &a +} + func (c *TopologyAwarePolicy) CommonConfig() *CommonConfig { if c == nil { return nil diff --git a/pkg/apis/config/v1alpha1/types.go b/pkg/apis/config/v1alpha1/types.go index 07730aea2..5acc9ba70 100644 --- a/pkg/apis/config/v1alpha1/types.go +++ b/pkg/apis/config/v1alpha1/types.go @@ -46,6 +46,9 @@ type TopologyAwarePolicySpec struct { Log log.Config `json:"log,omitempty"` // +optional Instrumentation instrumentation.Config `json:"instrumentation,omitempty"` + // +optional + // +kubebuilder:default={"nodeResourceTopology": true } + Agent AgentConfig `json:"agent,omitempty"` } // TopologyAwarePolicyList represents a list of TopologyAwarePolicies. @@ -78,6 +81,9 @@ type BalloonsPolicySpec struct { Log log.Config `json:"log,omitempty"` // +optional Instrumentation instrumentation.Config `json:"instrumentation,omitempty"` + // +optional + // +kubebuilder:default={"nodeResourceTopology": true } + Agent AgentConfig `json:"agent,omitempty"` } // BalloonsPolicyList represents a list of BalloonsPolicies. @@ -110,6 +116,9 @@ type TemplatePolicySpec struct { Log log.Config `json:"log,omitempty"` // +optional Instrumentation instrumentation.Config `json:"instrumentation,omitempty"` + // +optional + // +kubebuilder:default={"nodeResourceTopology": true } + Agent AgentConfig `json:"agent,omitempty"` } // TemplatePolicyList represents a list of TemplatePolicies. diff --git a/pkg/apis/config/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/config/v1alpha1/zz_generated.deepcopy.go index 2ca084794..21f01a9e6 100644 --- a/pkg/apis/config/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/config/v1alpha1/zz_generated.deepcopy.go @@ -22,6 +22,21 @@ import ( runtime "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AgentConfig) DeepCopyInto(out *AgentConfig) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AgentConfig. +func (in *AgentConfig) DeepCopy() *AgentConfig { + if in == nil { + return nil + } + out := new(AgentConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *BalloonsPolicy) DeepCopyInto(out *BalloonsPolicy) { *out = *in @@ -88,6 +103,7 @@ func (in *BalloonsPolicySpec) DeepCopyInto(out *BalloonsPolicySpec) { in.Control.DeepCopyInto(&out.Control) in.Log.DeepCopyInto(&out.Log) in.Instrumentation.DeepCopyInto(&out.Instrumentation) + out.Agent = in.Agent } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BalloonsPolicySpec. @@ -227,6 +243,7 @@ func (in *TemplatePolicySpec) DeepCopyInto(out *TemplatePolicySpec) { in.Control.DeepCopyInto(&out.Control) in.Log.DeepCopyInto(&out.Log) in.Instrumentation.DeepCopyInto(&out.Instrumentation) + out.Agent = in.Agent } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TemplatePolicySpec. @@ -305,6 +322,7 @@ func (in *TopologyAwarePolicySpec) DeepCopyInto(out *TopologyAwarePolicySpec) { in.Control.DeepCopyInto(&out.Control) in.Log.DeepCopyInto(&out.Log) in.Instrumentation.DeepCopyInto(&out.Instrumentation) + out.Agent = in.Agent } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TopologyAwarePolicySpec. From 95afcfec7d01b820f9b60ab2d16cad14858d5d38 Mon Sep 17 00:00:00 2001 From: Krisztian Litkey Date: Tue, 3 Dec 2024 17:08:09 +0200 Subject: [PATCH 2/7] agent: add support for querying Pod Resource API. Add support for tapping into the kubelet's pod resource API to query extra information about resources assigned to pods and containers. Use such a new client to expose agent functions for listing and querying pod resources, both synchronously and asynchronously. Signed-off-by: Krisztian Litkey --- .../bases/config.nri_balloonspolicies.yaml | 4 + .../bases/config.nri_templatepolicies.yaml | 4 + .../config.nri_topologyawarepolicies.yaml | 4 + .../crds/config.nri_balloonspolicies.yaml | 4 + .../crds/config.nri_templatepolicies.yaml | 4 + .../config.nri_topologyawarepolicies.yaml | 4 + go.mod | 20 +-- go.sum | 44 ++--- pkg/agent/agent.go | 119 +++++++------ pkg/agent/pod-resource-api.go | 80 +++++++++ pkg/agent/podresapi/client.go | 167 ++++++++++++++++++ pkg/agent/podresapi/resources.go | 134 ++++++++++++++ pkg/apis/config/v1alpha1/agent.go | 3 + 13 files changed, 509 insertions(+), 82 deletions(-) create mode 100644 pkg/agent/pod-resource-api.go create mode 100644 pkg/agent/podresapi/client.go create mode 100644 pkg/agent/podresapi/resources.go diff --git a/config/crd/bases/config.nri_balloonspolicies.yaml b/config/crd/bases/config.nri_balloonspolicies.yaml index c0601b9f0..2ce11773a 100644 --- a/config/crd/bases/config.nri_balloonspolicies.yaml +++ b/config/crd/bases/config.nri_balloonspolicies.yaml @@ -51,6 +51,10 @@ spec: NodeResourceTopology enables support for exporting resource usage using NodeResourceTopology Custom Resources. type: boolean + podResourceAPI: + description: PodResourceAPI enables support for querying kubelet + Pod Resource API. + type: boolean type: object allocatorTopologyBalancing: description: |- diff --git a/config/crd/bases/config.nri_templatepolicies.yaml b/config/crd/bases/config.nri_templatepolicies.yaml index 125e5ccb7..938c4cd73 100644 --- a/config/crd/bases/config.nri_templatepolicies.yaml +++ b/config/crd/bases/config.nri_templatepolicies.yaml @@ -51,6 +51,10 @@ spec: NodeResourceTopology enables support for exporting resource usage using NodeResourceTopology Custom Resources. type: boolean + podResourceAPI: + description: PodResourceAPI enables support for querying kubelet + Pod Resource API. + type: boolean type: object availableResources: additionalProperties: diff --git a/config/crd/bases/config.nri_topologyawarepolicies.yaml b/config/crd/bases/config.nri_topologyawarepolicies.yaml index 5d2e6e100..1ac7af5f9 100644 --- a/config/crd/bases/config.nri_topologyawarepolicies.yaml +++ b/config/crd/bases/config.nri_topologyawarepolicies.yaml @@ -51,6 +51,10 @@ spec: NodeResourceTopology enables support for exporting resource usage using NodeResourceTopology Custom Resources. type: boolean + podResourceAPI: + description: PodResourceAPI enables support for querying kubelet + Pod Resource API. + type: boolean type: object availableResources: additionalProperties: diff --git a/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml b/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml index c0601b9f0..2ce11773a 100644 --- a/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml +++ b/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml @@ -51,6 +51,10 @@ spec: NodeResourceTopology enables support for exporting resource usage using NodeResourceTopology Custom Resources. type: boolean + podResourceAPI: + description: PodResourceAPI enables support for querying kubelet + Pod Resource API. + type: boolean type: object allocatorTopologyBalancing: description: |- diff --git a/deployment/helm/template/crds/config.nri_templatepolicies.yaml b/deployment/helm/template/crds/config.nri_templatepolicies.yaml index 125e5ccb7..938c4cd73 100644 --- a/deployment/helm/template/crds/config.nri_templatepolicies.yaml +++ b/deployment/helm/template/crds/config.nri_templatepolicies.yaml @@ -51,6 +51,10 @@ spec: NodeResourceTopology enables support for exporting resource usage using NodeResourceTopology Custom Resources. type: boolean + podResourceAPI: + description: PodResourceAPI enables support for querying kubelet + Pod Resource API. + type: boolean type: object availableResources: additionalProperties: diff --git a/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml b/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml index 5d2e6e100..1ac7af5f9 100644 --- a/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml +++ b/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml @@ -51,6 +51,10 @@ spec: NodeResourceTopology enables support for exporting resource usage using NodeResourceTopology Custom Resources. type: boolean + podResourceAPI: + description: PodResourceAPI enables support for querying kubelet + Pod Resource API. + type: boolean type: object availableResources: additionalProperties: diff --git a/go.mod b/go.mod index cdef4cb07..3b60e206d 100644 --- a/go.mod +++ b/go.mod @@ -16,8 +16,8 @@ require ( github.com/onsi/ginkgo/v2 v2.19.0 github.com/onsi/gomega v1.33.1 github.com/pelletier/go-toml/v2 v2.1.0 - github.com/prometheus/client_golang v1.17.0 - github.com/prometheus/client_model v0.5.0 + github.com/prometheus/client_golang v1.19.1 + github.com/prometheus/client_model v0.6.1 github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.9.0 go.opentelemetry.io/otel v1.19.0 @@ -27,11 +27,13 @@ require ( go.opentelemetry.io/otel/trace v1.19.0 golang.org/x/sys v0.21.0 golang.org/x/time v0.3.0 + google.golang.org/grpc v1.65.0 k8s.io/api v0.31.2 k8s.io/apimachinery v0.31.2 k8s.io/client-go v0.31.2 k8s.io/code-generator v0.31.2 k8s.io/klog/v2 v2.130.1 + k8s.io/kubelet v0.31.2 k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 sigs.k8s.io/controller-runtime v0.16.2 sigs.k8s.io/yaml v1.4.0 @@ -40,7 +42,7 @@ require ( require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v4 v4.2.1 // indirect - github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/emicklei/go-restful/v3 v3.11.0 // indirect github.com/fxamacker/cbor/v2 v2.7.0 // indirect @@ -63,15 +65,14 @@ require ( github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/mailru/easyjson v0.7.7 // indirect - github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/opencontainers/runtime-spec v1.1.0 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/prometheus/common v0.44.0 // indirect - github.com/prometheus/procfs v0.12.0 // indirect + github.com/prometheus/common v0.55.0 // indirect + github.com/prometheus/procfs v0.15.1 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/x448/float16 v0.8.4 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 // indirect @@ -84,15 +85,14 @@ require ( golang.org/x/term v0.21.0 // indirect golang.org/x/text v0.16.0 // indirect golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20231009173412-8bfb1ae86b6c // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20231009173412-8bfb1ae86b6c // indirect - google.golang.org/grpc v1.58.3 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 // indirect google.golang.org/protobuf v1.34.2 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/cri-api v0.25.4 // indirect + k8s.io/cri-api v0.31.2 // indirect k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70 // indirect k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect diff --git a/go.sum b/go.sum index 8a1b7ea33..3900efd13 100644 --- a/go.sum +++ b/go.sum @@ -619,8 +619,9 @@ github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= @@ -713,8 +714,9 @@ github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69 github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4= -github.com/golang/glog v1.1.0 h1:/d3pCKDPWNnvIWe0vVUpNP32qc8U3PDVxySP/y360qE= github.com/golang/glog v1.1.0/go.mod h1:pfYeQZ3JWZoXTV5sFc986z3HTpwQs9At6P4ImfuP3NQ= +github.com/golang/glog v1.2.1 h1:OptwRhECazUx5ix5TTWC3EZhsZEHWcYWY4FQHTIubm4= +github.com/golang/glog v1.2.1/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -867,8 +869,6 @@ github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJ github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-sqlite3 v1.14.14/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= -github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= -github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -899,18 +899,18 @@ github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qR github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.17.0 h1:rl2sfwZMtSthVU752MqfjQozy7blglC+1SOtjMAMh+Q= -github.com/prometheus/client_golang v1.17.0/go.mod h1:VeL+gMmOAxkS2IqfCq0ZmHSL+LjWfWDUmp1mBz9JgUY= +github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= +github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.3.0/go.mod h1:LDGWKZIo7rky3hgvBe+caln+Dr3dPggB5dvjtD7w9+w= -github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= -github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= -github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY= -github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= +github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= -github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= -github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= @@ -1561,16 +1561,14 @@ google.golang.org/genproto v0.0.0-20230330154414-c0448cd141ea/go.mod h1:UUQDJDOl google.golang.org/genproto v0.0.0-20230331144136-dcfb400f0633/go.mod h1:UUQDJDOlWu4KYeJZffbWgBkS1YFobzKbLVfK69pe0Ak= google.golang.org/genproto v0.0.0-20230525234025-438c736192d0/go.mod h1:9ExIQyXL5hZrHzQceCwuSYwZZ5QZBazOcprJ5rgs3lY= google.golang.org/genproto v0.0.0-20230526161137-0005af68ea54/go.mod h1:zqTuNwFlFRsw5zIts5VnzLQxSRqh+CGOTVMlYbY0Eyk= -google.golang.org/genproto v0.0.0-20231002182017-d307bd883b97 h1:SeZZZx0cP0fqUyA+oRzP9k7cSwJlvDFiROO72uwD6i0= -google.golang.org/genproto v0.0.0-20231002182017-d307bd883b97/go.mod h1:t1VqOqqvce95G3hIDCT5FeO3YUc6Q4Oe24L/+rNMxRk= google.golang.org/genproto/googleapis/api v0.0.0-20230525234020-1aefcd67740a/go.mod h1:ts19tUU+Z0ZShN1y3aPyq2+O3d5FUNNgT6FtOzmrNn8= google.golang.org/genproto/googleapis/api v0.0.0-20230525234035-dd9d682886f9/go.mod h1:vHYtlOoi6TsQ3Uk2yxR7NI5z8uoV+3pZtR4jmHIkRig= -google.golang.org/genproto/googleapis/api v0.0.0-20231009173412-8bfb1ae86b6c h1:0RtEmmHjemvUXloH7+RuBSIw7n+GEHMOMY1CkGYnWq4= -google.golang.org/genproto/googleapis/api v0.0.0-20231009173412-8bfb1ae86b6c/go.mod h1:Wth13BrWMRN/G+guBLupKa6fslcWZv14R0ZKDRkNfY8= +google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157 h1:7whR9kGa5LUwFtpLm2ArCEejtnxlGeLbAyjFY8sGNFw= +google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157/go.mod h1:99sLkeliLXfdj2J75X3Ho+rrVCaJze0uwN7zDDkjPVU= google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234015-3fc162c6f38a/go.mod h1:xURIpW9ES5+/GZhnV6beoEtxQrnkRGIfP5VQG2tCBLc= google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234030-28d5490b6b19/go.mod h1:66JfowdXAEgad5O9NnYcsNPLCPZJD++2L9X0PCMODrA= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231009173412-8bfb1ae86b6c h1:jHkCUWkseRf+W+edG5hMzr/Uh1xkDREY4caybAq4dpY= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231009173412-8bfb1ae86b6c/go.mod h1:4cYg8o5yUbm77w8ZX00LhMVNl/YVBFJRYWDc0uYWMs0= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 h1:BwIjyKYGsK9dMCBOorzRri8MQwmi7mT9rGHsCEinZkA= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094/go.mod h1:Ue6ibwXGpU+dqIcODieyLOcgj7z8+IcskoNIgZxtrFY= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ -1612,8 +1610,8 @@ google.golang.org/grpc v1.52.3/go.mod h1:pu6fVzoFb+NBYNAvQL08ic+lvB2IojljRYuun5v google.golang.org/grpc v1.53.0/go.mod h1:OnIrk0ipVdj4N5d9IUoFUx72/VlD7+jUsHwZgwSMQpw= google.golang.org/grpc v1.54.0/go.mod h1:PUSEXI6iWghWaB6lXM4knEgpJNu2qUcKfDtNci3EC2g= google.golang.org/grpc v1.57.0/go.mod h1:Sd+9RMTACXwmub0zcNY2c4arhtrbBYD1AUHI/dt16Mo= -google.golang.org/grpc v1.58.3 h1:BjnpXut1btbtgN/6sp+brB2Kbm2LjNXnidYujAVbSoQ= -google.golang.org/grpc v1.58.3/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0= +google.golang.org/grpc v1.65.0 h1:bs/cUb4lp1G5iImFFd3u5ixQzweKizoZJAwBNLR42lc= +google.golang.org/grpc v1.65.0/go.mod h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= @@ -1667,14 +1665,16 @@ k8s.io/client-go v0.31.2 h1:Y2F4dxU5d3AQj+ybwSMqQnpZH9F30//1ObxOKlTI9yc= k8s.io/client-go v0.31.2/go.mod h1:NPa74jSVR/+eez2dFsEIHNa+3o09vtNaWwWwb1qSxSs= k8s.io/code-generator v0.31.2 h1:xLWxG0HEpMSHfcM//3u3Ro2Hmc6AyyLINQS//Z2GEOI= k8s.io/code-generator v0.31.2/go.mod h1:eEQHXgBU/m7LDaToDoiz3t97dUUVyOblQdwOr8rivqc= -k8s.io/cri-api v0.25.4 h1:NgyuGXa4YDJsCV5UIQpaQPrv/pc9Jg9BcnR+xqGSf40= -k8s.io/cri-api v0.25.4/go.mod h1:riC/P0yOGUf2K1735wW+CXs1aY2ctBgePtnnoFLd0dU= +k8s.io/cri-api v0.31.2 h1:O/weUnSHvM59nTio0unxIUFyRHMRKkYn96YDILSQKmo= +k8s.io/cri-api v0.31.2/go.mod h1:Po3TMAYH/+KrZabi7QiwQI4a692oZcUOUThd/rqwxrI= k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70 h1:NGrVE502P0s0/1hudf8zjgwki1X/TByhmAoILTarmzo= k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70/go.mod h1:VH3AT8AaQOqiGjMF9p0/IM1Dj+82ZwjfxUP1IxaHE+8= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= +k8s.io/kubelet v0.31.2 h1:6Hytyw4LqWqhgzoi7sPfpDGClu2UfxmPmaiXPC4FRgI= +k8s.io/kubelet v0.31.2/go.mod h1:0E4++3cMWi2cJxOwuaQP3eMBa7PSOvAFgkTPlVc/2FA= k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= lukechampine.com/uint128 v1.1.1/go.mod h1:c4eWIwlEGaxC/+H1VguhU4PHXNWDCDMUlWdIWl2j1gk= diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index f0f6b9cd5..660e083f0 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -30,6 +30,7 @@ import ( "k8s.io/client-go/tools/clientcmd" nrtapi "github.com/containers/nri-plugins/pkg/agent/nrtapi" + "github.com/containers/nri-plugins/pkg/agent/podresapi" "github.com/containers/nri-plugins/pkg/agent/watch" cfgapi "github.com/containers/nri-plugins/pkg/apis/config/v1alpha1" k8sclient "k8s.io/client-go/kubernetes" @@ -126,11 +127,12 @@ type Agent struct { kubeConfig string // kubeconfig path configFile string // configuration file to use instead of custom resource - cfgIf ConfigInterface // custom resource access interface - httpCli *http.Client // shared HTTP client - k8sCli *k8sclient.Clientset // kubernetes client - nrtCli *nrtapi.Client // NRT custom resources client - nrtLock sync.Mutex // serialize NRT custom resource updates + cfgIf ConfigInterface // custom resource access interface + httpCli *http.Client // shared HTTP client + k8sCli *k8sclient.Clientset // kubernetes client + nrtCli *nrtapi.Client // NRT custom resources client + nrtLock sync.Mutex // serialize NRT custom resource updates + podResCli *podresapi.Client // pod resources API client notifyFn NotifyFn // config resource change notification callback nodeWatch watch.Interface // kubernetes node watch @@ -262,6 +264,68 @@ func (a *Agent) Stop() { } } +var ( + defaultConfig = &cfgapi.AgentConfig{ + NodeResourceTopology: true, + } +) + +func getAgentConfig(newConfig metav1.Object) *cfgapi.AgentConfig { + cfg := cfgapi.GetAgentConfig(newConfig) + if cfg == nil { + return defaultConfig + } + return cfg +} + +func (a *Agent) configure(newConfig metav1.Object) { + if a.hasLocalConfig() { + log.Warn("running with local configuration, skipping client setup...") + return + } + + cfg := getAgentConfig(newConfig) + + // Failure to create a client is not a fatal error. + switch { + case cfg.NodeResourceTopology && a.nrtCli == nil: + log.Info("enabling NRT client") + cfg, err := a.getRESTConfig() + if err != nil { + log.Error("failed to setup NRT client: %w", err) + break + } + cli, err := nrtapi.NewForConfigAndClient(cfg, a.httpCli) + if err != nil { + log.Error("failed to setup NRT client: %w", err) + break + } + a.nrtCli = cli + + case !cfg.NodeResourceTopology && a.nrtCli != nil: + log.Info("disabling NRT client") + a.nrtCli = nil + } + + // Reconfigure pod resource client, both on initial startup and reconfiguration. + // Failure to create a client is not a fatal error. + switch { + case cfg.PodResourceAPI && a.podResCli == nil: + log.Info("enabling PodResourceAPI client") + cli, err := podresapi.NewClient() + if err != nil { + log.Error("failed to setup PodResourceAPI client: %v", err) + break + } + a.podResCli = cli + + case !cfg.PodResourceAPI && a.podResCli != nil: + log.Info("disabling PodResourceAPI client") + a.podResCli.Close() + a.podResCli = nil + } +} + func (a *Agent) hasLocalConfig() bool { return a.configFile != "" } @@ -314,51 +378,6 @@ func (a *Agent) cleanupClients() { a.nrtCli = nil } -var ( - defaultConfig = &cfgapi.AgentConfig{ - NodeResourceTopology: true, - } -) - -func getAgentConfig(newConfig metav1.Object) *cfgapi.AgentConfig { - cfg := cfgapi.GetAgentConfig(newConfig) - if cfg == nil { - return defaultConfig - } - return cfg -} - -func (a *Agent) configure(newConfig metav1.Object) { - if a.hasLocalConfig() { - log.Warn("running with local configuration, skipping client setup...") - return - } - - cfg := getAgentConfig(newConfig) - - // Reconfigure NRT client, both on initial startup and reconfiguration. - // Failure to create a client is not a fatal error. - switch { - case cfg.NodeResourceTopology && a.nrtCli == nil: - log.Info("enabling NRT client") - cfg, err := a.getRESTConfig() - if err != nil { - log.Error("failed to setup NRT client: %w", err) - break - } - cli, err := nrtapi.NewForConfigAndClient(cfg, a.httpCli) - if err != nil { - log.Error("failed to setup NRT client: %w", err) - break - } - a.nrtCli = cli - - case !cfg.NodeResourceTopology && a.nrtCli != nil: - log.Info("disabling NRT client") - a.nrtCli = nil - } -} - func (a *Agent) getRESTConfig() (*rest.Config, error) { var ( cfg *rest.Config diff --git a/pkg/agent/pod-resource-api.go b/pkg/agent/pod-resource-api.go new file mode 100644 index 000000000..83a7dc4ec --- /dev/null +++ b/pkg/agent/pod-resource-api.go @@ -0,0 +1,80 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package agent + +import ( + "context" + "time" + + "github.com/containers/nri-plugins/pkg/agent/podresapi" +) + +// GetPodResources queries the given pod's resources. +func (a *Agent) GetPodResources(ns, pod string, timeout time.Duration) (*podresapi.PodResources, error) { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + return a.podResCli.Get(ctx, ns, pod) +} + +// GoGetPodResources queries the given pod's resources asynchronously. +func (a *Agent) GoGetPodResources(ns, pod string, timeout time.Duration) <-chan *podresapi.PodResources { + if !a.podResCli.HasClient() { + return nil + } + + ch := make(chan *podresapi.PodResources, 1) + + go func() { + defer close(ch) + p, err := a.GetPodResources(ns, pod, timeout) + if err != nil { + log.Error("failed to get pod resources for %s/%s: %v", ns, pod, err) + return + } + ch <- p + }() + + return ch +} + +// ListPodResources lists all pods' resources. +func (a *Agent) ListPodResources(timeout time.Duration) (podresapi.PodResourcesList, error) { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + return a.podResCli.List(ctx) +} + +// GoListPodResources lists all pods' resources asynchronously. +func (a *Agent) GoListPodResources(timeout time.Duration) <-chan podresapi.PodResourcesList { + if !a.podResCli.HasClient() { + return nil + } + + ch := make(chan podresapi.PodResourcesList, 1) + + go func() { + defer close(ch) + l, err := a.ListPodResources(timeout) + if err != nil { + log.Error("failed to list pod resources: %v", err) + return + } + ch <- l + }() + + return ch +} diff --git a/pkg/agent/podresapi/client.go b/pkg/agent/podresapi/client.go new file mode 100644 index 000000000..e9a2e4909 --- /dev/null +++ b/pkg/agent/podresapi/client.go @@ -0,0 +1,167 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package podresapi + +import ( + "context" + "fmt" + "net" + "strings" + "time" + + logger "github.com/containers/nri-plugins/pkg/log" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + + api "k8s.io/kubelet/pkg/apis/podresources/v1" +) + +// ClientOption is an option for the client. +type ClientOption func(*Client) + +// Client is Pod Resources API client. +type Client struct { + socketPath string + conn *grpc.ClientConn + cli api.PodResourcesListerClient + noGet bool +} + +const ( + // these constants were obtained from NFD sources, cross-checked against + // https://github.com/kubernetes/kubernetes/blob/release-1.31/test/e2e_node/util.go#L83 + defaultSocketPath = "/var/lib/kubelet/pod-resources/kubelet.sock" + timeout = 10 * time.Second + maxSize = 1024 * 1024 * 16 +) + +var ( + errGetDisabled = fmt.Errorf("PodResources API Get method disabled") + log = logger.Get("podresapi") +) + +// WithSocketPath sets the kubelet socket path to connect to. +func WithSocketPath(path string) ClientOption { + return func(c *Client) { + c.socketPath = path + } +} + +// WithClientConn sets a pre-created gRPC connection for the client. +func WithClientConn(conn *grpc.ClientConn) ClientOption { + return func(c *Client) { + c.conn = conn + } +} + +// NewClient creates a new Pod Resources API client with the given options. +func NewClient(options ...ClientOption) (*Client, error) { + c := &Client{ + socketPath: defaultSocketPath, + } + + for _, o := range options { + o(c) + } + + if c.conn == nil { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + dialer := func(ctx context.Context, addr string) (net.Conn, error) { + return (&net.Dialer{}).DialContext(ctx, "unix", addr) + } + + conn, err := grpc.DialContext( + ctx, + c.socketPath, + grpc.WithTransportCredentials(insecure.NewCredentials()), + grpc.WithContextDialer(dialer), + grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(maxSize)), + grpc.WithBlock(), + ) + + if err != nil { + return nil, fmt.Errorf("failed to connect podresource client: %w", err) + } + + c.conn = conn + } + + c.cli = api.NewPodResourcesListerClient(c.conn) + return c, nil +} + +// Close closes the client. +func (c *Client) Close() { + if c != nil && c.conn != nil { + c.conn.Close() + c.conn = nil + } + c.cli = nil +} + +// HasClient returns true if the client has a usable client. +func (c *Client) HasClient() bool { + return c != nil && c.cli != nil +} + +// List lists all pods' resources. +func (c *Client) List(ctx context.Context) (PodResourcesList, error) { + if !c.HasClient() { + return nil, nil + } + + reply, err := c.cli.List(ctx, &api.ListPodResourcesRequest{}) + if err != nil { + return nil, fmt.Errorf("failed to get pod resources by list: %w", err) + } + + return PodResourcesList(reply.GetPodResources()), nil +} + +// Get queries the given pod's resources. +func (c *Client) Get(ctx context.Context, namespace, pod string) (*PodResources, error) { + if !c.HasClient() { + return nil, nil + } + + if !c.noGet { + reply, err := c.cli.Get(ctx, &api.GetPodResourcesRequest{ + PodNamespace: namespace, + PodName: pod, + }) + if err == nil { + return &PodResources{reply.GetPodResources()}, nil + } + + if !strings.Contains(fmt.Sprintf("%v", err), fmt.Sprintf("%v", errGetDisabled)) { + return nil, fmt.Errorf("failed to get pod resources: %w", err) + } + + log.Warnf("PodResources API Get() disabled, falling back to List()...") + log.Warnf("You can enable Get() by passing this feature gate setting to kubelet:") + log.Warnf(" --feature-gates=KubeletPodResourcesGet=true") + + c.noGet = true + } + + l, err := c.List(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get pod resources: %w", err) + } + + return l.GetPodResources(namespace, pod), nil +} diff --git a/pkg/agent/podresapi/resources.go b/pkg/agent/podresapi/resources.go new file mode 100644 index 000000000..03c6b47fd --- /dev/null +++ b/pkg/agent/podresapi/resources.go @@ -0,0 +1,134 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package podresapi + +import ( + "strconv" + "strings" + + "github.com/containers/nri-plugins/pkg/topology" + api "k8s.io/kubelet/pkg/apis/podresources/v1" +) + +// PodResources contains resources for a pod. +type PodResources struct { + *api.PodResources +} + +// ContainerResources contains resources for a single container. +type ContainerResources struct { + *api.ContainerResources +} + +// PodResourcesList is a list of PodResources. +type PodResourcesList []*api.PodResources + +// PodResourceMap is a map representation of PodResourcesList. +type PodResourcesMap map[string]map[string]*PodResources + +// GetContainer returns resources for the given container. +func (p *PodResources) GetContainer(ctr string) *ContainerResources { + if p == nil { + return nil + } + + for _, c := range p.GetContainers() { + if c.GetName() == ctr { + return &ContainerResources{c} + } + } + + return nil +} + +// GetPodResources returns resources for the given pod. +func (l PodResourcesList) GetPodResources(ns, pod string) *PodResources { + for _, p := range l { + if p.GetNamespace() == ns && p.GetName() == pod { + return &PodResources{p} + } + } + + return nil +} + +// Map returns a PodResourcesMap for the pod resources list. +func (l PodResourcesList) Map() PodResourcesMap { + m := make(PodResourcesMap) + + for _, p := range l { + podMap, ok := m[p.GetNamespace()] + if !ok { + podMap = make(map[string]*PodResources) + m[p.GetNamespace()] = podMap + } + podMap[p.GetName()] = &PodResources{p} + } + + return m +} + +// GetPod returns resources for the given pod. +func (m PodResourcesMap) GetPod(ns, pod string) *PodResources { + return m[ns][pod] +} + +// GetContainer returns resources for the given container. +func (m PodResourcesMap) GetContainer(ns, pod, ctr string) *ContainerResources { + return m.GetPod(ns, pod).GetContainer(ctr) +} + +// GetDeviceTopologyHints returns topology hints for the given container. checkDenied +// is used to filter out hints that are disallowed. +func (r *ContainerResources) GetDeviceTopologyHints(checkDenied func(string) bool) topology.Hints { + if r == nil { + return nil + } + + hints := make(topology.Hints) + + for _, dev := range r.GetDevices() { + name := "podresourceapi:" + dev.GetResourceName() + + if checkDenied(name) { + log.Info("filtering hints for disallowed device %s", name) + continue + } + + var ( + nodes = dev.GetTopology().GetNodes() + numas = &strings.Builder{} + sep = "" + ) + + if len(nodes) == 0 { + continue + } + + for i, n := range nodes { + numas.WriteString(sep) + numas.WriteString(strconv.FormatInt(n.GetID(), 10)) + if i == 0 { + sep = "," + } + } + + hints[name] = topology.Hint{ + NUMAs: numas.String(), + } + } + + return hints +} diff --git a/pkg/apis/config/v1alpha1/agent.go b/pkg/apis/config/v1alpha1/agent.go index c01b9305a..2fe3cd70d 100644 --- a/pkg/apis/config/v1alpha1/agent.go +++ b/pkg/apis/config/v1alpha1/agent.go @@ -20,6 +20,9 @@ type AgentConfig struct { // NodeResourceTopology Custom Resources. // +optional NodeResourceTopology bool `json:"nodeResourceTopology,omitempty"` + // PodResourceAPI enables support for querying kubelet Pod Resource API. + // +optional + PodResourceAPI bool `json:"podResourceAPI,omitempty"` } // GetAgentConfig returns the agent-specific configuration if we have one. From 0d7dd34c9122216eeb8ed8f4c8b9442e85fe8220 Mon Sep 17 00:00:00 2001 From: Krisztian Litkey Date: Tue, 26 Nov 2024 22:16:44 +0200 Subject: [PATCH 3/7] resmgr: query Pod Resource API for extra hints. Try querying Pod Resource API and generate extra topology hints using container device assignments listed there. Signed-off-by: Krisztian Litkey --- pkg/resmgr/cache/cache.go | 61 ++++++++++++++++++++++++----------- pkg/resmgr/cache/container.go | 38 ++++++++++++++++++---- pkg/resmgr/cache/pod.go | 31 +++++++++++++++++- pkg/resmgr/nri.go | 21 ++++++++---- 4 files changed, 119 insertions(+), 32 deletions(-) diff --git a/pkg/resmgr/cache/cache.go b/pkg/resmgr/cache/cache.go index ee11ce88a..dc0982617 100644 --- a/pkg/resmgr/cache/cache.go +++ b/pkg/resmgr/cache/cache.go @@ -27,6 +27,7 @@ import ( nri "github.com/containerd/nri/pkg/api" v1 "k8s.io/api/core/v1" + "github.com/containers/nri-plugins/pkg/agent/podresapi" "github.com/containers/nri-plugins/pkg/utils/cpuset" resmgr "github.com/containers/nri-plugins/pkg/apis/resmgr/v1alpha1" @@ -114,6 +115,10 @@ type Pod interface { // and return the value of the first key found. GetEffectiveAnnotation(key, container string) (string, bool) + // GetPodResources returns the pod resources for this pod, waiting for any + // pending fetch to complete or a timeout. + GetPodResources() *podresapi.PodResources + // GetContainerAffinity returns the affinity expressions for the named container. GetContainerAffinity(string) ([]*Affinity, error) // ScopeExpression returns an affinity expression for defining this pod as the scope. @@ -138,12 +143,16 @@ type Pod interface { // A cached pod. type pod struct { - cache *cache // our cache of object - Pod *nri.PodSandbox // pod data from NRI - QOSClass v1.PodQOSClass // pod QOS class - Affinity *podContainerAffinity // annotated container affinity - prettyName string // cached PrettyName() - ctime time.Time // time of pod creation + cache *cache // our cache of object + Pod *nri.PodSandbox // pod data from NRI + QOSClass v1.PodQOSClass // pod QOS class + Affinity *podContainerAffinity // annotated container affinity + PodResources *podresapi.PodResources // pod resources acquired from podresourceapi + podResCh <-chan *podresapi.PodResources // channel for pod resource fetch + waitResCh chan struct{} // channel for waiting for pod resource fetch + prettyName string // cached PrettyName() + ctime time.Time // time of pod creation + } // ContainerState is the container state in the runtime. @@ -222,6 +231,9 @@ type Container interface { // The requirements are calculated from the containers cgroup parameters. GetResourceRequirements() v1.ResourceRequirements + // GetPodResources gets container-specific resources acquired from podresourceapi. + GetPodResources() *podresapi.ContainerResources + // SetResourceUpdates sets updated resources for a container. Returns true if the // resources were really updated. SetResourceUpdates(*nri.LinuxResources) bool @@ -322,6 +334,7 @@ type container struct { State ContainerState // current state of the container Requirements v1.ResourceRequirements + PodResources *podresapi.ContainerResources ResourceUpdates *v1.ResourceRequirements request interface{} @@ -359,7 +372,7 @@ type Cacheable interface { // itself upon startup. type Cache interface { // InsertPod inserts a pod into the cache, using a runtime request or reply. - InsertPod(pod *nri.PodSandbox) (Pod, error) + InsertPod(pod *nri.PodSandbox, ch <-chan *podresapi.PodResources) Pod // DeletePod deletes a pod from the cache. DeletePod(id string) Pod // LookupPod looks up a pod in the cache. @@ -410,7 +423,7 @@ type Cache interface { Save() error // RefreshPods purges/inserts stale/new pods/containers using a pod sandbox list response. - RefreshPods([]*nri.PodSandbox) ([]Pod, []Pod, []Container) + RefreshPods([]*nri.PodSandbox, <-chan podresapi.PodResourcesList) ([]Pod, []Pod, []Container) // RefreshContainers purges/inserts stale/new containers using a container list response. RefreshContainers([]*nri.Container) ([]Container, []Container) @@ -523,12 +536,12 @@ func (cch *cache) ResetActivePolicy() error { } // Insert a pod into the cache. -func (cch *cache) InsertPod(nriPod *nri.PodSandbox) (Pod, error) { - p := cch.createPod(nriPod) +func (cch *cache) InsertPod(nriPod *nri.PodSandbox, ch <-chan *podresapi.PodResources) Pod { + p := cch.createPod(nriPod, ch) cch.Pods[nriPod.GetId()] = p cch.Save() - return p, nil + return p } // Delete a pod from the cache. @@ -620,7 +633,7 @@ func (cch *cache) LookupContainerByCgroup(path string) (Container, bool) { } // RefreshPods purges/inserts stale/new pods/containers into the cache. -func (cch *cache) RefreshPods(pods []*nri.PodSandbox) ([]Pod, []Pod, []Container) { +func (cch *cache) RefreshPods(pods []*nri.PodSandbox, resCh <-chan podresapi.PodResourcesList) ([]Pod, []Pod, []Container) { valid := make(map[string]struct{}) add := []Pod{} @@ -631,13 +644,8 @@ func (cch *cache) RefreshPods(pods []*nri.PodSandbox) ([]Pod, []Pod, []Container valid[item.Id] = struct{}{} if _, ok := cch.Pods[item.Id]; !ok { log.Debug("inserting discovered pod %s...", item.Id) - pod, err := cch.InsertPod(item) - if err != nil { - log.Error("failed to insert discovered pod %s to cache: %v", - item.Id, err) - } else { - add = append(add, pod) - } + pod := cch.InsertPod(item, nil) + add = append(add, pod) } } for _, pod := range cch.Pods { @@ -655,6 +663,16 @@ func (cch *cache) RefreshPods(pods []*nri.PodSandbox) ([]Pod, []Pod, []Container } } + if resCh != nil { + podResList := <-resCh + if len(podResList) > 0 { + podResMap := podResList.Map() + for _, pod := range cch.Pods { + pod.setPodResources(podResMap.GetPod(pod.GetNamespace(), pod.GetName())) + } + } + } + return add, del, containers } @@ -686,6 +704,11 @@ func (cch *cache) RefreshContainers(containers []*nri.Container) ([]Container, [ c.State = ContainerStateStale del = append(del, c) } + + pod, ok := cch.Pods[c.GetPodID()] + if ok { + c.PodResources = pod.GetPodResources().GetContainer(c.GetName()) + } } return add, del diff --git a/pkg/resmgr/cache/container.go b/pkg/resmgr/cache/container.go index 306492883..3031e1f5b 100644 --- a/pkg/resmgr/cache/container.go +++ b/pkg/resmgr/cache/container.go @@ -24,6 +24,7 @@ import ( "strings" "time" + "github.com/containers/nri-plugins/pkg/agent/podresapi" resmgr "github.com/containers/nri-plugins/pkg/apis/resmgr/v1alpha1" "github.com/containers/nri-plugins/pkg/cgroups" "github.com/containers/nri-plugins/pkg/kubernetes" @@ -89,18 +90,25 @@ func (c *container) getDenyPathList() (*PathList, bool, error) { // Create and initialize a cached container. func (cch *cache) createContainer(nriCtr *nri.Container) (*container, error) { podID := nriCtr.GetPodSandboxId() - _, ok := cch.Pods[podID] + pod, ok := cch.Pods[podID] if !ok { return nil, cacheError("can't find cached pod %s for container %s (%s)", podID, nriCtr.GetId(), nriCtr.GetName()) } c := &container{ - cache: cch, - Ctr: nriCtr, - State: nriCtr.GetState(), - Tags: make(map[string]string), - ctime: time.Now(), + cache: cch, + Ctr: nriCtr, + State: nriCtr.GetState(), + Tags: make(map[string]string), + ctime: time.Now(), + PodResources: pod.GetPodResources().GetContainer(nriCtr.GetName()), + } + + if c.PodResources == nil { + log.Info("no pod resources for container %s", c.PrettyName()) + } else { + log.Info("got pod resources %+v", c.PodResources) } c.generateTopologyHints() @@ -233,9 +241,19 @@ func (c *container) generateTopologyHints() { } } } + + checkDenied := func(path string) bool { + return checkAllowedAndDeniedPaths(path, allowPathList, denyPathList) + } + + if podRes := c.GetPodResources(); podRes != nil { + hints := podRes.GetDeviceTopologyHints(checkDenied) + c.TopologyHints = topology.MergeTopologyHints(c.TopologyHints, hints) + } } else { log.Info("automatic topology hint generation disabled for devices") } + } func isReadOnlyMount(m *nri.Mount) bool { @@ -452,6 +470,14 @@ func (c *container) GetResourceRequirements() v1.ResourceRequirements { return c.Requirements } +func (c *container) GetPodResources() *podresapi.ContainerResources { + pod, ok := c.GetPod() + if !ok { + return nil + } + return pod.GetPodResources().GetContainer(c.GetName()) +} + func (c *container) SetResourceUpdates(r *nri.LinuxResources) bool { r = mergeNRIResources(r, c.Ctr.GetLinux().GetResources()) diff --git a/pkg/resmgr/cache/pod.go b/pkg/resmgr/cache/pod.go index c69bcbe56..2c82083b5 100644 --- a/pkg/resmgr/cache/pod.go +++ b/pkg/resmgr/cache/pod.go @@ -21,19 +21,22 @@ import ( nri "github.com/containerd/nri/pkg/api" v1 "k8s.io/api/core/v1" + "github.com/containers/nri-plugins/pkg/agent/podresapi" resmgr "github.com/containers/nri-plugins/pkg/apis/resmgr/v1alpha1" "github.com/containers/nri-plugins/pkg/cgroups" "github.com/containers/nri-plugins/pkg/kubernetes" ) // Create and initialize a cached pod. -func (cch *cache) createPod(nriPod *nri.PodSandbox) *pod { +func (cch *cache) createPod(nriPod *nri.PodSandbox, ch <-chan *podresapi.PodResources) *pod { p := &pod{ cache: cch, Pod: nriPod, ctime: time.Now(), } + p.goFetchPodResources(ch) + if err := p.parseCgroupForQOSClass(); err != nil { log.Error("pod %s: %v", p.PrettyName(), err) } @@ -133,6 +136,32 @@ func (p *pod) GetQOSClass() v1.PodQOSClass { return p.QOSClass } +func (p *pod) goFetchPodResources(ch <-chan *podresapi.PodResources) { + go func() { + p.podResCh = ch + p.waitResCh = make(chan struct{}) + defer close(p.waitResCh) + + if p.podResCh != nil { + p.PodResources = <-p.podResCh + log.Debug("fetched pod resources %+v for %s", p.PodResources, p.GetName()) + } + }() +} + +func (p *pod) setPodResources(podRes *podresapi.PodResources) { + p.PodResources = podRes + log.Debug("set pod resources %+v for %s", p.PodResources, p.GetName()) +} + +func (p *pod) GetPodResources() *podresapi.PodResources { + if p.waitResCh != nil { + log.Debug("waiting for pod resources fetch to complete...") + _ = <-p.waitResCh + } + return p.PodResources +} + func (p *pod) GetContainerAffinity(name string) ([]*Affinity, error) { if p.Affinity != nil { return (*p.Affinity)[name], nil diff --git a/pkg/resmgr/nri.go b/pkg/resmgr/nri.go index 379b992db..ba7c58eba 100644 --- a/pkg/resmgr/nri.go +++ b/pkg/resmgr/nri.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "os" + "time" "github.com/containers/nri-plugins/pkg/instrumentation/metrics" "github.com/containers/nri-plugins/pkg/instrumentation/tracing" @@ -42,6 +43,11 @@ var ( nri = logger.NewLogger("nri-plugin") ) +const ( + podResListTimeout = 2 * time.Second + podResGetTimeout = 1 * time.Second +) + func newNRIPlugin(resmgr *resmgr) (*nriPlugin, error) { p := &nriPlugin{ resmgr: resmgr, @@ -198,7 +204,7 @@ func (p *nriPlugin) syncWithNRI(pods []*api.PodSandbox, containers []*api.Contai nri.Info("synchronizing cache state with NRI runtime...") - _, _, deleted := m.cache.RefreshPods(pods) + _, _, deleted := m.cache.RefreshPods(pods, m.agent.GoListPodResources(podResListTimeout)) for _, c := range deleted { nri.Info("discovered stale container %s (%s)...", c.PrettyName(), c.GetID()) released = append(released, c) @@ -290,18 +296,21 @@ func (p *nriPlugin) RunPodSandbox(ctx context.Context, pod *api.PodSandbox) (ret span.End(tracing.WithStatus(retErr)) }() + m := p.resmgr + podResCh := m.agent.GoGetPodResources(pod.GetNamespace(), pod.GetName(), podResGetTimeout) + p.dump(in, event, pod) defer func() { p.dump(out, event, retErr) }() - m := p.resmgr m.Lock() defer m.Unlock() b := metrics.Block() defer b.Done() - m.cache.InsertPod(pod) + m.cache.InsertPod(pod, podResCh) + return nil } @@ -383,19 +392,19 @@ func (p *nriPlugin) RemovePodSandbox(ctx context.Context, podSandbox *api.PodSan return nil } -func (p *nriPlugin) CreateContainer(ctx context.Context, podSandbox *api.PodSandbox, container *api.Container) (adjust *api.ContainerAdjustment, updates []*api.ContainerUpdate, retErr error) { +func (p *nriPlugin) CreateContainer(ctx context.Context, pod *api.PodSandbox, container *api.Container) (adjust *api.ContainerAdjustment, updates []*api.ContainerUpdate, retErr error) { event := CreateContainer _, span := tracing.StartSpan( ctx, event, - tracing.WithAttributes(containerSpanTags(podSandbox, container)...), + tracing.WithAttributes(containerSpanTags(pod, container)...), ) defer func() { span.End(tracing.WithStatus(retErr)) }() - p.dump(in, event, podSandbox, container) + p.dump(in, event, pod, container) defer func() { p.dump(out, event, adjust, updates, retErr) }() From c1a14178f80266aa589b9e69676478fbb6453d95 Mon Sep 17 00:00:00 2001 From: Krisztian Litkey Date: Mon, 2 Dec 2024 13:44:26 +0200 Subject: [PATCH 4/7] helm: provide access to pod-resources kubelet socket. Bind-mount kubelet pod-resources directory read-only to plugin daemonset, to provide access to kubelet pod-resources socket. Signed-off-by: Krisztian Litkey --- deployment/helm/balloons/templates/daemonset.yaml | 7 +++++++ deployment/helm/template/templates/daemonset.yaml | 7 +++++++ deployment/helm/topology-aware/templates/daemonset.yaml | 7 +++++++ 3 files changed, 21 insertions(+) diff --git a/deployment/helm/balloons/templates/daemonset.yaml b/deployment/helm/balloons/templates/daemonset.yaml index 7a60a59bc..50f3a0d2d 100644 --- a/deployment/helm/balloons/templates/daemonset.yaml +++ b/deployment/helm/balloons/templates/daemonset.yaml @@ -117,6 +117,9 @@ spec: mountPath: /var/run/nri-resource-policy - name: nrisockets mountPath: /var/run/nri + - name: pod-resources-socket + mountPath: /var/lib/kubelet/pod-resources + readOnly: true {{- if .Values.podPriorityClassNodeCritical }} priorityClassName: system-node-critical {{- end }} @@ -136,6 +139,10 @@ spec: hostPath: path: /var/run/nri type: DirectoryOrCreate + - name: pod-resources-socket + hostPath: + path: /var/lib/kubelet/pod-resources + type: DirectoryOrCreate {{- if .Values.nri.runtime.patchConfig }} - name: containerd-config hostPath: diff --git a/deployment/helm/template/templates/daemonset.yaml b/deployment/helm/template/templates/daemonset.yaml index 626ba2837..67b9d5687 100644 --- a/deployment/helm/template/templates/daemonset.yaml +++ b/deployment/helm/template/templates/daemonset.yaml @@ -110,6 +110,9 @@ spec: mountPath: /var/run/nri-resource-policy - name: nrisockets mountPath: /var/run/nri + - name: pod-resources-socket + mountPath: /var/lib/kubelet/pod-resources + readOnly: true {{- if .Values.podPriorityClassNodeCritical }} priorityClassName: system-node-critical {{- end }} @@ -129,6 +132,10 @@ spec: hostPath: path: /var/run/nri type: DirectoryOrCreate + - name: pod-resources-socket + hostPath: + path: /var/lib/kubelet/pod-resources + type: DirectoryOrCreate {{- if .Values.nri.runtime.patchConfig }} - name: containerd-config hostPath: diff --git a/deployment/helm/topology-aware/templates/daemonset.yaml b/deployment/helm/topology-aware/templates/daemonset.yaml index d9cffa393..5e956518b 100644 --- a/deployment/helm/topology-aware/templates/daemonset.yaml +++ b/deployment/helm/topology-aware/templates/daemonset.yaml @@ -117,6 +117,9 @@ spec: mountPath: /var/run/nri-resource-policy - name: nrisockets mountPath: /var/run/nri + - name: pod-resources-socket + mountPath: /var/lib/kubelet/pod-resources + readOnly: true {{- if .Values.podPriorityClassNodeCritical }} priorityClassName: system-node-critical {{- end }} @@ -136,6 +139,10 @@ spec: hostPath: path: /var/run/nri type: DirectoryOrCreate + - name: pod-resources-socket + hostPath: + path: /var/lib/kubelet/pod-resources + type: DirectoryOrCreate {{- if .Values.nri.runtime.patchConfig }} - name: containerd-config hostPath: From a3b8918e52aa3e5e1fef807f4b2389ab083bd39b Mon Sep 17 00:00:00 2001 From: Krisztian Litkey Date: Sat, 30 Nov 2024 16:49:16 +0200 Subject: [PATCH 5/7] topology,sysfs: implement partial hint resolution. Add functions for resolving partial, NUMA-only topology hints to fully defined CPU-specific ones using a sysfs instance. Signed-off-by: Krisztian Litkey --- pkg/sysfs/system.go | 20 ++++++++++++++++++++ pkg/topology/topology.go | 11 +++++++++++ 2 files changed, 31 insertions(+) diff --git a/pkg/sysfs/system.go b/pkg/sysfs/system.go index 7dfe23181..7381a6460 100644 --- a/pkg/sysfs/system.go +++ b/pkg/sysfs/system.go @@ -123,6 +123,8 @@ type System interface { Offlined() cpuset.CPUSet Isolated() cpuset.CPUSet + + NodeHintToCPUs(string) string } // System devices @@ -742,6 +744,24 @@ func (sys *system) Isolated() cpuset.CPUSet { return sys.IsolatedCPUs() } +// Resolve given node topology hints to CPUs. +func (sys *system) NodeHintToCPUs(nodes string) string { + mset, err := cpuset.Parse(nodes) + if err != nil { + log.Error("failed to resolve nodes %q to CPUs: %v", nodes, err) + return "" + } + + cset := cpuset.New() + for _, id := range mset.List() { + if n, ok := sys.nodes[id]; ok { + cset = cset.Union(n.CPUSet()) + } + } + + return cset.Intersection(sys.OnlineCPUs()).String() +} + // Discover Cpus present in the system. func (sys *system) discoverCPUs() error { if sys.cpus != nil { diff --git a/pkg/topology/topology.go b/pkg/topology/topology.go index 6eeb116f4..d3957a1c8 100644 --- a/pkg/topology/topology.go +++ b/pkg/topology/topology.go @@ -227,6 +227,17 @@ func MergeTopologyHints(org, hints Hints) (res Hints) { return } +// ResolvePartialHints resolves NUMA-only hints to CPU hints using the given function. +func (hints Hints) ResolvePartialHints(resolve func(NUMAs string) string) { + for k, h := range hints { + if h.CPUs == "" && h.NUMAs != "" { + h.CPUs = resolve(h.NUMAs) + log.Debugf("partial NUMA hint %q resolved to CPUs %q", h.NUMAs, h.CPUs) + hints[k] = h + } + } +} + // String returns the hints as a string. func (h *Hint) String() string { cpus, nodes, sockets, sep := "", "", "", "" From e40e41f8419c7f59d88904733467af9e0d709f5e Mon Sep 17 00:00:00 2001 From: Krisztian Litkey Date: Sat, 30 Nov 2024 16:47:46 +0200 Subject: [PATCH 6/7] topology-aware: ensure all hints are fully resolved. Resolve potential partial NUMA-only topology hints to full CPU hints. Signed-off-by: Krisztian Litkey --- cmd/plugins/topology-aware/policy/resources.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/plugins/topology-aware/policy/resources.go b/cmd/plugins/topology-aware/policy/resources.go index f6d25a7d7..52c739605 100644 --- a/cmd/plugins/topology-aware/policy/resources.go +++ b/cmd/plugins/topology-aware/policy/resources.go @@ -831,6 +831,7 @@ func (cs *supply) GetScore(req Request) Score { // calculate real hint scores hints := cr.container.GetTopologyHints() + hints.ResolvePartialHints(cs.GetNode().System().NodeHintToCPUs) score.hints = make(map[string]float64, len(hints)) for provider, hint := range cr.container.GetTopologyHints() { From d05b4e1c78b28961269ad075eef3cc4b044803fe Mon Sep 17 00:00:00 2001 From: Krisztian Litkey Date: Sat, 30 Nov 2024 20:55:57 +0200 Subject: [PATCH 7/7] topology-aware: update docs, mock tests. Update docs with a brief mention of how to enable topology hints based on Pod Resource API queries. Make mock tests compile again. Signed-off-by: Krisztian Litkey --- cmd/plugins/topology-aware/policy/cache_test.go | 4 ++-- cmd/plugins/topology-aware/policy/mocks_test.go | 14 ++++++++++++-- docs/resource-policy/policy/topology-aware.md | 12 +++++++++++- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/cmd/plugins/topology-aware/policy/cache_test.go b/cmd/plugins/topology-aware/policy/cache_test.go index a3696d0cd..7ce1a7b80 100644 --- a/cmd/plugins/topology-aware/policy/cache_test.go +++ b/cmd/plugins/topology-aware/policy/cache_test.go @@ -88,11 +88,11 @@ func TestAllocationMarshalling(t *testing.T) { }{ { name: "non-zero Exclusive", - data: []byte(`{"key1":{"Exclusive":"1","Part":1,"CPUType":0,"Container":"1","Pool":"testnode","MemoryPool":0,"MemType":"DRAM,PMEM,HBM","MemSize":0,"ColdStart":0}}`), + data: []byte(`{"key1":{"PrettyName":"","Exclusive":"1","Part":1,"CPUType":0,"Container":"1","Pool":"testnode","MemoryPool":0,"MemType":"DRAM,PMEM,HBM","MemSize":0,"ColdStart":0}}`), }, { name: "zero Exclusive", - data: []byte(`{"key1":{"Exclusive":"","Part":1,"CPUType":0,"Container":"1","Pool":"testnode","MemoryPool":0,"MemType":"DRAM,PMEM,HBM","MemSize":0,"ColdStart":0}}`), + data: []byte(`{"key1":{"PrettyName":"","Exclusive":"","Part":1,"CPUType":0,"Container":"1","Pool":"testnode","MemoryPool":0,"MemType":"DRAM,PMEM,HBM","MemSize":0,"ColdStart":0}}`), }, } for _, tc := range tcases { diff --git a/cmd/plugins/topology-aware/policy/mocks_test.go b/cmd/plugins/topology-aware/policy/mocks_test.go index d9e7c6c21..f9b2da507 100644 --- a/cmd/plugins/topology-aware/policy/mocks_test.go +++ b/cmd/plugins/topology-aware/policy/mocks_test.go @@ -19,6 +19,7 @@ import ( "time" nri "github.com/containerd/nri/pkg/api" + "github.com/containers/nri-plugins/pkg/agent/podresapi" resmgr "github.com/containers/nri-plugins/pkg/apis/resmgr/v1alpha1" "github.com/containers/nri-plugins/pkg/cpuallocator" "github.com/containers/nri-plugins/pkg/resmgr/cache" @@ -324,6 +325,9 @@ func (fake *mockSystem) SetCpusOnline(online bool, cpus idset.IDSet) (idset.IDSe func (fake *mockSystem) NodeDistance(idset.ID, idset.ID) int { return 10 } +func (fake *mockSystem) NodeHintToCPUs(string) string { + return "" +} type mockContainer struct { name string @@ -538,6 +542,9 @@ func (m *mockContainer) PreserveMemoryResources() bool { func (m *mockContainer) MemoryTypes() (libmem.TypeMask, error) { return libmem.TypeMaskDRAM, nil } +func (m *mockContainer) GetPodResources() *podresapi.ContainerResources { + return nil +} type mockPod struct { name string @@ -625,6 +632,9 @@ func (m *mockPod) GetTasks(bool) ([]string, error) { func (m *mockPod) GetCtime() time.Time { panic("unimplemented") } +func (m *mockPod) GetPodResources() *podresapi.PodResources { + return nil +} type mockCache struct { returnValueForGetPolicyEntry bool @@ -632,7 +642,7 @@ type mockCache struct { returnValue2ForLookupContainer bool } -func (m *mockCache) InsertPod(*nri.PodSandbox) (cache.Pod, error) { +func (m *mockCache) InsertPod(*nri.PodSandbox, <-chan *podresapi.PodResources) cache.Pod { panic("unimplemented") } func (m *mockCache) DeletePod(string) cache.Pod { @@ -695,7 +705,7 @@ func (m *mockCache) GetPolicyEntry(string, interface{}) bool { func (m *mockCache) Save() error { return nil } -func (m *mockCache) RefreshPods([]*nri.PodSandbox) ([]cache.Pod, []cache.Pod, []cache.Container) { +func (m *mockCache) RefreshPods([]*nri.PodSandbox, <-chan podresapi.PodResourcesList) ([]cache.Pod, []cache.Pod, []cache.Container) { panic("unimplemented") } func (m *mockCache) RefreshContainers([]*nri.Container) ([]cache.Container, []cache.Container) { diff --git a/docs/resource-policy/policy/topology-aware.md b/docs/resource-policy/policy/topology-aware.md index 103dc94bb..368b104a4 100644 --- a/docs/resource-policy/policy/topology-aware.md +++ b/docs/resource-policy/policy/topology-aware.md @@ -443,7 +443,7 @@ metadata: prefer-reserved-cpus.resource-policy.nri.io/container.special: "false" ``` -## Allowing or denying mount/device paths via annotations +## Controlling Topology Hints Via Annotations User is able mark certain pods and containers to have allowed or denied paths for mounts or devices. What this means is that when the system @@ -486,6 +486,16 @@ metadata: - /xy-zy/another-blacklisted-path5 ``` +## Using Pod Resource API for Extra Topology Hints + +If access to the `kubelet`'s `Pod Resource API` is enabled in the +[Node Agent's](../developers-guide/architecture.md#node-agent) configuration, +it is automatically used to generate per-container topology hints when a +device with locality to a NUMA node is advertised by the API. Annotated allow +and deny lists can be used to selectively disable or enable per-resource hints, +using `podresapi:$RESOURCE_NAME` as the path. + + ## Container Affinity and Anti-Affinity ### Introduction