From a31f9621079845b83fb610e56202633f9ada0d86 Mon Sep 17 00:00:00 2001 From: Mikhail Fedosin Date: Mon, 29 May 2023 15:00:55 +0200 Subject: [PATCH] feat: create kubernetes events in case of insufficient capacity errors We need better visibility into insufficient capacity error (ICE) events when Karpenter encounters them. --- cmd/controller/main.go | 1 + hack/docs/instancetypes_gen_docs.go | 4 +++- .../karpenter.k8s.aws_awsnodetemplates.yaml | 2 +- pkg/cloudprovider/cloudprovider.go | 21 ++++++++++++++++++- pkg/cloudprovider/suite_test.go | 2 +- .../machine/garbagecollection/suite_test.go | 4 +++- pkg/controllers/machine/link/suite_test.go | 4 +++- pkg/providers/instance/suite_test.go | 4 +++- pkg/providers/instancetype/suite_test.go | 2 +- pkg/providers/launchtemplate/suite_test.go | 2 +- 10 files changed, 37 insertions(+), 9 deletions(-) diff --git a/cmd/controller/main.go b/cmd/controller/main.go index c102afd61c0a..3388db542b2b 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -36,6 +36,7 @@ func main() { op.InstanceProvider, op.GetClient(), op.AMIProvider, + op.EventRecorder, ) lo.Must0(op.AddHealthzCheck("cloud-provider", awsCloudProvider.LivenessProbe)) cloudProvider := metrics.Decorate(awsCloudProvider) diff --git a/hack/docs/instancetypes_gen_docs.go b/hack/docs/instancetypes_gen_docs.go index 78360443e7ad..d5f2e7719f75 100644 --- a/hack/docs/instancetypes_gen_docs.go +++ b/hack/docs/instancetypes_gen_docs.go @@ -31,6 +31,7 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" + "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/manager" @@ -45,6 +46,7 @@ import ( "github.com/aws/karpenter/pkg/test" "github.com/aws/karpenter-core/pkg/cloudprovider" + "github.com/aws/karpenter-core/pkg/events" "github.com/aws/karpenter-core/pkg/utils/resources" "github.com/aws/karpenter/pkg/apis/v1alpha1" ) @@ -87,7 +89,7
@@ func main() { Manager: &FakeManager{}, KubernetesInterface: kubernetes.NewForConfigOrDie(&rest.Config{}), }) - cp := awscloudprovider.New(op.InstanceTypesProvider, op.InstanceProvider, op.GetClient(), op.AMIProvider) + cp := awscloudprovider.New(op.InstanceTypesProvider, op.InstanceProvider, op.GetClient(), op.AMIProvider, events.NewRecorder(&record.FakeRecorder{})) provider := v1alpha1.AWS{SubnetSelector: map[string]string{ "*": "*", diff --git a/pkg/apis/crds/karpenter.k8s.aws_awsnodetemplates.yaml b/pkg/apis/crds/karpenter.k8s.aws_awsnodetemplates.yaml index 6d3c4279a6e2..f9f08c3869c7 100644 --- a/pkg/apis/crds/karpenter.k8s.aws_awsnodetemplates.yaml +++ b/pkg/apis/crds/karpenter.k8s.aws_awsnodetemplates.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.11.3 + controller-gen.kubebuilder.io/version: v0.9.2 creationTimestamp: null name: awsnodetemplates.karpenter.k8s.aws spec: diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go index a424a2a72a75..f6c41878a195 100644 --- a/pkg/cloudprovider/cloudprovider.go +++ b/pkg/cloudprovider/cloudprovider.go @@ -47,6 +47,7 @@ import ( coreapis "github.com/aws/karpenter-core/pkg/apis" "github.com/aws/karpenter-core/pkg/apis/v1alpha5" "github.com/aws/karpenter-core/pkg/cloudprovider" + "github.com/aws/karpenter-core/pkg/events" ) func init() { @@ -61,14 +62,16 @@ type CloudProvider struct { instanceProvider *instance.Provider kubeClient client.Client amiProvider *amifamily.Provider + recorder events.Recorder } -func New(instanceTypeProvider *instancetype.Provider, instanceProvider *instance.Provider, kubeClient client.Client, amiProvider *amifamily.Provider) *CloudProvider { +func New(instanceTypeProvider *instancetype.Provider, instanceProvider *instance.Provider, kubeClient client.Client, amiProvider *amifamily.Provider, recorder events.Recorder) *CloudProvider { return &CloudProvider{ 
instanceTypeProvider: instanceTypeProvider, instanceProvider: instanceProvider, kubeClient: kubeClient, amiProvider: amiProvider, + recorder: recorder, } } @@ -85,10 +88,26 @@ func (c *CloudProvider) Create(ctx context.Context, machine *v1alpha5.Machine) ( return nil, fmt.Errorf("resolving instance types, %w", err) } if len(instanceTypes) == 0 { + c.recorder.Publish(events.Event{ + InvolvedObject: machine, + Type: v1.EventTypeWarning, + Reason: "InsufficientCapacityError", + Message: fmt.Sprintf("Machine %s event: all requested instance types were unavailable during launch", machine.Name), + DedupeValues: []string{machine.Name}, + }) return nil, cloudprovider.NewInsufficientCapacityError(fmt.Errorf("all requested instance types were unavailable during launch")) } instance, err := c.instanceProvider.Create(ctx, nodeTemplate, machine, instanceTypes) if err != nil { + if cloudprovider.IsInsufficientCapacityError(err) { + c.recorder.Publish(events.Event{ + InvolvedObject: machine, + Type: v1.EventTypeWarning, + Reason: "InsufficientCapacityError", + Message: fmt.Sprintf("Machine %s event: %s", machine.Name, err), + DedupeValues: []string{machine.Name}, + }) + } return nil, fmt.Errorf("creating instance, %w", err) } instanceType, _ := lo.Find(instanceTypes, func(i *cloudprovider.InstanceType) bool { diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index c34abf58d7bc..6b6324c81c01 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -87,7 +87,7 @@ var _ = BeforeSuite(func() { awsEnv = test.NewEnvironment(ctx, env) fakeClock = clock.NewFakeClock(time.Now()) - cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider) + cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider, events.NewRecorder(&record.FakeRecorder{})) cluster = state.NewCluster(fakeClock, env.Client, cloudProvider) prov 
= provisioning.NewProvisioner(env.Client, env.KubernetesInterface.CoreV1(), events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster) }) diff --git a/pkg/controllers/machine/garbagecollection/suite_test.go b/pkg/controllers/machine/garbagecollection/suite_test.go index 311464dbdf12..43e913368e03 100644 --- a/pkg/controllers/machine/garbagecollection/suite_test.go +++ b/pkg/controllers/machine/garbagecollection/suite_test.go @@ -28,12 +28,14 @@ import ( "github.com/patrickmn/go-cache" "github.com/samber/lo" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/record" . "knative.dev/pkg/logging/testing" "sigs.k8s.io/controller-runtime/pkg/client" coresettings "github.com/aws/karpenter-core/pkg/apis/settings" "github.com/aws/karpenter-core/pkg/apis/v1alpha5" corecloudprovider "github.com/aws/karpenter-core/pkg/cloudprovider" + "github.com/aws/karpenter-core/pkg/events" "github.com/aws/karpenter-core/pkg/operator/controller" "github.com/aws/karpenter-core/pkg/operator/scheme" coretest "github.com/aws/karpenter-core/pkg/test" @@ -68,7 +70,7 @@ var _ = BeforeSuite(func() { env = coretest.NewEnvironment(scheme.Scheme, coretest.WithCRDs(apis.CRDs...)) awsEnv = test.NewEnvironment(ctx, env) - cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider) + cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider, events.NewRecorder(&record.FakeRecorder{})) linkedMachineCache = cache.New(time.Minute*10, time.Second*10) linkController := &link.Controller{ Cache: linkedMachineCache, diff --git a/pkg/controllers/machine/link/suite_test.go b/pkg/controllers/machine/link/suite_test.go index 8a218734daa0..672ba796c6a4 100644 --- a/pkg/controllers/machine/link/suite_test.go +++ b/pkg/controllers/machine/link/suite_test.go @@ -29,6 +29,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" 
"k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/tools/record" . "knative.dev/pkg/logging/testing" "sigs.k8s.io/controller-runtime/pkg/client" @@ -39,6 +40,7 @@ import ( coretest "github.com/aws/karpenter-core/pkg/test" . "github.com/aws/karpenter-core/pkg/test/expectations" + "github.com/aws/karpenter-core/pkg/events" "github.com/aws/karpenter/pkg/apis" "github.com/aws/karpenter/pkg/apis/settings" "github.com/aws/karpenter/pkg/apis/v1alpha1" @@ -67,7 +69,7 @@ var _ = BeforeSuite(func() { env = coretest.NewEnvironment(scheme.Scheme, coretest.WithCRDs(apis.CRDs...)) awsEnv = test.NewEnvironment(ctx, env) - cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider) + cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider, events.NewRecorder(&record.FakeRecorder{})) linkController = link.NewController(env.Client, cloudProvider) }) var _ = AfterSuite(func() { diff --git a/pkg/providers/instance/suite_test.go b/pkg/providers/instance/suite_test.go index 3bd099b462ac..9f4a14a18f54 100644 --- a/pkg/providers/instance/suite_test.go +++ b/pkg/providers/instance/suite_test.go @@ -24,11 +24,13 @@ import ( "github.com/samber/lo" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/record" . 
"knative.dev/pkg/logging/testing" coresettings "github.com/aws/karpenter-core/pkg/apis/settings" "github.com/aws/karpenter-core/pkg/apis/v1alpha5" corecloudprovider "github.com/aws/karpenter-core/pkg/cloudprovider" + "github.com/aws/karpenter-core/pkg/events" "github.com/aws/karpenter-core/pkg/operator/injection" "github.com/aws/karpenter-core/pkg/operator/options" "github.com/aws/karpenter-core/pkg/operator/scheme" @@ -62,7 +64,7 @@ var _ = BeforeSuite(func() { ctx = coresettings.ToContext(ctx, coretest.Settings()) ctx = settings.ToContext(ctx, test.Settings()) awsEnv = test.NewEnvironment(ctx, env) - cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider) + cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider, events.NewRecorder(&record.FakeRecorder{})) }) var _ = AfterSuite(func() { diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index 92a12e540b88..ee7df61ef15e 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -89,7 +89,7 @@ var _ = BeforeSuite(func() { awsEnv = test.NewEnvironment(ctx, env) fakeClock = &clock.FakeClock{} - cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider) + cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider, events.NewRecorder(&record.FakeRecorder{})) cluster = state.NewCluster(fakeClock, env.Client, cloudProvider) prov = provisioning.NewProvisioner(env.Client, env.KubernetesInterface.CoreV1(), events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster) }) diff --git a/pkg/providers/launchtemplate/suite_test.go b/pkg/providers/launchtemplate/suite_test.go index 608f6943d10a..d3fb4f8c9024 100644 --- a/pkg/providers/launchtemplate/suite_test.go +++ 
b/pkg/providers/launchtemplate/suite_test.go @@ -88,7 +88,7 @@ var _ = BeforeSuite(func() { awsEnv = test.NewEnvironment(ctx, env) fakeClock = &clock.FakeClock{} - cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider) + cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider, events.NewRecorder(&record.FakeRecorder{})) cluster = state.NewCluster(fakeClock, env.Client, cloudProvider) prov = provisioning.NewProvisioner(env.Client, env.KubernetesInterface.CoreV1(), events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster) })