Skip to content

Commit

Permalink
feat: create kubernetes events in case of insufficient capacity errors
Browse files Browse the repository at this point in the history
We need better visibility into insufficient capacity errors (ICE) when Karpenter
encounters them.
  • Loading branch information
Fedosin committed May 29, 2023
1 parent 3400412 commit a31f962
Show file tree
Hide file tree
Showing 10 changed files with 37 additions and 9 deletions.
1 change: 1 addition & 0 deletions cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ func main() {
op.InstanceProvider,
op.GetClient(),
op.AMIProvider,
op.EventRecorder,
)
lo.Must0(op.AddHealthzCheck("cloud-provider", awsCloudProvider.LivenessProbe))
cloudProvider := metrics.Decorate(awsCloudProvider)
Expand Down
4 changes: 3 additions & 1 deletion hack/docs/instancetypes_gen_docs.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"sigs.k8s.io/controller-runtime/pkg/manager"
Expand All @@ -45,6 +46,7 @@ import (
"github.com/aws/karpenter/pkg/test"

"github.com/aws/karpenter-core/pkg/cloudprovider"
"github.com/aws/karpenter-core/pkg/events"
"github.com/aws/karpenter-core/pkg/utils/resources"
"github.com/aws/karpenter/pkg/apis/v1alpha1"
)
Expand Down Expand Up @@ -87,7 +89,7 @@ func main() {
Manager: &FakeManager{},
KubernetesInterface: kubernetes.NewForConfigOrDie(&rest.Config{}),
})
cp := awscloudprovider.New(op.InstanceTypesProvider, op.InstanceProvider, op.GetClient(), op.AMIProvider)
cp := awscloudprovider.New(op.InstanceTypesProvider, op.InstanceProvider, op.GetClient(), op.AMIProvider, events.NewRecorder(&record.FakeRecorder{}))

provider := v1alpha1.AWS{SubnetSelector: map[string]string{
"*": "*",
Expand Down
2 changes: 1 addition & 1 deletion pkg/apis/crds/karpenter.k8s.aws_awsnodetemplates.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.11.3
controller-gen.kubebuilder.io/version: v0.9.2
creationTimestamp: null
name: awsnodetemplates.karpenter.k8s.aws
spec:
Expand Down
21 changes: 20 additions & 1 deletion pkg/cloudprovider/cloudprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ import (
coreapis "github.com/aws/karpenter-core/pkg/apis"
"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
"github.com/aws/karpenter-core/pkg/cloudprovider"
"github.com/aws/karpenter-core/pkg/events"
)

func init() {
Expand All @@ -61,14 +62,16 @@ type CloudProvider struct {
instanceProvider *instance.Provider
kubeClient client.Client
amiProvider *amifamily.Provider
recorder events.Recorder
}

func New(instanceTypeProvider *instancetype.Provider, instanceProvider *instance.Provider, kubeClient client.Client, amiProvider *amifamily.Provider) *CloudProvider {
// New returns a CloudProvider that holds the supplied instance type provider,
// instance provider, Kubernetes client, AMI provider, and event recorder.
// The recorder is what Create uses to publish InsufficientCapacityError
// warning events for a machine.
func New(instanceTypeProvider *instancetype.Provider, instanceProvider *instance.Provider, kubeClient client.Client, amiProvider *amifamily.Provider, recorder events.Recorder) *CloudProvider {
return &CloudProvider{
instanceTypeProvider: instanceTypeProvider,
instanceProvider: instanceProvider,
kubeClient: kubeClient,
amiProvider: amiProvider,
recorder: recorder,
}
}

Expand All @@ -85,10 +88,26 @@ func (c *CloudProvider) Create(ctx context.Context, machine *v1alpha5.Machine) (
return nil, fmt.Errorf("resolving instance types, %w", err)
}
if len(instanceTypes) == 0 {
c.recorder.Publish(events.Event{
InvolvedObject: machine,
Type: v1.EventTypeWarning,
Reason: "InsufficientCapacityError",
Message: fmt.Sprintf("Machine %s event: all requested instance types were unavailable during launch", machine.Name),
DedupeValues: []string{machine.Name},
})
return nil, cloudprovider.NewInsufficientCapacityError(fmt.Errorf("all requested instance types were unavailable during launch"))
}
instance, err := c.instanceProvider.Create(ctx, nodeTemplate, machine, instanceTypes)
if err != nil {
if cloudprovider.IsInsufficientCapacityError(err) {
c.recorder.Publish(events.Event{
InvolvedObject: machine,
Type: v1.EventTypeWarning,
Reason: "InsufficientCapacityError",
Message: fmt.Sprintf("Machine %s event: %s", machine.Name, err),
DedupeValues: []string{machine.Name},
})
}
return nil, fmt.Errorf("creating instance, %w", err)
}
instanceType, _ := lo.Find(instanceTypes, func(i *cloudprovider.InstanceType) bool {
Expand Down
2 changes: 1 addition & 1 deletion pkg/cloudprovider/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ var _ = BeforeSuite(func() {
awsEnv = test.NewEnvironment(ctx, env)

fakeClock = clock.NewFakeClock(time.Now())
cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider)
cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider, events.NewRecorder(&record.FakeRecorder{}))
cluster = state.NewCluster(fakeClock, env.Client, cloudProvider)
prov = provisioning.NewProvisioner(env.Client, env.KubernetesInterface.CoreV1(), events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster)
})
Expand Down
4 changes: 3 additions & 1 deletion pkg/controllers/machine/garbagecollection/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,14 @@ import (
"github.com/patrickmn/go-cache"
"github.com/samber/lo"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/record"
. "knative.dev/pkg/logging/testing"
"sigs.k8s.io/controller-runtime/pkg/client"

coresettings "github.com/aws/karpenter-core/pkg/apis/settings"
"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
corecloudprovider "github.com/aws/karpenter-core/pkg/cloudprovider"
"github.com/aws/karpenter-core/pkg/events"
"github.com/aws/karpenter-core/pkg/operator/controller"
"github.com/aws/karpenter-core/pkg/operator/scheme"
coretest "github.com/aws/karpenter-core/pkg/test"
Expand Down Expand Up @@ -68,7 +70,7 @@ var _ = BeforeSuite(func() {
env = coretest.NewEnvironment(scheme.Scheme, coretest.WithCRDs(apis.CRDs...))
awsEnv = test.NewEnvironment(ctx, env)

cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider)
cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider, events.NewRecorder(&record.FakeRecorder{}))
linkedMachineCache = cache.New(time.Minute*10, time.Second*10)
linkController := &link.Controller{
Cache: linkedMachineCache,
Expand Down
4 changes: 3 additions & 1 deletion pkg/controllers/machine/link/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/tools/record"
. "knative.dev/pkg/logging/testing"
"sigs.k8s.io/controller-runtime/pkg/client"

Expand All @@ -39,6 +40,7 @@ import (
coretest "github.com/aws/karpenter-core/pkg/test"
. "github.com/aws/karpenter-core/pkg/test/expectations"

"github.com/aws/karpenter-core/pkg/events"
"github.com/aws/karpenter/pkg/apis"
"github.com/aws/karpenter/pkg/apis/settings"
"github.com/aws/karpenter/pkg/apis/v1alpha1"
Expand Down Expand Up @@ -67,7 +69,7 @@ var _ = BeforeSuite(func() {
env = coretest.NewEnvironment(scheme.Scheme, coretest.WithCRDs(apis.CRDs...))
awsEnv = test.NewEnvironment(ctx, env)

cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider)
cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider, events.NewRecorder(&record.FakeRecorder{}))
linkController = link.NewController(env.Client, cloudProvider)
})
var _ = AfterSuite(func() {
Expand Down
4 changes: 3 additions & 1 deletion pkg/providers/instance/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@ import (
"github.com/samber/lo"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/record"
. "knative.dev/pkg/logging/testing"

coresettings "github.com/aws/karpenter-core/pkg/apis/settings"
"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
corecloudprovider "github.com/aws/karpenter-core/pkg/cloudprovider"
"github.com/aws/karpenter-core/pkg/events"
"github.com/aws/karpenter-core/pkg/operator/injection"
"github.com/aws/karpenter-core/pkg/operator/options"
"github.com/aws/karpenter-core/pkg/operator/scheme"
Expand Down Expand Up @@ -62,7 +64,7 @@ var _ = BeforeSuite(func() {
ctx = coresettings.ToContext(ctx, coretest.Settings())
ctx = settings.ToContext(ctx, test.Settings())
awsEnv = test.NewEnvironment(ctx, env)
cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider)
cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider, events.NewRecorder(&record.FakeRecorder{}))
})

var _ = AfterSuite(func() {
Expand Down
2 changes: 1 addition & 1 deletion pkg/providers/instancetype/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ var _ = BeforeSuite(func() {
awsEnv = test.NewEnvironment(ctx, env)

fakeClock = &clock.FakeClock{}
cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider)
cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider, events.NewRecorder(&record.FakeRecorder{}))
cluster = state.NewCluster(fakeClock, env.Client, cloudProvider)
prov = provisioning.NewProvisioner(env.Client, env.KubernetesInterface.CoreV1(), events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster)
})
Expand Down
2 changes: 1 addition & 1 deletion pkg/providers/launchtemplate/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ var _ = BeforeSuite(func() {
awsEnv = test.NewEnvironment(ctx, env)

fakeClock = &clock.FakeClock{}
cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider)
cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, env.Client, awsEnv.AMIProvider, events.NewRecorder(&record.FakeRecorder{}))
cluster = state.NewCluster(fakeClock, env.Client, cloudProvider)
prov = provisioning.NewProvisioner(env.Client, env.KubernetesInterface.CoreV1(), events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster)
})
Expand Down

0 comments on commit a31f962

Please sign in to comment.