Skip to content

Commit

Permalink
Improve error logging from multiple packing errors
Browse files Browse the repository at this point in the history
  • Loading branch information
ellistarn committed Jul 26, 2021
1 parent c59ce1a commit 0228dd1
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 29 deletions.
12 changes: 7 additions & 5 deletions pkg/cloudprovider/aws/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
"github.com/awslabs/karpenter/pkg/cloudprovider"
mapset "github.com/deckarep/golang-set"
"go.uber.org/multierr"

v1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -96,10 +97,7 @@ func (p *InstanceProvider) Create(ctx context.Context,
if err != nil {
return nil, fmt.Errorf("creating fleet %w", err)
}
if count := len(createFleetOutput.Instances); count != 1 {
return nil, combineFleetErrors(createFleetOutput.Errors)
}
if count := len(createFleetOutput.Instances[0].InstanceIds); count != 1 {
if len(createFleetOutput.Instances) != 1 || len(createFleetOutput.Instances[0].InstanceIds) != 1 {
return nil, combineFleetErrors(createFleetOutput.Errors)
}
return createFleetOutput.Instances[0].InstanceIds[0], nil
Expand Down Expand Up @@ -127,8 +125,12 @@ func getInstanceID(node *v1.Node) (*string, error) {
}

// combineFleetErrors collapses the errors reported by a CreateFleet call into a
// single error that holds one entry per distinct error code.
//
// Codes are emitted in the order in which they are first seen, so the combined
// message is stable for a given input. A missing (nil) ErrorCode is treated as
// the empty string by aws.StringValue. The result is nil when errors is empty.
func combineFleetErrors(errors []*ec2.CreateFleetError) (errs error) {
	unique := mapset.NewSet()
	for _, err := range errors {
		if err == nil {
			continue
		}
		code := aws.StringValue(err.ErrorCode)
		// Add reports whether the value was newly inserted, which lets a single
		// pass both de-duplicate and preserve first-seen order.
		if unique.Add(code) {
			// Use an explicit "%s" verb: the code comes from the API response
			// and must not be interpreted as a format string.
			errs = multierr.Append(errs, fmt.Errorf("%s", code))
		}
	}
	return errs
}
4 changes: 2 additions & 2 deletions pkg/cloudprovider/aws/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ var _ = Describe("Allocation", func() {
It("should not schedule a pod with an invalid subnet", func() {
provisioner.Spec.InstanceTypes = []string{"m5.large"} // limit instance type to simplify ConsistOf checks
ExpectCreated(env.Client, provisioner)
pods := ExpectProvisioningFailed(ctx, env.Client, controller, provisioner,
pods := ExpectProvisioningSucceeded(ctx, env.Client, controller, provisioner,
test.PendingPod(test.PodOptions{NodeSelector: map[string]string{SubnetTagKeyLabel: "Invalid"}}),
)
// Assertions
Expand Down Expand Up @@ -562,7 +562,7 @@ var _ = Describe("Allocation", func() {
})
It("should not schedule a pod with an invalid security group", func() {
ExpectCreated(env.Client, provisioner)
pods := ExpectProvisioningFailed(ctx, env.Client, controller, provisioner,
pods := ExpectProvisioningSucceeded(ctx, env.Client, controller, provisioner,
test.PendingPod(test.PodOptions{NodeSelector: map[string]string{SecurityGroupTagKeyLabel: "Invalid"}}),
)
// Assertions
Expand Down
14 changes: 9 additions & 5 deletions pkg/controllers/allocation/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@ import (
"github.com/awslabs/karpenter/pkg/apis/provisioning/v1alpha3"
"github.com/awslabs/karpenter/pkg/cloudprovider"
"github.com/awslabs/karpenter/pkg/packing"
"github.com/awslabs/karpenter/pkg/utils/result"
"go.uber.org/multierr"
"golang.org/x/time/rate"
"knative.dev/pkg/logging"

"go.uber.org/multierr"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
Expand Down Expand Up @@ -93,7 +94,7 @@ func (c *Controller) Reconcile(ctx context.Context, req reconcile.Request) (reco
// 3. Filter pods
pods, err := c.Filter.GetProvisionablePods(ctx, provisioner)
if err != nil {
return reconcile.Result{}, fmt.Errorf("filtering pods, %w", err)
return result.Retry(ctx, fmt.Errorf("filtering pods, %w", err))
}
if len(pods) == 0 {
return reconcile.Result{}, nil
Expand All @@ -103,13 +104,13 @@ func (c *Controller) Reconcile(ctx context.Context, req reconcile.Request) (reco
// 4. Group by constraints
constraintGroups, err := c.Constraints.Group(ctx, provisioner, pods)
if err != nil {
return reconcile.Result{}, fmt.Errorf("building constraint groups, %w", err)
return result.Retry(ctx, fmt.Errorf("building constraint groups, %w", err))
}

// 5. Get Instance Types Options
instanceTypes, err := c.CloudProvider.GetInstanceTypes(ctx)
if err != nil {
return reconcile.Result{}, fmt.Errorf("getting instance types, %w", err)
return result.Retry(ctx, fmt.Errorf("getting instance types, %w", err))
}

// 6. Binpack each group
Expand All @@ -128,7 +129,10 @@ func (c *Controller) Reconcile(ctx context.Context, req reconcile.Request) (reco
return c.Binder.Bind(ctx, node, packing.Pods)
})
})
return reconcile.Result{}, multierr.Combine(errs...)
if multierr.Combine(errs...) != nil {
return result.Retry(ctx, errs...)
}
return reconcile.Result{}, nil
}

func (c *Controller) Register(ctx context.Context, m manager.Manager) error {
Expand Down
17 changes: 0 additions & 17 deletions pkg/test/expectations/expectations.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,23 +122,6 @@ func ExpectProvisioningSucceeded(ctx context.Context, c client.Client, reconcile
return result
}

// ExpectProvisioningFailed passes every pod to ExpectCreatedWithStatus, asserts
// via ExpectReconcileFailed that reconciling the provisioner returns an error,
// and then returns the value of ExpectPodExists for each pod, in the same order
// the pods were supplied.
func ExpectProvisioningFailed(ctx context.Context, c client.Client, reconciler reconcile.Reconciler, provisioner *v1alpha3.Provisioner, pods ...*v1.Pod) []*v1.Pod {
	for i := range pods {
		ExpectCreatedWithStatus(c, pods[i])
	}

	provisionerKey := client.ObjectKeyFromObject(provisioner)
	ExpectReconcileFailed(ctx, reconciler, provisionerKey)

	// Always return a non-nil slice, even when no pods were supplied.
	fetched := make([]*v1.Pod, 0, len(pods))
	for i := range pods {
		name, namespace := pods[i].GetName(), pods[i].GetNamespace()
		fetched = append(fetched, ExpectPodExists(c, name, namespace))
	}
	return fetched
}

// ExpectReconcileFailed runs a single reconcile for key and asserts that the
// reconciler returned an error. The reconcile result itself is discarded.
func ExpectReconcileFailed(ctx context.Context, reconciler reconcile.Reconciler, key client.ObjectKey) {
	request := reconcile.Request{NamespacedName: key}
	_, reconcileErr := reconciler.Reconcile(ctx, request)
	Expect(reconcileErr).To(HaveOccurred())
}

func ExpectReconcileSucceeded(ctx context.Context, reconciler reconcile.Reconciler, key client.ObjectKey) {
_, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key})
Expect(err).ToNot(HaveOccurred())
Expand Down
16 changes: 16 additions & 0 deletions pkg/utils/result/result.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package result

import (
"context"

"knative.dev/pkg/logging"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

// Retry logs each non-nil error in errs through the logger carried by ctx and
// asks for the request to be requeued.
//
// The returned error is always nil: failures are reported through the log
// only, and the retry is requested through Result.Requeue.
//
// Nil entries in errs are skipped. Callers may therefore pass a slice that
// mixes nil and non-nil errors (for example, per-item results of a parallel
// operation) without triggering a nil-interface method call.
func Retry(ctx context.Context, errs ...error) (reconcile.Result, error) {
	logger := logging.FromContext(ctx)
	for _, err := range errs {
		if err == nil {
			continue
		}
		logger.Errorf("Failed allocation, %s", err.Error())
	}
	return reconcile.Result{Requeue: true}, nil
}

0 comments on commit 0228dd1

Please sign in to comment.