Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve error logging from multiple packing errors #552

Merged
merged 1 commit into from
Jul 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions pkg/cloudprovider/aws/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@ import (
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
"github.com/awslabs/karpenter/pkg/cloudprovider"
"go.uber.org/multierr"

"go.uber.org/multierr"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
)

const (
Expand Down Expand Up @@ -100,10 +101,7 @@ func (p *InstanceProvider) Create(ctx context.Context,
if err != nil {
return nil, fmt.Errorf("creating fleet %w", err)
}
if count := len(createFleetOutput.Instances); count != 1 {
return nil, combineFleetErrors(createFleetOutput.Errors)
}
if count := len(createFleetOutput.Instances[0].InstanceIds); count != 1 {
if len(createFleetOutput.Instances) != 1 || len(createFleetOutput.Instances[0].InstanceIds) != 1 {
return nil, combineFleetErrors(createFleetOutput.Errors)
}
return createFleetOutput.Instances[0].InstanceIds[0], nil
Expand Down Expand Up @@ -134,8 +132,12 @@ func getInstanceID(node *v1.Node) (*string, error) {
}

func combineFleetErrors(errors []*ec2.CreateFleetError) (errs error) {
unique := sets.NewString()
JacobGabrielson marked this conversation as resolved.
Show resolved Hide resolved
for _, err := range errors {
errs = multierr.Append(errs, fmt.Errorf("%s", *err.ErrorCode))
unique.Insert(aws.StringValue(err.ErrorCode))
}
for _, errorCode := range unique.List() {
errs = multierr.Append(errs, fmt.Errorf(errorCode))
}
return errs
return fmt.Errorf("with fleet error(s), %w", errs)
}
4 changes: 2 additions & 2 deletions pkg/cloudprovider/aws/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ var _ = Describe("Allocation", func() {
It("should not schedule a pod with an invalid subnet", func() {
provisioner.Spec.InstanceTypes = []string{"m5.large"} // limit instance type to simplify ConsistOf checks
ExpectCreated(env.Client, provisioner)
pods := ExpectProvisioningFailed(ctx, env.Client, controller, provisioner,
pods := ExpectProvisioningSucceeded(ctx, env.Client, controller, provisioner,
test.PendingPod(test.PodOptions{NodeSelector: map[string]string{SubnetTagKeyLabel: "Invalid"}}),
)
// Assertions
Expand Down Expand Up @@ -562,7 +562,7 @@ var _ = Describe("Allocation", func() {
})
It("should not schedule a pod with an invalid security group", func() {
ExpectCreated(env.Client, provisioner)
pods := ExpectProvisioningFailed(ctx, env.Client, controller, provisioner,
pods := ExpectProvisioningSucceeded(ctx, env.Client, controller, provisioner,
test.PendingPod(test.PodOptions{NodeSelector: map[string]string{SecurityGroupTagKeyLabel: "Invalid"}}),
)
// Assertions
Expand Down
13 changes: 6 additions & 7 deletions pkg/controllers/allocation/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@ import (
"github.com/awslabs/karpenter/pkg/apis/provisioning/v1alpha3"
"github.com/awslabs/karpenter/pkg/cloudprovider"
"github.com/awslabs/karpenter/pkg/packing"
"github.com/awslabs/karpenter/pkg/utils/result"
"go.uber.org/multierr"
"golang.org/x/time/rate"
"knative.dev/pkg/logging"

"go.uber.org/multierr"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
Expand Down Expand Up @@ -93,23 +94,21 @@ func (c *Controller) Reconcile(ctx context.Context, req reconcile.Request) (reco
// 3. Filter pods
pods, err := c.Filter.GetProvisionablePods(ctx, provisioner)
if err != nil {
return reconcile.Result{}, fmt.Errorf("filtering pods, %w", err)
return result.RetryIfError(ctx, fmt.Errorf("filtering pods, %w", err))
}
if len(pods) == 0 {
return reconcile.Result{}, nil
}
logging.FromContext(ctx).Infof("Found %d provisionable pods", len(pods))

// 4. Group by constraints
constraintGroups, err := c.Constraints.Group(ctx, provisioner, pods)
if err != nil {
return reconcile.Result{}, fmt.Errorf("building constraint groups, %w", err)
return result.RetryIfError(ctx, fmt.Errorf("building constraint groups, %w", err))
}

// 5. Get Instance Types Options
instanceTypes, err := c.CloudProvider.GetInstanceTypes(ctx)
if err != nil {
return reconcile.Result{}, fmt.Errorf("getting instance types, %w", err)
return result.RetryIfError(ctx, fmt.Errorf("getting instance types, %w", err))
}

// 6. Binpack each group
Expand All @@ -128,7 +127,7 @@ func (c *Controller) Reconcile(ctx context.Context, req reconcile.Request) (reco
return c.Binder.Bind(ctx, node, packing.Pods)
})
})
return reconcile.Result{}, multierr.Combine(errs...)
return result.RetryIfError(ctx, multierr.Combine(errs...))
}

func (c *Controller) Register(ctx context.Context, m manager.Manager) error {
Expand Down
1 change: 1 addition & 0 deletions pkg/controllers/allocation/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ func (f *Filter) GetProvisionablePods(ctx context.Context, provisioner *v1alpha3
}
provisionable = append(provisionable, ptr.Pod(p))
}
logging.FromContext(ctx).Infof("Found %d provisionable pods", len(provisionable))
return provisionable, nil
}

Expand Down
17 changes: 0 additions & 17 deletions pkg/test/expectations/expectations.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,23 +122,6 @@ func ExpectProvisioningSucceeded(ctx context.Context, c client.Client, reconcile
return result
}

// ExpectProvisioningFailed passes every given pod to ExpectCreatedWithStatus,
// then runs a single reconcile for the provisioner that is expected to return
// an error (see ExpectReconcileFailed). It returns one entry per input pod, in
// the same order, each obtained from ExpectPodExists using the pod's name and
// namespace. The returned slice is never nil, even when no pods are supplied.
func ExpectProvisioningFailed(ctx context.Context, c client.Client, reconciler reconcile.Reconciler, provisioner *v1alpha3.Provisioner, pods ...*v1.Pod) []*v1.Pod {
	for i := range pods {
		ExpectCreatedWithStatus(c, pods[i])
	}

	// The reconcile request is keyed by the provisioner, not by the pods.
	key := client.ObjectKeyFromObject(provisioner)
	ExpectReconcileFailed(ctx, reconciler, key)

	stored := make([]*v1.Pod, 0, len(pods))
	for i := range pods {
		name, namespace := pods[i].GetName(), pods[i].GetNamespace()
		stored = append(stored, ExpectPodExists(c, name, namespace))
	}
	return stored
}

// ExpectReconcileFailed invokes the reconciler once with a request for key and
// asserts that the call returned a non-nil error. The reconcile result itself
// is discarded.
func ExpectReconcileFailed(ctx context.Context, reconciler reconcile.Reconciler, key client.ObjectKey) {
	request := reconcile.Request{NamespacedName: key}
	_, reconcileErr := reconciler.Reconcile(ctx, request)
	Expect(reconcileErr).To(HaveOccurred())
}

func ExpectReconcileSucceeded(ctx context.Context, reconciler reconcile.Reconciler, key client.ObjectKey) {
_, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key})
Expect(err).ToNot(HaveOccurred())
Expand Down
17 changes: 17 additions & 0 deletions pkg/utils/result/result.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package result

import (
"context"

"go.uber.org/multierr"
"knative.dev/pkg/logging"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

// RetryIfError converts an error into a requeue request instead of returning it.
//
// When err is nil it returns an empty reconcile.Result. Otherwise it logs every
// error yielded by multierr.Errors(err) at error level through the logger
// taken from ctx, and returns a Result with Requeue set to true. The returned
// error is always nil, so the caller never sees err itself.
func RetryIfError(ctx context.Context, err error) (reconcile.Result, error) {
	if err == nil {
		// Nothing failed: no log output and no requeue.
		return reconcile.Result{}, nil
	}

	// multierr.Errors unwraps a combined error so each cause gets its own log entry.
	causes := multierr.Errors(err)
	for i := range causes {
		logging.FromContext(ctx).Errorf("Failed allocation, %s", causes[i].Error())
	}
	return reconcile.Result{Requeue: true}, nil
}