Skip to content

Commit

Permalink
Properly handle CARM errors and requeues (#140)
Browse files Browse the repository at this point in the history
Prior to this patch we introduced new mechanisms to handle Role ARN
retrievals from the CARM cache. Which improved ACK runtime scaling
capabilities and addressed possible race condition scenarios. However in
the process we missed two things:
- Setting an `ACK.ResourceSynced` condition stating that the resource
  isn't synced, yet.
- Returning the **correct** runtime error that will cause the reconciller to
  requeue every 15seconds.

Both of the problems stemmed from the fact that we're not "yet" in the
reconcile function (`rm.Sync`) that is wrapped by a proper error handler
(that triggers requeues, and resets/sets resource conditions). In this
special case, we need to manually inject the condition and return a
controller-runtime error that will trigger a requeue after 15seconds.

While this is a "fair" fix, we're planning on refactoring a lot of the
runtime logic to make easier to read, maintain and more importantly
expose reusable component that will help avoid falling into such traps.

Signed-off-by: Amine Hilaly <[email protected]>

By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
  • Loading branch information
a-hilaly authored Mar 1, 2024
1 parent 24070d9 commit 861f7ed
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 11 deletions.
1 change: 1 addition & 0 deletions pkg/condition/condition.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ var (
NotSyncedMessage = "Resource not synced"
SyncedMessage = "Resource synced successfully"
FailedReferenceResolutionMessage = "Reference resolution failed"
UnavailableIAMRoleMessage = "IAM Role is not available"
)

// Synced returns the Condition in the resource's Conditions collection that is
Expand Down
1 change: 1 addition & 0 deletions pkg/runtime/adoption_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ func (r *adoptionReconciler) reconcile(ctx context.Context, req ctrlrt.Request)
// is not available.
roleARN, err = r.getRoleARN(acctID)
if err != nil {
ackrtlog.InfoAdoptedResource(r.log, res, fmt.Sprintf("Unable to start adoption reconcilliation %s: %v", acctID, err))
// r.getRoleARN errors are not terminal, we should requeue.
return requeue.NeededAfter(err, roleARNNotAvailableRequeueDelay)
}
Expand Down
31 changes: 20 additions & 11 deletions pkg/runtime/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,18 @@ func (r *resourceReconciler) Reconcile(ctx context.Context, req ctrlrt.Request)
return ctrlrt.Result{}, err
}

rlog := ackrtlog.NewResourceLogger(
r.log, desired,
// All the fields for a resource that do not change during reconciliation
// can be initialized during resourceLogger creation
"kind", r.rd.GroupVersionKind().Kind,
"namespace", req.Namespace,
"name", req.Name,
)
// We're storing a logger pointer in the context, so that any changes to the logger
// will be reflected in the context.
ctx = context.WithValue(ctx, ackrtlog.ContextKey, rlog)

// If a user has specified a namespace that is annotated with the
// an owner account ID, we need an appropriate role ARN to assume
// in order to perform the reconciliation. The roles ARN are typically
Expand All @@ -174,12 +186,16 @@ func (r *resourceReconciler) Reconcile(ctx context.Context, req ctrlrt.Request)
// is not available.
roleARN, err = r.getRoleARN(acctID)
if err != nil {
// r.getRoleARN errors are not terminal, we should requeue.
return ctrlrt.Result{}, requeue.NeededAfter(err, roleARNNotAvailableRequeueDelay)
// TODO(a-hilaly): Refactor all the reconcile function to make it
// easier to understand and maintain.
reason := err.Error()
latest := desired.DeepCopy()
// set ResourceSynced condition to false with proper error message
condition.SetSynced(latest, corev1.ConditionFalse, &condition.UnavailableIAMRoleMessage, &reason)
return r.HandleReconcileError(ctx, desired, latest, requeue.NeededAfter(err, roleARNNotAvailableRequeueDelay))
}
}
region := r.getRegion(desired)

endpointURL := r.getEndpointURL(desired)
gvk := r.rd.GroupVersionKind()
// New session will only pivot to the roleARN if it is not empty.
Expand All @@ -188,18 +204,11 @@ func (r *resourceReconciler) Reconcile(ctx context.Context, req ctrlrt.Request)
return ctrlrt.Result{}, err
}

rlog := ackrtlog.NewResourceLogger(
r.log, desired,
rlog.WithValues(
"account", acctID,
"role", roleARN,
"region", region,
// All the fields for a resource that do not change during reconciliation
// can be initialized during resourceLogger creation
"kind", r.rd.GroupVersionKind().Kind,
"namespace", req.Namespace,
"name", req.Name,
)
ctx = context.WithValue(ctx, ackrtlog.ContextKey, rlog)

rm, err := r.rmf.ManagerFor(
r.cfg, r.log, r.metrics, r, sess, acctID, region,
Expand Down

0 comments on commit 861f7ed

Please sign in to comment.