diff --git a/cmd/otel-allocator/allocation/least_weighted_test.go b/cmd/otel-allocator/allocation/least_weighted_test.go index 2812541966..f70d5025fb 100644 --- a/cmd/otel-allocator/allocation/least_weighted_test.go +++ b/cmd/otel-allocator/allocation/least_weighted_test.go @@ -181,6 +181,7 @@ func TestNoCollectorReassignment(t *testing.T) { } func TestSmartCollectorReassignment(t *testing.T) { + t.Skip("This test is flaky and fails frequently, see issue 1291") s, _ := New("least-weighted", logger) cols := makeNCollectors(4, 0) diff --git a/controllers/opentelemetrycollector_controller.go b/controllers/opentelemetrycollector_controller.go index 8e986c4695..6d3bb96a67 100644 --- a/controllers/opentelemetrycollector_controller.go +++ b/controllers/opentelemetrycollector_controller.go @@ -168,6 +168,11 @@ func (r *OpenTelemetryCollectorReconciler) Reconcile(ctx context.Context, req ct func (r *OpenTelemetryCollectorReconciler) RunTasks(ctx context.Context, params reconcile.Params) error { for _, task := range r.tasks { if err := task.Do(ctx, params); err != nil { + // If we get an error that occurs because the namespace is being terminated, then exit this loop + if apierrors.IsForbidden(err) && apierrors.HasStatusCause(err, corev1.NamespaceTerminatingCause) { + r.log.V(2).Info("Exiting reconcile loop because namespace is being terminated", "namespace", params.Instance.Namespace) + return nil + } r.log.Error(err, fmt.Sprintf("failed to reconcile %s", task.Name)) if task.BailOnError { return err