diff --git a/controller/health.go b/controller/health.go
index ece586c00b089..21107db209a10 100644
--- a/controller/health.go
+++ b/controller/health.go
@@ -9,6 +9,7 @@ import (
 	kubeutil "github.com/argoproj/gitops-engine/pkg/utils/kube"
 	log "github.com/sirupsen/logrus"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/runtime/schema"
 
 	"github.com/argoproj/argo-cd/v2/common"
@@ -41,16 +42,24 @@ func setApplicationHealth(resources []managedResource, statuses []appv1.Resource
 		gvk := schema.GroupVersionKind{Group: res.Group, Version: res.Version, Kind: res.Kind}
 		if res.Live == nil {
 			healthStatus = &health.HealthStatus{Status: health.HealthStatusMissing}
+		} else if res.Kind == "CustomResourceDefinition" && res.Group == "apiextensions.k8s.io" {
+			// The nil check above must come first: a CRD that is missing from the
+			// cluster has no live object to inspect. Note that this branch does not
+			// consult healthOverrides.
+			healthStatus = getCRDHealth(res.Live)
 		} else {
-			// App the manages itself should not affect own health
+			// App that manages itself should not affect its own health
 			if isSelfReferencedApp(app, kubeutil.GetObjectRef(res.Live)) {
 				continue
 			}
 			healthStatus, err = health.GetResourceHealth(res.Live, healthOverrides)
-			if err != nil && savedErr == nil {
+			if err != nil {
 				errCount++
-				savedErr = fmt.Errorf("failed to get resource health for %q with name %q in namespace %q: %w", res.Live.GetKind(), res.Live.GetName(), res.Live.GetNamespace(), err)
-				// also log so we don't lose the message
-				log.WithField("application", app.QualifiedName()).Warn(savedErr)
+				resErr := fmt.Errorf("failed to get resource health for %q with name %q in namespace %q: %w", res.Live.GetKind(), res.Live.GetName(), res.Live.GetNamespace(), err)
+				if savedErr == nil {
+					savedErr = resErr
+				}
+				// Log every failure: only the first one is returned to the caller
+				log.WithField("application", app.QualifiedName()).Warn(resErr)
 			}
 		}
@@ -66,7 +75,7 @@ func setApplicationHealth(resources []managedResource, statuses []appv1.Resource
 			statuses[i].Health = nil
 		}
 
-		// Is health status is missing but resource has not built-in/custom health check then it should not affect parent app health
+		// If health status is missing but the resource has no built-in/custom health check, it should not affect parent app health
 		if _, hasOverride := healthOverrides[lua.GetConfigMapKey(gvk)]; healthStatus.Status == health.HealthStatusMissing && !hasOverride && health.GetHealthCheckFunc(gvk) == nil {
 			continue
 		}
@@ -80,9 +89,10 @@ func setApplicationHealth(resources []managedResource, statuses []appv1.Resource
 			appHealth.Status = healthStatus.Status
 		}
 	}
+
 	if persistResourceHealth {
 		app.Status.ResourceHealthSource = appv1.ResourceHealthLocationInline
-		// if the status didn't change, don't update the timestamp
+		// Update timestamp only if health status changes
 		if app.Status.Health.Status == appHealth.Status && app.Status.Health.LastTransitionTime != nil {
 			appHealth.LastTransitionTime = app.Status.Health.LastTransitionTime
 		} else {
@@ -92,8 +102,38 @@ func setApplicationHealth(resources []managedResource, statuses []appv1.Resource
 	} else {
 		app.Status.ResourceHealthSource = appv1.ResourceHealthLocationAppTree
 	}
+
 	if savedErr != nil && errCount > 1 {
 		savedErr = fmt.Errorf("see application-controller logs for %d other errors; most recent error was: %w", errCount-1, savedErr)
 	}
+
+	log.WithField("application", app.QualifiedName()).Debugf("Computed application health: %s", appHealth.Status)
+
 	return &appHealth, savedErr
 }
+
+// getCRDHealth derives the health of a live CustomResourceDefinition from its
+// status conditions. A NonStructuralSchema condition with status "True" marks
+// the CRD as Degraded and carries the condition message; anything else,
+// including absent or unreadable conditions, is reported as Healthy.
+func getCRDHealth(crd *unstructured.Unstructured) *health.HealthStatus {
+	conditions, _, err := unstructured.NestedSlice(crd.Object, "status", "conditions")
+	if err != nil {
+		// CRDs are cluster-scoped, so the name alone identifies the resource
+		log.WithError(err).Warnf("Failed to retrieve conditions for CRD %q", crd.GetName())
+	}
+	for _, c := range conditions {
+		cond, ok := c.(map[string]interface{})
+		if !ok {
+			log.Warnf("Unexpected condition format for CRD %q", crd.GetName())
+			continue
+		}
+		condType, _ := cond["type"].(string)
+		condStatus, _ := cond["status"].(string)
+		if condType == "NonStructuralSchema" && condStatus == "True" {
+			message, _ := cond["message"].(string)
+			return &health.HealthStatus{Status: health.HealthStatusDegraded, Message: message}
+		}
+	}
+	return &health.HealthStatus{Status: health.HealthStatusHealthy}
+}
diff --git a/controller/health_test.go b/controller/health_test.go
index 3cc3f8d67d816..5994b4e2efade 100644
--- a/controller/health_test.go
+++ b/controller/health_test.go
@@ -250,3 +250,47 @@ return hs`,
 		assert.Equal(t, health.HealthStatusHealthy, healthStatus.Status)
 	})
 }
+
+func TestSetApplicationHealth_CRDHealthCheck(t *testing.T) {
+	crd := resourceFromFile("./testdata/customresourcedefinition.yaml")
+
+	// Simulate a CRD reporting a NonStructuralSchema violation
+	conditions := []interface{}{
+		map[string]interface{}{
+			"type":    "NonStructuralSchema",
+			"status":  "True",
+			"reason":  "Violations",
+			"message": "CRD has non-structural schema issues",
+		},
+	}
+	err := unstructured.SetNestedSlice(crd.Object, conditions, "status", "conditions")
+	require.NoError(t, err)
+
+	resources := []managedResource{{
+		Group: "apiextensions.k8s.io", Version: "v1", Kind: "CustomResourceDefinition", Live: &crd,
+	}}
+	resourceStatuses := initStatuses(resources)
+
+	healthStatus, err := setApplicationHealth(resources, resourceStatuses, lua.ResourceHealthOverrides{}, app, true)
+	require.NoError(t, err)
+
+	// The degraded CRD must be reflected both on the resource and on the application
+	assert.Equal(t, health.HealthStatusDegraded, healthStatus.Status)
+	require.NotNil(t, resourceStatuses[0].Health, "Health should not be nil")
+	assert.Equal(t, health.HealthStatusDegraded, resourceStatuses[0].Health.Status)
+	assert.Equal(t, "CRD has non-structural schema issues", resourceStatuses[0].Health.Message)
+}
+
+func TestSetApplicationHealth_MissingCRD(t *testing.T) {
+	// A CRD that is not yet in the cluster has no live object and must not panic
+	resources := []managedResource{{
+		Group: "apiextensions.k8s.io", Version: "v1", Kind: "CustomResourceDefinition", Live: nil,
+	}}
+	resourceStatuses := initStatuses(resources)
+
+	_, err := setApplicationHealth(resources, resourceStatuses, lua.ResourceHealthOverrides{}, app, true)
+	require.NoError(t, err)
+
+	require.NotNil(t, resourceStatuses[0].Health)
+	assert.Equal(t, health.HealthStatusMissing, resourceStatuses[0].Health.Status)
+}
diff --git a/controller/testdata/customresourcedefinition.yaml b/controller/testdata/customresourcedefinition.yaml
new file mode 100644
index 0000000000000..eaea07b19134f
--- /dev/null
+++ b/controller/testdata/customresourcedefinition.yaml
@@ -0,0 +1,18 @@
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  name: examples.example.io
+spec:
+  group: example.io
+  names:
+    kind: Example
+    listKind: ExampleList
+    plural: examples
+    singular: example
+  scope: Namespaced
+  versions:
+    - name: v1alpha1
+      served: true
+      storage: true
+status:
+  conditions: []