Skip to content

Commit

Permalink
feat: report resources health errors with app event (#331)
Browse files Browse the repository at this point in the history
* feat: event-reporter: report resource health status errors with application event

* feat: event-reporter: added unit tests for reporting resource health status errors with application event

* feat: event-reporter: report resource health status errors with parent application event (+ under app-sets)

* event-reporter: lint fix

* lint issue fix
  • Loading branch information
oleksandr-codefresh authored Sep 9, 2024
1 parent 291dfc1 commit ba613c5
Show file tree
Hide file tree
Showing 7 changed files with 647 additions and 81 deletions.
4 changes: 2 additions & 2 deletions changelog/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
### Chore
- chore: argo-cd update to 2.12.3
### Features
- feat: event-reporter: report resource health status errors with application event
35 changes: 32 additions & 3 deletions event_reporter/reporter/application_errors_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,24 @@ func parseResourceSyncResultErrors(rs *appv1.ResourceStatus, os *appv1.Operation
return errors
}

func parseAggregativeHealthErrors(rs *appv1.ResourceStatus, apptree *appv1.ApplicationTree) []*events.ObjectError {
// parseAggregativeHealthErrorsOfApplication walks the resources listed in the
// application's status and, for each one that reports a health status other
// than Healthy, gathers the errors produced by parseAggregativeHealthErrors
// against the given application tree.
//
// Resources without health information are skipped. Errors are requested with
// addReference set to true, so each one carries a SourceReference identifying
// the resource it was collected for.
func parseAggregativeHealthErrorsOfApplication(a *appv1.Application, appTree *appv1.ApplicationTree) []*events.ObjectError {
	var collected []*events.ObjectError

	// Ranging over a nil Resources slice performs no iterations, so no
	// separate nil guard is required.
	for _, res := range a.Status.Resources {
		if res.Health == nil || res.Health.Status == health.HealthStatusHealthy {
			continue
		}

		collected = append(collected, parseAggregativeHealthErrors(&res, appTree, true)...)
	}

	return collected
}

func parseAggregativeHealthErrors(rs *appv1.ResourceStatus, apptree *appv1.ApplicationTree, addReference bool) []*events.ObjectError {
errs := make([]*events.ObjectError, 0)

if apptree == nil {
Expand All @@ -108,12 +125,24 @@ func parseAggregativeHealthErrors(rs *appv1.ResourceStatus, apptree *appv1.Appli

for _, cn := range childNodes {
if cn.Health != nil && cn.Health.Status == health.HealthStatusDegraded {
errs = append(errs, &events.ObjectError{
newErr := events.ObjectError{
Type: "health",
Level: "error",
Message: cn.Health.Message,
LastSeen: *cn.CreatedAt,
})
}

if addReference {
newErr.SourceReference = events.ErrorSourceReference{
Group: rs.Group,
Version: rs.Version,
Kind: rs.Kind,
Namespace: rs.Namespace,
Name: rs.Name,
}
}

errs = append(errs, &newErr)
}
}

Expand Down
2 changes: 1 addition & 1 deletion event_reporter/reporter/application_event_reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ func (s *applicationEventReporter) StreamApplicationEvents(
} else {
logCtx.Info("processing as root application")
// will get here only for root applications (not managed as a resource by another application)
appEvent, err := s.getApplicationEventPayload(ctx, a, ts, appInstanceLabelKey, trackingMethod, desiredManifests.ApplicationVersions)
appEvent, err := s.getApplicationEventPayload(ctx, a, appTree, ts, appInstanceLabelKey, trackingMethod, desiredManifests.ApplicationVersions)
if err != nil {
s.metricsServer.IncErroredEventsCounter(metrics.MetricParentAppEventType, metrics.MetricEventGetPayloadErrorType, a.Name)
return fmt.Errorf("failed to get application event: %w", err)
Expand Down
83 changes: 82 additions & 1 deletion event_reporter/reporter/applications_errors_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ package reporter
import (
"fmt"
"testing"
"time"

"github.com/argoproj/gitops-engine/pkg/health"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/argoproj/gitops-engine/pkg/sync/common"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -207,7 +211,84 @@ func TestParseAggregativeHealthErrors(t *testing.T) {
Kind: "application",
Namespace: "namespace",
Name: "name",
}, nil)
}, nil, false)
assert.Empty(t, errs)
})

t.Run("should set sourceReference", func(t *testing.T) {
rsName := "test-deployment"
ns := "test"
errMessage := "backoff pulling image test/test:0.1"
rsRef := v1alpha1.ResourceRef{
Group: "g",
Version: "v",
Kind: "ReplicaSet",
Name: rsName + "1",
Namespace: ns,
}

deployRef := v1alpha1.ResourceRef{
Group: "g",
Version: "v",
Kind: "Deployment",
Name: rsName,
Namespace: ns,
}

appTree := v1alpha1.ApplicationTree{
Nodes: []v1alpha1.ResourceNode{
{ // Pod
Health: &v1alpha1.HealthStatus{
Status: health.HealthStatusDegraded,
Message: errMessage,
},
ResourceRef: v1alpha1.ResourceRef{
Group: "g",
Version: "v",
Kind: "Pod",
Name: rsName + "1-3n235j5",
Namespace: ns,
},
ParentRefs: []v1alpha1.ResourceRef{rsRef},
CreatedAt: &metav1.Time{
Time: time.Now(),
},
},
{ // ReplicaSet
Health: &v1alpha1.HealthStatus{
Status: health.HealthStatusProgressing,
Message: "",
},
ResourceRef: rsRef,
ParentRefs: []v1alpha1.ResourceRef{deployRef},
CreatedAt: &metav1.Time{
Time: time.Now(),
},
},
{ // Deployment
Health: &v1alpha1.HealthStatus{
Status: health.HealthStatusDegraded,
Message: "",
},
ResourceRef: deployRef,
ParentRefs: []v1alpha1.ResourceRef{},
CreatedAt: &metav1.Time{
Time: time.Now(),
},
},
},
}

errs := parseAggregativeHealthErrors(&v1alpha1.ResourceStatus{
Group: deployRef.Group,
Version: deployRef.Version,
Kind: deployRef.Kind,
Name: deployRef.Name,
Namespace: deployRef.Namespace,
}, &appTree, true)
assert.Len(t, errs, 1)
assert.Equal(t, errMessage, errs[0].Message)
assert.NotNil(t, errs[0].SourceReference)
assert.Equal(t, deployRef.Name, errs[0].SourceReference.Name)
})
}
13 changes: 11 additions & 2 deletions event_reporter/reporter/event_payload.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,10 @@ func getResourceEventPayload(
errors = append(errors, parseApplicationSyncResultErrorsFromConditions(originalApplication.Status)...)
}

if originalApplication != nil {
errors = append(errors, parseAggregativeHealthErrorsOfApplication(originalApplication, apptree)...)
}

if len(desiredState.RawManifest) == 0 && len(desiredState.CompiledManifest) != 0 {
// for handling helm defined resources, etc...
y, err := yaml.JSONToYAML([]byte(desiredState.CompiledManifest))
Expand Down Expand Up @@ -172,7 +176,7 @@ func getResourceEventPayload(
source.HealthStatus = (*string)(&rs.Health.Status)
source.HealthMessage = &rs.Health.Message
if rs.Health.Status != health.HealthStatusHealthy {
errors = append(errors, parseAggregativeHealthErrors(rs, apptree)...)
errors = append(errors, parseAggregativeHealthErrors(rs, apptree, false)...)
}
}

Expand All @@ -197,6 +201,7 @@ func getResourceEventPayload(
func (s *applicationEventReporter) getApplicationEventPayload(
ctx context.Context,
a *appv1.Application,
appTree *appv1.ApplicationTree,
ts string,
appInstanceLabelKey string,
trackingMethod appv1.TrackingMethod,
Expand All @@ -206,6 +211,7 @@ func (s *applicationEventReporter) getApplicationEventPayload(
syncStarted = metav1.Now()
syncFinished *metav1.Time
logCtx = log.WithField("application", a.Name)
errors = []*events.ObjectError{}
)

obj := appv1.Application{}
Expand Down Expand Up @@ -281,11 +287,14 @@ func (s *applicationEventReporter) getApplicationEventPayload(
TrackingMethod: string(trackingMethod),
}

errors = append(errors, parseApplicationSyncResultErrorsFromConditions(a.Status)...)
errors = append(errors, parseAggregativeHealthErrorsOfApplication(a, appTree)...)

payload := events.EventPayload{
Timestamp: ts,
Object: object,
Source: source,
Errors: parseApplicationSyncResultErrorsFromConditions(a.Status),
Errors: errors,
AppVersions: applicationVersionsEvents,
}

Expand Down
Loading

0 comments on commit ba613c5

Please sign in to comment.