From c3d099ec6c6bc3b9e679ca44cf363caf2dec53c4 Mon Sep 17 00:00:00 2001
From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com>
Date: Wed, 28 Sep 2022 14:10:05 +0200
Subject: [PATCH] Add success log message after previous checkin failures
 (#1327) (#1343)

(cherry picked from commit e6143213c1cbc69fd83aac31b1c2730b5f670fe2)

Co-authored-by: Josh Dover <1813008+joshdover@users.noreply.github.com>
---
 CHANGELOG.next.asciidoc                          |  1 +
 .../application/gateway/fleet/fleet_gateway.go   | 16 +++++++++++-----
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc
index faef2861ba9..7d500cf8f53 100644
--- a/CHANGELOG.next.asciidoc
+++ b/CHANGELOG.next.asciidoc
@@ -119,6 +119,7 @@
 - Remove fleet event reporter and events from checkin body. {issue}993[993]
 - Fix unintended reset of source URI when downloading components {pull}1252[1252]
 - Create separate status reporter for local only events so that degraded fleet-checkins no longer affect health on successful fleet-checkins. {issue}1157[1157] {pull}1285[1285]
+- Add success log message after previous checkin failures {pull}1327[1327]
 
 ==== New features
 
diff --git a/internal/pkg/agent/application/gateway/fleet/fleet_gateway.go b/internal/pkg/agent/application/gateway/fleet/fleet_gateway.go
index 6df9f171fbe..b88a0cafee0 100644
--- a/internal/pkg/agent/application/gateway/fleet/fleet_gateway.go
+++ b/internal/pkg/agent/application/gateway/fleet/fleet_gateway.go
@@ -210,7 +210,7 @@ func (f *fleetGateway) worker() {
 				f.statusReporter.Update(state.Failed, errMsg, nil)
 			} else {
 				f.statusReporter.Update(state.Healthy, "", nil)
-				f.localReporter.Update(state.Healthy, "", nil) // we don't need to specifically set the local reporter to failed above, but it needs to be reset to healthy if a checking succeeds
+				f.localReporter.Update(state.Healthy, "", nil) // we don't need to specifically set the local reporter to failed above, but it needs to be reset to healthy if a checkin succeeds
 			}
 
 		case <-f.bgContext.Done():
@@ -280,11 +280,11 @@ func (f *fleetGateway) doExecute() (*fleetapi.CheckinResponse, error) {
 	// Guard if the context is stopped by a out of bound call,
 	// this mean we are rebooting to change the log level or the system is shutting us down.
 	for f.bgContext.Err() == nil {
-		f.log.Debugf("Checking started")
+		f.log.Debugf("Checkin started")
 		resp, err := f.execute(f.bgContext)
 		if err != nil {
 			f.checkinFailCounter++
-			f.log.Errorf("Could not communicate with fleet-server Checking API will retry, error: %s", err)
+			f.log.Errorf("Could not communicate with fleet-server checkin API will retry, error: %s", err)
 			if !f.backoff.Wait() {
 				// Something bad has happened and we log it and we should update our current state.
 				err := errors.New(
@@ -299,10 +299,16 @@ func (f *fleetGateway) doExecute() (*fleetapi.CheckinResponse, error) {
 			}
 			if f.checkinFailCounter > 1 {
 				f.localReporter.Update(state.Degraded, fmt.Sprintf("checkin failed: %v", err), nil)
-				f.log.Errorf("checking number %d failed: %s", f.checkinFailCounter, err.Error())
+				f.log.Errorf("checkin number %d failed: %s", f.checkinFailCounter, err.Error())
 			}
 			continue
 		}
+
+		if f.checkinFailCounter > 0 {
+			// Log at the same level as the error logs above so that a subsequent success is still visible when the log level is set to 'error'.
+			f.log.Errorf("Checkin request to fleet-server succeeded after %d failures", f.checkinFailCounter)
+		}
+
 		f.checkinFailCounter = 0
 		// Request was successful, return the collected actions.
 		return resp, nil
@@ -338,7 +344,7 @@ func (f *fleetGateway) execute(ctx context.Context) (*fleetapi.CheckinResponse,
 		f.unauthCounter++
 
 		if f.shouldUnenroll() {
-			f.log.Warnf("retrieved an invalid api key error '%d' times. Starting to unenroll the elastic agent.", f.unauthCounter)
+			f.log.Warnf("received an invalid api key error '%d' times. Starting to unenroll the elastic agent.", f.unauthCounter)
 			return &fleetapi.CheckinResponse{
 				Actions: []fleetapi.Action{&fleetapi.ActionUnenroll{ActionID: "", ActionType: "UNENROLL", IsDetected: true}},
 			}, nil