diff --git a/pkg/cmd/root.go b/pkg/cmd/root.go index 44555d246..830d6f6a9 100644 --- a/pkg/cmd/root.go +++ b/pkg/cmd/root.go @@ -313,9 +313,11 @@ func (c *agentRootCommand) initConfig() error { // Start the initial and recurring version check jobs startVersionCheckJobs(c.centralCfg, c.agentFeaturesCfg) - // Init the healthcheck API - healthCheckServer := &hc.Server{} - healthCheckServer.HandleRequests() + + if util.IsNotTest() { + healthCheckServer := &hc.Server{} + healthCheckServer.HandleRequests() + } } c.initialized = true return nil diff --git a/pkg/util/healthcheck/healthcheck_test.go b/pkg/util/healthcheck/healthcheck_test.go index 825d993ea..2cec2ed4c 100644 --- a/pkg/util/healthcheck/healthcheck_test.go +++ b/pkg/util/healthcheck/healthcheck_test.go @@ -6,7 +6,6 @@ import ( "io/ioutil" "net/http" "net/http/httptest" - "sync" "testing" "time" @@ -46,8 +45,6 @@ func TestRegisterHealthcheck(t *testing.T) { func TestRunChecks(t *testing.T) { resetGlobalHealthChecker() - - // assert that the number of Checks is 0 assert.Equal(t, 0, len(globalHealthChecker.Checks), "The initial number of checks was not 0") hcValues := map[string]bool{ @@ -69,69 +66,45 @@ func TestRunChecks(t *testing.T) { } } - isReady := false cfg := corecfg.NewStatusConfig() SetStatusConfig(cfg) - // Start a go func to watch WaitForReady - var wg sync.WaitGroup - wg.Add((1)) - go func() { - defer wg.Done() - - // Set isReady to true on return - err := WaitForReady() - if err == nil { - isReady = true - } - - }() - - // Register healthchecks _, err1 := RegisterHealthcheck("healthcheck1", "healthcheck1", hcFunc) _, err2 := RegisterHealthcheck("healthcheck2", "healthcheck2", hcFunc) - assert.Nil(t, err1, "There was an unexpected error while registering a healthcheck1") - assert.Nil(t, err2, "There was an unexpected error while registering a healthcheck2") + assert.Nil(t, err1) + assert.Nil(t, err2) - // Run the checks, all fail res := RunChecks() assert.Equal(t, FAIL, res, "The overall healthcheck should have failed") - assert.False(t, isReady, "isReady should have been false") - // only hc1 pass hcValues["healthcheck1"] = true hcValues["healthcheck2"] = false res = RunChecks() assert.Equal(t, FAIL, res, "The overall healthcheck should have failed") - assert.Equal(t, OK, getCheckerStatus(globalHealthChecker.Checks["healthcheck1"]), "healthcheck1 should have passed") - assert.Equal(t, FAIL, getCheckerStatus(globalHealthChecker.Checks["healthcheck2"]), "healthcheck2 should have failed") - assert.False(t, isReady, "isReady should have been false") + assert.Equal(t, OK, globalHealthChecker.Checks["healthcheck1"].Status.Result, "healthcheck1 should have passed") + assert.Equal(t, FAIL, globalHealthChecker.Checks["healthcheck2"].Status.Result, "healthcheck2 should have failed") // only hc2 pass hcValues["healthcheck1"] = false hcValues["healthcheck2"] = true res = RunChecks() assert.Equal(t, FAIL, res, "The overall healthcheck should have failed") - assert.Equal(t, FAIL, getCheckerStatus(globalHealthChecker.Checks["healthcheck1"]), "healthcheck1 should have failed") - assert.Equal(t, OK, getCheckerStatus(globalHealthChecker.Checks["healthcheck2"]), "healthcheck2 should have passed") - assert.False(t, isReady, "isReady should have been false") + assert.Equal(t, FAIL, globalHealthChecker.Checks["healthcheck1"].Status.Result, "healthcheck1 should have failed") + assert.Equal(t, OK, globalHealthChecker.Checks["healthcheck2"].Status.Result, "healthcheck2 should have passed") // hall hc pass hcValues["healthcheck1"] = true hcValues["healthcheck2"] = true res = RunChecks() assert.Equal(t, OK, res, "The overall healthcheck should have passed") - assert.Equal(t, OK, getCheckerStatus(globalHealthChecker.Checks["healthcheck1"]), "healthcheck1 should have passed") - assert.Equal(t, OK, getCheckerStatus(globalHealthChecker.Checks["healthcheck2"]), "healthcheck2 should have passed") - - wg.Wait() - assert.True(t, isReady, "isReady should have been true") + assert.Equal(t, OK, globalHealthChecker.Checks["healthcheck1"].Status.Result, "healthcheck1 should have passed") + assert.Equal(t, OK, globalHealthChecker.Checks["healthcheck2"].Status.Result, "healthcheck2 should have passed") } func TestStatusAPIDoesNotAllowPrefixMatches(t *testing.T) { resetGlobalHealthChecker() cfg := &corecfg.StatusConfiguration{ - Port: 8989, // We don't use this actual port in this test + Port: 7890, HealthCheckPeriod: 3 * time.Minute, HealthCheckInterval: 30 * time.Second, } @@ -144,6 +117,12 @@ func TestStatusAPIDoesNotAllowPrefixMatches(t *testing.T) { url := fmt.Sprintf("http://localhost:%d%s", cfg.Port, path) resp, err := client.Get(url) assert.Nil(t, err) + if err != nil { + t.Errorf("test failed: %s", err.Error()) + } + if resp == nil { + return 0 + } return resp.StatusCode } diff --git a/pkg/util/healthcheck/healthchecker.go b/pkg/util/healthcheck/healthchecker.go index 41b527d59..3eada1159 100644 --- a/pkg/util/healthcheck/healthchecker.go +++ b/pkg/util/healthcheck/healthchecker.go @@ -16,7 +16,6 @@ import ( corecfg "github.com/Axway/agent-sdk/pkg/config" "github.com/Axway/agent-sdk/pkg/jobs" "github.com/Axway/agent-sdk/pkg/util" - "github.com/Axway/agent-sdk/pkg/util/errors" "github.com/Axway/agent-sdk/pkg/util/log" "github.com/google/uuid" ) @@ -24,7 +23,7 @@ import ( var globalHealthChecker *healthChecker var statusConfig corecfg.StatusConfig var logger log.FieldLogger -var statusCfgMutex sync.Mutex = sync.Mutex{} +var statusCfgMutex = sync.Mutex{} func init() { globalHealthChecker = &healthChecker{ @@ -73,13 +72,13 @@ func RegisterHealthcheck(name, endpoint string, check CheckStatus) (string, erro http.HandleFunc(fmt.Sprintf("/status/%s", endpoint), checkHandler) if util.IsNotTest() { - executeCheck(newChecker) // execute the healthcheck on registration + newChecker.executeCheck() } return newID.String(), nil } -// SetStatusConfig - Set the status config globally +// SetStatusConfig - Set the status config globally. func SetStatusConfig(statusCfg corecfg.StatusConfig) { statusCfgMutex.Lock() defer statusCfgMutex.Unlock() @@ -93,26 +92,6 @@ func GetStatusConfig() corecfg.StatusConfig { return statusConfig } -// WaitForReady - Iterate through all healthchecks, returns OK once ready or returns Fail if timeout is reached -func WaitForReady() error { - timeout := time.After(GetStatusConfig().GetHealthCheckPeriod()) - tick := time.Tick(500 * time.Millisecond) - // Keep trying until we have timed out or got a good result - for { - select { - // Got a timeout! Fail with a timeout error - case <-timeout: - return errors.ErrTimeoutServicesNotReady - // Got a tick, we should RunChecks - case <-tick: - if RunChecks() == OK { - logger.Trace("Services are Ready") - return nil - } - } - } -} - // GetStatus - returns the current status for specified service func GetStatus(endpoint string) StatusLevel { statusCheck, ok := globalHealthChecker.Checks[endpoint] @@ -132,10 +111,9 @@ func GetStatus(endpoint string) StatusLevel { // RunChecks - loop through all func RunChecks() StatusLevel { passed := true + for _, check := range globalHealthChecker.Checks { - executeCheck(check) - check.statusMutex.Lock() - defer check.statusMutex.Unlock() + check.executeCheck() if check.Status.Result == FAIL { globalHealthChecker.Status = FAIL passed = false @@ -149,13 +127,14 @@ func RunChecks() StatusLevel { return globalHealthChecker.Status } -// executeCheck - executes the specified status check and logs the result -func executeCheck(check *statusCheck) { - check.statusMutex.Lock() - defer check.statusMutex.Unlock() +func (check *statusCheck) setStatus(s *Status) { + check.Status = s +} + +func (check *statusCheck) executeCheck() { + s := check.checker(check.Name) + check.setStatus(s) - // Run the check - check.Status = check.checker(check.Name) if check.Status.Result == OK { logger. WithField("check-name", check.Name). @@ -170,16 +149,8 @@ func executeCheck(check *statusCheck) { } } -// getCheckerStatus - get the status of the checker -func getCheckerStatus(check *statusCheck) StatusLevel { - check.statusMutex.Lock() - defer check.statusMutex.Unlock() - return check.Status.Result -} - // Server contains an http server for health checks. type Server struct { - server *http.Server } // HandleRequests - starts the http server @@ -189,22 +160,17 @@ func (s *Server) HandleRequests() { globalHealthChecker.registered = true } - // Close the server if already running. This can happen due to config/agent resource change - if s.server != nil { - s.server.Close() - } - s.startHealthCheckServer() } func (s *Server) startHealthCheckServer() { if statusConfig != nil && statusConfig.GetPort() > 0 { - s.server = &http.Server{Addr: fmt.Sprintf(":%d", statusConfig.GetPort())} - go s.server.ListenAndServe() + addr := fmt.Sprintf(":%d", statusConfig.GetPort()) + go http.ListenAndServe(addr, nil) } } -// CheckIsRunning - Checks if another instance is already running by looking at the healthcheck +// CheckIsRunning - Checks if another instance is already running by looking at the healthcheck. func CheckIsRunning() error { if statusConfig != nil && statusConfig.GetPort() > 0 { apiClient := api.NewClientWithTimeout(nil, "", 5*time.Second) @@ -229,7 +195,7 @@ func GetGlobalStatus() string { return string(globalHealthChecker.Status) } -//GetHealthcheckOutput - query the http endpoint and return the body +// GetHealthcheckOutput - query the http endpoint and return the body func GetHealthcheckOutput(url string) (string, error) { client := http.DefaultClient @@ -301,7 +267,7 @@ func checkHandler(w http.ResponseWriter, r *http.Request) { return } - executeCheck(thisCheck) + thisCheck.executeCheck() w.Header().Set("Content-Type", "application/json; charset=utf-8") // If check failed change return code to 500