Skip to content

Commit

Permalink
APIGOV-01234 remove mutex in healthchecker.go (#468)
Browse files Browse the repository at this point in the history
  • Loading branch information
tsjohns9 authored Jun 30, 2022
1 parent 562dc6f commit dfa1642
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 90 deletions.
8 changes: 5 additions & 3 deletions pkg/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,9 +313,11 @@ func (c *agentRootCommand) initConfig() error {

// Start the initial and recurring version check jobs
startVersionCheckJobs(c.centralCfg, c.agentFeaturesCfg)
// Init the healthcheck API
healthCheckServer := &hc.Server{}
healthCheckServer.HandleRequests()

if util.IsNotTest() {
healthCheckServer := &hc.Server{}
healthCheckServer.HandleRequests()
}
}
c.initialized = true
return nil
Expand Down
51 changes: 15 additions & 36 deletions pkg/util/healthcheck/healthcheck_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"io/ioutil"
"net/http"
"net/http/httptest"
"sync"
"testing"
"time"

Expand Down Expand Up @@ -46,8 +45,6 @@ func TestRegisterHealthcheck(t *testing.T) {

func TestRunChecks(t *testing.T) {
resetGlobalHealthChecker()

// assert that the number of Checks is 0
assert.Equal(t, 0, len(globalHealthChecker.Checks), "The initial number of checks was not 0")

hcValues := map[string]bool{
Expand All @@ -69,69 +66,45 @@ func TestRunChecks(t *testing.T) {
}
}

isReady := false
cfg := corecfg.NewStatusConfig()
SetStatusConfig(cfg)

// Start a go func to watch WaitForReady
var wg sync.WaitGroup
wg.Add((1))
go func() {
defer wg.Done()

// Set isReady to true on return
err := WaitForReady()
if err == nil {
isReady = true
}

}()

// Register healthchecks
_, err1 := RegisterHealthcheck("healthcheck1", "healthcheck1", hcFunc)
_, err2 := RegisterHealthcheck("healthcheck2", "healthcheck2", hcFunc)
assert.Nil(t, err1, "There was an unexpected error while registering a healthcheck1")
assert.Nil(t, err2, "There was an unexpected error while registering a healthcheck2")
assert.Nil(t, err1)
assert.Nil(t, err2)

// Run the checks, all fail
res := RunChecks()
assert.Equal(t, FAIL, res, "The overall healthcheck should have failed")
assert.False(t, isReady, "isReady should have been false")

// only hc1 pass
hcValues["healthcheck1"] = true
hcValues["healthcheck2"] = false
res = RunChecks()
assert.Equal(t, FAIL, res, "The overall healthcheck should have failed")
assert.Equal(t, OK, getCheckerStatus(globalHealthChecker.Checks["healthcheck1"]), "healthcheck1 should have passed")
assert.Equal(t, FAIL, getCheckerStatus(globalHealthChecker.Checks["healthcheck2"]), "healthcheck2 should have failed")
assert.False(t, isReady, "isReady should have been false")
assert.Equal(t, OK, globalHealthChecker.Checks["healthcheck1"].Status.Result, "healthcheck1 should have passed")
assert.Equal(t, FAIL, globalHealthChecker.Checks["healthcheck2"].Status.Result, "healthcheck2 should have failed")

// only hc2 pass
hcValues["healthcheck1"] = false
hcValues["healthcheck2"] = true
res = RunChecks()
assert.Equal(t, FAIL, res, "The overall healthcheck should have failed")
assert.Equal(t, FAIL, getCheckerStatus(globalHealthChecker.Checks["healthcheck1"]), "healthcheck1 should have failed")
assert.Equal(t, OK, getCheckerStatus(globalHealthChecker.Checks["healthcheck2"]), "healthcheck2 should have passed")
assert.False(t, isReady, "isReady should have been false")
assert.Equal(t, FAIL, globalHealthChecker.Checks["healthcheck1"].Status.Result, "healthcheck1 should have failed")
assert.Equal(t, OK, globalHealthChecker.Checks["healthcheck2"].Status.Result, "healthcheck2 should have passed")

// hall hc pass
hcValues["healthcheck1"] = true
hcValues["healthcheck2"] = true
res = RunChecks()
assert.Equal(t, OK, res, "The overall healthcheck should have passed")
assert.Equal(t, OK, getCheckerStatus(globalHealthChecker.Checks["healthcheck1"]), "healthcheck1 should have passed")
assert.Equal(t, OK, getCheckerStatus(globalHealthChecker.Checks["healthcheck2"]), "healthcheck2 should have passed")

wg.Wait()
assert.True(t, isReady, "isReady should have been true")
assert.Equal(t, OK, globalHealthChecker.Checks["healthcheck1"].Status.Result, "healthcheck1 should have passed")
assert.Equal(t, OK, globalHealthChecker.Checks["healthcheck2"].Status.Result, "healthcheck2 should have passed")
}

func TestStatusAPIDoesNotAllowPrefixMatches(t *testing.T) {
resetGlobalHealthChecker()
cfg := &corecfg.StatusConfiguration{
Port: 8989, // We don't use this actual port in this test
Port: 7890,
HealthCheckPeriod: 3 * time.Minute,
HealthCheckInterval: 30 * time.Second,
}
Expand All @@ -144,6 +117,12 @@ func TestStatusAPIDoesNotAllowPrefixMatches(t *testing.T) {
url := fmt.Sprintf("http://localhost:%d%s", cfg.Port, path)
resp, err := client.Get(url)
assert.Nil(t, err)
if err != nil {
t.Errorf("test failed: %s", err.Error())
}
if resp == nil {
return 0
}
return resp.StatusCode
}

Expand Down
68 changes: 17 additions & 51 deletions pkg/util/healthcheck/healthchecker.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,14 @@ import (
corecfg "github.com/Axway/agent-sdk/pkg/config"
"github.com/Axway/agent-sdk/pkg/jobs"
"github.com/Axway/agent-sdk/pkg/util"
"github.com/Axway/agent-sdk/pkg/util/errors"
"github.com/Axway/agent-sdk/pkg/util/log"
"github.com/google/uuid"
)

var globalHealthChecker *healthChecker
var statusConfig corecfg.StatusConfig
var logger log.FieldLogger
var statusCfgMutex sync.Mutex = sync.Mutex{}
var statusCfgMutex = sync.Mutex{}

func init() {
globalHealthChecker = &healthChecker{
Expand Down Expand Up @@ -73,13 +72,13 @@ func RegisterHealthcheck(name, endpoint string, check CheckStatus) (string, erro
http.HandleFunc(fmt.Sprintf("/status/%s", endpoint), checkHandler)

if util.IsNotTest() {
executeCheck(newChecker) // execute the healthcheck on registration
newChecker.executeCheck()
}

return newID.String(), nil
}

// SetStatusConfig - Set the status config globally
// SetStatusConfig - Set the status config globally.
func SetStatusConfig(statusCfg corecfg.StatusConfig) {
statusCfgMutex.Lock()
defer statusCfgMutex.Unlock()
Expand All @@ -93,26 +92,6 @@ func GetStatusConfig() corecfg.StatusConfig {
return statusConfig
}

// WaitForReady - Iterate through all healthchecks, returns OK once ready or returns Fail if timeout is reached
func WaitForReady() error {
timeout := time.After(GetStatusConfig().GetHealthCheckPeriod())
tick := time.Tick(500 * time.Millisecond)
// Keep trying until we have timed out or got a good result
for {
select {
// Got a timeout! Fail with a timeout error
case <-timeout:
return errors.ErrTimeoutServicesNotReady
// Got a tick, we should RunChecks
case <-tick:
if RunChecks() == OK {
logger.Trace("Services are Ready")
return nil
}
}
}
}

// GetStatus - returns the current status for specified service
func GetStatus(endpoint string) StatusLevel {
statusCheck, ok := globalHealthChecker.Checks[endpoint]
Expand All @@ -132,10 +111,9 @@ func GetStatus(endpoint string) StatusLevel {
// RunChecks - loop through all
func RunChecks() StatusLevel {
passed := true

for _, check := range globalHealthChecker.Checks {
executeCheck(check)
check.statusMutex.Lock()
defer check.statusMutex.Unlock()
check.executeCheck()
if check.Status.Result == FAIL {
globalHealthChecker.Status = FAIL
passed = false
Expand All @@ -149,13 +127,14 @@ func RunChecks() StatusLevel {
return globalHealthChecker.Status
}

// executeCheck - executes the specified status check and logs the result
func executeCheck(check *statusCheck) {
check.statusMutex.Lock()
defer check.statusMutex.Unlock()
func (check *statusCheck) setStatus(s *Status) {
check.Status = s
}

func (check *statusCheck) executeCheck() {
s := check.checker(check.Name)
check.setStatus(s)

// Run the check
check.Status = check.checker(check.Name)
if check.Status.Result == OK {
logger.
WithField("check-name", check.Name).
Expand All @@ -170,16 +149,8 @@ func executeCheck(check *statusCheck) {
}
}

// getCheckerStatus - get the status of the checker
func getCheckerStatus(check *statusCheck) StatusLevel {
check.statusMutex.Lock()
defer check.statusMutex.Unlock()
return check.Status.Result
}

// Server contains an http server for health checks.
type Server struct {
server *http.Server
}

// HandleRequests - starts the http server
Expand All @@ -189,22 +160,17 @@ func (s *Server) HandleRequests() {
globalHealthChecker.registered = true
}

// Close the server if already running. This can happen due to config/agent resource change
if s.server != nil {
s.server.Close()
}

s.startHealthCheckServer()
}

func (s *Server) startHealthCheckServer() {
if statusConfig != nil && statusConfig.GetPort() > 0 {
s.server = &http.Server{Addr: fmt.Sprintf(":%d", statusConfig.GetPort())}
go s.server.ListenAndServe()
addr := fmt.Sprintf(":%d", statusConfig.GetPort())
go http.ListenAndServe(addr, nil)
}
}

// CheckIsRunning - Checks if another instance is already running by looking at the healthcheck
// CheckIsRunning - Checks if another instance is already running by looking at the healthcheck.
func CheckIsRunning() error {
if statusConfig != nil && statusConfig.GetPort() > 0 {
apiClient := api.NewClientWithTimeout(nil, "", 5*time.Second)
Expand All @@ -229,7 +195,7 @@ func GetGlobalStatus() string {
return string(globalHealthChecker.Status)
}

//GetHealthcheckOutput - query the http endpoint and return the body
// GetHealthcheckOutput - query the http endpoint and return the body
func GetHealthcheckOutput(url string) (string, error) {
client := http.DefaultClient

Expand Down Expand Up @@ -301,7 +267,7 @@ func checkHandler(w http.ResponseWriter, r *http.Request) {
return
}

executeCheck(thisCheck)
thisCheck.executeCheck()

w.Header().Set("Content-Type", "application/json; charset=utf-8")
// If check failed change return code to 500
Expand Down

0 comments on commit dfa1642

Please sign in to comment.