Skip to content

Commit

Permalink
Merge pull request #7753 from planetscale/jg_unhealthy_threshold
Browse files Browse the repository at this point in the history
Allow modification of tablet unhealthy_threshold via debugEnv
  • Loading branch information
deepthi authored Apr 22, 2021
2 parents 7ed6d94 + 983055c commit 5ed08b7
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 6 deletions.
21 changes: 21 additions & 0 deletions go/vt/vttablet/tabletserver/debugenv.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"net/http"
"strconv"
"text/template"
"time"

"vitess.io/vitess/go/acl"
"vitess.io/vitess/go/vt/log"
Expand Down Expand Up @@ -72,6 +73,15 @@ func debugEnvHandler(tsv *TabletServer, w http.ResponseWriter, r *http.Request)
f(ival)
msg = fmt.Sprintf("Setting %v to: %v", varname, value)
}
// setDurationVal parses the submitted value as a time.Duration (any form
// accepted by time.ParseDuration) and, when parsing succeeds, passes it to
// apply. Either way msg is set to describe the outcome for varname.
setDurationVal := func(apply func(time.Duration)) {
	parsed, parseErr := time.ParseDuration(value)
	if parseErr == nil {
		apply(parsed)
		msg = fmt.Sprintf("Setting %v to: %v", varname, value)
		return
	}
	// Leave the setting untouched and report why the input was rejected.
	msg = fmt.Sprintf("Failed setting value for %v: %v", varname, parseErr)
}
setFloat64Val := func(f func(float64)) {
fval, err := strconv.ParseFloat(value, 64)
if err != nil {
Expand All @@ -94,6 +104,10 @@ func debugEnvHandler(tsv *TabletServer, w http.ResponseWriter, r *http.Request)
setIntVal(tsv.SetMaxResultSize)
case "WarnResultSize":
setIntVal(tsv.SetWarnResultSize)
case "UnhealthyThreshold":
setDurationVal(tsv.Config().Healthcheck.UnhealthyThresholdSeconds.Set)
setDurationVal(tsv.hs.SetUnhealthyThreshold)
setDurationVal(tsv.sm.SetUnhealthyThreshold)
case "ThrottleMetricThreshold":
setFloat64Val(tsv.SetThrottleMetricThreshold)
case "Consolidator":
Expand All @@ -109,6 +123,12 @@ func debugEnvHandler(tsv *TabletServer, w http.ResponseWriter, r *http.Request)
Value: fmt.Sprintf("%v", f()),
})
}
// addDurationVar appends one entry to vars for a duration-valued setting,
// using the current value returned by f rendered in time.Duration's string
// form.
addDurationVar := func(varname string, f func() time.Duration) {
	// Duration implements fmt.Stringer, so String() matches "%v" formatting.
	entry := envValue{
		VarName: varname,
		Value:   f().String(),
	}
	vars = append(vars, entry)
}
addFloat64Var := func(varname string, f func() float64) {
vars = append(vars, envValue{
VarName: varname,
Expand All @@ -121,6 +141,7 @@ func debugEnvHandler(tsv *TabletServer, w http.ResponseWriter, r *http.Request)
addIntVar("QueryCacheCapacity", tsv.QueryPlanCacheCap)
addIntVar("MaxResultSize", tsv.MaxResultSize)
addIntVar("WarnResultSize", tsv.WarnResultSize)
addDurationVar("UnhealthyThreshold", tsv.Config().Healthcheck.UnhealthyThresholdSeconds.Get)
addFloat64Var("ThrottleMetricThreshold", tsv.ThrottleMetricThreshold)
vars = append(vars, envValue{
VarName: "Consolidator",
Expand Down
21 changes: 18 additions & 3 deletions go/vt/vttablet/tabletserver/health_streamer.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/golang/protobuf/proto"

"vitess.io/vitess/go/history"
"vitess.io/vitess/go/sync2"
"vitess.io/vitess/go/vt/log"
querypb "vitess.io/vitess/go/vt/proto/query"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
Expand All @@ -51,7 +52,7 @@ var (
type healthStreamer struct {
stats *tabletenv.Stats
degradedThreshold time.Duration
unhealthyThreshold time.Duration
unhealthyThreshold sync2.AtomicDuration

mu sync.Mutex
ctx context.Context
Expand All @@ -66,7 +67,7 @@ func newHealthStreamer(env tabletenv.Env, alias topodatapb.TabletAlias) *healthS
return &healthStreamer{
stats: env.Stats(),
degradedThreshold: env.Config().Healthcheck.DegradedThresholdSeconds.Get(),
unhealthyThreshold: env.Config().Healthcheck.UnhealthyThresholdSeconds.Get(),
unhealthyThreshold: sync2.NewAtomicDuration(env.Config().Healthcheck.UnhealthyThresholdSeconds.Get()),
clients: make(map[chan *querypb.StreamHealthResponse]struct{}),

state: &querypb.StreamHealthResponse{
Expand Down Expand Up @@ -220,7 +221,7 @@ func (hs *healthStreamer) AppendDetails(details []*kv) []*kv {
sbm := time.Duration(hs.state.RealtimeStats.SecondsBehindMaster) * time.Second
class := healthyClass
switch {
case sbm > hs.unhealthyThreshold:
case sbm > hs.unhealthyThreshold.Get():
class = unhealthyClass
case sbm > hs.degradedThreshold:
class = unhappyClass
Expand All @@ -240,3 +241,17 @@ func (hs *healthStreamer) AppendDetails(details []*kv) []*kv {

return details
}

// SetUnhealthyThreshold atomically stores v as the new unhealthy threshold and
// then pushes a copy of the current health state to every registered stream
// client. A client whose channel cannot accept the message immediately is
// dropped: its channel is closed and it is removed from hs.clients.
func (hs *healthStreamer) SetUnhealthyThreshold(v time.Duration) {
	hs.unhealthyThreshold.Set(v)

	// This method is invoked from the debugEnv HTTP handler, i.e. on an
	// arbitrary request goroutine. Reading hs.state and iterating/deleting
	// from the hs.clients map without synchronization is a data race (and a
	// concurrent map iteration/write is fatal in Go), so hold hs.mu for the
	// whole broadcast.
	// NOTE(review): assumes the other healthStreamer methods access
	// hs.state/hs.clients under hs.mu and that no caller of this method
	// already holds it — confirm.
	hs.mu.Lock()
	defer hs.mu.Unlock()

	shr := proto.Clone(hs.state).(*querypb.StreamHealthResponse)
	for ch := range hs.clients {
		select {
		case ch <- shr:
		default:
			// Non-blocking send failed: drop the client rather than stall.
			log.Info("Resetting health streamer clients due to unhealthy threshold change")
			close(ch)
			delete(hs.clients, ch)
		}
	}
}
10 changes: 7 additions & 3 deletions go/vt/vttablet/tabletserver/state_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ type stateManager struct {
checkMySQLThrottler *sync2.Semaphore

timebombDuration time.Duration
unhealthyThreshold time.Duration
unhealthyThreshold sync2.AtomicDuration
shutdownGracePeriod time.Duration
transitionGracePeriod time.Duration
}
Expand Down Expand Up @@ -187,7 +187,7 @@ func (sm *stateManager) Init(env tabletenv.Env, target querypb.Target) {
sm.checkMySQLThrottler = sync2.NewSemaphore(1, 0)
sm.timebombDuration = env.Config().OltpReadPool.TimeoutSeconds.Get() * 10
sm.hcticks = timer.NewTimer(env.Config().Healthcheck.IntervalSeconds.Get())
sm.unhealthyThreshold = env.Config().Healthcheck.UnhealthyThresholdSeconds.Get()
sm.unhealthyThreshold = sync2.NewAtomicDuration(env.Config().Healthcheck.UnhealthyThresholdSeconds.Get())
sm.shutdownGracePeriod = env.Config().GracePeriods.ShutdownSeconds.Get()
sm.transitionGracePeriod = env.Config().GracePeriods.TransitionSeconds.Get()
}
Expand Down Expand Up @@ -627,7 +627,7 @@ func (sm *stateManager) refreshReplHealthLocked() (time.Duration, error) {
}
sm.replHealthy = false
} else {
if lag > sm.unhealthyThreshold {
if lag > sm.unhealthyThreshold.Get() {
if sm.replHealthy {
log.Infof("Going unhealthy due to high replication lag: %v", lag)
}
Expand Down Expand Up @@ -755,3 +755,7 @@ func (sm *stateManager) IsServingString() string {
}
return "NOT_SERVING"
}

// SetUnhealthyThreshold stores v as the state manager's unhealthy threshold.
// The field is a sync2.AtomicDuration; refreshReplHealthLocked compares the
// replication lag against its current value when deciding whether to go
// unhealthy.
func (sm *stateManager) SetUnhealthyThreshold(v time.Duration) {
sm.unhealthyThreshold.Set(v)
}

0 comments on commit 5ed08b7

Please sign in to comment.