diff --git a/go/vt/discovery/replicationlag.go b/go/vt/discovery/replicationlag.go index 1e8237c8dca..e6a491bc5a3 100644 --- a/go/vt/discovery/replicationlag.go +++ b/go/vt/discovery/replicationlag.go @@ -25,9 +25,10 @@ import ( var ( // lowReplicationLag defines the duration that replication lag is low enough that the VTTablet is considered healthy. - lowReplicationLag = flag.Duration("discovery_low_replication_lag", 30*time.Second, "the replication lag that is considered low enough to be healthy") - highReplicationLagMinServing = flag.Duration("discovery_high_replication_lag_minimum_serving", 2*time.Hour, "the replication lag that is considered too high when selecting the minimum num vttablets for serving") - minNumTablets = flag.Int("min_number_serving_vttablets", 2, "the minimum number of vttablets that will be continue to be used even with low replication lag") + lowReplicationLag = flag.Duration("discovery_low_replication_lag", 30*time.Second, "the replication lag that is considered low enough to be healthy") + highReplicationLagMinServing = flag.Duration("discovery_high_replication_lag_minimum_serving", 2*time.Hour, "the replication lag that is considered too high when selecting the minimum num vttablets for serving") + minNumTablets = flag.Int("min_number_serving_vttablets", 2, "the minimum number of vttablets that will be continue to be used even with low replication lag") + legacyReplicationLagAlgorithm = flag.Bool("legacy_replication_lag_algorithm", true, "use the legacy algorithm when selecting the vttablets for serving") ) // IsReplicationLagHigh verifies that the given TabletStats refers to a tablet with high @@ -43,7 +44,17 @@ func IsReplicationLagVeryHigh(tabletStats *TabletStats) bool { } // FilterByReplicationLag filters the list of TabletStats by TabletStats.Stats.SecondsBehindMaster. -// The algorithm (TabletStats that is non-serving or has error is ignored): +// Note that TabletStats that is non-serving or has error is ignored. +// +// The simplified logic: +// - Return tablets that have lag <= lowReplicationLag. +// - Make sure we return at least minNumTablets tablets, if there are enough one with lag <= highReplicationLagMinServing. +// For example, with the default of 30s / 2h / 2, this means: +// - lags of (5s, 10s, 15s, 120s) return the first three +// - lags of (30m, 35m, 40m, 45m) return the first two +// - lags of (2h, 3h, 4h, 5h) return the first one +// +// The legacy algorithm (default for now): // - Return the list if there is 0 or 1 tablet. // - Return the list if all tablets have <=30s lag. // - Filter by replication lag: for each tablet, if the mean value without it is more than 0.7 of the mean value across all tablets, it is valid. @@ -58,16 +69,46 @@ func IsReplicationLagVeryHigh(tabletStats *TabletStats) bool { // * degraded_threshold: this is only used by vttablet for display. It should match // discovery_low_replication_lag here, so the vttablet status display matches what vtgate will do of it. func FilterByReplicationLag(tabletStatsList []*TabletStats) []*TabletStats { - res := filterByLag(tabletStatsList) + if !*legacyReplicationLagAlgorithm { + return filterByLag(tabletStatsList) + } + + res := filterByLagWithLegacyAlgorithm(tabletStatsList) // run the filter again if exactly one tablet is removed, // and we have spare tablets. if len(res) > *minNumTablets && len(res) == len(tabletStatsList)-1 { - res = filterByLag(res) + res = filterByLagWithLegacyAlgorithm(res) } return res } func filterByLag(tabletStatsList []*TabletStats) []*TabletStats { + list := make([]tabletLagSnapshot, 0, len(tabletStatsList)) + // filter non-serving tablets and those with very high replication lag + for _, ts := range tabletStatsList { + if !ts.Serving || ts.LastError != nil || ts.Stats == nil || IsReplicationLagVeryHigh(ts) { + continue + } + // Pull the current replication lag for a stable sort later. + list = append(list, tabletLagSnapshot{ + ts: ts, + replag: ts.Stats.SecondsBehindMaster}) + } + + // Sort by replication lag. + sort.Sort(byReplag(list)) + + // Pick those with low replication lag, but at least minNumTablets tablets regardless. + res := make([]*TabletStats, 0, len(list)) + for i := 0; i < len(list); i++ { + if !IsReplicationLagHigh(list[i].ts) || i < *minNumTablets { + res = append(res, list[i].ts) + } + } + return res +} + +func filterByLagWithLegacyAlgorithm(tabletStatsList []*TabletStats) []*TabletStats { list := make([]*TabletStats, 0, len(tabletStatsList)) // filter non-serving tablets for _, ts := range tabletStatsList { diff --git a/go/vt/discovery/replicationlag_test.go b/go/vt/discovery/replicationlag_test.go index 2053f60d5b1..7c26283a06f 100644 --- a/go/vt/discovery/replicationlag_test.go +++ b/go/vt/discovery/replicationlag_test.go @@ -29,6 +29,11 @@ func testSetMinNumTablets(newMin int) { *minNumTablets = newMin } +// testSetLegacyReplicationLagAlgorithm is a test helper function, if this is used by a production code path, something is wrong. +func testSetLegacyReplicationLagAlgorithm(newLegacy bool) { + *legacyReplicationLagAlgorithm = newLegacy +} + func TestFilterByReplicationLagUnhealthy(t *testing.T) { // 1 healthy serving tablet, 1 not healhty ts1 := &TabletStats{ @@ -51,6 +56,84 @@ func TestFilterByReplicationLagUnhealthy(t *testing.T) { } func TestFilterByReplicationLag(t *testing.T) { + // Use simplified logic + testSetLegacyReplicationLagAlgorithm(false) + + cases := []struct { + description string + input []uint32 + output []uint32 + }{ + { + "0 tablet", + []uint32{}, + []uint32{}, + }, + { + "lags of (1s) - return all items with low lag.", + []uint32{1}, + []uint32{1}, + }, + { + "lags of (1s, 1s, 1s, 30s) - return all items with low lag.", + []uint32{1, 1, 1, 30}, + []uint32{1, 1, 1, 30}, + }, + { + "lags of (1s, 1s, 1s, 40m, 40m, 40m) - return all items with low lag.", + []uint32{1, 1, 1, 40 * 60, 40 * 60, 40 * 60}, + []uint32{1, 1, 1}, + }, + { + "lags of (1s, 40m, 40m, 40m) - return at least 2 items if they don't have very high lag.", + []uint32{1, 40 * 60, 40 * 60, 40 * 60}, + []uint32{1, 40 * 60}, + }, + { + "lags of (30m, 35m, 40m, 45m) - return at least 2 items if they don't have very high lag.", + []uint32{30 * 60, 35 * 60, 40 * 60, 45 * 60}, + []uint32{30 * 60, 35 * 60}, + }, + { + "lags of (2h, 3h, 4h, 5h) - return <2 items if the others have very high lag.", + []uint32{2 * 60 * 60, 3 * 60 * 60, 4 * 60 * 60, 5 * 60 * 60}, + []uint32{2 * 60 * 60}, + }, + { + "lags of (3h, 30h) - return nothing if all have very high lag.", + []uint32{3 * 60 * 60, 30 * 60 * 60}, + []uint32{}, + }, + } + + for _, tc := range cases { + lts := make([]*TabletStats, len(tc.input)) + for i, lag := range tc.input { + lts[i] = &TabletStats{ + Tablet: topo.NewTablet(uint32(i+1), "cell", fmt.Sprintf("host-%vs-behind", lag)), + Serving: true, + Stats: &querypb.RealtimeStats{SecondsBehindMaster: lag}, + } + } + got := FilterByReplicationLag(lts) + if len(got) != len(tc.output) { + t.Errorf("FilterByReplicationLag(%v) failed: got output:\n%v\nExpected: %v", tc.description, got, tc.output) + continue + } + for i, elag := range tc.output { + if got[i].Stats.SecondsBehindMaster != elag { + t.Errorf("FilterByReplicationLag(%v) failed: got output:\n%v\nExpected value index %v to be %v", tc.description, got, i, elag) + } + } + } + + // Reset to the default + testSetLegacyReplicationLagAlgorithm(true) +} + +func TestFilterByReplicationLagWithLegacyAlgorithm(t *testing.T) { + // Use legacy algorithm by default for now + cases := []struct { description string input []uint32 @@ -106,6 +189,23 @@ func TestFilterByReplicationLag(t *testing.T) { []uint32{3 * 60 * 60, 4 * 60 * 60}, []uint32{3 * 60 * 60, 4 * 60 * 60}, }, + { + "lags of (3h, 3h, 4h) - return 3 as they're all delayed too much, but still in a good group.", + []uint32{3 * 60 * 60, 3 * 60 * 60, 4 * 60 * 60}, + []uint32{3 * 60 * 60, 3 * 60 * 60, 4 * 60 * 60}, + }, + { + "lags of (3h, 15h, 18h) - return 3 as they're all delayed too much, but still in a good group." + + "(different test case than above to show how absurb the good group logic is)", + []uint32{3 * 60 * 60, 15 * 60 * 60, 18 * 60 * 60}, + []uint32{3 * 60 * 60, 15 * 60 * 60, 18 * 60 * 60}, + }, + { + "lags of (3h, 12h, 18h) - return 2 as they're all delayed too much, but 18h is now considered an outlier." + + "(different test case than above to show how absurb the good group logic is)", + []uint32{3 * 60 * 60, 12 * 60 * 60, 18 * 60 * 60}, + []uint32{3 * 60 * 60, 12 * 60 * 60}, + }, { "lags of (3h, 30h) - return 2 as they're all delayed too much." + "(different test case that before, as both tablet stats are" +