diff --git a/pkg/mock/mockcluster/mockcluster.go b/pkg/mock/mockcluster/mockcluster.go
index a7edda7c660..122afa872be 100644
--- a/pkg/mock/mockcluster/mockcluster.go
+++ b/pkg/mock/mockcluster/mockcluster.go
@@ -352,6 +352,16 @@ func (mc *Cluster) AddLightWeightLeaderRegion(regionID uint64, leaderStoreID uin
 	return region
 }
 
+// AddNoLeaderRegion adds a region with the specified replicas and no leader.
+// The region is built with 0 in the leader-store position (assumed to mean
+// "no leader"; see newMockRegionInfo), put into the mock cluster and returned.
+func (mc *Cluster) AddNoLeaderRegion(regionID uint64, otherPeerStoreIDs ...uint64) *core.RegionInfo {
+	origin := mc.newMockRegionInfo(regionID, 0, otherPeerStoreIDs...)
+	region := origin.Clone(core.SetApproximateSize(defaultRegionSize/units.MiB), core.SetApproximateKeys(10))
+	mc.PutRegion(region)
+	return region
+}
+
 // AddRegionWithLearner adds region with specified leader, followers and learners.
 func (mc *Cluster) AddRegionWithLearner(regionID uint64, leaderStoreID uint64, followerStoreIDs, learnerStoreIDs []uint64) *core.RegionInfo {
 	origin := mc.MockRegionInfo(regionID, leaderStoreID, followerStoreIDs, learnerStoreIDs, nil)
diff --git a/server/schedule/checker/rule_checker.go b/server/schedule/checker/rule_checker.go
index 3765d09f8bf..f5f6276fda7 100644
--- a/server/schedule/checker/rule_checker.go
+++ b/server/schedule/checker/rule_checker.go
@@ -37,6 +37,7 @@ var (
 	errNoStoreToReplace   = errors.New("no store to replace peer")
 	errPeerCannotBeLeader = errors.New("peer cannot be leader")
 	errNoNewLeader        = errors.New("no new leader")
+	errRegionNoLeader     = errors.New("region no leader")
 )
 
 const maxPendingListLen = 100000
@@ -78,10 +79,18 @@ func (c *RuleChecker) Check(region *core.RegionInfo) *operator.Operator {
 
 // CheckWithFit is similar with Checker with placement.RegionFit
 func (c *RuleChecker) CheckWithFit(region *core.RegionInfo, fit *placement.RegionFit) (op *operator.Operator) {
+	// checker is paused
 	if c.IsPaused() {
 		checkerCounter.WithLabelValues("rule_checker", "paused").Inc()
 		return nil
 	}
+	// Skip regions without a leader: checks further down (e.g.
+	// fixLooseMatchPeer) read the leader's ID and store ID.
+	if region.GetLeader() == nil {
+		checkerCounter.WithLabelValues("rule_checker", "region-no-leader").Inc()
+		log.Debug("fail to check region", zap.Uint64("region-id", region.GetID()), zap.Error(errRegionNoLeader))
+		return nil
+	}
 	// If the fit is fetched from cache, it seems that the region doesn't need cache
 	if c.cluster.GetOpts().IsPlacementRulesCacheEnabled() && fit.IsCached() {
 		failpoint.Inject("assertShouldNotCache", func() {
@@ -239,7 +248,7 @@ func (c *RuleChecker) fixLooseMatchPeer(region *core.RegionInfo, fit *placement.
 	if region.GetLeader().GetId() != peer.GetId() && rf.Rule.Role == placement.Leader {
 		checkerCounter.WithLabelValues("rule_checker", "fix-leader-role").Inc()
 		if c.allowLeader(fit, peer) {
-			return operator.CreateTransferLeaderOperator("fix-leader-role", c.cluster, region, region.GetLeader().StoreId, peer.GetStoreId(), []uint64{}, 0)
+			return operator.CreateTransferLeaderOperator("fix-leader-role", c.cluster, region, region.GetLeader().GetStoreId(), peer.GetStoreId(), []uint64{}, 0)
 		}
-		checkerCounter.WithLabelValues("rule_checker", "not-allow-leader")
+		checkerCounter.WithLabelValues("rule_checker", "not-allow-leader").Inc()
 		return nil, errPeerCannotBeLeader
diff --git a/server/schedule/checker/rule_checker_test.go b/server/schedule/checker/rule_checker_test.go
index 4bbaffa9a49..a07a68575ab 100644
--- a/server/schedule/checker/rule_checker_test.go
+++ b/server/schedule/checker/rule_checker_test.go
@@ -263,6 +263,47 @@ func (suite *ruleCheckerTestSuite) TestFixRoleLeaderIssue3130() {
 	suite.Equal(uint64(1), op.Step(0).(operator.RemovePeer).FromStore)
 }
 
+// TestFixLeaderRoleWithUnhealthyRegion sets follower and leader placement rules,
+// adds a region that has no leader, and expects Check to return no operator.
+func (suite *ruleCheckerTestSuite) TestFixLeaderRoleWithUnhealthyRegion() {
+	suite.cluster.AddLabelsStore(1, 1, map[string]string{"rule": "follower"})
+	suite.cluster.AddLabelsStore(2, 1, map[string]string{"rule": "follower"})
+	suite.cluster.AddLabelsStore(3, 1, map[string]string{"rule": "leader"})
+	suite.ruleManager.SetRuleGroup(&placement.RuleGroup{
+		ID:       "cluster",
+		Index:    2,
+		Override: true,
+	})
+	err := suite.ruleManager.SetRules([]*placement.Rule{
+		{
+			GroupID: "cluster",
+			ID:      "r1",
+			Index:   100,
+			Role:    placement.Follower,
+			Count:   2,
+			LabelConstraints: []placement.LabelConstraint{
+				{Key: "rule", Op: "in", Values: []string{"follower"}},
+			},
+		},
+		{
+			GroupID: "cluster",
+			ID:      "r2",
+			Index:   100,
+			Role:    placement.Leader,
+			Count:   1,
+			LabelConstraints: []placement.LabelConstraint{
+				{Key: "rule", Op: "in", Values: []string{"leader"}},
+			},
+		},
+	})
+	suite.NoError(err)
+	// Region 1 is added through AddNoLeaderRegion (stores 1, 2, 3), so it has no leader.
+	suite.cluster.AddNoLeaderRegion(1, 1, 2, 3)
+	r := suite.cluster.GetRegion(1)
+	op := suite.rc.Check(r)
+	suite.Nil(op)
+}
+
 func (suite *ruleCheckerTestSuite) TestBetterReplacement() {
 	suite.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"})
 	suite.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host1"})