From 8a214dd59416e1a776311d7d700f06cffc0bc09c Mon Sep 17 00:00:00 2001 From: "vitess-bot[bot]" <108069721+vitess-bot[bot]@users.noreply.github.com> Date: Mon, 9 Sep 2024 10:42:54 -0700 Subject: [PATCH] [release-18.0] FindErrantGTIDs: superset is not an errant GTID situation (#16725) (#16727) Signed-off-by: deepthi Co-authored-by: vitess-bot[bot] <108069721+vitess-bot[bot]@users.noreply.github.com> --- go/mysql/replication/replication_status.go | 8 ++++++ .../replication/replication_status_test.go | 10 +++++++ go/vt/vtctl/reparentutil/replication.go | 2 +- go/vt/vtctl/reparentutil/replication_test.go | 26 ++++++++++++++----- 4 files changed, 39 insertions(+), 7 deletions(-) diff --git a/go/mysql/replication/replication_status.go b/go/mysql/replication/replication_status.go index 6b3d1bf2214..0b8ba0f785f 100644 --- a/go/mysql/replication/replication_status.go +++ b/go/mysql/replication/replication_status.go @@ -201,6 +201,14 @@ func (s *ReplicationStatus) FindErrantGTIDs(otherReplicaStatuses []*ReplicationS otherSets = append(otherSets, otherSet) } + if len(otherSets) == 1 { + // If there is only one replica to compare against, and one is a subset of the other, then we consider them not to be errant. + // It simply means that one replica might be behind on replication. + if relayLogSet.Contains(otherSets[0]) || otherSets[0].Contains(relayLogSet) { + return nil, nil + } + } + // Copy set for final diffSet so we don't mutate receiver. diffSet := make(Mysql56GTIDSet, len(relayLogSet)) for sid, intervals := range relayLogSet { diff --git a/go/mysql/replication/replication_status_test.go b/go/mysql/replication/replication_status_test.go index c1f5991f253..a88cb1570f7 100644 --- a/go/mysql/replication/replication_status_test.go +++ b/go/mysql/replication/replication_status_test.go @@ -105,6 +105,16 @@ func TestFindErrantGTIDs(t *testing.T) { otherRepStatuses: []*ReplicationStatus{{SourceUUID: sid1, RelayLogPosition: Position{GTIDSet: set1}}}, // servers with the same GTID sets should not be diagnosed with errant GTIDs want: nil, + }, { + mainRepStatus: &ReplicationStatus{SourceUUID: sourceSID, RelayLogPosition: Position{GTIDSet: set2}}, + otherRepStatuses: []*ReplicationStatus{{SourceUUID: sid1, RelayLogPosition: Position{GTIDSet: set3}}}, + // set2 is a strict subset of set3 + want: nil, + }, { + mainRepStatus: &ReplicationStatus{SourceUUID: sourceSID, RelayLogPosition: Position{GTIDSet: set3}}, + otherRepStatuses: []*ReplicationStatus{{SourceUUID: sid1, RelayLogPosition: Position{GTIDSet: set2}}}, + // set3 is a strict superset of set2 + want: nil, }} for _, testcase := range testcases { diff --git a/go/vt/vtctl/reparentutil/replication.go b/go/vt/vtctl/reparentutil/replication.go index 9b33a5b0536..7c39befded1 100644 --- a/go/vt/vtctl/reparentutil/replication.go +++ b/go/vt/vtctl/reparentutil/replication.go @@ -123,7 +123,7 @@ func FindValidEmergencyReparentCandidates( case len(errantGTIDs) != 0: // This tablet has errant GTIDs. It's not a valid candidate for // reparent, so don't insert it into the final mapping. - log.Errorf("skipping %v because we detected errant GTIDs - %v", alias, errantGTIDs) + log.Errorf("skipping %v with GTIDSet:%v because we detected errant GTIDs - %v", alias, relayLogGTIDSet, errantGTIDs) continue } diff --git a/go/vt/vtctl/reparentutil/replication_test.go b/go/vt/vtctl/reparentutil/replication_test.go index ed7bd152e9c..9e6c2ef9f76 100644 --- a/go/vt/vtctl/reparentutil/replication_test.go +++ b/go/vt/vtctl/reparentutil/replication_test.go @@ -161,7 +161,7 @@ func TestFindValidEmergencyReparentCandidates(t *testing.T) { shouldErr: false, }, { - name: "tablet with errant GTIDs is excluded", + name: "tablet with superset GTIDs is included", statusMap: map[string]*replicationdatapb.StopReplicationStatus{ "r1": { After: &replicationdatapb.Status{ @@ -169,19 +169,33 @@ func TestFindValidEmergencyReparentCandidates(t *testing.T) { RelayLogPosition: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5", }, }, - "errant": { + "r2": { After: &replicationdatapb.Status{ SourceUuid: "3E11FA47-71CA-11E1-9E33-C80AA9429562", RelayLogPosition: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5,AAAAAAAA-71CA-11E1-9E33-C80AA9429562:1", }, }, }, - primaryStatusMap: map[string]*replicationdatapb.PrimaryStatus{ - "p1": { - Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5", + expected: []string{"r1", "r2"}, + shouldErr: false, + }, + { + name: "tablets with errant GTIDs are excluded", + statusMap: map[string]*replicationdatapb.StopReplicationStatus{ + "r1": { + After: &replicationdatapb.Status{ + SourceUuid: "3E11FA47-71CA-11E1-9E33-C80AA9429562", + RelayLogPosition: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5,AAAAAAAA-71CA-11E1-9E33-C80AA9429562:1", + }, + }, + "r2": { + After: &replicationdatapb.Status{ + SourceUuid: "3E11FA47-71CA-11E1-9E33-C80AA9429562", + RelayLogPosition: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5,AAAAAAAA-71CA-11E1-9E33-C80AA9429562:2-3", + }, }, }, - expected: []string{"r1", "p1"}, + expected: []string{}, shouldErr: false, }, {