diff --git a/go/vt/vtctl/grpcvtctldserver/testutil/test_tmclient.go b/go/vt/vtctl/grpcvtctldserver/testutil/test_tmclient.go
index b824833a7dc..9426f1327b2 100644
--- a/go/vt/vtctl/grpcvtctldserver/testutil/test_tmclient.go
+++ b/go/vt/vtctl/grpcvtctldserver/testutil/test_tmclient.go
@@ -281,6 +281,11 @@ type TabletManagerClient struct {
 		Position *replicationdatapb.Status
 		Error    error
 	}
+	PrimaryStatusDelays  map[string]time.Duration // per-tablet wait applied by PrimaryStatus before it answers (or until ctx is done)
+	PrimaryStatusResults map[string]struct {
+		Status *replicationdatapb.PrimaryStatus
+		Error  error
+	}
 	RestoreFromBackupResults map[string]struct {
 		Events        []*logutilpb.Event
 		EventInterval time.Duration
@@ -870,6 +875,32 @@ func (fake *TabletManagerClient) ReplicationStatus(ctx context.Context, tablet *
 	return nil, assert.AnError
 }
 
+// PrimaryStatus is part of the tmclient.TabletManagerClient interface. It returns the PrimaryStatusResults entry keyed by the tablet's alias string, after waiting out any matching PrimaryStatusDelays entry; it returns assert.AnError when PrimaryStatusResults is nil or has no entry for the tablet, and ctx.Err() when ctx is done during the delay.
+func (fake *TabletManagerClient) PrimaryStatus(ctx context.Context, tablet *topodatapb.Tablet) (*replicationdatapb.PrimaryStatus, error) {
+	if fake.PrimaryStatusResults == nil {
+		return nil, assert.AnError // no canned results configured at all
+	}
+
+	key := topoproto.TabletAliasString(tablet.Alias) // lookup key used for both maps
+
+	if fake.PrimaryStatusDelays != nil {
+		if delay, ok := fake.PrimaryStatusDelays[key]; ok {
+			select {
+			case <-ctx.Done():
+				return nil, ctx.Err() // the caller's context ended before the delay elapsed
+			case <-time.After(delay):
+				// delay elapsed; fall through to the canned-result lookup below
+			}
+		}
+	}
+
+	if result, ok := fake.PrimaryStatusResults[key]; ok {
+		return result.Status, result.Error
+	}
+
+	return nil, assert.AnError // no canned result for this tablet
+}
+
 type backupRestoreStreamAdapter struct {
 	*grpcshim.BidiStream
 	ch chan *logutilpb.Event
diff --git a/go/vt/vtctl/reparentutil/planned_reparenter.go b/go/vt/vtctl/reparentutil/planned_reparenter.go
index b44024c00d2..e447d9f8150 100644
--- a/go/vt/vtctl/reparentutil/planned_reparenter.go
+++ b/go/vt/vtctl/reparentutil/planned_reparenter.go
@@ -719,6 +719,7 @@ func (pr *PlannedReparenter) reparentTablets(
 	return nil
 }
 
+// verifyAllTabletsReachable verifies that all the tablets are reachable when running PRS. The verification RPCs share one context bounded by topo.RemoteOperationTimeout.
 func (pr *PlannedReparenter) verifyAllTabletsReachable(ctx context.Context, tabletMap map[string]*topo.TabletInfo) error {
 	// Create a cancellable context for the entire set of RPCs to verify reachability.
 	verifyCtx, verifyCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
diff --git a/go/vt/vtctl/reparentutil/planned_reparenter_flaky_test.go b/go/vt/vtctl/reparentutil/planned_reparenter_flaky_test.go
index 20815db3dfc..e5ab2a169f8 100644
--- a/go/vt/vtctl/reparentutil/planned_reparenter_flaky_test.go
+++ b/go/vt/vtctl/reparentutil/planned_reparenter_flaky_test.go
@@ -3638,3 +3638,185 @@ func AssertReparentEventsEqual(t *testing.T, expected *events.Reparent, actual *
 
 	AssertReparentEventsEqualWithMessage(t, expected, actual, "")
 }
+
+// TestPlannedReparenter_verifyAllTabletsReachable runs verifyAllTabletsReachable against a fake TabletManagerClient for three cases: every tablet answers PrimaryStatus, one tablet returns an error, and one tablet is delayed beyond a shortened topo.RemoteOperationTimeout.
+func TestPlannedReparenter_verifyAllTabletsReachable(t *testing.T) {
+	tests := []struct {
+		name         string
+		ts           *topo.Server // never set by the cases below, so the reparenter's ts stays nil
+		tmc          tmclient.TabletManagerClient
+		tabletMap    map[string]*topo.TabletInfo
+		remoteOpTime time.Duration // when non-zero, temporarily replaces topo.RemoteOperationTimeout for the case
+		wantErr      string        // substring expected in the returned error; empty means no error is expected
+	}{
+		{
+			name: "Success", // every tablet has an error-free canned PrimaryStatus result
+			tmc: &testutil.TabletManagerClient{
+				PrimaryStatusResults: map[string]struct {
+					Status *replicationdatapb.PrimaryStatus
+					Error  error
+				}{
+					"zone1-0000000200": {
+						Status: &replicationdatapb.PrimaryStatus{},
+					},
+					"zone1-0000000201": {
+						Status: &replicationdatapb.PrimaryStatus{},
+					},
+					"zone1-0000000100": {
+						Status: &replicationdatapb.PrimaryStatus{},
+					},
+				},
+			},
+			tabletMap: map[string]*topo.TabletInfo{
+				"zone1-0000000100": {
+					Tablet: &topodatapb.Tablet{
+						Alias: &topodatapb.TabletAlias{
+							Cell: "zone1",
+							Uid:  100,
+						},
+						Type: topodatapb.TabletType_PRIMARY,
+					},
+				},
+				"zone1-0000000200": {
+					Tablet: &topodatapb.Tablet{
+						Alias: &topodatapb.TabletAlias{
+							Cell: "zone1",
+							Uid:  200,
+						},
+						Type: topodatapb.TabletType_REPLICA,
+					},
+				},
+				"zone1-0000000201": {
+					Tablet: &topodatapb.Tablet{
+						Alias: &topodatapb.TabletAlias{
+							Cell: "zone1",
+							Uid:  201,
+						},
+						Type: topodatapb.TabletType_REPLICA,
+					},
+				},
+			},
+		}, {
+			name: "Failure", // the fake returns an error for tablet zone1-0000000200
+			tmc: &testutil.TabletManagerClient{
+				PrimaryStatusResults: map[string]struct {
+					Status *replicationdatapb.PrimaryStatus
+					Error  error
+				}{
+					"zone1-0000000200": {
+						Error: fmt.Errorf("primary status failed"),
+					},
+					"zone1-0000000201": {
+						Status: &replicationdatapb.PrimaryStatus{},
+					},
+					"zone1-0000000100": {
+						Status: &replicationdatapb.PrimaryStatus{},
+					},
+				},
+			},
+			tabletMap: map[string]*topo.TabletInfo{
+				"zone1-0000000100": {
+					Tablet: &topodatapb.Tablet{
+						Alias: &topodatapb.TabletAlias{
+							Cell: "zone1",
+							Uid:  100,
+						},
+						Type: topodatapb.TabletType_PRIMARY,
+					},
+				},
+				"zone1-0000000200": {
+					Tablet: &topodatapb.Tablet{
+						Alias: &topodatapb.TabletAlias{
+							Cell: "zone1",
+							Uid:  200,
+						},
+						Type: topodatapb.TabletType_REPLICA,
+					},
+				},
+				"zone1-0000000201": {
+					Tablet: &topodatapb.Tablet{
+						Alias: &topodatapb.TabletAlias{
+							Cell: "zone1",
+							Uid:  201,
+						},
+						Type: topodatapb.TabletType_REPLICA,
+					},
+				},
+			},
+			wantErr: "primary status failed",
+		}, {
+			name: "Timeout", // tablet zone1-0000000100 answers only after a delay longer than the shortened timeout
+			tmc: &testutil.TabletManagerClient{
+				PrimaryStatusDelays: map[string]time.Duration{
+					"zone1-0000000100": 20 * time.Second, // far longer than remoteOpTime below, so the deadline fires first
+				},
+				PrimaryStatusResults: map[string]struct {
+					Status *replicationdatapb.PrimaryStatus
+					Error  error
+				}{
+					"zone1-0000000200": {
+						Status: &replicationdatapb.PrimaryStatus{},
+					},
+					"zone1-0000000201": {
+						Status: &replicationdatapb.PrimaryStatus{},
+					},
+					"zone1-0000000100": {
+						Status: &replicationdatapb.PrimaryStatus{},
+					},
+				},
+			},
+			remoteOpTime: 100 * time.Millisecond,
+			tabletMap: map[string]*topo.TabletInfo{
+				"zone1-0000000100": {
+					Tablet: &topodatapb.Tablet{
+						Alias: &topodatapb.TabletAlias{
+							Cell: "zone1",
+							Uid:  100,
+						},
+						Type: topodatapb.TabletType_PRIMARY,
+					},
+				},
+				"zone1-0000000200": {
+					Tablet: &topodatapb.Tablet{
+						Alias: &topodatapb.TabletAlias{
+							Cell: "zone1",
+							Uid:  200,
+						},
+						Type: topodatapb.TabletType_REPLICA,
+					},
+				},
+				"zone1-0000000201": {
+					Tablet: &topodatapb.Tablet{
+						Alias: &topodatapb.TabletAlias{
+							Cell: "zone1",
+							Uid:  201,
+						},
+						Type: topodatapb.TabletType_REPLICA,
+					},
+				},
+			},
+			wantErr: "context deadline exceeded",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			pr := &PlannedReparenter{
+				ts:  tt.ts,
+				tmc: tt.tmc,
+			}
+			if tt.remoteOpTime != 0 {
+				oldTime := topo.RemoteOperationTimeout // package-level variable: remember it so it can be restored
+				topo.RemoteOperationTimeout = tt.remoteOpTime
+				defer func() {
+					topo.RemoteOperationTimeout = oldTime
+				}()
+			}
+			err := pr.verifyAllTabletsReachable(context.Background(), tt.tabletMap)
+			if tt.wantErr == "" {
+				require.NoError(t, err)
+				return
+			}
+			require.ErrorContains(t, err, tt.wantErr)
+		})
+	}
+}