From eddbb58cdd7c6932cdc458393b66d6ca9ad0a227 Mon Sep 17 00:00:00 2001 From: deepthi Date: Fri, 10 Apr 2020 18:39:55 -0700 Subject: [PATCH] PlannedReparent: tests for Promote failure, re-run failed PRS, PRS with current master Signed-off-by: deepthi --- .../fakemysqldaemon/fakemysqldaemon.go | 6 + go/vt/wrangler/reparent.go | 2 +- .../testlib/emergency_reparent_shard_test.go | 107 ++-- .../testlib/planned_reparent_shard_test.go | 460 +++++++++++++----- 4 files changed, 407 insertions(+), 168 deletions(-) diff --git a/go/vt/mysqlctl/fakemysqldaemon/fakemysqldaemon.go b/go/vt/mysqlctl/fakemysqldaemon/fakemysqldaemon.go index c31b5861899..0d0fea3f051 100644 --- a/go/vt/mysqlctl/fakemysqldaemon/fakemysqldaemon.go +++ b/go/vt/mysqlctl/fakemysqldaemon/fakemysqldaemon.go @@ -107,6 +107,9 @@ type FakeMysqlDaemon struct { // PromoteResult is returned by Promote PromoteResult mysql.Position + // PromoteError is used by Promote + PromoteError error + // SchemaFunc provides the return value for GetSchema. // If not defined, the "Schema" field will be used instead, see below. SchemaFunc func() (*tabletmanagerdatapb.SchemaDefinition, error) @@ -350,6 +353,9 @@ func (fmd *FakeMysqlDaemon) WaitMasterPos(_ context.Context, pos mysql.Position) // Promote is part of the MysqlDaemon interface func (fmd *FakeMysqlDaemon) Promote(hookExtraEnv map[string]string) (mysql.Position, error) { + if fmd.PromoteError != nil { + return mysql.Position{}, fmd.PromoteError + } return fmd.PromoteResult, nil } diff --git a/go/vt/wrangler/reparent.go b/go/vt/wrangler/reparent.go index fbeb7ef0b51..e24aa83a898 100644 --- a/go/vt/wrangler/reparent.go +++ b/go/vt/wrangler/reparent.go @@ -641,7 +641,7 @@ func (wr *Wrangler) plannedReparentShardLocked(ctx context.Context, ev *events.R // Check we still have the topology lock. 
if err := topo.CheckShardLocked(ctx, keyspace, shard); err != nil { - return fmt.Errorf("lost topology lock, aborting: %v", err) + return vterrors.Wrap(err, "lost topology lock, aborting") } // Create a cancelable context for the following RPCs. diff --git a/go/vt/wrangler/testlib/emergency_reparent_shard_test.go b/go/vt/wrangler/testlib/emergency_reparent_shard_test.go index 01045d25ca4..5fd63be499c 100644 --- a/go/vt/wrangler/testlib/emergency_reparent_shard_test.go +++ b/go/vt/wrangler/testlib/emergency_reparent_shard_test.go @@ -39,11 +39,11 @@ func TestEmergencyReparentShard(t *testing.T) { vp := NewVtctlPipe(t, ts) defer vp.Close() - // Create a master, a couple good slaves + // Create a master, a couple good replicas oldMaster := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, nil) newMaster := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil) - goodSlave1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) - goodSlave2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil) + goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) + goodReplica2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil) // new master newMaster.FakeMysqlDaemon.ReadOnly = true @@ -80,10 +80,10 @@ func TestEmergencyReparentShard(t *testing.T) { oldMaster.StartActionLoop(t, wr) defer oldMaster.StopActionLoop(t) - // good slave 1 is replicating - goodSlave1.FakeMysqlDaemon.ReadOnly = true - goodSlave1.FakeMysqlDaemon.Replicating = true - goodSlave1.FakeMysqlDaemon.CurrentMasterPosition = mysql.Position{ + // good replica 1 is replicating + goodReplica1.FakeMysqlDaemon.ReadOnly = true + goodReplica1.FakeMysqlDaemon.Replicating = true + goodReplica1.FakeMysqlDaemon.CurrentMasterPosition = mysql.Position{ GTIDSet: mysql.MariadbGTIDSet{ mysql.MariadbGTID{ Domain: 2, @@ -92,19 +92,19 @@ func TestEmergencyReparentShard(t *testing.T) { }, }, } - 
goodSlave1.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) - goodSlave1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + goodReplica1.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) + goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", "FAKE SET MASTER", "START SLAVE", } - goodSlave1.StartActionLoop(t, wr) - defer goodSlave1.StopActionLoop(t) + goodReplica1.StartActionLoop(t, wr) + defer goodReplica1.StopActionLoop(t) - // good slave 2 is not replicating - goodSlave2.FakeMysqlDaemon.ReadOnly = true - goodSlave2.FakeMysqlDaemon.Replicating = false - goodSlave2.FakeMysqlDaemon.CurrentMasterPosition = mysql.Position{ + // good replica 2 is not replicating + goodReplica2.FakeMysqlDaemon.ReadOnly = true + goodReplica2.FakeMysqlDaemon.Replicating = false + goodReplica2.FakeMysqlDaemon.CurrentMasterPosition = mysql.Position{ GTIDSet: mysql.MariadbGTIDSet{ mysql.MariadbGTID{ Domain: 2, @@ -113,12 +113,12 @@ func TestEmergencyReparentShard(t *testing.T) { }, }, } - goodSlave2.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) - goodSlave2.StartActionLoop(t, wr) - goodSlave2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + goodReplica2.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) + goodReplica2.StartActionLoop(t, wr) + goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "FAKE SET MASTER", } - defer goodSlave2.StopActionLoop(t) + defer goodReplica2.StopActionLoop(t) // run EmergencyReparentShard if err := vp.Run([]string{"EmergencyReparentShard", "-wait_slave_timeout", "10s", newMaster.Tablet.Keyspace + "/" + newMaster.Tablet.Shard, topoproto.TabletAliasString(newMaster.Tablet.Alias)}); err != nil { @@ -132,30 +132,30 @@ func TestEmergencyReparentShard(t *testing.T) { if err := oldMaster.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { t.Fatalf("oldMaster.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) } 
- if err := goodSlave1.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { - t.Fatalf("goodSlave1.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) + if err := goodReplica1.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { + t.Fatalf("goodReplica1.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) } - if err := goodSlave2.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { - t.Fatalf("goodSlave2.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) + if err := goodReplica2.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { + t.Fatalf("goodReplica2.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) } if newMaster.FakeMysqlDaemon.ReadOnly { t.Errorf("newMaster.FakeMysqlDaemon.ReadOnly set") } // old master read-only flag doesn't matter, it is scrapped - if !goodSlave1.FakeMysqlDaemon.ReadOnly { - t.Errorf("goodSlave1.FakeMysqlDaemon.ReadOnly not set") + if !goodReplica1.FakeMysqlDaemon.ReadOnly { + t.Errorf("goodReplica1.FakeMysqlDaemon.ReadOnly not set") } - if !goodSlave2.FakeMysqlDaemon.ReadOnly { - t.Errorf("goodSlave2.FakeMysqlDaemon.ReadOnly not set") + if !goodReplica2.FakeMysqlDaemon.ReadOnly { + t.Errorf("goodReplica2.FakeMysqlDaemon.ReadOnly not set") } - if !goodSlave1.FakeMysqlDaemon.Replicating { - t.Errorf("goodSlave1.FakeMysqlDaemon.Replicating not set") + if !goodReplica1.FakeMysqlDaemon.Replicating { + t.Errorf("goodReplica1.FakeMysqlDaemon.Replicating not set") } - if goodSlave2.FakeMysqlDaemon.Replicating { - t.Errorf("goodSlave2.FakeMysqlDaemon.Replicating set") + if goodReplica2.FakeMysqlDaemon.Replicating { + t.Errorf("goodReplica2.FakeMysqlDaemon.Replicating set") } checkSemiSyncEnabled(t, true, true, newMaster) - checkSemiSyncEnabled(t, false, true, goodSlave1, goodSlave2) + checkSemiSyncEnabled(t, false, true, goodReplica1, goodReplica2) } // TestEmergencyReparentShardMasterElectNotBest tries to emergency reparent @@ -165,13 +165,25 @@ func TestEmergencyReparentShardMasterElectNotBest(t *testing.T) { ts := 
memorytopo.NewServer("cell1", "cell2") wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) - // Create a master, a couple good slaves + // Create a master, a couple good replicas oldMaster := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, nil) newMaster := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil) - moreAdvancedSlave := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) + moreAdvancedReplica := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) // new master newMaster.FakeMysqlDaemon.Replicating = true + // this server has executed up to 455, which is the highest among replicas + newMaster.FakeMysqlDaemon.CurrentMasterPosition = mysql.Position{ + GTIDSet: mysql.MariadbGTIDSet{ + mysql.MariadbGTID{ + Domain: 2, + Server: 123, + Sequence: 455, + }, + }, + } + // It has more transactions in its relay log, but not as many as + // moreAdvancedReplica newMaster.FakeMysqlDaemon.CurrentMasterPosition = mysql.Position{ GTIDSet: mysql.MariadbGTIDSet{ mysql.MariadbGTID{ @@ -191,9 +203,20 @@ func TestEmergencyReparentShardMasterElectNotBest(t *testing.T) { oldMaster.StartActionLoop(t, wr) defer oldMaster.StopActionLoop(t) - // more advanced slave - moreAdvancedSlave.FakeMysqlDaemon.Replicating = true - moreAdvancedSlave.FakeMysqlDaemon.CurrentMasterPosition = mysql.Position{ + // more advanced replica + moreAdvancedReplica.FakeMysqlDaemon.Replicating = true + // position up to which this replica has executed is behind desired new master + moreAdvancedReplica.FakeMysqlDaemon.CurrentMasterPosition = mysql.Position{ + GTIDSet: mysql.MariadbGTIDSet{ + mysql.MariadbGTID{ + Domain: 2, + Server: 123, + Sequence: 454, + }, + }, + } + // relay log position is more advanced than desired new master + moreAdvancedReplica.FakeMysqlDaemon.CurrentMasterPosition = mysql.Position{ GTIDSet: mysql.MariadbGTIDSet{ mysql.MariadbGTID{ Domain: 2, @@ -202,11 +225,11 @@ func 
TestEmergencyReparentShardMasterElectNotBest(t *testing.T) { }, }, } - moreAdvancedSlave.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + moreAdvancedReplica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", } - moreAdvancedSlave.StartActionLoop(t, wr) - defer moreAdvancedSlave.StopActionLoop(t) + moreAdvancedReplica.StartActionLoop(t, wr) + defer moreAdvancedReplica.StopActionLoop(t) // run EmergencyReparentShard if err := wr.EmergencyReparentShard(ctx, newMaster.Tablet.Keyspace, newMaster.Tablet.Shard, newMaster.Tablet.Alias, 10*time.Second); err == nil || !strings.Contains(err.Error(), "is more advanced than master elect tablet") { @@ -220,7 +243,7 @@ func TestEmergencyReparentShardMasterElectNotBest(t *testing.T) { if err := oldMaster.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { t.Fatalf("oldMaster.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) } - if err := moreAdvancedSlave.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { - t.Fatalf("moreAdvancedSlave.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) + if err := moreAdvancedReplica.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { + t.Fatalf("moreAdvancedReplica.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) } } diff --git a/go/vt/wrangler/testlib/planned_reparent_shard_test.go b/go/vt/wrangler/testlib/planned_reparent_shard_test.go index 8173f9e2401..dd2dc09d5c5 100644 --- a/go/vt/wrangler/testlib/planned_reparent_shard_test.go +++ b/go/vt/wrangler/testlib/planned_reparent_shard_test.go @@ -22,6 +22,8 @@ import ( "strings" "testing" + "github.com/stretchr/testify/assert" + "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/vt/logutil" "vitess.io/vitess/go/vt/topo/memorytopo" @@ -39,10 +41,10 @@ func TestPlannedReparentShardNoMasterProvided(t *testing.T) { vp := NewVtctlPipe(t, ts) defer vp.Close() - // Create a master, a couple good slaves + // Create a master, a couple good replicas oldMaster := NewFakeTablet(t, wr, "cell1", 0, 
topodatapb.TabletType_MASTER, nil) newMaster := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil) - goodSlave1 := NewFakeTablet(t, wr, "cell2", 2, topodatapb.TabletType_REPLICA, nil) + goodReplica1 := NewFakeTablet(t, wr, "cell2", 2, topodatapb.TabletType_REPLICA, nil) // new master newMaster.FakeMysqlDaemon.ReadOnly = true @@ -96,17 +98,17 @@ func TestPlannedReparentShardNoMasterProvided(t *testing.T) { // SetMaster is called on new master to make sure it's replicating before reparenting. newMaster.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(oldMaster.Tablet) - // good slave 1 is replicating - goodSlave1.FakeMysqlDaemon.ReadOnly = true - goodSlave1.FakeMysqlDaemon.Replicating = true - goodSlave1.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) - goodSlave1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + // good replica 1 is replicating + goodReplica1.FakeMysqlDaemon.ReadOnly = true + goodReplica1.FakeMysqlDaemon.Replicating = true + goodReplica1.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) + goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", "FAKE SET MASTER", "START SLAVE", } - goodSlave1.StartActionLoop(t, wr) - defer goodSlave1.StopActionLoop(t) + goodReplica1.StartActionLoop(t, wr) + defer goodReplica1.StopActionLoop(t) // run PlannedReparentShard if err := vp.Run([]string{"PlannedReparentShard", "-wait_slave_timeout", "10s", "-keyspace_shard", newMaster.Tablet.Keyspace + "/" + newMaster.Tablet.Shard}); err != nil { @@ -120,8 +122,8 @@ func TestPlannedReparentShardNoMasterProvided(t *testing.T) { if err := oldMaster.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { t.Errorf("oldMaster.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) } - if err := goodSlave1.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { - t.Errorf("goodSlave1.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) + if err := 
goodReplica1.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { + t.Errorf("goodReplica1.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) } if newMaster.FakeMysqlDaemon.ReadOnly { t.Errorf("newMaster.FakeMysqlDaemon.ReadOnly set") @@ -129,36 +131,36 @@ func TestPlannedReparentShardNoMasterProvided(t *testing.T) { if !oldMaster.FakeMysqlDaemon.ReadOnly { t.Errorf("oldMaster.FakeMysqlDaemon.ReadOnly not set") } - if !goodSlave1.FakeMysqlDaemon.ReadOnly { - t.Errorf("goodSlave1.FakeMysqlDaemon.ReadOnly not set") + if !goodReplica1.FakeMysqlDaemon.ReadOnly { + t.Errorf("goodReplica1.FakeMysqlDaemon.ReadOnly not set") } if !oldMaster.Agent.QueryServiceControl.IsServing() { t.Errorf("oldMaster...QueryServiceControl not serving") } // verify the old master was told to start replicating (and not - // the slave that wasn't replicating in the first place) + // the replica that wasn't replicating in the first place) if !oldMaster.FakeMysqlDaemon.Replicating { t.Errorf("oldMaster.FakeMysqlDaemon.Replicating not set") } - if !goodSlave1.FakeMysqlDaemon.Replicating { - t.Errorf("goodSlave1.FakeMysqlDaemon.Replicating not set") + if !goodReplica1.FakeMysqlDaemon.Replicating { + t.Errorf("goodReplica1.FakeMysqlDaemon.Replicating not set") } checkSemiSyncEnabled(t, true, true, newMaster) - checkSemiSyncEnabled(t, false, true, goodSlave1, oldMaster) + checkSemiSyncEnabled(t, false, true, goodReplica1, oldMaster) } -func TestPlannedReparentShard(t *testing.T) { +func TestPlannedReparentShardNoError(t *testing.T) { ts := memorytopo.NewServer("cell1", "cell2") wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) vp := NewVtctlPipe(t, ts) defer vp.Close() - // Create a master, a couple good slaves + // Create a master, a couple good replicas oldMaster := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, nil) newMaster := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil) - goodSlave1 := NewFakeTablet(t, wr, "cell1", 
2, topodatapb.TabletType_REPLICA, nil) - goodSlave2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil) + goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) + goodReplica2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil) // new master newMaster.FakeMysqlDaemon.ReadOnly = true @@ -212,27 +214,27 @@ func TestPlannedReparentShard(t *testing.T) { // SetMaster is called on new master to make sure it's replicating before reparenting. newMaster.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(oldMaster.Tablet) - // good slave 1 is replicating - goodSlave1.FakeMysqlDaemon.ReadOnly = true - goodSlave1.FakeMysqlDaemon.Replicating = true - goodSlave1.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) - goodSlave1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + // goodReplica1 is replicating + goodReplica1.FakeMysqlDaemon.ReadOnly = true + goodReplica1.FakeMysqlDaemon.Replicating = true + goodReplica1.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) + goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", "FAKE SET MASTER", "START SLAVE", } - goodSlave1.StartActionLoop(t, wr) - defer goodSlave1.StopActionLoop(t) + goodReplica1.StartActionLoop(t, wr) + defer goodReplica1.StopActionLoop(t) - // good slave 2 is not replicating - goodSlave2.FakeMysqlDaemon.ReadOnly = true - goodSlave2.FakeMysqlDaemon.Replicating = false - goodSlave2.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) - goodSlave2.StartActionLoop(t, wr) - goodSlave2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + // goodReplica2 is not replicating + goodReplica2.FakeMysqlDaemon.ReadOnly = true + goodReplica2.FakeMysqlDaemon.Replicating = false + goodReplica2.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) + goodReplica2.StartActionLoop(t, wr) + 
goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "FAKE SET MASTER", } - defer goodSlave2.StopActionLoop(t) + defer goodReplica2.StopActionLoop(t) // run PlannedReparentShard if err := vp.Run([]string{"PlannedReparentShard", "-wait_slave_timeout", "10s", "-keyspace_shard", newMaster.Tablet.Keyspace + "/" + newMaster.Tablet.Shard, "-new_master", topoproto.TabletAliasString(newMaster.Tablet.Alias)}); err != nil { @@ -246,11 +248,11 @@ func TestPlannedReparentShard(t *testing.T) { if err := oldMaster.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { t.Errorf("oldMaster.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) } - if err := goodSlave1.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { - t.Errorf("goodSlave1.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) + if err := goodReplica1.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { + t.Errorf("goodReplica1.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) } - if err := goodSlave2.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { - t.Errorf("goodSlave2.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) + if err := goodReplica2.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { + t.Errorf("goodReplica2.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) } if newMaster.FakeMysqlDaemon.ReadOnly { t.Errorf("newMaster.FakeMysqlDaemon.ReadOnly set") @@ -258,30 +260,30 @@ func TestPlannedReparentShard(t *testing.T) { if !oldMaster.FakeMysqlDaemon.ReadOnly { t.Errorf("oldMaster.FakeMysqlDaemon.ReadOnly not set") } - if !goodSlave1.FakeMysqlDaemon.ReadOnly { - t.Errorf("goodSlave1.FakeMysqlDaemon.ReadOnly not set") + if !goodReplica1.FakeMysqlDaemon.ReadOnly { + t.Errorf("goodReplica1.FakeMysqlDaemon.ReadOnly not set") } - if !goodSlave2.FakeMysqlDaemon.ReadOnly { - t.Errorf("goodSlave2.FakeMysqlDaemon.ReadOnly not set") + if !goodReplica2.FakeMysqlDaemon.ReadOnly { + t.Errorf("goodReplica2.FakeMysqlDaemon.ReadOnly not set") } if 
!oldMaster.Agent.QueryServiceControl.IsServing() { t.Errorf("oldMaster...QueryServiceControl not serving") } // verify the old master was told to start replicating (and not - // the slave that wasn't replicating in the first place) + // the replica that wasn't replicating in the first place) if !oldMaster.FakeMysqlDaemon.Replicating { t.Errorf("oldMaster.FakeMysqlDaemon.Replicating not set") } - if !goodSlave1.FakeMysqlDaemon.Replicating { - t.Errorf("goodSlave1.FakeMysqlDaemon.Replicating not set") + if !goodReplica1.FakeMysqlDaemon.Replicating { + t.Errorf("goodReplica1.FakeMysqlDaemon.Replicating not set") } - if goodSlave2.FakeMysqlDaemon.Replicating { - t.Errorf("goodSlave2.FakeMysqlDaemon.Replicating set") + if goodReplica2.FakeMysqlDaemon.Replicating { + t.Errorf("goodReplica2.FakeMysqlDaemon.Replicating set") } checkSemiSyncEnabled(t, true, true, newMaster) - checkSemiSyncEnabled(t, false, true, goodSlave1, goodSlave2, oldMaster) + checkSemiSyncEnabled(t, false, true, goodReplica1, goodReplica2, oldMaster) } func TestPlannedReparentNoMaster(t *testing.T) { @@ -304,17 +306,19 @@ func TestPlannedReparentNoMaster(t *testing.T) { } } -func TestPlannedReparentShardPromoteSlaveFail(t *testing.T) { +// TestPlannedReparentShardWaitForPositionFail simulates a failure of the WaitForPosition call +// on the desired new master tablet +func TestPlannedReparentShardWaitForPositionFail(t *testing.T) { ts := memorytopo.NewServer("cell1", "cell2") wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) vp := NewVtctlPipe(t, ts) defer vp.Close() - // Create a master, a couple good slaves + // Create a master, a couple good replicas oldMaster := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, nil) newMaster := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil) - goodSlave1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) - goodSlave2 := NewFakeTablet(t, wr, "cell2", 3, 
topodatapb.TabletType_REPLICA, nil) + goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) + goodReplica2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil) // new master newMaster.FakeMysqlDaemon.ReadOnly = true @@ -338,6 +342,9 @@ func TestPlannedReparentShardPromoteSlaveFail(t *testing.T) { }, } newMaster.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + "STOP SLAVE", + "FAKE SET MASTER", + "START SLAVE", "CREATE DATABASE IF NOT EXISTS _vt", "SUBCREATE TABLE IF NOT EXISTS _vt.reparent_journal", "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, master_alias, replication_position) VALUES", @@ -358,34 +365,36 @@ func TestPlannedReparentShardPromoteSlaveFail(t *testing.T) { oldMaster.StartActionLoop(t, wr) defer oldMaster.StopActionLoop(t) oldMaster.Agent.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true) + // SetMaster is called on new master to make sure it's replicating before reparenting. 
+ newMaster.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(oldMaster.Tablet) - // good slave 1 is replicating - goodSlave1.FakeMysqlDaemon.ReadOnly = true - goodSlave1.FakeMysqlDaemon.Replicating = true - goodSlave1.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) - goodSlave1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + // good replica 1 is replicating + goodReplica1.FakeMysqlDaemon.ReadOnly = true + goodReplica1.FakeMysqlDaemon.Replicating = true + goodReplica1.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) + goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", "FAKE SET MASTER", "START SLAVE", } - goodSlave1.StartActionLoop(t, wr) - defer goodSlave1.StopActionLoop(t) + goodReplica1.StartActionLoop(t, wr) + defer goodReplica1.StopActionLoop(t) - // good slave 2 is not replicating - goodSlave2.FakeMysqlDaemon.ReadOnly = true - goodSlave2.FakeMysqlDaemon.Replicating = false - goodSlave2.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) - goodSlave2.StartActionLoop(t, wr) - goodSlave2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + // good replica 2 is not replicating + goodReplica2.FakeMysqlDaemon.ReadOnly = true + goodReplica2.FakeMysqlDaemon.Replicating = false + goodReplica2.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) + goodReplica2.StartActionLoop(t, wr) + goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "FAKE SET MASTER", } - defer goodSlave2.StopActionLoop(t) + defer goodReplica2.StopActionLoop(t) // run PlannedReparentShard err := vp.Run([]string{"PlannedReparentShard", "-wait_slave_timeout", "10s", "-keyspace_shard", newMaster.Tablet.Keyspace + "/" + newMaster.Tablet.Shard, "-new_master", topoproto.TabletAliasString(newMaster.Tablet.Alias)}) if err == nil { - t.Fatalf("PlannedReparentShard succeeded: %v", err) + t.Fatal("PlannedReparentShard succeeded") } if 
!strings.Contains(err.Error(), "replication on master-elect cell1-0000000001 did not catch up in time") { t.Fatalf("PlannedReparentShard failed with the wrong error: %v", err) @@ -400,17 +409,19 @@ func TestPlannedReparentShardPromoteSlaveFail(t *testing.T) { } } -func TestPlannedReparentShardPromoteSlaveTimeout(t *testing.T) { +// TestPlannedReparentShardWaitForPositionTimeout simulates a context timeout +// during the WaitForPosition call to the desired new master +func TestPlannedReparentShardWaitForPositionTimeout(t *testing.T) { ts := memorytopo.NewServer("cell1", "cell2") wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) vp := NewVtctlPipe(t, ts) defer vp.Close() - // Create a master, a couple good slaves + // Create a master, a couple good replicas oldMaster := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, nil) newMaster := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil) - goodSlave1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) - goodSlave2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil) + goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) + goodReplica2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil) // new master newMaster.FakeMysqlDaemon.TimeoutHook = func() error { return context.DeadlineExceeded } @@ -435,6 +446,9 @@ func TestPlannedReparentShardPromoteSlaveTimeout(t *testing.T) { }, } newMaster.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + "STOP SLAVE", + "FAKE SET MASTER", + "START SLAVE", "CREATE DATABASE IF NOT EXISTS _vt", "SUBCREATE TABLE IF NOT EXISTS _vt.reparent_journal", "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, master_alias, replication_position) VALUES", @@ -455,33 +469,35 @@ func TestPlannedReparentShardPromoteSlaveTimeout(t *testing.T) { defer oldMaster.StopActionLoop(t) 
oldMaster.Agent.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true) - // good slave 1 is replicating - goodSlave1.FakeMysqlDaemon.ReadOnly = true - goodSlave1.FakeMysqlDaemon.Replicating = true - goodSlave1.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) - goodSlave1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + // SetMaster is called on new master to make sure it's replicating before reparenting. + newMaster.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(oldMaster.Tablet) + // good replica 1 is replicating + goodReplica1.FakeMysqlDaemon.ReadOnly = true + goodReplica1.FakeMysqlDaemon.Replicating = true + goodReplica1.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) + goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", "FAKE SET MASTER", - "START SLAVE", + "START SLAVE", } - goodSlave1.StartActionLoop(t, wr) - defer goodSlave1.StopActionLoop(t) + goodReplica1.StartActionLoop(t, wr) + defer goodReplica1.StopActionLoop(t) - // good slave 2 is not replicating - goodSlave2.FakeMysqlDaemon.ReadOnly = true - goodSlave2.FakeMysqlDaemon.Replicating = false - goodSlave2.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) - goodSlave2.StartActionLoop(t, wr) - goodSlave2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + // good replica 2 is not replicating + goodReplica2.FakeMysqlDaemon.ReadOnly = true + goodReplica2.FakeMysqlDaemon.Replicating = false + goodReplica2.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) + goodReplica2.StartActionLoop(t, wr) + goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "FAKE SET MASTER", } - defer goodSlave2.StopActionLoop(t) + defer goodReplica2.StopActionLoop(t) // run PlannedReparentShard err := vp.Run([]string{"PlannedReparentShard", "-wait_slave_timeout", "10s", "-keyspace_shard", newMaster.Tablet.Keyspace + "/" + 
newMaster.Tablet.Shard, "-new_master", topoproto.TabletAliasString(newMaster.Tablet.Alias)}) if err == nil { - t.Fatalf("PlannedReparentShard succeeded: %v", err) + t.Fatal("PlannedReparentShard succeeded") } if !strings.Contains(err.Error(), "replication on master-elect cell1-0000000001 did not catch up in time") { t.Fatalf("PlannedReparentShard failed with the wrong error: %v", err) @@ -502,9 +518,9 @@ func TestPlannedReparentShardRelayLogError(t *testing.T) { vp := NewVtctlPipe(t, ts) defer vp.Close() - // Create a master, a couple good slaves + // Create a master, a couple good replicas master := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, nil) - goodSlave1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) + goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) // old master master.FakeMysqlDaemon.ReadOnly = false @@ -528,19 +544,19 @@ func TestPlannedReparentShardRelayLogError(t *testing.T) { defer master.StopActionLoop(t) master.Agent.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true) - // good slave 1 is replicating - goodSlave1.FakeMysqlDaemon.ReadOnly = true - goodSlave1.FakeMysqlDaemon.Replicating = true - goodSlave1.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(master.Tablet) + // goodReplica1 is replicating + goodReplica1.FakeMysqlDaemon.ReadOnly = true + goodReplica1.FakeMysqlDaemon.Replicating = true + goodReplica1.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(master.Tablet) // simulate error that will trigger a call to RestartSlave - goodSlave1.FakeMysqlDaemon.SetMasterError = errors.New("Slave failed to initialize relay log info structure from the repository") - goodSlave1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + goodReplica1.FakeMysqlDaemon.SetMasterError = errors.New("Slave failed to initialize relay log info structure from the repository") + goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = 
[]string{ "STOP SLAVE", "RESET SLAVE", "START SLAVE", } - goodSlave1.StartActionLoop(t, wr) - defer goodSlave1.StopActionLoop(t) + goodReplica1.StartActionLoop(t, wr) + defer goodReplica1.StopActionLoop(t) // run PlannedReparentShard if err := vp.Run([]string{"PlannedReparentShard", "-wait_slave_timeout", "10s", "-keyspace_shard", master.Tablet.Keyspace + "/" + master.Tablet.Shard, "-new_master", topoproto.TabletAliasString(master.Tablet.Alias)}); err != nil { @@ -551,23 +567,23 @@ func TestPlannedReparentShardRelayLogError(t *testing.T) { if err := master.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { t.Errorf("master.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) } - if err := goodSlave1.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { - t.Errorf("goodSlave1.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) + if err := goodReplica1.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { + t.Errorf("goodReplica1.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) } if master.FakeMysqlDaemon.ReadOnly { t.Errorf("master.FakeMysqlDaemon.ReadOnly set") } - if !goodSlave1.FakeMysqlDaemon.ReadOnly { - t.Errorf("goodSlave1.FakeMysqlDaemon.ReadOnly not set") + if !goodReplica1.FakeMysqlDaemon.ReadOnly { + t.Errorf("goodReplica1.FakeMysqlDaemon.ReadOnly not set") } if !master.Agent.QueryServiceControl.IsServing() { t.Errorf("master...QueryServiceControl not serving") } // verify the old master was told to start replicating (and not - // the slave that wasn't replicating in the first place) - if !goodSlave1.FakeMysqlDaemon.Replicating { - t.Errorf("goodSlave1.FakeMysqlDaemon.Replicating not set") + // the replica that wasn't replicating in the first place) + if !goodReplica1.FakeMysqlDaemon.Replicating { + t.Errorf("goodReplica1.FakeMysqlDaemon.Replicating not set") } } @@ -578,9 +594,9 @@ func TestPlannedReparentShardRelayLogErrorStartSlave(t *testing.T) { vp := NewVtctlPipe(t, ts) defer vp.Close() - // Create a master, a couple good slaves + // Create a 
master, a couple good replicas master := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, nil) - goodSlave1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) + goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) // old master master.FakeMysqlDaemon.ReadOnly = false @@ -604,22 +620,22 @@ func TestPlannedReparentShardRelayLogErrorStartSlave(t *testing.T) { defer master.StopActionLoop(t) master.Agent.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true) - // good slave 1 is not replicating - goodSlave1.FakeMysqlDaemon.ReadOnly = true - goodSlave1.FakeMysqlDaemon.Replicating = true - goodSlave1.FakeMysqlDaemon.SlaveIORunning = false - goodSlave1.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(master.Tablet) - goodSlave1.FakeMysqlDaemon.CurrentMasterHost = topoproto.MysqlHostname(master.Tablet) - goodSlave1.FakeMysqlDaemon.CurrentMasterPort = int(topoproto.MysqlPort(master.Tablet)) + // good replica 1 is not replicating + goodReplica1.FakeMysqlDaemon.ReadOnly = true + goodReplica1.FakeMysqlDaemon.Replicating = true + goodReplica1.FakeMysqlDaemon.SlaveIORunning = false + goodReplica1.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(master.Tablet) + goodReplica1.FakeMysqlDaemon.CurrentMasterHost = topoproto.MysqlHostname(master.Tablet) + goodReplica1.FakeMysqlDaemon.CurrentMasterPort = int(topoproto.MysqlPort(master.Tablet)) // simulate error that will trigger a call to RestartSlave - goodSlave1.FakeMysqlDaemon.StartSlaveError = errors.New("Slave failed to initialize relay log info structure from the repository") - goodSlave1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + goodReplica1.FakeMysqlDaemon.StartSlaveError = errors.New("Slave failed to initialize relay log info structure from the repository") + goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", "RESET SLAVE", "START SLAVE", } - 
goodSlave1.StartActionLoop(t, wr) - defer goodSlave1.StopActionLoop(t) + goodReplica1.StartActionLoop(t, wr) + defer goodReplica1.StopActionLoop(t) // run PlannedReparentShard if err := vp.Run([]string{"PlannedReparentShard", "-wait_slave_timeout", "10s", "-keyspace_shard", master.Tablet.Keyspace + "/" + master.Tablet.Shard, "-new_master", topoproto.TabletAliasString(master.Tablet.Alias)}); err != nil { @@ -630,23 +646,217 @@ func TestPlannedReparentShardRelayLogErrorStartSlave(t *testing.T) { if err := master.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { t.Errorf("master.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) } - if err := goodSlave1.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { - t.Errorf("goodSlave1.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) + if err := goodReplica1.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { + t.Errorf("goodReplica1.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) } if master.FakeMysqlDaemon.ReadOnly { t.Errorf("master.FakeMysqlDaemon.ReadOnly set") } - if !goodSlave1.FakeMysqlDaemon.ReadOnly { - t.Errorf("goodSlave1.FakeMysqlDaemon.ReadOnly not set") + if !goodReplica1.FakeMysqlDaemon.ReadOnly { + t.Errorf("goodReplica1.FakeMysqlDaemon.ReadOnly not set") } if !master.Agent.QueryServiceControl.IsServing() { t.Errorf("master...QueryServiceControl not serving") } // verify the old master was told to start replicating (and not - // the slave that wasn't replicating in the first place) - if !goodSlave1.FakeMysqlDaemon.Replicating { - t.Errorf("goodSlave1.FakeMysqlDaemon.Replicating not set") + // the replica that wasn't replicating in the first place) + if !goodReplica1.FakeMysqlDaemon.Replicating { + t.Errorf("goodReplica1.FakeMysqlDaemon.Replicating not set") + } +} + +// TestPlannedReparentShardPromoteReplicaFail simulates a failure of the PromoteReplica call +// on the desired new master tablet +func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) { + ts := 
memorytopo.NewServer("cell1", "cell2") + wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) + vp := NewVtctlPipe(t, ts) + defer vp.Close() + + // Create a master, a couple good replicas + oldMaster := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, nil) + newMaster := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil) + goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) + goodReplica2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil) + + // new master + newMaster.FakeMysqlDaemon.ReadOnly = true + newMaster.FakeMysqlDaemon.Replicating = true + // make promote fail + newMaster.FakeMysqlDaemon.PromoteError = errors.New("some error") + newMaster.FakeMysqlDaemon.WaitMasterPosition = mysql.Position{ + GTIDSet: mysql.MariadbGTIDSet{ + mysql.MariadbGTID{ + Domain: 7, + Server: 123, + Sequence: 990, + }, + }, + } + newMaster.FakeMysqlDaemon.PromoteResult = mysql.Position{ + GTIDSet: mysql.MariadbGTIDSet{ + mysql.MariadbGTID{ + Domain: 7, + Server: 456, + Sequence: 991, + }, + }, } + newMaster.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + "STOP SLAVE", + "FAKE SET MASTER", + "START SLAVE", + "CREATE DATABASE IF NOT EXISTS _vt", + "SUBCREATE TABLE IF NOT EXISTS _vt.reparent_journal", + "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, master_alias, replication_position) VALUES", + } + newMaster.StartActionLoop(t, wr) + defer newMaster.StopActionLoop(t) + // old master + oldMaster.FakeMysqlDaemon.ReadOnly = false + oldMaster.FakeMysqlDaemon.Replicating = false + oldMaster.FakeMysqlDaemon.SlaveStatusError = mysql.ErrNotSlave + oldMaster.FakeMysqlDaemon.CurrentMasterPosition = newMaster.FakeMysqlDaemon.WaitMasterPosition + oldMaster.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) + oldMaster.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + "FAKE SET MASTER", + "START SLAVE", + } + 
oldMaster.StartActionLoop(t, wr) + defer oldMaster.StopActionLoop(t) + oldMaster.Agent.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true) + + // SetMaster is called on new master to make sure it's replicating before reparenting. + newMaster.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(oldMaster.Tablet) + // good replica 1 is replicating + goodReplica1.FakeMysqlDaemon.ReadOnly = true + goodReplica1.FakeMysqlDaemon.Replicating = true + goodReplica1.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) + goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + "STOP SLAVE", + "FAKE SET MASTER", + "START SLAVE", + } + goodReplica1.StartActionLoop(t, wr) + defer goodReplica1.StopActionLoop(t) + + // good replica 2 is not replicating + goodReplica2.FakeMysqlDaemon.ReadOnly = true + goodReplica2.FakeMysqlDaemon.Replicating = false + goodReplica2.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(newMaster.Tablet) + goodReplica2.StartActionLoop(t, wr) + goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + "FAKE SET MASTER", + } + defer goodReplica2.StopActionLoop(t) + + // run PlannedReparentShard + err := vp.Run([]string{"PlannedReparentShard", "-wait_slave_timeout", "10s", "-keyspace_shard", newMaster.Tablet.Keyspace + "/" + newMaster.Tablet.Shard, "-new_master", topoproto.TabletAliasString(newMaster.Tablet.Alias)}) + + assert.Error(t, err) + assert.Contains(t, err.Error(), "some error") + + // when promote fails, we don't call UndoDemoteMaster, so the old master should be read-only + assert.True(t, newMaster.FakeMysqlDaemon.ReadOnly, "newMaster.FakeMysqlDaemon.ReadOnly") + assert.True(t, oldMaster.FakeMysqlDaemon.ReadOnly, "oldMaster.FakeMysqlDaemon.ReadOnly") + + // retrying should work + newMaster.FakeMysqlDaemon.PromoteError = nil + newMaster.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + "STOP SLAVE", + "FAKE SET MASTER", + "START SLAVE", + // extra 
commands because of retry + "STOP SLAVE", + "FAKE SET MASTER", + "START SLAVE", + "CREATE DATABASE IF NOT EXISTS _vt", + "SUBCREATE TABLE IF NOT EXISTS _vt.reparent_journal", + "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, master_alias, replication_position) VALUES", + } + oldMaster.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + "FAKE SET MASTER", + "START SLAVE", + // extra commands because of retry + "FAKE SET MASTER", + "START SLAVE", + } + + // run PlannedReparentShard + err = vp.Run([]string{"PlannedReparentShard", "-wait_slave_timeout", "10s", "-keyspace_shard", newMaster.Tablet.Keyspace + "/" + newMaster.Tablet.Shard, "-new_master", topoproto.TabletAliasString(newMaster.Tablet.Alias)}) + assert.NoError(t, err) + + // check that mastership changed correctly + assert.False(t, newMaster.FakeMysqlDaemon.ReadOnly, "newMaster.FakeMysqlDaemon.ReadOnly") + assert.True(t, oldMaster.FakeMysqlDaemon.ReadOnly, "oldMaster.FakeMysqlDaemon.ReadOnly") +} + +// TestPlannedReparentShardSameMaster tests PRS with oldMaster works correctly +// Simulate failure of previous PRS and oldMaster is ReadOnly +// Verify that master correctly gets set to ReadWrite +func TestPlannedReparentShardSameMaster(t *testing.T) { + ts := memorytopo.NewServer("cell1", "cell2") + wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) + vp := NewVtctlPipe(t, ts) + defer vp.Close() + + // Create a master, a couple good replicas + oldMaster := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, nil) + goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) + goodReplica2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil) + + // old master + oldMaster.FakeMysqlDaemon.ReadOnly = true + oldMaster.FakeMysqlDaemon.Replicating = false + oldMaster.FakeMysqlDaemon.SlaveStatusError = mysql.ErrNotSlave + oldMaster.FakeMysqlDaemon.CurrentMasterPosition = mysql.Position{ + GTIDSet: 
mysql.MariadbGTIDSet{ + mysql.MariadbGTID{ + Domain: 7, + Server: 123, + Sequence: 990, + }, + }, + } + oldMaster.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + "CREATE DATABASE IF NOT EXISTS _vt", + "SUBCREATE TABLE IF NOT EXISTS _vt.reparent_journal", + "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, master_alias, replication_position) VALUES", + } + oldMaster.StartActionLoop(t, wr) + defer oldMaster.StopActionLoop(t) + oldMaster.Agent.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true) + + // good replica 1 is replicating + goodReplica1.FakeMysqlDaemon.ReadOnly = true + goodReplica1.FakeMysqlDaemon.Replicating = true + goodReplica1.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(oldMaster.Tablet) + goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + "STOP SLAVE", + "FAKE SET MASTER", + "START SLAVE", + } + goodReplica1.StartActionLoop(t, wr) + defer goodReplica1.StopActionLoop(t) + + // goodReplica2 is not replicating + goodReplica2.FakeMysqlDaemon.ReadOnly = true + goodReplica2.FakeMysqlDaemon.Replicating = false + goodReplica2.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(oldMaster.Tablet) + goodReplica2.StartActionLoop(t, wr) + goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + "FAKE SET MASTER", + } + defer goodReplica2.StopActionLoop(t) + + // run PlannedReparentShard + if err := vp.Run([]string{"PlannedReparentShard", "-wait_slave_timeout", "10s", "-keyspace_shard", oldMaster.Tablet.Keyspace + "/" + oldMaster.Tablet.Shard, "-new_master", topoproto.TabletAliasString(oldMaster.Tablet.Alias)}); err != nil { + t.Fatalf("PlannedReparent failed: %v", err) + } + + if oldMaster.FakeMysqlDaemon.ReadOnly { + t.Errorf("oldMaster.FakeMysqlDaemon.ReadOnly set") + } }