From 6fef68ab053cc983cb385bb663f503f659cca426 Mon Sep 17 00:00:00 2001 From: Manan Gupta <35839558+GuptaManan100@users.noreply.github.com> Date: Wed, 28 Jun 2023 20:24:50 +0530 Subject: [PATCH] [release-15.0] Prevent resetting replication every time we set replication source (#13377) (#13393) --- .../fakemysqldaemon/fakemysqldaemon.go | 1 - go/vt/mysqlctl/replication.go | 8 --- .../endtoend/init_shard_primary_test.go | 6 -- .../vttablet/tabletmanager/rpc_replication.go | 18 ++--- go/vt/vttablet/tabletmanager/tm_init_test.go | 1 - go/vt/wrangler/testlib/backup_test.go | 25 ++----- .../testlib/copy_schema_shard_test.go | 3 +- .../testlib/emergency_reparent_shard_test.go | 13 +--- .../testlib/external_reparent_test.go | 14 +--- go/vt/wrangler/testlib/permissions_test.go | 3 +- .../testlib/planned_reparent_shard_test.go | 68 ++++-------------- go/vt/wrangler/testlib/reparent_utils_test.go | 71 +++++++++++++++++-- go/vt/wrangler/testlib/version_test.go | 3 +- 13 files changed, 101 insertions(+), 133 deletions(-) diff --git a/go/vt/mysqlctl/fakemysqldaemon/fakemysqldaemon.go b/go/vt/mysqlctl/fakemysqldaemon/fakemysqldaemon.go index 5402be5e540..5831d817d40 100644 --- a/go/vt/mysqlctl/fakemysqldaemon/fakemysqldaemon.go +++ b/go/vt/mysqlctl/fakemysqldaemon/fakemysqldaemon.go @@ -431,7 +431,6 @@ func (fmd *FakeMysqlDaemon) SetReplicationSource(ctx context.Context, host strin if stopReplicationBefore { cmds = append(cmds, "STOP SLAVE") } - cmds = append(cmds, "RESET SLAVE ALL") cmds = append(cmds, "FAKE SET MASTER") if startReplicationAfter { cmds = append(cmds, "START SLAVE") diff --git a/go/vt/mysqlctl/replication.go b/go/vt/mysqlctl/replication.go index 4aba50b8903..670a9b4e1ec 100644 --- a/go/vt/mysqlctl/replication.go +++ b/go/vt/mysqlctl/replication.go @@ -396,14 +396,6 @@ func (mysqld *Mysqld) SetReplicationSource(ctx context.Context, host string, por if replicationStopBefore { cmds = append(cmds, conn.StopReplicationCommand()) } - // Reset replication parameters commands makes the instance forget the source host port - // This is required because sometimes MySQL gets stuck due to improper initialization of - // master info structure or related failures and throws errors like - // ERROR 1201 (HY000): Could not initialize master info structure; more error messages can be found in the MySQL error log - // These errors can only be resolved by resetting the replication parameters, otherwise START SLAVE fails. - // Therefore, we have elected to always reset the replication parameters whenever we try to set the source host port - // Since there is no real overhead, but it makes this function robust enough to also handle failures like these. - cmds = append(cmds, conn.ResetReplicationParametersCommands()...) smc := conn.SetReplicationSourceCommand(params, host, port, int(replicationConnectRetry.Seconds())) cmds = append(cmds, smc) if replicationStartAfter { diff --git a/go/vt/vtctl/grpcvtctldserver/endtoend/init_shard_primary_test.go b/go/vt/vtctl/grpcvtctldserver/endtoend/init_shard_primary_test.go index 6e3916c22a5..43ae4d95475 100644 --- a/go/vt/vtctl/grpcvtctldserver/endtoend/init_shard_primary_test.go +++ b/go/vt/vtctl/grpcvtctldserver/endtoend/init_shard_primary_test.go @@ -64,13 +64,11 @@ func TestInitShardPrimary(t *testing.T) { tablet2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ // These come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", // These come from InitShardPrimary "FAKE RESET ALL REPLICATION", "FAKE SET SLAVE POSITION", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -78,12 +76,10 @@ func TestInitShardPrimary(t *testing.T) { tablet3.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "FAKE RESET ALL REPLICATION", "FAKE SET SLAVE POSITION", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -133,7 +129,6 @@ func TestInitShardPrimaryNoFormerPrimary(t *testing.T) { tablet2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "FAKE RESET ALL REPLICATION", "FAKE SET SLAVE POSITION", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -142,7 +137,6 @@ func TestInitShardPrimaryNoFormerPrimary(t *testing.T) { tablet3.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "FAKE RESET ALL REPLICATION", "FAKE SET SLAVE POSITION", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } diff --git a/go/vt/vttablet/tabletmanager/rpc_replication.go b/go/vt/vttablet/tabletmanager/rpc_replication.go index 3bee3383b3a..a6966c607b1 100644 --- a/go/vt/vttablet/tabletmanager/rpc_replication.go +++ b/go/vt/vttablet/tabletmanager/rpc_replication.go @@ -774,14 +774,8 @@ func (tm *TabletManager) setReplicationSourceLocked(ctx context.Context, parentA } host := parent.Tablet.MysqlHostname port := int(parent.Tablet.MysqlPort) - // We want to reset the replication parameters and set replication source again when forceStartReplication is provided - // because sometimes MySQL gets stuck due to improper initialization of master info structure or related failures and throws errors like - // ERROR 1201 (HY000): Could not initialize master info structure; more error messages can be found in the MySQL error log - // These errors can only be resolved by resetting the replication parameters, otherwise START SLAVE fails. So when this RPC - // gets called from VTOrc or replication manager to fix the replication in these cases with forceStartReplication, we should also - // reset the replication parameters and set the source port information again. - if status.SourceHost != host || status.SourcePort != port || forceStartReplication { - // This handles reseting the replication parameters, changing the address and then starting the replication. + if status.SourceHost != host || status.SourcePort != port { + // This handles both changing the address and starting replication. if err := tm.MysqlDaemon.SetReplicationSource(ctx, host, port, wasReplicating, shouldbeReplicating); err != nil { if err := tm.handleRelayLogError(err); err != nil { return err @@ -1072,11 +1066,17 @@ func (tm *TabletManager) fixSemiSyncAndReplication(tabletType topodatapb.TabletT return nil } +// handleRelayLogError resets replication of the instance. +// This is required because sometimes MySQL gets stuck due to improper initialization of +// master info structure or related failures and throws errors like +// ERROR 1201 (HY000): Could not initialize master info structure; more error messages can be found in the MySQL error log +// These errors can only be resolved by resetting the replication, otherwise START SLAVE fails. func (tm *TabletManager) handleRelayLogError(err error) error { // attempt to fix this error: // Slave failed to initialize relay log info structure from the repository (errno 1872) (sqlstate HY000) during query: START SLAVE // see https://bugs.mysql.com/bug.php?id=83713 or https://github.com/vitessio/vitess/issues/5067 - if strings.Contains(err.Error(), "Slave failed to initialize relay log info structure from the repository") { + // The same fix also works for https://github.com/vitessio/vitess/issues/10955. + if strings.Contains(err.Error(), "Slave failed to initialize relay log info structure from the repository") || strings.Contains(err.Error(), "Could not initialize master info structure") { // Stop, reset and start replication again to resolve this error if err := tm.MysqlDaemon.RestartReplication(tm.hookExtraEnv()); err != nil { return err diff --git a/go/vt/vttablet/tabletmanager/tm_init_test.go b/go/vt/vttablet/tabletmanager/tm_init_test.go index fbf5566d2ed..56ebbbd32e1 100644 --- a/go/vt/vttablet/tabletmanager/tm_init_test.go +++ b/go/vt/vttablet/tabletmanager/tm_init_test.go @@ -383,7 +383,6 @@ func TestCheckPrimaryShip(t *testing.T) { fakeMysql.SetReplicationSourceInputs = append(fakeMysql.SetReplicationSourceInputs, fmt.Sprintf("%v:%v", otherTablet.MysqlHostname, otherTablet.MysqlPort)) fakeMysql.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } diff --git a/go/vt/wrangler/testlib/backup_test.go b/go/vt/wrangler/testlib/backup_test.go index 408329b1a98..a192be4122b 100644 --- a/go/vt/wrangler/testlib/backup_test.go +++ b/go/vt/wrangler/testlib/backup_test.go @@ -182,9 +182,8 @@ func testBackupRestore(t *testing.T, cDetails *compressionDetails) error { }, } sourceTablet.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", // This first set of STOP and START commands come from @@ -195,7 +194,6 @@ func testBackupRestore(t *testing.T, cDetails *compressionDetails) error { // These commands come from SetReplicationSource RPC called // to set the correct primary and semi-sync after Backup has concluded "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -232,16 +230,14 @@ func testBackupRestore(t *testing.T, cDetails *compressionDetails) error { }, } destTablet.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE", "RESET SLAVE ALL", "FAKE SET SLAVE POSITION", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -294,7 +290,6 @@ func testBackupRestore(t *testing.T, cDetails *compressionDetails) error { "STOP SLAVE", "RESET SLAVE ALL", "FAKE SET SLAVE POSITION", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -422,9 +417,8 @@ func TestBackupRestoreLagged(t *testing.T) { } sourceTablet.FakeMysqlDaemon.SetReplicationSourceInputs = []string{fmt.Sprintf("%s:%d", primary.Tablet.MysqlHostname, primary.Tablet.MysqlPort)} sourceTablet.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", // This first set of STOP and START commands come from @@ -435,7 +429,6 @@ func TestBackupRestoreLagged(t *testing.T) { // These commands come from SetReplicationSource RPC called // to set the correct primary and semi-sync after Backup has concluded "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -493,16 +486,14 @@ func TestBackupRestoreLagged(t *testing.T) { }, } destTablet.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE", "RESET SLAVE ALL", "FAKE SET SLAVE POSITION", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -648,9 +639,8 @@ func TestRestoreUnreachablePrimary(t *testing.T) { } sourceTablet.FakeMysqlDaemon.SetReplicationSourceInputs = []string{fmt.Sprintf("%s:%d", primary.Tablet.MysqlHostname, primary.Tablet.MysqlPort)} sourceTablet.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", // This first set of STOP and START commands come from @@ -661,7 +651,6 @@ func TestRestoreUnreachablePrimary(t *testing.T) { // These commands come from SetReplicationSource RPC called // to set the correct primary and semi-sync after Backup has concluded "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -691,16 +680,14 @@ func TestRestoreUnreachablePrimary(t *testing.T) { }, } destTablet.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE", "RESET SLAVE ALL", "FAKE SET SLAVE POSITION", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } diff --git a/go/vt/wrangler/testlib/copy_schema_shard_test.go b/go/vt/wrangler/testlib/copy_schema_shard_test.go index 2c59fbb16cf..0f62db641b6 100644 --- a/go/vt/wrangler/testlib/copy_schema_shard_test.go +++ b/go/vt/wrangler/testlib/copy_schema_shard_test.go @@ -73,9 +73,8 @@ func copySchema(t *testing.T, useShardAsSource bool) { sourceRdonly := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_RDONLY, sourceRdonlyDb, TabletKeyspaceShard(t, "ks", "-80")) sourceRdonly.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } diff --git a/go/vt/wrangler/testlib/emergency_reparent_shard_test.go b/go/vt/wrangler/testlib/emergency_reparent_shard_test.go index 466d1782f4b..8d80fa7d68f 100644 --- a/go/vt/wrangler/testlib/emergency_reparent_shard_test.go +++ b/go/vt/wrangler/testlib/emergency_reparent_shard_test.go @@ -137,14 +137,12 @@ func TestEmergencyReparentShard(t *testing.T) { goodReplica1.FakeMysqlDaemon.WaitPrimaryPositions = append(goodReplica1.FakeMysqlDaemon.WaitPrimaryPositions, goodReplica1.FakeMysqlDaemon.CurrentSourceFilePosition) goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE IO_THREAD", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -170,12 +168,10 @@ func TestEmergencyReparentShard(t *testing.T) { goodReplica2.FakeMysqlDaemon.WaitPrimaryPositions = append(goodReplica2.FakeMysqlDaemon.WaitPrimaryPositions, goodReplica2.FakeMysqlDaemon.CurrentSourceFilePosition) goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", } goodReplica2.StartActionLoop(t, wr) @@ -237,7 +233,6 @@ func TestEmergencyReparentShardPrimaryElectNotBest(t *testing.T) { newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE IO_THREAD", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "CREATE DATABASE IF NOT EXISTS _vt", @@ -273,14 +268,12 @@ func TestEmergencyReparentShardPrimaryElectNotBest(t *testing.T) { moreAdvancedReplica.FakeMysqlDaemon.WaitPrimaryPositions = append(moreAdvancedReplica.FakeMysqlDaemon.WaitPrimaryPositions, moreAdvancedReplica.FakeMysqlDaemon.CurrentSourceFilePosition) newPrimary.FakeMysqlDaemon.WaitPrimaryPositions = append(newPrimary.FakeMysqlDaemon.WaitPrimaryPositions, moreAdvancedReplica.FakeMysqlDaemon.CurrentPrimaryPosition) moreAdvancedReplica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE IO_THREAD", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } diff --git a/go/vt/wrangler/testlib/external_reparent_test.go b/go/vt/wrangler/testlib/external_reparent_test.go index 2acc250faa0..00fd10f703f 100644 --- a/go/vt/wrangler/testlib/external_reparent_test.go +++ b/go/vt/wrangler/testlib/external_reparent_test.go @@ -90,7 +90,6 @@ func TestTabletExternallyReparentedBasic(t *testing.T) { oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START Replica", } @@ -170,7 +169,6 @@ func TestTabletExternallyReparentedToReplica(t *testing.T) { // primary is still good to go. oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START Replica", } @@ -249,7 +247,6 @@ func TestTabletExternallyReparentedWithDifferentMysqlPort(t *testing.T) { oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START Replica", } @@ -262,9 +259,8 @@ func TestTabletExternallyReparentedWithDifferentMysqlPort(t *testing.T) { // TabletActionReplicaWasRestarted and point to the new mysql port goodReplica.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -339,7 +335,6 @@ func TestTabletExternallyReparentedContinueOnUnexpectedPrimary(t *testing.T) { oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START Replica", } @@ -352,9 +347,8 @@ func TestTabletExternallyReparentedContinueOnUnexpectedPrimary(t *testing.T) { // TabletActionReplicaWasRestarted and point to a bad host goodReplica.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -425,7 +419,6 @@ func TestTabletExternallyReparentedRerun(t *testing.T) { oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START Replica", } @@ -438,9 +431,8 @@ func TestTabletExternallyReparentedRerun(t *testing.T) { // On the good replica, we will respond to // TabletActionReplicaWasRestarted. goodReplica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } diff --git a/go/vt/wrangler/testlib/permissions_test.go b/go/vt/wrangler/testlib/permissions_test.go index fec85ca3124..9d2a950c8e3 100644 --- a/go/vt/wrangler/testlib/permissions_test.go +++ b/go/vt/wrangler/testlib/permissions_test.go @@ -563,9 +563,8 @@ func TestPermissions(t *testing.T) { } replica.FakeMysqlDaemon.SetReplicationSourceInputs = append(replica.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(primary.Tablet)) replica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } diff --git a/go/vt/wrangler/testlib/planned_reparent_shard_test.go b/go/vt/wrangler/testlib/planned_reparent_shard_test.go index 7fb174652c1..cc761b9f130 100644 --- a/go/vt/wrangler/testlib/planned_reparent_shard_test.go +++ b/go/vt/wrangler/testlib/planned_reparent_shard_test.go @@ -79,7 +79,6 @@ func TestPlannedReparentShardNoPrimaryProvided(t *testing.T) { } newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "CREATE DATABASE IF NOT EXISTS _vt", @@ -97,11 +96,9 @@ func TestPlannedReparentShardNoPrimaryProvided(t *testing.T) { oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.WaitPrimaryPositions[0] oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", // we end up calling SetReplicationSource twice on the old primary - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -117,13 +114,11 @@ func TestPlannedReparentShardNoPrimaryProvided(t *testing.T) { goodReplica1.FakeMysqlDaemon.Replicating = true goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -199,7 +194,6 @@ func TestPlannedReparentShardNoError(t *testing.T) { } newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "CREATE DATABASE IF NOT EXISTS _vt", @@ -217,11 +211,9 @@ func TestPlannedReparentShardNoError(t *testing.T) { oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.WaitPrimaryPositions[0] oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", // we end up calling SetReplicationSource twice on the old primary - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -237,13 +229,11 @@ func TestPlannedReparentShardNoError(t *testing.T) { goodReplica1.FakeMysqlDaemon.Replicating = true goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -254,12 +244,10 @@ func TestPlannedReparentShardNoError(t *testing.T) { goodReplica2.FakeMysqlDaemon.ReadOnly = true goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", } goodReplica2.StartActionLoop(t, wr) @@ -346,7 +334,6 @@ func TestPlannedReparentInitialization(t *testing.T) { goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -359,7 +346,6 @@ func TestPlannedReparentInitialization(t *testing.T) { goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) goodReplica2.StartActionLoop(t, wr) goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", } defer goodReplica2.StopActionLoop(t) @@ -430,7 +416,6 @@ func TestPlannedReparentShardWaitForPositionFail(t *testing.T) { } newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "CREATE DATABASE IF NOT EXISTS _vt", @@ -448,7 +433,6 @@ func TestPlannedReparentShardWaitForPositionFail(t *testing.T) { oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.PromoteResult oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -463,13 +447,11 @@ func TestPlannedReparentShardWaitForPositionFail(t *testing.T) { goodReplica1.FakeMysqlDaemon.Replicating = true goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -480,12 +462,10 @@ func TestPlannedReparentShardWaitForPositionFail(t *testing.T) { goodReplica2.FakeMysqlDaemon.ReadOnly = true goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", } goodReplica2.StartActionLoop(t, wr) @@ -546,7 +526,6 @@ func TestPlannedReparentShardWaitForPositionTimeout(t *testing.T) { } newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "CREATE DATABASE IF NOT EXISTS _vt", @@ -563,7 +542,6 @@ func TestPlannedReparentShardWaitForPositionTimeout(t *testing.T) { oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.WaitPrimaryPositions[0] oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -578,13 +556,11 @@ func TestPlannedReparentShardWaitForPositionTimeout(t *testing.T) { goodReplica1.FakeMysqlDaemon.Replicating = true goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -595,12 +571,10 @@ func TestPlannedReparentShardWaitForPositionTimeout(t *testing.T) { goodReplica2.FakeMysqlDaemon.ReadOnly = true goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", } goodReplica2.StartActionLoop(t, wr) @@ -661,9 +635,8 @@ func TestPlannedReparentShardRelayLogError(t *testing.T) { goodReplica1.FakeMysqlDaemon.Replicating = true goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(primary.Tablet)) goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", // simulate error that will trigger a call to RestartReplication @@ -747,9 +720,8 @@ func TestPlannedReparentShardRelayLogErrorStartReplication(t *testing.T) { goodReplica1.FakeMysqlDaemon.CurrentSourcePort = int(primary.Tablet.MysqlPort) goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ // simulate error that will trigger a call to RestartReplication - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", // In SetReplicationSource, we find that the source host and port was already set correctly, @@ -828,7 +800,6 @@ func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) { } newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "CREATE DATABASE IF NOT EXISTS _vt", @@ -846,7 +817,6 @@ func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) { oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.WaitPrimaryPositions[0] oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -861,13 +831,11 @@ func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) { goodReplica1.FakeMysqlDaemon.Replicating = true goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -878,12 +846,10 @@ func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) { goodReplica2.FakeMysqlDaemon.ReadOnly = true goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", } goodReplica2.StartActionLoop(t, wr) @@ -904,12 +870,10 @@ func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) { newPrimary.FakeMysqlDaemon.PromoteError = nil newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", // extra commands because of retry "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "CREATE DATABASE IF NOT EXISTS _vt", @@ -918,11 +882,9 @@ func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) { "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES", } oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", // extra commands because of retry - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -984,13 +946,11 @@ func TestPlannedReparentShardSamePrimary(t *testing.T) { goodReplica1.FakeMysqlDaemon.Replicating = true goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -1001,12 +961,10 @@ func TestPlannedReparentShardSamePrimary(t *testing.T) { goodReplica2.FakeMysqlDaemon.ReadOnly = true goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", } goodReplica2.StartActionLoop(t, wr) diff --git a/go/vt/wrangler/testlib/reparent_utils_test.go b/go/vt/wrangler/testlib/reparent_utils_test.go index 482705c1fe4..f0971a6886b 100644 --- a/go/vt/wrangler/testlib/reparent_utils_test.go +++ b/go/vt/wrangler/testlib/reparent_utils_test.go @@ -17,6 +17,8 @@ limitations under the License. package testlib import ( + "context" + "errors" "testing" "time" @@ -25,8 +27,6 @@ import ( "vitess.io/vitess/go/vt/discovery" "vitess.io/vitess/go/vt/vtctl/reparentutil" - "context" - "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/vt/logutil" "vitess.io/vitess/go/vt/topo" @@ -91,9 +91,8 @@ func TestShardReplicationStatuses(t *testing.T) { replica.FakeMysqlDaemon.CurrentSourcePort = int(primary.Tablet.MysqlPort) replica.FakeMysqlDaemon.SetReplicationSourceInputs = append(replica.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(primary.Tablet)) replica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -161,13 +160,11 @@ func TestReparentTablet(t *testing.T) { replica.FakeMysqlDaemon.IOThreadRunning = true replica.FakeMysqlDaemon.SetReplicationSourceInputs = append(replica.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(primary.Tablet)) replica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -185,3 +182,63 @@ func TestReparentTablet(t *testing.T) { } checkSemiSyncEnabled(t, false, true, replica) } + +// TestSetReplicationSourceRelayLogError tests that SetReplicationSource works as intended when we receive a relay log error while starting replication. +func TestSetReplicationSourceRelayLogError(t *testing.T) { + ctx := context.Background() + ts := memorytopo.NewServer("cell1", "cell2") + wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) + + // create shard and tablets + if _, err := ts.GetOrCreateShard(ctx, "test_keyspace", "0"); err != nil { + t.Fatalf("CreateShard failed: %v", err) + } + primary := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_PRIMARY, nil) + replica := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) + reparenttestutil.SetKeyspaceDurability(context.Background(), t, ts, "test_keyspace", "semi_sync") + + // mark the primary inside the shard + if _, err := ts.UpdateShardFields(ctx, "test_keyspace", "0", func(si *topo.ShardInfo) error { + si.PrimaryAlias = primary.Tablet.Alias + return nil + }); err != nil { + t.Fatalf("UpdateShardFields failed: %v", err) + } + + // primary action loop (to initialize host and port) + primary.StartActionLoop(t, wr) + defer primary.StopActionLoop(t) + + // replica loop + // We have to set the settings as replicating. Otherwise, + // the replication manager intervenes and tries to fix replication, + // which ends up making this test unpredictable. + replica.FakeMysqlDaemon.Replicating = true + replica.FakeMysqlDaemon.IOThreadRunning = true + replica.FakeMysqlDaemon.SetReplicationSourceInputs = append(replica.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(primary.Tablet)) + replica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + // These 3 statements come from tablet startup + "STOP SLAVE", + "FAKE SET MASTER", + "START SLAVE", + // We stop and reset the replication parameters because of relay log issues. + "STOP SLAVE", + "RESET SLAVE", + "START SLAVE", + } + replica.StartActionLoop(t, wr) + defer replica.StopActionLoop(t) + + // Set the correct error message that indicates we have received a relay log error. + replica.FakeMysqlDaemon.SetReplicationSourceError = errors.New("ERROR 1201 (HY000): Could not initialize master info structure; more error messages can be found in the MySQL error log") + // run ReparentTablet + if err := wr.SetReplicationSource(ctx, replica.Tablet); err != nil { + t.Fatalf("SetReplicationSource failed: %v", err) + } + + // check what was run + if err := replica.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { + t.Fatalf("replica.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) + } + checkSemiSyncEnabled(t, false, true, replica) +} diff --git a/go/vt/wrangler/testlib/version_test.go b/go/vt/wrangler/testlib/version_test.go index 18b29d61ac2..c54a0811948 100644 --- a/go/vt/wrangler/testlib/version_test.go +++ b/go/vt/wrangler/testlib/version_test.go @@ -93,9 +93,8 @@ func TestVersion(t *testing.T) { sourceReplicaGitRev := "fake git rev" sourceReplica.FakeMysqlDaemon.SetReplicationSourceInputs = append(sourceReplica.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(sourcePrimary.Tablet)) sourceReplica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", }