From 3082f5db55d7b9078025a184f8ac03344d9c757e Mon Sep 17 00:00:00 2001 From: Manan Gupta Date: Sun, 28 Aug 2022 15:44:23 +0530 Subject: [PATCH] refactor: refactor vtorc tests to run as a single test with sub-tests Signed-off-by: Manan Gupta --- go/test/endtoend/vtorc/general/main_test.go | 4 +- go/test/endtoend/vtorc/general/vtorc_test.go | 228 +++++++------------ 2 files changed, 79 insertions(+), 153 deletions(-) diff --git a/go/test/endtoend/vtorc/general/main_test.go b/go/test/endtoend/vtorc/general/main_test.go index 018e6da21fa..c52502d7c9b 100644 --- a/go/test/endtoend/vtorc/general/main_test.go +++ b/go/test/endtoend/vtorc/general/main_test.go @@ -32,8 +32,8 @@ func TestMain(m *testing.M) { var cellInfos []*utils.CellInfo cellInfos = append(cellInfos, &utils.CellInfo{ CellName: utils.Cell1, - NumReplicas: 6, - NumRdonly: 2, + NumReplicas: 4, + NumRdonly: 1, UIDBase: 100, }) diff --git a/go/test/endtoend/vtorc/general/vtorc_test.go b/go/test/endtoend/vtorc/general/vtorc_test.go index c829e1d4f57..1446e192274 100644 --- a/go/test/endtoend/vtorc/general/vtorc_test.go +++ b/go/test/endtoend/vtorc/general/vtorc_test.go @@ -90,32 +90,15 @@ func TestKeyspaceShard(t *testing.T) { utils.CheckReplication(t, clusterInfo, shard0.Vttablets[0], shard0.Vttablets[1:], 10*time.Second) } -// 3. make primary readonly, let orc repair -func TestPrimaryReadOnly(t *testing.T) { - defer cluster.PanicHandler(t) - utils.SetupVttabletsAndVtorc(t, clusterInfo, 2, 0, nil, cluster.VtorcConfiguration{ - PreventCrossDataCenterPrimaryFailover: true, - }, 1, "") - keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] - shard0 := &keyspace.Shards[0] - - // find primary from topo - curPrimary := utils.ShardPrimaryTablet(t, clusterInfo, keyspace, shard0) - assert.NotNil(t, curPrimary, "should have elected a primary") - - // Make the current primary database read-only. 
- _, err := utils.RunSQL(t, "set global read_only=ON", curPrimary, "") - require.NoError(t, err) - - // wait for repair - match := utils.WaitForReadOnlyValue(t, curPrimary, 0) - require.True(t, match) -} - -// 4. make replica ReadWrite, let orc repair -func TestReplicaReadWrite(t *testing.T) { +// Cases to test: +// 1. make primary readonly, let vtorc repair +// 2. make replica ReadWrite, let vtorc repair +// 3. stop replication, let vtorc repair +// 4. set up replication from non-primary, let vtorc repair +// 5. make instance A replicate from B and B from A, wait for repair +func TestVTOrcRepairs(t *testing.T) { defer cluster.PanicHandler(t) - utils.SetupVttabletsAndVtorc(t, clusterInfo, 2, 0, nil, cluster.VtorcConfiguration{ + utils.SetupVttabletsAndVtorc(t, clusterInfo, 3, 0, nil, cluster.VtorcConfiguration{ PreventCrossDataCenterPrimaryFailover: true, }, 1, "") keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] @@ -125,112 +108,97 @@ func TestReplicaReadWrite(t *testing.T) { curPrimary := utils.ShardPrimaryTablet(t, clusterInfo, keyspace, shard0) assert.NotNil(t, curPrimary, "should have elected a primary") - var replica *cluster.Vttablet + var replica, otherReplica *cluster.Vttablet for _, tablet := range shard0.Vttablets { // we know we have only two tablets, so the "other" one must be the new primary if tablet.Alias != curPrimary.Alias { - replica = tablet - break + if replica == nil { + replica = tablet + } else { + otherReplica = tablet + } } } - // Make the replica database read-write. 
- _, err := utils.RunSQL(t, "set global read_only=OFF", replica, "") - require.NoError(t, err) + require.NotNil(t, replica, "should be able to find a replica") + require.NotNil(t, otherReplica, "should be able to find 2nd replica") - // wait for repair - match := utils.WaitForReadOnlyValue(t, replica, 1) - require.True(t, match) -} + // check replication is setup correctly + utils.CheckReplication(t, clusterInfo, curPrimary, []*cluster.Vttablet{replica, otherReplica}, 15*time.Second) -// 5. stop replication, let orc repair -func TestStopReplication(t *testing.T) { - defer cluster.PanicHandler(t) - utils.SetupVttabletsAndVtorc(t, clusterInfo, 2, 0, nil, cluster.VtorcConfiguration{ - PreventCrossDataCenterPrimaryFailover: true, - }, 1, "") - keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] - shard0 := &keyspace.Shards[0] + t.Run("PrimaryReadOnly", func(t *testing.T) { + // Make the current primary database read-only. + _, err := utils.RunSQL(t, "set global read_only=ON", curPrimary, "") + require.NoError(t, err) - // find primary from topo - curPrimary := utils.ShardPrimaryTablet(t, clusterInfo, keyspace, shard0) - assert.NotNil(t, curPrimary, "should have elected a primary") + // wait for repair + match := utils.WaitForReadOnlyValue(t, curPrimary, 0) + require.True(t, match) + }) - var replica *cluster.Vttablet - for _, tablet := range shard0.Vttablets { - // we know we have only two tablets, so the "other" one must be the new primary - if tablet.Alias != curPrimary.Alias { - replica = tablet - break - } - } - require.NotNil(t, replica, "should be able to find a replica") - // use vtctlclient to stop replication - _, err := clusterInfo.ClusterInstance.VtctlclientProcess.ExecuteCommandWithOutput("StopReplication", replica.Alias) - require.NoError(t, err) + t.Run("ReplicaReadWrite", func(t *testing.T) { + // Make the replica database read-write. 
+ _, err := utils.RunSQL(t, "set global read_only=OFF", replica, "") + require.NoError(t, err) - // check replication is setup correctly - utils.CheckReplication(t, clusterInfo, curPrimary, []*cluster.Vttablet{replica}, 15*time.Second) + // wait for repair + match := utils.WaitForReadOnlyValue(t, replica, 1) + require.True(t, match) + }) - // Stop just the IO thread on the replica - _, err = utils.RunSQL(t, "STOP SLAVE IO_THREAD", replica, "") - require.NoError(t, err) + t.Run("StopReplication", func(t *testing.T) { + // use vtctlclient to stop replication + _, err := clusterInfo.ClusterInstance.VtctlclientProcess.ExecuteCommandWithOutput("StopReplication", replica.Alias) + require.NoError(t, err) - // check replication is setup correctly - utils.CheckReplication(t, clusterInfo, curPrimary, []*cluster.Vttablet{replica}, 15*time.Second) + // check replication is setup correctly + utils.CheckReplication(t, clusterInfo, curPrimary, []*cluster.Vttablet{replica, otherReplica}, 15*time.Second) - // Stop just the SQL thread on the replica - _, err = utils.RunSQL(t, "STOP SLAVE SQL_THREAD", replica, "") - require.NoError(t, err) + // Stop just the IO thread on the replica + _, err = utils.RunSQL(t, "STOP SLAVE IO_THREAD", replica, "") + require.NoError(t, err) - // check replication is setup correctly - utils.CheckReplication(t, clusterInfo, curPrimary, []*cluster.Vttablet{replica}, 15*time.Second) -} + // check replication is setup correctly + utils.CheckReplication(t, clusterInfo, curPrimary, []*cluster.Vttablet{replica, otherReplica}, 15*time.Second) -// 6. 
setup replication from non-primary, let orc repair -func TestReplicationFromOtherReplica(t *testing.T) { - defer cluster.PanicHandler(t) - utils.SetupVttabletsAndVtorc(t, clusterInfo, 3, 0, nil, cluster.VtorcConfiguration{ - PreventCrossDataCenterPrimaryFailover: true, - }, 1, "") - keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] - shard0 := &keyspace.Shards[0] + // Stop just the SQL thread on the replica + _, err = utils.RunSQL(t, "STOP SLAVE SQL_THREAD", replica, "") + require.NoError(t, err) - // find primary from topo - curPrimary := utils.ShardPrimaryTablet(t, clusterInfo, keyspace, shard0) - assert.NotNil(t, curPrimary, "should have elected a primary") + // check replication is setup correctly + utils.CheckReplication(t, clusterInfo, curPrimary, []*cluster.Vttablet{replica, otherReplica}, 15*time.Second) + }) - // TODO(deepthi): we should not need to do this, the DB should be created automatically - _, err := curPrimary.VttabletProcess.QueryTablet(fmt.Sprintf("create database IF NOT EXISTS vt_%s", keyspace.Name), keyspace.Name, false) - require.NoError(t, err) + t.Run("ReplicationFromOtherReplica", func(t *testing.T) { + // point replica at otherReplica + changeReplicationSourceCommand := fmt.Sprintf("STOP SLAVE; RESET SLAVE ALL;"+ + "CHANGE MASTER TO MASTER_HOST='%s', MASTER_PORT=%d, MASTER_USER='vt_repl', MASTER_AUTO_POSITION = 1; START SLAVE", utils.Hostname, otherReplica.MySQLPort) + _, err := utils.RunSQL(t, changeReplicationSourceCommand, replica, "") + require.NoError(t, err) - var replica, otherReplica *cluster.Vttablet - for _, tablet := range shard0.Vttablets { - // we know we have only two tablets, so the "other" one must be the new primary - if tablet.Alias != curPrimary.Alias { - if replica == nil { - replica = tablet - } else { - otherReplica = tablet - } - } - } - require.NotNil(t, replica, "should be able to find a replica") - require.NotNil(t, otherReplica, "should be able to find 2nd replica") + // wait until the source port is set 
back correctly by vtorc + utils.CheckSourcePort(t, replica, curPrimary, 15*time.Second) - // check replication is setup correctly - utils.CheckReplication(t, clusterInfo, curPrimary, []*cluster.Vttablet{replica, otherReplica}, 15*time.Second) + // check that writes succeed + utils.VerifyWritesSucceed(t, clusterInfo, curPrimary, []*cluster.Vttablet{replica, otherReplica}, 15*time.Second) + }) - // point replica at otherReplica - changeReplicationSourceCommand := fmt.Sprintf("STOP SLAVE; RESET SLAVE ALL;"+ - "CHANGE MASTER TO MASTER_HOST='%s', MASTER_PORT=%d, MASTER_USER='vt_repl', MASTER_AUTO_POSITION = 1; START SLAVE", utils.Hostname, otherReplica.MySQLPort) - _, err = utils.RunSQL(t, changeReplicationSourceCommand, replica, "") - require.NoError(t, err) + t.Run("CircularReplication", func(t *testing.T) { + // change the replication source on the primary + changeReplicationSourceCommands := fmt.Sprintf("STOP SLAVE; RESET SLAVE ALL;"+ + "CHANGE MASTER TO MASTER_HOST='%s', MASTER_PORT=%d, MASTER_USER='vt_repl', MASTER_AUTO_POSITION = 1;"+ + "START SLAVE;", replica.VttabletProcess.TabletHostname, replica.MySQLPort) + _, err := utils.RunSQL(t, changeReplicationSourceCommands, curPrimary, "") + require.NoError(t, err) - // wait until the source port is set back correctly by vtorc - utils.CheckSourcePort(t, replica, curPrimary, 15*time.Second) + // wait for curPrimary to reach stable state + time.Sleep(1 * time.Second) - // check that writes succeed - utils.VerifyWritesSucceed(t, clusterInfo, curPrimary, []*cluster.Vttablet{replica, otherReplica}, 15*time.Second) + // wait for repair + err = utils.WaitForReplicationToStop(t, curPrimary) + require.NoError(t, err) + // check that the writes still succeed + utils.VerifyWritesSucceed(t, clusterInfo, curPrimary, []*cluster.Vttablet{replica, otherReplica}, 10*time.Second) + }) } func TestRepairAfterTER(t *testing.T) { @@ -267,48 +235,6 @@ func TestRepairAfterTER(t *testing.T) { utils.CheckReplication(t, clusterInfo, 
newPrimary, []*cluster.Vttablet{curPrimary}, 15*time.Second) } -// 7. make instance A replicates from B and B from A, wait for repair -func TestCircularReplication(t *testing.T) { - defer cluster.PanicHandler(t) - utils.SetupVttabletsAndVtorc(t, clusterInfo, 2, 0, nil, cluster.VtorcConfiguration{ - PreventCrossDataCenterPrimaryFailover: true, - }, 1, "") - keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] - shard0 := &keyspace.Shards[0] - - // find primary from topo - primary := utils.ShardPrimaryTablet(t, clusterInfo, keyspace, shard0) - assert.NotNil(t, primary, "should have elected a primary") - - var replica *cluster.Vttablet - for _, tablet := range shard0.Vttablets { - // we know we have only two tablets, so the "other" one must be the new primary - if tablet.Alias != primary.Alias { - replica = tablet - break - } - } - - // check replication is setup correctly - utils.CheckReplication(t, clusterInfo, primary, []*cluster.Vttablet{replica}, 15*time.Second) - - // change the replication source on the primary - changeReplicationSourceCommands := fmt.Sprintf("STOP SLAVE; RESET SLAVE ALL;"+ - "CHANGE MASTER TO MASTER_HOST='%s', MASTER_PORT=%d, MASTER_USER='vt_repl', MASTER_AUTO_POSITION = 1;"+ - "START SLAVE;", replica.VttabletProcess.TabletHostname, replica.MySQLPort) - _, err := utils.RunSQL(t, changeReplicationSourceCommands, primary, "") - require.NoError(t, err) - - // wait for primary to reach stable state - time.Sleep(1 * time.Second) - - // wait for repair - err = utils.WaitForReplicationToStop(t, primary) - require.NoError(t, err) - // check that the writes still succeed - utils.VerifyWritesSucceed(t, clusterInfo, primary, []*cluster.Vttablet{replica}, 10*time.Second) -} - // TestSemiSync tests that semi-sync is setup correctly by vtorc if it is incorrectly set func TestSemiSync(t *testing.T) { // stop any vtorc instance running due to a previous test.