diff --git a/config/mycnf/mariadb100.cnf b/config/mycnf/mariadb100.cnf index 4e202ea3183..3f840530566 100644 --- a/config/mycnf/mariadb100.cnf +++ b/config/mycnf/mariadb100.cnf @@ -4,9 +4,8 @@ # (when the primary goes away). Here we just load the plugin so it's # available if desired, but it's disabled at startup. # -# If the -enable_semi_sync flag is used, VTTablet will enable semi-sync -# at the proper time when replication is set up, or when a primary is -# promoted or demoted. +# VTTablet will enable semi-sync at the proper time when replication is set up, +# or when a primary is promoted or demoted based on the durability policy configured. plugin-load = rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so slave_net_timeout = 60 diff --git a/config/mycnf/mariadb101.cnf b/config/mycnf/mariadb101.cnf index 40a358d85f2..1c660bf6f61 100644 --- a/config/mycnf/mariadb101.cnf +++ b/config/mycnf/mariadb101.cnf @@ -4,9 +4,8 @@ # (when the primary goes away). Here we just load the plugin so it's # available if desired, but it's disabled at startup. # -# If the -enable_semi_sync flag is used, VTTablet will enable semi-sync -# at the proper time when replication is set up, or when a primary is -# promoted or demoted. +# VTTablet will enable semi-sync at the proper time when replication is set up, +# or when a primary is promoted or demoted based on the durability policy configured. plugin-load = rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so slave_net_timeout = 60 diff --git a/config/mycnf/mariadb102.cnf b/config/mycnf/mariadb102.cnf index efd165f18fb..ae1da3d9a71 100644 --- a/config/mycnf/mariadb102.cnf +++ b/config/mycnf/mariadb102.cnf @@ -4,9 +4,8 @@ # (when the primary goes away). Here we just load the plugin so it's # available if desired, but it's disabled at startup. 
# -# If the -enable_semi_sync flag is used, VTTablet will enable semi-sync -# at the proper time when replication is set up, or when a primary is -# promoted or demoted. +# VTTablet will enable semi-sync at the proper time when replication is set up, +# or when a primary is promoted or demoted based on the durability policy configured. plugin-load = rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so # enable strict mode so it's safe to compare sequence numbers across different server IDs. diff --git a/config/mycnf/mysql57.cnf b/config/mycnf/mysql57.cnf index 5808b27b12b..7a8c45a187c 100644 --- a/config/mycnf/mysql57.cnf +++ b/config/mycnf/mysql57.cnf @@ -21,9 +21,8 @@ collation_server = utf8_general_ci # (when the primary goes away). Here we just load the plugin so it's # available if desired, but it's disabled at startup. # -# If the -enable_semi_sync flag is used, VTTablet will enable semi-sync -# at the proper time when replication is set up, or when a primary is -# promoted or demoted. +# VTTablet will enable semi-sync at the proper time when replication is set up, +# or when a primary is promoted or demoted based on the durability policy configured. plugin-load = rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so # When semi-sync is enabled, don't allow fallback to async diff --git a/config/mycnf/mysql80.cnf b/config/mycnf/mysql80.cnf index 95867383c5e..f0fe3d9378b 100644 --- a/config/mycnf/mysql80.cnf +++ b/config/mycnf/mysql80.cnf @@ -18,9 +18,8 @@ default_authentication_plugin = mysql_native_password # (when the primary goes away). Here we just load the plugin so it's # available if desired, but it's disabled at startup. # -# If the -enable_semi_sync flag is used, VTTablet will enable semi-sync -# at the proper time when replication is set up, or when a primary is -# promoted or demoted. 
+# VTTablet will enable semi-sync at the proper time when replication is set up, +# or when a primary is promoted or demoted based on the durability policy configured. plugin-load = rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so # MySQL 8.0 will not load plugins during --initialize diff --git a/doc/releasenotes/15_0_0_summary.md b/doc/releasenotes/15_0_0_summary.md index 265b7d3d61b..e779a89b202 100644 --- a/doc/releasenotes/15_0_0_summary.md +++ b/doc/releasenotes/15_0_0_summary.md @@ -5,6 +5,9 @@ #### vttablet startup flag --enable-query-plan-field-caching This flag is now deprecated. It will be removed in v16. +#### vttablet startup flag deprecations +- --enable_semi_sync is now deprecated. It will be removed in v16. Instead, set the correct durability policy using `SetKeyspaceDurabilityPolicy` + ### New Syntax ### VDiff2 @@ -94,3 +97,10 @@ $ curl -s http://127.0.0.1:15100/debug/vars | jq . | grep Throttler Added new parameter `multi_shard_autocommit` to lookup vindex definition in vschema, if enabled will send lookup vindex dml query as autocommit to all shards This is slighly different from `autocommit` parameter where the query is sent in its own transaction separate from the ongoing transaction if any i.e. begin -> lookup query execs -> commit/rollback + +### Durability Policy + +#### Cross Cell + +A new durability policy `cross_cell` is now supported. `cross_cell` durability policy only allows replica tablets from a different cell than the current primary to +send semi-sync ACKs. This ensures that any committed write exists in at least 2 tablets belonging to different cells. \ No newline at end of file diff --git a/examples/compose/external_db/mysql/mysql56.cnf b/examples/compose/external_db/mysql/mysql56.cnf index 7454231c33d..fdd34b1bd2e 100644 --- a/examples/compose/external_db/mysql/mysql56.cnf +++ b/examples/compose/external_db/mysql/mysql56.cnf @@ -19,9 +19,8 @@ innodb_use_native_aio = 0 # (when the master goes away).
Here we just load the plugin so it's # available if desired, but it's disabled at startup. # -# If the -enable_semi_sync flag is used, VTTablet will enable semi-sync -# at the proper time when replication is set up, or when masters are -# promoted or demoted. +# VTTablet will enable semi-sync at the proper time when replication is set up, +# or when a primary is promoted or demoted based on the durability policy configured. plugin-load = rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so # When semi-sync is enabled, don't allow fallback to async diff --git a/examples/compose/external_db/mysql/mysql57.cnf b/examples/compose/external_db/mysql/mysql57.cnf index 08935674b37..ebf301187eb 100644 --- a/examples/compose/external_db/mysql/mysql57.cnf +++ b/examples/compose/external_db/mysql/mysql57.cnf @@ -21,9 +21,8 @@ collation_server = utf8_general_ci # (when the master goes away). Here we just load the plugin so it's # available if desired, but it's disabled at startup. # -# If the -enable_semi_sync flag is used, VTTablet will enable semi-sync -# at the proper time when replication is set up, or when masters are -# promoted or demoted. +# VTTablet will enable semi-sync at the proper time when replication is set up, +# or when a primary is promoted or demoted based on the durability policy configured. 
plugin-load = rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so # When semi-sync is enabled, don't allow fallback to async diff --git a/examples/compose/vttablet-up.sh b/examples/compose/vttablet-up.sh index 72ac44fcd48..a4bf31a5c42 100755 --- a/examples/compose/vttablet-up.sh +++ b/examples/compose/vttablet-up.sh @@ -150,7 +150,6 @@ exec $VTROOT/bin/vttablet \ --tablet-path $alias \ --tablet_hostname "$vthost" \ --health_check_interval 5s \ - --enable_semi_sync=false \ --disable_active_reparents=true \ --port $web_port \ --grpc_port $grpc_port \ diff --git a/examples/local/scripts/vttablet-up.sh b/examples/local/scripts/vttablet-up.sh index d817ed82ab8..846f7788d8c 100755 --- a/examples/local/scripts/vttablet-up.sh +++ b/examples/local/scripts/vttablet-up.sh @@ -46,7 +46,6 @@ vttablet \ --init_shard $shard \ --init_tablet_type $tablet_type \ --health_check_interval 5s \ - --enable_semi_sync \ --enable_replication_reporter \ --backup_storage_implementation file \ --file_backup_storage_root $VTDATAROOT/backups \ diff --git a/examples/operator/vtorc_example.yaml b/examples/operator/vtorc_example.yaml index 29ea79bc8e5..60c8f2915ca 100644 --- a/examples/operator/vtorc_example.yaml +++ b/examples/operator/vtorc_example.yaml @@ -74,7 +74,6 @@ spec: extraFlags: db_charset: utf8mb4 disable_active_reparents: "true" - enable_semi_sync: "false" resources: requests: cpu: 100m diff --git a/go/flags/endtoend/vttablet.txt b/go/flags/endtoend/vttablet.txt index 03828c0d49b..336fc426e90 100644 --- a/go/flags/endtoend/vttablet.txt +++ b/go/flags/endtoend/vttablet.txt @@ -396,7 +396,7 @@ Usage of vttablet: --enable_replication_reporter Use polling to track replication lag. --enable_semi_sync - Enable semi-sync when configuring replication, on primary and replica tablets only (rdonly tablets will not ack). + DEPRECATED - Set the correct durability policy on the keyspace instead. 
--enable_transaction_limit If true, limit on number of transactions open at the same time will be enforced for all users. User trying to open a new transaction after exhausting their limit will receive an error immediately, regardless of whether there are available slots or not. --enable_transaction_limit_dry_run diff --git a/go/test/endtoend/backup/vtctlbackup/backup_utils.go b/go/test/endtoend/backup/vtctlbackup/backup_utils.go index f4e2e92a9ac..d1ce45abc3a 100644 --- a/go/test/endtoend/backup/vtctlbackup/backup_utils.go +++ b/go/test/endtoend/backup/vtctlbackup/backup_utils.go @@ -147,7 +147,6 @@ func LaunchCluster(setupType int, streamMode string, stripes int) (int, error) { tablet.VttabletProcess.DbPassword = dbPassword tablet.VttabletProcess.ExtraArgs = commonTabletArg tablet.VttabletProcess.SupportsBackup = true - tablet.VttabletProcess.EnableSemiSync = true if setupType == Mysqlctld { tablet.MysqlctldProcess = *cluster.MysqlCtldProcessInstance(tablet.TabletUID, tablet.MySQLPort, localCluster.TmpDirectory) diff --git a/go/test/endtoend/recovery/pitr/shardedpitr_test.go b/go/test/endtoend/recovery/pitr/shardedpitr_test.go index 9d4c0691b6d..a8aa20af66d 100644 --- a/go/test/endtoend/recovery/pitr/shardedpitr_test.go +++ b/go/test/endtoend/recovery/pitr/shardedpitr_test.go @@ -405,7 +405,7 @@ func initializeCluster(t *testing.T) { shard1.Vttablets = []*cluster.Vttablet{shard1Primary, shard1Replica} clusterInstance.VtTabletExtraArgs = append(clusterInstance.VtTabletExtraArgs, commonTabletArg...) 
- clusterInstance.VtTabletExtraArgs = append(clusterInstance.VtTabletExtraArgs, "--restore_from_backup", "--enable_semi_sync") + clusterInstance.VtTabletExtraArgs = append(clusterInstance.VtTabletExtraArgs, "--restore_from_backup") err = clusterInstance.SetupCluster(keyspace, []cluster.Shard{*shard, *shard0, *shard1}) require.NoError(t, err) @@ -516,7 +516,6 @@ func launchRecoveryTablet(t *testing.T, tablet *cluster.Vttablet, binlogServer * tablet.Alias = tablet.VttabletProcess.TabletPath tablet.VttabletProcess.SupportsBackup = true tablet.VttabletProcess.Keyspace = restoreKeyspaceName - tablet.VttabletProcess.EnableSemiSync = true tablet.VttabletProcess.ExtraArgs = []string{ "--disable_active_reparents", "--enable_replication_reporter=false", diff --git a/go/test/endtoend/recovery/pitrtls/shardedpitr_tls_test.go b/go/test/endtoend/recovery/pitrtls/shardedpitr_tls_test.go index eed826aa0a6..f323333ce1e 100644 --- a/go/test/endtoend/recovery/pitrtls/shardedpitr_tls_test.go +++ b/go/test/endtoend/recovery/pitrtls/shardedpitr_tls_test.go @@ -142,7 +142,7 @@ func initializeCluster(t *testing.T) { shard1.Vttablets = []*cluster.Vttablet{shard1Primary, shard1Replica} clusterInstance.VtTabletExtraArgs = append(clusterInstance.VtTabletExtraArgs, commonTabletArg...) 
- clusterInstance.VtTabletExtraArgs = append(clusterInstance.VtTabletExtraArgs, "--restore_from_backup", "--enable_semi_sync") + clusterInstance.VtTabletExtraArgs = append(clusterInstance.VtTabletExtraArgs, "--restore_from_backup") err = clusterInstance.SetupCluster(keyspace, []cluster.Shard{*shard, *shard0, *shard1}) require.NoError(t, err) @@ -495,7 +495,6 @@ func tlsLaunchRecoveryTablet(t *testing.T, tablet *cluster.Vttablet, tabletForBi tablet.Alias = tablet.VttabletProcess.TabletPath tablet.VttabletProcess.SupportsBackup = true tablet.VttabletProcess.Keyspace = restoreKeyspaceName - tablet.VttabletProcess.EnableSemiSync = true certDir := path.Join(os.Getenv("VTDATAROOT"), fmt.Sprintf("/ssl_%010d", tablet.MysqlctlProcess.TabletUID)) tablet.VttabletProcess.ExtraArgs = []string{ diff --git a/go/test/endtoend/recovery/unshardedrecovery/recovery.go b/go/test/endtoend/recovery/unshardedrecovery/recovery.go index 0f85ad5131b..d628c8c5037 100644 --- a/go/test/endtoend/recovery/unshardedrecovery/recovery.go +++ b/go/test/endtoend/recovery/unshardedrecovery/recovery.go @@ -125,7 +125,6 @@ SET GLOBAL old_alter_table = ON; tablet.VttabletProcess.ExtraArgs = append(tablet.VttabletProcess.ExtraArgs, recovery.XbArgs...) 
} tablet.VttabletProcess.SupportsBackup = true - tablet.VttabletProcess.EnableSemiSync = true tablet.MysqlctlProcess = *cluster.MysqlCtlProcessInstance(tablet.TabletUID, tablet.MySQLPort, localCluster.TmpDirectory) tablet.MysqlctlProcess.InitDBFile = newInitDBFile diff --git a/go/test/endtoend/reparent/emergencyreparent/ers_test.go b/go/test/endtoend/reparent/emergencyreparent/ers_test.go index c7f3514a677..3497e7d8298 100644 --- a/go/test/endtoend/reparent/emergencyreparent/ers_test.go +++ b/go/test/endtoend/reparent/emergencyreparent/ers_test.go @@ -32,7 +32,7 @@ import ( func TestTrivialERS(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentClusterLegacy(t, true) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets @@ -57,7 +57,7 @@ func TestTrivialERS(t *testing.T) { func TestReparentIgnoreReplicas(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentClusterLegacy(t, true) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets var err error @@ -99,7 +99,7 @@ func TestReparentIgnoreReplicas(t *testing.T) { func TestReparentDownPrimary(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentClusterLegacy(t, true) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets @@ -135,7 +135,7 @@ func TestReparentDownPrimary(t *testing.T) { func TestReparentNoChoiceDownPrimary(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentClusterLegacy(t, true) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := 
clusterInstance.Keyspaces[0].Shards[0].Vttablets var err error @@ -171,7 +171,7 @@ func TestReparentNoChoiceDownPrimary(t *testing.T) { func TestSemiSyncSetupCorrectly(t *testing.T) { t.Run("semi-sync enabled", func(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentClusterLegacy(t, true) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets @@ -199,7 +199,7 @@ func TestSemiSyncSetupCorrectly(t *testing.T) { t.Run("semi-sync disabled", func(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentClusterLegacy(t, false) + clusterInstance := utils.SetupReparentCluster(t, "none") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets @@ -229,7 +229,7 @@ func TestSemiSyncSetupCorrectly(t *testing.T) { // TestERSPromoteRdonly tests that we never end up promoting a rdonly instance as the primary func TestERSPromoteRdonly(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentCluster(t, true) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets var err error @@ -257,7 +257,7 @@ func TestERSPromoteRdonly(t *testing.T) { // TestERSPreventCrossCellPromotion tests that we promote a replica in the same cell as the previous primary if prevent cross cell promotion flag is set func TestERSPreventCrossCellPromotion(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentCluster(t, true) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets var err error @@ -280,7 +280,7 @@ func TestERSPreventCrossCellPromotion(t *testing.T) { // caught up to it by pulling transactions 
from it func TestPullFromRdonly(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentCluster(t, true) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets var err error @@ -345,7 +345,7 @@ func TestPullFromRdonly(t *testing.T) { // is stopped on the primary elect. func TestNoReplicationStatusAndIOThreadStopped(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentCluster(t, true) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]}) @@ -442,5 +442,96 @@ func TestERSForInitialization(t *testing.T) { strArray := utils.GetShardReplicationPositions(t, clusterInstance, utils.KeyspaceName, utils.ShardName, true) assert.Equal(t, len(tablets), len(strArray)) assert.Contains(t, strArray[0], "primary") // primary first +} + +func TestRecoverWithMultipleFailures(t *testing.T) { + defer cluster.PanicHandler(t) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") + defer utils.TeardownCluster(clusterInstance) + tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets + utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]}) + + // make tablets[1] a rdonly tablet. + err := clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", tablets[1].Alias, "rdonly") + require.NoError(t, err) + + // Confirm that replication is still working as intended utils.ConfirmReplication(t, tablets[0], tablets[1:]) + + // Make the rdonly and primary tablets and databases unavailable. 
+ utils.StopTablet(t, tablets[1], true) + utils.StopTablet(t, tablets[0], true) + + // We expect this to succeed since we only have 1 primary eligible tablet which is down + out, err := utils.Ers(clusterInstance, nil, "30s", "10s") + require.NoError(t, err, out) + + newPrimary := utils.GetNewPrimary(t, clusterInstance) + utils.ConfirmReplication(t, newPrimary, []*cluster.Vttablet{tablets[2], tablets[3]}) +} + +// TestERSFailFast tests that ERS will fail fast if it cannot find any tablet which can be safely promoted instead of promoting +// a tablet and hanging while inserting a row in the reparent journal on getting semi-sync ACKs +func TestERSFailFast(t *testing.T) { + defer cluster.PanicHandler(t) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") + defer utils.TeardownCluster(clusterInstance) + tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets + utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]}) + + // make tablets[1] a rdonly tablet. 
+ err := clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", tablets[1].Alias, "rdonly") + require.NoError(t, err) + + // Confirm that replication is still working as intended + utils.ConfirmReplication(t, tablets[0], tablets[1:]) + + strChan := make(chan string) + go func() { + // We expect this to fail since we have ignored all replica tablets and only the rdonly is left, which is not capable of sending semi-sync ACKs + out, err := utils.ErsIgnoreTablet(clusterInstance, tablets[2], "240s", "90s", []*cluster.Vttablet{tablets[0], tablets[3]}, false) + require.Error(t, err) + strChan <- out + }() + + select { + case out := <-strChan: + require.Contains(t, out, "proposed primary zone1-0000000103 will not be able to make forward progress on being promoted") + case <-time.After(60 * time.Second): + require.Fail(t, "Emergency Reparent Shard did not fail in 60 seconds") + } +} + +// TestReplicationStopped checks that ERS ignores the tablets that have sql thread stopped. +// If there are more than 1, we also fail. +func TestReplicationStopped(t *testing.T) { + defer cluster.PanicHandler(t) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") + defer utils.TeardownCluster(clusterInstance) + tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets + utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]}) + + err := clusterInstance.VtctlclientProcess.ExecuteCommand("ExecuteFetchAsDba", tablets[1].Alias, `STOP SLAVE SQL_THREAD;`) + require.NoError(t, err) + err = clusterInstance.VtctlclientProcess.ExecuteCommand("ExecuteFetchAsDba", tablets[2].Alias, `STOP SLAVE;`) + require.NoError(t, err) + // Run an additional command in the current primary which will only be acked by tablets[3] and be in its relay log. 
+ insertedVal := utils.ConfirmReplication(t, tablets[0], nil) + // Failover to tablets[3] + _, err = utils.Ers(clusterInstance, tablets[3], "60s", "30s") + require.Error(t, err, "ERS should fail with 2 replicas having replication stopped") + + // Start replication back on tablet[1] + err = clusterInstance.VtctlclientProcess.ExecuteCommand("ExecuteFetchAsDba", tablets[1].Alias, `START SLAVE;`) + require.NoError(t, err) + // Failover to tablets[3] again. This time it should succeed + out, err := utils.Ers(clusterInstance, tablets[3], "60s", "30s") + require.NoError(t, err, out) + // Verify that the tablet has the inserted value + err = utils.CheckInsertedValues(context.Background(), t, tablets[3], insertedVal) + require.NoError(t, err) + // Confirm that replication is setup correctly from tablets[3] to tablets[0] + utils.ConfirmReplication(t, tablets[3], tablets[:1]) + // Confirm that tablets[2] which had replication stopped initially still has its replication stopped + utils.CheckReplicationStatus(context.Background(), t, tablets[2], false, false) } diff --git a/go/test/endtoend/reparent/newfeaturetest/reparent_test.go b/go/test/endtoend/reparent/newfeaturetest/reparent_test.go index 35632d61c69..94d4e96b20d 100644 --- a/go/test/endtoend/reparent/newfeaturetest/reparent_test.go +++ b/go/test/endtoend/reparent/newfeaturetest/reparent_test.go @@ -17,200 +17,38 @@ limitations under the License. 
package newfeaturetest import ( - "context" - "strconv" "testing" - "time" - "vitess.io/vitess/go/mysql" + "github.com/stretchr/testify/require" + "vitess.io/vitess/go/test/endtoend/cluster" "vitess.io/vitess/go/test/endtoend/reparent/utils" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" ) -// ERS TESTS - -func TestRecoverWithMultipleFailures(t *testing.T) { +func TestCrossCellDurability(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentCluster(t, true) + clusterInstance := utils.SetupReparentCluster(t, "cross_cell") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets - utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]}) - - // make tablets[1] a rdonly tablet. - err := clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", tablets[1].Alias, "rdonly") - require.NoError(t, err) - - // Confirm that replication is still working as intended - utils.ConfirmReplication(t, tablets[0], tablets[1:]) - - // Make the rdonly and primary tablets and databases unavailable. 
- utils.StopTablet(t, tablets[1], true) - utils.StopTablet(t, tablets[0], true) - - // We expect this to succeed since we only have 1 primary eligible tablet which is down - out, err := utils.Ers(clusterInstance, nil, "30s", "10s") - require.NoError(t, err, out) - - newPrimary := utils.GetNewPrimary(t, clusterInstance) - utils.ConfirmReplication(t, newPrimary, []*cluster.Vttablet{tablets[2], tablets[3]}) -} - -// TestERSFailFast tests that ERS will fail fast if it cannot find any tablet which can be safely promoted instead of promoting -// a tablet and hanging while inserting a row in the reparent journal on getting semi-sync ACKs -func TestERSFailFast(t *testing.T) { - defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentCluster(t, true) - defer utils.TeardownCluster(clusterInstance) - tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets - utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]}) - - // make tablets[1] a rdonly tablet. 
- err := clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", tablets[1].Alias, "rdonly") - require.NoError(t, err) - // Confirm that replication is still working as intended - utils.ConfirmReplication(t, tablets[0], tablets[1:]) - - strChan := make(chan string) - go func() { - // We expect this to fail since we have ignored all replica tablets and only the rdonly is left, which is not capable of sending semi-sync ACKs - out, err := utils.ErsIgnoreTablet(clusterInstance, tablets[2], "240s", "90s", []*cluster.Vttablet{tablets[0], tablets[3]}, false) - require.Error(t, err) - strChan <- out - }() - - select { - case out := <-strChan: - require.Contains(t, out, "proposed primary zone1-0000000103 will not be able to make forward progress on being promoted") - case <-time.After(60 * time.Second): - require.Fail(t, "Emergency Reparent Shard did not fail in 60 seconds") - } -} - -// TestReplicationStopped checks that ERS ignores the tablets that have sql thread stopped. -// If there are more than 1, we also fail. -func TestReplicationStopped(t *testing.T) { - defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentCluster(t, true) - defer utils.TeardownCluster(clusterInstance) - tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]}) - err := clusterInstance.VtctlclientProcess.ExecuteCommand("ExecuteFetchAsDba", tablets[1].Alias, `STOP SLAVE SQL_THREAD;`) - require.NoError(t, err) - err = clusterInstance.VtctlclientProcess.ExecuteCommand("ExecuteFetchAsDba", tablets[2].Alias, `STOP SLAVE;`) - require.NoError(t, err) - // Run an additional command in the current primary which will only be acked by tablets[3] and be in its relay log. 
- insertedVal := utils.ConfirmReplication(t, tablets[0], nil) - // Failover to tablets[3] - _, err = utils.Ers(clusterInstance, tablets[3], "60s", "30s") - require.Error(t, err, "ERS should fail with 2 replicas having replication stopped") + // When tablets[0] is the primary, the only tablet in a different cell is tablets[3]. + // So the other two should have semi-sync turned off + utils.CheckSemiSyncSetupCorrectly(t, tablets[0], "ON") + utils.CheckSemiSyncSetupCorrectly(t, tablets[3], "ON") + utils.CheckSemiSyncSetupCorrectly(t, tablets[1], "OFF") + utils.CheckSemiSyncSetupCorrectly(t, tablets[2], "OFF") - // Start replication back on tablet[1] - err = clusterInstance.VtctlclientProcess.ExecuteCommand("ExecuteFetchAsDba", tablets[1].Alias, `START SLAVE;`) - require.NoError(t, err) - // Failover to tablets[3] again. This time it should succeed - out, err := utils.Ers(clusterInstance, tablets[3], "60s", "30s") + // Run forced reparent operation, this should proceed unimpeded. + out, err := utils.Prs(t, clusterInstance, tablets[3]) require.NoError(t, err, out) - // Verify that the tablet has the inserted value - err = utils.CheckInsertedValues(context.Background(), t, tablets[3], insertedVal) - require.NoError(t, err) - // Confirm that replication is setup correctly from tablets[3] to tablets[0] - utils.ConfirmReplication(t, tablets[3], tablets[:1]) - // Confirm that tablets[2] which had replication stopped initially still has its replication stopped - utils.CheckReplicationStatus(context.Background(), t, tablets[2], false, false) -} -// TestFullStatus tests that the RPC FullStatus works as intended. 
-func TestFullStatus(t *testing.T) { - defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentCluster(t, true) - defer utils.TeardownCluster(clusterInstance) - tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets - utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]}) + utils.ConfirmReplication(t, tablets[3], []*cluster.Vttablet{tablets[0], tablets[1], tablets[2]}) - // Check that full status gives the correct result for a primary tablet - primaryStatus, err := utils.TmcFullStatus(context.Background(), tablets[0]) - require.NoError(t, err) - assert.NotEmpty(t, primaryStatus.ServerUuid) - assert.NotEmpty(t, primaryStatus.ServerId) - // For a primary tablet there is no replication status - assert.Nil(t, primaryStatus.ReplicationStatus) - assert.Contains(t, primaryStatus.PrimaryStatus.String(), "vt-0000000101-bin") - assert.Equal(t, primaryStatus.GtidPurged, "MySQL56/") - assert.False(t, primaryStatus.ReadOnly) - assert.True(t, primaryStatus.SemiSyncPrimaryEnabled) - assert.True(t, primaryStatus.SemiSyncReplicaEnabled) - assert.True(t, primaryStatus.SemiSyncPrimaryStatus) - assert.False(t, primaryStatus.SemiSyncReplicaStatus) - assert.EqualValues(t, 3, primaryStatus.SemiSyncPrimaryClients) - assert.EqualValues(t, 1000000000000000000, primaryStatus.SemiSyncPrimaryTimeout) - assert.EqualValues(t, 1, primaryStatus.SemiSyncWaitForReplicaCount) - assert.Equal(t, "ROW", primaryStatus.BinlogFormat) - assert.Equal(t, "FULL", primaryStatus.BinlogRowImage) - assert.Equal(t, "ON", primaryStatus.GtidMode) - assert.True(t, primaryStatus.LogReplicaUpdates) - assert.True(t, primaryStatus.LogBinEnabled) - assert.Regexp(t, `[58]\.[07].*`, primaryStatus.Version) - assert.NotEmpty(t, primaryStatus.VersionComment) - - // Check that full status gives the correct result for a replica tablet - replicaStatus, err := utils.TmcFullStatus(context.Background(), tablets[1]) - require.NoError(t, err) - assert.NotEmpty(t, 
replicaStatus.ServerUuid) - assert.NotEmpty(t, replicaStatus.ServerId) - assert.Contains(t, replicaStatus.ReplicationStatus.Position, "MySQL56/"+replicaStatus.ReplicationStatus.SourceUuid) - assert.EqualValues(t, mysql.ReplicationStateRunning, replicaStatus.ReplicationStatus.IoState) - assert.EqualValues(t, mysql.ReplicationStateRunning, replicaStatus.ReplicationStatus.SqlState) - assert.Equal(t, fileNameFromPosition(replicaStatus.ReplicationStatus.FilePosition), fileNameFromPosition(primaryStatus.PrimaryStatus.FilePosition)) - assert.LessOrEqual(t, rowNumberFromPosition(replicaStatus.ReplicationStatus.FilePosition), rowNumberFromPosition(primaryStatus.PrimaryStatus.FilePosition)) - assert.Equal(t, replicaStatus.ReplicationStatus.RelayLogSourceBinlogEquivalentPosition, primaryStatus.PrimaryStatus.FilePosition) - assert.Contains(t, replicaStatus.ReplicationStatus.RelayLogFilePosition, "vt-0000000102-relay") - assert.Equal(t, replicaStatus.ReplicationStatus.Position, primaryStatus.PrimaryStatus.Position) - assert.Equal(t, replicaStatus.ReplicationStatus.RelayLogPosition, primaryStatus.PrimaryStatus.Position) - assert.Empty(t, replicaStatus.ReplicationStatus.LastIoError) - assert.Empty(t, replicaStatus.ReplicationStatus.LastSqlError) - assert.Equal(t, replicaStatus.ReplicationStatus.SourceUuid, primaryStatus.ServerUuid) - assert.LessOrEqual(t, int(replicaStatus.ReplicationStatus.ReplicationLagSeconds), 1) - assert.False(t, replicaStatus.ReplicationStatus.ReplicationLagUnknown) - assert.EqualValues(t, 0, replicaStatus.ReplicationStatus.SqlDelay) - assert.False(t, replicaStatus.ReplicationStatus.SslAllowed) - assert.False(t, replicaStatus.ReplicationStatus.HasReplicationFilters) - assert.False(t, replicaStatus.ReplicationStatus.UsingGtid) - assert.True(t, replicaStatus.ReplicationStatus.AutoPosition) - assert.Equal(t, replicaStatus.ReplicationStatus.SourceHost, utils.Hostname) - assert.EqualValues(t, replicaStatus.ReplicationStatus.SourcePort, tablets[0].MySQLPort) - 
assert.Equal(t, replicaStatus.ReplicationStatus.SourceUser, "vt_repl") - assert.Contains(t, replicaStatus.PrimaryStatus.String(), "vt-0000000102-bin") - assert.Equal(t, replicaStatus.GtidPurged, "MySQL56/") - assert.True(t, replicaStatus.ReadOnly) - assert.False(t, replicaStatus.SemiSyncPrimaryEnabled) - assert.True(t, replicaStatus.SemiSyncReplicaEnabled) - assert.False(t, replicaStatus.SemiSyncPrimaryStatus) - assert.True(t, replicaStatus.SemiSyncReplicaStatus) - assert.EqualValues(t, 0, replicaStatus.SemiSyncPrimaryClients) - assert.EqualValues(t, 1000000000000000000, replicaStatus.SemiSyncPrimaryTimeout) - assert.EqualValues(t, 1, replicaStatus.SemiSyncWaitForReplicaCount) - assert.Equal(t, "ROW", replicaStatus.BinlogFormat) - assert.Equal(t, "FULL", replicaStatus.BinlogRowImage) - assert.Equal(t, "ON", replicaStatus.GtidMode) - assert.True(t, replicaStatus.LogReplicaUpdates) - assert.True(t, replicaStatus.LogBinEnabled) - assert.Regexp(t, `[58]\.[07].*`, replicaStatus.Version) - assert.NotEmpty(t, replicaStatus.VersionComment) -} - -// fileNameFromPosition gets the file name from the position -func fileNameFromPosition(pos string) string { - return pos[0 : len(pos)-4] -} - -// rowNumberFromPosition gets the row number from the position -func rowNumberFromPosition(pos string) int { - rowNumStr := pos[len(pos)-4:] - rowNum, _ := strconv.Atoi(rowNumStr) - return rowNum + // All the tablets will have semi-sync setup since tablets[3] is in Cell2 and all + // others are in Cell1, so all of them are eligible to send semi-sync ACKs + for _, tablet := range tablets { + utils.CheckSemiSyncSetupCorrectly(t, tablet, "ON") + } } diff --git a/go/test/endtoend/reparent/plannedreparent/reparent_test.go b/go/test/endtoend/reparent/plannedreparent/reparent_test.go index fae5d630c8b..09635204d19 100644 --- a/go/test/endtoend/reparent/plannedreparent/reparent_test.go +++ b/go/test/endtoend/reparent/plannedreparent/reparent_test.go @@ -19,12 +19,14 @@ package plannedreparent 
import ( "context" "fmt" + "strconv" "testing" "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/test/endtoend/cluster" "vitess.io/vitess/go/test/endtoend/reparent/utils" "vitess.io/vitess/go/vt/log" @@ -32,7 +34,7 @@ import ( func TestPrimaryToSpareStateChangeImpossible(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentClusterLegacy(t, true) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets @@ -44,7 +46,7 @@ func TestPrimaryToSpareStateChangeImpossible(t *testing.T) { func TestReparentCrossCell(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentClusterLegacy(t, true) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets @@ -58,7 +60,7 @@ func TestReparentCrossCell(t *testing.T) { func TestReparentGraceful(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentClusterLegacy(t, true) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets @@ -83,7 +85,7 @@ func TestReparentGraceful(t *testing.T) { // TestPRSWithDrainedLaggingTablet tests that PRS succeeds even if we have a lagging drained tablet func TestPRSWithDrainedLaggingTablet(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentClusterLegacy(t, true) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets @@ -110,7 +112,7 @@ func TestPRSWithDrainedLaggingTablet(t *testing.T) { func TestReparentReplicaOffline(t *testing.T) { defer 
cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentClusterLegacy(t, true) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets @@ -127,7 +129,7 @@ func TestReparentReplicaOffline(t *testing.T) { func TestReparentAvoid(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentClusterLegacy(t, true) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets utils.DeleteTablet(t, clusterInstance, tablets[2]) @@ -159,14 +161,14 @@ func TestReparentAvoid(t *testing.T) { func TestReparentFromOutside(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentClusterLegacy(t, true) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) reparentFromOutside(t, clusterInstance, false) } func TestReparentFromOutsideWithNoPrimary(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentClusterLegacy(t, true) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets @@ -255,7 +257,7 @@ func reparentFromOutside(t *testing.T, clusterInstance *cluster.LocalProcessClus func TestReparentWithDownReplica(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentClusterLegacy(t, true) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets @@ -298,7 +300,7 @@ func TestReparentWithDownReplica(t *testing.T) { func TestChangeTypeSemiSync(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentClusterLegacy(t, true) + clusterInstance := 
utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets @@ -362,7 +364,7 @@ func TestChangeTypeSemiSync(t *testing.T) { func TestReparentDoesntHangIfPrimaryFails(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentClusterLegacy(t, true) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets @@ -381,7 +383,7 @@ func TestReparentDoesntHangIfPrimaryFails(t *testing.T) { func TestReplicationStatus(t *testing.T) { defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentCluster(t, true) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]}) @@ -439,3 +441,94 @@ func TestReplicationStatus(t *testing.T) { assert.NotEmpty(t, replicationStatus.Position) assert.NotEmpty(t, replicationStatus.RelayLogPosition) } + +// TestFullStatus tests that the RPC FullStatus works as intended. 
+func TestFullStatus(t *testing.T) { + defer cluster.PanicHandler(t) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") + defer utils.TeardownCluster(clusterInstance) + tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets + utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]}) + + // Check that full status gives the correct result for a primary tablet + primaryStatus, err := utils.TmcFullStatus(context.Background(), tablets[0]) + require.NoError(t, err) + assert.NotEmpty(t, primaryStatus.ServerUuid) + assert.NotEmpty(t, primaryStatus.ServerId) + // For a primary tablet there is no replication status + assert.Nil(t, primaryStatus.ReplicationStatus) + assert.Contains(t, primaryStatus.PrimaryStatus.String(), "vt-0000000101-bin") + assert.Equal(t, primaryStatus.GtidPurged, "MySQL56/") + assert.False(t, primaryStatus.ReadOnly) + assert.True(t, primaryStatus.SemiSyncPrimaryEnabled) + assert.True(t, primaryStatus.SemiSyncReplicaEnabled) + assert.True(t, primaryStatus.SemiSyncPrimaryStatus) + assert.False(t, primaryStatus.SemiSyncReplicaStatus) + assert.EqualValues(t, 3, primaryStatus.SemiSyncPrimaryClients) + assert.EqualValues(t, 1000000000000000000, primaryStatus.SemiSyncPrimaryTimeout) + assert.EqualValues(t, 1, primaryStatus.SemiSyncWaitForReplicaCount) + assert.Equal(t, "ROW", primaryStatus.BinlogFormat) + assert.Equal(t, "FULL", primaryStatus.BinlogRowImage) + assert.Equal(t, "ON", primaryStatus.GtidMode) + assert.True(t, primaryStatus.LogReplicaUpdates) + assert.True(t, primaryStatus.LogBinEnabled) + assert.Regexp(t, `[58]\.[07].*`, primaryStatus.Version) + assert.NotEmpty(t, primaryStatus.VersionComment) + + // Check that full status gives the correct result for a replica tablet + replicaStatus, err := utils.TmcFullStatus(context.Background(), tablets[1]) + require.NoError(t, err) + assert.NotEmpty(t, replicaStatus.ServerUuid) + assert.NotEmpty(t, replicaStatus.ServerId) + assert.Contains(t, 
replicaStatus.ReplicationStatus.Position, "MySQL56/"+replicaStatus.ReplicationStatus.SourceUuid) + assert.EqualValues(t, mysql.ReplicationStateRunning, replicaStatus.ReplicationStatus.IoState) + assert.EqualValues(t, mysql.ReplicationStateRunning, replicaStatus.ReplicationStatus.SqlState) + assert.Equal(t, fileNameFromPosition(replicaStatus.ReplicationStatus.FilePosition), fileNameFromPosition(primaryStatus.PrimaryStatus.FilePosition)) + assert.LessOrEqual(t, rowNumberFromPosition(replicaStatus.ReplicationStatus.FilePosition), rowNumberFromPosition(primaryStatus.PrimaryStatus.FilePosition)) + assert.Equal(t, replicaStatus.ReplicationStatus.RelayLogSourceBinlogEquivalentPosition, primaryStatus.PrimaryStatus.FilePosition) + assert.Contains(t, replicaStatus.ReplicationStatus.RelayLogFilePosition, "vt-0000000102-relay") + assert.Equal(t, replicaStatus.ReplicationStatus.Position, primaryStatus.PrimaryStatus.Position) + assert.Equal(t, replicaStatus.ReplicationStatus.RelayLogPosition, primaryStatus.PrimaryStatus.Position) + assert.Empty(t, replicaStatus.ReplicationStatus.LastIoError) + assert.Empty(t, replicaStatus.ReplicationStatus.LastSqlError) + assert.Equal(t, replicaStatus.ReplicationStatus.SourceUuid, primaryStatus.ServerUuid) + assert.LessOrEqual(t, int(replicaStatus.ReplicationStatus.ReplicationLagSeconds), 1) + assert.False(t, replicaStatus.ReplicationStatus.ReplicationLagUnknown) + assert.EqualValues(t, 0, replicaStatus.ReplicationStatus.SqlDelay) + assert.False(t, replicaStatus.ReplicationStatus.SslAllowed) + assert.False(t, replicaStatus.ReplicationStatus.HasReplicationFilters) + assert.False(t, replicaStatus.ReplicationStatus.UsingGtid) + assert.True(t, replicaStatus.ReplicationStatus.AutoPosition) + assert.Equal(t, replicaStatus.ReplicationStatus.SourceHost, utils.Hostname) + assert.EqualValues(t, replicaStatus.ReplicationStatus.SourcePort, tablets[0].MySQLPort) + assert.Equal(t, replicaStatus.ReplicationStatus.SourceUser, "vt_repl") + assert.Contains(t, 
replicaStatus.PrimaryStatus.String(), "vt-0000000102-bin") + assert.Equal(t, replicaStatus.GtidPurged, "MySQL56/") + assert.True(t, replicaStatus.ReadOnly) + assert.False(t, replicaStatus.SemiSyncPrimaryEnabled) + assert.True(t, replicaStatus.SemiSyncReplicaEnabled) + assert.False(t, replicaStatus.SemiSyncPrimaryStatus) + assert.True(t, replicaStatus.SemiSyncReplicaStatus) + assert.EqualValues(t, 0, replicaStatus.SemiSyncPrimaryClients) + assert.EqualValues(t, 1000000000000000000, replicaStatus.SemiSyncPrimaryTimeout) + assert.EqualValues(t, 1, replicaStatus.SemiSyncWaitForReplicaCount) + assert.Equal(t, "ROW", replicaStatus.BinlogFormat) + assert.Equal(t, "FULL", replicaStatus.BinlogRowImage) + assert.Equal(t, "ON", replicaStatus.GtidMode) + assert.True(t, replicaStatus.LogReplicaUpdates) + assert.True(t, replicaStatus.LogBinEnabled) + assert.Regexp(t, `[58]\.[07].*`, replicaStatus.Version) + assert.NotEmpty(t, replicaStatus.VersionComment) +} + +// fileNameFromPosition gets the file name from the position +func fileNameFromPosition(pos string) string { + return pos[0 : len(pos)-4] +} + +// rowNumberFromPosition gets the row number from the position +func rowNumberFromPosition(pos string) int { + rowNumStr := pos[len(pos)-4:] + rowNum, _ := strconv.Atoi(rowNumStr) + return rowNum +} diff --git a/go/test/endtoend/reparent/utils/utils.go b/go/test/endtoend/reparent/utils/utils.go index 255a663b749..541db9ca828 100644 --- a/go/test/endtoend/reparent/utils/utils.go +++ b/go/test/endtoend/reparent/utils/utils.go @@ -66,19 +66,14 @@ var ( //region cluster setup/teardown -// SetupReparentClusterLegacy is used to setup the reparent cluster -func SetupReparentClusterLegacy(t *testing.T, enableSemiSync bool) *cluster.LocalProcessCluster { - return setupClusterLegacy(context.Background(), t, ShardName, []string{cell1, cell2}, []int{3, 1}, enableSemiSync) -} - // SetupReparentCluster is used to setup the reparent cluster -func SetupReparentCluster(t *testing.T, enableSemiSync 
bool) *cluster.LocalProcessCluster { - return setupCluster(context.Background(), t, ShardName, []string{cell1, cell2}, []int{3, 1}, enableSemiSync) +func SetupReparentCluster(t *testing.T, durability string) *cluster.LocalProcessCluster { + return setupCluster(context.Background(), t, ShardName, []string{cell1, cell2}, []int{3, 1}, durability) } // SetupRangeBasedCluster sets up the range based cluster func SetupRangeBasedCluster(ctx context.Context, t *testing.T) *cluster.LocalProcessCluster { - return setupClusterLegacy(ctx, t, ShardName, []string{cell1}, []int{2}, true) + return setupCluster(ctx, t, ShardName, []string{cell1}, []int{2}, "semi_sync") } // TeardownCluster is used to teardown the reparent cluster @@ -86,15 +81,13 @@ func TeardownCluster(clusterInstance *cluster.LocalProcessCluster) { clusterInstance.Teardown() } -func setupCluster(ctx context.Context, t *testing.T, shardName string, cells []string, numTablets []int, enableSemiSync bool) *cluster.LocalProcessCluster { +func setupCluster(ctx context.Context, t *testing.T, shardName string, cells []string, numTablets []int, durability string) *cluster.LocalProcessCluster { var tablets []*cluster.Vttablet clusterInstance := cluster.NewCluster(cells[0], Hostname) keyspace := &cluster.Keyspace{Name: KeyspaceName} - durability := "none" - if enableSemiSync { + if durability == "semi_sync" { clusterInstance.VtTabletExtraArgs = append(clusterInstance.VtTabletExtraArgs, "--enable_semi_sync") - durability = "semi_sync" } // Start topo server @@ -204,129 +197,6 @@ func setupShard(ctx context.Context, t *testing.T, clusterInstance *cluster.Loca assert.Contains(t, strArray[0], "primary") // primary first } -func setupClusterLegacy(ctx context.Context, t *testing.T, shardName string, cells []string, numTablets []int, enableSemiSync bool) *cluster.LocalProcessCluster { - var tablets []*cluster.Vttablet - clusterInstance := cluster.NewCluster(cells[0], Hostname) - keyspace := &cluster.Keyspace{Name: KeyspaceName} - 
- durability := "none" - if enableSemiSync { - clusterInstance.VtTabletExtraArgs = append(clusterInstance.VtTabletExtraArgs, "--enable_semi_sync") - durability = "semi_sync" - } - - // Start topo server - err := clusterInstance.StartTopo() - require.NoError(t, err, "Error starting topo") - err = clusterInstance.TopoProcess.ManageTopoDir("mkdir", "/vitess/"+cells[0]) - require.NoError(t, err, "Error managing topo") - numCell := 1 - for numCell < len(cells) { - err = clusterInstance.VtctlProcess.AddCellInfo(cells[numCell]) - require.NoError(t, err, "Error managing topo") - numCell++ - } - - // Adding another cell in the same cluster - numCell = 0 - for numCell < len(cells) { - i := 0 - for i < numTablets[numCell] { - i++ - tablet := clusterInstance.NewVttabletInstance("replica", 100*(numCell+1)+i, cells[numCell]) - tablets = append(tablets, tablet) - } - numCell++ - } - - shard := &cluster.Shard{Name: shardName} - shard.Vttablets = tablets - - clusterInstance.VtTabletExtraArgs = append(clusterInstance.VtTabletExtraArgs, - "--lock_tables_timeout", "5s", - "--init_populate_metadata", - "--track_schema_versions=true", - // disabling online-ddl for reparent tests. This is done to reduce flakiness. - // All the tests in this package reparent frequently between different tablets - // This means that Promoting a tablet to primary is sometimes immediately followed by a DemotePrimary call. - // In this case, the close method and initSchema method of the onlineDDL executor race. - // If the initSchema acquires the lock, then it takes about 30 seconds for it to run during which time the - // DemotePrimary rpc is stalled! - "--queryserver_enable_online_ddl=false", - // disabling active reparents on the tablet since we don't want the replication manager - // to fix replication if it is stopped. Some tests deliberately do that. Also, we don't want - // the replication manager to silently fix the replication in case ERS or PRS mess up. 
All the - // tests in this test suite should work irrespective of this flag. Each run of ERS, PRS should be - // setting up the replication correctly. - "--disable_active_reparents") - - // Initialize Cluster - err = clusterInstance.SetupCluster(keyspace, []cluster.Shard{*shard}) - require.NoError(t, err, "Cannot launch cluster") - - //Start MySql - var mysqlCtlProcessList []*exec.Cmd - for _, shard := range clusterInstance.Keyspaces[0].Shards { - for _, tablet := range shard.Vttablets { - log.Infof("Starting MySql for tablet %v", tablet.Alias) - proc, err := tablet.MysqlctlProcess.StartProcess() - require.NoError(t, err, "Error starting start mysql") - mysqlCtlProcessList = append(mysqlCtlProcessList, proc) - } - } - - // Wait for mysql processes to start - for _, proc := range mysqlCtlProcessList { - if err := proc.Wait(); err != nil { - clusterInstance.PrintMysqlctlLogFiles() - require.FailNow(t, "Error starting mysql: %s", err.Error()) - } - } - - if clusterInstance.VtctlMajorVersion >= 14 { - vtctldClientProcess := cluster.VtctldClientProcessInstance("localhost", clusterInstance.VtctldProcess.GrpcPort, clusterInstance.TmpDirectory) - out, err := vtctldClientProcess.ExecuteCommandWithOutput("SetKeyspaceDurabilityPolicy", KeyspaceName, fmt.Sprintf("--durability-policy=%s", durability)) - require.NoError(t, err, out) - } - - setupShardLegacy(ctx, t, clusterInstance, shardName, tablets) - return clusterInstance -} - -func setupShardLegacy(ctx context.Context, t *testing.T, clusterInstance *cluster.LocalProcessCluster, shardName string, tablets []*cluster.Vttablet) { - for _, tablet := range tablets { - // create database - err := tablet.VttabletProcess.CreateDB(KeyspaceName) - require.NoError(t, err) - // Start the tablet - err = tablet.VttabletProcess.Setup() - require.NoError(t, err) - } - - for _, tablet := range tablets { - err := tablet.VttabletProcess.WaitForTabletStatuses([]string{"SERVING", "NOT_SERVING"}) - require.NoError(t, err) - } - - // Force the 
replica to reparent assuming that all the datasets are identical. - err := clusterInstance.VtctlclientProcess.ExecuteCommand("InitShardPrimary", "--", - "--force", fmt.Sprintf("%s/%s", KeyspaceName, shardName), tablets[0].Alias) - require.NoError(t, err) - - ValidateTopology(t, clusterInstance, true) - - // create Tables - RunSQL(ctx, t, sqlSchema, tablets[0]) - - CheckPrimaryTablet(t, clusterInstance, tablets[0]) - - ValidateTopology(t, clusterInstance, false) - time.Sleep(100 * time.Millisecond) // wait for replication to catchup - strArray := GetShardReplicationPositions(t, clusterInstance, KeyspaceName, shardName, true) - assert.Equal(t, len(tablets), len(strArray)) - assert.Contains(t, strArray[0], "primary") // primary first -} - //endregion //region database queries diff --git a/go/test/endtoend/vault/vault_test.go b/go/test/endtoend/vault/vault_test.go index eb5549308e6..fceca135497 100644 --- a/go/test/endtoend/vault/vault_test.go +++ b/go/test/endtoend/vault/vault_test.go @@ -282,7 +282,6 @@ func initializeClusterLate(t *testing.T) { _, err = tablet.VttabletProcess.QueryTablet(query, keyspace.Name, false) require.NoError(t, err) - tablet.VttabletProcess.EnableSemiSync = true err = tablet.VttabletProcess.Setup() require.NoError(t, err) diff --git a/go/test/endtoend/vtorc/utils/utils.go b/go/test/endtoend/vtorc/utils/utils.go index 1afa2a23cf8..75a10a72308 100644 --- a/go/test/endtoend/vtorc/utils/utils.go +++ b/go/test/endtoend/vtorc/utils/utils.go @@ -761,7 +761,6 @@ func SetupNewClusterSemiSync(t *testing.T) *VtOrcClusterInfo { clusterInstance.VtTabletExtraArgs = []string{ "--lock_tables_timeout", "5s", "--disable_active_reparents", - "--enable_semi_sync", } // Initialize Cluster @@ -840,7 +839,6 @@ func AddSemiSyncKeyspace(t *testing.T, clusterInfo *VtOrcClusterInfo) { clusterInfo.ClusterInstance.VtTabletExtraArgs = []string{ "--lock_tables_timeout", "5s", "--disable_active_reparents", - "--enable_semi_sync", } // Initialize Cluster diff --git 
a/go/vt/mysqlctl/rice-box.go b/go/vt/mysqlctl/rice-box.go index 6bca5b30235..c897a9048cf 100644 --- a/go/vt/mysqlctl/rice-box.go +++ b/go/vt/mysqlctl/rice-box.go @@ -11,97 +11,97 @@ func init() { // define files file2 := &embedded.EmbeddedFile{ Filename: "gomysql.pc.tmpl", - FileModTime: time.Unix(1655743820, 0), + FileModTime: time.Unix(1647748756, 0), Content: string("Name: GoMysql\nDescription: Flags for using mysql C client in go\n"), } file3 := &embedded.EmbeddedFile{ Filename: "init_db.sql", - FileModTime: time.Unix(1658248574, 0), + FileModTime: time.Unix(1658489960, 0), Content: string("# This file is executed immediately after mysql_install_db,\n# to initialize a fresh data directory.\n\n###############################################################################\n# WARNING: This sql is *NOT* safe for production use,\n# as it contains default well-known users and passwords.\n# Care should be taken to change these users and passwords\n# for production.\n###############################################################################\n\n###############################################################################\n# Equivalent of mysql_secure_installation\n###############################################################################\n\n# Changes during the init db should not make it to the binlog.\n# They could potentially create errant transactions on replicas.\nSET sql_log_bin = 0;\n# Remove anonymous users.\nDELETE FROM mysql.user WHERE User = '';\n\n# Disable remote root access (only allow UNIX socket).\nDELETE FROM mysql.user WHERE User = 'root' AND Host != 'localhost';\n\n# Remove test database.\nDROP DATABASE IF EXISTS test;\n\n###############################################################################\n# Vitess defaults\n###############################################################################\n\n# Vitess-internal database.\nCREATE DATABASE IF NOT EXISTS _vt;\n# Note that definitions of local_metadata and shard_metadata should be the 
same\n# as in production which is defined in go/vt/mysqlctl/metadata_tables.go.\nCREATE TABLE IF NOT EXISTS _vt.local_metadata (\n name VARCHAR(255) NOT NULL,\n value VARCHAR(255) NOT NULL,\n db_name VARBINARY(255) NOT NULL,\n PRIMARY KEY (db_name, name)\n ) ENGINE=InnoDB;\nCREATE TABLE IF NOT EXISTS _vt.shard_metadata (\n name VARCHAR(255) NOT NULL,\n value MEDIUMBLOB NOT NULL,\n db_name VARBINARY(255) NOT NULL,\n PRIMARY KEY (db_name, name)\n ) ENGINE=InnoDB;\n\n# Admin user with all privileges.\nCREATE USER 'vt_dba'@'localhost';\nGRANT ALL ON *.* TO 'vt_dba'@'localhost';\nGRANT GRANT OPTION ON *.* TO 'vt_dba'@'localhost';\n\n# User for app traffic, with global read-write access.\nCREATE USER 'vt_app'@'localhost';\nGRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, RELOAD, PROCESS, FILE,\n REFERENCES, INDEX, ALTER, SHOW DATABASES, CREATE TEMPORARY TABLES,\n LOCK TABLES, EXECUTE, REPLICATION CLIENT, CREATE VIEW,\n SHOW VIEW, CREATE ROUTINE, ALTER ROUTINE, CREATE USER, EVENT, TRIGGER\n ON *.* TO 'vt_app'@'localhost';\n\n# User for app debug traffic, with global read access.\nCREATE USER 'vt_appdebug'@'localhost';\nGRANT SELECT, SHOW DATABASES, PROCESS ON *.* TO 'vt_appdebug'@'localhost';\n\n# User for administrative operations that need to be executed as non-SUPER.\n# Same permissions as vt_app here.\nCREATE USER 'vt_allprivs'@'localhost';\nGRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, RELOAD, PROCESS, FILE,\n REFERENCES, INDEX, ALTER, SHOW DATABASES, CREATE TEMPORARY TABLES,\n LOCK TABLES, EXECUTE, REPLICATION SLAVE, REPLICATION CLIENT, CREATE VIEW,\n SHOW VIEW, CREATE ROUTINE, ALTER ROUTINE, CREATE USER, EVENT, TRIGGER\n ON *.* TO 'vt_allprivs'@'localhost';\n\n# User for slave replication connections.\nCREATE USER 'vt_repl'@'%';\nGRANT REPLICATION SLAVE ON *.* TO 'vt_repl'@'%';\n\n# User for Vitess VReplication (base vstreamers and vplayer).\nCREATE USER 'vt_filtered'@'localhost';\nGRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, RELOAD, PROCESS, 
FILE,\n REFERENCES, INDEX, ALTER, SHOW DATABASES, CREATE TEMPORARY TABLES,\n LOCK TABLES, EXECUTE, REPLICATION SLAVE, REPLICATION CLIENT, CREATE VIEW,\n SHOW VIEW, CREATE ROUTINE, ALTER ROUTINE, CREATE USER, EVENT, TRIGGER\n ON *.* TO 'vt_filtered'@'localhost';\n\n# User for general MySQL monitoring.\nCREATE USER 'vt_monitoring'@'localhost';\nGRANT SELECT, PROCESS, SUPER, REPLICATION CLIENT, RELOAD\n ON *.* TO 'vt_monitoring'@'localhost';\nGRANT SELECT, UPDATE, DELETE, DROP\n ON performance_schema.* TO 'vt_monitoring'@'localhost';\n\n# User for Orchestrator (https://github.com/openark/orchestrator).\nCREATE USER 'orc_client_user'@'%' IDENTIFIED BY 'orc_client_user_password';\nGRANT SUPER, PROCESS, REPLICATION SLAVE, RELOAD\n ON *.* TO 'orc_client_user'@'%';\nGRANT SELECT\n ON _vt.* TO 'orc_client_user'@'%';\n\nFLUSH PRIVILEGES;\n\nRESET SLAVE ALL;\nRESET MASTER;\n"), } file5 := &embedded.EmbeddedFile{ Filename: "mycnf/default.cnf", - FileModTime: time.Unix(1658182438, 0), + FileModTime: time.Unix(1647748756, 0), Content: string("# Global configuration that is auto-included for all MySQL/MariaDB versions\n\ndatadir = {{.DataDir}}\ninnodb_data_home_dir = {{.InnodbDataHomeDir}}\ninnodb_log_group_home_dir = {{.InnodbLogGroupHomeDir}}\nlog-error = {{.ErrorLogPath}}\nlog-bin = {{.BinLogPath}}\nrelay-log = {{.RelayLogPath}}\nrelay-log-index = {{.RelayLogIndexPath}}\npid-file = {{.PidFile}}\nport = {{.MysqlPort}}\n\n{{if .SecureFilePriv}}\nsecure-file-priv = {{.SecureFilePriv}}\n{{end}}\n\n# all db instances should start in read-only mode - once the db is started and\n# fully functional, we'll push it into read-write mode\nread-only\nserver-id = {{.ServerID}}\n\n# all db instances should skip starting replication threads - that way we can do any\n# additional configuration (like enabling semi-sync) before we connect to\n# the source.\nskip_slave_start\nsocket = {{.SocketFile}}\ntmpdir = {{.TmpDir}}\n\nslow-query-log-file = {{.SlowLogPath}}\n\n# These are sensible defaults 
that apply to all MySQL/MariaDB versions\n\nlong_query_time = 2\nslow-query-log\nskip-name-resolve\nconnect_timeout = 30\ninnodb_lock_wait_timeout = 20\nmax_allowed_packet = 64M\nmax_connections = 500\n\n\n"), } file6 := &embedded.EmbeddedFile{ Filename: "mycnf/mariadb100.cnf", - FileModTime: time.Unix(1658182438, 0), + FileModTime: time.Unix(1658506955, 0), - Content: string("# This file is auto-included when MariaDB 10.0 is detected.\n\n# Semi-sync replication is required for automated unplanned failover\n# (when the primary goes away). Here we just load the plugin so it's\n# available if desired, but it's disabled at startup.\n#\n# If the -enable_semi_sync flag is used, VTTablet will enable semi-sync\n# at the proper time when replication is set up, or when a primary is\n# promoted or demoted.\nplugin-load = rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so\n\nslave_net_timeout = 60\n\n# MariaDB 10.0 is unstrict by default\nsql_mode = STRICT_TRANS_TABLES,NO_ENGINE_SUBSTITUTION\n\n# enable strict mode so it's safe to compare sequence numbers across different server IDs.\ngtid_strict_mode = 1\ninnodb_stats_persistent = 0\n\n# When semi-sync is enabled, don't allow fallback to async\n# if you get no ack, or have no replicas. This is necessary to\n# prevent alternate futures when doing a failover in response to\n# a primary that becomes unresponsive.\nrpl_semi_sync_master_timeout = 1000000000000000000\nrpl_semi_sync_master_wait_no_slave = 1\n\n\ncharacter_set_server = utf8\ncollation_server = utf8_general_ci\n\nexpire_logs_days = 3\n\nsync_binlog = 1\nbinlog_format = ROW\nlog_slave_updates\nexpire_logs_days = 3\n\n# In MariaDB the default charset is latin1\n\ncharacter_set_server = utf8\ncollation_server = utf8_general_ci\n\n"), + Content: string("# This file is auto-included when MariaDB 10.0 is detected.\n\n# Semi-sync replication is required for automated unplanned failover\n# (when the primary goes away). 
Here we just load the plugin so it's\n# available if desired, but it's disabled at startup.\n#\n# VTTablet will enable semi-sync at the proper time when replication is set up,\n# or when a primary is promoted or demoted based on the durability policy configured.\nplugin-load = rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so\n\nslave_net_timeout = 60\n\n# MariaDB 10.0 is unstrict by default\nsql_mode = STRICT_TRANS_TABLES,NO_ENGINE_SUBSTITUTION\n\n# enable strict mode so it's safe to compare sequence numbers across different server IDs.\ngtid_strict_mode = 1\ninnodb_stats_persistent = 0\n\n# When semi-sync is enabled, don't allow fallback to async\n# if you get no ack, or have no replicas. This is necessary to\n# prevent alternate futures when doing a failover in response to\n# a primary that becomes unresponsive.\nrpl_semi_sync_master_timeout = 1000000000000000000\nrpl_semi_sync_master_wait_no_slave = 1\n\n\ncharacter_set_server = utf8\ncollation_server = utf8_general_ci\n\nexpire_logs_days = 3\n\nsync_binlog = 1\nbinlog_format = ROW\nlog_slave_updates\nexpire_logs_days = 3\n\n# In MariaDB the default charset is latin1\n\ncharacter_set_server = utf8\ncollation_server = utf8_general_ci\n\n"), } file7 := &embedded.EmbeddedFile{ Filename: "mycnf/mariadb101.cnf", - FileModTime: time.Unix(1658182438, 0), + FileModTime: time.Unix(1658506955, 0), - Content: string("# This file is auto-included when MariaDB 10.1 is detected.\n\n# Semi-sync replication is required for automated unplanned failover\n# (when the primary goes away). 
Here we just load the plugin so it's\n# available if desired, but it's disabled at startup.\n#\n# If the -enable_semi_sync flag is used, VTTablet will enable semi-sync\n# at the proper time when replication is set up, or when a primary is\n# promoted or demoted.\nplugin-load = rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so\n\nslave_net_timeout = 60\n\n# MariaDB 10.1 default is only no-engine-substitution and no-auto-create-user\nsql_mode = STRICT_TRANS_TABLES,NO_ENGINE_SUBSTITUTION,NO_AUTO_CREATE_USER\n\n# enable strict mode so it's safe to compare sequence numbers across different server IDs.\ngtid_strict_mode = 1\ninnodb_stats_persistent = 0\n\n# When semi-sync is enabled, don't allow fallback to async\n# if you get no ack, or have no replicas. This is necessary to\n# prevent alternate futures when doing a failover in response to\n# a primary that becomes unresponsive.\nrpl_semi_sync_master_timeout = 1000000000000000000\nrpl_semi_sync_master_wait_no_slave = 1\n\n\ncharacter_set_server = utf8\ncollation_server = utf8_general_ci\n\nexpire_logs_days = 3\n\nsync_binlog = 1\nbinlog_format = ROW\nlog_slave_updates\nexpire_logs_days = 3\n\n# In MariaDB the default charset is latin1\n\ncharacter_set_server = utf8\ncollation_server = utf8_general_ci\n"), + Content: string("# This file is auto-included when MariaDB 10.1 is detected.\n\n# Semi-sync replication is required for automated unplanned failover\n# (when the primary goes away). 
Here we just load the plugin so it's\n# available if desired, but it's disabled at startup.\n#\n# VTTablet will enable semi-sync at the proper time when replication is set up,\n# or when a primary is promoted or demoted based on the durability policy configured.\nplugin-load = rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so\n\nslave_net_timeout = 60\n\n# MariaDB 10.1 default is only no-engine-substitution and no-auto-create-user\nsql_mode = STRICT_TRANS_TABLES,NO_ENGINE_SUBSTITUTION,NO_AUTO_CREATE_USER\n\n# enable strict mode so it's safe to compare sequence numbers across different server IDs.\ngtid_strict_mode = 1\ninnodb_stats_persistent = 0\n\n# When semi-sync is enabled, don't allow fallback to async\n# if you get no ack, or have no replicas. This is necessary to\n# prevent alternate futures when doing a failover in response to\n# a primary that becomes unresponsive.\nrpl_semi_sync_master_timeout = 1000000000000000000\nrpl_semi_sync_master_wait_no_slave = 1\n\n\ncharacter_set_server = utf8\ncollation_server = utf8_general_ci\n\nexpire_logs_days = 3\n\nsync_binlog = 1\nbinlog_format = ROW\nlog_slave_updates\nexpire_logs_days = 3\n\n# In MariaDB the default charset is latin1\n\ncharacter_set_server = utf8\ncollation_server = utf8_general_ci\n"), } file8 := &embedded.EmbeddedFile{ Filename: "mycnf/mariadb102.cnf", - FileModTime: time.Unix(1658182438, 0), + FileModTime: time.Unix(1658506955, 0), - Content: string("# This file is auto-included when MariaDB 10.2 is detected.\n\n# Semi-sync replication is required for automated unplanned failover\n# (when the primary goes away). 
Here we just load the plugin so it's\n# available if desired, but it's disabled at startup.\n#\n# If the -enable_semi_sync flag is used, VTTablet will enable semi-sync\n# at the proper time when replication is set up, or when a primary is\n# promoted or demoted.\nplugin-load = rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so\n\n# enable strict mode so it's safe to compare sequence numbers across different server IDs.\ngtid_strict_mode = 1\ninnodb_stats_persistent = 0\n\n# When semi-sync is enabled, don't allow fallback to async\n# if you get no ack, or have no replicas. This is necessary to\n# prevent alternate futures when doing a failover in response to\n# a primary that becomes unresponsive.\nrpl_semi_sync_master_timeout = 1000000000000000000\nrpl_semi_sync_master_wait_no_slave = 1\n\n\ncharacter_set_server = utf8\ncollation_server = utf8_general_ci\n\nexpire_logs_days = 3\n\nsync_binlog = 1\nbinlog_format = ROW\nlog_slave_updates\nexpire_logs_days = 3\n\n# In MariaDB the default charset is latin1\n\ncharacter_set_server = utf8\ncollation_server = utf8_general_ci\n"), + Content: string("# This file is auto-included when MariaDB 10.2 is detected.\n\n# Semi-sync replication is required for automated unplanned failover\n# (when the primary goes away). Here we just load the plugin so it's\n# available if desired, but it's disabled at startup.\n#\n# VTTablet will enable semi-sync at the proper time when replication is set up,\n# or when a primary is promoted or demoted based on the durability policy configured.\nplugin-load = rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so\n\n# enable strict mode so it's safe to compare sequence numbers across different server IDs.\ngtid_strict_mode = 1\ninnodb_stats_persistent = 0\n\n# When semi-sync is enabled, don't allow fallback to async\n# if you get no ack, or have no replicas. 
This is necessary to\n# prevent alternate futures when doing a failover in response to\n# a primary that becomes unresponsive.\nrpl_semi_sync_master_timeout = 1000000000000000000\nrpl_semi_sync_master_wait_no_slave = 1\n\n\ncharacter_set_server = utf8\ncollation_server = utf8_general_ci\n\nexpire_logs_days = 3\n\nsync_binlog = 1\nbinlog_format = ROW\nlog_slave_updates\nexpire_logs_days = 3\n\n# In MariaDB the default charset is latin1\n\ncharacter_set_server = utf8\ncollation_server = utf8_general_ci\n"), } file9 := &embedded.EmbeddedFile{ Filename: "mycnf/mariadb103.cnf", - FileModTime: time.Unix(1658182438, 0), + FileModTime: time.Unix(1647748756, 0), Content: string("# This file is auto-included when MariaDB 10.3 is detected.\n\n# enable strict mode so it's safe to compare sequence numbers across different server IDs.\ngtid_strict_mode = 1\ninnodb_stats_persistent = 0\n\n# When semi-sync is enabled, don't allow fallback to async\n# if you get no ack, or have no replicas. This is necessary to\n# prevent alternate futures when doing a failover in response to\n# a primary that becomes unresponsive.\nrpl_semi_sync_master_timeout = 1000000000000000000\nrpl_semi_sync_master_wait_no_slave = 1\n\n\ncharacter_set_server = utf8\ncollation_server = utf8_general_ci\n\nexpire_logs_days = 3\n\nsync_binlog = 1\nbinlog_format = ROW\nlog_slave_updates\nexpire_logs_days = 3\n\n# In MariaDB the default charset is latin1\n\ncharacter_set_server = utf8\ncollation_server = utf8_general_ci\n\n\n"), } filea := &embedded.EmbeddedFile{ Filename: "mycnf/mariadb104.cnf", - FileModTime: time.Unix(1658182438, 0), + FileModTime: time.Unix(1647748756, 0), Content: string("# This file is auto-included when MariaDB 10.4 is detected.\n\n# enable strict mode so it's safe to compare sequence numbers across different server IDs.\ngtid_strict_mode = 1\ninnodb_stats_persistent = 0\n\n# When semi-sync is enabled, don't allow fallback to async\n# if you get no ack, or have no replicas. 
This is necessary to\n# prevent alternate futures when doing a failover in response to\n# a primary that becomes unresponsive.\nrpl_semi_sync_master_timeout = 1000000000000000000\nrpl_semi_sync_master_wait_no_slave = 1\n\n\ncharacter_set_server = utf8\ncollation_server = utf8_general_ci\n\nexpire_logs_days = 3\n\nsync_binlog = 1\nbinlog_format = ROW\nlog_slave_updates\nexpire_logs_days = 3\n\n# In MariaDB the default charset is latin1\n\ncharacter_set_server = utf8\ncollation_server = utf8_general_ci\n\n\n"), } fileb := &embedded.EmbeddedFile{ Filename: "mycnf/mysql57.cnf", - FileModTime: time.Unix(1658248574, 0), + FileModTime: time.Unix(1658506955, 0), - Content: string("# This file is auto-included when MySQL 5.7 is detected.\n\n# MySQL 5.7 does not enable the binary log by default, and \n# info repositories default to file\n\ngtid_mode = ON\nlog_slave_updates\nenforce_gtid_consistency\nexpire_logs_days = 3\nmaster_info_repository = TABLE\nrelay_log_info_repository = TABLE\nrelay_log_purge = 1\nrelay_log_recovery = 1\n\n# In MySQL 5.7 the default charset is latin1\n\ncharacter_set_server = utf8\ncollation_server = utf8_general_ci\n\n# Semi-sync replication is required for automated unplanned failover\n# (when the primary goes away). Here we just load the plugin so it's\n# available if desired, but it's disabled at startup.\n#\n# If the -enable_semi_sync flag is used, VTTablet will enable semi-sync\n# at the proper time when replication is set up, or when a primary is\n# promoted or demoted.\nplugin-load = rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so\n\n# When semi-sync is enabled, don't allow fallback to async\n# if you get no ack, or have no replicas. 
This is necessary to\n# prevent alternate futures when doing a failover in response to\n# a primary that becomes unresponsive.\nrpl_semi_sync_master_timeout = 1000000000000000000\nrpl_semi_sync_master_wait_no_slave = 1\n\n"), + Content: string("# This file is auto-included when MySQL 5.7 is detected.\n\n# MySQL 5.7 does not enable the binary log by default, and \n# info repositories default to file\n\ngtid_mode = ON\nlog_slave_updates\nenforce_gtid_consistency\nexpire_logs_days = 3\nmaster_info_repository = TABLE\nrelay_log_info_repository = TABLE\nrelay_log_purge = 1\nrelay_log_recovery = 1\n\n# In MySQL 5.7 the default charset is latin1\n\ncharacter_set_server = utf8\ncollation_server = utf8_general_ci\n\n# Semi-sync replication is required for automated unplanned failover\n# (when the primary goes away). Here we just load the plugin so it's\n# available if desired, but it's disabled at startup.\n#\n# VTTablet will enable semi-sync at the proper time when replication is set up,\n# or when a primary is promoted or demoted based on the durability policy configured.\nplugin-load = rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so\n\n# When semi-sync is enabled, don't allow fallback to async\n# if you get no ack, or have no replicas. 
This is necessary to\n# prevent alternate futures when doing a failover in response to\n# a primary that becomes unresponsive.\nrpl_semi_sync_master_timeout = 1000000000000000000\nrpl_semi_sync_master_wait_no_slave = 1\n\n"), } filec := &embedded.EmbeddedFile{ Filename: "mycnf/mysql80.cnf", - FileModTime: time.Unix(1658263780, 0), + FileModTime: time.Unix(1658506955, 0), - Content: string("# This file is auto-included when MySQL 8.0 is detected.\n\n# MySQL 8.0 enables binlog by default with sync_binlog and TABLE info repositories\n# It does not enable GTIDs or enforced GTID consistency\n\ngtid_mode = ON\nenforce_gtid_consistency\nrelay_log_recovery = 1\nbinlog_expire_logs_seconds = 259200\n\n# disable mysqlx\nmysqlx = 0\n\n# 8.0 changes the default auth-plugin to caching_sha2_password\ndefault_authentication_plugin = mysql_native_password\n\n# Semi-sync replication is required for automated unplanned failover\n# (when the primary goes away). Here we just load the plugin so it's\n# available if desired, but it's disabled at startup.\n#\n# If the -enable_semi_sync flag is used, VTTablet will enable semi-sync\n# at the proper time when replication is set up, or when a primary is\n# promoted or demoted.\nplugin-load = rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so\n\n# MySQL 8.0 will not load plugins during --initialize\n# which makes these options unknown. Prefixing with --loose\n# tells the server it's fine if they are not understood.\nloose_rpl_semi_sync_master_timeout = 1000000000000000000\nloose_rpl_semi_sync_master_wait_no_slave = 1\n\n# Allow the information_schema table stats to track reality\n# closer than the default of 24 hours. 
MySQL 8.0 only variable\ninformation_schema_stats_expiry = 3600\n\n"), + Content: string("# This file is auto-included when MySQL 8.0 is detected.\n\n# MySQL 8.0 enables binlog by default with sync_binlog and TABLE info repositories\n# It does not enable GTIDs or enforced GTID consistency\n\ngtid_mode = ON\nenforce_gtid_consistency\nrelay_log_recovery = 1\nbinlog_expire_logs_seconds = 259200\n\n# disable mysqlx\nmysqlx = 0\n\n# 8.0 changes the default auth-plugin to caching_sha2_password\ndefault_authentication_plugin = mysql_native_password\n\n# Semi-sync replication is required for automated unplanned failover\n# (when the primary goes away). Here we just load the plugin so it's\n# available if desired, but it's disabled at startup.\n#\n# VTTablet will enable semi-sync at the proper time when replication is set up,\n# or when a primary is promoted or demoted based on the durability policy configured.\nplugin-load = rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so\n\n# MySQL 8.0 will not load plugins during --initialize\n# which makes these options unknown. Prefixing with --loose\n# tells the server it's fine if they are not understood.\nloose_rpl_semi_sync_master_timeout = 1000000000000000000\nloose_rpl_semi_sync_master_wait_no_slave = 1\n\n# Allow the information_schema table stats to track reality\n# closer than the default of 24 hours. 
MySQL 8.0 only variable\ninformation_schema_stats_expiry = 3600\n\n"), } filed := &embedded.EmbeddedFile{ Filename: "mycnf/sbr.cnf", - FileModTime: time.Unix(1655743820, 0), + FileModTime: time.Unix(1647748756, 0), Content: string("# This file is used to allow legacy tests to pass\n# In theory it should not be required\nbinlog_format=statement\n"), } filee := &embedded.EmbeddedFile{ Filename: "mycnf/test-suite.cnf", - FileModTime: time.Unix(1658182438, 0), + FileModTime: time.Unix(1647748756, 0), Content: string("# This sets some unsafe settings specifically for \n# the test-suite which is currently MySQL 5.7 based\n# In future it should be renamed testsuite.cnf\n\ninnodb_buffer_pool_size = 32M\ninnodb_flush_log_at_trx_commit = 0\ninnodb_log_buffer_size = 1M\ninnodb_log_file_size = 5M\n\n# Native AIO tends to run into aio-max-nr limit during test startup.\ninnodb_use_native_aio = 0\n\nkey_buffer_size = 2M\nsync_binlog=0\ninnodb_doublewrite=0\n\n# These two settings are required for the testsuite to pass, \n# but enabling them does not spark joy. They should be removed\n# in the future. 
See:\n# https://github.com/vitessio/vitess/issues/5396\n\nsql_mode = STRICT_TRANS_TABLES\n\n# set a short heartbeat interval in order to detect failures quickly\nslave_net_timeout = 4\n"), } fileg := &embedded.EmbeddedFile{ Filename: "orchestrator/default.json", - FileModTime: time.Unix(1655743820, 0), + FileModTime: time.Unix(1647748756, 0), Content: string("{\n \"Debug\": true,\n \"MySQLTopologyUser\": \"orc_client_user\",\n \"MySQLTopologyPassword\": \"orc_client_user_password\",\n \"MySQLReplicaUser\": \"vt_repl\",\n \"MySQLReplicaPassword\": \"\",\n \"RecoveryPeriodBlockSeconds\": 5\n}\n"), } filei := &embedded.EmbeddedFile{ Filename: "tablet/default.yaml", - FileModTime: time.Unix(1658182438, 0), + FileModTime: time.Unix(1647748756, 0), Content: string("tabletID: zone-1234\n\ninit:\n dbName: # init_db_name_override\n keyspace: # init_keyspace\n shard: # init_shard\n tabletType: # init_tablet_type\n timeoutSeconds: 60 # init_timeout\n\ndb:\n socket: # db_socket\n host: # db_host\n port: 0 # db_port\n charSet: # db_charset\n flags: 0 # db_flags\n flavor: # db_flavor\n sslCa: # db_ssl_ca\n sslCaPath: # db_ssl_ca_path\n sslCert: # db_ssl_cert\n sslKey: # db_ssl_key\n serverName: # db_server_name\n connectTimeoutMilliseconds: 0 # db_connect_timeout_ms\n app:\n user: vt_app # db_app_user\n password: # db_app_password\n useSsl: true # db_app_use_ssl\n preferTcp: false\n dba:\n user: vt_dba # db_dba_user\n password: # db_dba_password\n useSsl: true # db_dba_use_ssl\n preferTcp: false\n filtered:\n user: vt_filtered # db_filtered_user\n password: # db_filtered_password\n useSsl: true # db_filtered_use_ssl\n preferTcp: false\n repl:\n user: vt_repl # db_repl_user\n password: # db_repl_password\n useSsl: true # db_repl_use_ssl\n preferTcp: false\n appdebug:\n user: vt_appdebug # db_appdebug_user\n password: # db_appdebug_password\n useSsl: true # db_appdebug_use_ssl\n preferTcp: false\n allprivs:\n user: vt_allprivs # db_allprivs_user\n password: # 
db_allprivs_password\n useSsl: true # db_allprivs_use_ssl\n preferTcp: false\n\noltpReadPool:\n size: 16 # queryserver-config-pool-size\n timeoutSeconds: 0 # queryserver-config-query-pool-timeout\n idleTimeoutSeconds: 1800 # queryserver-config-idle-timeout\n prefillParallelism: 0 # queryserver-config-pool-prefill-parallelism\n maxWaiters: 50000 # queryserver-config-query-pool-waiter-cap\n\nolapReadPool:\n size: 200 # queryserver-config-stream-pool-size\n timeoutSeconds: 0 # queryserver-config-query-pool-timeout\n idleTimeoutSeconds: 1800 # queryserver-config-idle-timeout\n prefillParallelism: 0 # queryserver-config-stream-pool-prefill-parallelism\n maxWaiters: 0\n\ntxPool:\n size: 20 # queryserver-config-transaction-cap\n timeoutSeconds: 1 # queryserver-config-txpool-timeout\n idleTimeoutSeconds: 1800 # queryserver-config-idle-timeout\n prefillParallelism: 0 # queryserver-config-transaction-prefill-parallelism\n maxWaiters: 50000 # queryserver-config-txpool-waiter-cap\n\noltp:\n queryTimeoutSeconds: 30 # queryserver-config-query-timeout\n txTimeoutSeconds: 30 # queryserver-config-transaction-timeout\n maxRows: 10000 # queryserver-config-max-result-size\n warnRows: 0 # queryserver-config-warn-result-size\n\nhealthcheck:\n intervalSeconds: 20 # health_check_interval\n degradedThresholdSeconds: 30 # degraded_threshold\n unhealthyThresholdSeconds: 7200 # unhealthy_threshold\n\ngracePeriods:\n shutdownSeconds: 0 # shutdown_grace_period\n transitionSeconds: 0 # serving_state_grace_period\n\nreplicationTracker:\n mode: disable # enable_replication_reporter\n heartbeatIntervalMilliseconds: 0 # heartbeat_enable, heartbeat_interval\n\nhotRowProtection:\n mode: disable|dryRun|enable # enable_hot_row_protection, enable_hot_row_protection_dry_run\n # Recommended value: same as txPool.size.\n maxQueueSize: 20 # hot_row_protection_max_queue_size\n maxGlobalQueueSize: 1000 # hot_row_protection_max_global_queue_size\n maxConcurrency: 5 # 
hot_row_protection_concurrent_transactions\n\nconsolidator: enable|disable|notOnPrimary # enable-consolidator, enable-consolidator-replicas\npassthroughDML: false # queryserver-config-passthrough-dmls\nstreamBufferSize: 32768 # queryserver-config-stream-buffer-size\nqueryCacheSize: 5000 # queryserver-config-query-cache-size\nschemaReloadIntervalSeconds: 1800 # queryserver-config-schema-reload-time\nwatchReplication: false # watch_replication_stream\nterseErrors: false # queryserver-config-terse-errors\nmessagePostponeParallelism: 4 # queryserver-config-message-postpone-cap\ncacheResultFields: true # enable-query-plan-field-caching\n\n\n# The following flags are currently not supported.\n# enforce_strict_trans_tables\n# queryserver-config-strict-table-acl\n# queryserver-config-enable-table-acl-dry-run\n# queryserver-config-acl-exempt-acl\n# enable-tx-throttler\n# tx-throttler-config\n# tx-throttler-healthcheck-cells\n# enable_transaction_limit\n# enable_transaction_limit_dry_run\n# transaction_limit_per_user\n# transaction_limit_by_username\n# transaction_limit_by_principal\n# transaction_limit_by_component\n# transaction_limit_by_subcomponent\n"), } filej := &embedded.EmbeddedFile{ Filename: "zk-client-dev.json", - FileModTime: time.Unix(1655743820, 0), + FileModTime: time.Unix(1647748756, 0), Content: string("{\n \"local\": \"localhost:3863\",\n \"global\": \"localhost:3963\"\n}\n"), } filel := &embedded.EmbeddedFile{ Filename: "zkcfg/zoo.cfg", - FileModTime: time.Unix(1658248574, 0), + FileModTime: time.Unix(1658489960, 0), Content: string("tickTime=2000\ndataDir={{.DataDir}}\nclientPort={{.ClientPort}}\ninitLimit=5\nsyncLimit=2\nmaxClientCnxns=0\n# enable commands like ruok by default\n4lw.commands.whitelist=*\n{{range .Servers}}\nserver.{{.ServerId}}={{.Hostname}}:{{.LeaderPort}}:{{.ElectionPort}}\n{{end}}\n"), } @@ -109,7 +109,7 @@ func init() { // define dirs dir1 := &embedded.EmbeddedDir{ Filename: "", - DirModTime: time.Unix(1658248574, 0), + DirModTime: 
time.Unix(1658489960, 0), ChildFiles: []*embedded.EmbeddedFile{ file2, // "gomysql.pc.tmpl" file3, // "init_db.sql" @@ -119,7 +119,7 @@ func init() { } dir4 := &embedded.EmbeddedDir{ Filename: "mycnf", - DirModTime: time.Unix(1658263780, 0), + DirModTime: time.Unix(1658506955, 0), ChildFiles: []*embedded.EmbeddedFile{ file5, // "mycnf/default.cnf" file6, // "mycnf/mariadb100.cnf" @@ -136,7 +136,7 @@ func init() { } dirf := &embedded.EmbeddedDir{ Filename: "orchestrator", - DirModTime: time.Unix(1655743820, 0), + DirModTime: time.Unix(1647748756, 0), ChildFiles: []*embedded.EmbeddedFile{ fileg, // "orchestrator/default.json" @@ -144,7 +144,7 @@ func init() { } dirh := &embedded.EmbeddedDir{ Filename: "tablet", - DirModTime: time.Unix(1658182438, 0), + DirModTime: time.Unix(1647748756, 0), ChildFiles: []*embedded.EmbeddedFile{ filei, // "tablet/default.yaml" @@ -152,7 +152,7 @@ func init() { } dirk := &embedded.EmbeddedDir{ Filename: "zkcfg", - DirModTime: time.Unix(1658248574, 0), + DirModTime: time.Unix(1658489960, 0), ChildFiles: []*embedded.EmbeddedFile{ filel, // "zkcfg/zoo.cfg" @@ -175,7 +175,7 @@ func init() { // register embeddedBox embedded.RegisterEmbeddedBox(`../../../config`, &embedded.EmbeddedBox{ Name: `../../../config`, - Time: time.Unix(1658248574, 0), + Time: time.Unix(1658489960, 0), Dirs: map[string]*embedded.EmbeddedDir{ "": dir1, "mycnf": dir4, diff --git a/go/vt/vttablet/tabletmanager/rpc_replication.go b/go/vt/vttablet/tabletmanager/rpc_replication.go index adf1d3555f4..6bedaf25051 100644 --- a/go/vt/vttablet/tabletmanager/rpc_replication.go +++ b/go/vt/vttablet/tabletmanager/rpc_replication.go @@ -39,7 +39,7 @@ import ( ) var ( - enableSemiSync = flag.Bool("enable_semi_sync", false, "Enable semi-sync when configuring replication, on primary and replica tablets only (rdonly tablets will not ack).") + _ = flag.Bool("enable_semi_sync", false, "DEPRECATED - Set the correct durability policy on the keyspace instead.") setSuperReadOnly = 
flag.Bool("use_super_read_only", false, "Set super_read_only flag when performing planned failover.") ) @@ -993,45 +993,18 @@ func isPrimaryEligible(tabletType topodatapb.TabletType) bool { } func (tm *TabletManager) fixSemiSync(tabletType topodatapb.TabletType, semiSync SemiSyncAction) error { - if !*enableSemiSync { - // Semi-sync handling is not enabled. - if semiSync == SemiSyncActionSet { - log.Error("invalid configuration - semi-sync should be setup according to durability policies, but enable_semi_sync is not set") - } + switch semiSync { + case SemiSyncActionNone: return nil - } - - // Only enable if we're eligible for becoming primary (REPLICA type). - // Ineligible tablets (RDONLY) shouldn't ACK because we'll never promote them. - if !isPrimaryEligible(tabletType) { - if semiSync == SemiSyncActionSet { - log.Error("invalid configuration - semi-sync should be setup according to durability policies, but the tablet is not primaryEligible") - } + case SemiSyncActionSet: + // Always enable replica-side since it doesn't hurt to keep it on for a primary. + // The primary-side needs to be off for a replica, or else it will get stuck. + return tm.MysqlDaemon.SetSemiSyncEnabled(tabletType == topodatapb.TabletType_PRIMARY, true) + case SemiSyncActionUnset: return tm.MysqlDaemon.SetSemiSyncEnabled(false, false) + default: + return vterrors.Errorf(vtrpc.Code_INTERNAL, "Unknown SemiSyncAction - %v", semiSync) } - - if semiSync == SemiSyncActionUnset { - log.Error("invalid configuration - enabling semi sync even though not specified by durability policies. Possibly in the process of upgrading.") - } - // Always enable replica-side since it doesn't hurt to keep it on for a primary. - // The primary-side needs to be off for a replica, or else it will get stuck. 
- return tm.MysqlDaemon.SetSemiSyncEnabled(tabletType == topodatapb.TabletType_PRIMARY, true) - - // This following code will be uncommented and the above deleted when we are ready to use the - // durability policies for setting the semi_sync information - - //switch semiSync { - //case SemiSyncActionNone: - // return nil - //case SemiSyncActionSet: - // // Always enable replica-side since it doesn't hurt to keep it on for a primary. - // // The primary-side needs to be off for a replica, or else it will get stuck. - // return tm.MysqlDaemon.SetSemiSyncEnabled(tabletType == topodatapb.TabletType_PRIMARY, true) - //case SemiSyncActionUnset: - // return tm.MysqlDaemon.SetSemiSyncEnabled(false, false) - //default: - // return vterrors.Errorf(vtrpc.Code_INTERNAL, "Unknown SemiSyncAction - %v", semiSync) - //} } func (tm *TabletManager) isPrimarySideSemiSyncEnabled() bool { @@ -1040,14 +1013,10 @@ func (tm *TabletManager) isPrimarySideSemiSyncEnabled() bool { } func (tm *TabletManager) fixSemiSyncAndReplication(tabletType topodatapb.TabletType, semiSync SemiSyncAction) error { - if !*enableSemiSync { - // Semi-sync handling is not enabled. + if semiSync == SemiSyncActionNone { + // Semi-sync handling is not required. return nil } - //if semiSync == SemiSyncActionNone { - // // Semi-sync handling is not required. - // return nil - //} if tabletType == topodatapb.TabletType_PRIMARY { // Primary is special. It is always handled at the diff --git a/go/vt/vttablet/tabletmanager/rpc_replication_test.go b/go/vt/vttablet/tabletmanager/rpc_replication_test.go index 93bc8ccfbd2..b37d88518bf 100644 --- a/go/vt/vttablet/tabletmanager/rpc_replication_test.go +++ b/go/vt/vttablet/tabletmanager/rpc_replication_test.go @@ -17,16 +17,11 @@ limitations under the License. 
package tabletmanager import ( - "bytes" "context" "fmt" - "io" - "os" "testing" "time" - "vitess.io/vitess/go/vt/proto/topodata" - "github.com/stretchr/testify/require" "vitess.io/vitess/go/vt/mysqlctl/fakemysqldaemon" @@ -79,117 +74,3 @@ func TestPromoteReplicaReplicationManagerFailure(t *testing.T) { // At the end we expect the replication manager to be stopped. require.True(t, tm.replManager.ticks.Running()) } - -func captureStderr(f func()) (string, error) { - old := os.Stderr // keep backup of the real stderr - r, w, err := os.Pipe() - if err != nil { - return "", err - } - os.Stderr = w - - outC := make(chan string) - // copy the output in a separate goroutine so printing can't block indefinitely - go func() { - var buf bytes.Buffer - io.Copy(&buf, r) - outC <- buf.String() - }() - - // calling function which stderr we are going to capture: - f() - - // back to normal state - w.Close() - os.Stderr = old // restoring the real stderr - return <-outC, nil -} - -func TestTabletManager_fixSemiSync(t *testing.T) { - tests := []struct { - name string - tabletType topodata.TabletType - semiSync SemiSyncAction - logOutput string - shouldEnableSemiSync bool - }{ - { - name: "enableSemiSync=true(primary eligible),durabilitySemiSync=true", - tabletType: topodata.TabletType_REPLICA, - semiSync: SemiSyncActionSet, - logOutput: "", - shouldEnableSemiSync: true, - }, { - name: "enableSemiSync=true(primary eligible),durabilitySemiSync=false", - tabletType: topodata.TabletType_REPLICA, - semiSync: SemiSyncActionUnset, - logOutput: "invalid configuration - enabling semi sync even though not specified by durability policies.", - shouldEnableSemiSync: true, - }, { - name: "enableSemiSync=true(primary eligible),durabilitySemiSync=none", - tabletType: topodata.TabletType_REPLICA, - semiSync: SemiSyncActionNone, - logOutput: "", - shouldEnableSemiSync: true, - }, { - name: "enableSemiSync=true(primary not-eligible),durabilitySemiSync=true", - tabletType: 
topodata.TabletType_DRAINED, - semiSync: SemiSyncActionSet, - logOutput: "invalid configuration - semi-sync should be setup according to durability policies, but the tablet is not primaryEligible", - shouldEnableSemiSync: true, - }, { - name: "enableSemiSync=true(primary not-eligible),durabilitySemiSync=false", - tabletType: topodata.TabletType_DRAINED, - semiSync: SemiSyncActionUnset, - logOutput: "", - shouldEnableSemiSync: true, - }, { - name: "enableSemiSync=true(primary not-eligible),durabilitySemiSync=none", - tabletType: topodata.TabletType_DRAINED, - semiSync: SemiSyncActionNone, - logOutput: "", - shouldEnableSemiSync: true, - }, { - name: "enableSemiSync=false,durabilitySemiSync=true", - tabletType: topodata.TabletType_REPLICA, - semiSync: SemiSyncActionSet, - logOutput: "invalid configuration - semi-sync should be setup according to durability policies, but enable_semi_sync is not set", - shouldEnableSemiSync: false, - }, { - name: "enableSemiSync=false,durabilitySemiSync=false", - tabletType: topodata.TabletType_REPLICA, - semiSync: SemiSyncActionUnset, - logOutput: "", - shouldEnableSemiSync: false, - }, { - name: "enableSemiSync=false,durabilitySemiSync=none", - tabletType: topodata.TabletType_REPLICA, - semiSync: SemiSyncActionNone, - logOutput: "", - shouldEnableSemiSync: false, - }, - } - oldEnableSemiSync := *enableSemiSync - defer func() { - *enableSemiSync = oldEnableSemiSync - }() - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - *enableSemiSync = tt.shouldEnableSemiSync - fakeMysql := fakemysqldaemon.NewFakeMysqlDaemon(nil) - tm := &TabletManager{ - MysqlDaemon: fakeMysql, - } - logOutput, err := captureStderr(func() { - err := tm.fixSemiSync(tt.tabletType, tt.semiSync) - require.NoError(t, err) - }) - require.NoError(t, err) - if tt.logOutput != "" { - require.Contains(t, logOutput, tt.logOutput) - } else { - require.Equal(t, "", logOutput) - } - }) - } -} diff --git a/go/vt/wrangler/testlib/semi_sync_test.go 
b/go/vt/wrangler/testlib/semi_sync_test.go index a9485742be9..bcb16b10836 100644 --- a/go/vt/wrangler/testlib/semi_sync_test.go +++ b/go/vt/wrangler/testlib/semi_sync_test.go @@ -17,7 +17,6 @@ limitations under the License. package testlib import ( - "flag" "testing" "github.com/stretchr/testify/assert" @@ -25,11 +24,6 @@ import ( "vitess.io/vitess/go/vt/topo/topoproto" ) -func init() { - // Enable semi-sync for all testlib tests. - flag.Set("enable_semi_sync", "true") -} - func checkSemiSyncEnabled(t *testing.T, primary, replica bool, tablets ...*FakeTablet) { for _, tablet := range tablets { assert.Equal(t, primary, tablet.FakeMysqlDaemon.SemiSyncPrimaryEnabled, "%v: SemiSyncPrimaryEnabled", topoproto.TabletAliasString(tablet.Tablet.Alias)) diff --git a/vitess-mixin/e2e/external_db/mysql/mysql56.cnf b/vitess-mixin/e2e/external_db/mysql/mysql56.cnf index 7454231c33d..fdd34b1bd2e 100644 --- a/vitess-mixin/e2e/external_db/mysql/mysql56.cnf +++ b/vitess-mixin/e2e/external_db/mysql/mysql56.cnf @@ -19,9 +19,8 @@ innodb_use_native_aio = 0 # (when the master goes away). Here we just load the plugin so it's # available if desired, but it's disabled at startup. # -# If the -enable_semi_sync flag is used, VTTablet will enable semi-sync -# at the proper time when replication is set up, or when masters are -# promoted or demoted. +# VTTablet will enable semi-sync at the proper time when replication is set up, +# or when a primary is promoted or demoted based on the durability policy configured. plugin-load = rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so # When semi-sync is enabled, don't allow fallback to async diff --git a/vitess-mixin/e2e/external_db/mysql/mysql57.cnf b/vitess-mixin/e2e/external_db/mysql/mysql57.cnf index 08935674b37..ebf301187eb 100644 --- a/vitess-mixin/e2e/external_db/mysql/mysql57.cnf +++ b/vitess-mixin/e2e/external_db/mysql/mysql57.cnf @@ -21,9 +21,8 @@ collation_server = utf8_general_ci # (when the master goes away). 
Here we just load the plugin so it's # available if desired, but it's disabled at startup. # -# If the -enable_semi_sync flag is used, VTTablet will enable semi-sync -# at the proper time when replication is set up, or when masters are -# promoted or demoted. +# VTTablet will enable semi-sync at the proper time when replication is set up, +# or when a primary is promoted or demoted based on the durability policy configured. plugin-load = rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so # When semi-sync is enabled, don't allow fallback to async diff --git a/vitess-mixin/e2e/vttablet-up.sh b/vitess-mixin/e2e/vttablet-up.sh index 72ac44fcd48..a4bf31a5c42 100755 --- a/vitess-mixin/e2e/vttablet-up.sh +++ b/vitess-mixin/e2e/vttablet-up.sh @@ -150,7 +150,6 @@ exec $VTROOT/bin/vttablet \ --tablet-path $alias \ --tablet_hostname "$vthost" \ --health_check_interval 5s \ - --enable_semi_sync=false \ --disable_active_reparents=true \ --port $web_port \ --grpc_port $grpc_port \