Skip to content

Commit

Permalink
do not demote a new primary after backup completion (#12856)
Browse files Browse the repository at this point in the history
* do not demote a new primary after backup completion

Signed-off-by: Olga Shestopalova <[email protected]>

* move set replication source out of vtctld and into tablet

Signed-off-by: Olga Shestopalova <[email protected]>

* remove set replication source in vtctld

Signed-off-by: Olga Shestopalova <[email protected]>

* fix ctx, and wip test

Signed-off-by: Olga Shestopalova <[email protected]>

* add e2e test instead of unit test

Signed-off-by: Olga Shestopalova <[email protected]>

* use built in methods

Signed-off-by: Olga Shestopalova <[email protected]>

* omit this in test

Signed-off-by: Olga Shestopalova <[email protected]>

* Update go/vt/vttablet/tabletmanager/rpc_backup.go

Co-authored-by: Shlomi Noach <[email protected]>
Signed-off-by: Olga Shestopalova <[email protected]>
Signed-off-by: Olga Shestopalova <[email protected]>

* move wg done to top of func

Signed-off-by: Olga Shestopalova <[email protected]>

* fix method change

Signed-off-by: Olga Shestopalova <[email protected]>
Signed-off-by: <[email protected]>
Signed-off-by: Olga Shestopalova <[email protected]>

* try running this in isolation

Signed-off-by: Olga Shestopalova <[email protected]>

* try running this in isolation, dont run with others

Signed-off-by: Olga Shestopalova <[email protected]>

* add debug logging

Signed-off-by: Olga Shestopalova <[email protected]>

* add debug logging

Signed-off-by: Olga Shestopalova <[email protected]>

* Update go/test/endtoend/backup/vtctlbackup/backup_utils.go

Co-authored-by: Manan Gupta <[email protected]>
Signed-off-by: Olga Shestopalova <[email protected]>

* Update go/test/endtoend/backup/vtctlbackup/backup_utils.go

Co-authored-by: Manan Gupta <[email protected]>
Signed-off-by: Olga Shestopalova <[email protected]>

* run goimports

Signed-off-by: Olga Shestopalova <[email protected]>

* remove dupe

Signed-off-by: Olga Shestopalova <[email protected]>

* remove generated file

Signed-off-by: Olga Shestopalova <[email protected]>

---------

Signed-off-by: Olga Shestopalova <[email protected]>
Signed-off-by: Olga Shestopalova <[email protected]>
Signed-off-by: <[email protected]>
Co-authored-by: Olga Shestopalova <[email protected]>
Co-authored-by: Shlomi Noach <[email protected]>
Co-authored-by: Manan Gupta <[email protected]>
  • Loading branch information
4 people authored May 27, 2023
1 parent dafc82f commit 18ab80e
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 17 deletions.
54 changes: 51 additions & 3 deletions go/test/endtoend/backup/vtctlbackup/backup_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@ import (
"os/exec"
"path"
"strings"
"sync"
"syscall"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"vitess.io/vitess/go/json2"
"vitess.io/vitess/go/mysql"
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/test/endtoend/cluster"
Expand Down Expand Up @@ -354,6 +356,10 @@ func TestBackup(t *testing.T, setupType int, streamMode string, stripes int, cDe
name: "TestTerminatedRestore",
method: terminatedRestore,
}, //
{
name: "DoNotDemoteNewlyPromotedPrimaryIfReparentingDuringBackup",
method: doNotDemoteNewlyPromotedPrimaryIfReparentingDuringBackup,
}, //
}

defer cluster.PanicHandler(t)
Expand All @@ -369,6 +375,10 @@ func TestBackup(t *testing.T, setupType int, streamMode string, stripes int, cDe
if len(runSpecific) > 0 && !isRegistered(test.name, runSpecific) {
continue
}
// don't run this one unless specified
if len(runSpecific) == 0 && test.name == "DoNotDemoteNewlyPromotedPrimaryIfReparentingDuringBackup" {
continue
}
if retVal := t.Run(test.name, test.method); !retVal {
return vterrors.Errorf(vtrpc.Code_UNKNOWN, "test failure: %s", test.name)
}
Expand Down Expand Up @@ -802,11 +812,11 @@ func terminatedRestore(t *testing.T) {
func checkTabletType(t *testing.T, alias string, tabletType topodata.TabletType) {
// for loop for 15 seconds to check if tablet type is correct
for i := 0; i < 15; i++ {
output, err := localCluster.VtctlclientProcess.ExecuteCommandWithOutput("GetTablet", alias)
output, err := localCluster.VtctldClientProcess.ExecuteCommandWithOutput("GetTablet", alias)
require.Nil(t, err)
var tabletPB topodata.Tablet
err = json.Unmarshal([]byte(output), &tabletPB)
require.Nil(t, err)
err = json2.Unmarshal([]byte(output), &tabletPB)
require.NoError(t, err)
if tabletType == tabletPB.Type {
return
}
Expand All @@ -815,6 +825,44 @@ func checkTabletType(t *testing.T, alias string, tabletType topodata.TabletType)
require.Failf(t, "checkTabletType failed.", "Tablet type is not correct. Expected: %v", tabletType)
}

func doNotDemoteNewlyPromotedPrimaryIfReparentingDuringBackup(t *testing.T) {
var wg sync.WaitGroup
wg.Add(2)

// Start the backup on a replica
go func() {
defer wg.Done()
// ensure this is a primary first
checkTabletType(t, primary.Alias, topodata.TabletType_PRIMARY)

// now backup
err := localCluster.VtctlclientProcess.ExecuteCommand("Backup", replica1.Alias)
require.Nil(t, err)
}()

// Perform a graceful reparent operation
go func() {
defer wg.Done()
// ensure this is a primary first
checkTabletType(t, primary.Alias, topodata.TabletType_PRIMARY)

// now reparent
_, err := localCluster.VtctlclientProcess.ExecuteCommandWithOutput(
"PlannedReparentShard", "--",
"--keyspace_shard", fmt.Sprintf("%s/%s", keyspaceName, shardName),
"--new_primary", replica1.Alias)
require.Nil(t, err)

// check that we reparented
checkTabletType(t, replica1.Alias, topodata.TabletType_PRIMARY)
}()

wg.Wait()

// check that this is still a primary
checkTabletType(t, replica1.Alias, topodata.TabletType_PRIMARY)
}

// test_backup will:
// - create a shard with primary and replica1 only
// - run InitShardPrimary
Expand Down
4 changes: 4 additions & 0 deletions go/test/endtoend/backup/xtrabackup/xtrabackup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ func TestXtrabackupWithExternalZstdCompressionAndManifestedDecompressor(t *testi
backup.TestBackup(t, backup.XtraBackup, "tar", 0, cDetails, []string{"TestReplicaBackup"})
}

func TestDoNotDemoteNewlyPromotedPrimaryIfReparentingDuringBackup(t *testing.T) {
backup.TestBackup(t, backup.XtraBackup, "xbstream", 0, nil, []string{"DoNotDemoteNewlyPromotedPrimaryIfReparentingDuringBackup"})
}

func setDefaultCompressionFlag() {
mysqlctl.CompressionEngineName = "pgzip"
mysqlctl.ExternalCompressorCmd = ""
Expand Down
15 changes: 1 addition & 14 deletions go/vt/vtctl/grpcvtctldserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -489,20 +489,7 @@ func (s *VtctldServer) backupTablet(ctx context.Context, tablet *topodatapb.Tabl
logger.Errorf("failed to send stream response %+v: %v", resp, err)
}
case io.EOF:
// Do not do anything for primary tablets and when active reparenting is disabled
if mysqlctl.DisableActiveReparents || tablet.Type == topodatapb.TabletType_PRIMARY {
return nil
}

// Otherwise we find the correct primary tablet and set the replication source,
// since the primary could have changed while we executed the backup which can
// also affect whether we want to send semi sync acks or not.
tabletInfo, err := s.ts.GetTablet(ctx, tablet.Alias)
if err != nil {
return err
}

return reparentutil.SetReplicationSource(ctx, s.ts, s.tmc, tabletInfo.Tablet)
return nil
default:
return err
}
Expand Down
43 changes: 43 additions & 0 deletions go/vt/vttablet/tabletmanager/rpc_backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ import (
"fmt"
"time"

"vitess.io/vitess/go/vt/topotools"
"vitess.io/vitess/go/vt/vtctl/reparentutil"

"vitess.io/vitess/go/vt/logutil"
"vitess.io/vitess/go/vt/mysqlctl"
"vitess.io/vitess/go/vt/mysqlctl/backupstats"
Expand Down Expand Up @@ -100,6 +103,46 @@ func (tm *TabletManager) Backup(ctx context.Context, logger logutil.Logger, req
// Original type could be primary so pass in a real value for PrimaryTermStartTime
if err := tm.changeTypeLocked(bgCtx, originalType, DBActionNone, SemiSyncActionNone); err != nil {
l.Errorf("Failed to change tablet type from %v to %v, error: %v", topodatapb.TabletType_BACKUP, originalType, err)
return
}

// Find the correct primary tablet and set the replication source,
// since the primary could have changed while we executed the backup which can
// also affect whether we want to send semi sync acks or not.
tabletInfo, err := tm.TopoServer.GetTablet(bgCtx, tablet.Alias)
if err != nil {
l.Errorf("Failed to fetch updated tablet info, error: %v", err)
return
}

// Do not do anything for primary tablets or when active reparenting is disabled
if mysqlctl.DisableActiveReparents || tabletInfo.Type == topodatapb.TabletType_PRIMARY {
return
}

shardPrimary, err := topotools.GetShardPrimaryForTablet(bgCtx, tm.TopoServer, tablet.Tablet)
if err != nil {
return
}

durabilityName, err := tm.TopoServer.GetKeyspaceDurability(bgCtx, tablet.Keyspace)
if err != nil {
l.Errorf("Failed to get durability policy, error: %v", err)
return
}
durability, err := reparentutil.GetDurabilityPolicy(durabilityName)
if err != nil {
l.Errorf("Failed to get durability with name %v, error: %v", durabilityName, err)
}

isSemiSync := reparentutil.IsReplicaSemiSync(durability, shardPrimary.Tablet, tabletInfo.Tablet)
semiSyncAction, err := tm.convertBoolToSemiSyncAction(isSemiSync)
if err != nil {
l.Errorf("Failed to convert bool to semisync action, error: %v", err)
return
}
if err := tm.setReplicationSourceLocked(bgCtx, shardPrimary.Alias, 0, "", false, semiSyncAction); err != nil {
l.Errorf("Failed to set replication source, error: %v", err)
}
}()
}
Expand Down

0 comments on commit 18ab80e

Please sign in to comment.