From 5147e9f7e660c8dbba8acf8df320a7ea2e6b3102 Mon Sep 17 00:00:00 2001 From: David Weitzman Date: Wed, 12 Dec 2018 16:13:15 -0800 Subject: [PATCH 1/2] Don't freeze with no serving shards if reversing vreplication fails during MigrateServedTypes Signed-off-by: David Weitzman --- go/vt/wrangler/keyspace.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/go/vt/wrangler/keyspace.go b/go/vt/wrangler/keyspace.go index 1e5cd10200a..29a64533b55 100644 --- a/go/vt/wrangler/keyspace.go +++ b/go/vt/wrangler/keyspace.go @@ -29,14 +29,13 @@ import ( "vitess.io/vitess/go/vt/binlog/binlogplayer" "vitess.io/vitess/go/vt/concurrency" "vitess.io/vitess/go/vt/discovery" + binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" + topodatapb "vitess.io/vitess/go/vt/proto/topodata" + vschemapb "vitess.io/vitess/go/vt/proto/vschema" "vitess.io/vitess/go/vt/topo" "vitess.io/vitess/go/vt/topo/topoproto" "vitess.io/vitess/go/vt/topotools" "vitess.io/vitess/go/vt/topotools/events" - - binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" - topodatapb "vitess.io/vitess/go/vt/proto/topodata" - vschemapb "vitess.io/vitess/go/vt/proto/vschema" ) const ( @@ -553,6 +552,13 @@ func (wr *Wrangler) masterMigrateServedType(ctx context.Context, keyspace string // Always setup reverse replication. We'll start it later if reverseReplication was specified. // This will allow someone to reverse the replication later if they change their mind. if err := wr.setupReverseReplication(ctx, sourceShards, destinationShards); err != nil { + // It's safe to unfreeze if reverse replication setup fails. + wr.cancelMasterMigrateServedTypes(ctx, sourceShards) + unfreezeErr := wr.updateFrozenFlag(ctx, sourceShards, false) + if unfreezeErr != nil { + wr.Logger().Errorf("Problem recovering for failed reverse replication: %v", unfreezeErr) + } + return err } From 96e7c5ddbac7fb29cbd0c6e978dd9c2fdd07f0ed Mon Sep 17 00:00:00 2001 From: David Weitzman Date: Fri, 4 Jan 2019 10:47:37 -0800 Subject: [PATCH 2/2] Delete orphan SourceShard records for a failed master migration with reverse replication enabled Signed-off-by: David Weitzman --- go/vt/wrangler/keyspace.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/go/vt/wrangler/keyspace.go b/go/vt/wrangler/keyspace.go index 29a64533b55..a98a467feef 100644 --- a/go/vt/wrangler/keyspace.go +++ b/go/vt/wrangler/keyspace.go @@ -474,13 +474,13 @@ func (wr *Wrangler) replicaMigrateServedType(ctx context.Context, keyspace strin // Check and update all source shard records. // Enable query service if needed event.DispatchUpdate(ev, "updating shards to migrate from") - if err = wr.updateShardRecords(ctx, fromShards, cells, servedType, true); err != nil { + if err = wr.updateShardRecords(ctx, fromShards, cells, servedType, true, false); err != nil { return err } // Do the same for destination shards event.DispatchUpdate(ev, "updating shards to migrate to") - if err = wr.updateShardRecords(ctx, toShards, cells, servedType, false); err != nil { + if err = wr.updateShardRecords(ctx, toShards, cells, servedType, false, false); err != nil { return err } @@ -520,7 +520,7 @@ func (wr *Wrangler) masterMigrateServedType(ctx context.Context, keyspace string // - wait for filtered replication to catch up // - mark source shards as frozen event.DispatchUpdate(ev, "disabling query service on all source masters") - if err := wr.updateShardRecords(ctx, sourceShards, nil, topodatapb.TabletType_MASTER, true); err != nil { + if err := wr.updateShardRecords(ctx, sourceShards, nil, topodatapb.TabletType_MASTER, true, false); err != nil { wr.cancelMasterMigrateServedTypes(ctx, sourceShards) return err } @@ -612,7 +612,7 @@ func (wr *Wrangler) masterMigrateServedType(ctx context.Context, keyspace string } func (wr *Wrangler) cancelMasterMigrateServedTypes(ctx context.Context, sourceShards []*topo.ShardInfo) { - if err := wr.updateShardRecords(ctx, sourceShards, nil, topodatapb.TabletType_MASTER, false); err != nil { + if err := wr.updateShardRecords(ctx, sourceShards, nil, topodatapb.TabletType_MASTER, false, true); err != nil { wr.Logger().Errorf2(err, "failed to re-enable source masters") return } @@ -698,9 +698,12 @@ func (wr *Wrangler) startReverseReplication(ctx context.Context, sourceShards [] } // updateShardRecords updates the shard records based on 'from' or 'to' direction. -func (wr *Wrangler) updateShardRecords(ctx context.Context, shards []*topo.ShardInfo, cells []string, servedType topodatapb.TabletType, isFrom bool) (err error) { +func (wr *Wrangler) updateShardRecords(ctx context.Context, shards []*topo.ShardInfo, cells []string, servedType topodatapb.TabletType, isFrom bool, clearSourceShards bool) (err error) { for i, si := range shards { shards[i], err = wr.ts.UpdateShardFields(ctx, si.Keyspace(), si.ShardName(), func(si *topo.ShardInfo) error { + if clearSourceShards { + si.SourceShards = nil + } if err := si.UpdateServedTypesMap(servedType, cells, isFrom /* remove */); err != nil { return err }