From 5147e9f7e660c8dbba8acf8df320a7ea2e6b3102 Mon Sep 17 00:00:00 2001
From: David Weitzman <dweitzman@pinterest.com>
Date: Wed, 12 Dec 2018 16:13:15 -0800
Subject: [PATCH 1/2] Don't freeze with no serving shards if reversing
 vreplication fails during MigrateServedTypes

Signed-off-by: David Weitzman <dweitzman@pinterest.com>
---
 go/vt/wrangler/keyspace.go | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/go/vt/wrangler/keyspace.go b/go/vt/wrangler/keyspace.go
index 1e5cd10200a..29a64533b55 100644
--- a/go/vt/wrangler/keyspace.go
+++ b/go/vt/wrangler/keyspace.go
@@ -29,14 +29,13 @@ import (
 	"vitess.io/vitess/go/vt/binlog/binlogplayer"
 	"vitess.io/vitess/go/vt/concurrency"
 	"vitess.io/vitess/go/vt/discovery"
+	binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata"
+	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
+	vschemapb "vitess.io/vitess/go/vt/proto/vschema"
 	"vitess.io/vitess/go/vt/topo"
 	"vitess.io/vitess/go/vt/topo/topoproto"
 	"vitess.io/vitess/go/vt/topotools"
 	"vitess.io/vitess/go/vt/topotools/events"
-
-	binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata"
-	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
-	vschemapb "vitess.io/vitess/go/vt/proto/vschema"
 )
 
 const (
@@ -553,6 +552,13 @@ func (wr *Wrangler) masterMigrateServedType(ctx context.Context, keyspace string
 	// Always setup reverse replication. We'll start it later if reverseReplication was specified.
 	// This will allow someone to reverse the replication later if they change their mind.
 	if err := wr.setupReverseReplication(ctx, sourceShards, destinationShards); err != nil {
+		// Reverse replication setup failed: re-enable the source masters and unfreeze them so the source shards are not left frozen and non-serving.
+		wr.cancelMasterMigrateServedTypes(ctx, sourceShards)
+		unfreezeErr := wr.updateFrozenFlag(ctx, sourceShards, false)
+		if unfreezeErr != nil {
+			wr.Logger().Errorf("Problem recovering for failed reverse replication: %v", unfreezeErr)
+		}
+
 		return err
 	}
 

From 96e7c5ddbac7fb29cbd0c6e978dd9c2fdd07f0ed Mon Sep 17 00:00:00 2001
From: David Weitzman <dweitzman@pinterest.com>
Date: Fri, 4 Jan 2019 10:47:37 -0800
Subject: [PATCH 2/2] Delete orphan SourceShard records for a failed master
 migration with reverse replication enabled

Signed-off-by: David Weitzman <dweitzman@pinterest.com>
---
 go/vt/wrangler/keyspace.go | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/go/vt/wrangler/keyspace.go b/go/vt/wrangler/keyspace.go
index 29a64533b55..a98a467feef 100644
--- a/go/vt/wrangler/keyspace.go
+++ b/go/vt/wrangler/keyspace.go
@@ -474,13 +474,13 @@ func (wr *Wrangler) replicaMigrateServedType(ctx context.Context, keyspace strin
 	// Check and update all source shard records.
 	// Enable query service if needed
 	event.DispatchUpdate(ev, "updating shards to migrate from")
-	if err = wr.updateShardRecords(ctx, fromShards, cells, servedType, true); err != nil {
+	if err = wr.updateShardRecords(ctx, fromShards, cells, servedType, true, false); err != nil {
 		return err
 	}
 
 	// Do the same for destination shards
 	event.DispatchUpdate(ev, "updating shards to migrate to")
-	if err = wr.updateShardRecords(ctx, toShards, cells, servedType, false); err != nil {
+	if err = wr.updateShardRecords(ctx, toShards, cells, servedType, false, false); err != nil {
 		return err
 	}
 
@@ -520,7 +520,7 @@ func (wr *Wrangler) masterMigrateServedType(ctx context.Context, keyspace string
 	// - wait for filtered replication to catch up
 	// - mark source shards as frozen
 	event.DispatchUpdate(ev, "disabling query service on all source masters")
-	if err := wr.updateShardRecords(ctx, sourceShards, nil, topodatapb.TabletType_MASTER, true); err != nil {
+	if err := wr.updateShardRecords(ctx, sourceShards, nil, topodatapb.TabletType_MASTER, true, false); err != nil {
 		wr.cancelMasterMigrateServedTypes(ctx, sourceShards)
 		return err
 	}
@@ -612,7 +612,7 @@ func (wr *Wrangler) masterMigrateServedType(ctx context.Context, keyspace string
 }
 
 func (wr *Wrangler) cancelMasterMigrateServedTypes(ctx context.Context, sourceShards []*topo.ShardInfo) {
-	if err := wr.updateShardRecords(ctx, sourceShards, nil, topodatapb.TabletType_MASTER, false); err != nil {
+	if err := wr.updateShardRecords(ctx, sourceShards, nil, topodatapb.TabletType_MASTER, false, true); err != nil {
 		wr.Logger().Errorf2(err, "failed to re-enable source masters")
 		return
 	}
@@ -698,9 +698,12 @@ func (wr *Wrangler) startReverseReplication(ctx context.Context, sourceShards []
 }
 
 // updateShardRecords updates the shard records based on 'from' or 'to' direction.
-func (wr *Wrangler) updateShardRecords(ctx context.Context, shards []*topo.ShardInfo, cells []string, servedType topodatapb.TabletType, isFrom bool) (err error) {
+func (wr *Wrangler) updateShardRecords(ctx context.Context, shards []*topo.ShardInfo, cells []string, servedType topodatapb.TabletType, isFrom bool, clearSourceShards bool) (err error) {
 	for i, si := range shards {
 		shards[i], err = wr.ts.UpdateShardFields(ctx, si.Keyspace(), si.ShardName(), func(si *topo.ShardInfo) error {
+			if clearSourceShards {
+				si.SourceShards = nil
+			}
 			if err := si.UpdateServedTypesMap(servedType, cells, isFrom /* remove */); err != nil {
 				return err
 			}