diff --git a/go.mod b/go.mod
index ef433a786c5..f841401468c 100644
--- a/go.mod
+++ b/go.mod
@@ -54,6 +54,7 @@ require (
 	github.com/klauspost/pgzip v1.2.4
 	github.com/konsorten/go-windows-terminal-sequences v1.0.2 // indirect
 	github.com/krishicks/yaml-patch v0.0.10
+	github.com/looplab/fsm v0.2.0
 	github.com/magiconair/properties v1.8.1
 	github.com/martini-contrib/auth v0.0.0-20150219114609-fa62c19b7ae8
 	github.com/martini-contrib/gzip v0.0.0-20151124214156-6c035326b43f
diff --git a/go.sum b/go.sum
index d2c3874432b..78b596778af 100644
--- a/go.sum
+++ b/go.sum
@@ -431,6 +431,8 @@ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
 github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
 github.com/krishicks/yaml-patch v0.0.10 h1:H4FcHpnNwVmw8u0MjPRjWyIXtco6zM2F78t+57oNM3E=
 github.com/krishicks/yaml-patch v0.0.10/go.mod h1:Sm5TchwZS6sm7RJoyg87tzxm2ZcKzdRE4Q7TjNhPrME=
+github.com/looplab/fsm v0.2.0 h1:M8hf5EF4AYLcT1FNKVUX8nu7D0xfp291iGeuigSxfrw=
+github.com/looplab/fsm v0.2.0/go.mod h1:p+IElwgCnAByqr2DWMuNbPjgMwqcHvTRZZn3dvKEke0=
 github.com/magiconair/properties v1.8.0 h1:LLgXmsheXeRoUOBOjtwPQCWIYqM/LU1ayDtDePerRcY=
 github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
 github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4=
diff --git a/go/cmd/vtctlclient/main.go b/go/cmd/vtctlclient/main.go
index 339be1ac84d..1c311518f79 100644
--- a/go/cmd/vtctlclient/main.go
+++ b/go/cmd/vtctlclient/main.go
@@ -19,7 +19,9 @@ package main
 import (
 	"errors"
 	"flag"
+	"fmt"
 	"os"
+	"strings"
 	"time"
 
 	"golang.org/x/net/context"
@@ -64,6 +66,8 @@ func main() {
 		logutil.LogEvent(logger, e)
 	})
 	if err != nil {
+		errStr := strings.Replace(err.Error(), "remote error: ", "", -1)
+		fmt.Printf("%s Error: %s\n", flag.Arg(0), errStr)
 		log.Error(err)
 		os.Exit(1)
 	}
diff --git a/go/test/endtoend/vreplication/cluster.go b/go/test/endtoend/vreplication/cluster.go
index 7dec3d56a2c..e1e2b8820bd 100644
--- a/go/test/endtoend/vreplication/cluster.go
+++ b/go/test/endtoend/vreplication/cluster.go
@@ -21,7 +21,8 @@ import (
 )
 
 var (
-	debug = false // set to true to always use local env vtdataroot for local debugging
+	debug              = false // set to true to always use local env vtdataroot for local debugging
+	originalVtdataroot string
 	vtdataroot         string
 )
 
diff --git a/go/test/endtoend/vreplication/config.go b/go/test/endtoend/vreplication/config.go
index ee5dbc349d2..d937b7a4948 100644
--- a/go/test/endtoend/vreplication/config.go
+++ b/go/test/endtoend/vreplication/config.go
@@ -4,22 +4,28 @@ var (
 	initialProductSchema = `
 create table product(pid int, description varbinary(128), primary key(pid));
 create table customer(cid int, name varbinary(128), typ enum('individual','soho','enterprise'), sport set('football','cricket','baseball'),ts timestamp not null default current_timestamp, primary key(cid));
+create table customer_seq(id int, next_id bigint, cache bigint, primary key(id)) comment 'vitess_sequence';
 create table merchant(mname varchar(128), category varchar(128), primary key(mname)) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
 create table orders(oid int, cid int, pid int, mname varchar(128), price int, primary key(oid));
-create table customer_seq(id int, next_id bigint, cache bigint, primary key(id)) comment 'vitess_sequence';
 create table order_seq(id int, next_id bigint, cache bigint, primary key(id)) comment 'vitess_sequence';
+create table customer2(cid int, name varbinary(128), typ enum('individual','soho','enterprise'), sport set('football','cricket','baseball'),ts timestamp not null default current_timestamp, primary key(cid));
+create table customer_seq2(id int, next_id bigint, cache bigint, primary key(id)) comment 'vitess_sequence';
 `
 	initialProductVSchema = `
 {
   "tables": {
     "product": {},
-    "customer": {},
     "merchant": {},
     "orders": {},
+    "customer": {},
     "customer_seq": {
       "type": "sequence"
     },
+    "customer2": {},
+    "customer_seq2": {
+      "type": "sequence"
+    },
     "order_seq": {
       "type": "sequence"
     }
@@ -47,6 +53,18 @@ create table order_seq(id int, next_id bigint, cache bigint, primary key(id)) co
         "column": "cid",
         "sequence": "customer_seq"
       }
+    },
+    "customer2": {
+      "column_vindexes": [
+        {
+          "column": "cid",
+          "name": "reverse_bits"
+        }
+      ],
+      "auto_increment": {
+        "column": "cid",
+        "sequence": "customer_seq2"
+      }
     }
   }
diff --git a/go/test/endtoend/vreplication/helper.go b/go/test/endtoend/vreplication/helper.go
index 31856bc46bf..5c0794b0707 100644
--- a/go/test/endtoend/vreplication/helper.go
+++ b/go/test/endtoend/vreplication/helper.go
@@ -10,9 +10,8 @@ import (
 	"strings"
 	"testing"
 
-	"github.com/stretchr/testify/require"
-
 	"github.com/buger/jsonparser"
+	"github.com/stretchr/testify/require"
 
 	"vitess.io/vitess/go/test/endtoend/cluster"
 
@@ -145,13 +144,16 @@ func getQueryCount(url string, query string) int {
 		if len(row) != len(headings) {
 			continue
 		}
+		filterChars := []string{"_", "`"}
 		//Queries seem to include non-printable characters at times and hence equality fails unless these are removed
 		re := regexp.MustCompile("[[:^ascii:]]")
 		foundQuery := re.ReplaceAllLiteralString(row[queryIndex], "")
-		foundQuery = strings.ReplaceAll(foundQuery, "_", "")
 		cleanQuery := re.ReplaceAllLiteralString(query, "")
-		cleanQuery = strings.ReplaceAll(cleanQuery, "_", "")
-		if foundQuery == cleanQuery {
+		for _, filterChar := range filterChars {
+			foundQuery = strings.ReplaceAll(foundQuery, filterChar, "")
+			cleanQuery = strings.ReplaceAll(cleanQuery, filterChar, "")
+		}
+		if foundQuery == cleanQuery || strings.Contains(foundQuery, cleanQuery) {
 			count, _ = strconv.Atoi(row[countIndex])
 		}
 	}
@@ -239,3 +241,20 @@ func printShardPositions(vc *VitessCluster, ksShards []string) {
 		}
 	}
 }
+
+func clearRoutingRules(t *testing.T, vc *VitessCluster) error {
+	if _, err := vc.VtctlClient.ExecuteCommandWithOutput("ApplyRoutingRules", "-rules={}"); err != nil {
+		return err
+	}
+	return nil
+}
+
+func printRoutingRules(t *testing.T, vc *VitessCluster, msg string) error {
+	var output string
+	var err error
+	if output, err = vc.VtctlClient.ExecuteCommandWithOutput("GetRoutingRules"); err != nil {
+		return err
+	}
+	fmt.Printf("Routing Rules::%s:\n%s\n", msg, output)
+	return nil
+}
diff --git a/go/test/endtoend/vreplication/resharding_workflows_v2_test.go b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go
new file mode 100644
index 00000000000..86e88fc08ff
--- /dev/null
+++ b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go
@@ -0,0 +1,563 @@
+/*
+Copyright 2020 The Vitess Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package vreplication + +import ( + "fmt" + "strings" + "testing" + "time" + + "vitess.io/vitess/go/vt/log" + + "vitess.io/vitess/go/vt/wrangler" + + "github.com/stretchr/testify/require" + "vitess.io/vitess/go/test/endtoend/cluster" +) + +const ( + workflowName = "wf1" + sourceKs = "product" + targetKs = "customer" + ksWorkflow = targetKs + "." + workflowName + reverseKsWorkflow = sourceKs + "." + workflowName + "_reverse" + tablesToMove = "customer" + defaultCellName = "zone1" + readQuery = "select * from customer" +) + +const ( + workflowActionStart = "Start" + workflowActionSwitchTraffic = "SwitchTraffic" + workflowActionReverseTraffic = "ReverseTraffic" + workflowActionComplete = "Complete" + workflowActionAbort = "Abort" +) + +var ( + targetTab1, targetTab2, targetReplicaTab1 *cluster.VttabletProcess + sourceReplicaTab, sourceTab *cluster.VttabletProcess + + lastOutput string + currentWorkflowType wrangler.VReplicationWorkflowType +) + +func reshard2Start(t *testing.T, sourceShards, targetShards string) error { + err := tstWorkflowExec(t, defaultCellName, workflowName, targetKs, targetKs, + "", workflowActionStart, "", sourceShards, targetShards) + require.NoError(t, err) + time.Sleep(1 * time.Second) + catchup(t, targetTab1, workflowName, "Reshard") + catchup(t, targetTab2, workflowName, "Reshard") + vdiff(t, ksWorkflow) + return nil +} + +func moveTables2Start(t *testing.T, tables string) error { + if tables == "" { + tables = tablesToMove + } + err := tstWorkflowExec(t, defaultCellName, workflowName, sourceKs, targetKs, + tables, workflowActionStart, "", "", "") + require.NoError(t, err) + catchup(t, targetTab1, workflowName, "MoveTables") + catchup(t, targetTab2, workflowName, "MoveTables") + time.Sleep(1 * time.Second) + vdiff(t, ksWorkflow) + return nil +} + +func tstWorkflowAction(t *testing.T, action, tabletTypes, cells string) error { + return tstWorkflowExec(t, cells, workflowName, sourceKs, targetKs, tablesToMove, action, tabletTypes, "", "") +} + +func tstWorkflowExec(t *testing.T, cells, workflow, sourceKs, targetKs, tables, action, tabletTypes, sourceShards, targetShards string) error { + var args []string + if currentWorkflowType == wrangler.MoveTablesWorkflow { + args = append(args, "MoveTables") + } else { + args = append(args, "Reshard") + } + args = append(args, "-v2") + switch action { + case workflowActionStart: + if currentWorkflowType == wrangler.MoveTablesWorkflow { + args = append(args, "-source", sourceKs, "-tables", tables) + } else { + args = append(args, "-source_shards", sourceShards, "-target_shards", targetShards) + } + } + if cells != "" { + args = append(args, "-cells", cells) + } + if tabletTypes != "" { + args = append(args, "-tablet_types", tabletTypes) + } + ksWorkflow := fmt.Sprintf("%s.%s", targetKs, workflow) + args = append(args, action, ksWorkflow) + output, err := vc.VtctlClient.ExecuteCommandWithOutput(args...) 
+ lastOutput = output + if err != nil { + return fmt.Errorf("%s: %s", err, output) + } + fmt.Printf("----------\n%+v\n%s----------\n", args, output) + return nil +} + +func tstWorkflowSwitchReads(t *testing.T, tabletTypes, cells string) { + if tabletTypes == "" { + tabletTypes = "replica,rdonly" + } + require.NoError(t, tstWorkflowAction(t, workflowActionSwitchTraffic, tabletTypes, cells)) +} + +func tstWorkflowReverseReads(t *testing.T, tabletTypes, cells string) { + if tabletTypes == "" { + tabletTypes = "replica,rdonly" + } + require.NoError(t, tstWorkflowAction(t, workflowActionReverseTraffic, tabletTypes, cells)) +} + +func tstWorkflowSwitchWrites(t *testing.T) { + require.NoError(t, tstWorkflowAction(t, workflowActionSwitchTraffic, "master", "")) +} + +func tstWorkflowReverseWrites(t *testing.T) { + require.NoError(t, tstWorkflowAction(t, workflowActionReverseTraffic, "master", "")) +} + +func tstWorkflowSwitchReadsAndWrites(t *testing.T) { + require.NoError(t, tstWorkflowAction(t, workflowActionSwitchTraffic, "replica,rdonly,master", "")) +} + +func tstWorkflowReverseReadsAndWrites(t *testing.T) { + require.NoError(t, tstWorkflowAction(t, workflowActionReverseTraffic, "replica,rdonly,master", "")) +} + +func tstWorkflowComplete(t *testing.T) error { + return tstWorkflowAction(t, workflowActionComplete, "", "") +} + +func tstWorkflowAbort(t *testing.T) error { + return tstWorkflowAction(t, workflowActionAbort, "", "") +} + +func validateReadsRoute(t *testing.T, tabletTypes string, tablet *cluster.VttabletProcess) { + if tabletTypes == "" { + tabletTypes = "replica,rdonly" + } + for _, tt := range []string{"replica", "rdonly"} { + destination := fmt.Sprintf("%s:%s@%s", tablet.Keyspace, tablet.Shard, tt) + if strings.Contains(tabletTypes, tt) { + require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, tablet, destination, readQuery, readQuery)) + } + } +} + +func validateReadsRouteToSource(t *testing.T, tabletTypes string) { + validateReadsRoute(t, tabletTypes, sourceReplicaTab) +} + +func validateReadsRouteToTarget(t *testing.T, tabletTypes string) { + validateReadsRoute(t, tabletTypes, targetReplicaTab1) +} + +func validateWritesRouteToSource(t *testing.T) { + insertQuery := "insert into customer(name, cid) values('tempCustomer2', 200)" + matchInsertQuery := "insert into customer(name, cid) values" + require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, sourceTab, "customer", insertQuery, matchInsertQuery)) + execVtgateQuery(t, vtgateConn, "customer", "delete from customer where cid > 100") +} + +func validateWritesRouteToTarget(t *testing.T) { + insertQuery := "insert into customer(name, cid) values('tempCustomer3', 101)" + matchInsertQuery := "insert into customer(name, cid) values" + require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, targetTab2, "customer", insertQuery, matchInsertQuery)) + insertQuery = "insert into customer(name, cid) values('tempCustomer3', 102)" + require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, targetTab1, "customer", insertQuery, matchInsertQuery)) + execVtgateQuery(t, vtgateConn, "customer", "delete from customer where cid > 100") +} + +func revert(t *testing.T) { + switchWrites(t, reverseKsWorkflow, false) + validateWritesRouteToSource(t) + switchReadsNew(t, allCellNames, ksWorkflow, true) + validateReadsRouteToSource(t, "replica") + queries := []string{ + "delete from _vt.vreplication", + "delete from _vt.resharding_journal", + } + + for _, query := range queries { + targetTab1.QueryTablet(query, 
"customer", true) + targetTab2.QueryTablet(query, "customer", true) + sourceTab.QueryTablet(query, "product", true) + } + targetTab1.QueryTablet("drop table vt_customer.customer", "customer", true) + targetTab2.QueryTablet("drop table vt_customer.customer", "customer", true) + + clearRoutingRules(t, vc) +} + +func checkStates(t *testing.T, startState, endState string) { + require.Contains(t, lastOutput, fmt.Sprintf("Start State: %s", startState)) + require.Contains(t, lastOutput, fmt.Sprintf("Current State: %s", endState)) +} + +func getCurrentState(t *testing.T) string { + if err := tstWorkflowAction(t, "GetState", "", ""); err != nil { + return err.Error() + } + return strings.TrimSpace(strings.Trim(lastOutput, "\n")) +} + +// ideally this should be broken up into multiple tests for full flow, replica/rdonly flow, reverse flows etc +// but CI currently fails on creating multiple clusters even after the previous ones are torn down + +func TestBasicV2Workflows(t *testing.T) { + vc = setupCluster(t) + defer vtgateConn.Close() + //defer vc.TearDown() + + testMoveTablesV2Workflow(t) + testReshardV2Workflow(t) + log.Flush() +} + +func testReshardV2Workflow(t *testing.T) { + currentWorkflowType = wrangler.ReshardWorkflow + + createAdditionalCustomerShards(t, "-40,40-80,80-c0,c0-") + reshard2Start(t, "-80,80-", "-40,40-80,80-c0,c0-") + + checkStates(t, wrangler.WorkflowStateNotStarted, wrangler.WorkflowStateNotSwitched) + validateReadsRouteToSource(t, "replica") + validateWritesRouteToSource(t) + + testRestOfWorkflow(t) +} + +func testMoveTablesV2Workflow(t *testing.T) { + currentWorkflowType = wrangler.MoveTablesWorkflow + + // test basic forward and reverse flows + setupCustomerKeyspace(t) + moveTables2Start(t, "customer") + checkStates(t, wrangler.WorkflowStateNotStarted, wrangler.WorkflowStateNotSwitched) + validateReadsRouteToSource(t, "replica") + validateWritesRouteToSource(t) + + testRestOfWorkflow(t) + + listAllArgs := []string{"workflow", "customer", "listall"} + output, _ := vc.VtctlClient.ExecuteCommandWithOutput(listAllArgs...) + require.Contains(t, output, "No workflows found in keyspace customer") + + moveTables2Start(t, "customer2") + output, _ = vc.VtctlClient.ExecuteCommandWithOutput(listAllArgs...) + require.Contains(t, output, "Following workflow(s) found in keyspace customer: wf1") + + err := tstWorkflowAbort(t) + require.NoError(t, err) + + output, _ = vc.VtctlClient.ExecuteCommandWithOutput(listAllArgs...) + require.Contains(t, output, "No workflows found in keyspace customer") +} + +func testPartialSwitches(t *testing.T) { + //nothing switched + require.Equal(t, getCurrentState(t), wrangler.WorkflowStateNotSwitched) + tstWorkflowSwitchReads(t, "replica,rdonly", "zone1") + nextState := "Reads partially switched. Replica switched in cells: zone1. Rdonly switched in cells: zone1. 
Writes Not Switched" + checkStates(t, wrangler.WorkflowStateNotSwitched, nextState) + + tstWorkflowSwitchReads(t, "replica,rdonly", "zone2") + currentState := nextState + nextState = wrangler.WorkflowStateReadsSwitched + checkStates(t, currentState, nextState) + + tstWorkflowSwitchReads(t, "", "") + checkStates(t, nextState, nextState) //idempotency + + tstWorkflowSwitchWrites(t) + currentState = nextState + nextState = wrangler.WorkflowStateAllSwitched + checkStates(t, currentState, nextState) + + tstWorkflowSwitchWrites(t) + checkStates(t, nextState, nextState) //idempotency + + tstWorkflowReverseReads(t, "replica,rdonly", "zone1") + currentState = nextState + nextState = "Reads partially switched. Replica switched in cells: zone2. Rdonly switched in cells: zone2. Writes Switched" + checkStates(t, currentState, nextState) + + tstWorkflowReverseReads(t, "replica,rdonly", "zone2") + currentState = nextState + nextState = wrangler.WorkflowStateWritesSwitched + checkStates(t, currentState, nextState) + + tstWorkflowReverseWrites(t) + currentState = nextState + nextState = wrangler.WorkflowStateNotSwitched + checkStates(t, currentState, nextState) +} + +func testRestOfWorkflow(t *testing.T) { + testPartialSwitches(t) + + // test basic forward and reverse flows + tstWorkflowSwitchReads(t, "", "") + checkStates(t, wrangler.WorkflowStateNotSwitched, wrangler.WorkflowStateReadsSwitched) + validateReadsRouteToTarget(t, "replica") + validateWritesRouteToSource(t) + + tstWorkflowSwitchWrites(t) + checkStates(t, wrangler.WorkflowStateReadsSwitched, wrangler.WorkflowStateAllSwitched) + validateReadsRouteToTarget(t, "replica") + validateWritesRouteToTarget(t) + + tstWorkflowReverseReads(t, "", "") + checkStates(t, wrangler.WorkflowStateAllSwitched, wrangler.WorkflowStateWritesSwitched) + validateReadsRouteToSource(t, "replica") + validateWritesRouteToTarget(t) + + tstWorkflowReverseWrites(t) + checkStates(t, wrangler.WorkflowStateWritesSwitched, wrangler.WorkflowStateNotSwitched) + validateReadsRouteToSource(t, "replica") + validateWritesRouteToSource(t) + + tstWorkflowSwitchWrites(t) + checkStates(t, wrangler.WorkflowStateNotSwitched, wrangler.WorkflowStateWritesSwitched) + validateReadsRouteToSource(t, "replica") + validateWritesRouteToTarget(t) + + tstWorkflowReverseWrites(t) + validateReadsRouteToSource(t, "replica") + validateWritesRouteToSource(t) + + tstWorkflowSwitchReads(t, "", "") + validateReadsRouteToTarget(t, "replica") + validateWritesRouteToSource(t) + + tstWorkflowReverseReads(t, "", "") + validateReadsRouteToSource(t, "replica") + validateWritesRouteToSource(t) + + tstWorkflowSwitchReadsAndWrites(t) + validateReadsRouteToTarget(t, "replica") + validateWritesRouteToTarget(t) + tstWorkflowReverseReadsAndWrites(t) + validateReadsRouteToSource(t, "replica") + validateWritesRouteToSource(t) + + // trying to complete an unswitched workflow should error + err := tstWorkflowComplete(t) + require.Error(t, err) + require.Contains(t, err.Error(), wrangler.ErrWorkflowNotFullySwitched) + + // fully switch and complete + tstWorkflowSwitchReadsAndWrites(t) + validateReadsRouteToTarget(t, "replica") + validateWritesRouteToTarget(t) + + err = tstWorkflowComplete(t) + require.NoError(t, err) +} + +func setupCluster(t *testing.T) *VitessCluster { + cells := []string{"zone1", "zone2"} + + vc = InitCluster(t, cells) + require.NotNil(t, vc) + defaultCellName := "zone1" + allCellNames = defaultCellName + defaultCell = vc.Cells[defaultCellName] + + zone1 := vc.Cells["zone1"] + zone2 := vc.Cells["zone2"] + + 
vc.AddKeyspace(t, []*Cell{zone1, zone2}, "product", "0", initialProductVSchema, initialProductSchema, defaultReplicas, defaultRdonly, 100) + + vtgate = zone1.Vtgates[0] + require.NotNil(t, vtgate) + vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.master", "product", "0"), 1) + vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", "product", "0"), 2) + + vtgateConn = getConnection(t, globalConfig.vtgateMySQLPort) + verifyClusterHealth(t) + insertInitialData(t) + + sourceReplicaTab = vc.Cells[defaultCell.Name].Keyspaces["product"].Shards["0"].Tablets["zone1-101"].Vttablet + sourceTab = vc.Cells[defaultCell.Name].Keyspaces["product"].Shards["0"].Tablets["zone1-100"].Vttablet + + return vc +} + +func setupCustomerKeyspace(t *testing.T) { + if _, err := vc.AddKeyspace(t, []*Cell{vc.Cells["zone1"], vc.Cells["zone2"]}, "customer", "-80,80-", + customerVSchema, customerSchema, defaultReplicas, defaultRdonly, 200); err != nil { + t.Fatal(err) + } + if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.master", "customer", "-80"), 1); err != nil { + t.Fatal(err) + } + if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.master", "customer", "80-"), 1); err != nil { + t.Fatal(err) + } + if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", "customer", "-80"), 2); err != nil { + t.Fatal(err) + } + if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", "customer", "80-"), 2); err != nil { + t.Fatal(err) + } + custKs := vc.Cells[defaultCell.Name].Keyspaces["customer"] + targetTab1 = custKs.Shards["-80"].Tablets["zone1-200"].Vttablet + targetTab2 = custKs.Shards["80-"].Tablets["zone1-300"].Vttablet + targetReplicaTab1 = custKs.Shards["-80"].Tablets["zone1-201"].Vttablet +} + +func TestSwitchReadsWritesInAnyOrder(t *testing.T) { + vc = setupCluster(t) + defer vc.TearDown() + moveCustomerTableSwitchFlows(t, []*Cell{vc.Cells["zone1"]}, "zone1") +} + +func switchReadsNew(t *testing.T, cells, ksWorkflow string, reverse bool) { + output, err := vc.VtctlClient.ExecuteCommandWithOutput("SwitchReads", "-cells="+cells, + "-tablet_types=rdonly,replica", fmt.Sprintf("-reverse=%t", reverse), ksWorkflow) + require.NoError(t, err, fmt.Sprintf("SwitchReads Error: %s: %s", err, output)) + if output != "" { + fmt.Printf("SwitchReads output: %s\n", output) + } +} + +func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias string) { + workflow := "wf1" + sourceKs := "product" + targetKs := "customer" + ksWorkflow := fmt.Sprintf("%s.%s", targetKs, workflow) + tables := "customer" + setupCustomerKeyspace(t) + + var moveTablesAndWait = func() { + moveTables(t, sourceCellOrAlias, workflow, sourceKs, targetKs, tables) + catchup(t, targetTab1, workflow, "MoveTables") + catchup(t, targetTab2, workflow, "MoveTables") + vdiff(t, ksWorkflow) + } + + var switchReadsFollowedBySwitchWrites = func() { + moveTablesAndWait() + + validateReadsRouteToSource(t, "replica") + switchReadsNew(t, allCellNames, ksWorkflow, false) + validateReadsRouteToTarget(t, "replica") + + validateWritesRouteToSource(t) + switchWrites(t, ksWorkflow, false) + validateWritesRouteToTarget(t) + + revert(t) + } + var switchWritesFollowedBySwitchReads = func() { + moveTablesAndWait() + + validateWritesRouteToSource(t) + switchWrites(t, ksWorkflow, false) + validateWritesRouteToTarget(t) + + validateReadsRouteToSource(t, "replica") + switchReadsNew(t, allCellNames, ksWorkflow, false) + validateReadsRouteToTarget(t, "replica") + + revert(t) + } + + var 
switchReadsReverseSwitchWritesSwitchReads = func() { + moveTablesAndWait() + + validateReadsRouteToSource(t, "replica") + switchReadsNew(t, allCellNames, ksWorkflow, false) + validateReadsRouteToTarget(t, "replica") + + switchReadsNew(t, allCellNames, ksWorkflow, true) + validateReadsRouteToSource(t, "replica") + printRoutingRules(t, vc, "After reversing SwitchReads") + + validateWritesRouteToSource(t) + switchWrites(t, ksWorkflow, false) + validateWritesRouteToTarget(t) + + printRoutingRules(t, vc, "After SwitchWrites and reversing SwitchReads") + validateReadsRouteToSource(t, "replica") + switchReadsNew(t, allCellNames, ksWorkflow, false) + validateReadsRouteToTarget(t, "replica") + + revert(t) + } + + var switchWritesReverseSwitchReadsSwitchWrites = func() { + moveTablesAndWait() + + validateWritesRouteToSource(t) + switchWrites(t, ksWorkflow, false) + validateWritesRouteToTarget(t) + + switchWrites(t, ksWorkflow, true) + validateWritesRouteToSource(t) + + validateReadsRouteToSource(t, "replica") + switchReadsNew(t, allCellNames, ksWorkflow, false) + validateReadsRouteToTarget(t, "replica") + + validateWritesRouteToSource(t) + switchWrites(t, ksWorkflow, false) + validateWritesRouteToTarget(t) + + revert(t) + + } + switchReadsFollowedBySwitchWrites() + switchWritesFollowedBySwitchReads() + switchReadsReverseSwitchWritesSwitchReads() + switchWritesReverseSwitchReadsSwitchWrites() +} + +func createAdditionalCustomerShards(t *testing.T, shards string) { + ksName := "customer" + keyspace := vc.Cells[defaultCell.Name].Keyspaces[ksName] + require.NoError(t, vc.AddShards(t, []*Cell{defaultCell, vc.Cells["zone2"]}, keyspace, shards, defaultReplicas, defaultRdonly, 400)) + arrTargetShardNames := strings.Split(shards, ",") + + for _, shardName := range arrTargetShardNames { + if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.master", ksName, shardName), 1); err != nil { + t.Fatal(err) + } + if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", ksName, shardName), 2); err != nil { + t.Fatal(err) + } + } + custKs := vc.Cells[defaultCell.Name].Keyspaces[ksName] + targetTab2 = custKs.Shards["80-c0"].Tablets["zone1-600"].Vttablet + targetTab1 = custKs.Shards["40-80"].Tablets["zone1-500"].Vttablet + targetReplicaTab1 = custKs.Shards["-40"].Tablets["zone1-401"].Vttablet + + sourceReplicaTab = custKs.Shards["-80"].Tablets["zone1-201"].Vttablet + sourceTab = custKs.Shards["-80"].Tablets["zone1-200"].Vttablet +} diff --git a/go/test/endtoend/vreplication/unsharded_init_data.sql b/go/test/endtoend/vreplication/unsharded_init_data.sql index 9a13e5bb22f..f8e0cc5d86f 100644 --- a/go/test/endtoend/vreplication/unsharded_init_data.sql +++ b/go/test/endtoend/vreplication/unsharded_init_data.sql @@ -7,4 +7,7 @@ insert into product(pid, description) values(1, 'keyboard'); insert into product(pid, description) values(2, 'monitor'); insert into orders(oid, cid, mname, pid, price) values(1, 1, 'monoprice', 1, 10); insert into orders(oid, cid, mname, pid, price) values(2, 1, 'newegg', 2, 15); -insert into orders(oid, cid, mname, pid, price) values(3, 2, 'monoprice', 2, 20); \ No newline at end of file +insert into orders(oid, cid, mname, pid, price) values(3, 2, 'monoprice', 2, 20); +insert into customer2(cid, name, typ, sport) values(1, 'john',1,'football,baseball'); +insert into customer2(cid, name, typ, sport) values(2, 'paul','soho','cricket'); +insert into customer2(cid, name, typ, sport) values(3, 'ringo','enterprise',''); diff --git 
a/go/test/endtoend/vreplication/vreplication_test.go b/go/test/endtoend/vreplication/vreplication_test.go index 43aaecc0b85..adfdd93730e 100644 --- a/go/test/endtoend/vreplication/vreplication_test.go +++ b/go/test/endtoend/vreplication/vreplication_test.go @@ -49,8 +49,9 @@ func init() { func TestBasicVreplicationWorkflow(t *testing.T) { defaultCellName := "zone1" + allCells := []string{"zone1"} allCellNames = "zone1" - vc = InitCluster(t, []string{defaultCellName}) + vc = InitCluster(t, allCells) require.NotNil(t, vc) defaultReplicas = 0 // because of CI resource constraints we can only run this test with master tablets defer func() { defaultReplicas = 1 }() @@ -182,7 +183,6 @@ func insertMoreProducts(t *testing.T) { execVtgateQuery(t, vtgateConn, "product", sql) } -// FIXME: if testReverse if false we don't dropsources and that creates a problem later on in the test due to existence of blacklisted tables func shardCustomer(t *testing.T, testReverse bool, cells []*Cell, sourceCellOrAlias string) { workflow := "p2c" sourceKs := "product" @@ -220,7 +220,7 @@ func shardCustomer(t *testing.T, testReverse bool, cells []*Cell, sourceCellOrAl switchReads(t, allCellNames, ksWorkflow) require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, productTab, "customer", query, query)) switchWritesDryRun(t, ksWorkflow, dryRunResultsSwitchWritesCustomerShard) - switchWrites(t, ksWorkflow) + switchWrites(t, ksWorkflow, false) ksShards := []string{"product/0", "customer/-80", "customer/80-"} printShardPositions(vc, ksShards) insertQuery2 := "insert into customer(name, cid) values('tempCustomer2', 100)" @@ -236,7 +236,8 @@ func shardCustomer(t *testing.T, testReverse bool, cells []*Cell, sourceCellOrAl if testReverse { //Reverse Replicate switchReads(t, allCellNames, reverseKsWorkflow) - switchWrites(t, reverseKsWorkflow) + printShardPositions(vc, ksShards) + switchWrites(t, reverseKsWorkflow, false) insertQuery1 = "insert into customer(cid, name) values(1002, 'tempCustomer5')" require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, productTab, "product", insertQuery1, matchInsertQuery1)) @@ -248,8 +249,7 @@ func shardCustomer(t *testing.T, testReverse bool, cells []*Cell, sourceCellOrAl //Go forward again switchReads(t, allCellNames, ksWorkflow) - switchWrites(t, ksWorkflow) - + switchWrites(t, ksWorkflow, false) dropSourcesDryRun(t, ksWorkflow, false, dryRunResultsDropSourcesDropCustomerShard) dropSourcesDryRun(t, ksWorkflow, true, dryRunResultsDropSourcesRenameCustomerShard) @@ -417,7 +417,7 @@ func reshard(t *testing.T, ksName string, tableName string, workflow string, sou if dryRunResultswitchWrites != nil { switchWritesDryRun(t, ksWorkflow, dryRunResultswitchWrites) } - switchWrites(t, ksWorkflow) + switchWrites(t, ksWorkflow, false) dropSources(t, ksWorkflow) for tabletName, count := range counts { @@ -445,7 +445,7 @@ func shardOrders(t *testing.T) { catchup(t, customerTab2, workflow, "MoveTables") vdiff(t, ksWorkflow) switchReads(t, allCellNames, ksWorkflow) - switchWrites(t, ksWorkflow) + switchWrites(t, ksWorkflow, false) dropSources(t, ksWorkflow) validateCountInTablet(t, customerTab1, "customer", "orders", 1) validateCountInTablet(t, customerTab2, "customer", "orders", 2) @@ -477,7 +477,7 @@ func shardMerchant(t *testing.T) { vdiff(t, "merchant.p2m") switchReads(t, allCellNames, ksWorkflow) - switchWrites(t, ksWorkflow) + switchWrites(t, ksWorkflow, false) dropSources(t, ksWorkflow) validateCountInTablet(t, merchantTab1, "merchant", "merchant", 1) @@ -615,7 +615,7 @@ func 
verifyClusterHealth(t *testing.T) { func catchup(t *testing.T, vttablet *cluster.VttabletProcess, workflow, info string) { const MaxWait = 10 * time.Second err := vc.WaitForVReplicationToCatchup(vttablet, workflow, fmt.Sprintf("vt_%s", vttablet.Keyspace), MaxWait) - require.NoError(nil, err, fmt.Sprintf("%s timed out for workflow %s on tablet %s.%s.%s", info, workflow, vttablet.Keyspace, vttablet.Shard, vttablet.Name)) + require.NoError(t, err, fmt.Sprintf("%s timed out for workflow %s on tablet %s.%s.%s", info, workflow, vttablet.Keyspace, vttablet.Shard, vttablet.Name)) } func moveTables(t *testing.T, cell, workflow, sourceKs, targetKs, tables string) { @@ -624,7 +624,6 @@ func moveTables(t *testing.T, cell, workflow, sourceKs, targetKs, tables string) t.Fatalf("MoveTables command failed with %+v\n", err) } } - func applyVSchema(t *testing.T, vschema, keyspace string) { err := vc.VtctlClient.ExecuteCommand("ApplyVSchema", "-vschema", vschema, keyspace) require.NoError(t, err) @@ -679,18 +678,16 @@ func printSwitchWritesExtraDebug(t *testing.T, ksWorkflow, msg string) { } } -func switchWrites(t *testing.T, ksWorkflow string) { +func switchWrites(t *testing.T, ksWorkflow string, reverse bool) { const SwitchWritesTimeout = "91s" // max: 3 tablet picker 30s waits + 1 output, err := vc.VtctlClient.ExecuteCommandWithOutput("SwitchWrites", - "-filtered_replication_wait_time="+SwitchWritesTimeout, ksWorkflow) + "-filtered_replication_wait_time="+SwitchWritesTimeout, fmt.Sprintf("-reverse=%t", reverse), ksWorkflow) if output != "" { fmt.Printf("Output of SwitchWrites for %s:\n++++++\n%s\n--------\n", ksWorkflow, output) } //printSwitchWritesExtraDebug is useful when debugging failures in SwitchWrites due to corner cases/races _ = printSwitchWritesExtraDebug - if err != nil { - require.FailNow(t, fmt.Sprintf("SwitchWrites Error: %s: %s", err, output)) - } + require.NoError(t, err, fmt.Sprintf("SwitchWrites Error: %s: %s", err, output)) } func dropSourcesDryRun(t *testing.T, ksWorkflow string, renameTables bool, dryRunResults []string) { diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index 995a1f82724..57006b4e9a4 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -339,7 +339,7 @@ var commands = []commandGroup{ "[-cells=c1,c2,...] 
[-reverse] -tablet_type={replica|rdonly} [-dry-run] ", "Switch read traffic for the specified workflow."}, {"SwitchWrites", commandSwitchWrites, - "[-filtered_replication_wait_time=30s] [-cancel] [-reverse_replication=true] [-dry-run] ", + "[-timeout=30s] [-reverse] [-reverse_replication=true] [-dry-run] ", "Switch write traffic for the specified workflow."}, {"CancelResharding", commandCancelResharding, "", @@ -1887,6 +1887,12 @@ func commandValidateKeyspace(ctx context.Context, wr *wrangler.Wrangler, subFlag } func commandReshard(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { + for _, arg := range args { + if arg == "-v2" { + fmt.Println("*** Using Reshard v2 flow ***") + return commandVRWorkflow(ctx, wr, subFlags, args, wrangler.ReshardWorkflow) + } + } cells := subFlags.String("cells", "", "Cell(s) or CellAlias(es) (comma-separated) to replicate from.") tabletTypes := subFlags.String("tablet_types", "", "Source tablet types to replicate from.") skipSchemaCopy := subFlags.Bool("skip_schema_copy", false, "Skip copying of schema to targets") @@ -1906,6 +1912,12 @@ func commandReshard(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.F } func commandMoveTables(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { + for _, arg := range args { + if arg == "-v2" { + fmt.Println("*** Using MoveTables v2 flow ***") + return commandVRWorkflow(ctx, wr, subFlags, args, wrangler.MoveTablesWorkflow) + } + } workflow := subFlags.String("workflow", "", "Workflow name. Can be any descriptive string. Will be used to later migrate traffic via SwitchReads/SwitchWrites.") cells := subFlags.String("cells", "", "Cell(s) or CellAlias(es) (comma-separated) to replicate from.") tabletTypes := subFlags.String("tablet_types", "", "Source tablet types to replicate from (e.g. master, replica, rdonly). Defaults to -vreplication_tablet_type parameter value for the tablet, which has the default value of replica.") @@ -1937,6 +1949,223 @@ func commandMoveTables(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla return wr.MoveTables(ctx, *workflow, source, target, tableSpecs, *cells, *tabletTypes, *allTables, *excludes) } +// VReplicationWorkflowAction defines subcommands passed to vtctl for movetables or reshard +type VReplicationWorkflowAction string + +const ( + vReplicationWorkflowActionStart = "start" + vReplicationWorkflowActionSwitchTraffic = "switchtraffic" + vReplicationWorkflowActionReverseTraffic = "reversetraffic" + vReplicationWorkflowActionComplete = "complete" + vReplicationWorkflowActionAbort = "abort" + vReplicationWorkflowActionShow = "show" + vReplicationWorkflowActionProgress = "progress" + vReplicationWorkflowActionGetState = "getstate" +) + +func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string, + workflowType wrangler.VReplicationWorkflowType) error { + + cells := subFlags.String("cells", "", "Cell(s) or CellAlias(es) (comma-separated) to replicate from.") + tabletTypes := subFlags.String("tablet_types", "", "Source tablet types to replicate from (e.g. master, replica, rdonly). 
Defaults to -vreplication_tablet_type parameter value for the tablet, which has the default value of replica.") + dryRun := subFlags.Bool("dry_run", false, "Does a dry run of SwitchReads and only reports the actions to be taken") + timeout := subFlags.Duration("timeout", 30*time.Second, "Specifies the maximum time to wait, in seconds, for vreplication to catch up on master migrations. The migration will be aborted on timeout.") + reverseReplication := subFlags.Bool("reverse_replication", true, "Also reverse the replication") + keepData := subFlags.Bool("keep_data", false, "Do not drop tables or shards (if true, only vreplication artifacts are cleaned up)") + + sourceKeyspace := subFlags.String("source", "", "Source keyspace") + tables := subFlags.String("tables", "", "A table spec or a list of tables") + allTables := subFlags.Bool("all", false, "Move all tables from the source keyspace") + excludes := subFlags.String("exclude", "", "Tables to exclude (comma-separated) if -all is specified") + renameTables := subFlags.Bool("rename_tables", false, "Rename tables instead of dropping them") + + sourceShards := subFlags.String("source_shards", "", "Source shards") + targetShards := subFlags.String("target_shards", "", "Target shards") + skipSchemaCopy := subFlags.Bool("skip_schema_copy", false, "Skip copying of schema to target shards") + + _ = subFlags.Bool("v2", true, "") + + _ = dryRun //TODO: add dry run functionality + if err := subFlags.Parse(args); err != nil { + return err + } + + if subFlags.NArg() != 2 { + return fmt.Errorf("two arguments are needed: action, keyspace.workflow") + } + action := subFlags.Arg(0) + ksWorkflow := subFlags.Arg(1) + target, workflow, err := splitKeyspaceWorkflow(ksWorkflow) + if err != nil { + return err + } + _, err = wr.TopoServer().GetKeyspace(ctx, target) + if err != nil { + wr.Logger().Errorf("keyspace %s not found", target) + } + + vrwp := &wrangler.VReplicationWorkflowParams{ + TargetKeyspace: target, + Workflow: workflow, + DryRun: *dryRun, + } + + printDetails := func() error { + s := "" + res, err := wr.ShowWorkflow(ctx, workflow, target) + if err != nil { + return err + } + s += "Following vreplication streams are running in this workflow:\n\n" + for ksShard := range res.ShardStatuses { + statuses := res.ShardStatuses[ksShard].MasterReplicationStatuses + for _, st := range statuses { + now := time.Now().Nanosecond() + msg := "" + updateLag := int64(now) - st.TimeUpdated + if updateLag > 0*1e9 { + msg += " Vstream may not be running." 
+ } + txLag := int64(now) - st.TransactionTimestamp + msg += fmt.Sprintf(" VStream Lag: %ds", txLag/1e9) + s += fmt.Sprintf("Stream %s (id=%d) :: Status: %s.%s\n", ksShard, st.ID, st.State, msg) + } + } + wr.Logger().Printf("\n%s\n\n", s) + return nil + } + + wrapError := func(wf *wrangler.VReplicationWorkflow, err error) error { + wr.Logger().Errorf("\n%s\n", err.Error()) + log.Infof("In wrapError wf is %+v", wf) + wr.Logger().Infof("Workflow Status: %s\n", wf.CurrentState()) + if wf.Exists() { + printDetails() + } + return err + } + + //TODO: check if invalid parameters were passed in that do not apply to this action + originalAction := action + action = strings.ToLower(action) // allow users to input action in a case-insensitive manner + switch action { + case vReplicationWorkflowActionStart: + switch workflowType { + case wrangler.MoveTablesWorkflow: + if *sourceKeyspace == "" { + return fmt.Errorf("source keyspace is not specified") + } + if !*allTables && *tables == "" { + return fmt.Errorf("no tables specified to move") + } + vrwp.SourceKeyspace = *sourceKeyspace + vrwp.Tables = *tables + vrwp.AllTables = *allTables + vrwp.ExcludeTables = *excludes + workflowType = wrangler.MoveTablesWorkflow + case wrangler.ReshardWorkflow: + if *sourceShards == "" || *targetShards == "" { + return fmt.Errorf("source and target shards are not specified") + } + vrwp.SourceShards = strings.Split(*sourceShards, ",") + vrwp.TargetShards = strings.Split(*targetShards, ",") + vrwp.SkipSchemaCopy = *skipSchemaCopy + vrwp.SourceKeyspace = target + workflowType = wrangler.ReshardWorkflow + log.Infof("params are %s, %s, %+v", *sourceShards, *targetShards, vrwp) + default: + return fmt.Errorf("unknown workflow type passed: %v", workflowType) + } + vrwp.Cells = *cells + vrwp.TabletTypes = *tabletTypes + case vReplicationWorkflowActionSwitchTraffic, vReplicationWorkflowActionReverseTraffic: + vrwp.Cells = *cells + vrwp.TabletTypes = *tabletTypes + vrwp.Timeout = *timeout + vrwp.EnableReverseReplication = *reverseReplication + case vReplicationWorkflowActionAbort: + vrwp.KeepData = *keepData + case vReplicationWorkflowActionComplete: + switch workflowType { + case wrangler.MoveTablesWorkflow: + vrwp.RenameTables = *renameTables + case wrangler.ReshardWorkflow: + default: + return fmt.Errorf("unknown workflow type passed: %v", workflowType) + } + vrwp.KeepData = *keepData + } + + wf, err := wr.NewVReplicationWorkflow(ctx, workflowType, vrwp) + if err != nil { + log.Warningf("NewVReplicationWorkflow returned error %+v", wf) + return err + } + if !wf.Exists() && action != vReplicationWorkflowActionStart { + return fmt.Errorf("workflow %s does not exist", ksWorkflow) + } + + printCopyProgress := func() error { + copyProgress, err := wf.GetCopyProgress() + if err != nil { + return err + } + if copyProgress == nil { + wr.Logger().Printf("\nCopy Completed.\n") + } else { + wr.Logger().Printf("\nCopy Progress (approx.):\n") + var tables []string + for table := range *copyProgress { + tables = append(tables, table) + } + sort.Strings(tables) + s := "" + var progress wrangler.TableCopyProgress + for table := range *copyProgress { + progress = *(*copyProgress)[table] + rowCountPct := 100.0 * progress.TargetRowCount / progress.SourceRowCount + tableSizePct := 100.0 * progress.TargetTableSize / progress.SourceTableSize + s += fmt.Sprintf("%s: rows copied %d/%d (%d%%), size copied %d/%d (%d%%)\n", + table, progress.TargetRowCount, progress.SourceRowCount, rowCountPct, + progress.TargetTableSize, progress.SourceTableSize, 
tableSizePct) + } + wr.Logger().Printf("\n%s\n", s) + } + return printDetails() + + } + startState := wf.CachedState() + switch action { + case vReplicationWorkflowActionShow: + return printDetails() + case vReplicationWorkflowActionProgress: + return printCopyProgress() + case vReplicationWorkflowActionStart: + err = wf.Start() + //TODO: wait for streams to start or report error (pos != "", Message contains error, tx/update time recent) + case vReplicationWorkflowActionSwitchTraffic: + err = wf.SwitchTraffic(wrangler.DirectionForward) + case vReplicationWorkflowActionReverseTraffic: + err = wf.ReverseTraffic() + case vReplicationWorkflowActionComplete: + err = wf.Complete() + case vReplicationWorkflowActionAbort: + err = wf.Abort() + case vReplicationWorkflowActionGetState: + wr.Logger().Printf(wf.CachedState() + "\n") + return nil + default: + return fmt.Errorf("found unsupported action %s", originalAction) + } + if err != nil { + log.Warningf(" %s error: %v", originalAction, wf) + return wrapError(wf, err) + } + wr.Logger().Printf("%s was successful\nStart State: %s\nCurrent State: %s\n\n", + originalAction, startState, wf.CurrentState()) + return nil +} + func commandCreateLookupVindex(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { cell := subFlags.String("cell", "", "Cell to replicate from.") tabletTypes := subFlags.String("tablet_types", "", "Source tablet types to replicate from.") @@ -2105,6 +2334,7 @@ func commandMigrateServedFrom(ctx context.Context, wr *wrangler.Wrangler, subFla func commandDropSources(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { dryRun := subFlags.Bool("dry_run", false, "Does a dry run of commandDropSources and only reports the actions to be taken") renameTables := subFlags.Bool("rename_tables", false, "Rename tables instead of dropping them") + keepData := subFlags.Bool("keep_data", false, "Do not drop tables or shards (if true, only vreplication artifacts are cleaned up)") if err := subFlags.Parse(args); err != nil { return err } @@ -2122,7 +2352,7 @@ func commandDropSources(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl } _, _, _ = dryRun, keyspace, workflow - dryRunResults, err := wr.DropSources(ctx, keyspace, workflow, removalType, *dryRun) + dryRunResults, err := wr.DropSources(ctx, keyspace, workflow, removalType, *keepData, false, *dryRun) if err != nil { return err } @@ -2136,18 +2366,29 @@ func commandDropSources(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl func commandSwitchReads(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { reverse := subFlags.Bool("reverse", false, "Moves the served tablet type backward instead of forward.") cellsStr := subFlags.String("cells", "", "Specifies a comma-separated list of cells to update") - tabletType := subFlags.String("tablet_type", "", "Tablet type (replica or rdonly)") + tabletTypes := subFlags.String("tablet_types", "rdonly,replica", "Tablet types to switch one or both or rdonly/replica") + deprecatedTabletType := subFlags.String("tablet_type", "", "(DEPRECATED) one of rdonly/replica") dryRun := subFlags.Bool("dry_run", false, "Does a dry run of SwitchReads and only reports the actions to be taken") if err := subFlags.Parse(args); err != nil { return err } - if *tabletType == "" { - return fmt.Errorf("-tablet_type must be specified") + if !(*deprecatedTabletType == "" || *deprecatedTabletType == "replica" || *deprecatedTabletType == "rdonly") { + return 
fmt.Errorf("invalid value specified for -tablet_type: %s", *deprecatedTabletType) } - servedType, err := parseTabletType(*tabletType, []topodatapb.TabletType{topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY}) - if err != nil { - return err + + if *deprecatedTabletType != "" { + *tabletTypes = *deprecatedTabletType + } + + tabletTypesArr := strings.Split(*tabletTypes, ",") + var servedTypes []topodatapb.TabletType + for _, tabletType := range tabletTypesArr { + servedType, err := parseTabletType(tabletType, []topodatapb.TabletType{topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY}) + if err != nil { + return err + } + servedTypes = append(servedTypes, servedType) } var cells []string if *cellsStr != "" { @@ -2164,8 +2405,7 @@ func commandSwitchReads(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl if err != nil { return err } - - dryRunResults, err := wr.SwitchReads(ctx, keyspace, workflow, servedType, cells, direction, *dryRun) + dryRunResults, err := wr.SwitchReads(ctx, keyspace, workflow, servedTypes, cells, direction, *dryRun) if err != nil { return err } @@ -2177,9 +2417,11 @@ func commandSwitchReads(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl } func commandSwitchWrites(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { - filteredReplicationWaitTime := subFlags.Duration("filtered_replication_wait_time", 30*time.Second, "Specifies the maximum time to wait, in seconds, for filtered replication to catch up on master migrations. The migration will be aborted on timeout.") + timeout := subFlags.Duration("timeout", 30*time.Second, "Specifies the maximum time to wait, in seconds, for vreplication to catch up on master migrations. The migration will be aborted on timeout.") + filteredReplicationWaitTime := subFlags.Duration("filtered_replication_wait_time", 30*time.Second, "DEPRECATED Specifies the maximum time to wait, in seconds, for vreplication to catch up on master migrations. The migration will be aborted on timeout.") reverseReplication := subFlags.Bool("reverse_replication", true, "Also reverse the replication") - cancelMigrate := subFlags.Bool("cancel", false, "Cancel the failed migration and serve from source") + cancel := subFlags.Bool("cancel", false, "Cancel the failed migration and serve from source") + reverse := subFlags.Bool("reverse", false, "Reverse a previous SwitchWrites serve from source") dryRun := subFlags.Bool("dry_run", false, "Does a dry run of SwitchWrites and only reports the actions to be taken") if err := subFlags.Parse(args); err != nil { return err @@ -2192,8 +2434,11 @@ func commandSwitchWrites(ctx context.Context, wr *wrangler.Wrangler, subFlags *f if err != nil { return err } + if filteredReplicationWaitTime != timeout { + timeout = filteredReplicationWaitTime + } - journalID, dryRunResults, err := wr.SwitchWrites(ctx, keyspace, workflow, *filteredReplicationWaitTime, *cancelMigrate, *reverseReplication, *dryRun) + journalID, dryRunResults, err := wr.SwitchWrites(ctx, keyspace, workflow, *timeout, *cancel, *reverse, *reverseReplication, *dryRun) if err != nil { return err } @@ -3034,7 +3279,7 @@ func commandWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag. 
return err } if subFlags.NArg() != 2 { - return fmt.Errorf("usage: Workflow --dry-run keyspace.workflow start/stop/delete/list/listall") + return fmt.Errorf("usage: Workflow --dry-run keyspace[.workflow] start/stop/delete/list/listall") } keyspace := subFlags.Arg(0) action := strings.ToLower(subFlags.Arg(1)) diff --git a/go/vt/vttablet/tabletmanager/vreplication/controller.go b/go/vt/vttablet/tabletmanager/vreplication/controller.go index dbc8f6c6170..e92c3304afc 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/controller.go +++ b/go/vt/vttablet/tabletmanager/vreplication/controller.go @@ -155,7 +155,7 @@ func (ct *controller) run(ctx context.Context) { timer := time.NewTimer(*retryDelay) select { case <-ctx.Done(): - log.Warningf("context canceleld: %s", err.Error()) + log.Warningf("context canceled: %s", err.Error()) timer.Stop() return case <-timer.C: diff --git a/go/vt/vttablet/tabletserver/schema/engine.go b/go/vt/vttablet/tabletserver/schema/engine.go index 702a21e0617..de449863e78 100644 --- a/go/vt/vttablet/tabletserver/schema/engine.go +++ b/go/vt/vttablet/tabletserver/schema/engine.go @@ -284,9 +284,9 @@ func (se *Engine) ReloadAt(ctx context.Context, pos mysql.Position) error { // reload reloads the schema. It can also be used to initialize it. func (se *Engine) reload(ctx context.Context) error { - start := time.Now() + //start := time.Now() defer func() { - log.Infof("Time taken to load the schema: %v", time.Since(start)) + //log.Infof("Time taken to load the schema: %v", time.Since(start)) se.env.LogError() }() diff --git a/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go b/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go index 7fe33ca8d8d..c6d84a8e446 100644 --- a/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go +++ b/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go @@ -2013,9 +2013,7 @@ func startStream(ctx context.Context, t *testing.T, filter *binlogdatapb.Filter, go func() { defer close(ch) defer wg.Done() - log.Infof(">>>>>>>>>>> before vstream") vstream(ctx, t, position, tablePKs, filter, ch) - log.Infof(">>>>>>>>>> after vstream") }() return &wg, ch } diff --git a/go/vt/wrangler/fake_dbclient_test.go b/go/vt/wrangler/fake_dbclient_test.go index 11fee5d68fa..d716e3ad79b 100644 --- a/go/vt/wrangler/fake_dbclient_test.go +++ b/go/vt/wrangler/fake_dbclient_test.go @@ -19,9 +19,11 @@ package wrangler import ( "fmt" "regexp" + "strings" "testing" "github.com/stretchr/testify/assert" + "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/sqltypes" ) @@ -46,6 +48,7 @@ type dbResult struct { func (dbrs *dbResults) next(query string) (*sqltypes.Result, error) { if dbrs.exhausted() { + log.Infof(fmt.Sprintf("Unexpected query >%s<", query)) return nil, fmt.Errorf("code executed this query, but the test did not expect it: %s", query) } i := dbrs.index @@ -143,6 +146,13 @@ func (dc *fakeDBClient) ExecuteFetch(query string, maxrows int) (qr *sqltypes.Re if result := dc.invariants[query]; result != nil { return result, nil } + for q, result := range dc.invariants { //supports allowing just a prefix of an expected query + if strings.Contains(query, q) { + return result, nil + } + } + + log.Infof("Missing query: >>>>>>>>>>>>>>>>>>%s<<<<<<<<<<<<<<<", query) return nil, fmt.Errorf("unexpected query: %s", query) } @@ -150,12 +160,12 @@ func (dc *fakeDBClient) verifyQueries(t *testing.T) { t.Helper() for query, dbrs := range dc.queries { if !dbrs.exhausted() { - assert.FailNow(t, "expected query: %v did not get executed during the test", query) + 
assert.FailNowf(t, "expected query did not get executed during the test", query) } } for query, dbrs := range dc.queriesRE { if !dbrs.exhausted() { - assert.FailNow(t, "expected regex query: %v did not get executed during the test", query) + assert.FailNowf(t, "expected regex query did not get executed during the test", query) } } } diff --git a/go/vt/wrangler/keyspace.go b/go/vt/wrangler/keyspace.go index d6ecaee2114..87b7324d9e6 100644 --- a/go/vt/wrangler/keyspace.go +++ b/go/vt/wrangler/keyspace.go @@ -121,8 +121,8 @@ func (wr *Wrangler) validateNewWorkflow(ctx context.Context, keyspace, workflow fmt.Sprintf("workflow %s already exists in keyspace %s on tablet %d", workflow, keyspace, master.Alias.Uid), }, { fmt.Sprintf("select 1 from _vt.vreplication where db_name=%s and message='FROZEN'", encodeString(master.DbName())), - fmt.Sprintf("workflow %s.%s is in a frozen state on tablet %d, please review and delete it before resharding", - keyspace, workflow, master.Alias.Uid), + fmt.Sprintf("found previous frozen workflow on tablet %d, please review and delete it first before creating a new workflow", + master.Alias.Uid), }} for _, validation := range validations { p3qr, err := wr.tmc.VReplicationExec(ctx, master.Tablet, validation.query) diff --git a/go/vt/wrangler/materializer.go b/go/vt/wrangler/materializer.go index cce22b66fe3..9a19c918b16 100644 --- a/go/vt/wrangler/materializer.go +++ b/go/vt/wrangler/materializer.go @@ -155,8 +155,15 @@ func (wr *Wrangler) MoveTables(ctx context.Context, workflow, sourceKeyspace, ta return err } for _, table := range tables { - rules[table] = []string{sourceKeyspace + "." + table} - rules[targetKeyspace+"."+table] = []string{sourceKeyspace + "." + table} + toSource := []string{sourceKeyspace + "." + table} + rules[table] = toSource + rules[table+"@replica"] = toSource + rules[table+"@rdonly"] = toSource + rules[targetKeyspace+"."+table] = toSource + rules[targetKeyspace+"."+table+"@replica"] = toSource + rules[targetKeyspace+"."+table+"@rdonly"] = toSource + rules[sourceKeyspace+"."+table+"@replica"] = toSource + rules[sourceKeyspace+"."+table+"@rdonly"] = toSource } if err := wr.saveRoutingRules(ctx, rules); err != nil { return err diff --git a/go/vt/wrangler/stream_migrater_test.go b/go/vt/wrangler/stream_migrater_test.go index 80b9857466b..b43e39e72fb 100644 --- a/go/vt/wrangler/stream_migrater_test.go +++ b/go/vt/wrangler/stream_migrater_test.go @@ -34,6 +34,11 @@ import ( "vitess.io/vitess/go/vt/vttablet/tabletmanager/vreplication" ) +var ( + rdOnly = []topodatapb.TabletType{topodatapb.TabletType_RDONLY} + replica = []topodatapb.TabletType{topodatapb.TabletType_REPLICA} +) + func TestStreamMigrateMainflow(t *testing.T) { ctx := context.Background() tme := newTestShardMigrater(ctx, t, []string{"-40", "40-"}, []string{"-80", "80-"}) @@ -42,13 +47,13 @@ func TestStreamMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectCheckJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -166,7 +171,7 @@ func TestStreamMigrateMainflow(t *testing.T) { 
tme.expectCreateReverseVReplication() tme.expectStartReverseVReplication() tme.expectFrozenTargetVReplication() - if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false); err != nil { + if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false); err != nil { t.Fatal(err) } @@ -182,7 +187,7 @@ func TestStreamMigrateMainflow(t *testing.T) { tme.expectDeleteReverseVReplication() tme.expectDeleteTargetVReplication() - if _, err := tme.wr.DropSources(ctx, tme.targetKeyspace, "test", DropTable, false); err != nil { + if _, err := tme.wr.DropSources(ctx, tme.targetKeyspace, "test", DropTable, false, false, false); err != nil { t.Fatal(err) } verifyQueries(t, tme.allDBClients) @@ -195,12 +200,12 @@ func TestStreamMigrateTwoStreams(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -340,7 +345,7 @@ func TestStreamMigrateTwoStreams(t *testing.T) { tme.expectStartReverseVReplication() tme.expectFrozenTargetVReplication() - if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false); err != nil { + if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false); err != nil { t.Fatal(err) } @@ -364,12 +369,12 @@ func TestStreamMigrateOneToMany(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -474,7 +479,7 @@ func TestStreamMigrateOneToMany(t *testing.T) { tme.expectStartReverseVReplication() tme.expectFrozenTargetVReplication() - if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false); err != nil { + if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false); err != nil { t.Fatal(err) } @@ -497,12 +502,12 @@ func TestStreamMigrateManyToOne(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err 
!= nil { t.Fatal(err) } @@ -610,7 +615,7 @@ func TestStreamMigrateManyToOne(t *testing.T) { tme.expectStartReverseVReplication() tme.expectFrozenTargetVReplication() - if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false); err != nil { + if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false); err != nil { t.Fatal(err) } @@ -632,12 +637,12 @@ func TestStreamMigrateSyncSuccess(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -800,7 +805,7 @@ func TestStreamMigrateSyncSuccess(t *testing.T) { tme.expectStartReverseVReplication() tme.expectFrozenTargetVReplication() - if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false); err != nil { + if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false); err != nil { t.Fatal(err) } @@ -824,12 +829,12 @@ func TestStreamMigrateSyncFail(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -931,7 +936,7 @@ func TestStreamMigrateSyncFail(t *testing.T) { tme.expectCancelMigration() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) want := "does not match" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites err: %v, want %s", err, want) @@ -946,12 +951,12 @@ func TestStreamMigrateCancel(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1025,7 +1030,7 @@ func TestStreamMigrateCancel(t *testing.T) { } cancelMigration() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, 
false) want := "intentionally failed" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites err: %v, want %s", err, want) @@ -1051,12 +1056,12 @@ func TestStreamMigrateStoppedStreams(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1095,7 +1100,7 @@ func TestStreamMigrateStoppedStreams(t *testing.T) { } stopStreams() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) want := "cannot migrate until all streams are running: 0: 10" if err == nil || err.Error() != want { t.Errorf("SwitchWrites err: %v, want %v", err, want) @@ -1110,12 +1115,12 @@ func TestStreamMigrateCancelWithStoppedStreams(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1163,7 +1168,7 @@ func TestStreamMigrateCancelWithStoppedStreams(t *testing.T) { tme.expectCancelMigration() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, false, false) if err != nil { t.Fatal(err) } @@ -1177,12 +1182,12 @@ func TestStreamMigrateStillCopying(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1225,7 +1230,7 @@ func TestStreamMigrateStillCopying(t *testing.T) { } stopStreams() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) want := "cannot migrate while vreplication streams in source shards are still copying: 0" if err == nil || err.Error() != want { t.Errorf("SwitchWrites err: %v, want %v", err, want) @@ -1240,12 +1245,12 @@ func TestStreamMigrateEmptyWorkflow(t *testing.T) { 
tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1287,7 +1292,7 @@ func TestStreamMigrateEmptyWorkflow(t *testing.T) { } stopStreams() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) want := "VReplication streams must have named workflows for migration: shard: ks:0, stream: 1" if err == nil || err.Error() != want { t.Errorf("SwitchWrites err: %v, want %v", err, want) @@ -1302,12 +1307,12 @@ func TestStreamMigrateDupWorkflow(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1349,7 +1354,7 @@ func TestStreamMigrateDupWorkflow(t *testing.T) { } stopStreams() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) want := "VReplication stream has the same workflow name as the resharding workflow: shard: ks:0, stream: 1" if err == nil || err.Error() != want { t.Errorf("SwitchWrites err: %v, want %v", err, want) @@ -1365,12 +1370,12 @@ func TestStreamMigrateStreamsMismatch(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1422,7 +1427,7 @@ func TestStreamMigrateStreamsMismatch(t *testing.T) { } stopStreams() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) want := "streams are mismatched across source shards" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites err: %v, must contain %v", err, want) diff --git a/go/vt/wrangler/switcher.go b/go/vt/wrangler/switcher.go index e721211bf5f..a44909146d0 100644 --- a/go/vt/wrangler/switcher.go +++ 
b/go/vt/wrangler/switcher.go @@ -39,8 +39,6 @@ func (r *switcher) validateWorkflowHasCompleted(ctx context.Context) error { return r.ts.validateWorkflowHasCompleted(ctx) } -//TODO: do we need to disable ForeignKey before dropping tables? -//TODO: delete multiple tables in single statement? func (r *switcher) removeSourceTables(ctx context.Context, removalType TableRemovalType) error { return r.ts.removeSourceTables(ctx, removalType) } @@ -49,12 +47,12 @@ func (r *switcher) dropSourceShards(ctx context.Context) error { return r.ts.dropSourceShards(ctx) } -func (r *switcher) switchShardReads(ctx context.Context, cells []string, servedType topodatapb.TabletType, direction TrafficSwitchDirection) error { - return r.ts.switchShardReads(ctx, cells, servedType, direction) +func (r *switcher) switchShardReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, direction TrafficSwitchDirection) error { + return r.ts.switchShardReads(ctx, cells, servedTypes, direction) } -func (r *switcher) switchTableReads(ctx context.Context, cells []string, servedType topodatapb.TabletType, direction TrafficSwitchDirection) error { - return r.ts.switchTableReads(ctx, cells, servedType, direction) +func (r *switcher) switchTableReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, direction TrafficSwitchDirection) error { + return r.ts.switchTableReads(ctx, cells, servedTypes, direction) } func (r *switcher) startReverseVReplication(ctx context.Context) error { @@ -118,6 +116,14 @@ func (r *switcher) dropSourceReverseVReplicationStreams(ctx context.Context) err return r.ts.dropSourceReverseVReplicationStreams(ctx) } +func (r *switcher) removeTargetTables(ctx context.Context) error { + return r.ts.removeTargetTables(ctx) +} + +func (r *switcher) dropTargetShards(ctx context.Context) error { + return r.ts.dropTargetShards(ctx) +} + func (r *switcher) logs() *[]string { return nil } diff --git a/go/vt/wrangler/switcher_dry_run.go b/go/vt/wrangler/switcher_dry_run.go index f80245bfd23..a2d37c65447 100644 --- a/go/vt/wrangler/switcher_dry_run.go +++ b/go/vt/wrangler/switcher_dry_run.go @@ -37,7 +37,7 @@ type switcherDryRun struct { ts *trafficSwitcher } -func (dr *switcherDryRun) switchShardReads(ctx context.Context, cells []string, servedType topodatapb.TabletType, direction TrafficSwitchDirection) error { +func (dr *switcherDryRun) switchShardReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, direction TrafficSwitchDirection) error { sourceShards := make([]string, 0) targetShards := make([]string, 0) for _, source := range dr.ts.sources { @@ -58,7 +58,7 @@ func (dr *switcherDryRun) switchShardReads(ctx context.Context, cells []string, return nil } -func (dr *switcherDryRun) switchTableReads(ctx context.Context, cells []string, servedType topodatapb.TabletType, direction TrafficSwitchDirection) error { +func (dr *switcherDryRun) switchTableReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, direction TrafficSwitchDirection) error { ks := dr.ts.targetKeyspace if direction == DirectionBackward { ks = dr.ts.sourceKeyspace @@ -318,3 +318,42 @@ func (dr *switcherDryRun) dropSourceBlacklistedTables(ctx context.Context) error func (dr *switcherDryRun) logs() *[]string { return &dr.drLog.logs } + +func (dr *switcherDryRun) removeTargetTables(ctx context.Context) error { + logs := make([]string, 0) + for _, target := range dr.ts.targets { + for _, tableName := range dr.ts.tables { + logs = append(logs, 
fmt.Sprintf("\tKeyspace %s Shard %s DbName %s Tablet %d Table %s", + target.master.Keyspace, target.master.Shard, target.master.DbName(), target.master.Alias.Uid, tableName)) + } + } + if len(logs) > 0 { + dr.drLog.Log("Dropping following tables:") + dr.drLog.LogSlice(logs) + } + return nil +} + +func (dr *switcherDryRun) dropTargetShards(ctx context.Context) error { + logs := make([]string, 0) + tabletsList := make(map[string][]string) + for _, si := range dr.ts.targetShards() { + tabletAliases, err := dr.ts.wr.TopoServer().FindAllTabletAliasesInShard(ctx, si.Keyspace(), si.ShardName()) + if err != nil { + return err + } + tabletsList[si.ShardName()] = make([]string, 0) + for _, t := range tabletAliases { + tabletsList[si.ShardName()] = append(tabletsList[si.ShardName()], fmt.Sprintf("\t\t%d", t.Uid)) + } + sort.Strings(tabletsList[si.ShardName()]) + logs = append(logs, fmt.Sprintf("\tCell %s Keyspace %s Shard\n%s", + si.Shard.MasterAlias.Cell, si.Keyspace(), si.ShardName()), strings.Join(tabletsList[si.ShardName()], "\n")) + } + if len(logs) > 0 { + dr.drLog.Log("Deleting following shards (and all related tablets):") + dr.drLog.LogSlice(logs) + } + + return nil +} diff --git a/go/vt/wrangler/switcher_interface.go b/go/vt/wrangler/switcher_interface.go index 77f40904e92..6b532ce8425 100644 --- a/go/vt/wrangler/switcher_interface.go +++ b/go/vt/wrangler/switcher_interface.go @@ -37,8 +37,8 @@ type iswitcher interface { changeRouting(ctx context.Context) error streamMigraterfinalize(ctx context.Context, ts *trafficSwitcher, workflows []string) error startReverseVReplication(ctx context.Context) error - switchTableReads(ctx context.Context, cells []string, servedType topodatapb.TabletType, direction TrafficSwitchDirection) error - switchShardReads(ctx context.Context, cells []string, servedType topodatapb.TabletType, direction TrafficSwitchDirection) error + switchTableReads(ctx context.Context, cells []string, servedType []topodatapb.TabletType, direction TrafficSwitchDirection) error + switchShardReads(ctx context.Context, cells []string, servedType []topodatapb.TabletType, direction TrafficSwitchDirection) error validateWorkflowHasCompleted(ctx context.Context) error removeSourceTables(ctx context.Context, removalType TableRemovalType) error dropSourceShards(ctx context.Context) error @@ -46,5 +46,7 @@ type iswitcher interface { freezeTargetVReplication(ctx context.Context) error dropSourceReverseVReplicationStreams(ctx context.Context) error dropTargetVReplicationStreams(ctx context.Context) error + removeTargetTables(ctx context.Context) error + dropTargetShards(ctx context.Context) error logs() *[]string } diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index a1ed6bc447e..6d909f535f7 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -47,7 +47,8 @@ import ( ) const ( - frozenStr = "FROZEN" + frozenStr = "FROZEN" + errorNoStreams = "no streams found in keyspace %s for: %s" ) // TrafficSwitchDirection specifies the switching direction. @@ -126,18 +127,250 @@ type tsSource struct { journaled bool } -// SwitchReads is a generic way of switching read traffic for a resharding workflow. 
-func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow string, servedType topodatapb.TabletType, cells []string, direction TrafficSwitchDirection, dryRun bool) (*[]string, error) { - if servedType != topodatapb.TabletType_REPLICA && servedType != topodatapb.TabletType_RDONLY { - return nil, fmt.Errorf("tablet type must be REPLICA or RDONLY: %v", servedType) +const ( + workflowTypeReshard = "Reshard" + workflowTypeMoveTables = "MoveTables" +) + +type workflowState struct { + Workflow string + SourceKeyspace string + TargetKeyspace string + WorkflowType string + + ReplicaCellsSwitched []string + ReplicaCellsNotSwitched []string + + RdonlyCellsSwitched []string + RdonlyCellsNotSwitched []string + + WritesSwitched bool +} + +// For a Reshard, to check whether we have switched reads for a tablet type, we check if any one of the source shards has +// the query service disabled in its tablet control record +func (wr *Wrangler) getCellsWithShardReadsSwitched(ctx context.Context, targetKeyspace string, si *topo.ShardInfo, tabletType string) ( + cellsSwitched, cellsNotSwitched []string, err error) { + + cells, err := wr.ts.GetCellInfoNames(ctx) + if err != nil { + return nil, nil, err } - ts, err := wr.buildTrafficSwitcher(ctx, targetKeyspace, workflow) + for _, cell := range cells { + wr.Logger().Infof("cell %s", cell) + srvKeyspace, err := wr.ts.GetSrvKeyspace(ctx, cell, targetKeyspace) + if err != nil { + return nil, nil, err + } + // Checking one shard is enough. + var shardServedTypes []string + found := false + noControls := true + for _, partition := range srvKeyspace.GetPartitions() { + if !strings.EqualFold(partition.GetServedType().String(), tabletType) { + continue + } + + // If reads and writes are both switched it is possible that the shard is not in the partition table + for _, shardReference := range partition.GetShardReferences() { + if key.KeyRangeEqual(shardReference.GetKeyRange(), si.GetKeyRange()) { + found = true + break + } + } + + // It is possible that there are no tablet controls if the target shards are not yet serving + // or once reads and writes are both switched, + if len(partition.GetShardTabletControls()) == 0 { + noControls = true + break + } + for _, tabletControl := range partition.GetShardTabletControls() { + if key.KeyRangeEqual(tabletControl.GetKeyRange(), si.GetKeyRange()) { + if !tabletControl.GetQueryServiceDisabled() { + shardServedTypes = append(shardServedTypes, si.ShardName()) + } + break + } + } + } + if found && (len(shardServedTypes) > 0 || noControls) { + cellsNotSwitched = append(cellsNotSwitched, cell) + } else { + cellsSwitched = append(cellsSwitched, cell) + } + } + return cellsSwitched, cellsNotSwitched, nil +} + +// For MoveTables, to check whether we have switched reads for a tablet type, we check whether the routing rule +// for the tablet_type is pointing to the target keyspace +func (wr *Wrangler) getCellsWithTableReadsSwitched(ctx context.Context, targetKeyspace, table, tabletType string) ( + cellsSwitched, cellsNotSwitched []string, err error) { + + cells, err := wr.ts.GetCellInfoNames(ctx) if err != nil { + return nil, nil, err + } + getKeyspace := func(ruleTarget string) (string, error) { + arr := strings.Split(ruleTarget, ".") + if len(arr) != 2 { + return "", fmt.Errorf("rule target is not correctly formatted: %s", ruleTarget) + } + return arr[0], nil + } + for _, cell := range cells { + srvVSchema, err := wr.ts.GetSrvVSchema(ctx, cell) + if err != nil { + return nil, nil, err + } + rules := 
srvVSchema.RoutingRules.Rules + found := false + switched := false + for _, rule := range rules { + ruleName := fmt.Sprintf("%s.%s@%s", targetKeyspace, table, tabletType) + if rule.FromTable == ruleName { + found = true + for _, to := range rule.ToTables { + ks, err := getKeyspace(to) + if err != nil { + log.Errorf(err.Error()) + return nil, nil, err + } + if ks == targetKeyspace { + switched = true + break // if one table in the workflow is switched, we are done + } + } + } + if found { + break + } + } + if switched { + cellsSwitched = append(cellsSwitched, cell) + } else { + cellsNotSwitched = append(cellsNotSwitched, cell) + } + } + return cellsSwitched, cellsNotSwitched, nil +} + +func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workflow string) (*trafficSwitcher, *workflowState, error) { + ts, err := wr.buildTrafficSwitcher(ctx, targetKeyspace, workflow) + + if ts == nil || err != nil { + if err.Error() == fmt.Sprintf(errorNoStreams, targetKeyspace, workflow) { + return nil, nil, nil + } wr.Logger().Errorf("buildTrafficSwitcher failed: %v", err) + return nil, nil, err + } + + ws := &workflowState{Workflow: workflow, TargetKeyspace: targetKeyspace} + ws.SourceKeyspace = ts.sourceKeyspace + var cellsSwitched, cellsNotSwitched []string + var keyspace string + var reverse bool + + // We reverse writes by using the source_keyspace.workflowname_reverse workflow spec, so when checking routing rules + // we need to use the source of the reverse workflow, which is the target of the workflow the user initiated. + // Similarly, we use a target shard of the reverse workflow as the original source to check if writes have been switched. + if strings.HasSuffix(workflow, "_reverse") { + reverse = true + keyspace = ws.SourceKeyspace + workflow = reverseName(workflow) + } else { + keyspace = targetKeyspace + } + if ts.migrationType == binlogdatapb.MigrationType_TABLES { + ws.WorkflowType = workflowTypeMoveTables + + // We assume a consistent state, so it is enough to check the routing rule of one table for replica/rdonly. + if len(ts.tables) == 0 { + return nil, nil, fmt.Errorf("no tables in workflow %s.%s", keyspace, workflow) + + } + table := ts.tables[0] + + cellsSwitched, cellsNotSwitched, err = wr.getCellsWithTableReadsSwitched(ctx, keyspace, table, "rdonly") + if err != nil { + return nil, nil, err + } + ws.RdonlyCellsNotSwitched, ws.RdonlyCellsSwitched = cellsNotSwitched, cellsSwitched + cellsSwitched, cellsNotSwitched, err = wr.getCellsWithTableReadsSwitched(ctx, keyspace, table, "replica") + if err != nil { + return nil, nil, err + } + ws.ReplicaCellsNotSwitched, ws.ReplicaCellsSwitched = cellsNotSwitched, cellsSwitched + rules, err := ts.wr.getRoutingRules(ctx) + if err != nil { + return nil, nil, err + } + for _, table := range ts.tables { + rr := rules[table] + // If a rule exists for the table and points to the target keyspace, writes have been switched. + if len(rr) > 0 && rr[0] == fmt.Sprintf("%s.%s", keyspace, table) { + ws.WritesSwitched = true + } + } + } else { + ws.WorkflowType = workflowTypeReshard + + // We assume a consistent state, so it is enough to check one shard. + var shard *topo.ShardInfo + if reverse { + shard = ts.targetShards()[0] + } else { + shard = ts.sourceShards()[0] + } + + cellsSwitched, cellsNotSwitched, err = wr.getCellsWithShardReadsSwitched(ctx, keyspace, shard, "rdonly") + if err != nil { + return nil, nil, err + } + ws.RdonlyCellsNotSwitched, ws.RdonlyCellsSwitched = cellsNotSwitched, cellsSwitched + cellsSwitched, cellsNotSwitched, err =
wr.getCellsWithShardReadsSwitched(ctx, keyspace, shard, "replica") + if err != nil { + return nil, nil, err + } + ws.ReplicaCellsNotSwitched, ws.ReplicaCellsSwitched = cellsNotSwitched, cellsSwitched + if !shard.IsMasterServing { + ws.WritesSwitched = true + } + } + + return ts, ws, nil +} + +// SwitchReads is a generic way of switching read traffic for a resharding workflow. +func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow string, servedTypes []topodatapb.TabletType, + cells []string, direction TrafficSwitchDirection, dryRun bool) (*[]string, error) { + + ts, ws, err := wr.getWorkflowState(ctx, targetKeyspace, workflow) + if err != nil { + wr.Logger().Errorf("getWorkflowState failed: %v", err) return nil, err } + if ts == nil { + errorMsg := fmt.Sprintf("workflow %s not found in keyspace %s", workflow, targetKeyspace) + wr.Logger().Errorf(errorMsg) + return nil, fmt.Errorf(errorMsg) + } + wr.Logger().Infof("SwitchReads: %s.%s tt %+v, cells %+v, workflow state: %+v", targetKeyspace, workflow, servedTypes, cells, ws) + for _, servedType := range servedTypes { + if servedType != topodatapb.TabletType_REPLICA && servedType != topodatapb.TabletType_RDONLY { + return nil, fmt.Errorf("tablet type must be REPLICA or RDONLY: %v", servedType) + } + if direction == DirectionBackward && servedType == topodatapb.TabletType_REPLICA && len(ws.ReplicaCellsSwitched) == 0 { + return nil, fmt.Errorf("requesting reversal of SwitchReads for REPLICAs but REPLICA reads have not been switched") + } + if direction == DirectionBackward && servedType == topodatapb.TabletType_RDONLY && len(ws.RdonlyCellsSwitched) == 0 { + return nil, fmt.Errorf("requesting reversal of SwitchReads for RDONLYs but RDONLY reads have not been switched") + } + } - //If journals exist notify user and fail + // If journals exist notify user and fail journalsExist, _, err := ts.checkJournals(ctx) if err != nil { wr.Logger().Errorf("checkJournals failed: %v", err) @@ -145,9 +378,7 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow st } if journalsExist { wr.Logger().Errorf("Found a previous journal entry for %d", ts.id) - return nil, fmt.Errorf("found an entry from a previous run for migration id %d in _vt.resharding_journal, please review and delete it before proceeding", ts.id) } - var sw iswitcher if dryRun { sw = &switcherDryRun{ts: ts, drLog: NewLogRecorder()} @@ -155,10 +386,7 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow st sw = &switcher{ts: ts, wr: wr} } - if ts.frozen { - return nil, fmt.Errorf("cannot switch reads while SwitchWrites is in progress") - } - if err := ts.validate(ctx, false /* isWrite */); err != nil { + if err := ts.validate(ctx); err != nil { ts.wr.Logger().Errorf("validate failed: %v", err) return nil, err } @@ -172,13 +400,14 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow st defer unlock(&err) if ts.migrationType == binlogdatapb.MigrationType_TABLES { - if err := sw.switchTableReads(ctx, cells, servedType, direction); err != nil { + if err := sw.switchTableReads(ctx, cells, servedTypes, direction); err != nil { ts.wr.Logger().Errorf("switchTableReads failed: %v", err) return nil, err } return sw.logs(), nil } - if err := ts.switchShardReads(ctx, cells, servedType, direction); err != nil { + wr.Logger().Infof("switchShardReads: %+v, %+v, %+v", cells, servedTypes, direction) + if err := ts.switchShardReads(ctx, cells, servedTypes, direction); err != nil { 
ts.wr.Logger().Errorf("switchShardReads failed: %v", err) return nil, err } @@ -186,12 +415,18 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow st } // SwitchWrites is a generic way of migrating write traffic for a resharding workflow. -func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflow string, filteredReplicationWaitTime time.Duration, cancelMigrate, reverseReplication bool, dryRun bool) (journalID int64, dryRunResults *[]string, err error) { - ts, err := wr.buildTrafficSwitcher(ctx, targetKeyspace, workflow) +func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflow string, timeout time.Duration, cancel, reverse, reverseReplication bool, dryRun bool) (journalID int64, dryRunResults *[]string, err error) { + ts, ws, err := wr.getWorkflowState(ctx, targetKeyspace, workflow) + _ = ws if err != nil { - wr.Logger().Errorf("buildTrafficSwitcher failed: %v", err) + wr.Logger().Errorf("getWorkflowState failed: %v", err) return 0, nil, err } + if ts == nil { + errorMsg := fmt.Sprintf("workflow %s not found in keyspace %s", workflow, targetKeyspace) + wr.Logger().Errorf(errorMsg) + return 0, nil, fmt.Errorf(errorMsg) + } var sw iswitcher if dryRun { @@ -206,7 +441,7 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflow s } ts.wr.Logger().Infof("Built switching metadata: %+v", ts) - if err := ts.validate(ctx, true /* isWrite */); err != nil { + if err := ts.validate(ctx); err != nil { ts.wr.Logger().Errorf("validate failed: %v", err) return 0, nil, err } @@ -237,12 +472,12 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflow s } if !journalsExist { ts.wr.Logger().Infof("No previous journals were found. Proceeding normally.") - sm, err := buildStreamMigrater(ctx, ts, cancelMigrate) + sm, err := buildStreamMigrater(ctx, ts, cancel) if err != nil { ts.wr.Logger().Errorf("buildStreamMigrater failed: %v", err) return 0, nil, err } - if cancelMigrate { + if cancel { sw.cancelMigration(ctx, sm) return 0, sw.logs(), nil } @@ -266,7 +501,7 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflow s } ts.wr.Logger().Infof("Waiting for streams to catchup") - if err := sw.waitForCatchup(ctx, filteredReplicationWaitTime); err != nil { + if err := sw.waitForCatchup(ctx, timeout); err != nil { ts.wr.Logger().Errorf("waitForCatchup failed: %v", err) sw.cancelMigration(ctx, sm) return 0, nil, err @@ -286,7 +521,7 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflow s return 0, nil, err } } else { - if cancelMigrate { + if cancel { err := fmt.Errorf("traffic switching has reached the point of no return, cannot cancel") ts.wr.Logger().Errorf("%v", err) return 0, nil, err @@ -332,8 +567,8 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflow s return ts.id, sw.logs(), nil } -// DropSources cleans up source tables, shards and blacklisted tables after a MoveTables/Reshard is completed -func (wr *Wrangler) DropSources(ctx context.Context, targetKeyspace, workflow string, removalType TableRemovalType, dryRun bool) (*[]string, error) { +// DropTargets cleans up target tables, shards and blacklisted tables after a MoveTables/Reshard is completed +func (wr *Wrangler) DropTargets(ctx context.Context, targetKeyspace, workflow string, keepData, dryRun bool) (*[]string, error) { ts, err := wr.buildTrafficSwitcher(ctx, targetKeyspace, workflow) if err != nil { wr.Logger().Errorf("buildTrafficSwitcher failed: %v", 
err) @@ -346,7 +581,7 @@ func (wr *Wrangler) DropSources(ctx context.Context, targetKeyspace, workflow st sw = &switcher{ts: ts, wr: wr} } var tctx context.Context - tctx, sourceUnlock, lockErr := sw.lockKeyspace(ctx, ts.sourceKeyspace, "DropSources") + tctx, sourceUnlock, lockErr := sw.lockKeyspace(ctx, ts.sourceKeyspace, "DropTargets") if lockErr != nil { ts.wr.Logger().Errorf("Source LockKeyspace failed: %v", lockErr) return nil, lockErr @@ -354,7 +589,7 @@ func (wr *Wrangler) DropSources(ctx context.Context, targetKeyspace, workflow st defer sourceUnlock(&err) ctx = tctx if ts.targetKeyspace != ts.sourceKeyspace { - tctx, targetUnlock, lockErr := sw.lockKeyspace(ctx, ts.targetKeyspace, "DropSources") + tctx, targetUnlock, lockErr := sw.lockKeyspace(ctx, ts.targetKeyspace, "DropTargets") if lockErr != nil { ts.wr.Logger().Errorf("Target LockKeyspace failed: %v", lockErr) return nil, lockErr @@ -362,38 +597,105 @@ func (wr *Wrangler) DropSources(ctx context.Context, targetKeyspace, workflow st defer targetUnlock(&err) ctx = tctx } - if err := sw.validateWorkflowHasCompleted(ctx); err != nil { - wr.Logger().Errorf("Workflow has not completed, cannot DropSources: %v", err) + if !keepData { + switch ts.migrationType { + case binlogdatapb.MigrationType_TABLES: + log.Infof("Deleting target tables") + if err := sw.removeTargetTables(ctx); err != nil { + return nil, err + } + if err := sw.dropSourceBlacklistedTables(ctx); err != nil { + return nil, err + } + case binlogdatapb.MigrationType_SHARDS: + log.Infof("Removing target shards") + if err := sw.dropTargetShards(ctx); err != nil { + return nil, err + } + } + } + if err := wr.dropArtifacts(ctx, sw); err != nil { return nil, err } - switch ts.migrationType { - case binlogdatapb.MigrationType_TABLES: - if err := sw.removeSourceTables(ctx, removalType); err != nil { - return nil, err - } - if err := sw.dropSourceBlacklistedTables(ctx); err != nil { - return nil, err + return sw.logs(), nil +} + +func (wr *Wrangler) dropArtifacts(ctx context.Context, sw iswitcher) error { + if err := sw.dropSourceReverseVReplicationStreams(ctx); err != nil { + return err + } + if err := sw.dropTargetVReplicationStreams(ctx); err != nil { + return err + } + return nil +} + +// DropSources cleans up source tables, shards and blacklisted tables after a MoveTables/Reshard is completed +func (wr *Wrangler) DropSources(ctx context.Context, targetKeyspace, workflow string, removalType TableRemovalType, keepData, force, dryRun bool) (*[]string, error) { + ts, err := wr.buildTrafficSwitcher(ctx, targetKeyspace, workflow) + if err != nil { + wr.Logger().Errorf("buildTrafficSwitcher failed: %v", err) + return nil, err + } + var sw iswitcher + if dryRun { + sw = &switcherDryRun{ts: ts, drLog: NewLogRecorder()} + } else { + sw = &switcher{ts: ts, wr: wr} + } + var tctx context.Context + tctx, sourceUnlock, lockErr := sw.lockKeyspace(ctx, ts.sourceKeyspace, "DropSources") + if lockErr != nil { + ts.wr.Logger().Errorf("Source LockKeyspace failed: %v", lockErr) + return nil, lockErr + } + defer sourceUnlock(&err) + ctx = tctx + if ts.targetKeyspace != ts.sourceKeyspace { + tctx, targetUnlock, lockErr := sw.lockKeyspace(ctx, ts.targetKeyspace, "DropSources") + if lockErr != nil { + ts.wr.Logger().Errorf("Target LockKeyspace failed: %v", lockErr) + return nil, lockErr } - case binlogdatapb.MigrationType_SHARDS: - if err := sw.dropSourceShards(ctx); err != nil { + defer targetUnlock(&err) + ctx = tctx + } + if !force { + if err := sw.validateWorkflowHasCompleted(ctx); err != nil { 
+ wr.Logger().Errorf("Workflow has not completed, cannot DropSources: %v", err) return nil, err } } - if err := sw.dropSourceReverseVReplicationStreams(ctx); err != nil { - return nil, err + if !keepData { + switch ts.migrationType { + case binlogdatapb.MigrationType_TABLES: + log.Infof("Deleting tables") + if err := sw.removeSourceTables(ctx, removalType); err != nil { + return nil, err + } + if err := sw.dropSourceBlacklistedTables(ctx); err != nil { + return nil, err + } + case binlogdatapb.MigrationType_SHARDS: + log.Infof("Removing shards") + if err := sw.dropSourceShards(ctx); err != nil { + return nil, err + } + } } - if err := sw.dropTargetVReplicationStreams(ctx); err != nil { + if err := wr.dropArtifacts(ctx, sw); err != nil { return nil, err } - return sw.logs(), nil } func (wr *Wrangler) buildTrafficSwitcher(ctx context.Context, targetKeyspace, workflow string) (*trafficSwitcher, error) { - targets, frozen, optCells, optTabletTypes, err := wr.buildTargets(ctx, targetKeyspace, workflow) + tgtInfo, err := wr.buildTargets(ctx, targetKeyspace, workflow) if err != nil { + log.Infof("Error building targets: %s", err) return nil, err } + targets, frozen, optCells, optTabletTypes := tgtInfo.targets, tgtInfo.frozen, tgtInfo.optCells, tgtInfo.optTabletTypes ts := &trafficSwitcher{ wr: wr, @@ -407,7 +709,7 @@ func (wr *Wrangler) buildTrafficSwitcher(ctx context.Context, targetKeyspace, wo optCells: optCells, optTabletTypes: optTabletTypes, } - ts.wr.Logger().Infof("Migration ID for workflow %s: %d", workflow, ts.id) + log.Infof("Migration ID for workflow %s: %d", workflow, ts.id) // Build the sources for _, target := range targets { @@ -476,11 +778,21 @@ func (wr *Wrangler) buildTrafficSwitcher(ctx context.Context, targetKeyspace, wo return ts, nil } -func (wr *Wrangler) buildTargets(ctx context.Context, targetKeyspace, workflow string) (targets map[string]*tsTarget, frozen bool, optCells string, optTabletTypes string, err error) { - targets = make(map[string]*tsTarget) +type targetInfo struct { + targets map[string]*tsTarget + frozen bool + optCells string + optTabletTypes string +} + +func (wr *Wrangler) buildTargets(ctx context.Context, targetKeyspace, workflow string) (*targetInfo, error) { + var err error + var frozen bool + var optCells, optTabletTypes string + targets := make(map[string]*tsTarget) targetShards, err := wr.ts.GetShardNames(ctx, targetKeyspace) if err != nil { - return nil, false, "", "", err + return nil, err } // We check all target shards. All of them may not have a stream. // For example, if we're splitting -80 to -40,40-80, only those @@ -488,19 +800,19 @@ func (wr *Wrangler) buildTargets(ctx context.Context, targetKeyspace, workflow s for _, targetShard := range targetShards { targetsi, err := wr.ts.GetShard(ctx, targetKeyspace, targetShard) if err != nil { - return nil, false, "", "", err + return nil, err } if targetsi.MasterAlias == nil { // This can happen if bad inputs are given. 
- return nil, false, "", "", fmt.Errorf("shard %v:%v doesn't have a master set", targetKeyspace, targetShard) + return nil, fmt.Errorf("shard %v:%v doesn't have a master set", targetKeyspace, targetShard) } targetMaster, err := wr.ts.GetTablet(ctx, targetsi.MasterAlias) if err != nil { - return nil, false, "", "", err + return nil, err } p3qr, err := wr.tmc.VReplicationExec(ctx, targetMaster.Tablet, fmt.Sprintf("select id, source, message, cell, tablet_types from _vt.vreplication where workflow=%s and db_name=%s", encodeString(workflow), encodeString(targetMaster.DbName()))) if err != nil { - return nil, false, "", "", err + return nil, err } // If there's no vreplication stream, check the next target. if len(p3qr.Rows) < 1 { @@ -516,12 +828,12 @@ func (wr *Wrangler) buildTargets(ctx context.Context, targetKeyspace, workflow s for _, row := range qr.Rows { id, err := evalengine.ToInt64(row[0]) if err != nil { - return nil, false, "", "", err + return nil, err } var bls binlogdatapb.BinlogSource if err := proto.UnmarshalText(row[1].ToString(), &bls); err != nil { - return nil, false, "", "", err + return nil, err } targets[targetShard].sources[uint32(id)] = &bls @@ -533,9 +845,11 @@ func (wr *Wrangler) buildTargets(ctx context.Context, targetKeyspace, workflow s } } if len(targets) == 0 { - return nil, false, "", "", fmt.Errorf("no streams found in keyspace %s for: %s", targetKeyspace, workflow) + err2 := fmt.Errorf(errorNoStreams, targetKeyspace, workflow) + return nil, err2 } - return targets, frozen, optCells, optTabletTypes, nil + tinfo := &targetInfo{targets: targets, frozen: frozen, optCells: optCells, optTabletTypes: optTabletTypes} + return tinfo, nil } // hashStreams produces a reproducible hash based on the input parameters. @@ -557,7 +871,7 @@ func hashStreams(targetKeyspace string, targets map[string]*tsTarget) int64 { return int64(hasher.Sum64() & math.MaxInt64) } -func (ts *trafficSwitcher) validate(ctx context.Context, isWrite bool) error { +func (ts *trafficSwitcher) validate(ctx context.Context) error { if ts.migrationType == binlogdatapb.MigrationType_TABLES { // All shards must be present. if err := ts.compareShards(ctx, ts.sourceKeyspace, ts.sourceShards()); err != nil { @@ -572,61 +886,6 @@ func (ts *trafficSwitcher) validate(ctx context.Context, isWrite bool) error { return fmt.Errorf("cannot migrate streams with wild card table names: %v", table) } } - if isWrite { - return ts.validateTableForWrite(ctx) - } - } else { // binlogdatapb.MigrationType_SHARDS - if isWrite { - return ts.validateShardForWrite(ctx) - } - } - return nil -} - -func (ts *trafficSwitcher) validateTableForWrite(ctx context.Context) error { - rules, err := ts.wr.getRoutingRules(ctx) - if err != nil { - return err - } - for _, table := range ts.tables { - for _, tabletType := range []topodatapb.TabletType{topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY} { - tt := strings.ToLower(tabletType.String()) - if rules[table+"@"+tt] == nil || rules[ts.targetKeyspace+"."+table+"@"+tt] == nil { - return fmt.Errorf("missing tablet type specific routing, read-only traffic must be switched before switching writes: %v", table) - } - } - } - return nil -} - -func (ts *trafficSwitcher) validateShardForWrite(ctx context.Context) error { - srvKeyspaces, err := ts.wr.ts.GetSrvKeyspaceAllCells(ctx, ts.sourceKeyspace) - if err != nil { - return err - } - - // Checking one shard is enough. 
- var si *topo.ShardInfo - for _, source := range ts.sources { - si = source.si - break - } - - for _, srvKeyspace := range srvKeyspaces { - var shardServedTypes []string - for _, partition := range srvKeyspace.GetPartitions() { - if partition.GetServedType() == topodatapb.TabletType_MASTER { - continue - } - for _, shardReference := range partition.GetShardReferences() { - if key.KeyRangeEqual(shardReference.GetKeyRange(), si.GetKeyRange()) { - shardServedTypes = append(shardServedTypes, partition.GetServedType().String()) - } - } - } - if len(shardServedTypes) > 0 { - return fmt.Errorf("cannot switch MASTER away from %v/%v until everything else is switched. Make sure that the following types are switched first: %v", si.Keyspace(), si.ShardName(), strings.Join(shardServedTypes, ", ")) - } } return nil } @@ -648,7 +907,8 @@ func (ts *trafficSwitcher) compareShards(ctx context.Context, keyspace string, s return nil } -func (ts *trafficSwitcher) switchTableReads(ctx context.Context, cells []string, servedType topodatapb.TabletType, direction TrafficSwitchDirection) error { +func (ts *trafficSwitcher) switchTableReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, direction TrafficSwitchDirection) error { + log.Infof("switchTableReads: servedTypes: %+v, direction %v", servedTypes, direction) rules, err := ts.wr.getRoutingRules(ctx) if err != nil { return err @@ -657,17 +917,23 @@ func (ts *trafficSwitcher) switchTableReads(ctx context.Context, cells []string, // table -> sourceKeyspace.table // targetKeyspace.table -> sourceKeyspace.table // For forward migration, we add tablet type specific rules to redirect traffic to the target. - // For backward, we delete them. - tt := strings.ToLower(servedType.String()) - for _, table := range ts.tables { - if direction == DirectionForward { - rules[table+"@"+tt] = []string{ts.targetKeyspace + "." + table} - rules[ts.targetKeyspace+"."+table+"@"+tt] = []string{ts.targetKeyspace + "." + table} - rules[ts.sourceKeyspace+"."+table+"@"+tt] = []string{ts.targetKeyspace + "." + table} - } else { - delete(rules, table+"@"+tt) - delete(rules, ts.targetKeyspace+"."+table+"@"+tt) - delete(rules, ts.sourceKeyspace+"."+table+"@"+tt) + // For backward, we redirect them back to the source. + for _, servedType := range servedTypes { + tt := strings.ToLower(servedType.String()) + for _, table := range ts.tables { + if direction == DirectionForward { + log.Infof("Route direction forward") + toTarget := []string{ts.targetKeyspace + "." + table} + rules[table+"@"+tt] = toTarget + rules[ts.targetKeyspace+"."+table+"@"+tt] = toTarget + rules[ts.sourceKeyspace+"."+table+"@"+tt] = toTarget + } else { + log.Infof("Route direction backwards") + toSource := []string{ts.sourceKeyspace + "."
+ table} + rules[table+"@"+tt] = toSource + rules[ts.targetKeyspace+"."+table+"@"+tt] = toSource + rules[ts.sourceKeyspace+"."+table+"@"+tt] = toSource + } } } if err := ts.wr.saveRoutingRules(ctx, rules); err != nil { @@ -676,21 +942,26 @@ func (ts *trafficSwitcher) switchTableReads(ctx context.Context, cells []string, return ts.wr.ts.RebuildSrvVSchema(ctx, cells) } -func (ts *trafficSwitcher) switchShardReads(ctx context.Context, cells []string, servedType topodatapb.TabletType, direction TrafficSwitchDirection) error { +func (ts *trafficSwitcher) switchShardReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, direction TrafficSwitchDirection) error { var fromShards, toShards []*topo.ShardInfo if direction == DirectionForward { fromShards, toShards = ts.sourceShards(), ts.targetShards() } else { fromShards, toShards = ts.targetShards(), ts.sourceShards() } - - if err := ts.wr.updateShardRecords(ctx, ts.sourceKeyspace, fromShards, cells, servedType, true /* isFrom */, false /* clearSourceShards */); err != nil { - return err - } - if err := ts.wr.updateShardRecords(ctx, ts.sourceKeyspace, toShards, cells, servedType, false, false); err != nil { - return err + for _, servedType := range servedTypes { + if err := ts.wr.updateShardRecords(ctx, ts.sourceKeyspace, fromShards, cells, servedType, true /* isFrom */, false /* clearSourceShards */); err != nil { + return err + } + if err := ts.wr.updateShardRecords(ctx, ts.sourceKeyspace, toShards, cells, servedType, false, false); err != nil { + return err + } + err := ts.wr.ts.MigrateServedType(ctx, ts.sourceKeyspace, toShards, fromShards, servedType, cells) + if err != nil { + return err + } } - return ts.wr.ts.MigrateServedType(ctx, ts.sourceKeyspace, toShards, fromShards, servedType, cells) + return nil } func (wr *Wrangler) checkIfJournalExistsOnTablet(ctx context.Context, tablet *topodatapb.Tablet, migrationID int64) (*binlogdatapb.Journal, bool, error) { @@ -1008,33 +1279,17 @@ func (ts *trafficSwitcher) allowTableTargetWrites(ctx context.Context) error { func (ts *trafficSwitcher) changeRouting(ctx context.Context) error { if ts.migrationType == binlogdatapb.MigrationType_TABLES { - return ts.changeTableRouting(ctx) + return ts.changeWriteRoute(ctx) } return ts.changeShardRouting(ctx) } -func (ts *trafficSwitcher) changeTableRouting(ctx context.Context) error { +func (ts *trafficSwitcher) changeWriteRoute(ctx context.Context) error { rules, err := ts.wr.getRoutingRules(ctx) if err != nil { return err } - // We assume that the following rules were setup when the targets were created: - // table -> sourceKeyspace.table - // targetKeyspace.table -> sourceKeyspace.table - // Additionally, SwitchReads would have added rules like this: - // table@replica -> targetKeyspace.table - // targetKeyspace.table@replica -> targetKeyspace.table - // After this step, only the following rules will be left: - // table -> targetKeyspace.table - // sourceKeyspace.table -> targetKeyspace.table for _, table := range ts.tables { - for _, tabletType := range []topodatapb.TabletType{topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY} { - tt := strings.ToLower(tabletType.String()) - delete(rules, table+"@"+tt) - delete(rules, ts.targetKeyspace+"."+table+"@"+tt) - delete(rules, ts.sourceKeyspace+"."+table+"@"+tt) - ts.wr.Logger().Infof("Delete routing: %v %v %v", table+"@"+tt, ts.targetKeyspace+"."+table+"@"+tt, ts.sourceKeyspace+"."+table+"@"+tt) - } delete(rules, ts.targetKeyspace+"."+table) ts.wr.Logger().Infof("Delete routing: 
%v", ts.targetKeyspace+"."+table) rules[table] = []string{ts.targetKeyspace + "." + table} @@ -1242,6 +1497,7 @@ func (ts *trafficSwitcher) removeSourceTables(ctx context.Context, removalType T }) } +// FIXME: even after dropSourceShards there are still entries in the topo, need to research and fix func (ts *trafficSwitcher) dropSourceShards(ctx context.Context) error { return ts.forAllSources(func(source *tsSource) error { ts.wr.Logger().Infof("Deleting shard %s.%s\n", source.si.Keyspace(), source.si.ShardName()) @@ -1289,6 +1545,36 @@ func (ts *trafficSwitcher) dropSourceReverseVReplicationStreams(ctx context.Cont }) } +func (ts *trafficSwitcher) removeTargetTables(ctx context.Context) error { + return ts.forAllTargets(func(target *tsTarget) error { + for _, tableName := range ts.tables { + query := fmt.Sprintf("drop table %s.%s", target.master.DbName(), tableName) + ts.wr.Logger().Infof("Dropping table %s.%s\n", target.master.DbName(), tableName) + _, err := ts.wr.ExecuteFetchAsDba(ctx, target.master.Alias, query, 1, false, true) + if err != nil { + ts.wr.Logger().Errorf("Error removing table %s: %v", tableName, err) + return err + } + ts.wr.Logger().Infof("Removed table %s.%s\n", target.master.DbName(), tableName) + + } + return nil + }) +} + +func (ts *trafficSwitcher) dropTargetShards(ctx context.Context) error { + return ts.forAllTargets(func(target *tsTarget) error { + ts.wr.Logger().Infof("Deleting shard %s.%s\n", target.si.Keyspace(), target.si.ShardName()) + err := ts.wr.DeleteShard(ctx, target.si.Keyspace(), target.si.ShardName(), true, false) + if err != nil { + ts.wr.Logger().Errorf("Error deleting shard %s: %v", target.si.ShardName(), err) + return err + } + ts.wr.Logger().Infof("Deleted shard %s.%s\n", target.si.Keyspace(), target.si.ShardName()) + return nil + }) +} + func (wr *Wrangler) getRoutingRules(ctx context.Context) (map[string][]string, error) { rrs, err := wr.ts.GetRoutingRules(ctx) if err != nil { @@ -1302,7 +1588,7 @@ func (wr *Wrangler) getRoutingRules(ctx context.Context) (map[string][]string, e } func (wr *Wrangler) saveRoutingRules(ctx context.Context, rules map[string][]string) error { - wr.Logger().Infof("Saving routing rules %v\n", rules) + log.Infof("Saving routing rules %v\n", rules) rrs := &vschemapb.RoutingRules{Rules: make([]*vschemapb.RoutingRule, 0, len(rules))} for from, to := range rules { rrs.Rules = append(rrs.Rules, &vschemapb.RoutingRule{ diff --git a/go/vt/wrangler/traffic_switcher_env_test.go b/go/vt/wrangler/traffic_switcher_env_test.go index 28d032b2d41..686ff2049f4 100644 --- a/go/vt/wrangler/traffic_switcher_env_test.go +++ b/go/vt/wrangler/traffic_switcher_env_test.go @@ -21,6 +21,8 @@ import ( "testing" "time" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/mysql/fakesqldb" "golang.org/x/net/context" @@ -42,6 +44,7 @@ import ( const vreplQueryks = "select id, source, message, cell, tablet_types from _vt.vreplication where workflow='test' and db_name='vt_ks'" const vreplQueryks2 = "select id, source, message, cell, tablet_types from _vt.vreplication where workflow='test' and db_name='vt_ks2'" +const vreplQueryks1 = "select id, source, message, cell, tablet_types from _vt.vreplication where workflow='test_reverse' and db_name='vt_ks1'" type testMigraterEnv struct { ts *topo.Server @@ -191,6 +194,31 @@ func newTestTableMigraterCustom(ctx context.Context, t *testing.T, sourceShards, ) } + for i, sourceShard := range sourceShards { + var rows []string + for j, targetShard := range targetShards { + bls := &binlogdatapb.BinlogSource{ 
+ Keyspace: "ks2", + Shard: targetShard, + Filter: &binlogdatapb.Filter{ + Rules: []*binlogdatapb.Rule{{ + Match: "t1", + Filter: fmt.Sprintf(fmtQuery, fmt.Sprintf("from t1 where in_keyrange('%s')", sourceShard)), + }, { + Match: "t2", + Filter: fmt.Sprintf(fmtQuery, fmt.Sprintf("from t2 where in_keyrange('%s')", sourceShard)), + }}, + }, + } + rows = append(rows, fmt.Sprintf("%d|%v|||", j+1, bls)) + } + tme.dbSourceClients[i].addInvariant(vreplQueryks1, sqltypes.MakeTestResult(sqltypes.MakeTestFields( + "id|source|message|cell|tablet_types", + "int64|varchar|varchar|varchar|varchar"), + rows...), + ) + } + if err := tme.wr.saveRoutingRules(ctx, map[string][]string{ "t1": {"ks1.t1"}, "ks2.t1": {"ks1.t1"}, @@ -361,6 +389,7 @@ func (tme *testMigraterEnv) createDBClients(ctx context.Context, t *testing.T) { master.TM.VREngine.Open(ctx) } for _, master := range tme.targetMasters { + log.Infof("Adding as targetMaster %s", master.Tablet.Alias) dbclient := newFakeDBClient() tme.dbTargetClients = append(tme.dbTargetClients, dbclient) dbClientFactory := func() binlogplayer.DBClient { return dbclient } @@ -403,6 +432,13 @@ func (tme *testMigraterEnv) expectNoPreviousJournals() { } } +func (tme *testMigraterEnv) expectNoPreviousReverseJournals() { + // validate that no previous journals exist + for _, dbclient := range tme.dbTargetClients { + dbclient.addQueryRE(tsCheckJournals, &sqltypes.Result{}, nil) + } +} + func (tme *testShardMigraterEnv) forAllStreams(f func(i, j int)) { for i := range tme.targetShards { for j := range tme.sourceShards { diff --git a/go/vt/wrangler/traffic_switcher_test.go b/go/vt/wrangler/traffic_switcher_test.go index 8e80c03a362..043d08230b6 100644 --- a/go/vt/wrangler/traffic_switcher_test.go +++ b/go/vt/wrangler/traffic_switcher_test.go @@ -86,7 +86,7 @@ func TestTableMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // Single cell RDONLY migration. - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, []string{"cell1"}, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, []string{"cell1"}, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -117,7 +117,7 @@ func TestTableMigrateMainflow(t *testing.T) { // So, adding routes for replica and deploying to cell2 will also cause // cell2 to switch rdonly. This is a quirk that can be fixed later if necessary. // TODO(sougou): check if it's worth fixing, or clearly document the quirk. - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, []string{"cell2"}, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, []string{"cell2"}, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -152,15 +152,14 @@ func TestTableMigrateMainflow(t *testing.T) { "ks1.t2@replica": {"ks2.t2"}, }) verifyQueries(t, tme.allDBClients) - tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // Single cell backward REPLICA migration. 
- _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, []string{"cell2"}, DirectionBackward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, []string{"cell2"}, DirectionBackward, false) if err != nil { t.Fatal(err) } - checkRouting(t, tme.wr, map[string][]string{ + checkCellRouting(t, tme.wr, "cell1", map[string][]string{ "t1": {"ks1.t1"}, "ks2.t1": {"ks1.t1"}, "t2": {"ks1.t2"}, @@ -172,12 +171,30 @@ func TestTableMigrateMainflow(t *testing.T) { "ks2.t2@rdonly": {"ks2.t2"}, "ks1.t2@rdonly": {"ks2.t2"}, }) + checkCellRouting(t, tme.wr, "cell2", map[string][]string{ + "t1": {"ks1.t1"}, + "ks2.t1": {"ks1.t1"}, + "t2": {"ks1.t2"}, + "ks2.t2": {"ks1.t2"}, + "t1@rdonly": {"ks2.t1"}, + "ks2.t1@rdonly": {"ks2.t1"}, + "ks1.t1@rdonly": {"ks2.t1"}, + "t2@rdonly": {"ks2.t2"}, + "ks2.t2@rdonly": {"ks2.t2"}, + "ks1.t2@rdonly": {"ks2.t2"}, + "t1@replica": {"ks1.t1"}, + "ks2.t1@replica": {"ks1.t1"}, + "ks1.t1@replica": {"ks1.t1"}, + "t2@replica": {"ks1.t2"}, + "ks2.t2@replica": {"ks1.t2"}, + "ks1.t2@replica": {"ks1.t2"}, + }) verifyQueries(t, tme.allDBClients) tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // Switch all REPLICA. - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -204,7 +221,7 @@ func TestTableMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // All cells RDONLY backward migration. - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionBackward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionBackward, false) if err != nil { t.Fatal(err) } @@ -219,53 +236,62 @@ func TestTableMigrateMainflow(t *testing.T) { "t2@replica": {"ks2.t2"}, "ks2.t2@replica": {"ks2.t2"}, "ks1.t2@replica": {"ks2.t2"}, + "t1@rdonly": {"ks1.t1"}, + "ks2.t1@rdonly": {"ks1.t1"}, + "ks1.t1@rdonly": {"ks1.t1"}, + "t2@rdonly": {"ks1.t2"}, + "ks2.t2@rdonly": {"ks1.t2"}, + "ks1.t2@rdonly": {"ks1.t2"}, }) verifyQueries(t, tme.allDBClients) tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // All cells RDONLY backward migration. 
- _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionBackward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionBackward, false) if err != nil { t.Fatal(err) } checkRouting(t, tme.wr, map[string][]string{ - "t1": {"ks1.t1"}, - "ks2.t1": {"ks1.t1"}, - "t2": {"ks1.t2"}, - "ks2.t2": {"ks1.t2"}, + "t1": {"ks1.t1"}, + "ks2.t1": {"ks1.t1"}, + "t2": {"ks1.t2"}, + "ks2.t2": {"ks1.t2"}, + "t1@replica": {"ks1.t1"}, + "ks2.t1@replica": {"ks1.t1"}, + "ks1.t1@replica": {"ks1.t1"}, + "t2@replica": {"ks1.t2"}, + "ks2.t2@replica": {"ks1.t2"}, + "ks1.t2@replica": {"ks1.t2"}, + "t1@rdonly": {"ks1.t1"}, + "ks2.t1@rdonly": {"ks1.t1"}, + "ks1.t1@rdonly": {"ks1.t1"}, + "t2@rdonly": {"ks1.t2"}, + "ks2.t2@rdonly": {"ks1.t2"}, + "ks1.t2@rdonly": {"ks1.t2"}, }) verifyQueries(t, tme.allDBClients) //------------------------------------------------------------------------------------------------------------------- // Can't switch master with SwitchReads. - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_MASTER, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_MASTER}, nil, DirectionForward, false) want := "tablet type must be REPLICA or RDONLY: MASTER" if err == nil || err.Error() != want { t.Errorf("SwitchReads(master) err: %v, want %v", err, want) } verifyQueries(t, tme.allDBClients) - //------------------------------------------------------------------------------------------------------------------- - // Can't switch writes if REPLICA and RDONLY have not fully switched yet. - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) - want = "missing tablet type specific routing, read-only traffic must be switched before switching writes" - if err == nil || !strings.Contains(err.Error(), want) { - t.Errorf("SwitchWrites err: %v, want %v", err, want) - } - verifyQueries(t, tme.allDBClients) - //------------------------------------------------------------------------------------------------------------------- // Test SwitchWrites cancelation on failure. tme.expectNoPreviousJournals() // Switch all the reads first. 
- _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -316,7 +342,7 @@ func TestTableMigrateMainflow(t *testing.T) { } cancelMigration() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false) want = "DeadlineExceeded" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites(0 timeout) err: %v, must contain %v", err, want) @@ -421,18 +447,10 @@ func TestTableMigrateMainflow(t *testing.T) { tme.dbTargetClients[1].addQuery("update _vt.vreplication set message = 'FROZEN' where id in (1, 2)", &sqltypes.Result{}, nil) tme.dbTargetClients[1].addQuery("select * from _vt.vreplication where id = 1", stoppedResult(1), nil) tme.dbTargetClients[1].addQuery("select * from _vt.vreplication where id = 2", stoppedResult(2), nil) - /* - tme.dbTargetClients[0].addQuery("select id from _vt.vreplication where db_name = 'vt_ks2' and workflow = 'test'", resultid12, nil) - tme.dbTargetClients[1].addQuery("select id from _vt.vreplication where db_name = 'vt_ks2' and workflow = 'test'", resultid12, nil) - tme.dbTargetClients[0].addQuery("delete from _vt.vreplication where id in (1, 2)", &sqltypes.Result{}, nil) - tme.dbTargetClients[0].addQuery("delete from _vt.copy_state where vrepl_id in (1, 2)", &sqltypes.Result{}, nil) - tme.dbTargetClients[1].addQuery("delete from _vt.vreplication where id in (1, 2)", &sqltypes.Result{}, nil) - tme.dbTargetClients[1].addQuery("delete from _vt.copy_state where vrepl_id in (1, 2)", &sqltypes.Result{}, nil) - */ } deleteTargetVReplication() - journalID, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + journalID, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) if err != nil { t.Fatal(err) } @@ -441,10 +459,22 @@ func TestTableMigrateMainflow(t *testing.T) { } checkRouting(t, tme.wr, map[string][]string{ - "t1": {"ks2.t1"}, - "ks1.t1": {"ks2.t1"}, - "t2": {"ks2.t2"}, - "ks1.t2": {"ks2.t2"}, + "t1": {"ks2.t1"}, + "ks1.t1": {"ks2.t1"}, + "t2": {"ks2.t2"}, + "ks1.t2": {"ks2.t2"}, + "t1@replica": {"ks2.t1"}, + "ks2.t1@replica": {"ks2.t1"}, + "ks1.t1@replica": {"ks2.t1"}, + "t2@replica": {"ks2.t2"}, + "ks2.t2@replica": {"ks2.t2"}, + "ks1.t2@replica": {"ks2.t2"}, + "t1@rdonly": {"ks2.t1"}, + "ks2.t1@rdonly": {"ks2.t1"}, + "ks1.t1@rdonly": {"ks2.t1"}, + "t2@rdonly": {"ks2.t2"}, + "ks2.t2@rdonly": {"ks2.t2"}, + "ks1.t2@rdonly": {"ks2.t2"}, }) checkBlacklist(t, tme.ts, "ks1:-40", []string{"t1", "t2"}) checkBlacklist(t, tme.ts, "ks1:40-", []string{"t1", "t2"}) @@ -457,6 +487,7 @@ func TestTableMigrateMainflow(t *testing.T) { // TestShardMigrate tests table mode migrations. // This has to be kept in sync with TestTableMigrate. 
func TestShardMigrateMainflow(t *testing.T) { + //t.Skip("To be fixed before release") //FIXME ctx := context.Background() tme := newTestShardMigrater(ctx, t, []string{"-40", "40-"}, []string{"-80", "80-"}) defer tme.stopTablets(t) @@ -470,7 +501,7 @@ func TestShardMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // Single cell RDONLY migration. - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, []string{"cell1"}, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, []string{"cell1"}, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -487,7 +518,7 @@ func TestShardMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // Other cell REPLICA migration. - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, []string{"cell2"}, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, []string{"cell2"}, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -504,7 +535,7 @@ func TestShardMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // Single cell backward REPLICA migration. - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, []string{"cell2"}, DirectionBackward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, []string{"cell2"}, DirectionBackward, false) if err != nil { t.Fatal(err) } @@ -524,7 +555,7 @@ func TestShardMigrateMainflow(t *testing.T) { // This is an extra step that does not exist in the tables test. // The per-cell migration mechanism is different for tables. So, this // extra step is needed to bring things in sync. - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -537,7 +568,7 @@ func TestShardMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // Switch all REPLICA. - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -550,7 +581,7 @@ func TestShardMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // All cells RDONLY backward migration. 
- _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionBackward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionBackward, false) if err != nil { t.Fatal(err) } @@ -562,27 +593,19 @@ func TestShardMigrateMainflow(t *testing.T) { //------------------------------------------------------------------------------------------------------------------- // Can't switch master with SwitchReads. - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_MASTER, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_MASTER}, nil, DirectionForward, false) want := "tablet type must be REPLICA or RDONLY: MASTER" if err == nil || err.Error() != want { t.Errorf("SwitchReads(master) err: %v, want %v", err, want) } verifyQueries(t, tme.allDBClients) - //------------------------------------------------------------------------------------------------------------------- - // Can't switch writes if REPLICA and RDONLY have not fully switched yet. - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) - want = "cannot switch MASTER away" - if err == nil || !strings.Contains(err.Error(), want) { - t.Errorf("SwitchWrites err: %v, want %v", err, want) - } - verifyQueries(t, tme.allDBClients) //------------------------------------------------------------------------------------------------------------------- // Test SwitchWrites cancelation on failure. tme.expectNoPreviousJournals() // Switch all the reads first. - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -633,7 +656,7 @@ func TestShardMigrateMainflow(t *testing.T) { } cancelMigration() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false) want = "DeadlineExceeded" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites(0 timeout) err: %v, must contain %v", err, want) @@ -722,7 +745,7 @@ func TestShardMigrateMainflow(t *testing.T) { } freezeTargetVReplication() - journalID, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + journalID, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) if err != nil { t.Fatal(err) } @@ -751,12 +774,12 @@ func TestTableMigrateOneToMany(t *testing.T) { defer tme.stopTablets(t) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) if 
err != nil { t.Fatal(err) } @@ -818,11 +841,11 @@ func TestTableMigrateOneToMany(t *testing.T) { tme.dbTargetClients[1].addQuery("select 1 from _vt.vreplication where db_name='vt_ks2' and workflow='test' and message!='FROZEN'", &sqltypes.Result{}, nil) } dropSourcesInvalid() - _, err = tme.wr.DropSources(ctx, tme.targetKeyspace, "test", DropTable, false) + _, err = tme.wr.DropSources(ctx, tme.targetKeyspace, "test", DropTable, false, false, false) require.Error(t, err, "Workflow has not completed, cannot DropSources") tme.dbSourceClients[0].addQueryRE(tsCheckJournals, &sqltypes.Result{}, nil) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false, false) if err != nil { t.Fatal(err) } @@ -848,7 +871,7 @@ func TestTableMigrateOneToMany(t *testing.T) { "Unlock keyspace ks2", "Unlock keyspace ks1", } - results, err := tme.wr.DropSources(ctx, tme.targetKeyspace, "test", DropTable, true) + results, err := tme.wr.DropSources(ctx, tme.targetKeyspace, "test", DropTable, false, false, true) require.NoError(t, err) require.Empty(t, cmp.Diff(wantdryRunDropSources, *results)) checkBlacklist(t, tme.ts, fmt.Sprintf("%s:%s", "ks1", "0"), []string{"t1", "t2"}) @@ -874,7 +897,7 @@ func TestTableMigrateOneToMany(t *testing.T) { "Unlock keyspace ks2", "Unlock keyspace ks1", } - results, err = tme.wr.DropSources(ctx, tme.targetKeyspace, "test", RenameTable, true) + results, err = tme.wr.DropSources(ctx, tme.targetKeyspace, "test", RenameTable, false, false, true) require.NoError(t, err) require.Empty(t, cmp.Diff(wantdryRunRenameSources, *results)) checkBlacklist(t, tme.ts, fmt.Sprintf("%s:%s", "ks1", "0"), []string{"t1", "t2"}) @@ -890,7 +913,7 @@ func TestTableMigrateOneToMany(t *testing.T) { } dropSources() - _, err = tme.wr.DropSources(ctx, tme.targetKeyspace, "test", DropTable, false) + _, err = tme.wr.DropSources(ctx, tme.targetKeyspace, "test", DropTable, false, false, false) require.NoError(t, err) checkBlacklist(t, tme.ts, fmt.Sprintf("%s:%s", "ks1", "0"), nil) @@ -946,15 +969,15 @@ func TestTableMigrateOneToManyDryRun(t *testing.T) { "Unlock keyspace ks1", } tme.expectNoPreviousJournals() - dryRunResults, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, true) + dryRunResults, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, true) require.NoError(t, err) require.Empty(t, cmp.Diff(wantdryRunReads, *dryRunResults)) tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) require.NoError(t, err) tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) require.NoError(t, err) verifyQueries(t, tme.allDBClients) @@ -1021,7 +1044,7 @@ func TestTableMigrateOneToManyDryRun(t *testing.T) { } deleteTargetVReplication() - _, results, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true) + _, 
results, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false, true) require.NoError(t, err) require.Empty(t, cmp.Diff(wantdryRunWrites, *results)) } @@ -1034,12 +1057,12 @@ func TestMigrateFailJournal(t *testing.T) { defer tme.stopTablets(t) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) require.NoError(t, err) // mi.checkJournals @@ -1106,7 +1129,7 @@ func TestMigrateFailJournal(t *testing.T) { tme.dbSourceClients[0].addQueryRE("insert into _vt.resharding_journal", nil, errors.New("journaling intentionally failed")) tme.dbSourceClients[1].addQueryRE("insert into _vt.resharding_journal", nil, errors.New("journaling intentionally failed")) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) want := "journaling intentionally failed" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites(0 timeout) err: %v, must contain %v", err, want) @@ -1130,12 +1153,12 @@ func TestTableMigrateJournalExists(t *testing.T) { defer tme.stopTablets(t) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1167,17 +1190,29 @@ func TestTableMigrateJournalExists(t *testing.T) { tme.dbTargetClients[1].addQuery("select * from _vt.vreplication where id = 1", stoppedResult(1), nil) tme.dbTargetClients[1].addQuery("select * from _vt.vreplication where id = 2", stoppedResult(2), nil) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) if err != nil { t.Fatal(err) } // Routes will be redone. 
checkRouting(t, tme.wr, map[string][]string{ - "t1": {"ks2.t1"}, - "ks1.t1": {"ks2.t1"}, - "t2": {"ks2.t2"}, - "ks1.t2": {"ks2.t2"}, + "t1": {"ks2.t1"}, + "ks1.t1": {"ks2.t1"}, + "t2": {"ks2.t2"}, + "ks1.t2": {"ks2.t2"}, + "t1@replica": {"ks2.t1"}, + "ks2.t1@replica": {"ks2.t1"}, + "ks1.t1@replica": {"ks2.t1"}, + "t2@replica": {"ks2.t2"}, + "ks2.t2@replica": {"ks2.t2"}, + "ks1.t2@replica": {"ks2.t2"}, + "t1@rdonly": {"ks2.t1"}, + "ks2.t1@rdonly": {"ks2.t1"}, + "ks1.t1@rdonly": {"ks2.t1"}, + "t2@rdonly": {"ks2.t2"}, + "ks2.t2@rdonly": {"ks2.t2"}, + "ks1.t2@rdonly": {"ks2.t2"}, }) // We're showing that there are no blacklisted tables. But in real life, // tables on ks1 should be blacklisted from the previous failed attempt. @@ -1188,18 +1223,19 @@ func TestTableMigrateJournalExists(t *testing.T) { verifyQueries(t, tme.allDBClients) } + func TestShardMigrateJournalExists(t *testing.T) { ctx := context.Background() tme := newTestShardMigrater(ctx, t, []string{"-40", "40-"}, []string{"-80", "80-"}) defer tme.stopTablets(t) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1231,7 +1267,7 @@ func TestShardMigrateJournalExists(t *testing.T) { tme.dbTargetClients[1].addQuery("update _vt.vreplication set message = 'FROZEN' where id in (2)", &sqltypes.Result{}, nil) tme.dbTargetClients[1].addQuery("select * from _vt.vreplication where id = 2", stoppedResult(2), nil) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) if err != nil { t.Fatal(err) } @@ -1248,18 +1284,19 @@ func TestShardMigrateJournalExists(t *testing.T) { verifyQueries(t, tme.allDBClients) } + func TestTableMigrateCancel(t *testing.T) { ctx := context.Background() tme := newTestTableMigrater(ctx, t) defer tme.stopTablets(t) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1292,7 +1329,7 @@ func TestTableMigrateCancel(t *testing.T) { } cancelMigration() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, false, false) if err != nil { t.Fatal(err) } @@ -1313,12 +1350,12 @@ func TestTableMigrateCancelDryRun(t 
*testing.T) { } tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1351,7 +1388,7 @@ func TestTableMigrateCancelDryRun(t *testing.T) { } cancelMigration() - _, dryRunResults, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, true) + _, dryRunResults, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, false, true) require.NoError(t, err) require.Empty(t, cmp.Diff(want, *dryRunResults)) } @@ -1362,12 +1399,12 @@ func TestTableMigrateNoReverse(t *testing.T) { defer tme.stopTablets(t) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1449,7 +1486,7 @@ func TestTableMigrateNoReverse(t *testing.T) { } deleteTargetVReplication() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false, false) if err != nil { t.Fatal(err) } @@ -1462,13 +1499,13 @@ func TestMigrateFrozen(t *testing.T) { defer tme.stopTablets(t) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1478,7 +1515,7 @@ func TestMigrateFrozen(t *testing.T) { Shard: "-40", Filter: &binlogdatapb.Filter{ Rules: []*binlogdatapb.Rule{{ - Match: "/.*", + Match: "t1", Filter: "", }}, }, @@ -1490,21 +1527,7 @@ func TestMigrateFrozen(t *testing.T) { ), nil) tme.dbTargetClients[1].addQuery(vreplQueryks2, &sqltypes.Result{}, nil) - tme.dbSourceClients[0].addQueryRE(tsCheckJournals, &sqltypes.Result{}, nil) - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) - want := "cannot switch reads while SwitchWrites is in progress" - if 
err == nil || err.Error() != want { - t.Errorf("SwitchReads(frozen) err: %v, want %v", err, want) - } - - tme.dbTargetClients[0].addQuery(vreplQueryks2, sqltypes.MakeTestResult(sqltypes.MakeTestFields( - "id|source|message|cell|tablet_type", - "int64|varchar|varchar|varchar|varchar"), - fmt.Sprintf("1|%v|FROZEN||", bls1), - ), nil) - tme.dbTargetClients[1].addQuery(vreplQueryks2, &sqltypes.Result{}, nil) - - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false) if err != nil { t.Fatal(err) } @@ -1520,8 +1543,8 @@ func TestMigrateNoStreamsFound(t *testing.T) { tme.dbTargetClients[1].addQuery(vreplQueryks2, &sqltypes.Result{}, nil) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) - want := "no streams found in keyspace ks2 for: test" + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) + want := "workflow test not found in keyspace ks2" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchReads: %v, must contain %v", err, want) } @@ -1552,7 +1575,7 @@ func TestMigrateDistinctSources(t *testing.T) { ), nil) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) want := "source keyspaces are mismatched across streams" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchReads: %v, must contain %v", err, want) @@ -1582,7 +1605,7 @@ func TestMigrateMismatchedTables(t *testing.T) { ) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) want := "table lists are mismatched across streams" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchReads: %v, must contain %v", err, want) @@ -1597,7 +1620,7 @@ func TestTableMigrateAllShardsNotPresent(t *testing.T) { tme.dbTargetClients[0].addQuery(vreplQueryks2, &sqltypes.Result{}, nil) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) want := "mismatched shards for keyspace" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchReads: %v, must contain %v", err, want) @@ -1655,8 +1678,8 @@ func TestMigrateNoTableWildcards(t *testing.T) { fmt.Sprintf("1|%v|||", bls3), ), nil) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) - want := "cannot migrate streams with wild card table names" + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) + want := "cannot migrate streams with wild card table names: 
/.*" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchReads: %v, must contain %v", err, want) } diff --git a/go/vt/wrangler/vdiff.go b/go/vt/wrangler/vdiff.go index b232dc25a8e..4e72ab30bb8 100644 --- a/go/vt/wrangler/vdiff.go +++ b/go/vt/wrangler/vdiff.go @@ -146,7 +146,7 @@ func (wr *Wrangler) VDiff(ctx context.Context, targetKeyspace, workflow, sourceC wr.Logger().Errorf("buildTrafficSwitcher: %v", err) return nil, err } - if err := ts.validate(ctx, false /* isWrite */); err != nil { + if err := ts.validate(ctx); err != nil { ts.wr.Logger().Errorf("validate: %v", err) return nil, err } diff --git a/go/vt/wrangler/vexec.go b/go/vt/wrangler/vexec.go index eeaa9e3a822..5c7c9eb1d21 100644 --- a/go/vt/wrangler/vexec.go +++ b/go/vt/wrangler/vexec.go @@ -575,7 +575,7 @@ func dumpStreamListAsJSON(replStatus *ReplicationStatusResult, wr *Wrangler) err func (wr *Wrangler) printWorkflowList(keyspace string, workflows []string) { list := strings.Join(workflows, ", ") if list == "" { - wr.Logger().Printf("No workflows found in keyspace %s", keyspace) + wr.Logger().Printf("No workflows found in keyspace %s\n", keyspace) return } wr.Logger().Printf("Following workflow(s) found in keyspace %s: %v\n", keyspace, list) diff --git a/go/vt/wrangler/workflow.go b/go/vt/wrangler/workflow.go new file mode 100644 index 00000000000..44ac91d6561 --- /dev/null +++ b/go/vt/wrangler/workflow.go @@ -0,0 +1,477 @@ +package wrangler + +import ( + "context" + "fmt" + "sort" + "strings" + "time" + + "vitess.io/vitess/go/sqltypes" + topodatapb "vitess.io/vitess/go/vt/proto/topodata" + "vitess.io/vitess/go/vt/topo/topoproto" + "vitess.io/vitess/go/vt/vtgate/evalengine" + + "vitess.io/vitess/go/vt/log" +) + +// VReplicationWorkflowType specifies whether workflow is MoveTables or Reshard +type VReplicationWorkflowType int + +// VReplicationWorkflowType enums +const ( + MoveTablesWorkflow = VReplicationWorkflowType(iota) + ReshardWorkflow +) + +// Workflow state display strings +const ( + WorkflowStateNotStarted = "Not Started" + WorkflowStateNotSwitched = "Reads Not Switched. Writes Not Switched" + WorkflowStateReadsSwitched = "All Reads Switched. Writes Not Switched" + WorkflowStateWritesSwitched = "Reads Not Switched. Writes Switched" + WorkflowStateAllSwitched = "All Reads Switched. 
Writes Switched" +) + +// region Move Tables Public API + +// VReplicationWorkflow stores various internal objects for a workflow +type VReplicationWorkflow struct { + workflowType VReplicationWorkflowType + ctx context.Context + wr *Wrangler + params *VReplicationWorkflowParams + ts *trafficSwitcher + ws *workflowState +} + +func (vrw *VReplicationWorkflow) String() string { + s := "" + s += fmt.Sprintf("Parameters: %+v\n", vrw.params) + s += fmt.Sprintf("State: %+v", vrw.CachedState()) + return s +} + +// VReplicationWorkflowParams stores args and options passed to a VReplicationWorkflow command +type VReplicationWorkflowParams struct { + Workflow, TargetKeyspace string + Cells, TabletTypes, ExcludeTables string + EnableReverseReplication, DryRun bool + KeepData bool + Timeout time.Duration + Direction TrafficSwitchDirection + + // MoveTables specific + SourceKeyspace, Tables string + AllTables, RenameTables bool + + // Reshard specific + SourceShards, TargetShards []string + SkipSchemaCopy bool +} + +// NewVReplicationWorkflow sets up a MoveTables or Reshard workflow based on options provided, deduces the state of the +// workflow from the persistent state stored in the vreplication table and the topo +func (wr *Wrangler) NewVReplicationWorkflow(ctx context.Context, workflowType VReplicationWorkflowType, + params *VReplicationWorkflowParams) (*VReplicationWorkflow, error) { + + log.Infof("NewVReplicationWorkflow with params %+v", params) + vrw := &VReplicationWorkflow{wr: wr, ctx: ctx, params: params, workflowType: workflowType} + ts, ws, err := wr.getWorkflowState(ctx, params.TargetKeyspace, params.Workflow) + if err != nil { + return nil, err + } + log.Infof("Workflow state is %+v", ws) + if ts != nil { //Other than on Start we need to get SourceKeyspace from the workflow + vrw.params.TargetKeyspace = ts.targetKeyspace + vrw.params.Workflow = ts.workflow + vrw.params.SourceKeyspace = ts.sourceKeyspace + } + vrw.ts = ts + vrw.ws = ws + return vrw, nil +} + +// CurrentState reloads and returns a human readable workflow state +func (vrw *VReplicationWorkflow) CurrentState() string { + var err error + vrw.ts, vrw.ws, err = vrw.wr.getWorkflowState(vrw.ctx, vrw.params.TargetKeyspace, vrw.params.Workflow) + if err != nil { + return err.Error() + } + if vrw.ws == nil { + return "Workflow Not Found" + } + return vrw.stateAsString(vrw.ws) +} + +// CachedState returns a human readable workflow state at the time the workflow was created +func (vrw *VReplicationWorkflow) CachedState() string { + return vrw.stateAsString(vrw.ws) +} + +// Exists checks if the workflow has already been initiated +func (vrw *VReplicationWorkflow) Exists() bool { + return vrw.ws != nil +} + +func (vrw *VReplicationWorkflow) stateAsString(ws *workflowState) string { + log.Infof("Workflow state is %+v", ws) + var stateInfo []string + s := "" + if !vrw.Exists() { + stateInfo = append(stateInfo, "Not Started") + } else { + if len(ws.RdonlyCellsNotSwitched) == 0 && len(ws.ReplicaCellsNotSwitched) == 0 && len(ws.ReplicaCellsSwitched) > 0 { + s = "All Reads Switched" + } else if len(ws.RdonlyCellsSwitched) == 0 && len(ws.ReplicaCellsSwitched) == 0 { + s = "Reads Not Switched" + } else { + stateInfo = append(stateInfo, "Reads partially switched") + if len(ws.ReplicaCellsNotSwitched) == 0 { + s += "All Replica Reads Switched" + } else if len(ws.ReplicaCellsSwitched) == 0 { + s += "Replica not switched" + } else { + s += "Replica switched in cells: " + strings.Join(ws.ReplicaCellsSwitched, ",") + } + stateInfo = 
append(stateInfo, s)
+ s = ""
+ if len(ws.RdonlyCellsNotSwitched) == 0 {
+ s += "All Rdonly Reads Switched"
+ } else if len(ws.RdonlyCellsSwitched) == 0 {
+ s += "Rdonly not switched"
+ } else {
+ s += "Rdonly switched in cells: " + strings.Join(ws.RdonlyCellsSwitched, ",")
+ }
+ }
+ stateInfo = append(stateInfo, s)
+ if ws.WritesSwitched {
+ stateInfo = append(stateInfo, "Writes Switched")
+ } else {
+ stateInfo = append(stateInfo, "Writes Not Switched")
+ }
+ }
+ return strings.Join(stateInfo, ". ")
+}
+
+// Start initiates a workflow
+func (vrw *VReplicationWorkflow) Start() error {
+ var err error
+ if vrw.Exists() {
+ return fmt.Errorf("workflow already exists")
+ }
+ if vrw.CachedState() != WorkflowStateNotStarted {
+ return fmt.Errorf("workflow has already been started, state is %s", vrw.CachedState())
+ }
+ switch vrw.workflowType {
+ case MoveTablesWorkflow:
+ err = vrw.initMoveTables()
+ case ReshardWorkflow:
+ err = vrw.initReshard()
+ default:
+ return fmt.Errorf("unknown workflow type %d", vrw.workflowType)
+ }
+ if err != nil {
+ return err
+ }
+ return nil
+}
+
+// SwitchTraffic switches traffic forward for tablet_types passed
+func (vrw *VReplicationWorkflow) SwitchTraffic(direction TrafficSwitchDirection) error {
+ if !vrw.Exists() {
+ return fmt.Errorf("workflow has not yet been started")
+ }
+ vrw.params.Direction = direction
+ hasReplica, hasRdonly, hasMaster, err := vrw.parseTabletTypes()
+ if err != nil {
+ return err
+ }
+ if hasReplica || hasRdonly {
+ if err := vrw.switchReads(); err != nil {
+ return err
+ }
+ }
+ if hasMaster {
+ if err := vrw.switchWrites(); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// ReverseTraffic switches traffic backwards for tablet_types passed
+func (vrw *VReplicationWorkflow) ReverseTraffic() error {
+ if !vrw.Exists() {
+ return fmt.Errorf("workflow has not yet been started")
+ }
+ return vrw.SwitchTraffic(DirectionBackward)
+}
+
+// Workflow errors
+const (
+ ErrWorkflowNotFullySwitched = "cannot complete workflow because you have not yet switched all read and write traffic"
+ ErrWorkflowPartiallySwitched = "cannot abort workflow because you have already switched some or all read and write traffic"
+)
+
+// Complete cleans up a successful workflow
+func (vrw *VReplicationWorkflow) Complete() error {
+ ws := vrw.ws
+ if !ws.WritesSwitched || len(ws.ReplicaCellsNotSwitched) > 0 || len(ws.RdonlyCellsNotSwitched) > 0 {
+ return fmt.Errorf(ErrWorkflowNotFullySwitched)
+ }
+ var renameTable TableRemovalType
+ if vrw.params.RenameTables {
+ renameTable = RenameTable
+ } else {
+ renameTable = DropTable
+ }
+ if _, err := vrw.wr.DropSources(vrw.ctx, vrw.ws.TargetKeyspace, vrw.ws.Workflow, renameTable, vrw.params.KeepData,
+ false, false); err != nil {
+ return err
+ }
+ return nil
+}
+
+// Abort deletes all artifacts from a workflow which has not yet been switched
+func (vrw *VReplicationWorkflow) Abort() error {
+ ws := vrw.ws
+ if ws.WritesSwitched || len(ws.ReplicaCellsSwitched) > 0 || len(ws.RdonlyCellsSwitched) > 0 {
+ return fmt.Errorf(ErrWorkflowPartiallySwitched)
+ }
+ if _, err := vrw.wr.DropTargets(vrw.ctx, vrw.ws.TargetKeyspace, vrw.ws.Workflow, vrw.params.KeepData, false); err != nil {
+ return err
+ }
+ vrw.ts = nil
+ return nil
+}
+
+// endregion
+
+// region Helpers
+
+func (vrw *VReplicationWorkflow) getCellsAsArray() []string {
+ if vrw.params.Cells != "" {
+ return strings.Split(vrw.params.Cells, ",")
+ }
+ return nil
+}
+
+func (vrw *VReplicationWorkflow) getTabletTypes() []topodatapb.TabletType {
tabletTypesArr := strings.Split(vrw.params.TabletTypes, ",") + var tabletTypes []topodatapb.TabletType + for _, tabletType := range tabletTypesArr { + servedType, _ := topoproto.ParseTabletType(tabletType) + tabletTypes = append(tabletTypes, servedType) + } + return tabletTypes +} + +func (vrw *VReplicationWorkflow) parseTabletTypes() (hasReplica, hasRdonly, hasMaster bool, err error) { + tabletTypesArr := strings.Split(vrw.params.TabletTypes, ",") + for _, tabletType := range tabletTypesArr { + switch tabletType { + case "replica": + hasReplica = true + case "rdonly": + hasRdonly = true + case "master": + hasMaster = true + default: + return false, false, false, fmt.Errorf("invalid tablet type passed %s", tabletType) + } + } + return hasReplica, hasRdonly, hasMaster, nil +} + +// endregion + +// region Core Actions + +func (vrw *VReplicationWorkflow) initMoveTables() error { + log.Infof("In VReplicationWorkflow.initMoveTables() for %+v", vrw) + return vrw.wr.MoveTables(vrw.ctx, vrw.params.Workflow, vrw.params.SourceKeyspace, vrw.params.TargetKeyspace, + vrw.params.Tables, vrw.params.Cells, vrw.params.TabletTypes, vrw.params.AllTables, vrw.params.ExcludeTables) +} + +func (vrw *VReplicationWorkflow) initReshard() error { + log.Infof("In VReplicationWorkflow.initReshard() for %+v", vrw) + return vrw.wr.Reshard(vrw.ctx, vrw.params.TargetKeyspace, vrw.params.Workflow, vrw.params.SourceShards, + vrw.params.TargetShards, vrw.params.SkipSchemaCopy, vrw.params.Cells, vrw.params.TabletTypes) +} + +func (vrw *VReplicationWorkflow) switchReads() error { + log.Infof("In VReplicationWorkflow.switchReads() for %+v", vrw) + var tabletTypes []topodatapb.TabletType + for _, tt := range vrw.getTabletTypes() { + if tt != topodatapb.TabletType_MASTER { + tabletTypes = append(tabletTypes, tt) + } + } + + _, err := vrw.wr.SwitchReads(vrw.ctx, vrw.params.TargetKeyspace, vrw.params.Workflow, tabletTypes, + vrw.getCellsAsArray(), vrw.params.Direction, false) + if err != nil { + return err + } + return nil +} + +func (vrw *VReplicationWorkflow) switchWrites() error { + log.Infof("In VReplicationWorkflow.switchWrites() for %+v", vrw) + if vrw.params.Direction == DirectionBackward { + keyspace := vrw.params.SourceKeyspace + vrw.params.SourceKeyspace = vrw.params.TargetKeyspace + vrw.params.TargetKeyspace = keyspace + vrw.params.Workflow = reverseName(vrw.params.Workflow) + log.Infof("In VReplicationWorkflow.switchWrites(reverse) for %+v", vrw) + } + journalID, _, err := vrw.wr.SwitchWrites(vrw.ctx, vrw.params.TargetKeyspace, vrw.params.Workflow, vrw.params.Timeout, + false, vrw.params.Direction == DirectionBackward, vrw.params.EnableReverseReplication, false) + if err != nil { + return err + } + log.Infof("switchWrites succeeded with journal id %s", journalID) + return nil +} + +// endregion + +// region Copy Progress + +// TableCopyProgress stores the row counts and disk sizes of the source and target tables +type TableCopyProgress struct { + TargetRowCount, TargetTableSize int64 + SourceRowCount, SourceTableSize int64 +} + +// CopyProgress stores the TableCopyProgress for all tables still being copied +type CopyProgress map[string]*TableCopyProgress + +// GetCopyProgress returns the progress of all tables being copied in the workflow +func (vrw *VReplicationWorkflow) GetCopyProgress() (*CopyProgress, error) { + ctx := context.Background() + getTablesQuery := "select table_name from _vt.copy_state cs, _vt.vreplication vr where vr.id = cs.vrepl_id and vr.id = %d" + getRowCountQuery := "select table_name, table_rows, 
data_length from information_schema.tables where table_schema = %s and table_name in (%s)" + tables := make(map[string]bool) + const MaxRows = 1000 + sourceMasters := make(map[*topodatapb.TabletAlias]bool) + for _, target := range vrw.ts.targets { + for id, bls := range target.sources { + query := fmt.Sprintf(getTablesQuery, id) + p3qr, err := vrw.wr.tmc.ExecuteFetchAsDba(ctx, target.master.Tablet, true, []byte(query), MaxRows, false, false) + if err != nil { + return nil, err + } + if len(p3qr.Rows) < 1 { + continue + } + qr := sqltypes.Proto3ToResult(p3qr) + for i := 0; i < len(p3qr.Rows); i++ { + tables[qr.Rows[i][0].ToString()] = true + } + sourcesi, err := vrw.wr.ts.GetShard(ctx, bls.Keyspace, bls.Shard) + if err != nil { + return nil, err + } + found := false + for existingSource := range sourceMasters { + if existingSource.Uid == sourcesi.MasterAlias.Uid { + found = true + } + } + if !found { + sourceMasters[sourcesi.MasterAlias] = true + } + } + } + if len(tables) == 0 { + return nil, nil + } + var tableList []string + targetRowCounts := make(map[string]int64) + sourceRowCounts := make(map[string]int64) + targetTableSizes := make(map[string]int64) + sourceTableSizes := make(map[string]int64) + + for table := range tables { + tableList = append(tableList, encodeString(table)) + targetRowCounts[table] = 0 + sourceRowCounts[table] = 0 + targetTableSizes[table] = 0 + sourceTableSizes[table] = 0 + } + + var getTableMetrics = func(tablet *topodatapb.Tablet, query string, rowCounts *map[string]int64, tableSizes *map[string]int64) error { + p3qr, err := vrw.wr.tmc.ExecuteFetchAsDba(ctx, tablet, true, []byte(query), len(tables), false, false) + if err != nil { + return err + } + qr := sqltypes.Proto3ToResult(p3qr) + for i := 0; i < len(qr.Rows); i++ { + table := qr.Rows[i][0].ToString() + rowCount, err := evalengine.ToInt64(qr.Rows[i][1]) + if err != nil { + return err + } + tableSize, err := evalengine.ToInt64(qr.Rows[i][2]) + if err != nil { + return err + } + (*rowCounts)[table] += rowCount + (*tableSizes)[table] += tableSize + } + return nil + } + sourceDbName := "" + for _, tsSource := range vrw.ts.sources { + sourceDbName = tsSource.master.DbName() + break + } + if sourceDbName == "" { + return nil, fmt.Errorf("no sources found for workflow %s.%s", vrw.ws.TargetKeyspace, vrw.ws.Workflow) + } + targetDbName := "" + for _, tsTarget := range vrw.ts.targets { + targetDbName = tsTarget.master.DbName() + break + } + if sourceDbName == "" || targetDbName == "" { + return nil, fmt.Errorf("workflow %s.%s is incorrectly configured", vrw.ws.TargetKeyspace, vrw.ws.Workflow) + } + sort.Strings(tableList) // sort list for repeatability for mocking in tests + tablesStr := strings.Join(tableList, ",") + query := fmt.Sprintf(getRowCountQuery, encodeString(targetDbName), tablesStr) + for _, target := range vrw.ts.targets { + tablet := target.master.Tablet + if err := getTableMetrics(tablet, query, &targetRowCounts, &targetTableSizes); err != nil { + return nil, err + } + } + + query = fmt.Sprintf(getRowCountQuery, encodeString(sourceDbName), tablesStr) + for source := range sourceMasters { + ti, err := vrw.wr.ts.GetTablet(ctx, source) + tablet := ti.Tablet + if err != nil { + return nil, err + } + if err := getTableMetrics(tablet, query, &sourceRowCounts, &sourceTableSizes); err != nil { + return nil, err + } + } + + copyProgress := CopyProgress{} + for table, rowCount := range targetRowCounts { + copyProgress[table] = &TableCopyProgress{ + TargetRowCount: rowCount, + TargetTableSize: 
targetTableSizes[table],
+ SourceRowCount: sourceRowCounts[table],
+ SourceTableSize: sourceTableSizes[table],
+ }
+ }
+ return &copyProgress, nil
+}
+
+// endregion
diff --git a/go/vt/wrangler/workflow_test.go b/go/vt/wrangler/workflow_test.go
new file mode 100644
index 00000000000..71cd2d9133f
--- /dev/null
+++ b/go/vt/wrangler/workflow_test.go
@@ -0,0 +1,433 @@
+/*
+Copyright 2020 The Vitess Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package wrangler
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/require"
+ "golang.org/x/net/context"
+ "vitess.io/vitess/go/sqltypes"
+ "vitess.io/vitess/go/vt/log"
+ "vitess.io/vitess/go/vt/proto/topodata"
+)
+
+func getMoveTablesWorkflow(t *testing.T, cells, tabletTypes string) *VReplicationWorkflow {
+ p := &VReplicationWorkflowParams{
+ Workflow: "wf1",
+ SourceKeyspace: "sourceks",
+ TargetKeyspace: "targetks",
+ Tables: "customer,corder",
+ Cells: cells,
+ TabletTypes: tabletTypes,
+ }
+ mtwf := &VReplicationWorkflow{
+ workflowType: MoveTablesWorkflow,
+ ctx: context.Background(),
+ wr: nil,
+ params: p,
+ ts: nil,
+ ws: nil,
+ }
+ return mtwf
+}
+
+func TestReshardingWorkflowErrorsAndMisc(t *testing.T) {
+ mtwf := getMoveTablesWorkflow(t, "cell1,cell2", "replica,rdonly")
+ require.False(t, mtwf.Exists())
+ mtwf.ws = &workflowState{}
+ require.True(t, mtwf.Exists())
+ require.Errorf(t, mtwf.Complete(), ErrWorkflowNotFullySwitched)
+ mtwf.ws.WritesSwitched = true
+ require.Errorf(t, mtwf.Abort(), ErrWorkflowPartiallySwitched)
+
+ require.ElementsMatch(t, mtwf.getCellsAsArray(), []string{"cell1", "cell2"})
+ require.ElementsMatch(t, mtwf.getTabletTypes(), []topodata.TabletType{topodata.TabletType_REPLICA, topodata.TabletType_RDONLY})
+ hasReplica, hasRdonly, hasMaster, err := mtwf.parseTabletTypes()
+ require.NoError(t, err)
+ require.True(t, hasReplica)
+ require.True(t, hasRdonly)
+ require.False(t, hasMaster)
+
+ mtwf.params.TabletTypes = "replica,rdonly,master"
+ require.ElementsMatch(t, mtwf.getTabletTypes(),
+ []topodata.TabletType{topodata.TabletType_REPLICA, topodata.TabletType_RDONLY, topodata.TabletType_MASTER})
+
+ hasReplica, hasRdonly, hasMaster, err = mtwf.parseTabletTypes()
+ require.NoError(t, err)
+ require.True(t, hasReplica)
+ require.True(t, hasRdonly)
+ require.True(t, hasMaster)
+}
+
+func TestCopyProgress(t *testing.T) {
+ var err error
+ var wf *VReplicationWorkflow
+ ctx := context.Background()
+ p := &VReplicationWorkflowParams{
+ Workflow: "test",
+ SourceKeyspace: "ks1",
+ TargetKeyspace: "ks2",
+ Tables: "t1,t2",
+ Cells: "cell1,cell2",
+ TabletTypes: "replica,rdonly,master",
+ Timeout: DefaultActionTimeout,
+ }
+ tme := newTestTableMigrater(ctx, t)
+ defer tme.stopTablets(t)
+ wf, err = tme.wr.NewVReplicationWorkflow(ctx, MoveTablesWorkflow, p)
+ require.NoError(t, err)
+ require.NotNil(t, wf)
+ require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState())
+
+ expectCopyProgressQueries(t, tme)
+
+ cp, err2 := wf.GetCopyProgress()
+ require.NoError(t, err2)
+ log.Infof("CopyProgress is %+v,%+v", (*cp)["t1"],
(*cp)["t2"]) + + require.Equal(t, int64(800), (*cp)["t1"].SourceRowCount) + require.Equal(t, int64(200), (*cp)["t1"].TargetRowCount) + require.Equal(t, int64(4000), (*cp)["t1"].SourceTableSize) + require.Equal(t, int64(2000), (*cp)["t1"].TargetTableSize) + + require.Equal(t, int64(2000), (*cp)["t2"].SourceRowCount) + require.Equal(t, int64(400), (*cp)["t2"].TargetRowCount) + require.Equal(t, int64(4000), (*cp)["t2"].SourceTableSize) + require.Equal(t, int64(1000), (*cp)["t2"].TargetTableSize) +} + +func expectCopyProgressQueries(t *testing.T, tme *testMigraterEnv) { + db := tme.tmeDB + query := "select table_name from _vt.copy_state cs, _vt.vreplication vr where vr.id = cs.vrepl_id and vr.id = 1" + rows := []string{"t1", "t2"} + result := sqltypes.MakeTestResult(sqltypes.MakeTestFields( + "table_name", + "varchar"), + rows...) + db.AddQuery(query, result) + query = "select table_name from _vt.copy_state cs, _vt.vreplication vr where vr.id = cs.vrepl_id and vr.id = 2" + db.AddQuery(query, result) + + query = "select table_name, table_rows, data_length from information_schema.tables where table_schema = 'vt_ks2' and table_name in ('t1','t2')" + result = sqltypes.MakeTestResult(sqltypes.MakeTestFields( + "table_name|table_rows|data_length", + "varchar|int64|int64"), + "t1|100|1000", + "t2|200|500") + db.AddQuery(query, result) + + query = "select table_name, table_rows, data_length from information_schema.tables where table_schema = 'vt_ks1' and table_name in ('t1','t2')" + result = sqltypes.MakeTestResult(sqltypes.MakeTestFields( + "table_name|table_rows|data_length", + "varchar|int64|int64"), + "t1|400|2000", + "t2|1000|2000") + db.AddQuery(query, result) + +} + +func TestMoveTablesV2(t *testing.T) { + ctx := context.Background() + p := &VReplicationWorkflowParams{ + Workflow: "test", + SourceKeyspace: "ks1", + TargetKeyspace: "ks2", + Tables: "t1,t2", + Cells: "cell1,cell2", + TabletTypes: "replica,rdonly,master", + Timeout: DefaultActionTimeout, + } + tme := newTestTableMigrater(ctx, t) + defer tme.stopTablets(t) + wf, err := tme.wr.NewVReplicationWorkflow(ctx, MoveTablesWorkflow, p) + require.NoError(t, err) + require.NotNil(t, wf) + require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState()) + tme.expectNoPreviousJournals() + expectMoveTablesQueries(t, tme) + tme.expectNoPreviousJournals() + require.NoError(t, wf.SwitchTraffic(DirectionForward)) + require.Equal(t, WorkflowStateAllSwitched, wf.CurrentState()) + + tme.expectNoPreviousJournals() + tme.expectNoPreviousReverseJournals() + require.NoError(t, wf.ReverseTraffic()) + require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState()) +} + +func TestMoveTablesV2Partial(t *testing.T) { + ctx := context.Background() + p := &VReplicationWorkflowParams{ + Workflow: "test", + SourceKeyspace: "ks1", + TargetKeyspace: "ks2", + Tables: "t1,t2", + Cells: "cell1,cell2", + TabletTypes: "replica,rdonly,master", + Timeout: DefaultActionTimeout, + } + tme := newTestTableMigrater(ctx, t) + defer tme.stopTablets(t) + wf, err := tme.wr.NewVReplicationWorkflow(ctx, MoveTablesWorkflow, p) + require.NoError(t, err) + require.NotNil(t, wf) + require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState()) + tme.expectNoPreviousJournals() + expectMoveTablesQueries(t, tme) + + tme.expectNoPreviousJournals() + wf.params.TabletTypes = "replica" + wf.params.Cells = "cell1" + require.NoError(t, wf.SwitchTraffic(DirectionForward)) + require.Equal(t, "Reads partially switched. Replica switched in cells: cell1. Rdonly not switched. 
Writes Not Switched", wf.CurrentState()) + + tme.expectNoPreviousJournals() + wf.params.TabletTypes = "replica" + wf.params.Cells = "cell2" + require.NoError(t, wf.SwitchTraffic(DirectionForward)) + require.Equal(t, "Reads partially switched. All Replica Reads Switched. Rdonly not switched. Writes Not Switched", wf.CurrentState()) + + tme.expectNoPreviousJournals() + wf.params.TabletTypes = "rdonly" + wf.params.Cells = "cell1,cell2" + require.NoError(t, wf.SwitchTraffic(DirectionForward)) + require.Equal(t, WorkflowStateReadsSwitched, wf.CurrentState()) + + tme.expectNoPreviousJournals() + wf.params.TabletTypes = "replica,rdonly" + require.NoError(t, wf.SwitchTraffic(DirectionBackward)) + require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState()) + + tme.expectNoPreviousJournals() + wf.params.TabletTypes = "rdonly" + wf.params.Cells = "cell1" + require.NoError(t, wf.SwitchTraffic(DirectionForward)) + require.Equal(t, "Reads partially switched. Replica not switched. Rdonly switched in cells: cell1. Writes Not Switched", wf.CurrentState()) + + tme.expectNoPreviousJournals() + wf.params.TabletTypes = "rdonly" + wf.params.Cells = "cell2" + require.NoError(t, wf.SwitchTraffic(DirectionForward)) + require.Equal(t, "Reads partially switched. Replica not switched. All Rdonly Reads Switched. Writes Not Switched", wf.CurrentState()) + +} + +func TestMoveTablesV2Abort(t *testing.T) { + ctx := context.Background() + p := &VReplicationWorkflowParams{ + Workflow: "test", + SourceKeyspace: "ks1", + TargetKeyspace: "ks2", + Tables: "t1,t2", + Cells: "cell1,cell2", + TabletTypes: "replica,rdonly,master", + Timeout: DefaultActionTimeout, + } + tme := newTestTableMigrater(ctx, t) + defer tme.stopTablets(t) + expectMoveTablesQueries(t, tme) + wf, err := tme.wr.NewVReplicationWorkflow(ctx, MoveTablesWorkflow, p) + require.NoError(t, err) + require.NotNil(t, wf) + require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState()) + expectMoveTablesQueries(t, tme) + require.NoError(t, wf.Abort()) +} + +func TestReshardV2(t *testing.T) { + ctx := context.Background() + sourceShards := []string{"-40", "40-"} + targetShards := []string{"-80", "80-"} + p := &VReplicationWorkflowParams{ + Workflow: "test", + SourceKeyspace: "ks", + TargetKeyspace: "ks", + SourceShards: sourceShards, + TargetShards: targetShards, + Cells: "cell1,cell2", + TabletTypes: "replica,rdonly,master", + Timeout: DefaultActionTimeout, + } + tme := newTestShardMigrater(ctx, t, sourceShards, targetShards) + defer tme.stopTablets(t) + wf, err := tme.wr.NewVReplicationWorkflow(ctx, ReshardWorkflow, p) + require.NoError(t, err) + require.NotNil(t, wf) + require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState()) + tme.expectNoPreviousJournals() + expectReshardQueries(t, tme) + tme.expectNoPreviousJournals() + require.NoError(t, wf.SwitchTraffic(DirectionForward)) + require.Equal(t, WorkflowStateAllSwitched, wf.CurrentState()) + require.NoError(t, wf.Complete()) + si, err := wf.wr.ts.GetShard(ctx, "ks", "-40") + require.Contains(t, err.Error(), "node doesn't exist") + require.Nil(t, si) + si, err = wf.wr.ts.GetShard(ctx, "ks", "-80") + require.NoError(t, err) + require.NotNil(t, si) +} + +func TestReshardV2Abort(t *testing.T) { + ctx := context.Background() + sourceShards := []string{"-40", "40-"} + targetShards := []string{"-80", "80-"} + p := &VReplicationWorkflowParams{ + Workflow: "test", + SourceKeyspace: "ks", + TargetKeyspace: "ks", + SourceShards: sourceShards, + TargetShards: targetShards, + Cells: "cell1,cell2", + TabletTypes: 
"replica,rdonly,master", + Timeout: DefaultActionTimeout, + } + tme := newTestShardMigrater(ctx, t, sourceShards, targetShards) + defer tme.stopTablets(t) + wf, err := tme.wr.NewVReplicationWorkflow(ctx, ReshardWorkflow, p) + require.NoError(t, err) + require.NotNil(t, wf) + require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState()) + tme.expectNoPreviousJournals() + expectReshardQueries(t, tme) + require.NoError(t, wf.Abort()) +} + +func expectReshardQueries(t *testing.T, tme *testShardMigraterEnv) { + + sourceQueries := []string{ + "select id, workflow, source, pos from _vt.vreplication where db_name='vt_ks' and workflow != 'test_reverse' and state = 'Stopped' and message != 'FROZEN'", + "select id, workflow, source, pos from _vt.vreplication where db_name='vt_ks' and workflow != 'test_reverse'", + } + noResult := &sqltypes.Result{} + for _, dbclient := range tme.dbSourceClients { + for _, query := range sourceQueries { + dbclient.addInvariant(query, noResult) + } + dbclient.addInvariant("select id from _vt.vreplication where db_name = 'vt_ks' and workflow = 'test_reverse'", resultid1) + dbclient.addInvariant("delete from _vt.vreplication where id in (1)", noResult) + dbclient.addInvariant("delete from _vt.copy_state where vrepl_id in (1)", noResult) + dbclient.addInvariant("insert into _vt.vreplication (workflow, source, pos, max_tps, max_replication_lag, time_updated, transaction_timestamp, state, db_name)", &sqltypes.Result{InsertID: uint64(1)}) + dbclient.addInvariant("select id from _vt.vreplication where id = 1", resultid1) + dbclient.addInvariant("select id from _vt.vreplication where id = 2", resultid2) + dbclient.addInvariant("select * from _vt.vreplication where id = 1", runningResult(1)) + dbclient.addInvariant("select * from _vt.vreplication where id = 2", runningResult(2)) + dbclient.addInvariant("insert into _vt.resharding_journal", noResult) + + } + + targetQueries := []string{ + "select id, workflow, source, pos from _vt.vreplication where db_name='vt_ks' and workflow != 'test_reverse' and state = 'Stopped' and message != 'FROZEN'", + } + + for _, dbclient := range tme.dbTargetClients { + for _, query := range targetQueries { + dbclient.addInvariant(query, noResult) + } + dbclient.addInvariant("select id from _vt.vreplication where id = 1", resultid1) + dbclient.addInvariant("select id from _vt.vreplication where id = 2", resultid2) + dbclient.addInvariant("update _vt.vreplication set state = 'Stopped', message = 'stopped for cutover' where id in (1)", noResult) + dbclient.addInvariant("update _vt.vreplication set state = 'Stopped', message = 'stopped for cutover' where id in (2)", noResult) + dbclient.addInvariant("select * from _vt.vreplication where id = 1", runningResult(1)) + dbclient.addInvariant("select * from _vt.vreplication where id = 2", runningResult(2)) + state := sqltypes.MakeTestResult(sqltypes.MakeTestFields( + "pos|state|message", + "varchar|varchar|varchar"), + "MariaDB/5-456-892|Running") + dbclient.addInvariant("select pos, state, message from _vt.vreplication where id=2", state) + dbclient.addInvariant("select pos, state, message from _vt.vreplication where id=1", state) + dbclient.addInvariant("select id from _vt.vreplication where db_name = 'vt_ks' and workflow = 'test'", resultid1) + dbclient.addInvariant("update _vt.vreplication set message = 'FROZEN'", noResult) + dbclient.addInvariant("delete from _vt.vreplication where id in (1)", noResult) + dbclient.addInvariant("delete from _vt.copy_state where vrepl_id in (1)", noResult) + + } +} + +func 
expectMoveTablesQueries(t *testing.T, tme *testMigraterEnv) { + var query string + noResult := &sqltypes.Result{} + for _, dbclient := range tme.dbTargetClients { + query = "update _vt.vreplication set state = 'Running', message = '' where id in (1)" + dbclient.addInvariant(query, noResult) + dbclient.addInvariant("select id from _vt.vreplication where db_name = 'vt_ks2' and workflow = 'test'", resultid1) + dbclient.addInvariant("select * from _vt.vreplication where id = 1", runningResult(1)) + dbclient.addInvariant("select * from _vt.vreplication where id = 2", runningResult(2)) + query = "update _vt.vreplication set message='Picked source tablet: cell:\"cell1\" uid:10 ' where id=1" + dbclient.addInvariant(query, noResult) + dbclient.addInvariant("select id from _vt.vreplication where id = 1", resultid1) + dbclient.addInvariant("select id from _vt.vreplication where id = 2", resultid2) + dbclient.addInvariant("update _vt.vreplication set state = 'Stopped', message = 'stopped for cutover' where id in (1)", noResult) + dbclient.addInvariant("update _vt.vreplication set state = 'Stopped', message = 'stopped for cutover' where id in (2)", noResult) + dbclient.addInvariant("insert into _vt.vreplication (workflow, source, pos, max_tps, max_replication_lag, time_updated, transaction_timestamp, state, db_name)", &sqltypes.Result{InsertID: uint64(1)}) + dbclient.addInvariant("update _vt.vreplication set message = 'FROZEN'", noResult) + dbclient.addInvariant("select 1 from _vt.vreplication where db_name='vt_ks2' and workflow='test' and message!='FROZEN'", noResult) + dbclient.addInvariant("delete from _vt.vreplication where id in (1)", noResult) + dbclient.addInvariant("delete from _vt.copy_state where vrepl_id in (1)", noResult) + dbclient.addInvariant("insert into _vt.resharding_journal", noResult) + dbclient.addInvariant("select val from _vt.resharding_journal", noResult) + dbclient.addInvariant("select id, source, message, cell, tablet_types from _vt.vreplication where workflow='test_reverse' and db_name='vt_ks1'", + sqltypes.MakeTestResult(sqltypes.MakeTestFields( + "id|source|message|cell|tablet_types", + "int64|varchar|varchar|varchar|varchar"), + ""), + ) + //select pos, state, message from _vt.vreplication where id=1 + } + + for _, dbclient := range tme.dbSourceClients { + dbclient.addInvariant("select val from _vt.resharding_journal", noResult) + dbclient.addInvariant("update _vt.vreplication set message = 'FROZEN'", noResult) + dbclient.addInvariant("insert into _vt.vreplication (workflow, source, pos, max_tps, max_replication_lag, time_updated, transaction_timestamp, state, db_name)", &sqltypes.Result{InsertID: uint64(1)}) + dbclient.addInvariant("update _vt.vreplication set state = 'Stopped', message = 'stopped for cutover' where id in (1)", noResult) + dbclient.addInvariant("update _vt.vreplication set state = 'Stopped', message = 'stopped for cutover' where id in (2)", noResult) + dbclient.addInvariant("select id from _vt.vreplication where id = 1", resultid1) + dbclient.addInvariant("select id from _vt.vreplication where id = 2", resultid2) + dbclient.addInvariant("select id from _vt.vreplication where db_name = 'vt_ks1' and workflow = 'test_reverse'", resultid1) + dbclient.addInvariant("delete from _vt.vreplication where id in (1)", noResult) + dbclient.addInvariant("delete from _vt.copy_state where vrepl_id in (1)", noResult) + dbclient.addInvariant("insert into _vt.vreplication (workflow, source, pos, max_tps, max_replication_lag, time_updated, transaction_timestamp, state, 
db_name)", &sqltypes.Result{InsertID: uint64(1)}) + dbclient.addInvariant("select * from _vt.vreplication where id = 1", runningResult(1)) + dbclient.addInvariant("select * from _vt.vreplication where id = 2", runningResult(2)) + dbclient.addInvariant("insert into _vt.resharding_journal", noResult) + } + state := sqltypes.MakeTestResult(sqltypes.MakeTestFields( + "pos|state|message", + "varchar|varchar|varchar"), + "MariaDB/5-456-892|Running", + ) + tme.dbTargetClients[0].addInvariant("select pos, state, message from _vt.vreplication where id=1", state) + tme.dbTargetClients[0].addInvariant("select pos, state, message from _vt.vreplication where id=2", state) + tme.dbTargetClients[1].addInvariant("select pos, state, message from _vt.vreplication where id=1", state) + tme.dbTargetClients[1].addInvariant("select pos, state, message from _vt.vreplication where id=2", state) + + state = sqltypes.MakeTestResult(sqltypes.MakeTestFields( + "pos|state|message", + "varchar|varchar|varchar"), + "MariaDB/5-456-893|Running", + ) + tme.dbSourceClients[0].addInvariant("select pos, state, message from _vt.vreplication where id=1", state) + tme.dbSourceClients[0].addInvariant("select pos, state, message from _vt.vreplication where id=2", state) + tme.dbSourceClients[1].addInvariant("select pos, state, message from _vt.vreplication where id=1", state) + tme.dbSourceClients[1].addInvariant("select pos, state, message from _vt.vreplication where id=2", state) + tme.tmeDB.AddQuery("drop table vt_ks1.t1", noResult) + tme.tmeDB.AddQuery("drop table vt_ks1.t2", noResult) + tme.tmeDB.AddQuery("drop table vt_ks2.t1", noResult) + tme.tmeDB.AddQuery("drop table vt_ks2.t2", noResult) + tme.tmeDB.AddQuery("update _vt.vreplication set message='Picked source tablet: cell:\"cell1\" uid:10 ' where id=1", noResult) +} diff --git a/test/config.json b/test/config.json index 4ac0cd5cc0c..a7a48e23a1b 100644 --- a/test/config.json +++ b/test/config.json @@ -600,7 +600,7 @@ }, "vreplication_basic": { "File": "unused.go", - "Args": ["vitess.io/vitess/go/test/endtoend/vreplication", "-run", "Basic"], + "Args": ["vitess.io/vitess/go/test/endtoend/vreplication", "-run", "TestBasicVreplicationWorkflow"], "Command": [], "Manual": false, "Shard": 24, @@ -615,6 +615,15 @@ "Shard": 22, "RetryMax": 0, "Tags": [] + }, + "vreplication_v2": { + "File": "unused.go", + "Args": ["vitess.io/vitess/go/test/endtoend/vreplication", "-run", "TestBasicV2Workflows"], + "Command": [], + "Manual": false, + "Shard": 21, + "RetryMax": 3, + "Tags": [] } } } diff --git a/test/local_example.sh b/test/local_example.sh index 43ca3d0e6e8..a72a7bde0bb 100755 --- a/test/local_example.sh +++ b/test/local_example.sh @@ -44,7 +44,6 @@ for shard in "customer/0"; do done; ./202_move_tables.sh - sleep 3 # required for now ./203_switch_reads.sh @@ -75,6 +74,7 @@ done; sleep 3 # TODO: Required for now! + ./304_switch_reads.sh ./305_switch_writes.sh