Skip to content

Commit

Permalink
more debug
Browse files Browse the repository at this point in the history
  • Loading branch information
vmogilev committed Jan 11, 2024
1 parent 7335c67 commit 79fb085
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 15 deletions.
2 changes: 2 additions & 0 deletions go/vt/vttablet/tabletmanager/replmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"sync"
"time"

"github.com/davecgh/go-spew/spew"
"vitess.io/vitess/go/mysql"
"vitess.io/vitess/go/timer"
"vitess.io/vitess/go/vt/log"
Expand Down Expand Up @@ -119,6 +120,7 @@ func (rm *replManager) checkActionLocked() {
}
}

log.Infof("vm-debug: replManager=%s", spew.Sdump(rm))
if !rm.failed {
log.Infof("Replication is stopped, reconnecting to primary.")
}
Expand Down
36 changes: 21 additions & 15 deletions go/vt/vttablet/tabletmanager/rpc_replication.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,16 @@ limitations under the License.
package tabletmanager

import (
"context"
"flag"
"fmt"
"strconv"
"strings"
"time"

"github.com/davecgh/go-spew/spew"
"vitess.io/vitess/go/vt/proto/vtrpc"

"context"

"vitess.io/vitess/go/mysql"
"vitess.io/vitess/go/vt/log"
"vitess.io/vitess/go/vt/logutil"
Expand Down Expand Up @@ -476,10 +476,10 @@ func (tm *TabletManager) InitReplica(ctx context.Context, parent *topodatapb.Tab
//
// It attempts to idempotently ensure the following guarantees upon returning
// successfully:
// * No future writes will be accepted.
// * No writes are in-flight.
// * MySQL is in read-only mode.
// * Semi-sync settings are consistent with a REPLICA tablet.
// - No future writes will be accepted.
// - No writes are in-flight.
// - MySQL is in read-only mode.
// - Semi-sync settings are consistent with a REPLICA tablet.
//
// If necessary, it waits for all in-flight writes to complete or time out.
//
Expand Down Expand Up @@ -703,6 +703,7 @@ func (tm *TabletManager) setReplicationSourceRepairReplication(ctx context.Conte
return err
}

log.Infof("vm-debug: calling tm.TopoServer.LockShard ctx=%s", spew.Sdump(ctx))
ctx, unlock, lockErr := tm.TopoServer.LockShard(ctx, parent.Tablet.GetKeyspace(), parent.Tablet.GetShard(), fmt.Sprintf("repairReplication to %v as parent)", topoproto.TabletAliasString(parentAlias)))
if lockErr != nil {
return lockErr
Expand Down Expand Up @@ -745,6 +746,7 @@ func (tm *TabletManager) setReplicationSourceLocked(ctx context.Context, parentA
// unintentionally change the type of RDONLY tablets
tablet := tm.Tablet()
if tablet.Type == topodatapb.TabletType_PRIMARY {
log.Infof("vm-debug: calling tm.tmState.ChangeTabletType")
if err := tm.tmState.ChangeTabletType(ctx, topodatapb.TabletType_REPLICA, DBActionNone); err != nil {
return err
}
Expand All @@ -755,6 +757,7 @@ func (tm *TabletManager) setReplicationSourceLocked(ctx context.Context, parentA
shouldbeReplicating := false
status, err := tm.MysqlDaemon.ReplicationStatus()
if err == mysql.ErrNotReplica {
log.Infof("vm-debug: err == mysql.ErrNotReplica")
// This is a special error that means we actually succeeded in reading
// the status, but the status is empty because replication is not
// configured. We assume this means we used to be a primary, so we always
Expand All @@ -781,6 +784,7 @@ func (tm *TabletManager) setReplicationSourceLocked(ctx context.Context, parentA
if tabletType == topodatapb.TabletType_PRIMARY {
tabletType = topodatapb.TabletType_REPLICA
}
log.Infof("vm-debug: calling tm.fixSemiSync")
if err := tm.fixSemiSync(tabletType, semiSync); err != nil {
return err
}
Expand All @@ -797,6 +801,7 @@ func (tm *TabletManager) setReplicationSourceLocked(ctx context.Context, parentA
host := parent.Tablet.MysqlHostname
port := int(parent.Tablet.MysqlPort)
if status.SourceHost != host || status.SourcePort != port {
log.Infof("vm-debug: calling tm.MysqlDaemon.SetReplicationSource")
// This handles both changing the address and starting replication.
if err := tm.MysqlDaemon.SetReplicationSource(ctx, host, port, wasReplicating, shouldbeReplicating); err != nil {
if err := tm.handleRelayLogError(err); err != nil {
Expand Down Expand Up @@ -1053,18 +1058,18 @@ func (tm *TabletManager) fixSemiSync(tabletType topodatapb.TabletType, semiSync
// The following code will be uncommented, and the code above deleted, once we are
// ready to use the durability policies for setting the semi_sync information.

//switch semiSync {
//case SemiSyncActionNone:
// switch semiSync {
// case SemiSyncActionNone:
// return nil
//case SemiSyncActionSet:
// case SemiSyncActionSet:
// // Always enable replica-side since it doesn't hurt to keep it on for a primary.
// // The primary-side needs to be off for a replica, or else it will get stuck.
// return tm.MysqlDaemon.SetSemiSyncEnabled(tabletType == topodatapb.TabletType_PRIMARY, true)
//case SemiSyncActionUnset:
// case SemiSyncActionUnset:
// return tm.MysqlDaemon.SetSemiSyncEnabled(false, false)
//default:
// default:
// return vterrors.Errorf(vtrpc.Code_INTERNAL, "Unknown SemiSyncAction - %v", semiSync)
//}
// }
}

func (tm *TabletManager) isPrimarySideSemiSyncEnabled() bool {
Expand All @@ -1077,10 +1082,10 @@ func (tm *TabletManager) fixSemiSyncAndReplication(tabletType topodatapb.TabletT
// Semi-sync handling is not enabled.
return nil
}
//if semiSync == SemiSyncActionNone {
// if semiSync == SemiSyncActionNone {
// // Semi-sync handling is not required.
// return nil
//}
// }

if tabletType == topodatapb.TabletType_PRIMARY {
// Primary is special. It is always handled at the
Expand All @@ -1106,7 +1111,7 @@ func (tm *TabletManager) fixSemiSyncAndReplication(tabletType topodatapb.TabletT
return nil
}

//shouldAck := semiSync == SemiSyncActionSet
// shouldAck := semiSync == SemiSyncActionSet
shouldAck := isPrimaryEligible(tabletType)
acking, err := tm.MysqlDaemon.SemiSyncReplicationStatus()
if err != nil {
Expand Down Expand Up @@ -1164,6 +1169,7 @@ func (tm *TabletManager) repairReplication(ctx context.Context) error {

// If Orchestrator is configured and if Orchestrator is actively reparenting, we should not repairReplication
if tm.orc != nil {
log.Infof("vm-debug: tm.orc != nil")
re, err := tm.orc.InActiveShardRecovery(tablet)
if err != nil {
return err
Expand Down

0 comments on commit 79fb085

Please sign in to comment.