Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add StalledDiskPrimary analysis and recovery by vtorc #16050

Closed
wants to merge 9 commits into from
3 changes: 3 additions & 0 deletions go/flags/endtoend/vtcombo.txt
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,9 @@ Flags:
--srv_topo_cache_refresh duration how frequently to refresh the topology for cached entries (default 1s)
--srv_topo_cache_ttl duration how long to use cached entries for topology (default 1s)
--srv_topo_timeout duration topo server timeout (default 5s)
--stalled-disk-write-dir string if provided, tablet will attempt to write a file to this directory to check if the disk is stalled
--stalled-disk-write-interval duration how often to write to the disk to check whether it is stalled (default 5s)
--stalled-disk-write-timeout duration if writes exceed this duration, the disk is considered stalled (default 30s)
--start_mysql Should vtcombo also start mysql
--stats_backend string The name of the registered push-based monitoring/stats backend to use
--stats_combine_dimensions string List of dimensions to be combined into a single "all" value in exported stats vars
Expand Down
1 change: 1 addition & 0 deletions go/flags/endtoend/vtorc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ Flags:
--config-type string Config file type (omit to infer config type from file extension).
--consul_auth_static_file string JSON File to read the topos/tokens from.
--emit_stats If set, emit stats to push-based monitoring and stats backends
--enable-stalled-disk-primary-analysis Whether VTOrc should be analyzing and recovering stalled disk primary failures
--grpc_auth_static_client_creds string When using grpc_static_auth in the server, this file provides the credentials to use to authenticate with server.
--grpc_compression string Which protocol to use for compressing gRPC. Default: nothing. Supported: snappy
--grpc_enable_tracing Enable gRPC tracing.
Expand Down
3 changes: 3 additions & 0 deletions go/flags/endtoend/vttablet.txt
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,9 @@ Flags:
--srv_topo_cache_refresh duration how frequently to refresh the topology for cached entries (default 1s)
--srv_topo_cache_ttl duration how long to use cached entries for topology (default 1s)
--srv_topo_timeout duration topo server timeout (default 5s)
--stalled-disk-write-dir string if provided, tablet will attempt to write a file to this directory to check if the disk is stalled
--stalled-disk-write-interval duration how often to write to the disk to check whether it is stalled (default 5s)
--stalled-disk-write-timeout duration if writes exceed this duration, the disk is considered stalled (default 30s)
--stats_backend string The name of the registered push-based monitoring/stats backend to use
--stats_combine_dimensions string List of dimensions to be combined into a single "all" value in exported stats vars
--stats_common_tags strings Comma-separated list of common tags for the stats backend. It provides both label and values. Example: label1:value1,label2:value2
Expand Down
37 changes: 21 additions & 16 deletions go/vt/vtorc/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,22 +42,23 @@ const (
)

var (
sqliteDataFile = "file::memory:?mode=memory&cache=shared"
instancePollTime = 5 * time.Second
snapshotTopologyInterval = 0 * time.Hour
reasonableReplicationLag = 10 * time.Second
auditFileLocation = ""
auditToBackend = false
auditToSyslog = false
auditPurgeDuration = 7 * 24 * time.Hour // Equivalent of 7 days
recoveryPeriodBlockDuration = 30 * time.Second
preventCrossCellFailover = false
waitReplicasTimeout = 30 * time.Second
tolerableReplicationLag = 0 * time.Second
topoInformationRefreshDuration = 15 * time.Second
recoveryPollDuration = 1 * time.Second
ersEnabled = true
convertTabletsWithErrantGTIDs = false
sqliteDataFile = "file::memory:?mode=memory&cache=shared"
instancePollTime = 5 * time.Second
snapshotTopologyInterval = 0 * time.Hour
reasonableReplicationLag = 10 * time.Second
auditFileLocation = ""
auditToBackend = false
auditToSyslog = false
auditPurgeDuration = 7 * 24 * time.Hour // Equivalent of 7 days
recoveryPeriodBlockDuration = 30 * time.Second
preventCrossCellFailover = false
waitReplicasTimeout = 30 * time.Second
tolerableReplicationLag = 0 * time.Second
topoInformationRefreshDuration = 15 * time.Second
recoveryPollDuration = 1 * time.Second
ersEnabled = true
convertTabletsWithErrantGTIDs = false
enableStalledDiskPrimaryAnalysis = false
)

// RegisterFlags registers the flags required by VTOrc
Expand All @@ -79,6 +80,7 @@ func RegisterFlags(fs *pflag.FlagSet) {
fs.DurationVar(&recoveryPollDuration, "recovery-poll-duration", recoveryPollDuration, "Timer duration on which VTOrc polls its database to run a recovery")
fs.BoolVar(&ersEnabled, "allow-emergency-reparent", ersEnabled, "Whether VTOrc should be allowed to run emergency reparent operation when it detects a dead primary")
fs.BoolVar(&convertTabletsWithErrantGTIDs, "change-tablets-with-errant-gtid-to-drained", convertTabletsWithErrantGTIDs, "Whether VTOrc should be changing the type of tablets with errant GTIDs to DRAINED")
fs.BoolVar(&enableStalledDiskPrimaryAnalysis, "enable-stalled-disk-primary-analysis", enableStalledDiskPrimaryAnalysis, "Whether VTOrc should be analyzing and recovering stalled disk primary failures")
}

// Configuration makes for vtorc configuration input, which can be provided by user via JSON formatted file.
Expand All @@ -100,6 +102,7 @@ type Configuration struct {
TolerableReplicationLagSeconds int // Amount of replication lag that is considered acceptable for a tablet to be eligible for promotion when Vitess makes the choice of a new primary in PRS.
TopoInformationRefreshSeconds int // Timer duration on which VTOrc refreshes the keyspace and vttablet records from the topo-server.
RecoveryPollSeconds int // Timer duration on which VTOrc recovery analysis runs
EnableStalledDiskPrimaryAnalysis bool // Whether to enable the analysis and recovery of stalled disk primary failures
}

// ToJSONString will marshal this configuration as JSON
Expand Down Expand Up @@ -130,6 +133,7 @@ func UpdateConfigValuesFromFlags() {
Config.TolerableReplicationLagSeconds = int(tolerableReplicationLag / time.Second)
Config.TopoInformationRefreshSeconds = int(topoInformationRefreshDuration / time.Second)
Config.RecoveryPollSeconds = int(recoveryPollDuration / time.Second)
Config.EnableStalledDiskPrimaryAnalysis = enableStalledDiskPrimaryAnalysis
}

// ERSEnabled reports whether VTOrc is allowed to run ERS or not.
Expand Down Expand Up @@ -173,6 +177,7 @@ func newConfiguration() *Configuration {
WaitReplicasTimeoutSeconds: 30,
TopoInformationRefreshSeconds: 15,
RecoveryPollSeconds: 1,
EnableStalledDiskPrimaryAnalysis: false,
}
}

Expand Down
1 change: 1 addition & 0 deletions go/vt/vtorc/db/generate_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ CREATE TABLE database_instance (
semi_sync_primary_status TINYint NOT NULL DEFAULT 0,
semi_sync_replica_status TINYint NOT NULL DEFAULT 0,
semi_sync_primary_clients int NOT NULL DEFAULT 0,
stalled_disk TINYint NOT NULL DEFAULT 0,
PRIMARY KEY (alias)
)`,
`
Expand Down
2 changes: 2 additions & 0 deletions go/vt/vtorc/inst/analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ const (
LockedSemiSyncPrimaryHypothesis AnalysisCode = "LockedSemiSyncPrimaryHypothesis"
LockedSemiSyncPrimary AnalysisCode = "LockedSemiSyncPrimary"
ErrantGTIDDetected AnalysisCode = "ErrantGTIDDetected"
StalledDiskPrimary AnalysisCode = "StalledDiskPrimary"
)

type StructureAnalysisCode string
Expand Down Expand Up @@ -130,6 +131,7 @@ type ReplicationAnalysis struct {
MaxReplicaGTIDMode string
MaxReplicaGTIDErrant string
IsReadOnly bool
IsStalledDisk bool
}

func (replicationAnalysis *ReplicationAnalysis) MarshalJSON() ([]byte, error) {
Expand Down
11 changes: 9 additions & 2 deletions go/vt/vtorc/inst/analysis_dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
vitess_keyspace.durability_policy AS durability_policy,
vitess_shard.primary_timestamp AS shard_primary_term_timestamp,
primary_instance.read_only AS read_only,
MIN(primary_instance.gtid_errant) AS gtid_errant,
MIN(primary_instance.gtid_errant) AS gtid_errant,
MIN(primary_instance.alias) IS NULL AS is_invalid,
MIN(primary_instance.binary_log_file) AS binary_log_file,
MIN(primary_instance.binary_log_pos) AS binary_log_pos,
Expand Down Expand Up @@ -246,7 +246,8 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
COUNT(
DISTINCT case when replica_instance.log_bin
AND replica_instance.log_replica_updates then replica_instance.major_version else NULL end
) AS count_distinct_logging_major_versions
) AS count_distinct_logging_major_versions,
primary_instance.stalled_disk != 0 AS is_stalled_disk
FROM
vitess_tablet
JOIN vitess_keyspace ON (
Expand Down Expand Up @@ -364,6 +365,7 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
a.HeartbeatInterval = m.GetFloat64("heartbeat_interval")

a.IsReadOnly = m.GetUint("read_only") == 1
a.IsStalledDisk = m.GetBool("is_stalled_disk")

if !a.LastCheckValid {
analysisMessage := fmt.Sprintf("analysis: Alias: %+v, Keyspace: %+v, Shard: %+v, IsPrimary: %+v, LastCheckValid: %+v, LastCheckPartialSuccess: %+v, CountReplicas: %+v, CountValidReplicas: %+v, CountValidReplicatingReplicas: %+v, CountLaggingReplicas: %+v, CountDelayedReplicas: %+v",
Expand Down Expand Up @@ -411,6 +413,11 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
} else if isInvalid {
a.Analysis = InvalidReplica
a.Description = "VTOrc hasn't been able to reach the replica even once since restart/shutdown"
} else if a.IsClusterPrimary && !a.LastCheckValid && a.IsStalledDisk {
a.Analysis = StalledDiskPrimary
a.Description = "Primary has a stalled disk"
ca.hasClusterwideAction = true
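// This branch precedes the unreachable-primary branch below so that a stalled disk is reported under its own analysis code.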
} else if a.IsClusterPrimary && !a.LastCheckValid && a.CountReplicas == 0 {
a.Analysis = DeadPrimaryWithoutReplicas
a.Description = "Primary cannot be reached by vtorc and has no replica"
Expand Down
31 changes: 27 additions & 4 deletions go/vt/vtorc/inst/analysis_dao_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ var (
// The initialSQL is a set of insert commands copied from a dump of an actual running VTOrc instance. The relevant insert commands are here.
// This is a dump taken from a test running 4 tablets, zone1-101 is the primary, zone1-100 is a replica, zone1-112 is a rdonly and zone2-200 is a cross-cell replica.
initialSQL = []string{
`INSERT INTO database_instance VALUES('zone1-0000000112','localhost',6747,'2022-12-28 07:26:04','2022-12-28 07:26:04',213696377,'8.0.31','ROW',1,1,'vt-0000000112-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,0,'vt-0000000112-relay-bin.000002',15815,0,1,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a5138-8680-11ed-9240-92a06c3be3c2','2022-12-28 07:26:04','',1,0,0,'Homebrew','8.0','FULL',10816929,0,0,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a5138-8680-11ed-9240-92a06c3be3c2',1,1,'',1000000000000000000,1,0,0,0);`,
`INSERT INTO database_instance VALUES('zone1-0000000100','localhost',6711,'2022-12-28 07:26:04','2022-12-28 07:26:04',1094500338,'8.0.31','ROW',1,1,'vt-0000000100-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,0,'vt-0000000100-relay-bin.000002',15815,0,1,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a5138-8680-11ed-acf8-d6b0ef9f4eaa','2022-12-28 07:26:04','',1,0,0,'Homebrew','8.0','FULL',10103920,0,1,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a5138-8680-11ed-acf8-d6b0ef9f4eaa',1,1,'',1000000000000000000,1,0,1,0);`,
`INSERT INTO database_instance VALUES('zone1-0000000101','localhost',6714,'2022-12-28 07:26:04','2022-12-28 07:26:04',390954723,'8.0.31','ROW',1,1,'vt-0000000101-bin.000001',15583,'',0,0,0,0,0,'',0,'',0,NULL,NULL,0,'','',0,0,'',0,0,0,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a4cc4-8680-11ed-a104-47706090afbd','2022-12-28 07:26:04','',0,0,0,'Homebrew','8.0','FULL',11366095,1,1,'ON',1,'','','729a4cc4-8680-11ed-a104-47706090afbd',-1,-1,'',1000000000000000000,1,1,0,2);`,
`INSERT INTO database_instance VALUES('zone2-0000000200','localhost',6756,'2022-12-28 07:26:05','2022-12-28 07:26:05',444286571,'8.0.31','ROW',1,1,'vt-0000000200-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,0,'vt-0000000200-relay-bin.000002',15815,0,1,0,'zone2','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a497c-8680-11ed-8ad4-3f51d747db75','2022-12-28 07:26:05','',1,0,0,'Homebrew','8.0','FULL',10443112,0,1,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a497c-8680-11ed-8ad4-3f51d747db75',1,1,'',1000000000000000000,1,0,1,0);`,
`INSERT INTO database_instance VALUES('zone1-0000000112','localhost',6747,'2022-12-28 07:26:04','2022-12-28 07:26:04',213696377,'8.0.31','ROW',1,1,'vt-0000000112-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,0,'vt-0000000112-relay-bin.000002',15815,0,1,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a5138-8680-11ed-9240-92a06c3be3c2','2022-12-28 07:26:04','',1,0,0,'Homebrew','8.0','FULL',10816929,0,0,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a5138-8680-11ed-9240-92a06c3be3c2',1,1,'',1000000000000000000,1,0,0,0,false);`,
`INSERT INTO database_instance VALUES('zone1-0000000100','localhost',6711,'2022-12-28 07:26:04','2022-12-28 07:26:04',1094500338,'8.0.31','ROW',1,1,'vt-0000000100-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,0,'vt-0000000100-relay-bin.000002',15815,0,1,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a5138-8680-11ed-acf8-d6b0ef9f4eaa','2022-12-28 07:26:04','',1,0,0,'Homebrew','8.0','FULL',10103920,0,1,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a5138-8680-11ed-acf8-d6b0ef9f4eaa',1,1,'',1000000000000000000,1,0,1,0,false);`,
`INSERT INTO database_instance VALUES('zone1-0000000101','localhost',6714,'2022-12-28 07:26:04','2022-12-28 07:26:04',390954723,'8.0.31','ROW',1,1,'vt-0000000101-bin.000001',15583,'',0,0,0,0,0,'',0,'',0,NULL,NULL,0,'','',0,0,'',0,0,0,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a4cc4-8680-11ed-a104-47706090afbd','2022-12-28 07:26:04','',0,0,0,'Homebrew','8.0','FULL',11366095,1,1,'ON',1,'','','729a4cc4-8680-11ed-a104-47706090afbd',-1,-1,'',1000000000000000000,1,1,0,2,false);`,
`INSERT INTO database_instance VALUES('zone2-0000000200','localhost',6756,'2022-12-28 07:26:05','2022-12-28 07:26:05',444286571,'8.0.31','ROW',1,1,'vt-0000000200-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,0,'vt-0000000200-relay-bin.000002',15815,0,1,0,'zone2','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a497c-8680-11ed-8ad4-3f51d747db75','2022-12-28 07:26:05','',1,0,0,'Homebrew','8.0','FULL',10443112,0,1,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a497c-8680-11ed-8ad4-3f51d747db75',1,1,'',1000000000000000000,1,0,1,0,false);`,
`INSERT INTO vitess_tablet VALUES('zone1-0000000100','localhost',6711,'ks','0','zone1',2,'0001-01-01 00:00:00+00:00',X'616c6961733a7b63656c6c3a227a6f6e653122207569643a3130307d20686f73746e616d653a226c6f63616c686f73742220706f72745f6d61703a7b6b65793a2267727063222076616c75653a363731307d20706f72745f6d61703a7b6b65793a227674222076616c75653a363730397d206b657973706163653a226b73222073686172643a22302220747970653a5245504c494341206d7973716c5f686f73746e616d653a226c6f63616c686f737422206d7973716c5f706f72743a363731312064625f7365727665725f76657273696f6e3a22382e302e3331222064656661756c745f636f6e6e5f636f6c6c6174696f6e3a3435');`,
`INSERT INTO vitess_tablet VALUES('zone1-0000000101','localhost',6714,'ks','0','zone1',1,'2022-12-28 07:23:25.129898+00:00',X'616c6961733a7b63656c6c3a227a6f6e653122207569643a3130317d20686f73746e616d653a226c6f63616c686f73742220706f72745f6d61703a7b6b65793a2267727063222076616c75653a363731337d20706f72745f6d61703a7b6b65793a227674222076616c75653a363731327d206b657973706163653a226b73222073686172643a22302220747970653a5052494d415259206d7973716c5f686f73746e616d653a226c6f63616c686f737422206d7973716c5f706f72743a36373134207072696d6172795f7465726d5f73746172745f74696d653a7b7365636f6e64733a31363732323132323035206e616e6f7365636f6e64733a3132393839383030307d2064625f7365727665725f76657273696f6e3a22382e302e3331222064656661756c745f636f6e6e5f636f6c6c6174696f6e3a3435');`,
`INSERT INTO vitess_tablet VALUES('zone1-0000000112','localhost',6747,'ks','0','zone1',3,'0001-01-01 00:00:00+00:00',X'616c6961733a7b63656c6c3a227a6f6e653122207569643a3131327d20686f73746e616d653a226c6f63616c686f73742220706f72745f6d61703a7b6b65793a2267727063222076616c75653a363734367d20706f72745f6d61703a7b6b65793a227674222076616c75653a363734357d206b657973706163653a226b73222073686172643a22302220747970653a52444f4e4c59206d7973716c5f686f73746e616d653a226c6f63616c686f737422206d7973716c5f706f72743a363734372064625f7365727665725f76657273696f6e3a22382e302e3331222064656661756c745f636f6e6e5f636f6c6c6174696f6e3a3435');`,
Expand Down Expand Up @@ -95,6 +95,29 @@ func TestGetReplicationAnalysisDecision(t *testing.T) {
keyspaceWanted: "ks",
shardWanted: "0",
codeWanted: PrimaryTabletDeleted,
}, {
name: "StalledDiskPrimary",
info: []*test.InfoForRecoveryAnalysis{{
TabletInfo: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100},
Hostname: "localhost",
Keyspace: "ks",
Shard: "0",
Type: topodatapb.TabletType_PRIMARY,
MysqlHostname: "localhost",
MysqlPort: 6709,
},
DurabilityPolicy: "none",
LastCheckValid: 0,
CountReplicas: 4,
CountValidReplicas: 4,
CountValidReplicatingReplicas: 0,
IsPrimary: 1,
IsStalledDisk: 1,
}},
keyspaceWanted: "ks",
shardWanted: "0",
codeWanted: StalledDiskPrimary,
}, {
name: "DeadPrimary",
info: []*test.InfoForRecoveryAnalysis{{
Expand Down
1 change: 1 addition & 0 deletions go/vt/vtorc/inst/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ type Instance struct {
IsUpToDate bool
IsRecentlyChecked bool
SecondsSinceLastSeen sql.NullInt64
StalledDisk bool

AllowTLS bool

Expand Down
Loading
Loading