From 52a86082e96f54bf51298c962518a5aa531b3aca Mon Sep 17 00:00:00 2001 From: Manan Gupta Date: Tue, 17 Aug 2021 15:51:31 +0530 Subject: [PATCH 1/6] rename master to primary in configuration Signed-off-by: Manan Gupta --- go/vt/orchestrator/config/config.go | 666 +++++++++--------- go/vt/orchestrator/config/config_test.go | 36 +- go/vt/orchestrator/http/api.go | 4 +- go/vt/orchestrator/inst/binlog_test.go | 2 +- go/vt/orchestrator/inst/cluster.go | 6 +- go/vt/orchestrator/inst/cluster_test.go | 2 +- go/vt/orchestrator/inst/instance_dao.go | 2 +- go/vt/orchestrator/inst/instance_topology.go | 6 +- go/vt/orchestrator/logic/topology_recovery.go | 34 +- 9 files changed, 379 insertions(+), 379 deletions(-) diff --git a/go/vt/orchestrator/config/config.go b/go/vt/orchestrator/config/config.go index d695efd6de8..89f84b59adb 100644 --- a/go/vt/orchestrator/config/config.go +++ b/go/vt/orchestrator/config/config.go @@ -62,184 +62,184 @@ const ( // strictly expected from user. // TODO(sougou): change this to yaml parsing, and possible merge with tabletenv. type Configuration struct { - Debug bool // set debug mode (similar to --debug option) - EnableSyslog bool // Should logs be directed (in addition) to syslog daemon? - ListenAddress string // Where orchestrator HTTP should listen for TCP - ListenSocket string // Where orchestrator HTTP should listen for unix socket (default: empty; when given, TCP is disabled) - HTTPAdvertise string // optional, for raft setups, what is the HTTP address this node will advertise to its peers (potentially use where behind NAT or when rerouting ports; example: "http://11.22.33.44:3030") - AgentsServerPort string // port orchestrator agents talk back to - Durability string // The type of durability to enforce. Default is "none". Other values are dictated by registered plugins - DurabilityParams map[string]string // map for specifying additional parameters for durability plugins. 
Used by durability mode "specified" - MySQLTopologyUser string - MySQLTopologyPassword string - MySQLReplicaUser string // If set, use this credential instead of discovering from mysql. TODO(sougou): deprecate this in favor of fetching from vttablet - MySQLReplicaPassword string - MySQLTopologyCredentialsConfigFile string // my.cnf style configuration file from where to pick credentials. Expecting `user`, `password` under `[client]` section - MySQLTopologySSLPrivateKeyFile string // Private key file used to authenticate with a Topology mysql instance with TLS - MySQLTopologySSLCertFile string // Certificate PEM file used to authenticate with a Topology mysql instance with TLS - MySQLTopologySSLCAFile string // Certificate Authority PEM file used to authenticate with a Topology mysql instance with TLS - MySQLTopologySSLSkipVerify bool // If true, do not strictly validate mutual TLS certs for Topology mysql instances - MySQLTopologyUseMutualTLS bool // Turn on TLS authentication with the Topology MySQL instances - MySQLTopologyUseMixedTLS bool // Mixed TLS and non-TLS authentication with the Topology MySQL instances - TLSCacheTTLFactor uint // Factor of InstancePollSeconds that we set as TLS info cache expiry - BackendDB string // EXPERIMENTAL: type of backend db; either "mysql" or "sqlite3" - SQLite3DataFile string // when BackendDB == "sqlite3", full path to sqlite3 datafile - SkipOrchestratorDatabaseUpdate bool // When true, do not check backend database schema nor attempt to update it. Useful when you may be running multiple versions of orchestrator, and you only wish certain boxes to dictate the db structure (or else any time a different orchestrator version runs it will rebuild database schema) - PanicIfDifferentDatabaseDeploy bool // When true, and this process finds the orchestrator backend DB was provisioned by a different version, panic - RaftEnabled bool // When true, setup orchestrator in a raft consensus layout. 
When false (default) all Raft* variables are ignored - RaftBind string - RaftAdvertise string - RaftDataDir string - DefaultRaftPort int // if a RaftNodes entry does not specify port, use this one - RaftNodes []string // Raft nodes to make initial connection with - ExpectFailureAnalysisConcensus bool - MySQLOrchestratorHost string - MySQLOrchestratorMaxPoolConnections int // The maximum size of the connection pool to the Orchestrator backend. - MySQLOrchestratorPort uint - MySQLOrchestratorDatabase string - MySQLOrchestratorUser string - MySQLOrchestratorPassword string - MySQLOrchestratorCredentialsConfigFile string // my.cnf style configuration file from where to pick credentials. Expecting `user`, `password` under `[client]` section - MySQLOrchestratorSSLPrivateKeyFile string // Private key file used to authenticate with the Orchestrator mysql instance with TLS - MySQLOrchestratorSSLCertFile string // Certificate PEM file used to authenticate with the Orchestrator mysql instance with TLS - MySQLOrchestratorSSLCAFile string // Certificate Authority PEM file used to authenticate with the Orchestrator mysql instance with TLS - MySQLOrchestratorSSLSkipVerify bool // If true, do not strictly validate mutual TLS certs for the Orchestrator mysql instances - MySQLOrchestratorUseMutualTLS bool // Turn on TLS authentication with the Orchestrator MySQL instance - MySQLOrchestratorReadTimeoutSeconds int // Number of seconds before backend mysql read operation is aborted (driver-side) - MySQLOrchestratorRejectReadOnly bool // Reject read only connections https://github.com/go-sql-driver/mysql#rejectreadonly - MySQLConnectTimeoutSeconds int // Number of seconds before connection is aborted (driver-side) - MySQLDiscoveryReadTimeoutSeconds int // Number of seconds before topology mysql read operation is aborted (driver-side). Used for discovery queries. - MySQLTopologyReadTimeoutSeconds int // Number of seconds before topology mysql read operation is aborted (driver-side). 
Used for all but discovery queries. - MySQLConnectionLifetimeSeconds int // Number of seconds the mysql driver will keep database connection alive before recycling it - DefaultInstancePort int // In case port was not specified on command line - SlaveLagQuery string // Synonym to ReplicationLagQuery - ReplicationLagQuery string // custom query to check on replica lg (e.g. heartbeat table). Must return a single row with a single numeric column, which is the lag. - ReplicationCredentialsQuery string // custom query to get replication credentials. Must return a single row, with two text columns: 1st is username, 2nd is password. This is optional, and can be used by orchestrator to configure replication after primary takeover or setup of co-primary. You need to ensure the orchestrator user has the privileges to run this query - DiscoverByShowSlaveHosts bool // Attempt SHOW SLAVE HOSTS before PROCESSLIST - UseSuperReadOnly bool // Should orchestrator super_read_only any time it sets read_only - InstancePollSeconds uint // Number of seconds between instance reads - InstanceWriteBufferSize int // Instance write buffer size (max number of instances to flush in one INSERT ODKU) - BufferInstanceWrites bool // Set to 'true' for write-optimization on backend table (compromise: writes can be stale and overwrite non stale data) - InstanceFlushIntervalMilliseconds int // Max interval between instance write buffer flushes - UnseenInstanceForgetHours uint // Number of hours after which an unseen instance is forgotten - SnapshotTopologiesIntervalHours uint // Interval in hour between snapshot-topologies invocation. Default: 0 (disabled) - DiscoveryMaxConcurrency uint // Number of goroutines doing hosts discovery - DiscoveryQueueCapacity uint // Buffer size of the discovery queue. 
Should be greater than the number of DB instances being discovered - DiscoveryQueueMaxStatisticsSize int // The maximum number of individual secondly statistics taken of the discovery queue - DiscoveryCollectionRetentionSeconds uint // Number of seconds to retain the discovery collection information - DiscoverySeeds []string // Hard coded array of hostname:port, ensuring orchestrator discovers these hosts upon startup, assuming not already known to orchestrator - InstanceBulkOperationsWaitTimeoutSeconds uint // Time to wait on a single instance when doing bulk (many instances) operation - HostnameResolveMethod string // Method by which to "normalize" hostname ("none"/"default"/"cname") - MySQLHostnameResolveMethod string // Method by which to "normalize" hostname via MySQL server. ("none"/"@@hostname"/"@@report_host"; default "@@hostname") - SkipBinlogServerUnresolveCheck bool // Skip the double-check that an unresolved hostname resolves back to same hostname for binlog servers - ExpiryHostnameResolvesMinutes int // Number of minutes after which to expire hostname-resolves - RejectHostnameResolvePattern string // Regexp pattern for resolved hostname that will not be accepted (not cached, not written to db). This is done to avoid storing wrong resolves due to network glitches. - ReasonableReplicationLagSeconds int // Above this value is considered a problem - ProblemIgnoreHostnameFilters []string // Will minimize problem visualization for hostnames matching given regexp filters - VerifyReplicationFilters bool // Include replication filters check before approving topology refactoring - ReasonableMaintenanceReplicationLagSeconds int // Above this value move-up and move-below are blocked - CandidateInstanceExpireMinutes uint // Minutes after which a suggestion to use an instance as a candidate replica (to be preferably promoted on primary failover) is expired. - AuditLogFile string // Name of log file for audit operations. Disabled when empty. 
- AuditToSyslog bool // If true, audit messages are written to syslog - AuditToBackendDB bool // If true, audit messages are written to the backend DB's `audit` table (default: true) - AuditPurgeDays uint // Days after which audit entries are purged from the database - RemoveTextFromHostnameDisplay string // Text to strip off the hostname on cluster/clusters pages - ReadOnly bool - AuthenticationMethod string // Type of autherntication to use, if any. "" for none, "basic" for BasicAuth, "multi" for advanced BasicAuth, "proxy" for forwarded credentials via reverse proxy, "token" for token based access - OAuthClientId string - OAuthClientSecret string - OAuthScopes []string - HTTPAuthUser string // Username for HTTP Basic authentication (blank disables authentication) - HTTPAuthPassword string // Password for HTTP Basic authentication - AuthUserHeader string // HTTP header indicating auth user, when AuthenticationMethod is "proxy" - PowerAuthUsers []string // On AuthenticationMethod == "proxy", list of users that can make changes. All others are read-only. - PowerAuthGroups []string // list of unix groups the authenticated user must be a member of to make changes. - AccessTokenUseExpirySeconds uint // Time by which an issued token must be used - AccessTokenExpiryMinutes uint // Time after which HTTP access token expires - ClusterNameToAlias map[string]string // map between regex matching cluster name to a human friendly alias - DetectClusterAliasQuery string // Optional query (executed on topology instance) that returns the alias of a cluster. Query will only be executed on cluster primary (though until the topology's primary is resovled it may execute on other/all replicas). If provided, must return one row, one column - DetectClusterDomainQuery string // Optional query (executed on topology instance) that returns the VIP/CNAME/Alias/whatever domain name for the primary of this cluster. 
Query will only be executed on cluster primary (though until the topology's primary is resovled it may execute on other/all replicas). If provided, must return one row, one column - DetectInstanceAliasQuery string // Optional query (executed on topology instance) that returns the alias of an instance. If provided, must return one row, one column - DetectPromotionRuleQuery string // Optional query (executed on topology instance) that returns the promotion rule of an instance. If provided, must return one row, one column. - DataCenterPattern string // Regexp pattern with one group, extracting the datacenter name from the hostname - RegionPattern string // Regexp pattern with one group, extracting the region name from the hostname - PhysicalEnvironmentPattern string // Regexp pattern with one group, extracting physical environment info from hostname (e.g. combination of datacenter & prod/dev env) - DetectDataCenterQuery string // Optional query (executed on topology instance) that returns the data center of an instance. If provided, must return one row, one column. Overrides DataCenterPattern and useful for installments where DC cannot be inferred by hostname - DetectRegionQuery string // Optional query (executed on topology instance) that returns the region of an instance. If provided, must return one row, one column. Overrides RegionPattern and useful for installments where Region cannot be inferred by hostname - DetectPhysicalEnvironmentQuery string // Optional query (executed on topology instance) that returns the physical environment of an instance. If provided, must return one row, one column. Overrides PhysicalEnvironmentPattern and useful for installments where env cannot be inferred by hostname - DetectSemiSyncEnforcedQuery string // Optional query (executed on topology instance) to determine whether semi-sync is fully enforced for primary writes (async fallback is not allowed under any circumstance). 
If provided, must return one row, one column, value 0 or 1. - SupportFuzzyPoolHostnames bool // Should "submit-pool-instances" command be able to pass list of fuzzy instances (fuzzy means non-fqdn, but unique enough to recognize). Defaults 'true', implies more queries on backend db - InstancePoolExpiryMinutes uint // Time after which entries in database_instance_pool are expired (resubmit via `submit-pool-instances`) - PromotionIgnoreHostnameFilters []string // Orchestrator will not promote replicas with hostname matching pattern (via -c recovery; for example, avoid promoting dev-dedicated machines) - ServeAgentsHttp bool // Spawn another HTTP interface dedicated for orchestrator-agent - AgentsUseSSL bool // When "true" orchestrator will listen on agents port with SSL as well as connect to agents via SSL - AgentsUseMutualTLS bool // When "true" Use mutual TLS for the server to agent communication - AgentSSLSkipVerify bool // When using SSL for the Agent, should we ignore SSL certification error - AgentSSLPrivateKeyFile string // Name of Agent SSL private key file, applies only when AgentsUseSSL = true - AgentSSLCertFile string // Name of Agent SSL certification file, applies only when AgentsUseSSL = true - AgentSSLCAFile string // Name of the Agent Certificate Authority file, applies only when AgentsUseSSL = true - AgentSSLValidOUs []string // Valid organizational units when using mutual TLS to communicate with the agents - UseSSL bool // Use SSL on the server web port - UseMutualTLS bool // When "true" Use mutual TLS for the server's web and API connections - SSLSkipVerify bool // When using SSL, should we ignore SSL certification error - SSLPrivateKeyFile string // Name of SSL private key file, applies only when UseSSL = true - SSLCertFile string // Name of SSL certification file, applies only when UseSSL = true - SSLCAFile string // Name of the Certificate Authority file, applies only when UseSSL = true - SSLValidOUs []string // Valid organizational units when 
using mutual TLS - StatusEndpoint string // Override the status endpoint. Defaults to '/api/status' - StatusOUVerify bool // If true, try to verify OUs when Mutual TLS is on. Defaults to false - AgentPollMinutes uint // Minutes between agent polling - UnseenAgentForgetHours uint // Number of hours after which an unseen agent is forgotten - StaleSeedFailMinutes uint // Number of minutes after which a stale (no progress) seed is considered failed. - SeedAcceptableBytesDiff int64 // Difference in bytes between seed source & target data size that is still considered as successful copy - SeedWaitSecondsBeforeSend int64 // Number of seconds for waiting before start send data command on agent - BinlogEventsChunkSize int // Chunk size (X) for SHOW BINLOG|RELAYLOG EVENTS LIMIT ?,X statements. Smaller means less locking and mroe work to be done - ReduceReplicationAnalysisCount bool // When true, replication analysis will only report instances where possibility of handled problems is possible in the first place (e.g. will not report most leaf nodes, that are mostly uninteresting). When false, provides an entry for every known instance - FailureDetectionPeriodBlockMinutes int // The time for which an instance's failure discovery is kept "active", so as to avoid concurrent "discoveries" of the instance's failure; this preceeds any recovery process, if any. 
- RecoveryPeriodBlockMinutes int // (supported for backwards compatibility but please use newer `RecoveryPeriodBlockSeconds` instead) The time for which an instance's recovery is kept "active", so as to avoid concurrent recoveries on smae instance as well as flapping - RecoveryPeriodBlockSeconds int // (overrides `RecoveryPeriodBlockMinutes`) The time for which an instance's recovery is kept "active", so as to avoid concurrent recoveries on smae instance as well as flapping - RecoveryIgnoreHostnameFilters []string // Recovery analysis will completely ignore hosts matching given patterns - RecoverMasterClusterFilters []string // Only do primary recovery on clusters matching these regexp patterns (of course the ".*" pattern matches everything) - RecoverIntermediateMasterClusterFilters []string // Only do IM recovery on clusters matching these regexp patterns (of course the ".*" pattern matches everything) - ProcessesShellCommand string // Shell that executes command scripts - OnFailureDetectionProcesses []string // Processes to execute when detecting a failover scenario (before making a decision whether to failover or not). May and should use some of these placeholders: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countReplicas}, {replicaHosts}, {isDowntimed}, {autoMasterRecovery}, {autoIntermediateMasterRecovery} - PreGracefulTakeoverProcesses []string // Processes to execute before doing a failover (aborting operation should any once of them exits with non-zero code; order of execution undefined). 
May and should use some of these placeholders: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {countReplicas}, {replicaHosts}, {isDowntimed} - PreFailoverProcesses []string // Processes to execute before doing a failover (aborting operation should any once of them exits with non-zero code; order of execution undefined). May and should use some of these placeholders: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {countReplicas}, {replicaHosts}, {isDowntimed} - PostFailoverProcesses []string // Processes to execute after doing a failover (order of execution undefined). May and should use some of these placeholders: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countReplicas}, {replicaHosts}, {isDowntimed}, {isSuccessful}, {lostReplicas}, {countLostReplicas} - PostUnsuccessfulFailoverProcesses []string // Processes to execute after a not-completely-successful failover (order of execution undefined). May and should use some of these placeholders: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countReplicas}, {replicaHosts}, {isDowntimed}, {isSuccessful}, {lostReplicas}, {countLostReplicas} - PostMasterFailoverProcesses []string // Processes to execute after doing a primary failover (order of execution undefined). 
Uses same placeholders as PostFailoverProcesses - PostIntermediateMasterFailoverProcesses []string // Processes to execute after doing a primary failover (order of execution undefined). Uses same placeholders as PostFailoverProcesses - PostGracefulTakeoverProcesses []string // Processes to execute after running a graceful primary takeover. Uses same placeholders as PostFailoverProcesses - PostTakeMasterProcesses []string // Processes to execute after a successful Take-Master event has taken place - CoMasterRecoveryMustPromoteOtherCoMaster bool // When 'false', anything can get promoted (and candidates are prefered over others). When 'true', orchestrator will promote the other co-primary or else fail - DetachLostSlavesAfterMasterFailover bool // synonym to DetachLostReplicasAfterMasterFailover - DetachLostReplicasAfterMasterFailover bool // Should replicas that are not to be lost in primary recovery (i.e. were more up-to-date than promoted replica) be forcibly detached - ApplyMySQLPromotionAfterMasterFailover bool // Should orchestrator take upon itself to apply MySQL primary promotion: set read_only=0, detach replication, etc. - PreventCrossDataCenterMasterFailover bool // When true (default: false), cross-DC primary failover are not allowed, orchestrator will do all it can to only fail over within same DC, or else not fail over at all. - PreventCrossRegionMasterFailover bool // When true (default: false), cross-region primary failover are not allowed, orchestrator will do all it can to only fail over within same region, or else not fail over at all. - MasterFailoverLostInstancesDowntimeMinutes uint // Number of minutes to downtime any server that was lost after a primary failover (including failed primary & lost replicas). 
0 to disable - MasterFailoverDetachSlaveMasterHost bool // synonym to MasterFailoverDetachReplicaMasterHost - MasterFailoverDetachReplicaMasterHost bool // Should orchestrator issue a detach-replica-master-host on newly promoted primary (this makes sure the new primary will not attempt to replicate old primary if that comes back to life). Defaults 'false'. Meaningless if ApplyMySQLPromotionAfterMasterFailover is 'true'. - FailMasterPromotionOnLagMinutes uint // when > 0, fail a primary promotion if the candidate replica is lagging >= configured number of minutes. - FailMasterPromotionIfSQLThreadNotUpToDate bool // when true, and a primary failover takes place, if candidate primary has not consumed all relay logs, promotion is aborted with error - DelayMasterPromotionIfSQLThreadNotUpToDate bool // when true, and a primary failover takes place, if candidate primary has not consumed all relay logs, delay promotion until the sql thread has caught up - PostponeSlaveRecoveryOnLagMinutes uint // Synonym to PostponeReplicaRecoveryOnLagMinutes - PostponeReplicaRecoveryOnLagMinutes uint // On crash recovery, replicas that are lagging more than given minutes are only resurrected late in the recovery process, after primary/IM has been elected and processes executed. Value of 0 disables this feature - OSCIgnoreHostnameFilters []string // OSC replicas recommendation will ignore replica hostnames matching given patterns - URLPrefix string // URL prefix to run orchestrator on non-root web path, e.g. /orchestrator to put it behind nginx. - DiscoveryIgnoreReplicaHostnameFilters []string // Regexp filters to apply to prevent auto-discovering new replicas. Usage: unreachable servers due to firewalls, applications which trigger binlog dumps - DiscoveryIgnoreMasterHostnameFilters []string // Regexp filters to apply to prevent auto-discovering a primary. 
Usage: pointing your primary temporarily to replicate seom data from external host - DiscoveryIgnoreHostnameFilters []string // Regexp filters to apply to prevent discovering instances of any kind - ConsulAddress string // Address where Consul HTTP api is found. Example: 127.0.0.1:8500 - ConsulScheme string // Scheme (http or https) for Consul - ConsulAclToken string // ACL token used to write to Consul KV - ConsulCrossDataCenterDistribution bool // should orchestrator automatically auto-deduce all consul DCs and write KVs in all DCs - ZkAddress string // UNSUPPERTED YET. Address where (single or multiple) ZooKeeper servers are found, in `srv1[:port1][,srv2[:port2]...]` format. Default port is 2181. Example: srv-a,srv-b:12181,srv-c - KVClusterMasterPrefix string // Prefix to use for clusters' primary's entries in KV stores (internal, consul, ZK), default: "mysql/master" - WebMessage string // If provided, will be shown on all web pages below the title bar - MaxConcurrentReplicaOperations int // Maximum number of concurrent operations on replicas - InstanceDBExecContextTimeoutSeconds int // Timeout on context used while calling ExecContext on instance database - LockShardTimeoutSeconds int // Timeout on context used to lock shard. Should be a small value because we should fail-fast + Debug bool // set debug mode (similar to --debug option) + EnableSyslog bool // Should logs be directed (in addition) to syslog daemon? + ListenAddress string // Where orchestrator HTTP should listen for TCP + ListenSocket string // Where orchestrator HTTP should listen for unix socket (default: empty; when given, TCP is disabled) + HTTPAdvertise string // optional, for raft setups, what is the HTTP address this node will advertise to its peers (potentially use where behind NAT or when rerouting ports; example: "http://11.22.33.44:3030") + AgentsServerPort string // port orchestrator agents talk back to + Durability string // The type of durability to enforce. Default is "none". 
Other values are dictated by registered plugins + DurabilityParams map[string]string // map for specifying additional parameters for durability plugins. Used by durability mode "specified" + MySQLTopologyUser string + MySQLTopologyPassword string + MySQLReplicaUser string // If set, use this credential instead of discovering from mysql. TODO(sougou): deprecate this in favor of fetching from vttablet + MySQLReplicaPassword string + MySQLTopologyCredentialsConfigFile string // my.cnf style configuration file from where to pick credentials. Expecting `user`, `password` under `[client]` section + MySQLTopologySSLPrivateKeyFile string // Private key file used to authenticate with a Topology mysql instance with TLS + MySQLTopologySSLCertFile string // Certificate PEM file used to authenticate with a Topology mysql instance with TLS + MySQLTopologySSLCAFile string // Certificate Authority PEM file used to authenticate with a Topology mysql instance with TLS + MySQLTopologySSLSkipVerify bool // If true, do not strictly validate mutual TLS certs for Topology mysql instances + MySQLTopologyUseMutualTLS bool // Turn on TLS authentication with the Topology MySQL instances + MySQLTopologyUseMixedTLS bool // Mixed TLS and non-TLS authentication with the Topology MySQL instances + TLSCacheTTLFactor uint // Factor of InstancePollSeconds that we set as TLS info cache expiry + BackendDB string // EXPERIMENTAL: type of backend db; either "mysql" or "sqlite3" + SQLite3DataFile string // when BackendDB == "sqlite3", full path to sqlite3 datafile + SkipOrchestratorDatabaseUpdate bool // When true, do not check backend database schema nor attempt to update it. 
Useful when you may be running multiple versions of orchestrator, and you only wish certain boxes to dictate the db structure (or else any time a different orchestrator version runs it will rebuild database schema) + PanicIfDifferentDatabaseDeploy bool // When true, and this process finds the orchestrator backend DB was provisioned by a different version, panic + RaftEnabled bool // When true, setup orchestrator in a raft consensus layout. When false (default) all Raft* variables are ignored + RaftBind string + RaftAdvertise string + RaftDataDir string + DefaultRaftPort int // if a RaftNodes entry does not specify port, use this one + RaftNodes []string // Raft nodes to make initial connection with + ExpectFailureAnalysisConcensus bool + MySQLOrchestratorHost string + MySQLOrchestratorMaxPoolConnections int // The maximum size of the connection pool to the Orchestrator backend. + MySQLOrchestratorPort uint + MySQLOrchestratorDatabase string + MySQLOrchestratorUser string + MySQLOrchestratorPassword string + MySQLOrchestratorCredentialsConfigFile string // my.cnf style configuration file from where to pick credentials. 
Expecting `user`, `password` under `[client]` section + MySQLOrchestratorSSLPrivateKeyFile string // Private key file used to authenticate with the Orchestrator mysql instance with TLS + MySQLOrchestratorSSLCertFile string // Certificate PEM file used to authenticate with the Orchestrator mysql instance with TLS + MySQLOrchestratorSSLCAFile string // Certificate Authority PEM file used to authenticate with the Orchestrator mysql instance with TLS + MySQLOrchestratorSSLSkipVerify bool // If true, do not strictly validate mutual TLS certs for the Orchestrator mysql instances + MySQLOrchestratorUseMutualTLS bool // Turn on TLS authentication with the Orchestrator MySQL instance + MySQLOrchestratorReadTimeoutSeconds int // Number of seconds before backend mysql read operation is aborted (driver-side) + MySQLOrchestratorRejectReadOnly bool // Reject read only connections https://github.com/go-sql-driver/mysql#rejectreadonly + MySQLConnectTimeoutSeconds int // Number of seconds before connection is aborted (driver-side) + MySQLDiscoveryReadTimeoutSeconds int // Number of seconds before topology mysql read operation is aborted (driver-side). Used for discovery queries. + MySQLTopologyReadTimeoutSeconds int // Number of seconds before topology mysql read operation is aborted (driver-side). Used for all but discovery queries. + MySQLConnectionLifetimeSeconds int // Number of seconds the mysql driver will keep database connection alive before recycling it + DefaultInstancePort int // In case port was not specified on command line + SlaveLagQuery string // Synonym to ReplicationLagQuery + ReplicationLagQuery string // custom query to check on replica lg (e.g. heartbeat table). Must return a single row with a single numeric column, which is the lag. + ReplicationCredentialsQuery string // custom query to get replication credentials. Must return a single row, with two text columns: 1st is username, 2nd is password. 
This is optional, and can be used by orchestrator to configure replication after primary takeover or setup of co-primary. You need to ensure the orchestrator user has the privileges to run this query + DiscoverByShowSlaveHosts bool // Attempt SHOW SLAVE HOSTS before PROCESSLIST + UseSuperReadOnly bool // Should orchestrator super_read_only any time it sets read_only + InstancePollSeconds uint // Number of seconds between instance reads + InstanceWriteBufferSize int // Instance write buffer size (max number of instances to flush in one INSERT ODKU) + BufferInstanceWrites bool // Set to 'true' for write-optimization on backend table (compromise: writes can be stale and overwrite non stale data) + InstanceFlushIntervalMilliseconds int // Max interval between instance write buffer flushes + UnseenInstanceForgetHours uint // Number of hours after which an unseen instance is forgotten + SnapshotTopologiesIntervalHours uint // Interval in hour between snapshot-topologies invocation. Default: 0 (disabled) + DiscoveryMaxConcurrency uint // Number of goroutines doing hosts discovery + DiscoveryQueueCapacity uint // Buffer size of the discovery queue. Should be greater than the number of DB instances being discovered + DiscoveryQueueMaxStatisticsSize int // The maximum number of individual secondly statistics taken of the discovery queue + DiscoveryCollectionRetentionSeconds uint // Number of seconds to retain the discovery collection information + DiscoverySeeds []string // Hard coded array of hostname:port, ensuring orchestrator discovers these hosts upon startup, assuming not already known to orchestrator + InstanceBulkOperationsWaitTimeoutSeconds uint // Time to wait on a single instance when doing bulk (many instances) operation + HostnameResolveMethod string // Method by which to "normalize" hostname ("none"/"default"/"cname") + MySQLHostnameResolveMethod string // Method by which to "normalize" hostname via MySQL server. 
("none"/"@@hostname"/"@@report_host"; default "none") + SkipBinlogServerUnresolveCheck bool // Skip the double-check that an unresolved hostname resolves back to same hostname for binlog servers + ExpiryHostnameResolvesMinutes int // Number of minutes after which to expire hostname-resolves + RejectHostnameResolvePattern string // Regexp pattern for resolved hostname that will not be accepted (not cached, not written to db). This is done to avoid storing wrong resolves due to network glitches. + ReasonableReplicationLagSeconds int // Above this value is considered a problem + ProblemIgnoreHostnameFilters []string // Will minimize problem visualization for hostnames matching given regexp filters + VerifyReplicationFilters bool // Include replication filters check before approving topology refactoring + ReasonableMaintenanceReplicationLagSeconds int // Above this value move-up and move-below are blocked + CandidateInstanceExpireMinutes uint // Minutes after which a suggestion to use an instance as a candidate replica (to be preferably promoted on primary failover) is expired. + AuditLogFile string // Name of log file for audit operations. Disabled when empty. + AuditToSyslog bool // If true, audit messages are written to syslog + AuditToBackendDB bool // If true, audit messages are written to the backend DB's `audit` table (default: false) + AuditPurgeDays uint // Days after which audit entries are purged from the database + RemoveTextFromHostnameDisplay string // Text to strip off the hostname on cluster/clusters pages + ReadOnly bool + AuthenticationMethod string // Type of authentication to use, if any.
"" for none, "basic" for BasicAuth, "multi" for advanced BasicAuth, "proxy" for forwarded credentials via reverse proxy, "token" for token based access + OAuthClientId string + OAuthClientSecret string + OAuthScopes []string + HTTPAuthUser string // Username for HTTP Basic authentication (blank disables authentication) + HTTPAuthPassword string // Password for HTTP Basic authentication + AuthUserHeader string // HTTP header indicating auth user, when AuthenticationMethod is "proxy" + PowerAuthUsers []string // On AuthenticationMethod == "proxy", list of users that can make changes. All others are read-only. + PowerAuthGroups []string // list of unix groups the authenticated user must be a member of to make changes. + AccessTokenUseExpirySeconds uint // Time by which an issued token must be used + AccessTokenExpiryMinutes uint // Time after which HTTP access token expires + ClusterNameToAlias map[string]string // map between regex matching cluster name to a human friendly alias + DetectClusterAliasQuery string // Optional query (executed on topology instance) that returns the alias of a cluster. Query will only be executed on cluster primary (though until the topology's primary is resolved it may execute on other/all replicas). If provided, must return one row, one column + DetectClusterDomainQuery string // Optional query (executed on topology instance) that returns the VIP/CNAME/Alias/whatever domain name for the primary of this cluster. Query will only be executed on cluster primary (though until the topology's primary is resolved it may execute on other/all replicas). If provided, must return one row, one column + DetectInstanceAliasQuery string // Optional query (executed on topology instance) that returns the alias of an instance. If provided, must return one row, one column + DetectPromotionRuleQuery string // Optional query (executed on topology instance) that returns the promotion rule of an instance. If provided, must return one row, one column.
+ DataCenterPattern string // Regexp pattern with one group, extracting the datacenter name from the hostname + RegionPattern string // Regexp pattern with one group, extracting the region name from the hostname + PhysicalEnvironmentPattern string // Regexp pattern with one group, extracting physical environment info from hostname (e.g. combination of datacenter & prod/dev env) + DetectDataCenterQuery string // Optional query (executed on topology instance) that returns the data center of an instance. If provided, must return one row, one column. Overrides DataCenterPattern and useful for installments where DC cannot be inferred by hostname + DetectRegionQuery string // Optional query (executed on topology instance) that returns the region of an instance. If provided, must return one row, one column. Overrides RegionPattern and useful for installments where Region cannot be inferred by hostname + DetectPhysicalEnvironmentQuery string // Optional query (executed on topology instance) that returns the physical environment of an instance. If provided, must return one row, one column. Overrides PhysicalEnvironmentPattern and useful for installments where env cannot be inferred by hostname + DetectSemiSyncEnforcedQuery string // Optional query (executed on topology instance) to determine whether semi-sync is fully enforced for primary writes (async fallback is not allowed under any circumstance). If provided, must return one row, one column, value 0 or 1. + SupportFuzzyPoolHostnames bool // Should "submit-pool-instances" command be able to pass list of fuzzy instances (fuzzy means non-fqdn, but unique enough to recognize). 
Defaults 'true', implies more queries on backend db + InstancePoolExpiryMinutes uint // Time after which entries in database_instance_pool are expired (resubmit via `submit-pool-instances`) + PromotionIgnoreHostnameFilters []string // Orchestrator will not promote replicas with hostname matching pattern (via -c recovery; for example, avoid promoting dev-dedicated machines) + ServeAgentsHttp bool // Spawn another HTTP interface dedicated for orchestrator-agent + AgentsUseSSL bool // When "true" orchestrator will listen on agents port with SSL as well as connect to agents via SSL + AgentsUseMutualTLS bool // When "true" Use mutual TLS for the server to agent communication + AgentSSLSkipVerify bool // When using SSL for the Agent, should we ignore SSL certification error + AgentSSLPrivateKeyFile string // Name of Agent SSL private key file, applies only when AgentsUseSSL = true + AgentSSLCertFile string // Name of Agent SSL certification file, applies only when AgentsUseSSL = true + AgentSSLCAFile string // Name of the Agent Certificate Authority file, applies only when AgentsUseSSL = true + AgentSSLValidOUs []string // Valid organizational units when using mutual TLS to communicate with the agents + UseSSL bool // Use SSL on the server web port + UseMutualTLS bool // When "true" Use mutual TLS for the server's web and API connections + SSLSkipVerify bool // When using SSL, should we ignore SSL certification error + SSLPrivateKeyFile string // Name of SSL private key file, applies only when UseSSL = true + SSLCertFile string // Name of SSL certification file, applies only when UseSSL = true + SSLCAFile string // Name of the Certificate Authority file, applies only when UseSSL = true + SSLValidOUs []string // Valid organizational units when using mutual TLS + StatusEndpoint string // Override the status endpoint. Defaults to '/api/status' + StatusOUVerify bool // If true, try to verify OUs when Mutual TLS is on. 
Defaults to false + AgentPollMinutes uint // Minutes between agent polling + UnseenAgentForgetHours uint // Number of hours after which an unseen agent is forgotten + StaleSeedFailMinutes uint // Number of minutes after which a stale (no progress) seed is considered failed. + SeedAcceptableBytesDiff int64 // Difference in bytes between seed source & target data size that is still considered as successful copy + SeedWaitSecondsBeforeSend int64 // Number of seconds for waiting before start send data command on agent + BinlogEventsChunkSize int // Chunk size (X) for SHOW BINLOG|RELAYLOG EVENTS LIMIT ?,X statements. Smaller means less locking and more work to be done + ReduceReplicationAnalysisCount bool // When true, replication analysis will only report instances where possibility of handled problems is possible in the first place (e.g. will not report most leaf nodes, that are mostly uninteresting). When false, provides an entry for every known instance + FailureDetectionPeriodBlockMinutes int // The time for which an instance's failure discovery is kept "active", so as to avoid concurrent "discoveries" of the instance's failure; this precedes any recovery process, if any.
+ RecoveryPeriodBlockMinutes int // (supported for backwards compatibility but please use newer `RecoveryPeriodBlockSeconds` instead) The time for which an instance's recovery is kept "active", so as to avoid concurrent recoveries on same instance as well as flapping + RecoveryPeriodBlockSeconds int // (overrides `RecoveryPeriodBlockMinutes`) The time for which an instance's recovery is kept "active", so as to avoid concurrent recoveries on same instance as well as flapping + RecoveryIgnoreHostnameFilters []string // Recovery analysis will completely ignore hosts matching given patterns + RecoverPrimaryClusterFilters []string // Only do primary recovery on clusters matching these regexp patterns (of course the ".*" pattern matches everything) + RecoverIntermediatePrimaryClusterFilters []string // Only do IM recovery on clusters matching these regexp patterns (of course the ".*" pattern matches everything) + ProcessesShellCommand string // Shell that executes command scripts + OnFailureDetectionProcesses []string // Processes to execute when detecting a failover scenario (before making a decision whether to failover or not). May and should use some of these placeholders: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countReplicas}, {replicaHosts}, {isDowntimed}, {autoMasterRecovery}, {autoIntermediateMasterRecovery} + PreGracefulTakeoverProcesses []string // Processes to execute before doing a failover (aborting operation should any one of them exit with non-zero code; order of execution undefined).
May and should use some of these placeholders: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {countReplicas}, {replicaHosts}, {isDowntimed} + PreFailoverProcesses []string // Processes to execute before doing a failover (aborting operation should any once of them exits with non-zero code; order of execution undefined). May and should use some of these placeholders: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {countReplicas}, {replicaHosts}, {isDowntimed} + PostFailoverProcesses []string // Processes to execute after doing a failover (order of execution undefined). May and should use some of these placeholders: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countReplicas}, {replicaHosts}, {isDowntimed}, {isSuccessful}, {lostReplicas}, {countLostReplicas} + PostUnsuccessfulFailoverProcesses []string // Processes to execute after a not-completely-successful failover (order of execution undefined). May and should use some of these placeholders: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countReplicas}, {replicaHosts}, {isDowntimed}, {isSuccessful}, {lostReplicas}, {countLostReplicas} + PostPrimaryFailoverProcesses []string // Processes to execute after doing a primary failover (order of execution undefined). 
Uses same placeholders as PostFailoverProcesses + PostIntermediatePrimaryFailoverProcesses []string // Processes to execute after doing an intermediate primary failover (order of execution undefined). Uses same placeholders as PostFailoverProcesses + PostGracefulTakeoverProcesses []string // Processes to execute after running a graceful primary takeover. Uses same placeholders as PostFailoverProcesses + PostTakePrimaryProcesses []string // Processes to execute after a successful Take-Master event has taken place + CoPrimaryRecoveryMustPromoteOtherCoPrimary bool // When 'false', anything can get promoted (and candidates are preferred over others). When 'true', orchestrator will promote the other co-primary or else fail + DetachLostSlavesAfterPrimaryFailover bool // synonym to DetachLostReplicasAfterPrimaryFailover + DetachLostReplicasAfterPrimaryFailover bool // Should replicas that are not to be lost in primary recovery (i.e. were more up-to-date than promoted replica) be forcibly detached + ApplyMySQLPromotionAfterPrimaryFailover bool // Should orchestrator take upon itself to apply MySQL primary promotion: set read_only=0, detach replication, etc. + PreventCrossDataCenterPrimaryFailover bool // When true (default: false), cross-DC primary failovers are not allowed, orchestrator will do all it can to only fail over within same DC, or else not fail over at all. + PreventCrossRegionPrimaryFailover bool // When true (default: false), cross-region primary failovers are not allowed, orchestrator will do all it can to only fail over within same region, or else not fail over at all. + PrimaryFailoverLostInstancesDowntimeMinutes uint // Number of minutes to downtime any server that was lost after a primary failover (including failed primary & lost replicas).
0 to disable + PrimaryFailoverDetachSlavePrimaryHost bool // synonym to PrimaryFailoverDetachReplicaPrimaryHost + PrimaryFailoverDetachReplicaPrimaryHost bool // Should orchestrator issue a detach-replica-master-host on newly promoted primary (this makes sure the new primary will not attempt to replicate old primary if that comes back to life). Defaults 'false'. Meaningless if ApplyMySQLPromotionAfterPrimaryFailover is 'true'. + FailPrimaryPromotionOnLagMinutes uint // when > 0, fail a primary promotion if the candidate replica is lagging >= configured number of minutes. + FailPrimaryPromotionIfSQLThreadNotUpToDate bool // when true, and a primary failover takes place, if candidate primary has not consumed all relay logs, promotion is aborted with error + DelayPrimaryPromotionIfSQLThreadNotUpToDate bool // when true, and a primary failover takes place, if candidate primary has not consumed all relay logs, delay promotion until the sql thread has caught up + PostponeSlaveRecoveryOnLagMinutes uint // Synonym to PostponeReplicaRecoveryOnLagMinutes + PostponeReplicaRecoveryOnLagMinutes uint // On crash recovery, replicas that are lagging more than given minutes are only resurrected late in the recovery process, after primary/IM has been elected and processes executed. Value of 0 disables this feature + OSCIgnoreHostnameFilters []string // OSC replicas recommendation will ignore replica hostnames matching given patterns + URLPrefix string // URL prefix to run orchestrator on non-root web path, e.g. /orchestrator to put it behind nginx. + DiscoveryIgnoreReplicaHostnameFilters []string // Regexp filters to apply to prevent auto-discovering new replicas. Usage: unreachable servers due to firewalls, applications which trigger binlog dumps + DiscoveryIgnorePrimaryHostnameFilters []string // Regexp filters to apply to prevent auto-discovering a primary. 
Usage: pointing your primary temporarily to replicate some data from external host + DiscoveryIgnoreHostnameFilters []string // Regexp filters to apply to prevent discovering instances of any kind + ConsulAddress string // Address where Consul HTTP api is found. Example: 127.0.0.1:8500 + ConsulScheme string // Scheme (http or https) for Consul + ConsulAclToken string // ACL token used to write to Consul KV + ConsulCrossDataCenterDistribution bool // should orchestrator automatically auto-deduce all consul DCs and write KVs in all DCs + ZkAddress string // NOT SUPPORTED YET. Address where (single or multiple) ZooKeeper servers are found, in `srv1[:port1][,srv2[:port2]...]` format. Default port is 2181. Example: srv-a,srv-b:12181,srv-c + KVClusterPrimaryPrefix string // Prefix to use for clusters' primary's entries in KV stores (internal, consul, ZK), default: "mysql/master" + WebMessage string // If provided, will be shown on all web pages below the title bar + MaxConcurrentReplicaOperations int // Maximum number of concurrent operations on replicas + InstanceDBExecContextTimeoutSeconds int // Timeout on context used while calling ExecContext on instance database + LockShardTimeoutSeconds int // Timeout on context used to lock shard.
Should be a small value because we should fail-fast } // ToJSONString will marshal this configuration as JSON @@ -254,152 +254,152 @@ var readFileNames []string func newConfiguration() *Configuration { return &Configuration{ - Debug: false, - EnableSyslog: false, - ListenAddress: ":3000", - ListenSocket: "", - HTTPAdvertise: "", - AgentsServerPort: ":3001", - Durability: "none", - DurabilityParams: make(map[string]string), - StatusEndpoint: DefaultStatusAPIEndpoint, - StatusOUVerify: false, - BackendDB: "sqlite", - SQLite3DataFile: "file::memory:?mode=memory&cache=shared", - SkipOrchestratorDatabaseUpdate: false, - PanicIfDifferentDatabaseDeploy: false, - RaftBind: "127.0.0.1:10008", - RaftAdvertise: "", - RaftDataDir: "", - DefaultRaftPort: 10008, - RaftNodes: []string{}, - ExpectFailureAnalysisConcensus: true, - MySQLOrchestratorMaxPoolConnections: 128, // limit concurrent conns to backend DB - MySQLOrchestratorPort: 3306, - MySQLTopologyUseMutualTLS: false, - MySQLTopologyUseMixedTLS: true, - MySQLOrchestratorUseMutualTLS: false, - MySQLConnectTimeoutSeconds: 2, - MySQLOrchestratorReadTimeoutSeconds: 30, - MySQLOrchestratorRejectReadOnly: false, - MySQLDiscoveryReadTimeoutSeconds: 10, - MySQLTopologyReadTimeoutSeconds: 600, - MySQLConnectionLifetimeSeconds: 0, - DefaultInstancePort: 3306, - TLSCacheTTLFactor: 100, - InstancePollSeconds: 5, - InstanceWriteBufferSize: 100, - BufferInstanceWrites: false, - InstanceFlushIntervalMilliseconds: 100, - UnseenInstanceForgetHours: 240, - SnapshotTopologiesIntervalHours: 0, - DiscoverByShowSlaveHosts: false, - UseSuperReadOnly: false, - DiscoveryMaxConcurrency: 300, - DiscoveryQueueCapacity: 100000, - DiscoveryQueueMaxStatisticsSize: 120, - DiscoveryCollectionRetentionSeconds: 120, - DiscoverySeeds: []string{}, - InstanceBulkOperationsWaitTimeoutSeconds: 10, - HostnameResolveMethod: "default", - MySQLHostnameResolveMethod: "none", - SkipBinlogServerUnresolveCheck: true, - ExpiryHostnameResolvesMinutes: 60, - 
RejectHostnameResolvePattern: "", - ReasonableReplicationLagSeconds: 10, - ProblemIgnoreHostnameFilters: []string{}, - VerifyReplicationFilters: false, - ReasonableMaintenanceReplicationLagSeconds: 20, - CandidateInstanceExpireMinutes: 60, - AuditLogFile: "", - AuditToSyslog: false, - AuditToBackendDB: false, - AuditPurgeDays: 7, - RemoveTextFromHostnameDisplay: "", - ReadOnly: false, - AuthenticationMethod: "", - HTTPAuthUser: "", - HTTPAuthPassword: "", - AuthUserHeader: "X-Forwarded-User", - PowerAuthUsers: []string{"*"}, - PowerAuthGroups: []string{}, - AccessTokenUseExpirySeconds: 60, - AccessTokenExpiryMinutes: 1440, - ClusterNameToAlias: make(map[string]string), - DetectClusterAliasQuery: "", - DetectClusterDomainQuery: "", - DetectInstanceAliasQuery: "", - DetectPromotionRuleQuery: "", - DataCenterPattern: "", - PhysicalEnvironmentPattern: "", - DetectDataCenterQuery: "", - DetectPhysicalEnvironmentQuery: "", - DetectSemiSyncEnforcedQuery: "", - SupportFuzzyPoolHostnames: true, - InstancePoolExpiryMinutes: 60, - PromotionIgnoreHostnameFilters: []string{}, - ServeAgentsHttp: false, - AgentsUseSSL: false, - AgentsUseMutualTLS: false, - AgentSSLValidOUs: []string{}, - AgentSSLSkipVerify: false, - AgentSSLPrivateKeyFile: "", - AgentSSLCertFile: "", - AgentSSLCAFile: "", - UseSSL: false, - UseMutualTLS: false, - SSLValidOUs: []string{}, - SSLSkipVerify: false, - SSLPrivateKeyFile: "", - SSLCertFile: "", - SSLCAFile: "", - AgentPollMinutes: 60, - UnseenAgentForgetHours: 6, - StaleSeedFailMinutes: 60, - SeedAcceptableBytesDiff: 8192, - SeedWaitSecondsBeforeSend: 2, - BinlogEventsChunkSize: 10000, - ReduceReplicationAnalysisCount: true, - FailureDetectionPeriodBlockMinutes: 60, - RecoveryPeriodBlockMinutes: 60, - RecoveryPeriodBlockSeconds: 3600, - RecoveryIgnoreHostnameFilters: []string{}, - RecoverMasterClusterFilters: []string{"*"}, - RecoverIntermediateMasterClusterFilters: []string{}, - ProcessesShellCommand: "bash", - OnFailureDetectionProcesses: []string{}, 
- PreGracefulTakeoverProcesses: []string{}, - PreFailoverProcesses: []string{}, - PostMasterFailoverProcesses: []string{}, - PostIntermediateMasterFailoverProcesses: []string{}, - PostFailoverProcesses: []string{}, - PostUnsuccessfulFailoverProcesses: []string{}, - PostGracefulTakeoverProcesses: []string{}, - PostTakeMasterProcesses: []string{}, - CoMasterRecoveryMustPromoteOtherCoMaster: true, - DetachLostSlavesAfterMasterFailover: true, - ApplyMySQLPromotionAfterMasterFailover: true, - PreventCrossDataCenterMasterFailover: false, - PreventCrossRegionMasterFailover: false, - MasterFailoverLostInstancesDowntimeMinutes: 0, - MasterFailoverDetachSlaveMasterHost: false, - FailMasterPromotionOnLagMinutes: 0, - FailMasterPromotionIfSQLThreadNotUpToDate: false, - DelayMasterPromotionIfSQLThreadNotUpToDate: true, - PostponeSlaveRecoveryOnLagMinutes: 0, - OSCIgnoreHostnameFilters: []string{}, - URLPrefix: "", - DiscoveryIgnoreReplicaHostnameFilters: []string{}, - ConsulAddress: "", - ConsulScheme: "http", - ConsulAclToken: "", - ConsulCrossDataCenterDistribution: false, - ZkAddress: "", - KVClusterMasterPrefix: "mysql/master", - WebMessage: "", - MaxConcurrentReplicaOperations: 5, - InstanceDBExecContextTimeoutSeconds: 30, - LockShardTimeoutSeconds: 1, + Debug: false, + EnableSyslog: false, + ListenAddress: ":3000", + ListenSocket: "", + HTTPAdvertise: "", + AgentsServerPort: ":3001", + Durability: "none", + DurabilityParams: make(map[string]string), + StatusEndpoint: DefaultStatusAPIEndpoint, + StatusOUVerify: false, + BackendDB: "sqlite", + SQLite3DataFile: "file::memory:?mode=memory&cache=shared", + SkipOrchestratorDatabaseUpdate: false, + PanicIfDifferentDatabaseDeploy: false, + RaftBind: "127.0.0.1:10008", + RaftAdvertise: "", + RaftDataDir: "", + DefaultRaftPort: 10008, + RaftNodes: []string{}, + ExpectFailureAnalysisConcensus: true, + MySQLOrchestratorMaxPoolConnections: 128, // limit concurrent conns to backend DB + MySQLOrchestratorPort: 3306, + 
MySQLTopologyUseMutualTLS: false, + MySQLTopologyUseMixedTLS: true, + MySQLOrchestratorUseMutualTLS: false, + MySQLConnectTimeoutSeconds: 2, + MySQLOrchestratorReadTimeoutSeconds: 30, + MySQLOrchestratorRejectReadOnly: false, + MySQLDiscoveryReadTimeoutSeconds: 10, + MySQLTopologyReadTimeoutSeconds: 600, + MySQLConnectionLifetimeSeconds: 0, + DefaultInstancePort: 3306, + TLSCacheTTLFactor: 100, + InstancePollSeconds: 5, + InstanceWriteBufferSize: 100, + BufferInstanceWrites: false, + InstanceFlushIntervalMilliseconds: 100, + UnseenInstanceForgetHours: 240, + SnapshotTopologiesIntervalHours: 0, + DiscoverByShowSlaveHosts: false, + UseSuperReadOnly: false, + DiscoveryMaxConcurrency: 300, + DiscoveryQueueCapacity: 100000, + DiscoveryQueueMaxStatisticsSize: 120, + DiscoveryCollectionRetentionSeconds: 120, + DiscoverySeeds: []string{}, + InstanceBulkOperationsWaitTimeoutSeconds: 10, + HostnameResolveMethod: "default", + MySQLHostnameResolveMethod: "none", + SkipBinlogServerUnresolveCheck: true, + ExpiryHostnameResolvesMinutes: 60, + RejectHostnameResolvePattern: "", + ReasonableReplicationLagSeconds: 10, + ProblemIgnoreHostnameFilters: []string{}, + VerifyReplicationFilters: false, + ReasonableMaintenanceReplicationLagSeconds: 20, + CandidateInstanceExpireMinutes: 60, + AuditLogFile: "", + AuditToSyslog: false, + AuditToBackendDB: false, + AuditPurgeDays: 7, + RemoveTextFromHostnameDisplay: "", + ReadOnly: false, + AuthenticationMethod: "", + HTTPAuthUser: "", + HTTPAuthPassword: "", + AuthUserHeader: "X-Forwarded-User", + PowerAuthUsers: []string{"*"}, + PowerAuthGroups: []string{}, + AccessTokenUseExpirySeconds: 60, + AccessTokenExpiryMinutes: 1440, + ClusterNameToAlias: make(map[string]string), + DetectClusterAliasQuery: "", + DetectClusterDomainQuery: "", + DetectInstanceAliasQuery: "", + DetectPromotionRuleQuery: "", + DataCenterPattern: "", + PhysicalEnvironmentPattern: "", + DetectDataCenterQuery: "", + DetectPhysicalEnvironmentQuery: "", + 
DetectSemiSyncEnforcedQuery: "", + SupportFuzzyPoolHostnames: true, + InstancePoolExpiryMinutes: 60, + PromotionIgnoreHostnameFilters: []string{}, + ServeAgentsHttp: false, + AgentsUseSSL: false, + AgentsUseMutualTLS: false, + AgentSSLValidOUs: []string{}, + AgentSSLSkipVerify: false, + AgentSSLPrivateKeyFile: "", + AgentSSLCertFile: "", + AgentSSLCAFile: "", + UseSSL: false, + UseMutualTLS: false, + SSLValidOUs: []string{}, + SSLSkipVerify: false, + SSLPrivateKeyFile: "", + SSLCertFile: "", + SSLCAFile: "", + AgentPollMinutes: 60, + UnseenAgentForgetHours: 6, + StaleSeedFailMinutes: 60, + SeedAcceptableBytesDiff: 8192, + SeedWaitSecondsBeforeSend: 2, + BinlogEventsChunkSize: 10000, + ReduceReplicationAnalysisCount: true, + FailureDetectionPeriodBlockMinutes: 60, + RecoveryPeriodBlockMinutes: 60, + RecoveryPeriodBlockSeconds: 3600, + RecoveryIgnoreHostnameFilters: []string{}, + RecoverPrimaryClusterFilters: []string{"*"}, + RecoverIntermediatePrimaryClusterFilters: []string{}, + ProcessesShellCommand: "bash", + OnFailureDetectionProcesses: []string{}, + PreGracefulTakeoverProcesses: []string{}, + PreFailoverProcesses: []string{}, + PostPrimaryFailoverProcesses: []string{}, + PostIntermediatePrimaryFailoverProcesses: []string{}, + PostFailoverProcesses: []string{}, + PostUnsuccessfulFailoverProcesses: []string{}, + PostGracefulTakeoverProcesses: []string{}, + PostTakePrimaryProcesses: []string{}, + CoPrimaryRecoveryMustPromoteOtherCoPrimary: true, + DetachLostSlavesAfterPrimaryFailover: true, + ApplyMySQLPromotionAfterPrimaryFailover: true, + PreventCrossDataCenterPrimaryFailover: false, + PreventCrossRegionPrimaryFailover: false, + PrimaryFailoverLostInstancesDowntimeMinutes: 0, + PrimaryFailoverDetachSlavePrimaryHost: false, + FailPrimaryPromotionOnLagMinutes: 0, + FailPrimaryPromotionIfSQLThreadNotUpToDate: false, + DelayPrimaryPromotionIfSQLThreadNotUpToDate: true, + PostponeSlaveRecoveryOnLagMinutes: 0, + OSCIgnoreHostnameFilters: []string{}, + URLPrefix: "", + 
DiscoveryIgnoreReplicaHostnameFilters: []string{}, + ConsulAddress: "", + ConsulScheme: "http", + ConsulAclToken: "", + ConsulCrossDataCenterDistribution: false, + ZkAddress: "", + KVClusterPrimaryPrefix: "mysql/master", + WebMessage: "", + MaxConcurrentReplicaOperations: 5, + InstanceDBExecContextTimeoutSeconds: 30, + LockShardTimeoutSeconds: 1, } } @@ -475,20 +475,20 @@ func (this *Configuration) postReadAdjustments() error { } { - if this.DetachLostSlavesAfterMasterFailover { - this.DetachLostReplicasAfterMasterFailover = true + if this.DetachLostSlavesAfterPrimaryFailover { + this.DetachLostReplicasAfterPrimaryFailover = true } } { - if this.MasterFailoverDetachSlaveMasterHost { - this.MasterFailoverDetachReplicaMasterHost = true + if this.PrimaryFailoverDetachSlavePrimaryHost { + this.PrimaryFailoverDetachReplicaPrimaryHost = true } } - if this.FailMasterPromotionIfSQLThreadNotUpToDate && this.DelayMasterPromotionIfSQLThreadNotUpToDate { + if this.FailPrimaryPromotionIfSQLThreadNotUpToDate && this.DelayPrimaryPromotionIfSQLThreadNotUpToDate { return fmt.Errorf("Cannot have both FailMasterPromotionIfSQLThreadNotUpToDate and DelayMasterPromotionIfSQLThreadNotUpToDate enabled") } - if this.FailMasterPromotionOnLagMinutes > 0 && this.ReplicationLagQuery == "" { + if this.FailPrimaryPromotionOnLagMinutes > 0 && this.ReplicationLagQuery == "" { return fmt.Errorf("nonzero FailMasterPromotionOnLagMinutes requires ReplicationLagQuery to be set") } { @@ -520,12 +520,12 @@ func (this *Configuration) postReadAdjustments() error { if this.RaftAdvertise == "" { this.RaftAdvertise = this.RaftBind } - if this.KVClusterMasterPrefix != "/" { + if this.KVClusterPrimaryPrefix != "/" { // "/" remains "/" // "prefix" turns to "prefix/" // "some/prefix///" turns to "some/prefix/" - this.KVClusterMasterPrefix = strings.TrimRight(this.KVClusterMasterPrefix, "/") - this.KVClusterMasterPrefix = fmt.Sprintf("%s/", this.KVClusterMasterPrefix) + this.KVClusterPrimaryPrefix = 
strings.TrimRight(this.KVClusterPrimaryPrefix, "/") + this.KVClusterPrimaryPrefix = fmt.Sprintf("%s/", this.KVClusterPrimaryPrefix) } if this.HTTPAdvertise != "" { u, err := url.Parse(this.HTTPAdvertise) diff --git a/go/vt/orchestrator/config/config_test.go b/go/vt/orchestrator/config/config_test.go index cb58864f12c..fa95aaa0889 100644 --- a/go/vt/orchestrator/config/config_test.go +++ b/go/vt/orchestrator/config/config_test.go @@ -65,54 +65,54 @@ func TestPostponeReplicaRecoveryOnLagMinutes(t *testing.T) { func TestMasterFailoverDetachReplicaMasterHost(t *testing.T) { { c := newConfiguration() - c.MasterFailoverDetachSlaveMasterHost = false - c.MasterFailoverDetachReplicaMasterHost = false + c.PrimaryFailoverDetachSlavePrimaryHost = false + c.PrimaryFailoverDetachReplicaPrimaryHost = false err := c.postReadAdjustments() test.S(t).ExpectNil(err) - test.S(t).ExpectFalse(c.MasterFailoverDetachReplicaMasterHost) + test.S(t).ExpectFalse(c.PrimaryFailoverDetachReplicaPrimaryHost) } { c := newConfiguration() - c.MasterFailoverDetachSlaveMasterHost = false - c.MasterFailoverDetachReplicaMasterHost = true + c.PrimaryFailoverDetachSlavePrimaryHost = false + c.PrimaryFailoverDetachReplicaPrimaryHost = true err := c.postReadAdjustments() test.S(t).ExpectNil(err) - test.S(t).ExpectTrue(c.MasterFailoverDetachReplicaMasterHost) + test.S(t).ExpectTrue(c.PrimaryFailoverDetachReplicaPrimaryHost) } { c := newConfiguration() - c.MasterFailoverDetachSlaveMasterHost = true - c.MasterFailoverDetachReplicaMasterHost = false + c.PrimaryFailoverDetachSlavePrimaryHost = true + c.PrimaryFailoverDetachReplicaPrimaryHost = false err := c.postReadAdjustments() test.S(t).ExpectNil(err) - test.S(t).ExpectTrue(c.MasterFailoverDetachReplicaMasterHost) + test.S(t).ExpectTrue(c.PrimaryFailoverDetachReplicaPrimaryHost) } } func TestMasterFailoverDetachDetachLostReplicasAfterMasterFailover(t *testing.T) { { c := newConfiguration() - c.DetachLostSlavesAfterMasterFailover = false - 
c.DetachLostReplicasAfterMasterFailover = false + c.DetachLostSlavesAfterPrimaryFailover = false + c.DetachLostReplicasAfterPrimaryFailover = false err := c.postReadAdjustments() test.S(t).ExpectNil(err) - test.S(t).ExpectFalse(c.DetachLostReplicasAfterMasterFailover) + test.S(t).ExpectFalse(c.DetachLostReplicasAfterPrimaryFailover) } { c := newConfiguration() - c.DetachLostSlavesAfterMasterFailover = false - c.DetachLostReplicasAfterMasterFailover = true + c.DetachLostSlavesAfterPrimaryFailover = false + c.DetachLostReplicasAfterPrimaryFailover = true err := c.postReadAdjustments() test.S(t).ExpectNil(err) - test.S(t).ExpectTrue(c.DetachLostReplicasAfterMasterFailover) + test.S(t).ExpectTrue(c.DetachLostReplicasAfterPrimaryFailover) } { c := newConfiguration() - c.DetachLostSlavesAfterMasterFailover = true - c.DetachLostReplicasAfterMasterFailover = false + c.DetachLostSlavesAfterPrimaryFailover = true + c.DetachLostReplicasAfterPrimaryFailover = false err := c.postReadAdjustments() test.S(t).ExpectNil(err) - test.S(t).ExpectTrue(c.DetachLostReplicasAfterMasterFailover) + test.S(t).ExpectTrue(c.DetachLostReplicasAfterPrimaryFailover) } } diff --git a/go/vt/orchestrator/http/api.go b/go/vt/orchestrator/http/api.go index 352e1362b1e..7a5a8748def 100644 --- a/go/vt/orchestrator/http/api.go +++ b/go/vt/orchestrator/http/api.go @@ -2434,8 +2434,8 @@ func (this *HttpAPI) RegisterCandidate(params martini.Params, r render.Render, r // AutomatedRecoveryFilters retuens list of clusters which are configured with automated recovery func (this *HttpAPI) AutomatedRecoveryFilters(params martini.Params, r render.Render, req *http.Request) { automatedRecoveryMap := make(map[string]interface{}) - automatedRecoveryMap["RecoverMasterClusterFilters"] = config.Config.RecoverMasterClusterFilters - automatedRecoveryMap["RecoverIntermediateMasterClusterFilters"] = config.Config.RecoverIntermediateMasterClusterFilters + automatedRecoveryMap["RecoverMasterClusterFilters"] = 
config.Config.RecoverPrimaryClusterFilters + automatedRecoveryMap["RecoverIntermediateMasterClusterFilters"] = config.Config.RecoverIntermediatePrimaryClusterFilters automatedRecoveryMap["RecoveryIgnoreHostnameFilters"] = config.Config.RecoveryIgnoreHostnameFilters Respond(r, &APIResponse{Code: OK, Message: "Automated recovery configuration details", Details: automatedRecoveryMap}) diff --git a/go/vt/orchestrator/inst/binlog_test.go b/go/vt/orchestrator/inst/binlog_test.go index 98eb241053c..4b8405957a7 100644 --- a/go/vt/orchestrator/inst/binlog_test.go +++ b/go/vt/orchestrator/inst/binlog_test.go @@ -12,7 +12,7 @@ var testCoordinates = BinlogCoordinates{LogFile: "mysql-bin.000010", LogPos: 108 func init() { config.Config.HostnameResolveMethod = "none" - config.Config.KVClusterMasterPrefix = "test/master/" + config.Config.KVClusterPrimaryPrefix = "test/master/" config.MarkConfigurationLoaded() log.SetLevel(log.ERROR) } diff --git a/go/vt/orchestrator/inst/cluster.go b/go/vt/orchestrator/inst/cluster.go index 5c7df1ddb20..957ea13be90 100644 --- a/go/vt/orchestrator/inst/cluster.go +++ b/go/vt/orchestrator/inst/cluster.go @@ -26,7 +26,7 @@ import ( ) func GetClusterPrimaryKVKey(clusterAlias string) string { - return fmt.Sprintf("%s%s", config.Config.KVClusterMasterPrefix, clusterAlias) + return fmt.Sprintf("%s%s", config.Config.KVClusterPrimaryPrefix, clusterAlias) } func getClusterPrimaryKVPair(clusterAlias string, primaryKey *InstanceKey) *kv.KVPair { @@ -90,8 +90,8 @@ type ClusterInfo struct { // ReadRecoveryInfo func (this *ClusterInfo) ReadRecoveryInfo() { - this.HasAutomatedPrimaryRecovery = this.filtersMatchCluster(config.Config.RecoverMasterClusterFilters) - this.HasAutomatedIntermediatePrimaryRecovery = this.filtersMatchCluster(config.Config.RecoverIntermediateMasterClusterFilters) + this.HasAutomatedPrimaryRecovery = this.filtersMatchCluster(config.Config.RecoverPrimaryClusterFilters) + this.HasAutomatedIntermediatePrimaryRecovery = 
this.filtersMatchCluster(config.Config.RecoverIntermediatePrimaryClusterFilters) } // filtersMatchCluster will see whether the given filters match the given cluster details diff --git a/go/vt/orchestrator/inst/cluster_test.go b/go/vt/orchestrator/inst/cluster_test.go index 2588e2897bf..5756e3313a4 100644 --- a/go/vt/orchestrator/inst/cluster_test.go +++ b/go/vt/orchestrator/inst/cluster_test.go @@ -32,7 +32,7 @@ var masterKey = InstanceKey{Hostname: "host1", Port: 3306} func init() { config.Config.HostnameResolveMethod = "none" - config.Config.KVClusterMasterPrefix = "test/master/" + config.Config.KVClusterPrimaryPrefix = "test/master/" config.MarkConfigurationLoaded() log.SetLevel(log.ERROR) } diff --git a/go/vt/orchestrator/inst/instance_dao.go b/go/vt/orchestrator/inst/instance_dao.go index 32546f32d4a..74a482b6907 100644 --- a/go/vt/orchestrator/inst/instance_dao.go +++ b/go/vt/orchestrator/inst/instance_dao.go @@ -1789,7 +1789,7 @@ func InjectUnseenPrimaries() error { for _, primaryKey := range unseenPrimaryKeys { primaryKey := primaryKey - if RegexpMatchPatterns(primaryKey.StringCode(), config.Config.DiscoveryIgnoreMasterHostnameFilters) { + if RegexpMatchPatterns(primaryKey.StringCode(), config.Config.DiscoveryIgnorePrimaryHostnameFilters) { log.Debugf("InjectUnseenPrimaries: skipping discovery of %+v because it matches DiscoveryIgnoreMasterHostnameFilters", primaryKey) continue } diff --git a/go/vt/orchestrator/inst/instance_topology.go b/go/vt/orchestrator/inst/instance_topology.go index fbf3dd4f613..451bea8fd03 100644 --- a/go/vt/orchestrator/inst/instance_topology.go +++ b/go/vt/orchestrator/inst/instance_topology.go @@ -1402,8 +1402,8 @@ func TakePrimaryHook(successor *Instance, demoted *Instance) { successorStr := fmt.Sprintf("%v", successorKey) demotedStr := fmt.Sprintf("%v", demotedKey) - processCount := len(config.Config.PostTakeMasterProcesses) - for i, command := range config.Config.PostTakeMasterProcesses { + processCount := 
len(config.Config.PostTakePrimaryProcesses) + for i, command := range config.Config.PostTakePrimaryProcesses { fullDescription := fmt.Sprintf("PostTakeMasterProcesses hook %d of %d", i+1, processCount) log.Debugf("Take-Master: PostTakeMasterProcesses: Calling %+s", fullDescription) start := time.Now() @@ -1496,7 +1496,7 @@ Cleanup: // This only runs if there is a hook configured in orchestrator.conf.json demoted := primaryInstance successor := instance - if config.Config.PostTakeMasterProcesses != nil { + if config.Config.PostTakePrimaryProcesses != nil { TakePrimaryHook(successor, demoted) } diff --git a/go/vt/orchestrator/logic/topology_recovery.go b/go/vt/orchestrator/logic/topology_recovery.go index fa161c72b24..389db99b0d7 100644 --- a/go/vt/orchestrator/logic/topology_recovery.go +++ b/go/vt/orchestrator/logic/topology_recovery.go @@ -544,7 +544,7 @@ func recoverDeadPrimary(topologyRecovery *TopologyRecovery, candidateInstanceKey AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("RecoverDeadMaster: - lost replica: %+v", replica.Key)) } - if promotedReplica != nil && len(lostReplicas) > 0 && config.Config.DetachLostReplicasAfterMasterFailover { + if promotedReplica != nil && len(lostReplicas) > 0 && config.Config.DetachLostReplicasAfterPrimaryFailover { postponedFunction := func() error { AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("RecoverDeadMaster: lost %+v replicas during recovery process; detaching them", len(lostReplicas))) for _, replica := range lostReplicas { @@ -590,12 +590,12 @@ func recoverDeadPrimary(topologyRecovery *TopologyRecovery, candidateInstanceKey } func PrimaryFailoverGeographicConstraintSatisfied(analysisEntry *inst.ReplicationAnalysis, suggestedInstance *inst.Instance) (satisfied bool, dissatisfiedReason string) { - if config.Config.PreventCrossDataCenterMasterFailover { + if config.Config.PreventCrossDataCenterPrimaryFailover { if suggestedInstance.DataCenter != analysisEntry.AnalyzedInstanceDataCenter { return false, 
fmt.Sprintf("PreventCrossDataCenterMasterFailover: will not promote server in %s when failed server in %s", suggestedInstance.DataCenter, analysisEntry.AnalyzedInstanceDataCenter) } } - if config.Config.PreventCrossRegionMasterFailover { + if config.Config.PreventCrossRegionPrimaryFailover { if suggestedInstance.Region != analysisEntry.AnalyzedInstanceRegion { return false, fmt.Sprintf("PreventCrossRegionMasterFailover: will not promote server in %s when failed server in %s", suggestedInstance.Region, analysisEntry.AnalyzedInstanceRegion) } @@ -867,15 +867,15 @@ func checkAndRecoverDeadPrimary(analysisEntry inst.ReplicationAnalysis, candidat if satisfied, reason := PrimaryFailoverGeographicConstraintSatisfied(&analysisEntry, promotedReplica); !satisfied { return nil, fmt.Errorf("RecoverDeadMaster: failed %+v promotion; %s", promotedReplica.Key, reason) } - if config.Config.FailMasterPromotionOnLagMinutes > 0 && - time.Duration(promotedReplica.ReplicationLagSeconds.Int64)*time.Second >= time.Duration(config.Config.FailMasterPromotionOnLagMinutes)*time.Minute { + if config.Config.FailPrimaryPromotionOnLagMinutes > 0 && + time.Duration(promotedReplica.ReplicationLagSeconds.Int64)*time.Second >= time.Duration(config.Config.FailPrimaryPromotionOnLagMinutes)*time.Minute { // candidate replica lags too much - return nil, fmt.Errorf("RecoverDeadMaster: failed promotion. FailMasterPromotionOnLagMinutes is set to %d (minutes) and promoted replica %+v 's lag is %d (seconds)", config.Config.FailMasterPromotionOnLagMinutes, promotedReplica.Key, promotedReplica.ReplicationLagSeconds.Int64) + return nil, fmt.Errorf("RecoverDeadMaster: failed promotion. 
FailMasterPromotionOnLagMinutes is set to %d (minutes) and promoted replica %+v 's lag is %d (seconds)", config.Config.FailPrimaryPromotionOnLagMinutes, promotedReplica.Key, promotedReplica.ReplicationLagSeconds.Int64) } - if config.Config.FailMasterPromotionIfSQLThreadNotUpToDate && !promotedReplica.SQLThreadUpToDate() { + if config.Config.FailPrimaryPromotionIfSQLThreadNotUpToDate && !promotedReplica.SQLThreadUpToDate() { return nil, fmt.Errorf("RecoverDeadMaster: failed promotion. FailMasterPromotionIfSQLThreadNotUpToDate is set and promoted replica %+v 's sql thread is not up to date (relay logs still unapplied). Aborting promotion", promotedReplica.Key) } - if config.Config.DelayMasterPromotionIfSQLThreadNotUpToDate && !promotedReplica.SQLThreadUpToDate() { + if config.Config.DelayPrimaryPromotionIfSQLThreadNotUpToDate && !promotedReplica.SQLThreadUpToDate() { AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("DelayMasterPromotionIfSQLThreadNotUpToDate: waiting for SQL thread on %+v", promotedReplica.Key)) if _, err := inst.WaitForSQLThreadUpToDate(&promotedReplica.Key, 0, 0); err != nil { return nil, fmt.Errorf("DelayMasterPromotionIfSQLThreadNotUpToDate error: %+v", err) @@ -937,7 +937,7 @@ func checkAndRecoverDeadPrimary(analysisEntry inst.ReplicationAnalysis, candidat err := kv.DistributePairs(kvPairs) log.Errore(err) } - if config.Config.MasterFailoverDetachReplicaMasterHost { + if config.Config.PrimaryFailoverDetachReplicaPrimaryHost { postponedFunction := func() error { AuditTopologyRecovery(topologyRecovery, "- RecoverDeadMaster: detaching master host on promoted master") inst.DetachReplicaPrimaryHost(&promotedReplica.Key) @@ -962,7 +962,7 @@ func checkAndRecoverDeadPrimary(analysisEntry inst.ReplicationAnalysis, candidat if !skipProcesses { // Execute post primary-failover processes - executeProcesses(config.Config.PostMasterFailoverProcesses, "PostMasterFailoverProcesses", topologyRecovery, false) + 
executeProcesses(config.Config.PostPrimaryFailoverProcesses, "PostMasterFailoverProcesses", topologyRecovery, false) } } else { recoverDeadPrimaryFailureCounter.Inc(1) @@ -1234,7 +1234,7 @@ func checkAndRecoverDeadIntermediatePrimary(analysisEntry inst.ReplicationAnalys // Execute post intermediate-master-failover processes topologyRecovery.SuccessorKey = &promotedReplica.Key topologyRecovery.SuccessorAlias = promotedReplica.InstanceAlias - executeProcesses(config.Config.PostIntermediateMasterFailoverProcesses, "PostIntermediateMasterFailoverProcesses", topologyRecovery, false) + executeProcesses(config.Config.PostIntermediatePrimaryFailoverProcesses, "PostIntermediateMasterFailoverProcesses", topologyRecovery, false) } } else { recoverDeadIntermediatePrimaryFailureCounter.Inc(1) @@ -1282,7 +1282,7 @@ func RecoverDeadCoPrimary(topologyRecovery *TopologyRecovery, skipProcesses bool topologyRecovery.AddError(err) lostReplicas = append(lostReplicas, cannotReplicateReplicas...) - mustPromoteOtherCoPrimary := config.Config.CoMasterRecoveryMustPromoteOtherCoMaster + mustPromoteOtherCoPrimary := config.Config.CoPrimaryRecoveryMustPromoteOtherCoPrimary if !otherCoPrimary.ReadOnly { AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("RecoverDeadCoPrimary: other co-master %+v is writeable hence has to be promoted", otherCoPrimary.Key)) mustPromoteOtherCoPrimary = true @@ -1307,7 +1307,7 @@ func RecoverDeadCoPrimary(topologyRecovery *TopologyRecovery, skipProcesses bool } } if promotedReplica != nil { - if config.Config.DelayMasterPromotionIfSQLThreadNotUpToDate { + if config.Config.DelayPrimaryPromotionIfSQLThreadNotUpToDate { AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("Waiting to ensure the SQL thread catches up on %+v", promotedReplica.Key)) if _, err := inst.WaitForSQLThreadUpToDate(&promotedReplica.Key, 0, 0); err != nil { return promotedReplica, lostReplicas, err @@ -1336,7 +1336,7 @@ func RecoverDeadCoPrimary(topologyRecovery *TopologyRecovery, skipProcesses 
bool topologyRecovery.AddError(log.Errore(err)) } - if promotedReplica != nil && len(lostReplicas) > 0 && config.Config.DetachLostReplicasAfterMasterFailover { + if promotedReplica != nil && len(lostReplicas) > 0 && config.Config.DetachLostReplicasAfterPrimaryFailover { postponedFunction := func() error { AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("- RecoverDeadCoPrimary: lost %+v replicas during recovery process; detaching them", len(lostReplicas))) for _, replica := range lostReplicas { @@ -1385,13 +1385,13 @@ func checkAndRecoverDeadCoPrimary(analysisEntry inst.ReplicationAnalysis, candid } topologyRecovery.LostReplicas.AddInstances(lostReplicas) if promotedReplica != nil { - if config.Config.FailMasterPromotionIfSQLThreadNotUpToDate && !promotedReplica.SQLThreadUpToDate() { + if config.Config.FailPrimaryPromotionIfSQLThreadNotUpToDate && !promotedReplica.SQLThreadUpToDate() { return false, nil, log.Errorf("Promoted replica %+v: sql thread is not up to date (relay logs still unapplied). 
Aborting promotion", promotedReplica.Key) } // success recoverDeadCoPrimarySuccessCounter.Inc(1) - if config.Config.ApplyMySQLPromotionAfterMasterFailover { + if config.Config.ApplyMySQLPromotionAfterPrimaryFailover { AuditTopologyRecovery(topologyRecovery, "- RecoverDeadMaster: will apply MySQL changes to promoted master") inst.SetReadOnly(&promotedReplica.Key, false) } @@ -1399,7 +1399,7 @@ func checkAndRecoverDeadCoPrimary(analysisEntry inst.ReplicationAnalysis, candid // Execute post intermediate-master-failover processes topologyRecovery.SuccessorKey = &promotedReplica.Key topologyRecovery.SuccessorAlias = promotedReplica.InstanceAlias - executeProcesses(config.Config.PostMasterFailoverProcesses, "PostMasterFailoverProcesses", topologyRecovery, false) + executeProcesses(config.Config.PostPrimaryFailoverProcesses, "PostMasterFailoverProcesses", topologyRecovery, false) } } else { recoverDeadCoPrimaryFailureCounter.Inc(1) From f9f96018d1a1440d668110c8122d7dc6f50842b5 Mon Sep 17 00:00:00 2001 From: Manan Gupta Date: Tue, 17 Aug 2021 15:56:10 +0530 Subject: [PATCH 2/6] rename master to primary in tests Signed-off-by: Manan Gupta --- go/vt/orchestrator/config/config_test.go | 4 ++-- go/vt/orchestrator/inst/cluster_test.go | 26 ++++++++++++------------ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/go/vt/orchestrator/config/config_test.go b/go/vt/orchestrator/config/config_test.go index fa95aaa0889..432869ba036 100644 --- a/go/vt/orchestrator/config/config_test.go +++ b/go/vt/orchestrator/config/config_test.go @@ -62,7 +62,7 @@ func TestPostponeReplicaRecoveryOnLagMinutes(t *testing.T) { } } -func TestMasterFailoverDetachReplicaMasterHost(t *testing.T) { +func TestPrimaryFailoverDetachReplicaPrimaryHost(t *testing.T) { { c := newConfiguration() c.PrimaryFailoverDetachSlavePrimaryHost = false @@ -89,7 +89,7 @@ func TestMasterFailoverDetachReplicaMasterHost(t *testing.T) { } } -func TestMasterFailoverDetachDetachLostReplicasAfterMasterFailover(t 
*testing.T) { +func TestPrimaryFailoverDetachDetachLostReplicasAfterPrimaryFailover(t *testing.T) { { c := newConfiguration() c.DetachLostSlavesAfterPrimaryFailover = false diff --git a/go/vt/orchestrator/inst/cluster_test.go b/go/vt/orchestrator/inst/cluster_test.go index 5756e3313a4..851d2aafe84 100644 --- a/go/vt/orchestrator/inst/cluster_test.go +++ b/go/vt/orchestrator/inst/cluster_test.go @@ -28,7 +28,7 @@ import ( test "vitess.io/vitess/go/vt/orchestrator/external/golib/tests" ) -var masterKey = InstanceKey{Hostname: "host1", Port: 3306} +var primaryKey = InstanceKey{Hostname: "host1", Port: 3306} func init() { config.Config.HostnameResolveMethod = "none" @@ -37,20 +37,20 @@ func init() { log.SetLevel(log.ERROR) } -func TestGetClusterMasterKVKey(t *testing.T) { +func TestGetClusterPrimaryKVKey(t *testing.T) { kvKey := GetClusterPrimaryKVKey("foo") test.S(t).ExpectEquals(kvKey, "test/master/foo") } -func TestGetClusterMasterKVPair(t *testing.T) { +func TestGetClusterPrimaryKVPair(t *testing.T) { { - kvPair := getClusterPrimaryKVPair("myalias", &masterKey) + kvPair := getClusterPrimaryKVPair("myalias", &primaryKey) test.S(t).ExpectNotNil(kvPair) test.S(t).ExpectEquals(kvPair.Key, "test/master/myalias") - test.S(t).ExpectEquals(kvPair.Value, masterKey.StringCode()) + test.S(t).ExpectEquals(kvPair.Value, primaryKey.StringCode()) } { - kvPair := getClusterPrimaryKVPair("", &masterKey) + kvPair := getClusterPrimaryKVPair("", &primaryKey) test.S(t).ExpectTrue(kvPair == nil) } { @@ -59,28 +59,28 @@ func TestGetClusterMasterKVPair(t *testing.T) { } } -func TestGetClusterMasterKVPairs(t *testing.T) { - kvPairs := GetClusterPrimaryKVPairs("myalias", &masterKey) +func TestGetClusterPrimaryKVPairs(t *testing.T) { + kvPairs := GetClusterPrimaryKVPairs("myalias", &primaryKey) test.S(t).ExpectTrue(len(kvPairs) >= 2) { kvPair := kvPairs[0] test.S(t).ExpectEquals(kvPair.Key, "test/master/myalias") - test.S(t).ExpectEquals(kvPair.Value, masterKey.StringCode()) + 
test.S(t).ExpectEquals(kvPair.Value, primaryKey.StringCode()) } { kvPair := kvPairs[1] test.S(t).ExpectEquals(kvPair.Key, "test/master/myalias/hostname") - test.S(t).ExpectEquals(kvPair.Value, masterKey.Hostname) + test.S(t).ExpectEquals(kvPair.Value, primaryKey.Hostname) } { kvPair := kvPairs[2] test.S(t).ExpectEquals(kvPair.Key, "test/master/myalias/port") - test.S(t).ExpectEquals(kvPair.Value, fmt.Sprintf("%d", masterKey.Port)) + test.S(t).ExpectEquals(kvPair.Value, fmt.Sprintf("%d", primaryKey.Port)) } } -func TestGetClusterMasterKVPairs2(t *testing.T) { - kvPairs := GetClusterPrimaryKVPairs("", &masterKey) +func TestGetClusterPrimaryKVPairs2(t *testing.T) { + kvPairs := GetClusterPrimaryKVPairs("", &primaryKey) test.S(t).ExpectEquals(len(kvPairs), 0) } From 69fcb607afb94c82c808dc0bbe55f594caa5cd5c Mon Sep 17 00:00:00 2001 From: Manan Gupta Date: Tue, 17 Aug 2021 16:24:03 +0530 Subject: [PATCH 3/6] rename master to primary in api and cli Signed-off-by: Manan Gupta --- go/vt/orchestrator/app/cli.go | 60 +++-- go/vt/orchestrator/app/command_help.go | 212 +++++++++--------- go/vt/orchestrator/config/config.go | 10 +- go/vt/orchestrator/http/api.go | 86 ++++--- go/vt/orchestrator/inst/analysis.go | 6 +- go/vt/orchestrator/inst/instance_topology.go | 28 +-- go/vt/orchestrator/logic/topology_recovery.go | 2 +- 7 files changed, 198 insertions(+), 206 deletions(-) diff --git a/go/vt/orchestrator/app/cli.go b/go/vt/orchestrator/app/cli.go index 4d103cc97ed..e00e2360b94 100644 --- a/go/vt/orchestrator/app/cli.go +++ b/go/vt/orchestrator/app/cli.go @@ -60,20 +60,16 @@ var commandSynonyms = map[string]string{ "regroup-slaves": "regroup-replicas", "move-up-slaves": "move-up-replicas", "repoint-slaves": "repoint-replicas", - "enslave-siblings": "take-siblings", - "enslave-master": "take-master", "get-candidate-slave": "get-candidate-replica", "move-slaves-gtid": "move-replicas-gtid", "regroup-slaves-gtid": "regroup-replicas-gtid", "which-cluster-osc-slaves": 
"which-cluster-osc-replicas", "which-cluster-gh-ost-slaves": "which-cluster-gh-ost-replicas", "which-slaves": "which-replicas", - "detach-slave": "detach-replica-master-host", - "detach-replica": "detach-replica-master-host", - "detach-slave-master-host": "detach-replica-master-host", - "reattach-slave": "reattach-replica-master-host", - "reattach-replica": "reattach-replica-master-host", - "reattach-slave-master-host": "reattach-replica-master-host", + "detach-slave": "detach-replica-primary-host", + "detach-replica": "detach-replica-primary-host", + "reattach-slave": "reattach-replica-primary-host", + "reattach-replica": "reattach-replica-primary-host", } func registerCliCommand(command string, section string, description string) string { @@ -250,7 +246,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } fmt.Println(instanceKey.DisplayString()) } - case registerCliCommand("regroup-replicas", "Smart relocation", `Given an instance, pick one of its replicas and make it local master of its siblings`): + case registerCliCommand("regroup-replicas", "Smart relocation", `Given an instance, pick one of its replicas and make it local primary of its siblings`): { instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) if instanceKey == nil { @@ -301,7 +297,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } } } - case registerCliCommand("move-below", "Classic file:pos relocation", `Moves a replica beneath its sibling. Both replicas must be actively replicating from same master.`): + case registerCliCommand("move-below", "Classic file:pos relocation", `Moves a replica beneath its sibling. 
Both replicas must be actively replicating from same primary.`): { instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) if destinationKey == nil { @@ -338,7 +334,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } } } - case registerCliCommand("take-master", "Classic file:pos relocation", `Turn an instance into a master of its own master; essentially switch the two.`): + case registerCliCommand("take-primary", "Classic file:pos relocation", `Turn an instance into a primary of its own primary; essentially switch the two.`): { instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) if instanceKey == nil { @@ -350,7 +346,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } fmt.Println(instanceKey.DisplayString()) } - case registerCliCommand("make-co-master", "Classic file:pos relocation", `Create a master-master replication. Given instance is a replica which replicates directly from a master.`): + case registerCliCommand("make-co-primary", "Classic file:pos relocation", `Create a primary-primary replication. 
Given instance is a replica which replicates directly from a primary.`): { instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) _, err := inst.MakeCoPrimary(instanceKey) @@ -421,7 +417,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } } } - case registerCliCommand("regroup-replicas-gtid", "GTID relocation", `Given an instance, pick one of its replica and make it local master of its siblings, using GTID.`): + case registerCliCommand("regroup-replicas-gtid", "GTID relocation", `Given an instance, pick one of its replica and make it local primary of its siblings, using GTID.`): { instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) if instanceKey == nil { @@ -472,7 +468,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } fmt.Println(instance.GtidErrant) //nolint } - case registerCliCommand("gtid-errant-reset-master", "Replication, general", `Reset master on instance, remove GTID errant transactions`): + case registerCliCommand("gtid-errant-reset-primary", "Replication, general", `Reset primary on instance, remove GTID errant transactions`): { instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) _, err := inst.ErrantGTIDResetPrimary(instanceKey) @@ -526,7 +522,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } fmt.Println(instanceKey.DisplayString()) } - case registerCliCommand("detach-replica-master-host", "Replication, general", `Stops replication and modifies Master_Host into an impossible, yet reversible, value.`): + case registerCliCommand("detach-replica-primary-host", "Replication, general", `Stops replication and modifies Master_Host into an impossible, yet reversible, value.`): { instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) if instanceKey == nil { @@ -538,7 +534,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } 
fmt.Println(instanceKey.DisplayString()) } - case registerCliCommand("reattach-replica-master-host", "Replication, general", `Undo a detach-replica-master-host operation`): + case registerCliCommand("reattach-replica-primary-host", "Replication, general", `Undo a detach-replica-primary-host operation`): { instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) if instanceKey == nil { @@ -550,7 +546,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } fmt.Println(instanceKey.DisplayString()) } - case registerCliCommand("master-pos-wait", "Replication, general", `Wait until replica reaches given replication coordinates (--binlog=file:pos)`): + case registerCliCommand("primary-pos-wait", "Replication, general", `Wait until replica reaches given replication coordinates (--binlog=file:pos)`): { instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) if instanceKey == nil { @@ -770,7 +766,7 @@ func Cli(command string, strict bool, instance string, destination string, owner fmt.Printf("%s\t%s\n", cluster.ClusterName, cluster.ClusterAlias) } } - case registerCliCommand("all-clusters-masters", "Information", `List of writeable masters, one per cluster`): + case registerCliCommand("all-clusters-primaries", "Information", `List of writeable primaries, one per cluster`): { instances, err := inst.ReadWriteableClustersPrimaries() if err != nil { @@ -823,7 +819,7 @@ func Cli(command string, strict bool, instance string, destination string, owner { instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) if instanceKey == nil { - log.Fatalf("Unable to get master: unresolved instance") + log.Fatalf("Unable to get primary: unresolved instance") } instance := validateInstanceIsFound(instanceKey) fmt.Println(instance.Key.DisplayString()) @@ -860,7 +856,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } fmt.Println(instanceKey.DisplayString()) } - case 
registerCliCommand("which-cluster-master", "Information", `Output the name of the master in a given cluster`): + case registerCliCommand("which-cluster-primary", "Information", `Output the name of the primary in a given cluster`): { clusterName := getClusterName(clusterAlias, instanceKey) primaries, err := inst.ReadClusterPrimary(clusterName) @@ -905,11 +901,11 @@ func Cli(command string, strict bool, instance string, destination string, owner fmt.Println(clusterInstance.Key.DisplayString()) } } - case registerCliCommand("which-master", "Information", `Output the fully-qualified hostname:port representation of a given instance's master`): + case registerCliCommand("which-primary", "Information", `Output the fully-qualified hostname:port representation of a given instance's primary`): { instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) if instanceKey == nil { - log.Fatalf("Unable to get master: unresolved instance") + log.Fatalf("Unable to get primary: unresolved instance") } instance := validateInstanceIsFound(instanceKey) if instance.SourceKey.IsValid() { @@ -969,7 +965,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } fmt.Println(lag) } - case registerCliCommand("submit-masters-to-kv-stores", "Key-value", `Submit master of a specific cluster, or all masters of all clusters to key-value stores`): + case registerCliCommand("submit-primaries-to-kv-stores", "Key-value", `Submit primary of a specific cluster, or all primaries of all clusters to key-value stores`): { clusterName := getClusterName(clusterAlias, instanceKey) log.Debugf("cluster name is <%s>", clusterName) @@ -1192,7 +1188,7 @@ func Cli(command string, strict bool, instance string, destination string, owner fmt.Println(promotedInstanceKey.DisplayString()) } } - case registerCliCommand("force-master-failover", "Recovery", `Forcibly discard master and initiate a failover, even if orchestrator doesn't see a problem. 
This command lets orchestrator choose the replacement master`): + case registerCliCommand("force-primary-failover", "Recovery", `Forcibly discard primary and initiate a failover, even if orchestrator doesn't see a problem. This command lets orchestrator choose the replacement primary`): { clusterName := getClusterName(clusterAlias, instanceKey) topologyRecovery, err := logic.ForcePrimaryFailover(clusterName) @@ -1201,11 +1197,11 @@ func Cli(command string, strict bool, instance string, destination string, owner } fmt.Println(topologyRecovery.SuccessorKey.DisplayString()) } - case registerCliCommand("force-master-takeover", "Recovery", `Forcibly discard master and promote another (direct child) instance instead, even if everything is running well`): + case registerCliCommand("force-primary-takeover", "Recovery", `Forcibly discard primary and promote another (direct child) instance instead, even if everything is running well`): { clusterName := getClusterName(clusterAlias, instanceKey) if destinationKey == nil { - log.Fatal("Cannot deduce destination, the instance to promote in place of the master. Please provide with -d") + log.Fatal("Cannot deduce destination, the instance to promote in place of the primary. Please provide with -d") } destination := validateInstanceIsFound(destinationKey) topologyRecovery, err := logic.ForcePrimaryTakeover(clusterName, destination) @@ -1214,7 +1210,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } fmt.Println(topologyRecovery.SuccessorKey.DisplayString()) } - case registerCliCommand("graceful-master-takeover", "Recovery", `Gracefully promote a new master. Either indicate identity of new master via '-d designated.instance.com' or setup replication tree to have a single direct replica to the master.`): + case registerCliCommand("graceful-primary-takeover", "Recovery", `Gracefully promote a new primary. 
Either indicate identity of new primary via '-d designated.instance.com' or setup replication tree to have a single direct replica to the primary.`): { clusterName := getClusterName(clusterAlias, instanceKey) if destinationKey != nil { @@ -1226,9 +1222,9 @@ func Cli(command string, strict bool, instance string, destination string, owner } fmt.Println(topologyRecovery.SuccessorKey.DisplayString()) fmt.Println(*promotedPrimaryCoordinates) - log.Debugf("Promoted %+v as new master. Binlog coordinates at time of promotion: %+v", topologyRecovery.SuccessorKey, *promotedPrimaryCoordinates) + log.Debugf("Promoted %+v as new primary. Binlog coordinates at time of promotion: %+v", topologyRecovery.SuccessorKey, *promotedPrimaryCoordinates) } - case registerCliCommand("graceful-master-takeover-auto", "Recovery", `Gracefully promote a new master. orchestrator will attempt to pick the promoted replica automatically`): + case registerCliCommand("graceful-primary-takeover-auto", "Recovery", `Gracefully promote a new primary. orchestrator will attempt to pick the promoted replica automatically`): { clusterName := getClusterName(clusterAlias, instanceKey) // destinationKey doesn't _have_ to be specified: if unspecified, orchestrator will auto-deduce a replica. @@ -1242,7 +1238,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } fmt.Println(topologyRecovery.SuccessorKey.DisplayString()) fmt.Println(*promotedPrimaryCoordinates) - log.Debugf("Promoted %+v as new master. Binlog coordinates at time of promotion: %+v", topologyRecovery.SuccessorKey, *promotedPrimaryCoordinates) + log.Debugf("Promoted %+v as new primary. 
Binlog coordinates at time of promotion: %+v", topologyRecovery.SuccessorKey, *promotedPrimaryCoordinates) } case registerCliCommand("replication-analysis", "Recovery", `Request an analysis of potential crash incidents in all known topologies`): { @@ -1291,7 +1287,7 @@ func Cli(command string, strict bool, instance string, destination string, owner fmt.Printf("%d recoveries acknowldged\n", countRecoveries) } // Instance meta - case registerCliCommand("register-candidate", "Instance, meta", `Indicate that a specific instance is a preferred candidate for master promotion`): + case registerCliCommand("register-candidate", "Instance, meta", `Indicate that a specific instance is a preferred candidate for primary promotion`): { instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) promotionRule, err := inst.ParseCandidatePromotionRule(*config.RuntimeCLIFlags.PromotionRule) @@ -1322,7 +1318,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } fmt.Println(instanceKey.DisplayString()) } - case registerCliCommand("set-heuristic-domain-instance", "Instance, meta", `Associate domain name of given cluster with what seems to be the writer master for that cluster`): + case registerCliCommand("set-heuristic-domain-instance", "Instance, meta", `Associate domain name of given cluster with what seems to be the writer primary for that cluster`): { clusterName := getClusterName(clusterAlias, instanceKey) instanceKey, err := inst.HeuristicallyApplyClusterDomainInstanceAttribute(clusterName) diff --git a/go/vt/orchestrator/app/command_help.go b/go/vt/orchestrator/app/command_help.go index fcc9b7d639b..88ebdd65e80 100644 --- a/go/vt/orchestrator/app/command_help.go +++ b/go/vt/orchestrator/app/command_help.go @@ -35,12 +35,12 @@ func init() { Relocate a replica beneath another (destination) instance. 
The choice of destination is almost arbitrary; it must not be a child/descendant of the instance, but otherwise it can be anywhere, and can be a normal replica or a binlog server. Orchestrator will choose the best course of action to relocate the replica. - No action taken when destination instance cannot act as master (e.g. has no binary logs, is of incompatible version, incompatible binlog format etc.) + No action taken when destination instance cannot act as primary (e.g. has no binary logs, is of incompatible version, incompatible binlog format etc.) Examples: - orchestrator -c relocate -i replica.to.relocate.com -d instance.that.becomes.its.master + orchestrator -c relocate -i replica.to.relocate.com -d instance.that.becomes.its.primary - orchestrator -c relocate -d destination.instance.that.becomes.its.master + orchestrator -c relocate -d destination.instance.that.becomes.its.primary -i not given, implicitly assumed local hostname (this command was previously named "relocate-below") @@ -50,12 +50,12 @@ func init() { typically much faster than relocating replicas one by one. Orchestrator chooses the best course of action to relocation the replicas. It may choose a multi-step operations. Some replicas may succeed and some may fail the operation. - The instance (replicas' master) itself may be crashed or inaccessible. It is not contacted throughout the operation. + The instance (replicas' primary) itself may be crashed or inaccessible. It is not contacted throughout the operation. 
Examples: - orchestrator -c relocate-replicas -i instance.whose.replicas.will.relocate -d instance.that.becomes.their.master + orchestrator -c relocate-replicas -i instance.whose.replicas.will.relocate -d instance.that.becomes.their.primary - orchestrator -c relocate-replicas -i instance.whose.replicas.will.relocate -d instance.that.becomes.their.master --pattern=regexp.filter + orchestrator -c relocate-replicas -i instance.whose.replicas.will.relocate -d instance.that.becomes.their.primary --pattern=regexp.filter only apply to those instances that match given regex ` CommandHelp["move-up-replicas"] = ` @@ -69,8 +69,8 @@ func init() { only apply to those instances that match given regex ` CommandHelp["move-below"] = ` - Moves a replica beneath its sibling. Both replicas must be actively replicating from same master. - The sibling will become instance's master. No action taken when sibling cannot act as master + Moves a replica beneath its sibling. Both replicas must be actively replicating from same primary. + The sibling will become instance's primary. No action taken when sibling cannot act as primary (e.g. has no binary logs, is of incompatible version, incompatible binlog format etc.) Example: @@ -79,22 +79,22 @@ func init() { orchestrator -c move-below -d sibling.replica.under.which.to.move.com -i not given, implicitly assumed local hostname ` - CommandHelp["take-master"] = ` - Turn an instance into a master of its own master; essentially switch the two. Replicas of each of the two + CommandHelp["take-primary"] = ` + Turn an instance into a primary of its own primary; essentially switch the two. Replicas of each of the two involved instances are unaffected, and continue to replicate as they were. - The instance's master must itself be a replica. It does not necessarily have to be actively replicating. + The instance's primary must itself be a replica. It does not necessarily have to be actively replicating. 
- orchestrator -c take-master -i replica.that.will.switch.places.with.its.master.com + orchestrator -c take-primary -i replica.that.will.switch.places.with.its.primary.com ` CommandHelp["repoint"] = ` Make the given instance replicate from another instance without changing the binglog coordinates. There - are little sanity checks to this and this is a risky operation. Use cases are: a rename of the master's + are little sanity checks to this and this is a risky operation. Use cases are: a rename of the primary's host, a corruption in relay-logs, move from beneath Binlog-server. Examples: - orchestrator -c repoint -i replica.to.operate.on.com -d new.master.com + orchestrator -c repoint -i replica.to.operate.on.com -d new.primary.com orchestrator -c repoint -i replica.to.operate.on.com - The above will repoint the replica back to its existing master without change + The above will repoint the replica back to its existing primary without change orchestrator -c repoint -i not given, implicitly assumed local hostname @@ -108,22 +108,22 @@ func init() { orchestrator -c repoint-replicas -i not given, implicitly assumed local hostname ` - CommandHelp["make-co-master"] = ` - Create a master-master replication. Given instance is a replica which replicates directly from a master. - The master is then turned to be a replica of the instance. The master is expected to not be a replica. + CommandHelp["make-co-primary"] = ` + Create a primary-primary replication. Given instance is a replica which replicates directly from a primary. + The primary is then turned to be a replica of the instance. The primary is expected to not be a replica. The read_only property of the slve is unaffected by this operation. 
Examples: - orchestrator -c make-co-master -i replica.to.turn.into.co.master.com + orchestrator -c make-co-primary -i replica.to.turn.into.co.primary.com - orchestrator -c make-co-master + orchestrator -c make-co-primary -i not given, implicitly assumed local hostname ` CommandHelp["get-candidate-replica"] = ` Information command suggesting the most up-to-date replica of a given instance, which can be promoted - as local master to its siblings. If replication is up and running, this command merely gives an + as local primary to its siblings. If replication is up and running, this command merely gives an estimate, since replicas advance and progress continuously in different pace. If all replicas of given instance have broken replication (e.g. because given instance is dead), then this command provides - with a definitve candidate, which could act as a replace master. See also regroup-replicas. Example: + with a definitve candidate, which could act as a replace primary. See also regroup-replicas. Example: orchestrator -c get-candidate-replica -i instance.with.replicas.one.of.which.may.be.candidate.com ` @@ -139,35 +139,35 @@ func init() { ` CommandHelp["move-gtid"] = ` Move a replica beneath another (destination) instance. Orchestrator will reject the operation if GTID is - not enabled on the replica, or is not supported by the would-be master. + not enabled on the replica, or is not supported by the would-be primary. You may try and move the replica under any other instance; there are no constraints on the family ties the two may have, though you should be careful as not to try and replicate from a descendant (making an impossible loop). 
Examples: - orchestrator -c move-gtid -i replica.to.move.com -d instance.that.becomes.its.master + orchestrator -c move-gtid -i replica.to.move.com -d instance.that.becomes.its.primary - orchestrator -c match -d destination.instance.that.becomes.its.master + orchestrator -c match -d destination.instance.that.becomes.its.primary -i not given, implicitly assumed local hostname ` CommandHelp["move-replicas-gtid"] = ` Moves all replicas of a given instance under another (destination) instance using GTID. This is a (faster) shortcut to moving each replica via "move-gtid". Orchestrator will only move those replica configured with GTID (either Oracle or MariaDB variants) and under the - condition the would-be master supports GTID. + condition the would-be primary supports GTID. Examples: - orchestrator -c move-replicas-gtid -i instance.whose.replicas.will.relocate -d instance.that.becomes.their.master + orchestrator -c move-replicas-gtid -i instance.whose.replicas.will.relocate -d instance.that.becomes.their.primary - orchestrator -c move-replicas-gtid -i instance.whose.replicas.will.relocate -d instance.that.becomes.their.master --pattern=regexp.filter + orchestrator -c move-replicas-gtid -i instance.whose.replicas.will.relocate -d instance.that.becomes.their.primary --pattern=regexp.filter only apply to those instances that match given regex ` CommandHelp["regroup-replicas-gtid"] = ` Given an instance (possibly a crashed one; it is never being accessed), pick one of its replica and make it - local master of its siblings, using GTID. The rules are similar to those in the "regroup-replicas" command. + local primary of its siblings, using GTID. The rules are similar to those in the "regroup-replicas" command. 
Example: - orchestrator -c regroup-replicas-gtid -i instance.with.gtid.and.replicas.one.of.which.will.turn.local.master.if.possible + orchestrator -c regroup-replicas-gtid -i instance.with.gtid.and.replicas.one.of.which.will.turn.local.primary.if.possible --debug is your friend. ` @@ -177,12 +177,12 @@ func init() { and in fact (if you know what you're doing), they don't actually have to belong to the same topology. The operation expects the relocated instance to be "behind" the destination instance. It only finds out whether this is the case by the end; the operation is cancelled in the event this is not the case. - No action taken when destination instance cannot act as master (e.g. has no binary logs, is of incompatible version, incompatible binlog format etc.) + No action taken when destination instance cannot act as primary (e.g. has no binary logs, is of incompatible version, incompatible binlog format etc.) Examples: - orchestrator -c match -i replica.to.relocate.com -d instance.that.becomes.its.master + orchestrator -c match -i replica.to.relocate.com -d instance.that.becomes.its.primary - orchestrator -c match -d destination.instance.that.becomes.its.master + orchestrator -c match -d destination.instance.that.becomes.its.primary -i not given, implicitly assumed local hostname (this command was previously named "match-below") @@ -194,9 +194,9 @@ func init() { respective position behind the instance (the more replicas, the more savings). The instance itself may be crashed or inaccessible. It is not contacted throughout the operation. 
Examples: - orchestrator -c match-replicas -i instance.whose.replicas.will.relocate -d instance.that.becomes.their.master + orchestrator -c match-replicas -i instance.whose.replicas.will.relocate -d instance.that.becomes.their.primary - orchestrator -c match-replicas -i instance.whose.replicas.will.relocate -d instance.that.becomes.their.master --pattern=regexp.filter + orchestrator -c match-replicas -i instance.whose.replicas.will.relocate -d instance.that.becomes.their.primary --pattern=regexp.filter only apply to those instances that match given regex (this command was previously named "multi-match-replicas") @@ -213,13 +213,13 @@ func init() { orchestrator -c disable-gtid -i replica.replicating.via.gtid.com ` - CommandHelp["reset-master-gtid-remove-own-uuid"] = ` - Assuming GTID is enabled, Reset master on instance, remove GTID entries generated by the instance. + CommandHelp["reset-primary-gtid-remove-own-uuid"] = ` + Assuming GTID is enabled, Reset primary on instance, remove GTID entries generated by the instance. This operation is only allowed on Oracle-GTID enabled servers that have no replicas. Is is used for cleaning up the GTID mess incurred by mistakenly issuing queries on the replica (even such queries as "FLUSH ENGINE LOGS" that happen to write to binary logs). Example: - orchestrator -c reset-master-gtid-remove-own-uuid -i replica.running.with.gtid.com + orchestrator -c reset-primary-gtid-remove-own-uuid -i replica.running.with.gtid.com ` CommandHelp["stop-slave"] = ` Issues a STOP SLAVE; command. Example: @@ -264,19 +264,19 @@ func init() { Issuing this on an attached (i.e. normal) replica will do nothing. ` - CommandHelp["detach-replica-master-host"] = ` + CommandHelp["detach-replica-primary-host"] = ` Stops replication and modifies Master_Host into an impossible, yet reversible, value. - This effectively means the replication becomes broken. See reattach-replica-master-host. Example: + This effectively means the replication becomes broken. 
See reattach-replica-primary-host. Example: - orchestrator -c detach-replica-master-host -i replica.whose.replication.will.break.com + orchestrator -c detach-replica-primary-host -i replica.whose.replication.will.break.com Issuing this on an already detached replica will do nothing. ` - CommandHelp["reattach-replica-master-host"] = ` - Undo a detach-replica-master-host operation. Reverses the hostname change into the original value, and + CommandHelp["reattach-replica-primary-host"] = ` + Undo a detach-replica-primary-host operation. Reverses the hostname change into the original value, and resumes replication. Example: - orchestrator -c reattach-replica-master-host -i detahced.replica.whose.replication.will.amend.com + orchestrator -c reattach-replica-primary-host -i detahced.replica.whose.replication.will.amend.com Issuing this on an attached (i.e. normal) replica will do nothing. ` @@ -363,18 +363,18 @@ func init() { ` CommandHelp["clusters"] = ` List all clusters known to orchestrator. A cluster (aka topology, aka chain) is identified by its - master (or one of its master if more than one exists). Example: + primary (or one of its primary if more than one exists). Example: orchestrator -c clusters -i not given, implicitly assumed local hostname ` - CommandHelp["all-clusters-masters"] = ` - List of writeable masters, one per cluster. - For most single-master topologies, this is trivially the master. - For active-active master-master topologies, this ensures only one of - the masters is returned. Example: + CommandHelp["all-clusters-primaries"] = ` + List of writeable primaries, one per cluster. + For most single-primary topologies, this is trivially the primary. + For active-active primary-primary topologies, this ensures only one of + the primaries is returned. Example: - orchestrator -c all-clusters-masters + orchestrator -c all-clusters-primaries ` CommandHelp["topology"] = ` Show an ascii-graph of a replication topology, given a member of that topology. 
Example: @@ -446,26 +446,26 @@ func init() { Detects the domain name for given cluster, reads from key-value store the writer host associated with the domain name. orchestrator -c which-heuristic-domain-instance -i instance.of.some.cluster - Cluster is inferred by a member instance (the instance is not necessarily the master) + Cluster is inferred by a member instance (the instance is not necessarily the primary) ` - CommandHelp["which-cluster-master"] = ` - Output the name of the active master in a given cluster, indicated by instance or alias. - An "active" master is one that is writable and is not marked as downtimed due to a topology recovery. + CommandHelp["which-cluster-primary"] = ` + Output the name of the active primary in a given cluster, indicated by instance or alias. + An "active" primary is one that is writable and is not marked as downtimed due to a topology recovery. Examples: - orchestrator -c which-cluster-master -i instance.to.check.com + orchestrator -c which-cluster-primary -i instance.to.check.com - orchestrator -c which-cluster-master + orchestrator -c which-cluster-primary -i not given, implicitly assumed local hostname - orchestrator -c which-cluster-master -alias some_alias + orchestrator -c which-cluster-primary -alias some_alias assuming some_alias is a known cluster alias (see ClusterNameToAlias or DetectClusterAliasQuery configuration) ` CommandHelp["which-cluster-osc-replicas"] = ` Output a list of replicas in same cluster as given instance, that would server as good candidates as control replicas for a pt-online-schema-change operation. Those replicas would be used for replication delay so as to throtthe osc operation. Selected replicas will include, - where possible: intermediate masters, their replicas, 3rd level replicas, direct non-intermediate-master replicas. + where possible: intermediate primaries, their replicas, 3rd level replicas, direct non-intermediate-primary replicas. 
orchestrator -c which-cluster-osc-replicas -i instance.to.check.com @@ -484,17 +484,17 @@ func init() { orchestrator -c which-lost-in-recovery Lists all heuristically-recent known lost instances ` - CommandHelp["which-master"] = ` - Output the fully-qualified hostname:port representation of a given instance's master. Examples: + CommandHelp["which-primary"] = ` + Output the fully-qualified hostname:port representation of a given instance's primary. Examples: - orchestrator -c which-master -i a.known.replica.com + orchestrator -c which-primary -i a.known.replica.com - orchestrator -c which-master + orchestrator -c which-primary -i not given, implicitly assumed local hostname ` CommandHelp["which-replicas"] = ` Output the fully-qualified hostname:port list of replicas (one per line) of a given instance (or empty - list if instance is not a master to anyone). Examples: + list if instance is not a primary to anyone). Examples: orchestrator -c which-replicas -i a.known.instance.com @@ -526,7 +526,7 @@ func init() { ` CommandHelp["snapshot-topologies"] = ` Take a snapshot of existing topologies. This will record minimal replication topology data: the identity - of an instance, its master and its cluster. + of an instance, its primary and its cluster. Taking a snapshot later allows for reviewing changes in topologies. One might wish to invoke this command on a daily basis, and later be able to solve questions like 'where was this instacne replicating from before we moved it?', 'which instances were replication from this instance a week ago?' etc. Example: @@ -621,57 +621,57 @@ func init() { orchestrator -c recover-lite -i dead.instance.com --debug ` - CommandHelp["force-master-failover"] = ` - Forcibly begin a master failover process, even if orchestrator does not see anything wrong - in particular with the master. 
- - This will not work in a master-master configuration - - Orchestrator just treats this command as a DeadMaster failover scenario + CommandHelp["force-primary-failover"] = ` + Forcibly begin a primary failover process, even if orchestrator does not see anything wrong + in particular with the primary. + - This will not work in a primary-primary configuration + - Orchestrator just treats this command as a DeadPrimary failover scenario - Orchestrator will issue all relevant pre-failover and post-failover external processes. - - Orchestrator will not attempt to recover/reconnect the old master + - Orchestrator will not attempt to recover/reconnect the old primary ` - CommandHelp["force-master-takeover"] = ` - Forcibly discard master and promote another (direct child) instance instead, even if everything is running well. + CommandHelp["force-primary-takeover"] = ` + Forcibly discard primary and promote another (direct child) instance instead, even if everything is running well. This allows for planned switchover. NOTE: - You must specify the instance to promote via "-d" - - Promoted instance must be a direct child of the existing master - - This will not work in a master-master configuration - - Orchestrator just treats this command as a DeadMaster failover scenario + - Promoted instance must be a direct child of the existing primary + - This will not work in a primary-primary configuration + - Orchestrator just treats this command as a DeadPrimary failover scenario - It is STRONGLY suggested that you first relocate everything below your chosen instance-to-promote. It *is* a planned failover thing. - Otherwise orchestrator will do its thing in moving instances around, hopefully promoting your requested server on top. - Orchestrator will issue all relevant pre-failover and post-failover external processes. - - In this command orchestrator will not issue 'SET GLOBAL read_only=1' on the existing master, nor will - it issue a 'FLUSH TABLES WITH READ LOCK'. 
Please see the 'graceful-master-takeover' command. + - In this command orchestrator will not issue 'SET GLOBAL read_only=1' on the existing primary, nor will + it issue a 'FLUSH TABLES WITH READ LOCK'. Please see the 'graceful-primary-takeover' command. Examples: - orchestrator -c force-master-takeover -alias mycluster -d immediate.child.of.master.com - Indicate cluster by alias. Orchestrator automatically figures out the master + orchestrator -c force-primary-takeover -alias mycluster -d immediate.child.of.primary.com + Indicate cluster by alias. Orchestrator automatically figures out the primary - orchestrator -c force-master-takeover -i instance.in.relevant.cluster.com -d immediate.child.of.master.com - Indicate cluster by an instance. You don't structly need to specify the master, orchestrator - will infer the master's identify. + orchestrator -c force-primary-takeover -i instance.in.relevant.cluster.com -d immediate.child.of.primary.com + Indicate cluster by an instance. You don't structly need to specify the primary, orchestrator + will infer the primary's identify. ` - CommandHelp["graceful-master-takeover"] = ` - Gracefully discard master and promote another (direct child) instance instead, even if everything is running well. + CommandHelp["graceful-primary-takeover"] = ` + Gracefully discard primary and promote another (direct child) instance instead, even if everything is running well. This allows for planned switchover. NOTE: - - Promoted instance must be a direct child of the existing master - - Promoted instance must be the *only* direct child of the existing master. It *is* a planned failover thing. - - Orchestrator will first issue a "set global read_only=1" on existing master - - It will promote candidate master to the binlog positions of the existing master after issuing the above - - There _could_ still be statements issued and executed on the existing master by SUPER users, but those are ignored. 
- - Orchestrator then proceeds to handle a DeadMaster failover scenario + - Promoted instance must be a direct child of the existing primary + - Promoted instance must be the *only* direct child of the existing primary. It *is* a planned failover thing. + - Orchestrator will first issue a "set global read_only=1" on existing primary + - It will promote candidate primary to the binlog positions of the existing primary after issuing the above + - There _could_ still be statements issued and executed on the existing primary by SUPER users, but those are ignored. + - Orchestrator then proceeds to handle a DeadPrimary failover scenario - Orchestrator will issue all relevant pre-failover and post-failover external processes. Examples: - orchestrator -c graceful-master-takeover -alias mycluster - Indicate cluster by alias. Orchestrator automatically figures out the master and verifies it has a single direct replica + orchestrator -c graceful-primary-takeover -alias mycluster + Indicate cluster by alias. Orchestrator automatically figures out the primary and verifies it has a single direct replica - orchestrator -c force-master-takeover -i instance.in.relevant.cluster.com - Indicate cluster by an instance. You don't structly need to specify the master, orchestrator - will infer the master's identify. + orchestrator -c force-primary-takeover -i instance.in.relevant.cluster.com + Indicate cluster by an instance. You don't structly need to specify the primary, orchestrator + will infer the primary's identify. ` CommandHelp["replication-analysis"] = ` Request an analysis of potential crash incidents in all known topologies. @@ -704,11 +704,11 @@ func init() { ` CommandHelp["register-candidate"] = ` - Indicate that a specific instance is a preferred candidate for master promotion. Upon a dead master + Indicate that a specific instance is a preferred candidate for primary promotion. 
Upon a dead primary recovery, orchestrator will do its best to promote instances that are marked as candidates. However orchestrator cannot guarantee this will always work. Issues like version compatabilities, binlog format etc. are limiting factors. - You will want to mark an instance as a candidate when: it is replicating directly from the master, has + You will want to mark an instance as a candidate when: it is replicating directly from the primary, has binary logs and log_slave_updates is enabled, uses same binlog_format as its siblings, compatible version as its siblings. If you're using DataCenterPattern & PhysicalEnvironmentPattern (see configuration), you would further wish to make sure you have a candidate in each data center. @@ -727,10 +727,10 @@ func init() { CommandHelp["register-hostname-unresolve"] = ` Assigns the given instance a virtual (aka "unresolved") name. When moving replicas under an instance with assigned "unresolve" name, orchestrator issues a CHANGE MASTER TO MASTER_HOST='' ... - This is useful in cases where your master is behind virtual IP (e.g. active/passive masters with shared storage or DRBD, + This is useful in cases where your primary is behind virtual IP (e.g. active/passive primaries with shared storage or DRBD, e.g. binlog servers sharing common VIP). A "repoint" command is useful after "register-hostname-unresolve": you can repoint replicas of the instance to their exact - same location, and orchestrator will swap the fqdn of their master with the unresolved name. + same location, and orchestrator will swap the fqdn of their primary with the unresolved name. Such registration must be periodic. Orchestrator automatically expires such registration after ExpiryHostnameResolvesMinutes. Example: @@ -738,25 +738,25 @@ func init() { ` CommandHelp["deregister-hostname-unresolve"] = ` Explicitly deregister/dosassociate a hostname with an "unresolved" name. 
Orchestrator merely remvoes the association, but does - not touch any replica at this point. A "repoint" command can be useful right after calling this command to change replica's master host - name (assumed to be an "unresolved" name, such as a VIP) with the real fqdn of the master host. + not touch any replica at this point. A "repoint" command can be useful right after calling this command to change replica's primary host + name (assumed to be an "unresolved" name, such as a VIP) with the real fqdn of the primary host. Example: orchestrator -c deregister-hostname-unresolve -i instance.fqdn.com ` CommandHelp["set-heuristic-domain-instance"] = ` This is a temporary (sync your watches, watch for next ice age) command which registers the cluster domain name of a given cluster - with the master/writer host for that cluster. It is a one-time-master-discovery operation. + with the primary/writer host for that cluster. It is a one-time-primary-discovery operation. At this time orchestrator may also act as a small & simple key-value store (recall the "temporary" indication). - Master failover operations will overwrite the domain instance identity. Orchestrator so turns into a mini master-discovery + Primary failover operations will overwrite the domain instance identity. Orchestrator so turns into a mini primary-discovery service (I said "TEMPORARY"). Really there are other tools for the job. 
See also: which-heuristic-domain-instance Example: orchestrator -c set-heuristic-domain-instance --alias some_alias - Detects the domain name for given cluster, identifies the writer master of the cluster, associates the two in key-value store + Detects the domain name for given cluster, identifies the writer primary of the cluster, associates the two in key-value store orchestrator -c set-heuristic-domain-instance -i instance.of.some.cluster - Cluster is inferred by a member instance (the instance is not necessarily the master) + Cluster is inferred by a member instance (the instance is not necessarily the primary) ` CommandHelp["continuous"] = ` diff --git a/go/vt/orchestrator/config/config.go b/go/vt/orchestrator/config/config.go index 89f84b59adb..2737f4398fe 100644 --- a/go/vt/orchestrator/config/config.go +++ b/go/vt/orchestrator/config/config.go @@ -219,7 +219,7 @@ type Configuration struct { PreventCrossRegionPrimaryFailover bool // When true (default: false), cross-region primary failover are not allowed, orchestrator will do all it can to only fail over within same region, or else not fail over at all. PrimaryFailoverLostInstancesDowntimeMinutes uint // Number of minutes to downtime any server that was lost after a primary failover (including failed primary & lost replicas). 0 to disable PrimaryFailoverDetachSlavePrimaryHost bool // synonym to PrimaryFailoverDetachReplicaPrimaryHost - PrimaryFailoverDetachReplicaPrimaryHost bool // Should orchestrator issue a detach-replica-master-host on newly promoted primary (this makes sure the new primary will not attempt to replicate old primary if that comes back to life). Defaults 'false'. Meaningless if ApplyMySQLPromotionAfterPrimaryFailover is 'true'. + PrimaryFailoverDetachReplicaPrimaryHost bool // Should orchestrator issue a detach-replica-primary-host on newly promoted primary (this makes sure the new primary will not attempt to replicate old primary if that comes back to life). Defaults 'false'. 
Meaningless if ApplyMySQLPromotionAfterPrimaryFailover is 'true'. FailPrimaryPromotionOnLagMinutes uint // when > 0, fail a primary promotion if the candidate replica is lagging >= configured number of minutes. FailPrimaryPromotionIfSQLThreadNotUpToDate bool // when true, and a primary failover takes place, if candidate primary has not consumed all relay logs, promotion is aborted with error DelayPrimaryPromotionIfSQLThreadNotUpToDate bool // when true, and a primary failover takes place, if candidate primary has not consumed all relay logs, delay promotion until the sql thread has caught up @@ -235,7 +235,7 @@ type Configuration struct { ConsulAclToken string // ACL token used to write to Consul KV ConsulCrossDataCenterDistribution bool // should orchestrator automatically auto-deduce all consul DCs and write KVs in all DCs ZkAddress string // UNSUPPERTED YET. Address where (single or multiple) ZooKeeper servers are found, in `srv1[:port1][,srv2[:port2]...]` format. Default port is 2181. 
Example: srv-a,srv-b:12181,srv-c - KVClusterPrimaryPrefix string // Prefix to use for clusters' primary's entries in KV stores (internal, consul, ZK), default: "mysql/master" + KVClusterPrimaryPrefix string // Prefix to use for clusters' primary's entries in KV stores (internal, consul, ZK), default: "mysql/primary" WebMessage string // If provided, will be shown on all web pages below the title bar MaxConcurrentReplicaOperations int // Maximum number of concurrent operations on replicas InstanceDBExecContextTimeoutSeconds int // Timeout on context used while calling ExecContext on instance database @@ -395,7 +395,7 @@ func newConfiguration() *Configuration { ConsulAclToken: "", ConsulCrossDataCenterDistribution: false, ZkAddress: "", - KVClusterPrimaryPrefix: "mysql/master", + KVClusterPrimaryPrefix: "mysql/primary", WebMessage: "", MaxConcurrentReplicaOperations: 5, InstanceDBExecContextTimeoutSeconds: 30, @@ -486,10 +486,10 @@ func (this *Configuration) postReadAdjustments() error { } } if this.FailPrimaryPromotionIfSQLThreadNotUpToDate && this.DelayPrimaryPromotionIfSQLThreadNotUpToDate { - return fmt.Errorf("Cannot have both FailMasterPromotionIfSQLThreadNotUpToDate and DelayMasterPromotionIfSQLThreadNotUpToDate enabled") + return fmt.Errorf("Cannot have both FailPrimaryPromotionIfSQLThreadNotUpToDate and DelayPrimaryPromotionIfSQLThreadNotUpToDate enabled") } if this.FailPrimaryPromotionOnLagMinutes > 0 && this.ReplicationLagQuery == "" { - return fmt.Errorf("nonzero FailMasterPromotionOnLagMinutes requires ReplicationLagQuery to be set") + return fmt.Errorf("nonzero FailPrimaryPromotionOnLagMinutes requires ReplicationLagQuery to be set") } { if this.PostponeReplicaRecoveryOnLagMinutes != 0 && this.PostponeSlaveRecoveryOnLagMinutes != 0 && diff --git a/go/vt/orchestrator/http/api.go b/go/vt/orchestrator/http/api.go index 7a5a8748def..f43ce6c299d 100644 --- a/go/vt/orchestrator/http/api.go +++ b/go/vt/orchestrator/http/api.go @@ -51,26 +51,22 @@ const ( ) var 
apiSynonyms = map[string]string{ - "relocate-slaves": "relocate-replicas", - "regroup-slaves": "regroup-replicas", - "move-up-slaves": "move-up-replicas", - "repoint-slaves": "repoint-replicas", - "enslave-siblings": "take-siblings", - "enslave-master": "take-master", - "regroup-slaves-bls": "regroup-replicas-bls", - "move-slaves-gtid": "move-replicas-gtid", - "regroup-slaves-gtid": "regroup-replicas-gtid", - "detach-slave": "detach-replica", - "reattach-slave": "reattach-replica", - "detach-slave-master-host": "detach-replica-master-host", - "reattach-slave-master-host": "reattach-replica-master-host", - "cluster-osc-slaves": "cluster-osc-replicas", - "start-slave": "start-replica", - "restart-slave": "restart-replica", - "stop-slave": "stop-replica", - "stop-slave-nice": "stop-replica-nice", - "reset-slave": "reset-replica", - "restart-slave-statements": "restart-replica-statements", + "relocate-slaves": "relocate-replicas", + "regroup-slaves": "regroup-replicas", + "move-up-slaves": "move-up-replicas", + "repoint-slaves": "repoint-replicas", + "regroup-slaves-bls": "regroup-replicas-bls", + "move-slaves-gtid": "move-replicas-gtid", + "regroup-slaves-gtid": "regroup-replicas-gtid", + "detach-slave": "detach-replica", + "reattach-slave": "reattach-replica", + "cluster-osc-slaves": "cluster-osc-replicas", + "start-slave": "start-replica", + "restart-slave": "restart-replica", + "stop-slave": "stop-replica", + "stop-slave-nice": "stop-replica-nice", + "reset-slave": "reset-replica", + "restart-slave-statements": "restart-replica-statements", } var registeredPaths = []string{} @@ -642,7 +638,7 @@ func (this *HttpAPI) DetachReplicaPrimaryHost(params martini.Params, r render.Re Respond(r, &APIResponse{Code: OK, Message: fmt.Sprintf("Replica detached: %+v", instance.Key), Details: instance}) } -// ReattachReplicaPrimaryHost reverts a detachReplicaMasterHost command +// ReattachReplicaPrimaryHost reverts a detachReplicaPrimaryHost command // by resetting the original 
primary hostname in CHANGE MASTER TO func (this *HttpAPI) ReattachReplicaPrimaryHost(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { @@ -2323,10 +2319,10 @@ func (this *HttpAPI) gracefulPrimaryTakeover(params martini.Params, r render.Ren return } if topologyRecovery == nil || topologyRecovery.SuccessorKey == nil { - Respond(r, &APIResponse{Code: ERROR, Message: "graceful-master-takeover: no successor promoted", Details: topologyRecovery}) + Respond(r, &APIResponse{Code: ERROR, Message: "graceful-primary-takeover: no successor promoted", Details: topologyRecovery}) return } - Respond(r, &APIResponse{Code: OK, Message: "graceful-master-takeover: successor promoted", Details: topologyRecovery}) + Respond(r, &APIResponse{Code: OK, Message: "graceful-primary-takeover: successor promoted", Details: topologyRecovery}) } // GracefulPrimaryTakeover gracefully fails over a primary, either: @@ -2839,9 +2835,9 @@ func (this *HttpAPI) RegisterRequests(m *martini.ClassicMartini) { this.registerAPIRequest(m, "move-below/:host/:port/:siblingHost/:siblingPort", this.MoveBelow) this.registerAPIRequest(m, "repoint/:host/:port/:belowHost/:belowPort", this.Repoint) this.registerAPIRequest(m, "repoint-slaves/:host/:port", this.RepointReplicas) - this.registerAPIRequest(m, "make-co-master/:host/:port", this.MakeCoPrimary) - this.registerAPIRequest(m, "enslave-siblings/:host/:port", this.TakeSiblings) - this.registerAPIRequest(m, "enslave-master/:host/:port", this.TakePrimary) + this.registerAPIRequest(m, "make-co-primary/:host/:port", this.MakeCoPrimary) + this.registerAPIRequest(m, "take-siblings/:host/:port", this.TakeSiblings) + this.registerAPIRequest(m, "take-primary/:host/:port", this.TakePrimary) // Binlog server relocation: this.registerAPIRequest(m, "regroup-slaves-bls/:host/:port", this.RegroupReplicasBinlogServers) @@ -2855,7 +2851,7 @@ func (this *HttpAPI) RegisterRequests(m *martini.ClassicMartini) { 
this.registerAPIRequest(m, "enable-gtid/:host/:port", this.EnableGTID) this.registerAPIRequest(m, "disable-gtid/:host/:port", this.DisableGTID) this.registerAPIRequest(m, "locate-gtid-errant/:host/:port", this.LocateErrantGTID) - this.registerAPIRequest(m, "gtid-errant-reset-master/:host/:port", this.ErrantGTIDResetPrimary) + this.registerAPIRequest(m, "gtid-errant-reset-primary/:host/:port", this.ErrantGTIDResetPrimary) this.registerAPIRequest(m, "gtid-errant-inject-empty/:host/:port", this.ErrantGTIDInjectEmpty) this.registerAPIRequest(m, "skip-query/:host/:port", this.SkipQuery) this.registerAPIRequest(m, "start-slave/:host/:port", this.StartReplication) @@ -2865,8 +2861,8 @@ func (this *HttpAPI) RegisterRequests(m *martini.ClassicMartini) { this.registerAPIRequest(m, "reset-slave/:host/:port", this.ResetReplication) this.registerAPIRequest(m, "detach-slave/:host/:port", this.DetachReplicaPrimaryHost) this.registerAPIRequest(m, "reattach-slave/:host/:port", this.ReattachReplicaPrimaryHost) - this.registerAPIRequest(m, "detach-slave-master-host/:host/:port", this.DetachReplicaPrimaryHost) - this.registerAPIRequest(m, "reattach-slave-master-host/:host/:port", this.ReattachReplicaPrimaryHost) + this.registerAPIRequest(m, "detach-replica-primary-host/:host/:port", this.DetachReplicaPrimaryHost) + this.registerAPIRequest(m, "reattach-replica-primary-host/:host/:port", this.ReattachReplicaPrimaryHost) this.registerAPIRequest(m, "flush-binary-logs/:host/:port", this.FlushBinaryLogs) this.registerAPIRequest(m, "purge-binary-logs/:host/:port/:logFile", this.PurgeBinaryLogs) this.registerAPIRequest(m, "restart-slave-statements/:host/:port", this.RestartReplicationStatements) @@ -2904,8 +2900,8 @@ func (this *HttpAPI) RegisterRequests(m *martini.ClassicMartini) { this.registerAPIRequest(m, "clusters", this.Clusters) this.registerAPIRequest(m, "clusters-info", this.ClustersInfo) - this.registerAPIRequest(m, "masters", this.Primaries) - this.registerAPIRequest(m, 
"master/:clusterHint", this.ClusterPrimary) + this.registerAPIRequest(m, "primaries", this.Primaries) + this.registerAPIRequest(m, "primary/:clusterHint", this.ClusterPrimary) this.registerAPIRequest(m, "instance-replicas/:host/:port", this.InstanceReplicas) this.registerAPIRequest(m, "all-instances", this.AllInstances) this.registerAPIRequest(m, "downtimed", this.Downtimed) @@ -2919,8 +2915,8 @@ func (this *HttpAPI) RegisterRequests(m *martini.ClassicMartini) { this.registerAPIRequest(m, "snapshot-topologies", this.SnapshotTopologies) // Key-value: - this.registerAPIRequest(m, "submit-masters-to-kv-stores", this.SubmitPrimariesToKvStores) - this.registerAPIRequest(m, "submit-masters-to-kv-stores/:clusterHint", this.SubmitPrimariesToKvStores) + this.registerAPIRequest(m, "submit-primaries-to-kv-stores", this.SubmitPrimariesToKvStores) + this.registerAPIRequest(m, "submit-primaries-to-kv-stores/:clusterHint", this.SubmitPrimariesToKvStores) // Tags: this.registerAPIRequest(m, "tagged", this.Tagged) @@ -2958,18 +2954,18 @@ func (this *HttpAPI) RegisterRequests(m *martini.ClassicMartini) { this.registerAPIRequest(m, "recover/:host/:port/:candidateHost/:candidatePort", this.Recover) this.registerAPIRequest(m, "recover-lite/:host/:port", this.RecoverLite) this.registerAPIRequest(m, "recover-lite/:host/:port/:candidateHost/:candidatePort", this.RecoverLite) - this.registerAPIRequest(m, "graceful-master-takeover/:host/:port", this.GracefulPrimaryTakeover) - this.registerAPIRequest(m, "graceful-master-takeover/:host/:port/:designatedHost/:designatedPort", this.GracefulPrimaryTakeover) - this.registerAPIRequest(m, "graceful-master-takeover/:clusterHint", this.GracefulPrimaryTakeover) - this.registerAPIRequest(m, "graceful-master-takeover/:clusterHint/:designatedHost/:designatedPort", this.GracefulPrimaryTakeover) - this.registerAPIRequest(m, "graceful-master-takeover-auto/:host/:port", this.GracefulPrimaryTakeoverAuto) - this.registerAPIRequest(m, 
"graceful-master-takeover-auto/:host/:port/:designatedHost/:designatedPort", this.GracefulPrimaryTakeoverAuto) - this.registerAPIRequest(m, "graceful-master-takeover-auto/:clusterHint", this.GracefulPrimaryTakeoverAuto) - this.registerAPIRequest(m, "graceful-master-takeover-auto/:clusterHint/:designatedHost/:designatedPort", this.GracefulPrimaryTakeoverAuto) - this.registerAPIRequest(m, "force-master-failover/:host/:port", this.ForcePrimaryFailover) - this.registerAPIRequest(m, "force-master-failover/:clusterHint", this.ForcePrimaryFailover) - this.registerAPIRequest(m, "force-master-takeover/:clusterHint/:designatedHost/:designatedPort", this.ForcePrimaryTakeover) - this.registerAPIRequest(m, "force-master-takeover/:host/:port/:designatedHost/:designatedPort", this.ForcePrimaryTakeover) + this.registerAPIRequest(m, "graceful-primary-takeover/:host/:port", this.GracefulPrimaryTakeover) + this.registerAPIRequest(m, "graceful-primary-takeover/:host/:port/:designatedHost/:designatedPort", this.GracefulPrimaryTakeover) + this.registerAPIRequest(m, "graceful-primary-takeover/:clusterHint", this.GracefulPrimaryTakeover) + this.registerAPIRequest(m, "graceful-primary-takeover/:clusterHint/:designatedHost/:designatedPort", this.GracefulPrimaryTakeover) + this.registerAPIRequest(m, "graceful-primary-takeover-auto/:host/:port", this.GracefulPrimaryTakeoverAuto) + this.registerAPIRequest(m, "graceful-primary-takeover-auto/:host/:port/:designatedHost/:designatedPort", this.GracefulPrimaryTakeoverAuto) + this.registerAPIRequest(m, "graceful-primary-takeover-auto/:clusterHint", this.GracefulPrimaryTakeoverAuto) + this.registerAPIRequest(m, "graceful-primary-takeover-auto/:clusterHint/:designatedHost/:designatedPort", this.GracefulPrimaryTakeoverAuto) + this.registerAPIRequest(m, "force-primary-failover/:host/:port", this.ForcePrimaryFailover) + this.registerAPIRequest(m, "force-primary-failover/:clusterHint", this.ForcePrimaryFailover) + this.registerAPIRequest(m, 
"force-primary-takeover/:clusterHint/:designatedHost/:designatedPort", this.ForcePrimaryTakeover) + this.registerAPIRequest(m, "force-primary-takeover/:host/:port/:designatedHost/:designatedPort", this.ForcePrimaryTakeover) this.registerAPIRequest(m, "register-candidate/:host/:port/:promotionRule", this.RegisterCandidate) this.registerAPIRequest(m, "automated-recovery-filters", this.AutomatedRecoveryFilters) this.registerAPIRequest(m, "audit-failure-detection", this.AuditFailureDetection) diff --git a/go/vt/orchestrator/inst/analysis.go b/go/vt/orchestrator/inst/analysis.go index f0e766d0b33..a15baa41eaf 100644 --- a/go/vt/orchestrator/inst/analysis.go +++ b/go/vt/orchestrator/inst/analysis.go @@ -112,9 +112,9 @@ type ReplicationAnalysisHints struct { } const ( - ForcePrimaryFailoverCommandHint string = "force-master-failover" - ForcePrimaryTakeoverCommandHint string = "force-master-takeover" - GracefulPrimaryTakeoverCommandHint string = "graceful-master-takeover" + ForcePrimaryFailoverCommandHint string = "force-primary-failover" + ForcePrimaryTakeoverCommandHint string = "force-primary-takeover" + GracefulPrimaryTakeoverCommandHint string = "graceful-primary-takeover" ) type AnalysisInstanceType string diff --git a/go/vt/orchestrator/inst/instance_topology.go b/go/vt/orchestrator/inst/instance_topology.go index 451bea8fd03..9116b578f95 100644 --- a/go/vt/orchestrator/inst/instance_topology.go +++ b/go/vt/orchestrator/inst/instance_topology.go @@ -975,7 +975,7 @@ Cleanup: return instance, log.Errore(err) } // and we're done (pending deferred functions) - AuditOperation("make-co-master", instanceKey, fmt.Sprintf("%+v made co-master of %+v", *instanceKey, primary.Key)) + AuditOperation("make-co-primary", instanceKey, fmt.Sprintf("%+v made co-master of %+v", *instanceKey, primary.Key)) return instance, err } @@ -1037,7 +1037,7 @@ func DetachReplicaPrimaryHost(instanceKey *InstanceKey) (*Instance, error) { log.Infof("Will detach master host on %+v. 
Detached key is %+v", *instanceKey, *detachedPrimaryKey) - if maintenanceToken, merr := BeginMaintenance(instanceKey, GetMaintenanceOwner(), "detach-replica-master-host"); merr != nil { + if maintenanceToken, merr := BeginMaintenance(instanceKey, GetMaintenanceOwner(), "detach-replica-primary-host"); merr != nil { err = fmt.Errorf("Cannot begin maintenance on %+v: %v", *instanceKey, merr) goto Cleanup } else { @@ -1082,7 +1082,7 @@ func ReattachReplicaPrimaryHost(instanceKey *InstanceKey) (*Instance, error) { log.Infof("Will reattach master host on %+v. Reattached key is %+v", *instanceKey, *reattachedPrimaryKey) - if maintenanceToken, merr := BeginMaintenance(instanceKey, GetMaintenanceOwner(), "reattach-replica-master-host"); merr != nil { + if maintenanceToken, merr := BeginMaintenance(instanceKey, GetMaintenanceOwner(), "reattach-replica-primary-host"); merr != nil { err = fmt.Errorf("Cannot begin maintenance on %+v: %v", *instanceKey, merr) goto Cleanup } else { @@ -1223,13 +1223,13 @@ func ErrantGTIDResetPrimary(instanceKey *InstanceKey) (instance *Instance, err e return instance, err } if instance.GtidErrant == "" { - return instance, log.Errorf("gtid-errant-reset-master will not operate on %+v because no errant GTID is found", *instanceKey) + return instance, log.Errorf("gtid-errant-reset-primary will not operate on %+v because no errant GTID is found", *instanceKey) } if !instance.SupportsOracleGTID { - return instance, log.Errorf("gtid-errant-reset-master requested for %+v but it is not using oracle-gtid", *instanceKey) + return instance, log.Errorf("gtid-errant-reset-primary requested for %+v but it is not using oracle-gtid", *instanceKey) } if len(instance.Replicas) > 0 { - return instance, log.Errorf("gtid-errant-reset-master will not operate on %+v because it has %+v replicas. Expecting no replicas", *instanceKey, len(instance.Replicas)) + return instance, log.Errorf("gtid-errant-reset-primary will not operate on %+v because it has %+v replicas. 
Expecting no replicas", *instanceKey, len(instance.Replicas)) } gtidSubtract := "" @@ -1255,7 +1255,7 @@ func ErrantGTIDResetPrimary(instanceKey *InstanceKey) (instance *Instance, err e goto Cleanup } if !replicationStopped { - err = fmt.Errorf("gtid-errant-reset-master: timeout while waiting for replication to stop on %+v", instance.Key) + err = fmt.Errorf("gtid-errant-reset-primary: timeout while waiting for replication to stop on %+v", instance.Key) goto Cleanup } } @@ -1276,21 +1276,21 @@ func ErrantGTIDResetPrimary(instanceKey *InstanceKey) (instance *Instance, err e time.Sleep(waitInterval) } if err != nil { - err = fmt.Errorf("gtid-errant-reset-master: error while resetting master on %+v, after which intended to set gtid_purged to: %s. Error was: %+v", instance.Key, gtidSubtract, err) + err = fmt.Errorf("gtid-errant-reset-primary: error while resetting master on %+v, after which intended to set gtid_purged to: %s. Error was: %+v", instance.Key, gtidSubtract, err) goto Cleanup } primaryStatusFound, executedGtidSet, err = ShowPrimaryStatus(instanceKey) if err != nil { - err = fmt.Errorf("gtid-errant-reset-master: error getting master status on %+v, after which intended to set gtid_purged to: %s. Error was: %+v", instance.Key, gtidSubtract, err) + err = fmt.Errorf("gtid-errant-reset-primary: error getting master status on %+v, after which intended to set gtid_purged to: %s. 
Error was: %+v", instance.Key, gtidSubtract, err) goto Cleanup } if !primaryStatusFound { - err = fmt.Errorf("gtid-errant-reset-master: cannot get master status on %+v, after which intended to set gtid_purged to: %s.", instance.Key, gtidSubtract) + err = fmt.Errorf("gtid-errant-reset-primary: cannot get master status on %+v, after which intended to set gtid_purged to: %s.", instance.Key, gtidSubtract) goto Cleanup } if executedGtidSet != "" { - err = fmt.Errorf("gtid-errant-reset-master: Unexpected non-empty Executed_Gtid_Set found on %+v following RESET MASTER, after which intended to set gtid_purged to: %s. Executed_Gtid_Set found to be: %+v", instance.Key, gtidSubtract, executedGtidSet) + err = fmt.Errorf("gtid-errant-reset-primary: Unexpected non-empty Executed_Gtid_Set found on %+v following RESET MASTER, after which intended to set gtid_purged to: %s. Executed_Gtid_Set found to be: %+v", instance.Key, gtidSubtract, executedGtidSet) goto Cleanup } @@ -1303,7 +1303,7 @@ func ErrantGTIDResetPrimary(instanceKey *InstanceKey) (instance *Instance, err e time.Sleep(waitInterval) } if err != nil { - err = fmt.Errorf("gtid-errant-reset-master: error setting gtid_purged on %+v to: %s. Error was: %+v", instance.Key, gtidSubtract, err) + err = fmt.Errorf("gtid-errant-reset-primary: error setting gtid_purged on %+v to: %s. 
Error was: %+v", instance.Key, gtidSubtract, err) goto Cleanup } @@ -1317,7 +1317,7 @@ Cleanup: } // and we're done (pending deferred functions) - AuditOperation("gtid-errant-reset-master", instanceKey, fmt.Sprintf("%+v master reset", *instanceKey)) + AuditOperation("gtid-errant-reset-primary", instanceKey, fmt.Sprintf("%+v master reset", *instanceKey)) return instance, err } @@ -1490,7 +1490,7 @@ Cleanup: if err != nil { return instance, err } - AuditOperation("take-master", instanceKey, fmt.Sprintf("took master: %+v", primaryInstance.Key)) + AuditOperation("take-primary", instanceKey, fmt.Sprintf("took master: %+v", primaryInstance.Key)) // Created this to enable a custom hook to be called after a TakePrimary success. // This only runs if there is a hook configured in orchestrator.conf.json diff --git a/go/vt/orchestrator/logic/topology_recovery.go b/go/vt/orchestrator/logic/topology_recovery.go index 389db99b0d7..13b75dc9ec3 100644 --- a/go/vt/orchestrator/logic/topology_recovery.go +++ b/go/vt/orchestrator/logic/topology_recovery.go @@ -1330,7 +1330,7 @@ func RecoverDeadCoPrimary(topologyRecovery *TopologyRecovery, skipProcesses bool // !! This is an evil 3-node circle that must be broken. // config.Config.ApplyMySQLPromotionAfterMasterFailover, if true, will cause it to break, because we would RESET SLAVE on S1 // but we want to make sure the circle is broken no matter what. 
- // So in the case we promoted not-the-other-co-primary, we issue a detach-replica-master-host, which is a reversible operation + // So in the case we promoted not-the-other-co-primary, we issue a detach-replica-primary-host, which is a reversible operation if promotedReplica != nil && !promotedReplica.Key.Equals(otherCoPrimaryKey) { _, err = inst.DetachReplicaPrimaryHost(&promotedReplica.Key) topologyRecovery.AddError(log.Errore(err)) From 38db82d01934d1211a793c2a9b2347be90053bc7 Mon Sep 17 00:00:00 2001 From: Manan Gupta Date: Tue, 17 Aug 2021 17:18:53 +0530 Subject: [PATCH 4/6] remove slave terms from config and inst package Signed-off-by: Manan Gupta --- go/vt/orchestrator/config/config.go | 44 +--------- go/vt/orchestrator/config/config_test.go | 104 ----------------------- go/vt/orchestrator/inst/analysis.go | 3 - go/vt/orchestrator/inst/instance.go | 12 --- 4 files changed, 3 insertions(+), 160 deletions(-) diff --git a/go/vt/orchestrator/config/config.go b/go/vt/orchestrator/config/config.go index 2737f4398fe..d5c44b9bc13 100644 --- a/go/vt/orchestrator/config/config.go +++ b/go/vt/orchestrator/config/config.go @@ -112,7 +112,6 @@ type Configuration struct { MySQLTopologyReadTimeoutSeconds int // Number of seconds before topology mysql read operation is aborted (driver-side). Used for all but discovery queries. MySQLConnectionLifetimeSeconds int // Number of seconds the mysql driver will keep database connection alive before recycling it DefaultInstancePort int // In case port was not specified on command line - SlaveLagQuery string // Synonym to ReplicationLagQuery ReplicationLagQuery string // custom query to check on replica lg (e.g. heartbeat table). Must return a single row with a single numeric column, which is the lag. ReplicationCredentialsQuery string // custom query to get replication credentials. Must return a single row, with two text columns: 1st is username, 2nd is password. 
This is optional, and can be used by orchestrator to configure replication after primary takeover or setup of co-primary. You need to ensure the orchestrator user has the privileges to run this query DiscoverByShowSlaveHosts bool // Attempt SHOW SLAVE HOSTS before PROCESSLIST @@ -212,18 +211,15 @@ type Configuration struct { PostGracefulTakeoverProcesses []string // Processes to execute after running a graceful primary takeover. Uses same placeholders as PostFailoverProcesses PostTakePrimaryProcesses []string // Processes to execute after a successful Take-Master event has taken place CoPrimaryRecoveryMustPromoteOtherCoPrimary bool // When 'false', anything can get promoted (and candidates are prefered over others). When 'true', orchestrator will promote the other co-primary or else fail - DetachLostSlavesAfterPrimaryFailover bool // synonym to DetachLostReplicasAfterPrimaryFailover DetachLostReplicasAfterPrimaryFailover bool // Should replicas that are not to be lost in primary recovery (i.e. were more up-to-date than promoted replica) be forcibly detached ApplyMySQLPromotionAfterPrimaryFailover bool // Should orchestrator take upon itself to apply MySQL primary promotion: set read_only=0, detach replication, etc. PreventCrossDataCenterPrimaryFailover bool // When true (default: false), cross-DC primary failover are not allowed, orchestrator will do all it can to only fail over within same DC, or else not fail over at all. PreventCrossRegionPrimaryFailover bool // When true (default: false), cross-region primary failover are not allowed, orchestrator will do all it can to only fail over within same region, or else not fail over at all. PrimaryFailoverLostInstancesDowntimeMinutes uint // Number of minutes to downtime any server that was lost after a primary failover (including failed primary & lost replicas). 
0 to disable - PrimaryFailoverDetachSlavePrimaryHost bool // synonym to PrimaryFailoverDetachReplicaPrimaryHost PrimaryFailoverDetachReplicaPrimaryHost bool // Should orchestrator issue a detach-replica-primary-host on newly promoted primary (this makes sure the new primary will not attempt to replicate old primary if that comes back to life). Defaults 'false'. Meaningless if ApplyMySQLPromotionAfterPrimaryFailover is 'true'. FailPrimaryPromotionOnLagMinutes uint // when > 0, fail a primary promotion if the candidate replica is lagging >= configured number of minutes. FailPrimaryPromotionIfSQLThreadNotUpToDate bool // when true, and a primary failover takes place, if candidate primary has not consumed all relay logs, promotion is aborted with error DelayPrimaryPromotionIfSQLThreadNotUpToDate bool // when true, and a primary failover takes place, if candidate primary has not consumed all relay logs, delay promotion until the sql thread has caught up - PostponeSlaveRecoveryOnLagMinutes uint // Synonym to PostponeReplicaRecoveryOnLagMinutes PostponeReplicaRecoveryOnLagMinutes uint // On crash recovery, replicas that are lagging more than given minutes are only resurrected late in the recovery process, after primary/IM has been elected and processes executed. Value of 0 disables this feature OSCIgnoreHostnameFilters []string // OSC replicas recommendation will ignore replica hostnames matching given patterns URLPrefix string // URL prefix to run orchestrator on non-root web path, e.g. /orchestrator to put it behind nginx. 
@@ -377,16 +373,16 @@ func newConfiguration() *Configuration { PostGracefulTakeoverProcesses: []string{}, PostTakePrimaryProcesses: []string{}, CoPrimaryRecoveryMustPromoteOtherCoPrimary: true, - DetachLostSlavesAfterPrimaryFailover: true, + DetachLostReplicasAfterPrimaryFailover: true, ApplyMySQLPromotionAfterPrimaryFailover: true, PreventCrossDataCenterPrimaryFailover: false, PreventCrossRegionPrimaryFailover: false, PrimaryFailoverLostInstancesDowntimeMinutes: 0, - PrimaryFailoverDetachSlavePrimaryHost: false, + PrimaryFailoverDetachReplicaPrimaryHost: false, FailPrimaryPromotionOnLagMinutes: 0, FailPrimaryPromotionIfSQLThreadNotUpToDate: false, DelayPrimaryPromotionIfSQLThreadNotUpToDate: true, - PostponeSlaveRecoveryOnLagMinutes: 0, + PostponeReplicaRecoveryOnLagMinutes: 0, OSCIgnoreHostnameFilters: []string{}, URLPrefix: "", DiscoveryIgnoreReplicaHostnameFilters: []string{}, @@ -460,46 +456,12 @@ func (this *Configuration) postReadAdjustments() error { this.RecoveryPeriodBlockSeconds = this.RecoveryPeriodBlockMinutes * 60 } - { - if this.ReplicationLagQuery != "" && this.SlaveLagQuery != "" && this.ReplicationLagQuery != this.SlaveLagQuery { - return fmt.Errorf("config's ReplicationLagQuery and SlaveLagQuery are synonyms and cannot both be defined") - } - // ReplicationLagQuery is the replacement param to SlaveLagQuery - if this.ReplicationLagQuery == "" { - this.ReplicationLagQuery = this.SlaveLagQuery - } - // We reset SlaveLagQuery because we want to support multiple config file loading; - // One of the next config files may indicate a new value for ReplicationLagQuery. - // If we do not reset SlaveLagQuery, then the two will have a conflict. 
- this.SlaveLagQuery = "" - } - - { - if this.DetachLostSlavesAfterPrimaryFailover { - this.DetachLostReplicasAfterPrimaryFailover = true - } - } - - { - if this.PrimaryFailoverDetachSlavePrimaryHost { - this.PrimaryFailoverDetachReplicaPrimaryHost = true - } - } if this.FailPrimaryPromotionIfSQLThreadNotUpToDate && this.DelayPrimaryPromotionIfSQLThreadNotUpToDate { return fmt.Errorf("Cannot have both FailPrimaryPromotionIfSQLThreadNotUpToDate and DelayPrimaryPromotionIfSQLThreadNotUpToDate enabled") } if this.FailPrimaryPromotionOnLagMinutes > 0 && this.ReplicationLagQuery == "" { return fmt.Errorf("nonzero FailPrimaryPromotionOnLagMinutes requires ReplicationLagQuery to be set") } - { - if this.PostponeReplicaRecoveryOnLagMinutes != 0 && this.PostponeSlaveRecoveryOnLagMinutes != 0 && - this.PostponeReplicaRecoveryOnLagMinutes != this.PostponeSlaveRecoveryOnLagMinutes { - return fmt.Errorf("config's PostponeReplicaRecoveryOnLagMinutes and PostponeSlaveRecoveryOnLagMinutes are synonyms and cannot both be defined") - } - if this.PostponeSlaveRecoveryOnLagMinutes != 0 { - this.PostponeReplicaRecoveryOnLagMinutes = this.PostponeSlaveRecoveryOnLagMinutes - } - } if this.URLPrefix != "" { // Ensure the prefix starts with "/" and has no trailing one. 
diff --git a/go/vt/orchestrator/config/config_test.go b/go/vt/orchestrator/config/config_test.go index 432869ba036..0a91701c1f9 100644 --- a/go/vt/orchestrator/config/config_test.go +++ b/go/vt/orchestrator/config/config_test.go @@ -12,110 +12,6 @@ func init() { log.SetLevel(log.ERROR) } -func TestReplicationLagQuery(t *testing.T) { - { - c := newConfiguration() - c.SlaveLagQuery = "select 3" - c.ReplicationLagQuery = "select 4" - err := c.postReadAdjustments() - test.S(t).ExpectNotNil(err) - } - { - c := newConfiguration() - c.SlaveLagQuery = "select 3" - c.ReplicationLagQuery = "select 3" - err := c.postReadAdjustments() - test.S(t).ExpectNil(err) - } - { - c := newConfiguration() - c.SlaveLagQuery = "select 3" - c.ReplicationLagQuery = "" - err := c.postReadAdjustments() - test.S(t).ExpectNil(err) - test.S(t).ExpectEquals(c.ReplicationLagQuery, "select 3") - } -} - -func TestPostponeReplicaRecoveryOnLagMinutes(t *testing.T) { - { - c := newConfiguration() - c.PostponeSlaveRecoveryOnLagMinutes = 3 - c.PostponeReplicaRecoveryOnLagMinutes = 5 - err := c.postReadAdjustments() - test.S(t).ExpectNotNil(err) - } - { - c := newConfiguration() - c.PostponeSlaveRecoveryOnLagMinutes = 3 - c.PostponeReplicaRecoveryOnLagMinutes = 3 - err := c.postReadAdjustments() - test.S(t).ExpectNil(err) - } - { - c := newConfiguration() - c.PostponeSlaveRecoveryOnLagMinutes = 3 - c.PostponeReplicaRecoveryOnLagMinutes = 0 - err := c.postReadAdjustments() - test.S(t).ExpectNil(err) - test.S(t).ExpectEquals(c.PostponeReplicaRecoveryOnLagMinutes, uint(3)) - } -} - -func TestPrimaryFailoverDetachReplicaPrimaryHost(t *testing.T) { - { - c := newConfiguration() - c.PrimaryFailoverDetachSlavePrimaryHost = false - c.PrimaryFailoverDetachReplicaPrimaryHost = false - err := c.postReadAdjustments() - test.S(t).ExpectNil(err) - test.S(t).ExpectFalse(c.PrimaryFailoverDetachReplicaPrimaryHost) - } - { - c := newConfiguration() - c.PrimaryFailoverDetachSlavePrimaryHost = false - 
c.PrimaryFailoverDetachReplicaPrimaryHost = true - err := c.postReadAdjustments() - test.S(t).ExpectNil(err) - test.S(t).ExpectTrue(c.PrimaryFailoverDetachReplicaPrimaryHost) - } - { - c := newConfiguration() - c.PrimaryFailoverDetachSlavePrimaryHost = true - c.PrimaryFailoverDetachReplicaPrimaryHost = false - err := c.postReadAdjustments() - test.S(t).ExpectNil(err) - test.S(t).ExpectTrue(c.PrimaryFailoverDetachReplicaPrimaryHost) - } -} - -func TestPrimaryFailoverDetachDetachLostReplicasAfterPrimaryFailover(t *testing.T) { - { - c := newConfiguration() - c.DetachLostSlavesAfterPrimaryFailover = false - c.DetachLostReplicasAfterPrimaryFailover = false - err := c.postReadAdjustments() - test.S(t).ExpectNil(err) - test.S(t).ExpectFalse(c.DetachLostReplicasAfterPrimaryFailover) - } - { - c := newConfiguration() - c.DetachLostSlavesAfterPrimaryFailover = false - c.DetachLostReplicasAfterPrimaryFailover = true - err := c.postReadAdjustments() - test.S(t).ExpectNil(err) - test.S(t).ExpectTrue(c.DetachLostReplicasAfterPrimaryFailover) - } - { - c := newConfiguration() - c.DetachLostSlavesAfterPrimaryFailover = true - c.DetachLostReplicasAfterPrimaryFailover = false - err := c.postReadAdjustments() - test.S(t).ExpectNil(err) - test.S(t).ExpectTrue(c.DetachLostReplicasAfterPrimaryFailover) - } -} - func TestRecoveryPeriodBlock(t *testing.T) { { c := newConfiguration() diff --git a/go/vt/orchestrator/inst/analysis.go b/go/vt/orchestrator/inst/analysis.go index a15baa41eaf..0a1571073bb 100644 --- a/go/vt/orchestrator/inst/analysis.go +++ b/go/vt/orchestrator/inst/analysis.go @@ -149,7 +149,6 @@ type ReplicationAnalysis struct { CountDowntimedReplicas uint ReplicationDepth uint Replicas InstanceKeyMap - SlaveHosts InstanceKeyMap // for backwards compatibility. 
Equals `Replicas` IsFailingToConnectToPrimary bool ReplicationStopped bool Analysis AnalysisCode @@ -202,8 +201,6 @@ func (this *ReplicationAnalysis) MarshalJSON() ([]byte, error) { ReplicationAnalysis }{} i.ReplicationAnalysis = *this - // backwards compatibility - i.SlaveHosts = i.Replicas return json.Marshal(i) } diff --git a/go/vt/orchestrator/inst/instance.go b/go/vt/orchestrator/inst/instance.go index d8b123ae5b0..29f8f6e9617 100644 --- a/go/vt/orchestrator/inst/instance.go +++ b/go/vt/orchestrator/inst/instance.go @@ -45,7 +45,6 @@ type Instance struct { Binlog_format string BinlogRowImage string LogBinEnabled bool - LogSlaveUpdatesEnabled bool // for API backwards compatibility. Equals `LogReplicationUpdatesEnabled` LogReplicationUpdatesEnabled bool SelfBinlogCoordinates BinlogCoordinates SourceKey InstanceKey @@ -53,9 +52,7 @@ type Instance struct { AncestryUUID string IsDetachedPrimary bool - Slave_SQL_Running bool // for API backwards compatibility. Equals `ReplicationSQLThreadRuning` ReplicationSQLThreadRuning bool - Slave_IO_Running bool // for API backwards compatibility. Equals `ReplicationIOThreadRuning` ReplicationIOThreadRuning bool ReplicationSQLThreadState ReplicationThreadState ReplicationIOThreadState ReplicationThreadState @@ -80,9 +77,7 @@ type Instance struct { primaryExecutedGtidSet string // Not exported - SlaveLagSeconds sql.NullInt64 // for API backwards compatibility. Equals `ReplicationLagSeconds` ReplicationLagSeconds sql.NullInt64 - SlaveHosts InstanceKeyMap // for API backwards compatibility. Equals `Replicas` Replicas InstanceKeyMap ClusterName string SuggestedClusterAlias string @@ -163,13 +158,6 @@ func (this *Instance) MarshalJSON() ([]byte, error) { Instance }{} i.Instance = *this - // change terminology. 
Users of the orchestrator API can switch to new terminology and avoid using old terminology - // flip - i.SlaveHosts = i.Replicas - i.SlaveLagSeconds = this.ReplicationLagSeconds - i.LogSlaveUpdatesEnabled = this.LogReplicationUpdatesEnabled - i.Slave_SQL_Running = this.ReplicationSQLThreadRuning - i.Slave_IO_Running = this.ReplicationIOThreadRuning return json.Marshal(i) } From c7b3e8cf00506393c7d5f0a463c16a85e560dc3f Mon Sep 17 00:00:00 2001 From: Manan Gupta Date: Tue, 17 Aug 2021 17:28:19 +0530 Subject: [PATCH 5/6] remove slave terms from api and cli package Signed-off-by: Manan Gupta --- go/vt/orchestrator/app/cli.go | 31 +++------- go/vt/orchestrator/app/command_help.go | 20 +++---- go/vt/orchestrator/http/api.go | 63 +++++--------------- go/vt/orchestrator/http/api_test.go | 22 +------ go/vt/orchestrator/inst/instance_topology.go | 2 +- 5 files changed, 35 insertions(+), 103 deletions(-) diff --git a/go/vt/orchestrator/app/cli.go b/go/vt/orchestrator/app/cli.go index e00e2360b94..b3bbec86b13 100644 --- a/go/vt/orchestrator/app/cli.go +++ b/go/vt/orchestrator/app/cli.go @@ -51,25 +51,8 @@ func (a stringSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] } func (a stringSlice) Less(i, j int) bool { return a[i] < a[j] } var commandSynonyms = map[string]string{ - "stop-slave": "stop-replica", - "start-slave": "start-replica", - "restart-slave": "restart-replica", - "reset-slave": "reset-replica", - "restart-slave-statements": "restart-replica-statements", - "relocate-slaves": "relocate-replicas", - "regroup-slaves": "regroup-replicas", - "move-up-slaves": "move-up-replicas", - "repoint-slaves": "repoint-replicas", - "get-candidate-slave": "get-candidate-replica", - "move-slaves-gtid": "move-replicas-gtid", - "regroup-slaves-gtid": "regroup-replicas-gtid", - "which-cluster-osc-slaves": "which-cluster-osc-replicas", - "which-cluster-gh-ost-slaves": "which-cluster-gh-ost-replicas", - "which-slaves": "which-replicas", - "detach-slave": "detach-replica-primary-host", - 
"detach-replica": "detach-replica-primary-host", - "reattach-slave": "reattach-replica-primary-host", - "reattach-replica": "reattach-replica-primary-host", + "detach-replica": "detach-replica-primary-host", + "reattach-replica": "reattach-replica-primary-host", } func registerCliCommand(command string, section string, description string) string { @@ -486,7 +469,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } fmt.Println(instanceKey.DisplayString()) } - case registerCliCommand("stop-slave", "Replication, general", `Issue a STOP SLAVE on an instance`): + case registerCliCommand("stop-replica", "Replication, general", `Issue a STOP SLAVE on an instance`): { instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) _, err := inst.StopReplication(instanceKey) @@ -495,7 +478,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } fmt.Println(instanceKey.DisplayString()) } - case registerCliCommand("start-slave", "Replication, general", `Issue a START SLAVE on an instance`): + case registerCliCommand("start-replica", "Replication, general", `Issue a START SLAVE on an instance`): { instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) _, err := inst.StartReplication(instanceKey) @@ -504,7 +487,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } fmt.Println(instanceKey.DisplayString()) } - case registerCliCommand("restart-slave", "Replication, general", `STOP and START SLAVE on an instance`): + case registerCliCommand("restart-replica", "Replication, general", `STOP and START SLAVE on an instance`): { instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) _, err := inst.RestartReplication(instanceKey) @@ -513,7 +496,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } fmt.Println(instanceKey.DisplayString()) } - case registerCliCommand("reset-slave", "Replication, general", `Issues a RESET 
SLAVE command; use with care`): + case registerCliCommand("reset-replica", "Replication, general", `Issues a RESET SLAVE command; use with care`): { instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) _, err := inst.ResetReplicationOperation(instanceKey) @@ -570,7 +553,7 @@ func Cli(command string, strict bool, instance string, destination string, owner } fmt.Println(instanceKey.DisplayString()) } - case registerCliCommand("restart-slave-statements", "Replication, general", `Get a list of statements to execute to stop then restore replica to same execution state. Provide --statement for injected statement`): + case registerCliCommand("restart-replica-statements", "Replication, general", `Get a list of statements to execute to stop then restore replica to same execution state. Provide --statement for injected statement`): { instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) if instanceKey == nil { diff --git a/go/vt/orchestrator/app/command_help.go b/go/vt/orchestrator/app/command_help.go index 88ebdd65e80..a81e463d454 100644 --- a/go/vt/orchestrator/app/command_help.go +++ b/go/vt/orchestrator/app/command_help.go @@ -221,20 +221,20 @@ func init() { orchestrator -c reset-primary-gtid-remove-own-uuid -i replica.running.with.gtid.com ` - CommandHelp["stop-slave"] = ` + CommandHelp["stop-replica"] = ` Issues a STOP SLAVE; command. Example: - orchestrator -c stop-slave -i replica.to.be.stopped.com + orchestrator -c stop-replica -i replica.to.be.stopped.com ` - CommandHelp["start-slave"] = ` + CommandHelp["start-replica"] = ` Issues a START SLAVE; command. 
Example: - orchestrator -c start-slave -i replica.to.be.started.com + orchestrator -c start-replica -i replica.to.be.started.com ` - CommandHelp["restart-slave"] = ` + CommandHelp["restart-replica"] = ` Issues STOP SLAVE + START SLAVE; Example: - orchestrator -c restart-slave -i replica.to.be.started.com + orchestrator -c restart-replica -i replica.to.be.started.com ` CommandHelp["skip-query"] = ` On a failed replicating replica, skips a single query and attempts to resume replication. @@ -243,10 +243,10 @@ func init() { orchestrator -c skip-query -i replica.with.broken.sql.thread.com ` - CommandHelp["reset-slave"] = ` + CommandHelp["reset-replica"] = ` Issues a RESET SLAVE command. Destructive to replication. Example: - orchestrator -c reset-slave -i replica.to.reset.com + orchestrator -c reset-replica -i replica.to.reset.com ` CommandHelp["detach-replica"] = ` Stops replication and modifies binlog position into an impossible, yet reversible, value. @@ -280,7 +280,7 @@ func init() { Issuing this on an attached (i.e. normal) replica will do nothing. ` - CommandHelp["restart-slave-statements"] = ` + CommandHelp["restart-replica-statements"] = ` Prints a list of statements to execute to stop then restore replica to same execution state. Provide --statement for injected statement. This is useful for issuing a command that can only be executed while replica is stopped. Such @@ -288,7 +288,7 @@ func init() { Orchestrator will not execute given commands, only print them as courtesy. It may not have the privileges to execute them in the first place. 
Example: - orchestrator -c restart-slave-statements -i some.replica.com -statement="change master to master_heartbeat_period=5" + orchestrator -c restart-replica-statements -i some.replica.com -statement="change master to master_heartbeat_period=5" ` CommandHelp["set-read-only"] = ` diff --git a/go/vt/orchestrator/http/api.go b/go/vt/orchestrator/http/api.go index f43ce6c299d..4ea997e238a 100644 --- a/go/vt/orchestrator/http/api.go +++ b/go/vt/orchestrator/http/api.go @@ -50,25 +50,6 @@ const ( OK ) -var apiSynonyms = map[string]string{ - "relocate-slaves": "relocate-replicas", - "regroup-slaves": "regroup-replicas", - "move-up-slaves": "move-up-replicas", - "repoint-slaves": "repoint-replicas", - "regroup-slaves-bls": "regroup-replicas-bls", - "move-slaves-gtid": "move-replicas-gtid", - "regroup-slaves-gtid": "regroup-replicas-gtid", - "detach-slave": "detach-replica", - "reattach-slave": "reattach-replica", - "cluster-osc-slaves": "cluster-osc-replicas", - "start-slave": "start-replica", - "restart-slave": "restart-replica", - "stop-slave": "stop-replica", - "stop-slave-nice": "stop-replica-nice", - "reset-slave": "reset-replica", - "restart-slave-statements": "restart-replica-statements", -} - var registeredPaths = []string{} var emptyInstanceKey inst.InstanceKey @@ -2790,14 +2771,6 @@ func (this *HttpAPI) CheckGlobalRecoveries(params martini.Params, r render.Rende Respond(r, &APIResponse{Code: OK, Message: fmt.Sprintf("Global recoveries %+v", details), Details: details}) } -func (this *HttpAPI) getSynonymPath(path string) (synonymPath string) { - pathBase := strings.Split(path, "/")[0] - if synonym, ok := apiSynonyms[pathBase]; ok { - synonymPath = fmt.Sprintf("%s%s", synonym, path[len(pathBase):]) - } - return synonymPath -} - func (this *HttpAPI) registerSingleAPIRequest(m *martini.ClassicMartini, path string, handler martini.Handler, allowProxy bool) { registeredPaths = append(registeredPaths, path) fullPath := fmt.Sprintf("%s/api/%s", this.URLPrefix, path) 
@@ -2807,10 +2780,6 @@ func (this *HttpAPI) registerSingleAPIRequest(m *martini.ClassicMartini, path st func (this *HttpAPI) registerAPIRequestInternal(m *martini.ClassicMartini, path string, handler martini.Handler, allowProxy bool) { this.registerSingleAPIRequest(m, path, handler, allowProxy) - - if synonym := this.getSynonymPath(path); synonym != "" { - this.registerSingleAPIRequest(m, synonym, handler, allowProxy) - } } func (this *HttpAPI) registerAPIRequest(m *martini.ClassicMartini, path string, handler martini.Handler) { @@ -2826,26 +2795,26 @@ func (this *HttpAPI) RegisterRequests(m *martini.ClassicMartini) { // Smart relocation: this.registerAPIRequest(m, "relocate/:host/:port/:belowHost/:belowPort", this.RelocateBelow) this.registerAPIRequest(m, "relocate-below/:host/:port/:belowHost/:belowPort", this.RelocateBelow) - this.registerAPIRequest(m, "relocate-slaves/:host/:port/:belowHost/:belowPort", this.RelocateReplicas) - this.registerAPIRequest(m, "regroup-slaves/:host/:port", this.RegroupReplicas) + this.registerAPIRequest(m, "relocate-replicas/:host/:port/:belowHost/:belowPort", this.RelocateReplicas) + this.registerAPIRequest(m, "regroup-replicas/:host/:port", this.RegroupReplicas) // Classic file:pos relocation: this.registerAPIRequest(m, "move-up/:host/:port", this.MoveUp) - this.registerAPIRequest(m, "move-up-slaves/:host/:port", this.MoveUpReplicas) + this.registerAPIRequest(m, "move-up-replicas/:host/:port", this.MoveUpReplicas) this.registerAPIRequest(m, "move-below/:host/:port/:siblingHost/:siblingPort", this.MoveBelow) this.registerAPIRequest(m, "repoint/:host/:port/:belowHost/:belowPort", this.Repoint) - this.registerAPIRequest(m, "repoint-slaves/:host/:port", this.RepointReplicas) + this.registerAPIRequest(m, "repoint-replicas/:host/:port", this.RepointReplicas) this.registerAPIRequest(m, "make-co-primary/:host/:port", this.MakeCoPrimary) this.registerAPIRequest(m, "take-siblings/:host/:port", this.TakeSiblings) this.registerAPIRequest(m, 
"take-primary/:host/:port", this.TakePrimary) // Binlog server relocation: - this.registerAPIRequest(m, "regroup-slaves-bls/:host/:port", this.RegroupReplicasBinlogServers) + this.registerAPIRequest(m, "regroup-replicas-bls/:host/:port", this.RegroupReplicasBinlogServers) // GTID relocation: this.registerAPIRequest(m, "move-below-gtid/:host/:port/:belowHost/:belowPort", this.MoveBelowGTID) - this.registerAPIRequest(m, "move-slaves-gtid/:host/:port/:belowHost/:belowPort", this.MoveReplicasGTID) - this.registerAPIRequest(m, "regroup-slaves-gtid/:host/:port", this.RegroupReplicasGTID) + this.registerAPIRequest(m, "move-replicas-gtid/:host/:port/:belowHost/:belowPort", this.MoveReplicasGTID) + this.registerAPIRequest(m, "regroup-replicas-gtid/:host/:port", this.RegroupReplicasGTID) // Replication, general: this.registerAPIRequest(m, "enable-gtid/:host/:port", this.EnableGTID) @@ -2854,18 +2823,18 @@ func (this *HttpAPI) RegisterRequests(m *martini.ClassicMartini) { this.registerAPIRequest(m, "gtid-errant-reset-primary/:host/:port", this.ErrantGTIDResetPrimary) this.registerAPIRequest(m, "gtid-errant-inject-empty/:host/:port", this.ErrantGTIDInjectEmpty) this.registerAPIRequest(m, "skip-query/:host/:port", this.SkipQuery) - this.registerAPIRequest(m, "start-slave/:host/:port", this.StartReplication) - this.registerAPIRequest(m, "restart-slave/:host/:port", this.RestartReplication) - this.registerAPIRequest(m, "stop-slave/:host/:port", this.StopReplication) - this.registerAPIRequest(m, "stop-slave-nice/:host/:port", this.StopReplicationNicely) - this.registerAPIRequest(m, "reset-slave/:host/:port", this.ResetReplication) - this.registerAPIRequest(m, "detach-slave/:host/:port", this.DetachReplicaPrimaryHost) - this.registerAPIRequest(m, "reattach-slave/:host/:port", this.ReattachReplicaPrimaryHost) + this.registerAPIRequest(m, "start-replica/:host/:port", this.StartReplication) + this.registerAPIRequest(m, "restart-replica/:host/:port", this.RestartReplication) + 
this.registerAPIRequest(m, "stop-replica/:host/:port", this.StopReplication) + this.registerAPIRequest(m, "stop-replica-nice/:host/:port", this.StopReplicationNicely) + this.registerAPIRequest(m, "reset-replica/:host/:port", this.ResetReplication) + this.registerAPIRequest(m, "detach-replica/:host/:port", this.DetachReplicaPrimaryHost) + this.registerAPIRequest(m, "reattach-replica/:host/:port", this.ReattachReplicaPrimaryHost) this.registerAPIRequest(m, "detach-replica-primary-host/:host/:port", this.DetachReplicaPrimaryHost) this.registerAPIRequest(m, "reattach-replica-primary-host/:host/:port", this.ReattachReplicaPrimaryHost) this.registerAPIRequest(m, "flush-binary-logs/:host/:port", this.FlushBinaryLogs) this.registerAPIRequest(m, "purge-binary-logs/:host/:port/:logFile", this.PurgeBinaryLogs) - this.registerAPIRequest(m, "restart-slave-statements/:host/:port", this.RestartReplicationStatements) + this.registerAPIRequest(m, "restart-replica-statements/:host/:port", this.RestartReplicationStatements) // Replication information: this.registerAPIRequest(m, "can-replicate-from/:host/:port/:belowHost/:belowPort", this.CanReplicateFrom) @@ -2895,7 +2864,7 @@ func (this *HttpAPI) RegisterRequests(m *martini.ClassicMartini) { this.registerAPIRequest(m, "cluster/instance/:host/:port", this.ClusterByInstance) this.registerAPIRequest(m, "cluster-info/:clusterHint", this.ClusterInfo) this.registerAPIRequest(m, "cluster-info/alias/:clusterAlias", this.ClusterInfoByAlias) - this.registerAPIRequest(m, "cluster-osc-slaves/:clusterHint", this.ClusterOSCReplicas) + this.registerAPIRequest(m, "cluster-osc-replicas/:clusterHint", this.ClusterOSCReplicas) this.registerAPIRequest(m, "set-cluster-alias/:clusterName", this.SetClusterAliasManualOverride) this.registerAPIRequest(m, "clusters", this.Clusters) this.registerAPIRequest(m, "clusters-info", this.ClustersInfo) diff --git a/go/vt/orchestrator/http/api_test.go b/go/vt/orchestrator/http/api_test.go index 
3070fbec938..c25677cc5b8 100644 --- a/go/vt/orchestrator/http/api_test.go +++ b/go/vt/orchestrator/http/api_test.go @@ -17,21 +17,6 @@ func init() { log.SetLevel(log.ERROR) } -func TestGetSynonymPath(t *testing.T) { - api := HttpAPI{} - - { - path := "relocate-slaves" - synonym := api.getSynonymPath(path) - test.S(t).ExpectEquals(synonym, "relocate-replicas") - } - { - path := "relocate-slaves/:host/:port" - synonym := api.getSynonymPath(path) - test.S(t).ExpectEquals(synonym, "relocate-replicas/:host/:port") - } -} - func TestKnownPaths(t *testing.T) { m := martini.Classic() api := HttpAPI{} @@ -46,10 +31,5 @@ func TestKnownPaths(t *testing.T) { test.S(t).ExpectTrue(pathsMap["health"]) test.S(t).ExpectTrue(pathsMap["lb-check"]) test.S(t).ExpectTrue(pathsMap["relocate"]) - test.S(t).ExpectTrue(pathsMap["relocate-slaves"]) - - for path, synonym := range apiSynonyms { - test.S(t).ExpectTrue(pathsMap[path]) - test.S(t).ExpectTrue(pathsMap[synonym]) - } + test.S(t).ExpectTrue(pathsMap["relocate-replicas"]) } diff --git a/go/vt/orchestrator/inst/instance_topology.go b/go/vt/orchestrator/inst/instance_topology.go index 9116b578f95..b08d9ba7c2d 100644 --- a/go/vt/orchestrator/inst/instance_topology.go +++ b/go/vt/orchestrator/inst/instance_topology.go @@ -1016,7 +1016,7 @@ Cleanup: } // and we're done (pending deferred functions) - AuditOperation("reset-slave", instanceKey, fmt.Sprintf("%+v replication reset", *instanceKey)) + AuditOperation("reset-replica", instanceKey, fmt.Sprintf("%+v replication reset", *instanceKey)) return instance, err } From e9ef83dc6046033a7412662edcc6056236f703ba Mon Sep 17 00:00:00 2001 From: Manan Gupta Date: Tue, 17 Aug 2021 17:32:38 +0530 Subject: [PATCH 6/6] fix test configuration files to reflect the change Signed-off-by: Manan Gupta --- go/test/endtoend/vtorc/test_config.json | 2 +- .../endtoend/vtorc/test_config_crosscenter_prefer_prevent.json | 2 +- go/test/endtoend/vtorc/test_config_promotion_failure.json | 2 +-
go/test/endtoend/vtorc/test_config_promotion_success.json | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/go/test/endtoend/vtorc/test_config.json b/go/test/endtoend/vtorc/test_config.json index fab6c7974c5..8ba28850090 100644 --- a/go/test/endtoend/vtorc/test_config.json +++ b/go/test/endtoend/vtorc/test_config.json @@ -6,5 +6,5 @@ "MySQLReplicaPassword": "", "RecoveryPeriodBlockSeconds": 1, "InstancePollSeconds": 1, - "PreventCrossDataCenterMasterFailover": true + "PreventCrossDataCenterPrimaryFailover": true } diff --git a/go/test/endtoend/vtorc/test_config_crosscenter_prefer_prevent.json b/go/test/endtoend/vtorc/test_config_crosscenter_prefer_prevent.json index 794a36f8a22..b2232a99ce7 100644 --- a/go/test/endtoend/vtorc/test_config_crosscenter_prefer_prevent.json +++ b/go/test/endtoend/vtorc/test_config_crosscenter_prefer_prevent.json @@ -10,5 +10,5 @@ "DurabilityParams": { "zone2-0000000200": "prefer" }, - "PreventCrossDataCenterMasterFailover": true + "PreventCrossDataCenterPrimaryFailover": true } diff --git a/go/test/endtoend/vtorc/test_config_promotion_failure.json b/go/test/endtoend/vtorc/test_config_promotion_failure.json index ab22d85e02d..94daa6a729f 100644 --- a/go/test/endtoend/vtorc/test_config_promotion_failure.json +++ b/go/test/endtoend/vtorc/test_config_promotion_failure.json @@ -7,5 +7,5 @@ "RecoveryPeriodBlockSeconds": 1, "InstancePollSeconds": 1, "ReplicationLagQuery": "select 61", - "FailMasterPromotionOnLagMinutes": 1 + "FailPrimaryPromotionOnLagMinutes": 1 } diff --git a/go/test/endtoend/vtorc/test_config_promotion_success.json b/go/test/endtoend/vtorc/test_config_promotion_success.json index 361c107d898..5084e4598ec 100644 --- a/go/test/endtoend/vtorc/test_config_promotion_success.json +++ b/go/test/endtoend/vtorc/test_config_promotion_success.json @@ -7,5 +7,5 @@ "RecoveryPeriodBlockSeconds": 1, "InstancePollSeconds": 1, "ReplicationLagQuery": "select 59", - "FailMasterPromotionOnLagMinutes": 1 + 
"FailPrimaryPromotionOnLagMinutes": 1 }