Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added mysql-monitor_replication_lag_group_by_host #3867

Merged
merged 1 commit into from
May 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/MySQL_HostGroups_Manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,7 @@ class MySQL_HostGroups_Manager {
void push_MyConn_to_pool_array(MySQL_Connection **, unsigned int);
void destroy_MyConn_from_pool(MySQL_Connection *, bool _lock=true);

void replication_lag_action_inner(MyHGC *, char*, unsigned int, int);
void replication_lag_action(int, char*, unsigned int, int);
void read_only_action(char *hostname, int port, int read_only);
unsigned int get_servers_table_version();
Expand Down
2 changes: 2 additions & 0 deletions include/MySQL_Thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ struct p_th_gauge {
mysql_monitor_read_only_interval,
mysql_monitor_read_only_timeout,
mysql_monitor_writer_is_also_reader,
mysql_monitor_replication_lag_group_by_host,
mysql_monitor_replication_lag_interval,
mysql_monitor_replication_lag_timeout,
mysql_monitor_history,
Expand Down Expand Up @@ -434,6 +435,7 @@ class MySQL_Threads_Handler
//! ProxySQL session wait timeout. Unit: 'ms'.
bool monitor_wait_timeout;
bool monitor_writer_is_also_reader;
bool monitor_replication_lag_group_by_host;
//! How frequently a replication lag check is performed. Unit: 'ms'.
int monitor_replication_lag_interval;
//! Read only check timeout. Unit: 'ms'.
Expand Down
2 changes: 2 additions & 0 deletions include/proxysql_structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -875,6 +875,7 @@ __thread int mysql_thread___monitor_read_only_timeout;
__thread int mysql_thread___monitor_read_only_max_timeout_count;
__thread bool mysql_thread___monitor_wait_timeout;
__thread bool mysql_thread___monitor_writer_is_also_reader;
// Per-thread copy of the `monitor_replication_lag_group_by_host` setting.
// Declared `bool` so that this definition agrees with the matching
// `extern __thread bool` declaration of the same variable; a definition and
// declaration that disagree on type make every access from another
// translation unit undefined behaviour.
__thread bool mysql_thread___monitor_replication_lag_group_by_host;
__thread int mysql_thread___monitor_replication_lag_interval;
__thread int mysql_thread___monitor_replication_lag_timeout;
__thread int mysql_thread___monitor_replication_lag_count;
Expand Down Expand Up @@ -1035,6 +1036,7 @@ extern __thread int mysql_thread___monitor_read_only_timeout;
extern __thread int mysql_thread___monitor_read_only_max_timeout_count;
extern __thread bool mysql_thread___monitor_wait_timeout;
extern __thread bool mysql_thread___monitor_writer_is_also_reader;
extern __thread bool mysql_thread___monitor_replication_lag_group_by_host;
extern __thread int mysql_thread___monitor_replication_lag_interval;
extern __thread int mysql_thread___monitor_replication_lag_timeout;
extern __thread int mysql_thread___monitor_replication_lag_count;
Expand Down
97 changes: 54 additions & 43 deletions lib/MySQL_HostGroups_Manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3439,58 +3439,69 @@ void MySQL_HostGroups_Manager::add(MySrvC *mysrvc, unsigned int _hid) {
myhgc->mysrvs->add(mysrvc);
}

void MySQL_HostGroups_Manager::replication_lag_action(int _hid, char *address, unsigned int port, int current_replication_lag) {
GloAdmin->mysql_servers_wrlock();
wrlock();
void MySQL_HostGroups_Manager::replication_lag_action_inner(MyHGC *myhgc, char *address, unsigned int port, int current_replication_lag) {
int j;
MyHGC *myhgc = MyHGC_find(_hid);
if (myhgc) {
for (j=0; j<(int)myhgc->mysrvs->cnt(); j++) {
MySrvC *mysrvc=(MySrvC *)myhgc->mysrvs->servers->index(j);
if (strcmp(mysrvc->address,address)==0 && mysrvc->port==port) {
if (mysrvc->status==MYSQL_SERVER_STATUS_ONLINE) {
if (
// (current_replication_lag==-1 )
// ||
(current_replication_lag>=0 && ((unsigned int)current_replication_lag > mysrvc->max_replication_lag))
) {
// always increase the counter
mysrvc->cur_replication_lag_count += 1;
if (mysrvc->cur_replication_lag_count >= mysql_thread___monitor_replication_lag_count) {
proxy_warning("Shunning server %s:%d from HG %u with replication lag of %d second, count number: '%d'\n", address, port, myhgc->hid, current_replication_lag, mysrvc->cur_replication_lag_count);
mysrvc->status=MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG;
} else {
proxy_info(
"Not shunning server %s:%d from HG %u with replication lag of %d second, count number: '%d' < replication_lag_count: '%d'\n",
address,
port,
myhgc->hid,
current_replication_lag,
mysrvc->cur_replication_lag_count,
mysql_thread___monitor_replication_lag_count
);
}
for (j=0; j<(int)myhgc->mysrvs->cnt(); j++) {
MySrvC *mysrvc=(MySrvC *)myhgc->mysrvs->servers->index(j);
if (strcmp(mysrvc->address,address)==0 && mysrvc->port==port) {
if (mysrvc->status==MYSQL_SERVER_STATUS_ONLINE) {
if (
// (current_replication_lag==-1 )
// ||
(current_replication_lag>=0 && ((unsigned int)current_replication_lag > mysrvc->max_replication_lag))
) {
// always increase the counter
mysrvc->cur_replication_lag_count += 1;
if (mysrvc->cur_replication_lag_count >= mysql_thread___monitor_replication_lag_count) {
proxy_warning("Shunning server %s:%d from HG %u with replication lag of %d second, count number: '%d'\n", address, port, myhgc->hid, current_replication_lag, mysrvc->cur_replication_lag_count);
mysrvc->status=MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG;
} else {
mysrvc->cur_replication_lag_count = 0;
proxy_info(
"Not shunning server %s:%d from HG %u with replication lag of %d second, count number: '%d' < replication_lag_count: '%d'\n",
address,
port,
myhgc->hid,
current_replication_lag,
mysrvc->cur_replication_lag_count,
mysql_thread___monitor_replication_lag_count
);
}
} else {
if (mysrvc->status==MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG) {
if (
(current_replication_lag>=0 && ((unsigned int)current_replication_lag <= mysrvc->max_replication_lag))
||
(current_replication_lag==-2) // see issue 959
) {
mysrvc->status=MYSQL_SERVER_STATUS_ONLINE;
proxy_warning("Re-enabling server %s:%d from HG %u with replication lag of %d second\n", address, port, myhgc->hid, current_replication_lag);
mysrvc->cur_replication_lag_count = 0;
}
mysrvc->cur_replication_lag_count = 0;
}
} else {
if (mysrvc->status==MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG) {
if (
(current_replication_lag>=0 && ((unsigned int)current_replication_lag <= mysrvc->max_replication_lag))
||
(current_replication_lag==-2) // see issue 959
) {
mysrvc->status=MYSQL_SERVER_STATUS_ONLINE;
proxy_warning("Re-enabling server %s:%d from HG %u with replication lag of %d second\n", address, port, myhgc->hid, current_replication_lag);
mysrvc->cur_replication_lag_count = 0;
}
}
goto __exit_replication_lag_action;
}
return;
}
}
}

/**
 * @brief Apply a replication-lag measurement for the server `address:port`.
 *
 * Takes the admin mysql_servers write lock and this manager's write lock,
 * forwards the measurement to replication_lag_action_inner() for the relevant
 * hostgroup(s), then releases both locks in reverse order.
 *
 * @param _hid Hostgroup id the measurement was taken for. Only used when
 *   `mysql_thread___monitor_replication_lag_group_by_host` is false.
 * @param address Hostname/address of the monitored server.
 * @param port Port of the monitored server.
 * @param current_replication_lag Measured lag value, passed through unchanged
 *   to replication_lag_action_inner().
 */
void MySQL_HostGroups_Manager::replication_lag_action(int _hid, char *address, unsigned int port, int current_replication_lag) {
	GloAdmin->mysql_servers_wrlock();
	wrlock();
	if (mysql_thread___monitor_replication_lag_group_by_host == false) {
		// legacy check. 1 check per server per hostgroup
		MyHGC *myhgc = MyHGC_find(_hid);
		// The helper dereferences the hostgroup unconditionally, so skip the
		// call when no hostgroup was found for _hid.
		if (myhgc) {
			replication_lag_action_inner(myhgc, address, port, current_replication_lag);
		}
	} else {
		// only 1 check per server, no matter the hostgroup
		// all hostgroups must be searched
		for (unsigned int i = 0; i < MyHostGroups->len; i++) {
			MyHGC *myhgc = (MyHGC *)MyHostGroups->index(i);
			replication_lag_action_inner(myhgc, address, port, current_replication_lag);
		}
	}
	wrunlock();
	GloAdmin->mysql_servers_wrunlock();
}
Expand Down
7 changes: 6 additions & 1 deletion lib/MySQL_Monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3233,7 +3233,12 @@ void * MySQL_Monitor::monitor_replication_lag() {
char *error=NULL;
SQLite3_result *resultset=NULL;
// add support for SSL
char *query=(char *)"SELECT hostgroup_id, hostname, port, max_replication_lag, use_ssl FROM mysql_servers WHERE max_replication_lag > 0 AND status NOT IN (2,3)";
char *query= NULL;
if (mysql_thread___monitor_replication_lag_group_by_host==true) {
query = (char *)"SELECT MIN(hostgroup_id), hostname, port, MIN(max_replication_lag), MAX(use_ssl) FROM mysql_servers WHERE max_replication_lag > 0 AND status NOT IN (2,3) GROUP BY hostname, port";
} else {
query=(char *)"SELECT hostgroup_id, hostname, port, max_replication_lag, use_ssl FROM mysql_servers WHERE max_replication_lag > 0 AND status NOT IN (2,3)";
}
t1=monotonic_time();

if (!GloMTH) return NULL; // quick exit during shutdown/restart
Expand Down
11 changes: 11 additions & 0 deletions lib/MySQL_Thread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,7 @@ static char * mysql_thread_variables_names[]= {
(char *)"monitor_read_only_interval",
(char *)"monitor_read_only_timeout",
(char *)"monitor_read_only_max_timeout_count",
(char *)"monitor_replication_lag_group_by_host",
(char *)"monitor_replication_lag_interval",
(char *)"monitor_replication_lag_timeout",
(char *)"monitor_replication_lag_count",
Expand Down Expand Up @@ -990,6 +991,12 @@ th_metrics_map = std::make_tuple(
"Encodes different behaviors for nodes depending on their 'READ_ONLY' flag value.",
metric_tags {}
),
std::make_tuple (
p_th_gauge::mysql_monitor_replication_lag_group_by_host,
"proxysql_monitor_replication_lag_group_by_host",
"Encodes different replication lag check if the same server is in multiple hostgroups.",
metric_tags {}
),
std::make_tuple (
p_th_gauge::mysql_monitor_replication_lag_interval,
"proxysql_mysql_monitor_replication_lag_interval_seconds",
Expand Down Expand Up @@ -1058,6 +1065,7 @@ MySQL_Threads_Handler::MySQL_Threads_Handler() {
variables.monitor_read_only_interval=1000;
variables.monitor_read_only_timeout=800;
variables.monitor_read_only_max_timeout_count=3;
variables.monitor_replication_lag_group_by_host=false;
variables.monitor_replication_lag_interval=10000;
variables.monitor_replication_lag_timeout=1000;
variables.monitor_replication_lag_count=1;
Expand Down Expand Up @@ -2082,6 +2090,7 @@ char ** MySQL_Threads_Handler::get_variables_list() {
VariablesPointers_bool["log_mysql_warnings_enabled"] = make_tuple(&variables.log_mysql_warnings_enabled, false);
VariablesPointers_bool["log_unhealthy_connections"] = make_tuple(&variables.log_unhealthy_connections, false);
VariablesPointers_bool["monitor_enabled"] = make_tuple(&variables.monitor_enabled, false);
VariablesPointers_bool["monitor_replication_lag_group_by_host"] = make_tuple(&variables.monitor_replication_lag_group_by_host, false);
VariablesPointers_bool["monitor_wait_timeout"] = make_tuple(&variables.monitor_wait_timeout, false);
VariablesPointers_bool["monitor_writer_is_also_reader"] = make_tuple(&variables.monitor_writer_is_also_reader, false);
VariablesPointers_bool["multiplexing"] = make_tuple(&variables.multiplexing, false);
Expand Down Expand Up @@ -3935,6 +3944,7 @@ void MySQL_Thread::refresh_variables() {
mysql_thread___monitor_read_only_interval=GloMTH->get_variable_int((char *)"monitor_read_only_interval");
mysql_thread___monitor_read_only_timeout=GloMTH->get_variable_int((char *)"monitor_read_only_timeout");
mysql_thread___monitor_read_only_max_timeout_count=GloMTH->get_variable_int((char *)"monitor_read_only_max_timeout_count");
mysql_thread___monitor_replication_lag_group_by_host=(bool)GloMTH->get_variable_int((char *)"monitor_replication_lag_group_by_host");
mysql_thread___monitor_replication_lag_interval=GloMTH->get_variable_int((char *)"monitor_replication_lag_interval");
mysql_thread___monitor_replication_lag_timeout=GloMTH->get_variable_int((char *)"monitor_replication_lag_timeout");
mysql_thread___monitor_replication_lag_count=GloMTH->get_variable_int((char *)"monitor_replication_lag_count");
Expand Down Expand Up @@ -5188,6 +5198,7 @@ void MySQL_Threads_Handler::p_update_metrics() {
this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_read_only_interval]->Set(this->variables.monitor_read_only_interval/1000.0);
this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_read_only_timeout]->Set(this->variables.monitor_read_only_timeout/1000.0);
this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_writer_is_also_reader]->Set(this->variables.monitor_writer_is_also_reader);
this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_replication_lag_group_by_host]->Set(this->variables.monitor_replication_lag_group_by_host);
this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_replication_lag_interval]->Set(this->variables.monitor_replication_lag_interval/1000.0);
this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_replication_lag_timeout]->Set(this->variables.monitor_replication_lag_timeout/1000.0);
this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_history]->Set(this->variables.monitor_history/1000.0);
Expand Down