Skip to content

Commit

Permalink
[DE84] Reconnecting to disconnected targets periodically, and, settin…
Browse files Browse the repository at this point in the history
…g keepalive parameters for connections (#95)

* Setting keepalive on connected sockets
* Scan for any pending connects to target at least every 2 seconds

Signed-off-by: Vishnu Itta <[email protected]>
  • Loading branch information
vishnuitta authored Aug 13, 2018
1 parent 635c465 commit d05cbc4
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 0 deletions.
12 changes: 12 additions & 0 deletions include/mgmt_conn.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,18 @@
extern "C" {
#endif

/*
 * Compute the time elapsed since _st on clock _clockid.
 *
 * _st, _now and _re are struct timespec lvalues. The current time is read
 * into _now with clock_gettime() and the difference (_now - _st) is stored
 * in _re. When the nanosecond difference would be negative, one second is
 * borrowed so that _re.tv_nsec stays non-negative for normalized inputs.
 * The return value of clock_gettime() is not checked.
 *
 * The body is wrapped in do { } while (0) so the macro acts as a single
 * statement (safe in an unbraced if/else). Arguments are parenthesized but
 * are evaluated more than once, so pass side-effect-free lvalues.
 */
#define	timesdiff(_clockid, _st, _now, _re)				\
do {									\
	clock_gettime((_clockid), &(_now));				\
	if (((_now).tv_nsec - (_st).tv_nsec) < 0) {			\
		(_re).tv_sec = (_now).tv_sec - (_st).tv_sec - 1;	\
		(_re).tv_nsec = 1000000000 + (_now).tv_nsec -		\
		    (_st).tv_nsec;					\
	} else {							\
		(_re).tv_sec = (_now).tv_sec - (_st).tv_sec;		\
		(_re).tv_nsec = (_now).tv_nsec - (_st).tv_nsec;		\
	}								\
} while (0)

/*
* Mgmt connection states.
*/
Expand Down
1 change: 1 addition & 0 deletions include/zrepl_mgmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ extern int uzfs_zinfo_destroy(const char *ds_name, spa_t *spa);
uint64_t uzfs_zvol_get_last_committed_io_no(zvol_state_t *zv);
void uzfs_zvol_store_last_committed_io_no(zvol_state_t *zv,
uint64_t io_seq);
/*
 * Enable keepalive on socket `sfd`: sets SO_KEEPALIVE and then the
 * TCP_KEEPCNT, TCP_KEEPIDLE and TCP_KEEPINTVL options.
 * Returns non-zero (an errno value) when one of the setsockopt() calls fails.
 */
extern int set_socket_keepalive(int sfd);
extern int create_and_bind(const char *port, int bind_needed,
boolean_t nonblocking);
int uzfs_zvol_name_compare(zvol_info_t *zv, const char *name);
Expand Down
51 changes: 51 additions & 0 deletions lib/libzpool/zrepl_mgmt.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
#include <uzfs_mgmt.h>
#include <uzfs_zap.h>
#include <uzfs_io.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#define ZVOL_THREAD_STACKSIZE (2 * 1024 * 1024)

Expand Down Expand Up @@ -63,6 +66,53 @@ zrepl_log(enum zrepl_log_level lvl, const char *fmt, ...)
fprintf(stderr, "%s\n", line);
}

/*
 * Set one integer-valued socket option on `sfd`, logging on failure.
 *
 * `optstr` is the option's name, used only in the log message.
 * Returns 0 on success, otherwise the errno value left by setsockopt().
 */
static int
keepalive_setopt(int sfd, int level, int optname, const char *optstr,
    int value)
{
	int err;

	if (setsockopt(sfd, level, optname, &value, sizeof (value)) == 0)
		return (0);

	/* Capture errno before logging; the logger may overwrite it. */
	err = errno;
	LOG_ERR("Failed to set %s for fd(%d) err(%d)\n", optstr, sfd, err);
	return (err);
}

/*
 * Enable TCP keepalive on socket `sfd`.
 *
 * Turns on SO_KEEPALIVE, then sets the probe count (TCP_KEEPCNT), the idle
 * time before the first probe (TCP_KEEPIDLE) and the interval between
 * probes (TCP_KEEPINTVL), each to 5. Options are applied in that order and
 * processing stops at the first failure.
 *
 * Returns 0 on success. Returns EINVAL when `sfd` is below 3, and otherwise
 * the errno value of the setsockopt() call that failed. Every failure is
 * also logged via LOG_ERR.
 */
int
set_socket_keepalive(int sfd)
{
	const int max_idle_time = 5;
	const int max_try = 5;
	const int probe_interval = 5;
	int ret;

	/*
	 * Descriptors 0-2 are rejected (presumably to avoid touching the
	 * standard streams). Report it as an error instead of success.
	 */
	if (sfd < 3) {
		LOG_ERR("can't set keepalive on fd(%d)\n", sfd);
		return (EINVAL);
	}

	ret = keepalive_setopt(sfd, SOL_SOCKET, SO_KEEPALIVE,
	    "SO_KEEPALIVE", 1);
	if (ret != 0)
		return (ret);

	/* IPPROTO_TCP is the portable spelling of the TCP option level. */
	ret = keepalive_setopt(sfd, IPPROTO_TCP, TCP_KEEPCNT,
	    "TCP_KEEPCNT", max_try);
	if (ret != 0)
		return (ret);

	ret = keepalive_setopt(sfd, IPPROTO_TCP, TCP_KEEPIDLE,
	    "TCP_KEEPIDLE", max_idle_time);
	if (ret != 0)
		return (ret);

	return (keepalive_setopt(sfd, IPPROTO_TCP, TCP_KEEPINTVL,
	    "TCP_KEEPINTVL", probe_interval));
}

int
create_and_bind(const char *port, int bind_needed, boolean_t nonblock)
{
Expand Down Expand Up @@ -108,6 +158,7 @@ create_and_bind(const char *port, int bind_needed, boolean_t nonblock)
}

close(sfd);
sfd = -1;
}

if (result != NULL)
Expand Down
12 changes: 12 additions & 0 deletions lib/libzrepl/data_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,11 @@ uzfs_zvol_rebuild_dw_replica(void *arg)
goto exit;
}

rc = set_socket_keepalive(sfd);
if (rc != 0)
LOG_ERR("keepalive errored on connected rebuild fd %d", sfd);
rc = 0;

/* Set the in-progress state now */
checkpointed_ionum = uzfs_zvol_get_last_committed_io_no(zinfo->zv);
zvol_state = zinfo->zv;
Expand Down Expand Up @@ -777,6 +782,13 @@ uzfs_zvol_io_conn_acceptor(void *arg)
kmem_free(hbuf, NI_MAXHOST);
kmem_free(sbuf, NI_MAXSERV);
#endif

rc = set_socket_keepalive(new_fd);
if (rc != 0)
LOG_ERR("Failed to set keepalive on "
"accepted fd %d", new_fd);
rc = 0;

if (events[i].data.fd == io_sfd) {
LOG_INFO("New data connection");
thrd_info = zk_thread_create(NULL, 0,
Expand Down
14 changes: 14 additions & 0 deletions lib/libzrepl/mgmt_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ connect_to_tgt(uzfs_mgmt_conn_t *conn)
conn->conn_port);
return (-1);
}

return (sfd);
}

Expand Down Expand Up @@ -1048,6 +1049,10 @@ move_to_next_state(uzfs_mgmt_conn_t *conn)
switch (conn->conn_state) {
case CS_CONNECT:
LOGCONN(conn, "Connected");
rc = set_socket_keepalive(conn->conn_fd);
if (rc != 0)
LOGERRCONN(conn, "Failed to set keepalive");
rc = 0;
/* Fall-through */
case CS_INIT:
DBGCONN(conn, "Reading version..");
Expand Down Expand Up @@ -1128,6 +1133,7 @@ uzfs_zvol_mgmt_thread(void *arg)
int nfds, i, rc;
boolean_t do_scan;
async_task_t *async_task;
struct timespec diff_time, now, last_time;

SLIST_INIT(&uzfs_mgmt_conns);
mutex_init(&conn_list_mtx, NULL, MUTEX_DEFAULT, NULL);
Expand All @@ -1151,6 +1157,7 @@ uzfs_zvol_mgmt_thread(void *arg)
}

prctl(PR_SET_NAME, "mgmt_conn", 0, 0, 0);
clock_gettime(CLOCK_MONOTONIC, &last_time);

/*
* The only reason to break from this loop is a failure to update FDs
Expand Down Expand Up @@ -1255,10 +1262,17 @@ uzfs_zvol_mgmt_thread(void *arg)
* Scan the list if signalled, if timed out waiting for an event,
* or if at least RECONNECT_DELAY / 2 seconds have passed since
* the last scan
if (nfds != 0 && !do_scan) {
timesdiff(CLOCK_MONOTONIC, last_time, now, diff_time);
if (diff_time.tv_sec >= (RECONNECT_DELAY / 2))
do_scan = 1;
}

if (nfds == 0 || do_scan) {
if (scan_conn_list() != 0) {
goto exit;
}
clock_gettime(CLOCK_MONOTONIC, &last_time);
}
}

Expand Down

0 comments on commit d05cbc4

Please sign in to comment.