Skip to content

Commit

Permalink
[DE83] Reinitializing zv variables during data connection break (#94)
Browse files Browse the repository at this point in the history
Signed-off-by: Vishnu Itta <[email protected]>
  • Loading branch information
vishnuitta authored Aug 13, 2018
1 parent d05cbc4 commit fd3407e
Show file tree
Hide file tree
Showing 10 changed files with 127 additions and 62 deletions.
79 changes: 60 additions & 19 deletions cmd/zrepl/zrepl.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,25 @@ static void uzfs_zvol_io_ack_sender(void *arg);
kthread_t *conn_accpt_thread;
kthread_t *uzfs_timer_thread;
kthread_t *mgmt_conn_thread;

/*
 * (Re)initializes the state variables of a zv.
 * This function needs to be called so that the zv can be used again across
 * network disconnections.
 * Lock protection and the lifetime of zv must be managed by the caller.
 * A NULL zv is silently ignored.
 */
static void
reinitialize_zv_state(zvol_state_t *zv)
{
	if (zv != NULL) {
		/* open_zvol() treats a non-zero value as "already connected" */
		zv->zv_metavolblocksize = 0;
		uzfs_zvol_set_status(zv, ZVOL_STATUS_DEGRADED);

		/* clear the rebuild info first, then set the rebuild status */
		bzero(&zv->rebuild_info, sizeof (zvol_rebuild_info_t));
		uzfs_zvol_set_rebuild_status(zv, ZVOL_REBUILDING_INIT);
	}
}

/*
* Process open request on data connection, the first message.
*
Expand All @@ -45,9 +64,10 @@ open_zvol(int fd, zvol_info_t **zinfopp)
zvol_io_hdr_t hdr;
zvol_op_open_data_t open_data;
zvol_info_t *zinfo = NULL;
zvol_state_t *zv;
zvol_state_t *zv = NULL;
kthread_t *thrd_info;
thread_args_t *thrd_arg;
int rele_dataset_on_error = 0;

/*
* If we don't know the version yet, be more careful when
Expand Down Expand Up @@ -78,14 +98,24 @@ open_zvol(int fd, zvol_info_t **zinfopp)
hdr.status = ZVOL_OP_STATUS_FAILED;
goto open_reply;
}
if (zinfo->state != ZVOL_INFO_STATE_ONLINE) {
LOG_ERR("zvol %s is not online", open_data.volname);
hdr.status = ZVOL_OP_STATUS_FAILED;
goto open_reply;
}
zv = zinfo->zv;
ASSERT3P(zv, !=, NULL);
if (zv->zv_metavolblocksize != 0 &&
zv->zv_metavolblocksize != open_data.tgt_block_size) {
LOG_ERR("Conflicting block size");

if (zv->zv_metavolblocksize != 0) {
LOG_ERR("there might be already a data connection for %s",
open_data.volname);
hdr.status = ZVOL_OP_STATUS_FAILED;
goto open_reply;
}

ASSERT3P(zv, !=, NULL);
ASSERT3P(zv->zv_status, ==, ZVOL_STATUS_DEGRADED);
ASSERT3P(zv->rebuild_info.zv_rebuild_status, ==, ZVOL_REBUILDING_INIT);

// validate block size (only one bit is set in the number)
if (open_data.tgt_block_size == 0 ||
(open_data.tgt_block_size & (open_data.tgt_block_size - 1)) != 0) {
Expand All @@ -100,16 +130,21 @@ open_zvol(int fd, zvol_info_t **zinfopp)
* in case that the target creates data connection directly without
* getting the endpoint through mgmt connection first.
*/
if (zv->zv_objset == NULL && uzfs_hold_dataset(zv) != 0) {
(void) pthread_mutex_unlock(&zinfo->zinfo_mutex);
LOG_ERR("Failed to hold zvol during open");
hdr.status = ZVOL_OP_STATUS_FAILED;
goto open_reply;
rele_dataset_on_error = 0;
if (zv->zv_objset == NULL) {
if (uzfs_hold_dataset(zv) != 0) {
(void) pthread_mutex_unlock(&zinfo->zinfo_mutex);
LOG_ERR("Failed to hold zvol during open");
hdr.status = ZVOL_OP_STATUS_FAILED;
goto open_reply;
}
rele_dataset_on_error = 1;
}
if (uzfs_update_metadata_granularity(zv,
open_data.tgt_block_size) != 0) {
(void) pthread_mutex_unlock(&zinfo->zinfo_mutex);
uzfs_rele_dataset(zv);
if (rele_dataset_on_error == 1)
uzfs_rele_dataset(zv);
LOG_ERR("Failed to set granularity of metadata");
hdr.status = ZVOL_OP_STATUS_FAILED;
goto open_reply;
Expand Down Expand Up @@ -149,6 +184,7 @@ open_zvol(int fd, zvol_info_t **zinfopp)
LOG_ERR("Failed to send reply for open request");
if (hdr.status != ZVOL_OP_STATUS_OK) {
ASSERT3P(*zinfopp, ==, NULL);
reinitialize_zv_state(zv);
if (zinfo != NULL)
uzfs_zinfo_drop_refcnt(zinfo);
return (-1);
Expand Down Expand Up @@ -178,14 +214,12 @@ uzfs_zvol_io_receiver(void *arg)
(zinfo->is_io_ack_sender_created))
goto exit;
shutdown(fd, SHUT_RDWR);
(void) close(fd);
LOG_INFO("Data connection closed");
zk_thread_exit();
return;
goto thread_exit;
}
}

LOG_INFO("Data connection associated with zvol %s", zinfo->name);
LOG_INFO("Data connection associated with zvol %s fd: %d",
zinfo->name, fd);

while ((rc = uzfs_zvol_socket_read(fd, (char *)&hdr, sizeof (hdr))) ==
0) {
Expand Down Expand Up @@ -249,8 +283,13 @@ uzfs_zvol_io_receiver(void *arg)
}
(void) pthread_mutex_unlock(&zinfo->zinfo_mutex);

close(fd);
zinfo->io_ack_waiting = 0;

reinitialize_zv_state(zinfo->zv);
uzfs_zinfo_drop_refcnt(zinfo);
thread_exit:
close(fd);
LOG_INFO("Data connection closed on fd: %d", fd);
zk_thread_exit();
}

Expand Down Expand Up @@ -344,7 +383,6 @@ uzfs_zvol_io_ack_sender(void *arg)
while (1) {
if ((zinfo->state == ZVOL_INFO_STATE_OFFLINE) ||
(zinfo->conn_closed == B_TRUE)) {
zinfo->is_io_ack_sender_created = B_FALSE;
(void) pthread_mutex_unlock(
&zinfo->zinfo_mutex);
goto exit;
Expand Down Expand Up @@ -426,14 +464,17 @@ uzfs_zvol_io_ack_sender(void *arg)
exit:
zinfo->zio_cmd_in_ack = NULL;
shutdown(fd, SHUT_RDWR);
LOG_INFO("Data connection for zvol %s closed", zinfo->name);
LOG_INFO("Data connection for zvol %s closed on fd: %d",
zinfo->name, fd);

(void) pthread_mutex_lock(&zinfo->zinfo_mutex);
while (!STAILQ_EMPTY(&zinfo->complete_queue)) {
zio_cmd = STAILQ_FIRST(&zinfo->complete_queue);
STAILQ_REMOVE_HEAD(&zinfo->complete_queue, cmd_link);
zio_cmd_free(&zio_cmd);
}
zinfo->conn_closed = B_FALSE;
zinfo->is_io_ack_sender_created = B_FALSE;
(void) pthread_mutex_unlock(&zinfo->zinfo_mutex);
uzfs_zinfo_drop_refcnt(zinfo);

Expand Down
2 changes: 1 addition & 1 deletion include/gtest_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ class SocketFd {
m_fd = -1;
}
}
int fd();
int &fd();
SocketFd& operator=(int other);
void graceful_close();
bool opened();
Expand Down
4 changes: 0 additions & 4 deletions include/zrepl_mgmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,10 @@ typedef struct zvol_info_s {
pthread_mutex_t zinfo_mutex;
pthread_cond_t io_ack_cond;

pthread_t io_receiver_thread;
pthread_t io_ack_sender_thread;

/* All cmds after execution will go here for ack */
STAILQ_HEAD(, zvol_io_cmd_s) complete_queue;

uint8_t io_ack_waiting;
uint8_t error_count;

/* Will be used to signal ack-sender to exit */
uint8_t conn_closed;
Expand Down
4 changes: 2 additions & 2 deletions include/zrepl_prot.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,8 @@ typedef enum zvol_rebuild_status zvol_rebuild_status_t;
* zvol status
*/
enum zvol_status {
ZVOL_STATUS_HEALTHY, /* zvol has latest data */
ZVOL_STATUS_DEGRADED /* zvol is missing some data */
ZVOL_STATUS_DEGRADED, /* zvol is missing some data */
ZVOL_STATUS_HEALTHY /* zvol has latest data */
} __attribute__((packed));

typedef enum zvol_status zvol_status_t;
Expand Down
10 changes: 7 additions & 3 deletions lib/libzpool/uzfs_mgmt.c
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ get_controller_ip(objset_t *os, char *buf, int len)

/* owns objset with name 'ds_name' in pool 'spa' */
static int
uzfs_own_dataset(const char *ds_name, zvol_state_t **z)
uzfs_dataset_zv_create(const char *ds_name, zvol_state_t **z)
{
zvol_state_t *zv = NULL;
int error = -1;
Expand Down Expand Up @@ -392,6 +392,7 @@ uzfs_own_dataset(const char *ds_name, zvol_state_t **z)

/* On boot, mark zvol status as degraded */
uzfs_zvol_set_status(zv, ZVOL_STATUS_DEGRADED);
uzfs_zvol_set_rebuild_status(zv, ZVOL_REBUILDING_INIT);

if (spa_writeable(dmu_objset_spa(os))) {
// if (zil_replay_disable)
Expand Down Expand Up @@ -420,7 +421,7 @@ uzfs_open_dataset(spa_t *spa, const char *ds_name, zvol_state_t **z)
return (error);
(void) snprintf(name, sizeof (name), "%s/%s", spa_name(spa), ds_name);

error = uzfs_own_dataset(name, z);
error = uzfs_dataset_zv_create(name, z);
return (error);
}

Expand Down Expand Up @@ -474,7 +475,7 @@ uzfs_zvol_create_cb(const char *ds_name, void *arg)
return (0);
}

error = uzfs_own_dataset(ds_name, &zv);
error = uzfs_dataset_zv_create(ds_name, &zv);
if (error) {
/* happens normally for all non-zvol-type datasets */
return (error);
Expand Down Expand Up @@ -541,6 +542,9 @@ uzfs_rele_dataset(zvol_state_t *zv)
dnode_rele(zv->zv_dn, zv);
if (zv->zv_objset != NULL)
dmu_objset_disown(zv->zv_objset, zv);
zv->zv_zilog = NULL;
zv->zv_dn = NULL;
zv->zv_objset = NULL;
}

/* disowns, closes dataset */
Expand Down
1 change: 1 addition & 0 deletions lib/libzpool/zrepl_mgmt.c
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ uzfs_zinfo_init(void *zv, const char *ds_name, nvlist_t *create_props)

strlcpy(zinfo->name, ds_name, MAXNAMELEN);
zinfo->zv = zv;
zinfo->state = ZVOL_INFO_STATE_ONLINE;
/* iSCSI target will overwrite this value during handshake */
zinfo->update_ionum_interval = 6000;
/* Update zvol list */
Expand Down
1 change: 1 addition & 0 deletions lib/libzrepl/data_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,7 @@ remove_pending_cmds_to_ack(int fd, zvol_info_t *zinfo)
while ((zinfo->zio_cmd_in_ack != NULL) &&
(((zvol_io_cmd_t *)(zinfo->zio_cmd_in_ack))->conn == fd)) {
(void) pthread_mutex_unlock(&zinfo->zinfo_mutex);
LOG_INFO("Waiting for IO to send off on vol %s", zinfo->name);
sleep(1);
(void) pthread_mutex_lock(&zinfo->zinfo_mutex);
}
Expand Down
15 changes: 9 additions & 6 deletions lib/libzrepl/mgmt_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,15 @@
*/

/* log wrappers which prefix log message by iscsi target address */
#define DBGCONN(c, fmt, ...) LOG_DEBUG("[tgt %s:%u]: " fmt, \
(c)->conn_host, (c)->conn_port, ##__VA_ARGS__)
#define LOGCONN(c, fmt, ...) LOG_INFO("[tgt %s:%u]: " fmt, \
(c)->conn_host, (c)->conn_port, ##__VA_ARGS__)
#define LOGERRCONN(c, fmt, ...) LOG_ERR("[tgt %s:%u]: " fmt, \
(c)->conn_host, (c)->conn_port, ##__VA_ARGS__)
/*
 * Log wrappers which prefix the message with "[tgt host:port:fd]: ", taken
 * from the conn_host, conn_port and conn_fd members of the connection `c`.
 * `c` must be a pointer expression; it is parenthesized at every use so that
 * arguments such as `&conn` or `conns + i` expand correctly. It is evaluated
 * more than once, so it must not have side effects.
 */
#define	DBGCONN(c, fmt, ...)	LOG_DEBUG("[tgt %s:%u:%d]: " fmt, \
				(c)->conn_host, (c)->conn_port, \
				(c)->conn_fd, ##__VA_ARGS__)
#define	LOGCONN(c, fmt, ...)	LOG_INFO("[tgt %s:%u:%d]: " fmt, \
				(c)->conn_host, (c)->conn_port, \
				(c)->conn_fd, ##__VA_ARGS__)
#define	LOGERRCONN(c, fmt, ...)	LOG_ERR("[tgt %s:%u:%d]: " fmt, \
				(c)->conn_host, (c)->conn_port, \
				(c)->conn_fd, ##__VA_ARGS__)

/* Max # of events from epoll processed at once */
#define MAX_EVENTS 10
Expand Down
2 changes: 1 addition & 1 deletion tests/cbtest/gtest/gtest_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ GtestUtils::strlcpy(char *dst, const char *src, size_t len)
return (slen);
}

int
int &
GtestUtils::SocketFd::fd()
{
return m_fd;
Expand Down
Loading

0 comments on commit fd3407e

Please sign in to comment.