Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TA1143] Fix deadlock issue in finish_async_tasks. #71

Merged
merged 5 commits into from
Jun 20, 2018
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 22 additions & 17 deletions cmd/zrepl/zrepl.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#define MAXEVENTS 64
#define ZAP_UPDATE_TIME_INTERVAL 2

__thread char tinfo[50] = {0};
extern unsigned long zfs_arc_max;
extern unsigned long zfs_arc_min;
extern int zfs_autoimport_disable;
Expand Down Expand Up @@ -201,12 +202,12 @@ uzfs_zvol_io_receiver(void *arg)
zvol_io_cmd_t *zio_cmd;
zvol_io_hdr_t hdr;

prctl(PR_SET_NAME, "io_receiver", 0, 0, 0);

/* First command should be OPEN */
while (zinfo == NULL) {
if (open_zvol(fd, &zinfo) != 0)
goto exit;
snprintf(tinfo, 50, "io_receiver_%lu", zinfo->zvol_guid);
prctl(PR_SET_NAME, tinfo, 0, 0, 0);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

from the prctl man page:

The name can  be  up  to  16  bytes  long,
              including  the  terminating  null  byte.  (If the length of the string, including the terminating null byte, exceeds 16 bytes, the string is
              silently truncated.)

we cannot store the whole guid there. We can number them from 1 to n (using increasing static counter). However I would probably just revert it to the previous code and just have a name io_receiver for all such threads. By using gdb we can always figure out which pool it belongs to (printing zinfo). Unless there is some advantage if the thread name is unique.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

similar command applies to the other thread below.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reverted back this change

}

while (1) {
Expand All @@ -222,6 +223,15 @@ uzfs_zvol_io_receiver(void *arg)
goto exit;
}

if (((hdr.opcode == ZVOL_OPCODE_WRITE) ||
(hdr.opcode == ZVOL_OPCODE_READ)) && !hdr.len) {
LOG_ERR("Zero Payload size for opcode %d", hdr.opcode);
goto exit;
} else if ((hdr.opcode == ZVOL_OPCODE_SYNC) && hdr.len > 0) {
LOG_ERR("Unexpected payload for opcode %d", hdr.opcode);
goto exit;
}

zio_cmd = zio_cmd_alloc(&hdr, fd);
/* Read payload for commands which have it */
if (hdr.opcode == ZVOL_OPCODE_WRITE) {
Expand All @@ -230,11 +240,6 @@ uzfs_zvol_io_receiver(void *arg)
zio_cmd_free(&zio_cmd);
goto exit;
}
} else if (hdr.opcode != ZVOL_OPCODE_READ && hdr.len > 0) {
LOG_ERR("Unexpected payload for opcode %d",
hdr.opcode);
zio_cmd_free(&zio_cmd);
goto exit;
}

/* Take refcount for uzfs_zvol_worker to work on it */
Expand All @@ -245,8 +250,6 @@ uzfs_zvol_io_receiver(void *arg)
}
exit:
if (zinfo != NULL) {
LOG_DEBUG("uzfs_zvol_io_receiver thread for zvol %s exiting",
zinfo->name);
(void) pthread_mutex_lock(&zinfo->zinfo_mutex);
zinfo->conn_closed = B_TRUE;
/*
Expand All @@ -267,9 +270,8 @@ uzfs_zvol_io_receiver(void *arg)
}
(void) pthread_mutex_unlock(&zinfo->zinfo_mutex);
uzfs_zinfo_drop_refcnt(zinfo, B_FALSE);
} else {
LOG_DEBUG("uzfs_zvol_io_receiver thread exiting");
}
LOG_DEBUG("%s thread exiting", tinfo);
zk_thread_exit();
}

Expand Down Expand Up @@ -674,7 +676,8 @@ uzfs_zvol_io_ack_sender(void *arg)
zinfo = thrd_arg->zinfo;
kmem_free(arg, sizeof (thread_args_t));

prctl(PR_SET_NAME, "ack_sender", 0, 0, 0);
snprintf(tinfo, 50, "ack_sender_%lu", zinfo->zvol_guid);
prctl(PR_SET_NAME, tinfo, 0, 0, 0);

while (1) {
int rc = 0;
Expand Down Expand Up @@ -752,18 +755,19 @@ uzfs_zvol_io_ack_sender(void *arg)
}
}
}
zinfo->read_req_ack_cnt++;
atomic_inc_64(&zinfo->read_req_ack_cnt);
} else {
zinfo->write_req_ack_cnt++;
if (zio_cmd->hdr.opcode == ZVOL_OPCODE_WRITE)
atomic_inc_64(&zinfo->write_req_ack_cnt);
else if (zio_cmd->hdr.opcode == ZVOL_OPCODE_SYNC)
atomic_inc_64(&zinfo->sync_req_ack_cnt);
}
zinfo->zio_cmd_in_ack = NULL;
zio_cmd_free(&zio_cmd);
}
exit:
LOG_DEBUG("uzfs_zvol_io_ack_sender thread for zvol %s exiting",
zinfo->name);

zinfo->zio_cmd_in_ack = NULL;
shutdown(fd, SHUT_RDWR);
close(fd);
while (!STAILQ_EMPTY(&zinfo->complete_queue)) {
zio_cmd = STAILQ_FIRST(&zinfo->complete_queue);
Expand All @@ -774,6 +778,7 @@ uzfs_zvol_io_ack_sender(void *arg)
(void) pthread_mutex_unlock(&zinfo->zinfo_mutex);
uzfs_zinfo_drop_refcnt(zinfo, B_FALSE);

LOG_DEBUG("%s thread exiting", tinfo);
zk_thread_exit();
}

Expand Down
11 changes: 7 additions & 4 deletions include/zrepl_mgmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ typedef struct zvol_info_s {
int refcnt;
int is_io_ack_sender_created;
uint32_t timeout; /* iSCSI timeout val for this zvol */
uint64_t zvol_guid;
uint64_t running_ionum;
uint64_t checkpointed_ionum;
time_t checkpointed_time; /* time of the last chkpoint */
Expand Down Expand Up @@ -115,10 +116,12 @@ typedef struct zvol_info_s {
/* Perfromance counter */

/* Debug counters */
int read_req_received_cnt;
int write_req_received_cnt;
int read_req_ack_cnt;
int write_req_ack_cnt;
uint64_t read_req_received_cnt;
uint64_t write_req_received_cnt;
uint64_t sync_req_received_cnt;
uint64_t read_req_ack_cnt;
uint64_t write_req_ack_cnt;
uint64_t sync_req_ack_cnt;

/* ongoing command that is being worked on to ack to its sender */
void *zio_cmd_in_ack;
Expand Down
11 changes: 4 additions & 7 deletions lib/libzrepl/data_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,6 @@ uzfs_zvol_worker(void *arg)
zvol_io_hdr_t *hdr;
metadata_desc_t **metadata_desc;
int rc = 0;
int write = 0;
boolean_t rebuild_cmd_req;
boolean_t read_metadata;

Expand Down Expand Up @@ -230,16 +229,19 @@ uzfs_zvol_worker(void *arg)
(char *)zio_cmd->buf,
hdr->offset, hdr->len,
metadata_desc);
atomic_inc_64(&zinfo->read_req_received_cnt);
break;

case ZVOL_OPCODE_WRITE:
write = 1;
rc = uzfs_submit_writes(zinfo, zio_cmd);
atomic_inc_64(&zinfo->write_req_received_cnt);
break;

case ZVOL_OPCODE_SYNC:
uzfs_flush_data(zinfo->zv);
atomic_inc_64(&zinfo->sync_req_received_cnt);
break;

case ZVOL_OPCODE_REBUILD_STEP_DONE:
break;
default:
Expand Down Expand Up @@ -269,11 +271,6 @@ uzfs_zvol_worker(void *arg)
goto drop_refcount;
}
STAILQ_INSERT_TAIL(&zinfo->complete_queue, zio_cmd, cmd_link);
if (write) {
zinfo->write_req_received_cnt++;
} else {
zinfo->read_req_received_cnt++;
}

if (zinfo->io_ack_waiting) {
rc = pthread_cond_signal(&zinfo->io_ack_cond);
Expand Down
5 changes: 3 additions & 2 deletions lib/libzrepl/mgmt_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,8 @@ uzfs_zvol_mgmt_do_handshake(uzfs_mgmt_conn_t *conn, zvol_io_hdr_t *hdrp,
*/
mgmt_ack.zvol_guid = dsl_dataset_phys(
zv->zv_objset->os_dsl_dataset)->ds_guid;
zinfo->zvol_guid = mgmt_ack.zvol_guid;
LOG_INFO("Volume:%s has zvol_guid:%lu", zinfo->name, zinfo->zvol_guid);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a nit but can we improve the message as "Zvol %s has guid %lu" ? this function is called also for ZVOL_OPCODE_PREPARE_FOR_REBUILD in which case guid is already assigned to zinfo->zvol_guid. So maybe if zinfo->zvol_guid != 0 ... would be more appropriate.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


bzero(&hdr, sizeof (hdr));
hdr.version = REPLICA_VERSION;
Expand Down Expand Up @@ -593,9 +595,8 @@ finish_async_tasks(void)
rc = reply_nodata(async_task->conn, async_task->status,
async_task->hdr.opcode, async_task->hdr.io_seq);
}

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should not ignore rc here. If epoll fails then it is a serious error leading to internally inconsistent state which is difficult to recover from. we could just break if rc != 0 and change final return to return (rc). That will take care of releasing the mutex.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed deadlock issue. I will raise a PR to address mgmt_thread exit issue which need discussion among team members.

free_async_task(async_task);
if (rc != 0)
return (rc);
}
mutex_exit(&async_tasks_mtx);
return (0);
Expand Down