From 9aa90df9610dbbfae9ff68bc2cab9d602450086c Mon Sep 17 00:00:00 2001 From: Satbir Singh <34478047+satbirchhikara@users.noreply.github.com> Date: Wed, 20 Jun 2018 17:35:33 +0530 Subject: [PATCH] [TA1143] Fix deadlock issue in finish_async_tasks. (#71) * Fixed hdr.len check in io_receiver thread Signed-off-by: satbir --- cmd/zrepl/zrepl.c | 22 +++++++++++++++------- include/zrepl_mgmt.h | 11 +++++++---- lib/libzrepl/data_conn.c | 11 ++++------- lib/libzrepl/mgmt_conn.c | 9 ++++++--- 4 files changed, 32 insertions(+), 21 deletions(-) diff --git a/cmd/zrepl/zrepl.c b/cmd/zrepl/zrepl.c index 7ad60c098c45..db575c59385d 100644 --- a/cmd/zrepl/zrepl.c +++ b/cmd/zrepl/zrepl.c @@ -222,6 +222,15 @@ uzfs_zvol_io_receiver(void *arg) goto exit; } + if (((hdr.opcode == ZVOL_OPCODE_WRITE) || + (hdr.opcode == ZVOL_OPCODE_READ)) && !hdr.len) { + LOG_ERR("Zero Payload size for opcode %d", hdr.opcode); + goto exit; + } else if ((hdr.opcode == ZVOL_OPCODE_SYNC) && hdr.len > 0) { + LOG_ERR("Unexpected payload for opcode %d", hdr.opcode); + goto exit; + } + zio_cmd = zio_cmd_alloc(&hdr, fd); /* Read payload for commands which have it */ if (hdr.opcode == ZVOL_OPCODE_WRITE) { @@ -230,11 +239,6 @@ uzfs_zvol_io_receiver(void *arg) zio_cmd_free(&zio_cmd); goto exit; } - } else if (hdr.opcode != ZVOL_OPCODE_READ && hdr.len > 0) { - LOG_ERR("Unexpected payload for opcode %d", - hdr.opcode); - zio_cmd_free(&zio_cmd); - goto exit; } /* Take refcount for uzfs_zvol_worker to work on it */ @@ -752,9 +756,12 @@ uzfs_zvol_io_ack_sender(void *arg) } } } - zinfo->read_req_ack_cnt++; + atomic_inc_64(&zinfo->read_req_ack_cnt); } else { - zinfo->write_req_ack_cnt++; + if (zio_cmd->hdr.opcode == ZVOL_OPCODE_WRITE) + atomic_inc_64(&zinfo->write_req_ack_cnt); + else if (zio_cmd->hdr.opcode == ZVOL_OPCODE_SYNC) + atomic_inc_64(&zinfo->sync_req_ack_cnt); } zinfo->zio_cmd_in_ack = NULL; zio_cmd_free(&zio_cmd); @@ -764,6 +771,7 @@ uzfs_zvol_io_ack_sender(void *arg) zinfo->name); zinfo->zio_cmd_in_ack = NULL; + shutdown(fd, SHUT_RDWR); close(fd); while (!STAILQ_EMPTY(&zinfo->complete_queue)) { zio_cmd = STAILQ_FIRST(&zinfo->complete_queue); diff --git a/include/zrepl_mgmt.h b/include/zrepl_mgmt.h index 99a0fe56733e..bf4b63ffb8be 100644 --- a/include/zrepl_mgmt.h +++ b/include/zrepl_mgmt.h @@ -84,6 +84,7 @@ typedef struct zvol_info_s { int refcnt; int is_io_ack_sender_created; uint32_t timeout; /* iSCSI timeout val for this zvol */ + uint64_t zvol_guid; uint64_t running_ionum; uint64_t checkpointed_ionum; time_t checkpointed_time; /* time of the last chkpoint */ @@ -115,10 +116,12 @@ typedef struct zvol_info_s { /* Perfromance counter */ /* Debug counters */ - int read_req_received_cnt; - int write_req_received_cnt; - int read_req_ack_cnt; - int write_req_ack_cnt; + uint64_t read_req_received_cnt; + uint64_t write_req_received_cnt; + uint64_t sync_req_received_cnt; + uint64_t read_req_ack_cnt; + uint64_t write_req_ack_cnt; + uint64_t sync_req_ack_cnt; /* ongoing command that is being worked on to ack to its sender */ void *zio_cmd_in_ack; diff --git a/lib/libzrepl/data_conn.c b/lib/libzrepl/data_conn.c index f05c88c6ca6e..eb231f89a42b 100644 --- a/lib/libzrepl/data_conn.c +++ b/lib/libzrepl/data_conn.c @@ -201,7 +201,6 @@ uzfs_zvol_worker(void *arg) zvol_io_hdr_t *hdr; metadata_desc_t **metadata_desc; int rc = 0; - int write = 0; boolean_t rebuild_cmd_req; boolean_t read_metadata; @@ -230,16 +229,19 @@ uzfs_zvol_worker(void *arg) (char *)zio_cmd->buf, hdr->offset, hdr->len, metadata_desc); + atomic_inc_64(&zinfo->read_req_received_cnt); break; case ZVOL_OPCODE_WRITE: - write = 1; rc = uzfs_submit_writes(zinfo, zio_cmd); + atomic_inc_64(&zinfo->write_req_received_cnt); break; case ZVOL_OPCODE_SYNC: uzfs_flush_data(zinfo->zv); + atomic_inc_64(&zinfo->sync_req_received_cnt); break; + case ZVOL_OPCODE_REBUILD_STEP_DONE: break; default: @@ -269,11 +271,6 @@ uzfs_zvol_worker(void *arg) goto drop_refcount; } STAILQ_INSERT_TAIL(&zinfo->complete_queue, zio_cmd, cmd_link); - if (write) { - zinfo->write_req_received_cnt++; - } else { - zinfo->read_req_received_cnt++; - } if (zinfo->io_ack_waiting) { rc = pthread_cond_signal(&zinfo->io_ack_cond); diff --git a/lib/libzrepl/mgmt_conn.c b/lib/libzrepl/mgmt_conn.c index 13e9b84f3351..2a0e82d68d1d 100644 --- a/lib/libzrepl/mgmt_conn.c +++ b/lib/libzrepl/mgmt_conn.c @@ -499,6 +499,9 @@ uzfs_zvol_mgmt_do_handshake(uzfs_mgmt_conn_t *conn, zvol_io_hdr_t *hdrp, */ mgmt_ack.zvol_guid = dsl_dataset_phys( zv->zv_objset->os_dsl_dataset)->ds_guid; + if (zinfo->zvol_guid == 0) + zinfo->zvol_guid = mgmt_ack.zvol_guid; + LOG_INFO("Volume:%s has zvol_guid:%lu", zinfo->name, zinfo->zvol_guid); bzero(&hdr, sizeof (hdr)); hdr.version = REPLICA_VERSION; @@ -596,11 +599,11 @@ finish_async_tasks(void) async_task->hdr.opcode, async_task->hdr.io_seq); } free_async_task(async_task); - if (rc != 0) - return (rc); + if (rc == -1) + break; } mutex_exit(&async_tasks_mtx); - return (0); + return (rc); } /*