From d0f0edd0b9f4e4a5b3e4f7f388604d98cfc6c2af Mon Sep 17 00:00:00 2001 From: Satbir Singh <34478047+satbirchhikara@users.noreply.github.com> Date: Thu, 27 Sep 2018 22:01:51 +0530 Subject: [PATCH] [TA3151]feat(snap_rebuild)Flush outstanding IOs before taking snapshot on rebuild clone. (#123) Signed-off-by: satbir --- cmd/uzfs_test/zrepl_utest.c | 5 ++-- include/sys/uzfs_zvol.h | 5 +++- include/sys/zfs_context.h | 1 + include/zrepl_mgmt.h | 5 ++++ lib/libzpool/taskq.c | 13 ++++++++ lib/libzrepl/data_conn.c | 60 ++++++++++++++++++++++++++++++++----- lib/libzrepl/mgmt_conn.c | 1 + 7 files changed, 80 insertions(+), 10 deletions(-) diff --git a/cmd/uzfs_test/zrepl_utest.c b/cmd/uzfs_test/zrepl_utest.c index 9188595ab1fc..2a0085b12687 100644 --- a/cmd/uzfs_test/zrepl_utest.c +++ b/cmd/uzfs_test/zrepl_utest.c @@ -295,6 +295,7 @@ zrepl_utest_replica_rebuild_start(int fd, mgmt_ack_t *mgmt_ack, return (0); } + static void reader_thread(void *arg) { @@ -1026,7 +1027,7 @@ zrepl_rebuild_test(void *arg) goto exit; } /* - * Check rebuild status of of downgrade replica ds1. + * Check rebuild status of downgrade replica ds1. */ status_check: count = zrepl_utest_get_replica_status(ds1, ds1_mgmt_fd, &status_ack); @@ -1195,7 +1196,7 @@ zrepl_rebuild_test(void *arg) /* * Start rebuild process on downgraded replica ds3 - * by sharing IP and rebuild_Port info with ds3. + * by sharing IP and rebuild_port info with ds3. 
*/ rc = zrepl_utest_replica_rebuild_start(ds3_mgmt_fd, mgmt_ack_ds3, sizeof (mgmt_ack_t) * 3); diff --git a/include/sys/uzfs_zvol.h b/include/sys/uzfs_zvol.h index 5a4d5628ad3c..ac4903032bd0 100644 --- a/include/sys/uzfs_zvol.h +++ b/include/sys/uzfs_zvol.h @@ -97,11 +97,14 @@ typedef struct zvol_state zvol_state_t; #define UZFS_IO_MREAD_FAIL 3 #define ZINFO_IS_DEGRADED(zinfo) (ZVOL_IS_DEGRADED(zinfo->main_zv)) -#define ZVOL_IS_DEGRADED(zv) (zv->zv_status == ZVOL_STATUS_DEGRADED) +#define ZVOL_IS_DEGRADED(zv) (zv->zv_status == ZVOL_STATUS_DEGRADED) +#define ZVOL_IS_HEALTHY(zv) (zv->zv_status == ZVOL_STATUS_HEALTHY) #define ZVOL_IS_REBUILDING(zv) \ ((zv->rebuild_info.zv_rebuild_status == ZVOL_REBUILDING_SNAP) || \ (zv->rebuild_info.zv_rebuild_status == ZVOL_REBUILDING_AFS)) +#define ZVOL_IS_REBUILDING_AFS(zv) \ + (zv->rebuild_info.zv_rebuild_status == ZVOL_REBUILDING_AFS) #define ZVOL_IS_REBUILDED(zv) \ (zv->rebuild_info.zv_rebuild_status == ZVOL_REBUILDING_DONE) #define ZVOL_IS_REBUILDING_ERRORED(zv) \ diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index 8f153b73d2fc..c1c430074e14 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -515,6 +515,7 @@ extern int taskq_member(taskq_t *, kthread_t *); extern int taskq_cancel_id(taskq_t *, taskqid_t); extern void system_taskq_init(void); extern void system_taskq_fini(void); +extern int taskq_check_active_ios(taskq_t *tq); #define XVA_MAPSIZE 3 #define XVA_MAGIC 0x78766174 diff --git a/include/zrepl_mgmt.h b/include/zrepl_mgmt.h index b57fae66685d..6c2c6fbec540 100644 --- a/include/zrepl_mgmt.h +++ b/include/zrepl_mgmt.h @@ -158,6 +158,11 @@ typedef struct zvol_info_s { /* Will be used to singal ack-sender to exit */ uint8_t conn_closed; + + /* Rebuild flags to quiesce IOs */ + uint8_t quiesce_requested; + uint8_t quiesce_done; + /* Pointer to mgmt connection for this zinfo */ void *mgmt_conn; diff --git a/lib/libzpool/taskq.c b/lib/libzpool/taskq.c index 
02a16e3f4ee6..161d2024af17 100644
--- a/lib/libzpool/taskq.c
+++ b/lib/libzpool/taskq.c
@@ -213,6 +213,27 @@ taskq_wait_outstanding(taskq_t *tq, taskqid_t id)
 	taskq_wait(tq);
 }
 
+/*
+ * Report whether the given taskq looks busy.
+ *
+ * Returns 1 if the tq_task list is non-empty (its head does not point
+ * back to itself) or tq_active is non-zero, and 0 otherwise. Both fields
+ * are read while holding tq_lock; the lock is released before returning,
+ * so the result is only a point-in-time snapshot.
+ */
+int
+taskq_check_active_ios(taskq_t *tq)
+{
+	int busy;
+
+	mutex_enter(&tq->tq_lock);
+	busy = (tq->tq_task.tqent_next != &tq->tq_task ||
+	    tq->tq_active != 0);
+	mutex_exit(&tq->tq_lock);
+
+	return (busy);
+}
+
 static void
 taskq_thread(void *arg)
 {
diff --git a/lib/libzrepl/data_conn.c b/lib/libzrepl/data_conn.c
index b341bc0b27bc..85c5c24a6423 100644
--- a/lib/libzrepl/data_conn.c
+++ b/lib/libzrepl/data_conn.c
@@ -228,14 +228,20 @@ uzfs_submit_writes(zvol_info_t *zinfo, zvol_io_cmd_t *zio_cmd)
 		remain -= sizeof (*write_hdr);
 		if (remain < write_hdr->len)
 			return (-1);
-
-		rc = uzfs_write_data(zinfo->main_zv, datap, data_offset,
-		    write_hdr->len, &metadata, is_rebuild);
-		if (rc != 0)
-			break;
+		/*
+		 * Write to main_zv when volume is either
+		 * healthy or in REBUILD_AFS state of rebuild
+		 */
+		if (ZVOL_IS_REBUILDING_AFS(zinfo->main_zv) ||
+		    ZVOL_IS_HEALTHY(zinfo->main_zv)) {
+			rc = uzfs_write_data(zinfo->main_zv, datap, data_offset,
+			    write_hdr->len, &metadata, is_rebuild);
+			if (rc != 0)
+				break;
+		}
 
 		/* IO to clone should be sent only when it is from app */
-		if (!is_rebuild && (zinfo->clone_zv != NULL)) {
+		if (!is_rebuild && !ZVOL_IS_HEALTHY(zinfo->main_zv)) {
 			rc = uzfs_write_data(zinfo->clone_zv,
 			    datap, data_offset, write_hdr->len,
 			    &metadata, is_rebuild);
@@ -332,7 +338,9 @@ uzfs_zvol_worker(void *arg)
 					rc = -1;
 					break;
 				}
-			}
+			} else if (!ZVOL_IS_HEALTHY(zinfo->main_zv))
+				/* App IOs should go to clone_zv */
+				read_zv = zinfo->clone_zv;
 
 			rc = uzfs_read_data(read_zv,
 			    (char *)zio_cmd->buf,
@@ -348,6 +356,8 @@ uzfs_zvol_worker(void *arg)
 
 	case ZVOL_OPCODE_SYNC:
 		uzfs_flush_data(zinfo->main_zv);
+		if (!ZVOL_IS_HEALTHY(zinfo->main_zv))
+			uzfs_flush_data(zinfo->clone_zv);
 		atomic_inc_64(&zinfo->sync_req_received_cnt);
 		break;
 
@@ -680,8 +690,29 @@
uzfs_zvol_rebuild_dw_replica(void *arg) ZVOL_REBUILDING_AFS); if (start_rebuild_from_clone == 0) start_rebuild_from_clone = 1; + /* + * Lets ask io_receiver thread to flush + * all outstanding IOs in taskq + */ + zinfo->quiesce_done = 0; + zinfo->quiesce_requested = 1; } mutex_exit(&zinfo->main_zv->rebuild_mtx); + /* + * Wait for all outstanding IOs to be flushed + * to disk before making further progress + */ + while (1) { + if (zinfo->quiesce_done || + !taskq_check_active_ios( + zinfo->uzfs_zvol_taskq)) { + zinfo->quiesce_done = 1; + zinfo->quiesce_requested = 0; + break; + } + else + sleep(1); + } if (start_rebuild_from_clone == 1) { start_rebuild_from_clone = 2; @@ -1859,6 +1890,19 @@ uzfs_zvol_io_receiver(void *arg) /* Take refcount for uzfs_zvol_worker to work on it */ uzfs_zinfo_take_refcnt(zinfo); zio_cmd->zinfo = zinfo; + + /* + * Rebuild want to take consistent snapshot + * so it asked to flush all outstanding IOs + * before taking snapshot on rebuild_clone + */ + if (zinfo->quiesce_requested) { + ASSERT(ZVOL_IS_REBUILDING_AFS(zinfo->main_zv)); + taskq_wait_outstanding(zinfo->uzfs_zvol_taskq, 0); + zinfo->quiesce_requested = 0; + zinfo->quiesce_done = 1; + } + taskq_dispatch(zinfo->uzfs_zvol_taskq, uzfs_zvol_worker, zio_cmd, TQ_SLEEP); } @@ -1892,6 +1936,8 @@ uzfs_zvol_io_receiver(void *arg) zinfo->is_io_receiver_created = B_FALSE; (void) uzfs_zvol_release_internal_clone(zinfo->main_zv, &zinfo->snap_zv, &zinfo->clone_zv); + zinfo->quiesce_requested = 0; + zinfo->quiesce_done = 1; uzfs_zinfo_drop_refcnt(zinfo); thread_exit: close(fd); diff --git a/lib/libzrepl/mgmt_conn.c b/lib/libzrepl/mgmt_conn.c index d8bcb8cad877..311f9990c7bc 100644 --- a/lib/libzrepl/mgmt_conn.c +++ b/lib/libzrepl/mgmt_conn.c @@ -1261,6 +1261,7 @@ handle_start_rebuild_req(uzfs_mgmt_conn_t *conn, zvol_io_hdr_t *hdrp, memset(&zinfo->main_zv->rebuild_info, 0, sizeof (zvol_rebuild_info_t)); + zinfo->quiesce_requested = zinfo->quiesce_done = 0; uzfs_zvol_set_rebuild_status(zinfo->main_zv, 
ZVOL_REBUILDING_SNAP);