Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TA3151]feat(snap_rebuild)Flush outstanding IOs before taking snapshot on rebuild clone. #123

Merged
merged 6 commits into from
Sep 27, 2018
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 81 additions & 2 deletions cmd/uzfs_test/zrepl_utest.c
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,53 @@ zrepl_utest_replica_rebuild_start(int fd, mgmt_ack_t *mgmt_ack,
return (0);
}

static int
zrepl_send_read(int sfd, uint64_t io_block_size)
{

char *buf;
int count;
zvol_io_hdr_t hdr;
struct zvol_io_rw_hdr read_hdr;

hdr.version = REPLICA_VERSION;
hdr.opcode = ZVOL_OPCODE_READ;
hdr.io_seq = 0;
hdr.len = io_block_size;
hdr.status = 0;
hdr.flags = 0;
hdr.offset = 0;

count = write(sfd, (void *)&hdr, sizeof (hdr));
if (count == -1) {
printf("Write error\n");
return (-1);
}

count = read(sfd, (void *)&hdr, sizeof (hdr));
if (count == -1) {
printf("header read failed\n");
return (-1);
}

if (hdr.opcode == ZVOL_OPCODE_READ) {
count = read(sfd, &read_hdr, sizeof (read_hdr));
if (count != sizeof (read_hdr)) {
printf("Meta data header read error\n");
return (-1);
}

buf = kmem_alloc(read_hdr.len, KM_SLEEP);
count = read(sfd, buf, read_hdr.len);
free(buf);
if (count == -1) {
printf("payload read failed\n");
return (-1);
}
}
return (0);
}

static void
reader_thread(void *arg)
{
Expand Down Expand Up @@ -1026,14 +1073,22 @@ zrepl_rebuild_test(void *arg)
goto exit;
}
/*
* Check rebuild status of of downgrade replica ds1.
* Check rebuild status of downgrade replica ds1.
*/
status_check:
count = zrepl_utest_get_replica_status(ds1, ds1_mgmt_fd, &status_ack);
if (count == -1) {
goto exit;
}

/* Send a dummy IO to ds1 so that IO_quiesce check move on */
if (status_ack.rebuild_status == ZVOL_REBUILDING_AFS) {
sleep(1);
if (zrepl_send_read(ds1_io_sfd, io_block_size) == -1)
goto exit;
goto status_check;
}

if (status_ack.state != ZVOL_STATUS_HEALTHY) {
sleep(1);
goto status_check;
Expand Down Expand Up @@ -1101,6 +1156,14 @@ zrepl_rebuild_test(void *arg)
goto exit;
}

/* Send a dummy IO to ds2 so that IO_quiesce check move on */
if (status_ack.rebuild_status == ZVOL_REBUILDING_AFS) {
sleep(1);
if (zrepl_send_read(ds2_io_sfd, io_block_size) == -1)
goto exit;
goto status_check1;
}

if (status_ack.state != ZVOL_STATUS_HEALTHY) {
sleep(1);
goto status_check1;
Expand Down Expand Up @@ -1170,6 +1233,14 @@ zrepl_rebuild_test(void *arg)
goto exit;
}

/* Send a dummy IO to ds3 so that IO_quiesce check move on */
if (status_ack.rebuild_status == ZVOL_REBUILDING_AFS) {
sleep(1);
if (zrepl_send_read(ds3_io_sfd, io_block_size) == -1)
goto exit;
goto status_check2;
}

if (status_ack.rebuild_status != ZVOL_REBUILDING_FAILED) {
sleep(1);
goto status_check2;
Expand All @@ -1195,7 +1266,7 @@ zrepl_rebuild_test(void *arg)

/*
* Start rebuild process on downgraded replica ds3
* by sharing IP and rebuild_Port info with ds3.
* by sharing IP and rebuild_port info with ds3.
*/
rc = zrepl_utest_replica_rebuild_start(ds3_mgmt_fd, mgmt_ack_ds3,
sizeof (mgmt_ack_t) * 3);
Expand All @@ -1211,6 +1282,14 @@ zrepl_rebuild_test(void *arg)
goto exit;
}

/* Send a dummy IO to ds3 so that IO_quiesce check move on */
if (status_ack.rebuild_status == ZVOL_REBUILDING_AFS) {
sleep(1);
if (zrepl_send_read(ds3_io_sfd, io_block_size) == -1)
goto exit;
goto status_check3;
}

if (status_ack.state != ZVOL_STATUS_HEALTHY) {
sleep(1);
goto status_check3;
Expand Down
2 changes: 2 additions & 0 deletions include/sys/uzfs_zvol.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ typedef struct zvol_state zvol_state_t;
#define ZVOL_IS_REBUILDING(zv) \
((zv->rebuild_info.zv_rebuild_status == ZVOL_REBUILDING_SNAP) || \
(zv->rebuild_info.zv_rebuild_status == ZVOL_REBUILDING_AFS))
#define ZVOL_IS_REBUILDING_AFS(zv) \
(zv->rebuild_info.zv_rebuild_status == ZVOL_REBUILDING_AFS)
#define ZVOL_IS_REBUILDED(zv) \
(zv->rebuild_info.zv_rebuild_status == ZVOL_REBUILDING_DONE)
#define ZVOL_IS_REBUILDING_ERRORED(zv) \
Expand Down
6 changes: 6 additions & 0 deletions include/zrepl_mgmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ typedef struct inject_delay_s {
int helping_replica_rebuild_step;
int pre_uzfs_write_data;
int downgraded_replica_rebuild_size_set;
int rebuid_io_quiesce_check_by_pass;
} inject_delay_t;

typedef struct inject_error_s {
Expand Down Expand Up @@ -158,6 +159,11 @@ typedef struct zvol_info_s {

/* Will be used to singal ack-sender to exit */
uint8_t conn_closed;

/* Rebuild flags to quiesce IOs */
uint8_t quiesce_requested;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we use these two as bits in 'flags' variable?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and create setter/getter macros around these variables

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok.. we can't set them as bits, as there is no lock in reading these variables

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

uint8_t quiesce_done;

/* Pointer to mgmt connection for this zinfo */
void *mgmt_conn;

Expand Down
1 change: 1 addition & 0 deletions include/zrepl_prot.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ enum zvol_status {

typedef enum zvol_status zvol_status_t;

#define ZVOL_IS_HEALTHY(zv) (zv->zv_status == ZVOL_STATUS_HEALTHY)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lets us uzfs_zinfo_get_status(zinfo) getter function

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are some more macros which are also not as per guidelines.
#define ZINFO_IS_DEGRADED(zinfo) (ZVOL_IS_DEGRADED(zinfo->main_zv))
#define ZVOL_IS_DEGRADED(zv) (zv->zv_status == ZVOL_STATUS_DEGRADED)
Let me fix them with next PR. This PR seems to be little heavy weight.

struct zrepl_status_ack {
zvol_status_t state;
zvol_rebuild_status_t rebuild_status;
Expand Down
61 changes: 54 additions & 7 deletions lib/libzrepl/data_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -228,14 +228,21 @@ uzfs_submit_writes(zvol_info_t *zinfo, zvol_io_cmd_t *zio_cmd)
remain -= sizeof (*write_hdr);
if (remain < write_hdr->len)
return (-1);

rc = uzfs_write_data(zinfo->main_zv, datap, data_offset,
write_hdr->len, &metadata, is_rebuild);
if (rc != 0)
break;
/*
* Write to main_zv when volume is either
* healthy or in REBUILD_AFS state of rebuild
*/
if (ZVOL_IS_REBUILDING_AFS(zinfo->main_zv) ||
ZVOL_IS_HEALTHY(zinfo->main_zv)) {
rc = uzfs_write_data(zinfo->main_zv, datap, data_offset,
write_hdr->len, &metadata, is_rebuild);
if (rc != 0)
break;
}

/* IO to clone should be sent only when it is from app */
if (!is_rebuild && (zinfo->clone_zv != NULL)) {
if (!is_rebuild && !ZVOL_IS_HEALTHY(zinfo->main_zv) &&
(zinfo->clone_zv != NULL)) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can remove check for clone_zv

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

rc = uzfs_write_data(zinfo->clone_zv, datap,
data_offset, write_hdr->len, &metadata,
is_rebuild);
Expand Down Expand Up @@ -334,6 +341,12 @@ uzfs_zvol_worker(void *arg)
}
}

/* App IOs should go to cloen_zv */
Copy link

@vishnuitta vishnuitta Sep 27, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can make this as 'else' for above 'if' without NULL check for clone_zv

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

if (!rebuild_cmd_req &&
!ZVOL_IS_HEALTHY(zinfo->main_zv) &&
(zinfo->clone_zv != NULL))
read_zv = zinfo->clone_zv;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should do the same for the sync also (ZVOL_OPCODE_SYNC) ?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done sir.


rc = uzfs_read_data(read_zv,
(char *)zio_cmd->buf,
hdr->offset, hdr->len,
Expand Down Expand Up @@ -670,10 +683,31 @@ uzfs_zvol_rebuild_dw_replica(void *arg)
* one of them might have changed rebuild state
*/
if (uzfs_zvol_get_rebuild_status(zinfo->main_zv) !=
ZVOL_REBUILDING_AFS)
ZVOL_REBUILDING_AFS) {
uzfs_zvol_set_rebuild_status(zinfo->main_zv,
ZVOL_REBUILDING_AFS);
/*
* Lets ask io_receiver thread to flush
* all outstanding IOs in taskq
*/
zinfo->quiesce_done = 0;
zinfo->quiesce_requested = 1;
}
mutex_exit(&zinfo->main_zv->rebuild_mtx);
/*
* Wait for all outstanding IOs to be flushed
* to disk before making further progress
*/
#ifdef DEBUG
if (inject_error.delay.rebuid_io_quiesce_check_by_pass)
continue;
#endif
while (1) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this may need to be executed only in the thread where we had set these flags

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think after discussion this code have no side effect.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes

if (!zinfo->quiesce_done)
sleep(1);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a doubt. If replica gets disconnected from the target during rebuilding and quiescing was requested then will this thread exit?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

current zfs takes care that - if either mgmt or data conn breaks, any rebuilding related threads also will be exited.
With current quiescing code also, I don't think there will be any issue, except that, it may be delayed to break rebuild threads.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I is fixed now.

else
break;
}
continue;
}
ASSERT((hdr.opcode == ZVOL_OPCODE_READ) &&
Expand Down Expand Up @@ -1828,6 +1862,19 @@ uzfs_zvol_io_receiver(void *arg)
/* Take refcount for uzfs_zvol_worker to work on it */
uzfs_zinfo_take_refcnt(zinfo);
zio_cmd->zinfo = zinfo;

/*
* Rebuild want to take consistent snapshot
* so it asked to flush all outstanding IOs
* before taking snapshot on rebuild_clone
*/
if (zinfo->quiesce_requested) {
ASSERT(ZVOL_IS_REBUILDING_AFS(zinfo->main_zv));
taskq_wait_outstanding(zinfo->uzfs_zvol_taskq, 0);
zinfo->quiesce_requested = 0;
zinfo->quiesce_done = 1;
}

taskq_dispatch(zinfo->uzfs_zvol_taskq, uzfs_zvol_worker,
zio_cmd, TQ_SLEEP);
}
Expand Down
2 changes: 2 additions & 0 deletions lib/libzrepl/mgmt_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -1230,10 +1230,12 @@ handle_start_rebuild_req(uzfs_mgmt_conn_t *conn, zvol_io_hdr_t *hdrp,

memset(&zinfo->main_zv->rebuild_info, 0,
sizeof (zvol_rebuild_info_t));
#if 0
if (zinfo->checkpointed_ionum >= max_ioseq)
uzfs_zvol_set_rebuild_status(zinfo->main_zv,
ZVOL_REBUILDING_AFS);
else
#endif
uzfs_zvol_set_rebuild_status(zinfo->main_zv,
ZVOL_REBUILDING_SNAP);

Expand Down
8 changes: 8 additions & 0 deletions tests/cbtest/gtest/test_uzfs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,9 @@ uzfs_mock_rebuild_scanner_rebuild_comp(void *arg)
/* Read ZVOL_OPCODE_REBUILD_STEP */
uzfs_mock_rebuild_scanner_read_rebuild_step(fd, &hdr);

#ifdef DEBUG
inject_error.delay.rebuid_io_quiesce_check_by_pass = 1;
#endif
/* Write ZVOL_OPCODE_REBUILD_ALL_SNAP_DONE */
uzfs_mock_rebuild_scanner_write_snap_done(fd, &hdr);
while (1) {
Expand Down Expand Up @@ -377,6 +380,9 @@ uzfs_mock_rebuild_scanner_rebuild_comp(void *arg)
EXPECT_EQ(hdr.opcode, ZVOL_OPCODE_REBUILD_COMPLETE);
EXPECT_EQ(hdr.status, ZVOL_OP_STATUS_OK);

#ifdef DEBUG
inject_error.delay.rebuid_io_quiesce_check_by_pass = 0;
#endif
exit:
shutdown(fd, SHUT_RDWR);
close(fd);
Expand Down Expand Up @@ -405,6 +411,7 @@ uzfs_mock_rebuild_scanner_snap_rebuild_related(void *arg)
if ((rebuild_test_case >= 8) && (rebuild_test_case <= 12)) {
#ifdef DEBUG
inject_error.delay.downgraded_replica_rebuild_size_set = 1;
inject_error.delay.rebuid_io_quiesce_check_by_pass = 1;
#endif
hdr.opcode = ZVOL_OPCODE_REBUILD_SNAP_DONE;
hdr.status = ZVOL_OP_STATUS_OK;
Expand Down Expand Up @@ -487,6 +494,7 @@ uzfs_mock_rebuild_scanner_snap_rebuild_related(void *arg)
}
#ifdef DEBUG
inject_error.delay.downgraded_replica_rebuild_size_set = 0;
inject_error.delay.rebuid_io_quiesce_check_by_pass = 0;
#endif

exit:
Expand Down