Skip to content

Commit

Permalink
[TA2577] Updating ZAP (degraded_io_seq) at every 5 seconds if zvol is…
Browse files Browse the repository at this point in the history
… degraded (openzfs#101)

- Initializing zinfo's checkpointed io sequence number with old value stored in ZAP

Signed-off-by: mayank <[email protected]>
  • Loading branch information
mynktl authored and vishnuitta committed Aug 22, 2018
1 parent 42514bb commit d50339a
Show file tree
Hide file tree
Showing 8 changed files with 152 additions and 41 deletions.
11 changes: 7 additions & 4 deletions cmd/uzfs_test/uzfs_test_rebuilding.c
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,8 @@ rebuild_replica_thread(void *arg)

uzfs_zvol_set_rebuild_status(to_zvol, ZVOL_REBUILDING_INIT);

latest_io = uzfs_zvol_get_last_committed_io_no(from_zvol);
latest_io = uzfs_zvol_get_last_committed_io_no(from_zvol,
HEALTHY_IO_SEQNUM);
printf("io number... healthy replica:%lu degraded replica:%lu\n",
latest_io, r_info->base_io_num);
uzfs_zvol_set_rebuild_status(to_zvol, ZVOL_REBUILDING_IN_PROGRESS);
Expand Down Expand Up @@ -452,10 +453,11 @@ replica_writer_thread(void *arg)
* update ZAP entries for io_number frequently.
*/
if (!(io_num % 30)) {
uzfs_zvol_store_last_committed_io_no(zvol1, io_num);
uzfs_zvol_store_last_committed_io_no(zvol1, io_num,
HEALTHY_IO_SEQNUM);
if (replica_active)
uzfs_zvol_store_last_committed_io_no(zvol2,
io_num);
io_num, HEALTHY_IO_SEQNUM);
}

if (replica_active) {
Expand Down Expand Up @@ -489,7 +491,8 @@ replica_writer_thread(void *arg)
* and continue to update last_committed_io_number in
* degraded replica.
*/
last_io_num = uzfs_zvol_get_last_committed_io_no(zvol2);
last_io_num = uzfs_zvol_get_last_committed_io_no(zvol2,
HEALTHY_IO_SEQNUM);
rebuild_info.base_io_num = last_io_num;
} else if (now > replica_rebuild_start_time &&
!rebuilding_started) {
Expand Down
21 changes: 19 additions & 2 deletions include/zrepl_mgmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,13 @@ typedef struct zvol_info_s {
uint64_t zvol_guid;
uint64_t running_ionum;
uint64_t checkpointed_ionum;
uint64_t degraded_checkpointed_ionum;
time_t checkpointed_time; /* time of the last chkpoint */
/*
* time at which the checkpointed io sequence number was last stored
* while the ZVOL was in degraded state
*/
time_t degraded_checkpointed_time;
uint32_t update_ionum_interval; /* how often to update io seq */
taskq_t *uzfs_zvol_taskq; /* Taskq for minor management */

Expand Down Expand Up @@ -180,9 +186,9 @@ extern int uzfs_zinfo_init(void *zv, const char *ds_name,
extern zvol_info_t *uzfs_zinfo_lookup(const char *name);
extern void uzfs_zinfo_replay_zil_all(void);
extern int uzfs_zinfo_destroy(const char *ds_name, spa_t *spa);
uint64_t uzfs_zvol_get_last_committed_io_no(zvol_state_t *zv);
uint64_t uzfs_zvol_get_last_committed_io_no(zvol_state_t *zv, char *key);
void uzfs_zvol_store_last_committed_io_no(zvol_state_t *zv,
uint64_t io_seq);
uint64_t io_seq, char *key);
extern int set_socket_keepalive(int sfd);
extern int create_and_bind(const char *port, int bind_needed,
boolean_t nonblocking);
Expand All @@ -208,6 +214,17 @@ uzfs_zinfo_take_refcnt(zvol_info_t *zinfo)
atomic_inc_64(&zinfo->refcnt);
}

/*
* ZAP keys under which the io sequence number is stored:
* one for the healthy state and one for the degraded state
*/
#define HEALTHY_IO_SEQNUM "io_seq"
#define DEGRADED_IO_SEQNUM "degraded_io_seq"

/*
* update interval (in seconds) for the io sequence number in degraded mode
*/
#define DEGRADED_IO_UPDATE_INTERVAL 5

#ifdef __cplusplus
}
#endif
Expand Down
6 changes: 6 additions & 0 deletions include/zrepl_prot.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ struct zvol_io_hdr {
*/
uint64_t len;
uint64_t checkpointed_io_seq;
uint64_t checkpointed_degraded_io_seq;
} __attribute__((packed));

typedef struct zvol_io_hdr zvol_io_hdr_t;
Expand Down Expand Up @@ -196,6 +197,11 @@ struct zvol_io_rw_hdr {
uint64_t len;
} __attribute__((packed));

/*
* Traverse a singly-linked list in a way that tolerates the current
* element being unlinked inside the loop body.
*
* var   - iteration variable, set to each element in turn
* head  - pointer to the list head
* field - name of the SLIST_ENTRY member inside the element type
* tvar  - scratch variable of the same type as var; it receives the
*         successor of var before the body runs, and the loop advances
*         from that saved value rather than from var itself
*
* The ", 1" in the condition keeps the loop going when the saved
* successor is NULL, so the last element is still visited.
*/
#define SLIST_FOREACH_SAFE(var, head, field, tvar) \
for ((var) = SLIST_FIRST((head)); \
(var) && ((tvar) = SLIST_NEXT((var), field), 1); \
(var) = (tvar))

#ifdef __cplusplus
}
#endif
Expand Down
14 changes: 5 additions & 9 deletions lib/libzpool/zrepl_mgmt.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,6 @@ void (*zinfo_destroy_hook)(zvol_info_t *);

struct zvol_list zvol_list;

#define SLIST_FOREACH_SAFE(var, head, field, tvar) \
for ((var) = SLIST_FIRST((head)); \
(var) && ((tvar) = SLIST_NEXT((var), field), 1); \
(var) = (tvar))

static int uzfs_zinfo_free(zvol_info_t *zinfo);

enum zrepl_log_level zrepl_log_level;
Expand Down Expand Up @@ -392,10 +387,10 @@ uzfs_zinfo_free(zvol_info_t *zinfo)
}

uint64_t
uzfs_zvol_get_last_committed_io_no(zvol_state_t *zv)
uzfs_zvol_get_last_committed_io_no(zvol_state_t *zv, char *key)
{
uzfs_zap_kv_t zap;
zap.key = "io_seq";
zap.key = key;
zap.value = 0;
zap.size = sizeof (uint64_t);

Expand All @@ -404,11 +399,12 @@ uzfs_zvol_get_last_committed_io_no(zvol_state_t *zv)
}

void
uzfs_zvol_store_last_committed_io_no(zvol_state_t *zv, uint64_t io_seq)
uzfs_zvol_store_last_committed_io_no(zvol_state_t *zv, uint64_t io_seq,
char *key)
{
uzfs_zap_kv_t *kv_array[0];
uzfs_zap_kv_t zap;
zap.key = "io_seq";
zap.key = key;
zap.value = io_seq;
zap.size = sizeof (io_seq);

Expand Down
83 changes: 75 additions & 8 deletions lib/libzrepl/data_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ uint16_t rebuild_io_server_port = REBUILD_IO_SERVER_PORT;
kcondvar_t timer_cv;
kmutex_t timer_mtx;

/*
* Generic singly-linked list node that carries an untyped payload pointer.
* The timer thread in this file stores zvol_info_t pointers in `node`.
*/
typedef struct singly_node_list_s {
void *node;
SLIST_ENTRY(singly_node_list_s) node_next;
} singly_node_list_t;

/* List head type for lists built from singly_node_list_t nodes. */
SLIST_HEAD(singly_node_list, singly_node_list_s);

/*
* Allocate zio command along with
* buffer needed for IO completion.
Expand Down Expand Up @@ -434,7 +441,8 @@ uzfs_zvol_rebuild_dw_replica(void *arg)
rc = 0;

/* Set in-progress state now */
checkpointed_ionum = uzfs_zvol_get_last_committed_io_no(zinfo->zv);
checkpointed_ionum = uzfs_zvol_get_last_committed_io_no(zinfo->zv,
HEALTHY_IO_SEQNUM);
zvol_state = zinfo->zv;
bzero(&hdr, sizeof (hdr));
hdr.status = ZVOL_OP_STATUS_OK;
Expand Down Expand Up @@ -591,19 +599,38 @@ uzfs_zvol_timer_thread(void)
zvol_info_t *zinfo;
time_t min_interval;
time_t now, next_check;
struct singly_node_list zvol_node_list, free_node_list;
singly_node_list_t *n_zinfo, *t_zinfo;

init_zrepl();
prctl(PR_SET_NAME, "zvol_timer", 0, 0, 0);
SLIST_INIT(&zvol_node_list);
SLIST_INIT(&free_node_list);

mutex_enter(&timer_mtx);
while (1) {
min_interval = 600; // we check intervals at least every 10mins
min_interval = 5; // we check intervals at least every 5 sec

mutex_enter(&zvol_list_mutex);
now = time(NULL);
SLIST_FOREACH(zinfo, &zvol_list, zinfo_next) {
if (!SLIST_EMPTY(&free_node_list)) {
n_zinfo = SLIST_FIRST(&free_node_list);
SLIST_REMOVE_HEAD(&free_node_list, node_next);
} else {
n_zinfo = kmem_alloc(sizeof (*n_zinfo),
KM_SLEEP);
}
uzfs_zinfo_take_refcnt(zinfo);
n_zinfo->node = (void *) zinfo;
SLIST_INSERT_HEAD(&zvol_node_list, n_zinfo, node_next);
}
mutex_exit(&zvol_list_mutex);

next_check = now = time(NULL);
SLIST_FOREACH(n_zinfo, &zvol_node_list, node_next) {
zinfo = (zvol_info_t *)n_zinfo->node;
if (uzfs_zvol_get_status(zinfo->zv) ==
ZVOL_STATUS_HEALTHY) {
ZVOL_STATUS_HEALTHY && zinfo->zv->zv_objset) {
next_check = zinfo->checkpointed_time +
zinfo->update_ionum_interval;
if (next_check <= now) {
Expand All @@ -613,25 +640,65 @@ uzfs_zvol_timer_thread(void)
zinfo->name);
uzfs_zvol_store_last_committed_io_no(
zinfo->zv,
zinfo->checkpointed_ionum);
zinfo->checkpointed_ionum,
HEALTHY_IO_SEQNUM);
zinfo->checkpointed_ionum =
zinfo->running_ionum;
zinfo->checkpointed_time = now;
next_check = now +
zinfo->update_ionum_interval;
}
if (min_interval > next_check - now)
min_interval = next_check - now;
} else if (uzfs_zvol_get_status(zinfo->zv) ==
ZVOL_STATUS_DEGRADED && zinfo->zv->zv_objset) {
next_check = zinfo->degraded_checkpointed_time
+ DEGRADED_IO_UPDATE_INTERVAL;
if (next_check <= now &&
zinfo->degraded_checkpointed_ionum !=
zinfo->running_ionum) {
zinfo->degraded_checkpointed_ionum =
zinfo->running_ionum;
LOG_DEBUG("Checkpointing ionum "
"%lu on %s for degraded mode",
zinfo->degraded_checkpointed_ionum,
zinfo->name);
uzfs_zvol_store_last_committed_io_no(
zinfo->zv,
zinfo->degraded_checkpointed_ionum,
DEGRADED_IO_SEQNUM);
zinfo->degraded_checkpointed_time =
now;
next_check = now +
DEGRADED_IO_UPDATE_INTERVAL;
}
}

if (next_check > now &&
(min_interval > next_check - now))
min_interval = next_check - now;
}
mutex_exit(&zvol_list_mutex);

(void) cv_timedwait(&timer_cv, &timer_mtx, ddi_get_lbolt() +
SEC_TO_TICK(min_interval));

SLIST_FOREACH_SAFE(n_zinfo, &zvol_node_list,
node_next, t_zinfo) {
SLIST_REMOVE(&zvol_node_list, n_zinfo,
singly_node_list_s, node_next);
zinfo = (zvol_info_t *)n_zinfo->node;
uzfs_zinfo_drop_refcnt(zinfo);
SLIST_INSERT_HEAD(&free_node_list, n_zinfo, node_next);
}
}

mutex_exit(&timer_mtx);
mutex_destroy(&timer_mtx);
cv_destroy(&timer_cv);

SLIST_FOREACH_SAFE(n_zinfo, &free_node_list, node_next, t_zinfo) {
SLIST_REMOVE(&free_node_list, n_zinfo, singly_node_list_s,
node_next);
kmem_free(n_zinfo, sizeof (*n_zinfo));
}
}

/*
Expand Down
17 changes: 14 additions & 3 deletions lib/libzrepl/mgmt_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,17 @@ uzfs_zvol_mgmt_do_handshake(uzfs_mgmt_conn_t *conn, zvol_io_hdr_t *hdrp,
hdr.io_seq = hdrp->io_seq;
hdr.len = sizeof (mgmt_ack);
hdr.status = ZVOL_OP_STATUS_OK;
hdr.checkpointed_io_seq = uzfs_zvol_get_last_committed_io_no(zv);

zinfo->checkpointed_ionum = uzfs_zvol_get_last_committed_io_no(zv,
HEALTHY_IO_SEQNUM);
zinfo->degraded_checkpointed_ionum =
uzfs_zvol_get_last_committed_io_no(zv, DEGRADED_IO_SEQNUM);
zinfo->running_ionum = zinfo->degraded_checkpointed_ionum;
LOG_INFO("IO sequence number:%lu Degraded IO sequence number:%lu\n",
zinfo->checkpointed_ionum, zinfo->degraded_checkpointed_ionum);

hdr.checkpointed_io_seq = zinfo->checkpointed_ionum;
hdr.checkpointed_degraded_io_seq = zinfo->degraded_checkpointed_ionum;

return (reply_data(conn, &hdr, &mgmt_ack, sizeof (mgmt_ack)));
}
Expand Down Expand Up @@ -617,7 +627,7 @@ uzfs_zvol_create_snapshot_update_zap(zvol_info_t *zinfo,
mutex_enter(&zvol_list_mutex);

uzfs_zvol_store_last_committed_io_no(zinfo->zv,
snapshot_io_num -1);
snapshot_io_num -1, HEALTHY_IO_SEQNUM);
zinfo->checkpointed_ionum = snapshot_io_num -1;
zinfo->checkpointed_time = time(NULL);

Expand Down Expand Up @@ -655,7 +665,8 @@ uzfs_zvol_get_snap_dataset_with_io(zvol_info_t *zinfo,
return (ret);
}

(*snapshot_io_num) = uzfs_zvol_get_last_committed_io_no(*snap_zv);
(*snapshot_io_num) = uzfs_zvol_get_last_committed_io_no(*snap_zv,
HEALTHY_IO_SEQNUM);
return (ret);
}

Expand Down
2 changes: 1 addition & 1 deletion tests/cbtest/gtest/test_uzfs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -857,7 +857,7 @@ uzfs_mock_zvol_rebuild_dw_replica(void *arg)

send_hdr_again:
/* Set in-progress state now */
checkpointed_ionum = uzfs_zvol_get_last_committed_io_no(zinfo->zv);
checkpointed_ionum = uzfs_zvol_get_last_committed_io_no(zinfo->zv, (char *)HEALTHY_IO_SEQNUM);
zvol_state = zinfo->zv;
bzero(&hdr, sizeof (hdr));
hdr.status = ZVOL_OP_STATUS_OK;
Expand Down
Loading

0 comments on commit d50339a

Please sign in to comment.