Skip to content

Commit

Permalink
US1394 During replica registration/handshake iSCSI controll need to s…
Browse files Browse the repository at this point in the history
* US1394 During replica registration/handshake, the iSCSI controller needs to share the block size and the "timeout value at which the target assumes a replica is non-responding"
* Replaced ZVOL_OPCODE_HANDSHAKE message with ZVOL_OPCODE_OPEN in data connection
* Lock related changes in zinfo (data structure that is used to maintain network related information, and its threads info)

Signed-off-by: Jan Kryl <[email protected]>
  • Loading branch information
Jan Kryl authored and vishnuitta committed May 19, 2018
1 parent 19a1f73 commit b7b28b8
Show file tree
Hide file tree
Showing 29 changed files with 746 additions and 703 deletions.
1 change: 1 addition & 0 deletions cmd/uzfs_test/uzfs_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,7 @@ open_ds(spa_t *spa, char *ds, zvol_state_t **zv)
exit(1);
}
uzfs_hold_dataset(*zv);
uzfs_update_metadata_granularity(*zv, 512);
}

void
Expand Down
4 changes: 2 additions & 2 deletions cmd/uzfs_test/uzfs_test_rebuilding.c
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ rebuild_replica_thread(void *arg)

uzfs_zvol_set_rebuild_status(to_zvol, ZVOL_REBUILDING_INIT);

uzfs_zvol_get_last_committed_io_no(from_zvol, &latest_io);
latest_io = uzfs_zvol_get_last_committed_io_no(from_zvol);
printf("io number... healthy replica:%lu degraded replica:%lu\n",
latest_io, r_info->base_io_num);
uzfs_zvol_set_rebuild_status(to_zvol, ZVOL_REBUILDING_IN_PROGRESS);
Expand Down Expand Up @@ -489,7 +489,7 @@ replica_writer_thread(void *arg)
* and continue to update last_committed_io_number in
* degraded replica.
*/
uzfs_zvol_get_last_committed_io_no(zvol2, &last_io_num);
last_io_num = uzfs_zvol_get_last_committed_io_no(zvol2);
rebuild_info.base_io_num = last_io_num;
} else if (now > replica_rebuild_start_time &&
!rebuilding_started) {
Expand Down
54 changes: 0 additions & 54 deletions cmd/uzfs_test/uzfs_zvol_zap.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#include <uzfs_mgmt.h>
#include <uzfs_zap.h>
#include <zrepl_mgmt.h>
#include <uzfs.h>
#include <uzfs_test.h>

/*
Expand Down Expand Up @@ -120,9 +119,6 @@ uzfs_zvol_zap_operation(void *arg)
zvol_state_t *zvol;
uzfs_zap_kv_t **kv_array;
int zap_count;
uint64_t txg1, txg2, txg3, txg4;
struct timespec ts;
int err1, err2;

open_pool(&spa);
open_ds(spa, ds, &zvol);
Expand Down Expand Up @@ -174,56 +170,6 @@ uzfs_zvol_zap_operation(void *arg)
now = gethrtime();
end = now + (hrtime_t)(total_time_in_sec * (hrtime_t)(NANOSEC));

/*
* uzfs_update_txg_zap_thread thread updates LAST_ITER_TXG
* at interval of txg_update_interval_time (10 minutes).
* For testing purpose, we are changing txg_update_interval_time
* to 5 seconds
*/
txg_update_interval_time = 5 * hz;

mutex_enter(&(uzfs_spa(spa)->mtx));
cv_signal(&(uzfs_spa(spa)->cv));
mutex_exit(&(uzfs_spa(spa)->mtx));

ts.tv_nsec = 0;
ts.tv_sec = 2 * (txg_update_interval_time / hz);

while (1) {
err1 = uzfs_read_last_iter_txg(spa, &txg1);
if ((err1 != 0) && (err1 != 2)) {
printf("error in reading last iter txg..\n");
exit(1);
}

txg2 = spa_last_synced_txg(spa);

/*
* do txg_wait_synced during each iteration to force
* txg to increase well from last synced txg
*/
txg_wait_synced(spa_get_dsl(spa), 0);
nanosleep(&ts, NULL);

err2 = uzfs_read_last_iter_txg(spa, &txg3);
if ((err2 != 0) && (err2 != 2)) {
printf("error in reading last iter txg..\n");
exit(1);
}

txg4 = spa_last_synced_txg(spa);

if (txg2 != txg4)
if ((txg1 == txg3) && ((err1 == 0) || (err2 == 0))) {
printf("doesn't seem to be updating txg..\n");
exit(1);
}

now = gethrtime();
if (now > end)
break;
}

uzfs_close_dataset(zvol);
uzfs_close_pool(spa);
}
23 changes: 19 additions & 4 deletions cmd/uzfs_test/zrepl_utest.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ zrepl_utest_mgmt_hs_io_conn(char *volname, int mgmt_fd)
int io_fd = 0;
mgmt_ack_t *mgmt_ack;
zvol_io_hdr_t hdr;
zvol_op_open_data_t open_data;
struct sockaddr_in replica_io_addr;

bzero(&hdr, sizeof (hdr));
Expand Down Expand Up @@ -139,15 +140,29 @@ zrepl_utest_mgmt_hs_io_conn(char *volname, int mgmt_fd)
return (-1);
}

hdr.opcode = ZVOL_OPCODE_OPEN;
hdr.len = sizeof (open_data);
open_data.tgt_block_size = 4096;
open_data.timeout = 120;
strncpy(open_data.volname, volname, sizeof (open_data.volname));

rc = write(io_fd, (void *)&hdr, sizeof (zvol_io_hdr_t));
if (rc == -1) {
printf("During handshake, Write error\n");
printf("During zvol open, Write error\n");
return (rc);
}

rc = write(io_fd, volname, hdr.len);
rc = write(io_fd, &open_data, hdr.len);
if (rc == -1) {
printf("During volname send, Write error\n");
printf("During zvol open, Write error\n");
return (rc);
}
rc = read(io_fd, &hdr, sizeof (hdr));
if (rc == -1) {
printf("During open reply read, Read error\n");
return (rc);
}
if (hdr.status != ZVOL_OP_STATUS_OK) {
printf("Failed to open zvol for IO\n");
return (rc);
}
printf("Data-IO connection to volume:%s passed\n", volname);
Expand Down
101 changes: 93 additions & 8 deletions cmd/zrepl/data_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
* Copyright (c) 2018 Cloudbyte. All rights reserved.
*/

#include <sys/prctl.h>
#include <netdb.h>
#include <sys/types.h>
#include <sys/socket.h>
Expand All @@ -35,6 +36,9 @@

#define ZVOL_REBUILD_STEP_SIZE (128 * 1024 * 1024) // 128MB

static kcondvar_t timer_cv;
static kmutex_t timer_mtx;

/*
* Allocate zio command along with
* buffer needed for IO completion.
Expand All @@ -48,7 +52,7 @@ zio_cmd_alloc(zvol_io_hdr_t *hdr, int fd)
bcopy(hdr, &zio_cmd->hdr, sizeof (zio_cmd->hdr));
if ((hdr->opcode == ZVOL_OPCODE_READ) ||
(hdr->opcode == ZVOL_OPCODE_WRITE) ||
(hdr->opcode == ZVOL_OPCODE_HANDSHAKE)) {
(hdr->opcode == ZVOL_OPCODE_OPEN)) {
zio_cmd->buf = kmem_zalloc(sizeof (char) * hdr->len, KM_SLEEP);
}

Expand All @@ -67,7 +71,7 @@ zio_cmd_free(zvol_io_cmd_t **cmd)
switch (opcode) {
case ZVOL_OPCODE_READ:
case ZVOL_OPCODE_WRITE:
case ZVOL_OPCODE_HANDSHAKE:
case ZVOL_OPCODE_OPEN:
if (zio_cmd->buf != NULL) {
kmem_free(zio_cmd->buf, zio_cmd->hdr.len);
}
Expand Down Expand Up @@ -136,6 +140,7 @@ uzfs_submit_writes(zvol_info_t *zinfo, zvol_io_cmd_t *zio_cmd)
size_t data_offset = hdr->offset;
size_t remain = hdr->len;
int rc = 0;
uint64_t running_ionum;
is_rebuild = hdr->flags & ZVOL_OP_FLAG_REBUILD;

while (remain > 0) {
Expand All @@ -154,6 +159,13 @@ uzfs_submit_writes(zvol_info_t *zinfo, zvol_io_cmd_t *zio_cmd)
write_hdr->len, &metadata, is_rebuild);
if (rc != 0)
break;
/* Update the highest ionum used for checkpointing */
running_ionum = zinfo->running_ionum;
while (running_ionum < write_hdr->io_num) {
atomic_cas_64(&zinfo->running_ionum, running_ionum,
write_hdr->io_num);
running_ionum = zinfo->running_ionum;
}

datap += write_hdr->len;
remain -= write_hdr->len;
Expand Down Expand Up @@ -237,7 +249,12 @@ uzfs_zvol_worker(void *arg)
goto drop_refcount;
}

(void) pthread_mutex_lock(&zinfo->complete_queue_mutex);
(void) pthread_mutex_lock(&zinfo->zinfo_mutex);
if (!zinfo->is_io_ack_sender_created) {
(void) pthread_mutex_unlock(&zinfo->zinfo_mutex);
zio_cmd_free(&zio_cmd);
goto drop_refcount;
}
STAILQ_INSERT_TAIL(&zinfo->complete_queue, zio_cmd, cmd_link);
if (write) {
zinfo->write_req_received_cnt++;
Expand All @@ -248,8 +265,7 @@ uzfs_zvol_worker(void *arg)
if (zinfo->io_ack_waiting) {
rc = pthread_cond_signal(&zinfo->io_ack_cond);
}

(void) pthread_mutex_unlock(&zinfo->complete_queue_mutex);
(void) pthread_mutex_unlock(&zinfo->zinfo_mutex);

drop_refcount:
uzfs_zinfo_drop_refcnt(zinfo, B_FALSE);
Expand All @@ -264,7 +280,7 @@ uzfs_zvol_rebuild_dw_replica(void *arg)
int rc = 0;
int sfd = -1;
uint64_t offset = 0;
uint64_t checkpointed_io_seq;
uint64_t checkpointed_ionum;
zvol_info_t *zinfo = NULL;
zvol_state_t *zvol_state;
zvol_io_cmd_t *zio_cmd = NULL;
Expand All @@ -285,7 +301,7 @@ uzfs_zvol_rebuild_dw_replica(void *arg)
}

/* Set in-progress state now */
uzfs_zvol_get_last_committed_io_no(zinfo->zv, &checkpointed_io_seq);
checkpointed_ionum = uzfs_zvol_get_last_committed_io_no(zinfo->zv);
zvol_state = zinfo->zv;
bzero(&hdr, sizeof (hdr));
hdr.status = ZVOL_OP_STATUS_OK;
Expand Down Expand Up @@ -327,6 +343,7 @@ uzfs_zvol_rebuild_dw_replica(void *arg)
uzfs_zvol_set_rebuild_status(zinfo->zv,
ZVOL_REBUILDING_DONE);
uzfs_zvol_set_status(zinfo->zv, ZVOL_STATUS_HEALTHY);
uzfs_update_ionum_interval(zinfo, 0);
}
ZREPL_ERRLOG("Rebuilding on Replica:%s completed\n",
zinfo->name);
Expand All @@ -336,7 +353,7 @@ uzfs_zvol_rebuild_dw_replica(void *arg)
hdr.status = ZVOL_OP_STATUS_OK;
hdr.version = REPLICA_VERSION;
hdr.opcode = ZVOL_OPCODE_REBUILD_STEP;
hdr.checkpointed_io_seq = checkpointed_io_seq;
hdr.checkpointed_io_seq = checkpointed_ionum;
hdr.offset = offset;
hdr.len = ZVOL_REBUILD_STEP_SIZE;
rc = uzfs_zvol_socket_write(sfd, (char *)&hdr, sizeof (hdr));
Expand Down Expand Up @@ -403,3 +420,71 @@ uzfs_zvol_rebuild_dw_replica(void *arg)

zk_thread_exit();
}

/*
 * Body of the "zvol_timer" thread.
 *
 * Initializes timer_mtx/timer_cv and then loops forever.  On every pass it
 * walks zvol_list (under zvol_list_mutex) and, for each zvol whose status is
 * ZVOL_STATUS_HEALTHY and whose checkpoint is due, stores the currently
 * recorded checkpointed_ionum on the zvol and then advances
 * checkpointed_ionum to running_ionum.  Afterwards it sleeps on timer_cv
 * until the earliest upcoming deadline (at most 10 minutes) or until it is
 * signalled.
 *
 * NOTE(review): a healthy zvol with update_ionum_interval == 0 makes the
 * computed wait 0 seconds - confirm the interval is always non-zero by the
 * time a zvol becomes healthy.
 */
void
uzfs_zvol_timer_thread(void)
{
	zvol_info_t *zinfo;
	time_t now, due, wait_secs;

	mutex_init(&timer_mtx, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&timer_cv, NULL, CV_DEFAULT, NULL);
	prctl(PR_SET_NAME, "zvol_timer", 0, 0, 0);

	mutex_enter(&timer_mtx);
	for (;;) {
		/* Upper bound for one sleep: rescan at least every 10mins. */
		wait_secs = 600;

		mutex_enter(&zvol_list_mutex);
		now = time(NULL);
		SLIST_FOREACH(zinfo, &zvol_list, zinfo_next) {
			/* Only healthy zvols are checkpointed. */
			if (uzfs_zvol_get_status(zinfo->zv) !=
			    ZVOL_STATUS_HEALTHY)
				continue;

			due = zinfo->checkpointed_time +
			    zinfo->update_ionum_interval;
			if (due <= now) {
				fprintf(stderr,
				    "Checkpointing ionum %lu on %s\n",
				    zinfo->checkpointed_ionum, zinfo->name);
				/*
				 * Persist the value recorded at the previous
				 * checkpoint, then remember the running one
				 * for the next round.
				 */
				uzfs_zvol_store_last_committed_io_no(zinfo->zv,
				    zinfo->checkpointed_ionum);
				zinfo->checkpointed_ionum =
				    zinfo->running_ionum;
				zinfo->checkpointed_time = now;
				due = now + zinfo->update_ionum_interval;
			}

			/* Track the nearest deadline over all zvols. */
			if (due - now < wait_secs)
				wait_secs = due - now;
		}
		mutex_exit(&zvol_list_mutex);

		(void) cv_timedwait(&timer_cv, &timer_mtx,
		    ddi_get_lbolt() + SEC_TO_TICK(wait_secs));
	}

	/* Not reached: the loop above has no exit. */
	mutex_exit(&timer_mtx);
	mutex_destroy(&timer_mtx);
	cv_destroy(&timer_cv);
}

/*
 * Update the checkpoint interval of a zvol and wake up the timer thread so
 * that it can adjust to the new value.
 *
 * zinfo   - zvol whose update_ionum_interval is updated.
 * timeout - new interval.  If it is zero, the stored interval is left
 *           untouched and the timer thread is just woken up (used when the
 *           zvol state is changed, to make the timer thread aware of it).
 *
 * A non-zero timeout equal to the interval already stored is a no-op: there
 * is nothing for the timer thread to re-evaluate.  The "unchanged" shortcut
 * is deliberately not applied to a zero timeout; otherwise a pure wake-up
 * request would be dropped whenever the stored interval happens to be zero.
 *
 * The interval is read and written, and timer_cv is signalled, with
 * timer_mtx held.
 */
void
uzfs_update_ionum_interval(zvol_info_t *zinfo, uint32_t timeout)
{
	mutex_enter(&timer_mtx);
	if (timeout != 0) {
		if (zinfo->update_ionum_interval == timeout) {
			mutex_exit(&timer_mtx);
			return;
		}
		zinfo->update_ionum_interval = timeout;
	}
	cv_signal(&timer_cv);
	mutex_exit(&timer_mtx);
}
2 changes: 2 additions & 0 deletions cmd/zrepl/data_conn.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,7 @@ int uzfs_zvol_socket_read(int fd, char *buf, uint64_t nbytes);
int uzfs_zvol_socket_write(int fd, char *buf, uint64_t nbytes);
void uzfs_zvol_worker(void *arg);
void uzfs_zvol_rebuild_dw_replica(void *arg);
void uzfs_update_ionum_interval(zvol_info_t *zinfo, uint32_t timeout);
void uzfs_zvol_timer_thread(void);

#endif /* _REBUILD_H */
13 changes: 10 additions & 3 deletions cmd/zrepl/mgmt_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -491,8 +491,14 @@ uzfs_zvol_mgmt_do_handshake(uzfs_mgmt_conn_t *conn, zvol_io_hdr_t *hdrp,
* hold dataset during handshake if objset is NULL
* no critical section is needed here as rebuild & handshake won't happen at the same time
*/
if (zv->zv_objset == NULL)
uzfs_hold_dataset(zv);
if (zv->zv_objset == NULL) {
if (uzfs_hold_dataset(zv) != 0) {
fprintf(stderr, "Failed to hold zvol during "
"handshake\n");
return (reply_error(conn, ZVOL_OP_STATUS_FAILED,
hdrp->opcode, hdrp->io_seq, CS_INIT));
}
}

/*
* We don't use fsid_guid because that one is not guaranteed
Expand All @@ -507,7 +513,7 @@ uzfs_zvol_mgmt_do_handshake(uzfs_mgmt_conn_t *conn, zvol_io_hdr_t *hdrp,
hdr.io_seq = hdrp->io_seq;
hdr.len = sizeof (mgmt_ack);
hdr.status = ZVOL_OP_STATUS_OK;
uzfs_zvol_get_last_committed_io_no(zv, &hdr.checkpointed_io_seq);
hdr.checkpointed_io_seq = uzfs_zvol_get_last_committed_io_no(zv);

return (reply_data(conn, &hdr, &mgmt_ack, sizeof (mgmt_ack)));
}
Expand Down Expand Up @@ -568,6 +574,7 @@ uzfs_zvol_rebuild_dw_replica_start(uzfs_mgmt_conn_t *conn, zvol_io_hdr_t *hdrp,
ZVOL_REBUILDING_DONE);
uzfs_zvol_set_status(zinfo->zv,
ZVOL_STATUS_HEALTHY);
uzfs_update_ionum_interval(zinfo, 0);
printf("Rebuild of replica %s completed\n",
zinfo->name);
uzfs_zinfo_drop_refcnt(zinfo, B_FALSE);
Expand Down
Loading

0 comments on commit b7b28b8

Please sign in to comment.