Skip to content

Commit

Permalink
US1541 Debugging and logging at cStor target (openzfs#54)
Browse files Browse the repository at this point in the history
Signed-off-by: Jan Kryl <[email protected]>
  • Loading branch information
Jan Kryl committed Jun 26, 2018
1 parent 6e279e4 commit f4a98dc
Show file tree
Hide file tree
Showing 8 changed files with 207 additions and 205 deletions.
3 changes: 1 addition & 2 deletions cmd/zrepl/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ include $(top_srcdir)/config/Rules.am

# -Wnoformat-truncation to get rid of compiler warning for unchecked
# truncating snprintfs on gcc 7.1.1.
AM_CFLAGS += $(DEBUG_STACKFLAGS) $(FRAME_LARGER_THAN) $(NO_FORMAT_TRUNCATION)
AM_CPPFLAGS += -DDEBUG
AM_CFLAGS += $(FRAME_LARGER_THAN) $(NO_FORMAT_TRUNCATION)

DEFAULT_INCLUDES += \
-I$(top_srcdir)/include \
Expand Down
39 changes: 22 additions & 17 deletions cmd/zrepl/data_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,11 @@ uzfs_zvol_socket_read(int fd, char *buf, uint64_t nbytes)
while (nbytes) {
count = read(fd, (void *)p, nbytes);
if (count <= 0) {
ZREPL_ERRLOG("Read error:%d\n", errno);
if (count == 0) {
LOG_INFO("Connection closed");
} else {
LOG_ERRNO("Socket read error");
}
return (-1);
}
p += count;
Expand All @@ -116,7 +120,11 @@ uzfs_zvol_socket_write(int fd, char *buf, uint64_t nbytes)
while (nbytes) {
count = write(fd, (void *)p, nbytes);
if (count <= 0) {
ZREPL_ERRLOG("Write error:%d\n", errno);
if (count == 0) {
LOG_INFO("Connection closed");
} else {
LOG_ERRNO("Socket write error");
}
return (-1);
}
p += count;
Expand Down Expand Up @@ -233,8 +241,7 @@ uzfs_zvol_worker(void *arg)
}

if (rc != 0) {
ZREPL_ERRLOG("Zvol op_code :%d failed with "
"error: %d\n", hdr->opcode, errno);
LOG_ERR("OP code %d failed", hdr->opcode);
hdr->status = ZVOL_OP_STATUS_FAILED;
hdr->len = 0;
} else {
Expand Down Expand Up @@ -311,14 +318,14 @@ uzfs_zvol_rebuild_dw_replica(void *arg)

rc = uzfs_zvol_socket_write(sfd, (char *)&hdr, sizeof (hdr));
if (rc == -1) {
ZREPL_ERRLOG("Socket write failed, err: %d\n", errno);
LOG_ERRNO("Socket write failed");
goto exit;
}

rc = uzfs_zvol_socket_write(sfd, (void *)rebuild_args->zvol_name,
hdr.len);
if (rc == -1) {
ZREPL_ERRLOG("Socket write failed, err: %d\n", errno);
LOG_ERRNO("Socket write failed");
goto exit;
}

Expand All @@ -332,7 +339,7 @@ uzfs_zvol_rebuild_dw_replica(void *arg)
hdr.opcode = ZVOL_OPCODE_REBUILD_COMPLETE;
rc = uzfs_zvol_socket_write(sfd, (char *)&hdr, sizeof (hdr));
if (rc != 0) {
ZREPL_ERRLOG("Socket write failed, err: %d\n", errno);
LOG_ERRNO("Socket write failed");
goto exit;
}

Expand All @@ -345,8 +352,7 @@ uzfs_zvol_rebuild_dw_replica(void *arg)
uzfs_zvol_set_status(zinfo->zv, ZVOL_STATUS_HEALTHY);
uzfs_update_ionum_interval(zinfo, 0);
}
ZREPL_ERRLOG("Rebuilding on Replica:%s completed\n",
zinfo->name);
LOG_INFO("Rebuilding zvol %s completed", zinfo->name);
goto exit;
} else {
bzero(&hdr, sizeof (hdr));
Expand All @@ -358,7 +364,7 @@ uzfs_zvol_rebuild_dw_replica(void *arg)
hdr.len = ZVOL_REBUILD_STEP_SIZE;
rc = uzfs_zvol_socket_write(sfd, (char *)&hdr, sizeof (hdr));
if (rc != 0) {
ZREPL_ERRLOG("Socket write failed, err: %d\n", errno);
LOG_ERRNO("Socket write failed");
goto exit;
}
}
Expand All @@ -371,13 +377,13 @@ uzfs_zvol_rebuild_dw_replica(void *arg)

rc = uzfs_zvol_socket_read(sfd, (char *)&hdr, sizeof (hdr));
if (rc != 0) {
ZREPL_ERRLOG("Socket read failed, err: %d\n", errno);
LOG_ERRNO("Socket read failed");
goto exit;
}

if (hdr.opcode == ZVOL_OPCODE_REBUILD_STEP_DONE) {
offset += ZVOL_REBUILD_STEP_SIZE;
printf("ZVOL_OPCODE_REBUILD_STEP_DONE received\n");
LOG_DEBUG("ZVOL_OPCODE_REBUILD_STEP_DONE received");
goto next_step;
}

Expand All @@ -388,8 +394,7 @@ uzfs_zvol_rebuild_dw_replica(void *arg)
zio_cmd = zio_cmd_alloc(&hdr, sfd);
rc = uzfs_zvol_socket_read(sfd, zio_cmd->buf, hdr.len);
if (rc != 0) {
ZREPL_ERRLOG("Socket read failed with "
"error: %d\n", errno);
LOG_ERRNO("Socket read failed");
goto exit;
}

Expand All @@ -413,7 +418,7 @@ uzfs_zvol_rebuild_dw_replica(void *arg)
if (rc == -1)
uzfs_zvol_set_rebuild_status(zinfo->zv, ZVOL_REBUILDING_FAILED);

ZREPL_LOG("uzfs_zvol_rebuild_dw_replica thread exiting, volume:%s\n",
LOG_DEBUG("uzfs_zvol_rebuild_dw_replica thread exiting, zvol: %s",
zinfo->name);
/* Parent thread have taken refcount, drop it now */
uzfs_zinfo_drop_refcnt(zinfo, B_FALSE);
Expand Down Expand Up @@ -443,8 +448,8 @@ uzfs_zvol_timer_thread(void)
next_check = zinfo->checkpointed_time +
zinfo->update_ionum_interval;
if (next_check <= now) {
fprintf(stderr, "Checkpointing ionum "
"%lu on %s\n",
LOG_DEBUG("Checkpointing ionum "
"%lu on %s",
zinfo->checkpointed_ionum,
zinfo->name);
uzfs_zvol_store_last_committed_io_no(
Expand Down
79 changes: 29 additions & 50 deletions cmd/zrepl/mgmt_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,33 +63,9 @@
* their state.
*/

/* To print verbose debug messages uncomment the following line */
#define MGMT_CONN_DEBUG 1

#ifdef MGMT_CONN_DEBUG
static const char *
gettimestamp(void)
{
struct timeval tv;
static char buf[20];
struct tm *timeinfo;
unsigned int ms;

gettimeofday(&tv, NULL);
timeinfo = localtime(&tv.tv_sec);
ms = tv.tv_usec / 1000;

strftime(buf, sizeof (buf), "%H:%M:%S.", timeinfo);
snprintf(buf + 9, sizeof (buf) - 9, "%u", ms);

return (buf);
}
#define DBGCONN(c, fmt, ...) fprintf(stderr, "%s [tgt %s:%u]: " fmt "\n", \
gettimestamp(), \
/* LOG_DEBUG wrapper which puts target address prefix to message */
#define DBGCONN(c, fmt, ...) LOG_DEBUG("[tgt %s:%u]: " fmt, \
(c)->conn_host, (c)->conn_port, ##__VA_ARGS__)
#else
#define DBGCONN(c, fmt, ...)
#endif

/* Max # of events from epoll processed at once */
#define MAX_EVENTS 10
Expand Down Expand Up @@ -164,7 +140,8 @@ close_conn(uzfs_mgmt_conn_t *conn)
{
async_task_t *async_task;

DBGCONN(conn, "Closing the connection");
if (conn->conn_state != CS_CONNECT)
DBGCONN(conn, "Closing the connection");

/* Release resources tight to the conn */
if (conn->conn_buf != NULL) {
Expand Down Expand Up @@ -241,7 +218,8 @@ connect_to_tgt(uzfs_mgmt_conn_t *conn)
/* EINPROGRESS means that EPOLLOUT will tell us when connect is done */
if (rc != 0 && errno != EINPROGRESS) {
close(sfd);
DBGCONN(conn, "Failed to connect");
LOG_ERRNO("Failed to connect to %s:%d", conn->conn_host,
conn->conn_port);
return (-1);
}
return (sfd);
Expand Down Expand Up @@ -515,11 +493,11 @@ uzfs_zvol_mgmt_do_handshake(uzfs_mgmt_conn_t *conn, zvol_io_hdr_t *hdrp,
mgmt_ack_t mgmt_ack;
zvol_io_hdr_t hdr;

printf("Volume %s sent for enq\n", name);
LOG_INFO("Handshake on zvol %s", name);

bzero(&mgmt_ack, sizeof (mgmt_ack));
if (uzfs_zvol_get_ip(mgmt_ack.ip) == -1) {
fprintf(stderr, "Unable to get IP with err: %d\n", errno);
LOG_ERRNO("Unable to get IP");
return (reply_nodata(conn, ZVOL_OP_STATUS_FAILED, hdrp->opcode,
hdrp->io_seq));
}
Expand All @@ -535,8 +513,7 @@ uzfs_zvol_mgmt_do_handshake(uzfs_mgmt_conn_t *conn, zvol_io_hdr_t *hdrp,
*/
if (zv->zv_objset == NULL) {
if (uzfs_hold_dataset(zv) != 0) {
fprintf(stderr, "Failed to hold zvol during "
"handshake\n");
LOG_ERR("Failed to hold zvol during handshake");
return (reply_nodata(conn, ZVOL_OP_STATUS_FAILED,
hdrp->opcode, hdrp->io_seq));
}
Expand Down Expand Up @@ -638,7 +615,8 @@ uzfs_zvol_execute_async_command(void *arg)
case ZVOL_OPCODE_SNAP_CREATE:
rc = dmu_objset_snapshot_one(zinfo->name, snapname);
if (rc != 0) {
fprintf(stderr, "Snapshot create failed: %d\n", rc);
LOG_ERR("Failed to create %s@%s: %d",
zinfo->name, snapname, rc);
async_task->status = ZVOL_OP_STATUS_FAILED;
} else {
async_task->status = ZVOL_OP_STATUS_OK;
Expand All @@ -649,7 +627,8 @@ uzfs_zvol_execute_async_command(void *arg)
rc = dsl_destroy_snapshot(dataset, B_FALSE);
strfree(dataset);
if (rc != 0) {
fprintf(stderr, "Snapshot destroy failed: %d\n", rc);
LOG_ERR("Failed to destroy %s@%s: %d",
zinfo->name, snapname, rc);
async_task->status = ZVOL_OP_STATUS_FAILED;
} else {
async_task->status = ZVOL_OP_STATUS_OK;
Expand Down Expand Up @@ -717,14 +696,14 @@ uzfs_zvol_rebuild_dw_replica_start(uzfs_mgmt_conn_t *conn, zvol_io_hdr_t *hdrp,

for (; rebuild_op_cnt > 0; rebuild_op_cnt--, mack++) {
if (mack->volname[0] != '\0') {
printf("Replica %s at %s:%u helping in rebuild\n",
LOG_INFO("zvol %s at %s:%u helping in rebuild",
mack->volname, mack->ip, mack->port);
}
if (zinfo == NULL) {
zinfo = uzfs_zinfo_lookup(mack->dw_volname);
if ((zinfo == NULL) || (zinfo->mgmt_conn != conn)) {
printf("Replica %s not found or not matching "
"conn\n", mack->dw_volname);
LOG_ERR("zvol %s not found or not matching "
"connection", mack->dw_volname);
return (reply_nodata(conn,
ZVOL_OP_STATUS_FAILED,
hdrp->opcode, hdrp->io_seq));
Expand All @@ -744,7 +723,7 @@ uzfs_zvol_rebuild_dw_replica_start(uzfs_mgmt_conn_t *conn, zvol_io_hdr_t *hdrp,
uzfs_zvol_set_status(zinfo->zv,
ZVOL_STATUS_HEALTHY);
uzfs_update_ionum_interval(zinfo, 0);
printf("Rebuild of replica %s completed\n",
LOG_INFO("Rebuild of zvol %s completed",
zinfo->name);
uzfs_zinfo_drop_refcnt(zinfo, B_FALSE);
break;
Expand All @@ -754,8 +733,8 @@ uzfs_zvol_rebuild_dw_replica_start(uzfs_mgmt_conn_t *conn, zvol_io_hdr_t *hdrp,
} else {
if (strncmp(zinfo->name, mack->dw_volname, MAXNAMELEN)
!= 0) {
printf("Replica %s not matching with zinfo"
" %s\n", mack->dw_volname, zinfo->name);
LOG_ERR("zvol %s not matching with zinfo %s",
mack->dw_volname, zinfo->name);
return (reply_nodata(conn,
ZVOL_OP_STATUS_FAILED,
hdrp->opcode, hdrp->io_seq));
Expand All @@ -766,8 +745,8 @@ uzfs_zvol_rebuild_dw_replica_start(uzfs_mgmt_conn_t *conn, zvol_io_hdr_t *hdrp,
io_sfd = create_and_bind("", B_FALSE, B_FALSE);
if (io_sfd < 0) {
/* Fail this rebuild process entirely */
fprintf(stderr, "Rebuild IO socket create and bind"
" failed on volume: %s\n", zinfo->name);
LOG_ERR("Rebuild IO socket create and bind"
" failed on zvol: %s", zinfo->name);
uzfs_zvol_set_rebuild_status(zinfo->zv,
ZVOL_REBUILDING_FAILED);
uzfs_zinfo_drop_refcnt(zinfo, B_FALSE);
Expand Down Expand Up @@ -823,7 +802,7 @@ process_message(uzfs_mgmt_conn_t *conn)

if (((zinfo = uzfs_zinfo_lookup(zvol_name)) == NULL) ||
(zinfo->mgmt_conn != conn)) {
fprintf(stderr, "Unknown zvol: %s\n", zvol_name);
LOG_ERR("Unknown zvol: %s", zvol_name);
rc = reply_nodata(conn, ZVOL_OP_STATUS_FAILED,
hdrp->opcode, hdrp->io_seq);
break;
Expand Down Expand Up @@ -861,7 +840,7 @@ process_message(uzfs_mgmt_conn_t *conn)
zvol_name[payload_size] = '\0';
snap = strchr(zvol_name, '@');
if (snap == NULL) {
fprintf(stderr, "Invalid snapshot name: %s\n",
LOG_ERR("Invalid snapshot name: %s",
zvol_name);
rc = reply_nodata(conn, ZVOL_OP_STATUS_FAILED,
hdrp->opcode, hdrp->io_seq);
Expand All @@ -871,7 +850,7 @@ process_message(uzfs_mgmt_conn_t *conn)
/* ref will be released when async command has finished */
if (((zinfo = uzfs_zinfo_lookup(zvol_name)) == NULL) ||
(zinfo->mgmt_conn != conn)) {
fprintf(stderr, "Unknown zvol: %s\n", zvol_name);
LOG_ERR("Unknown zvol: %s", zvol_name);
rc = reply_nodata(conn, ZVOL_OP_STATUS_FAILED,
hdrp->opcode, hdrp->io_seq);
break;
Expand Down Expand Up @@ -946,7 +925,7 @@ move_to_next_state(uzfs_mgmt_conn_t *conn)
kmem_free(conn->conn_buf, sizeof (uint16_t));
conn->conn_buf = NULL;
if (vers != REPLICA_VERSION) {
fprintf(stderr, "invalid replica protocol version %d\n",
LOG_ERR("Invalid replica protocol version %d",
vers);
rc = reply_nodata(conn, ZVOL_OP_STATUS_VERSION_MISMATCH,
0, 0);
Expand Down Expand Up @@ -1068,10 +1047,10 @@ uzfs_zvol_mgmt_thread(void *arg)

if (events[i].events & EPOLLERR) {
if (conn->conn_state == CS_CONNECT) {
DBGCONN(conn, "Failed to connect");
LOG_ERR("Failed to connect to %s:%d",
conn->conn_host, conn->conn_port);
} else {
fprintf(stderr, "Error on connection "
"to %s:%d\n",
LOG_ERR("Error on connection to %s:%d",
conn->conn_host, conn->conn_port);
}
if (close_conn(conn) != 0)
Expand Down Expand Up @@ -1157,6 +1136,6 @@ uzfs_zvol_mgmt_thread(void *arg)
mutex_exit(&async_tasks_mtx);
mutex_destroy(&async_tasks_mtx);

fprintf(stderr, "uzfs_zvol_mgmt_thread thread exiting\n");
LOG_DEBUG("uzfs_zvol_mgmt thread exiting");
zk_thread_exit();
}
Loading

0 comments on commit f4a98dc

Please sign in to comment.