Skip to content

Commit

Permalink
TA1727 add slow txg sync message
Browse files Browse the repository at this point in the history
Signed-off-by: Jan Kryl <[email protected]>
  • Loading branch information
Jan Kryl committed Jul 11, 2018
1 parent 55fd1fe commit 9147ed7
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 2 deletions.
37 changes: 37 additions & 0 deletions cmd/zrepl/zfs_events.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <sys/fm/util.h>
#include <sys/fm/protocol.h>
#include <sys/fm/fs/zfs.h>
#include <sys/txg.h>
#include <libzfs.h>
#include <zrepl_mgmt.h>

Expand Down Expand Up @@ -57,6 +58,34 @@ print_state_change(nvlist_t *event)
}
}

static void
print_slow_sync(nvlist_t *event)
{
char *pool_name;

if (nvlist_lookup_string(event,
FM_EREPORT_PAYLOAD_ZFS_POOL, &pool_name) != 0) {
LOG_ERR("Invalid content of ZFS slow sync event");
} else {
LOG_ERR("Pool %s took more than %d seconds to sync",
pool_name, sync_threshold);
}
}

static void
print_slow_quiesce(nvlist_t *event)
{
char *pool_name;

if (nvlist_lookup_string(event,
FM_EREPORT_PAYLOAD_ZFS_POOL, &pool_name) != 0) {
LOG_ERR("Invalid content of ZFS slow quiesce event");
} else {
LOG_ERR("Pool %s took more than %d seconds to quiesce",
pool_name, quiesce_threshold);
}
}

/*
* Print the event in raw form to stderr.
*/
Expand Down Expand Up @@ -90,6 +119,14 @@ print_zfs_event(nvlist_t *event)
FM_RESOURCE_STATECHANGE) == 0) {
print_state_change(event);
skip = B_TRUE;
} else if (strcmp(class, FM_EREPORT_CLASS "." ZFS_ERROR_CLASS "."
FM_EREPORT_ZFS_SYNC_SLOW) == 0) {
print_slow_sync(event);
skip = B_TRUE;
} else if (strcmp(class, FM_EREPORT_CLASS "." ZFS_ERROR_CLASS "."
FM_EREPORT_ZFS_QUIESCE_SLOW) == 0) {
print_slow_quiesce(event);
skip = B_TRUE;
}

if (!skip) {
Expand Down
2 changes: 2 additions & 0 deletions include/sys/fm/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ extern "C" {
#define FM_EREPORT_ZFS_PROBE_FAILURE "probe_failure"
#define FM_EREPORT_ZFS_LOG_REPLAY "log_replay"
#define FM_EREPORT_ZFS_CONFIG_CACHE_WRITE "config_cache_write"
#define FM_EREPORT_ZFS_SYNC_SLOW "sync_slow"
#define FM_EREPORT_ZFS_QUIESCE_SLOW "quiesce_slow"

#define FM_EREPORT_PAYLOAD_ZFS_POOL "pool"
#define FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE "pool_failmode"
Expand Down
4 changes: 4 additions & 0 deletions include/sys/txg.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ typedef struct txg_list {

struct dsl_pool;

/* Based on these thresholds are generated ereports */
extern volatile int sync_threshold;
extern volatile int quiesce_threshold;

extern void txg_init(struct dsl_pool *dp, uint64_t txg);
extern void txg_fini(struct dsl_pool *dp);
extern void txg_sync_start(struct dsl_pool *dp);
Expand Down
4 changes: 2 additions & 2 deletions include/zrepl_mgmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ extern int create_and_bind(const char *port, int bind_needed,
* API to drop refcnt on zinfo. If refcnt
* dropped to zero then free zinfo.
*/
inline void
static inline void
uzfs_zinfo_drop_refcnt(zvol_info_t *zinfo)
{
atomic_dec_64(&zinfo->refcnt);
Expand All @@ -174,7 +174,7 @@ uzfs_zinfo_drop_refcnt(zvol_info_t *zinfo)
/*
* API to take refcount on zinfo.
*/
inline void
static inline void
uzfs_zinfo_take_refcnt(zvol_info_t *zinfo)
{
atomic_inc_64(&zinfo->refcnt);
Expand Down
19 changes: 19 additions & 0 deletions module/zfs/txg.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <sys/zil.h>
#include <sys/callb.h>
#include <sys/trace_txg.h>
#include <sys/fm/fs/zfs.h>

/*
* ZFS Transaction Groups
Expand Down Expand Up @@ -112,6 +113,9 @@ static void txg_sync_thread(dsl_pool_t *dp);
static void txg_quiesce_thread(dsl_pool_t *dp);

int zfs_txg_timeout = 5; /* max seconds worth of delta per txg */
/* max thresholds are in seconds */
volatile int sync_threshold = 15;
volatile int quiesce_threshold = 15;

/*
* Prepare the txg subsystem.
Expand Down Expand Up @@ -493,6 +497,7 @@ txg_sync_thread(dsl_pool_t *dp)
clock_t timer;
uint64_t txg;
txg_stat_t *ts;
hrtime_t timestamp;

/*
* We sync when we're scanning, there's someone waiting
Expand Down Expand Up @@ -543,8 +548,14 @@ txg_sync_thread(dsl_pool_t *dp)
mutex_exit(&tx->tx_sync_lock);

start = ddi_get_lbolt();
timestamp = gethrtime();
spa_sync(spa, txg);
delta = ddi_get_lbolt() - start;
timestamp = (gethrtime() - timestamp) / 1000000000;
if (timestamp > sync_threshold) {
zfs_ereport_post(FM_EREPORT_ZFS_SYNC_SLOW, spa,
NULL, NULL, 0, 0);
}

mutex_enter(&tx->tx_sync_lock);
tx->tx_synced_txg = txg;
Expand All @@ -570,6 +581,7 @@ txg_quiesce_thread(dsl_pool_t *dp)

for (;;) {
uint64_t txg;
hrtime_t timestamp;

/*
* We quiesce when there's someone waiting on us.
Expand All @@ -591,7 +603,14 @@ txg_quiesce_thread(dsl_pool_t *dp)
txg, tx->tx_quiesce_txg_waiting,
tx->tx_sync_txg_waiting);
mutex_exit(&tx->tx_sync_lock);
timestamp = gethrtime();
txg_quiesce(dp, txg);
/* Generate ereport if quiescing takes too long */
timestamp = (gethrtime() - timestamp) / 1000000000;
if (timestamp > quiesce_threshold) {
zfs_ereport_post(FM_EREPORT_ZFS_QUIESCE_SLOW,
dp->dp_spa, NULL, NULL, 0, 0);
}
mutex_enter(&tx->tx_sync_lock);

/*
Expand Down

0 comments on commit 9147ed7

Please sign in to comment.