diff --git a/include/sys/dmu_tx.h b/include/sys/dmu_tx.h
index 60e9ed6e26f5..71a9ac7ca7bf 100644
--- a/include/sys/dmu_tx.h
+++ b/include/sys/dmu_tx.h
@@ -124,6 +124,7 @@ typedef struct dmu_tx_stats {
 	kstat_named_t dmu_tx_dirty_throttle;
 	kstat_named_t dmu_tx_dirty_delay;
 	kstat_named_t dmu_tx_dirty_over_max;
+	kstat_named_t dmu_tx_wrlog_over_max;
 	kstat_named_t dmu_tx_dirty_frees_delay;
 	kstat_named_t dmu_tx_quota;
 } dmu_tx_stats_t;
diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
index 6da8d42b42bd..cd6fd052bb9a 100644
--- a/man/man4/zfs.4
+++ b/man/man4/zfs.4
@@ -1066,6 +1066,18 @@ Start syncing out a transaction group if there's at least this much dirty data
 This should be less than
 .Sy zfs_vdev_async_write_active_min_dirty_percent .
 .
+.It Sy zfs_wrlog_data_max Ns = Pq int
+The upper limit of write-transaction ZIL log data size, in bytes.
+Once this limit is reached, write operations are blocked until log data is
+cleared out after the next transaction group sync.
+Because of some overhead, it should be set to at least 2 times the size of
+.Sy zfs_dirty_data_max
+.No to prevent harming normal write throughput.
+It should also be smaller than the size of the slog device, if one is present.
+.Pp
+Defaults to
+.Sy zfs_dirty_data_max*2
+.
 .It Sy zfs_fallocate_reserve_percent Ns = Ns Sy 110 Ns % Pq uint
 Since ZFS is a copy-on-write filesystem with snapshots, blocks cannot be
 preallocated for a file in order to guarantee that later writes will not
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 606774a3b46c..e29f9845bddc 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -7975,7 +7975,7 @@ arc_init(void)
 	/*
 	 * dp_wrlog_total is reduced for each txg at the end of
 	 * spa_sync(). However, dp_dirty_total is reduced every time
-	 * a block being written out. Thus under normal operation,
+	 * a block is written out. Thus under normal operation,
 	 * dp_wrlog_total could grow 2 times as big as
 	 * zfs_dirty_data_max.
 	 */
diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c
index f9f35b7cd5c2..5fa516866668 100644
--- a/module/zfs/dmu_tx.c
+++ b/module/zfs/dmu_tx.c
@@ -53,6 +53,7 @@ dmu_tx_stats_t dmu_tx_stats = {
 	{ "dmu_tx_dirty_throttle",	KSTAT_DATA_UINT64 },
 	{ "dmu_tx_dirty_delay",		KSTAT_DATA_UINT64 },
 	{ "dmu_tx_dirty_over_max",	KSTAT_DATA_UINT64 },
+	{ "dmu_tx_wrlog_over_max",	KSTAT_DATA_UINT64 },
 	{ "dmu_tx_dirty_frees_delay",	KSTAT_DATA_UINT64 },
 	{ "dmu_tx_quota",		KSTAT_DATA_UINT64 },
 };
@@ -886,6 +887,7 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
 
 	if (!tx->tx_dirty_delayed &&
 	    dsl_pool_wrlog_over_max(tx->tx_pool)) {
+		DMU_TX_STAT_BUMP(dmu_tx_wrlog_over_max);
 		return (SET_ERROR(ERESTART));
 	}
 
diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c
index 536a44d13b71..1350f1329564 100644
--- a/module/zfs/dsl_pool.c
+++ b/module/zfs/dsl_pool.c
@@ -430,8 +430,10 @@ dsl_pool_close(dsl_pool_t *dp)
 	mutex_destroy(&dp->dp_lock);
 	cv_destroy(&dp->dp_spaceavail_cv);
 
+	ASSERT0(aggsum_value(&dp->dp_wrlog_total));
 	aggsum_fini(&dp->dp_wrlog_total);
 	for (int i = 0; i < TXG_SIZE; i++) {
+		ASSERT0(aggsum_value(&dp->dp_wrlog_pertxg[i]));
 		aggsum_fini(&dp->dp_wrlog_pertxg[i]);
 	}
 
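
Note on observing the new counter: on Linux, the dmu_tx kstat extended above is exported as text under /proc/spl/kstat/zfs/dmu_tx, so dmu_tx_wrlog_over_max becomes visible there once transaction assignments start getting throttled by zfs_wrlog_data_max. The small userspace sketch below is illustrative only and not part of this change; it assumes the Linux SPL kstat path and its "name type value" row format.

/*
 * Illustrative helper (not part of the patch): print the new
 * dmu_tx_wrlog_over_max counter.  Assumes the Linux SPL kstat layout,
 * where the dmu_tx kstat is a text file of "name type value" rows at
 * /proc/spl/kstat/zfs/dmu_tx; other platforms expose kstats differently.
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
	FILE *f = fopen("/proc/spl/kstat/zfs/dmu_tx", "r");
	if (f == NULL) {
		perror("/proc/spl/kstat/zfs/dmu_tx");
		return (1);
	}

	char line[256], name[128];
	unsigned int type;
	unsigned long long value;

	while (fgets(line, sizeof (line), f) != NULL) {
		/* Data rows look like: dmu_tx_wrlog_over_max  4  0 */
		if (sscanf(line, "%127s %u %llu", name, &type, &value) == 3 &&
		    strcmp(name, "dmu_tx_wrlog_over_max") == 0) {
			printf("dmu_tx_wrlog_over_max = %llu\n", value);
			break;
		}
	}
	(void) fclose(f);
	return (0);
}

Interactively, a plain "grep wrlog /proc/spl/kstat/zfs/dmu_tx" gives the same information.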