Maximize txg size to improve throughput
Added a dp_dirty_peak field to the dsl_pool_t struct. It is used to
track peaks of dp_dirty_total precisely.
The following routine only applies under heavy workload, when the
delay is kicked in. (Under light load, the txg kick is triggered by
zfs_dirty_data_sync_percent.)
The desired txg size is derived from dp_dirty_peak. An optimized txg
size should meet the following criteria (see the worked example after
this list):
1. dp_dirty_peak should be able to fit 2 full txgs and 1 partial txg,
in order to fully utilize all 3 stages of the pipeline.
2. The txg should be as large as possible, to fully utilize each txg.
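
As a rough illustration of how the two criteria combine (a standalone
userspace sketch with made-up numbers, not code from this commit; the
tunable values, including the zfs_delay_min_dirty_percent default of
60, are assumptions):

/*
 * Illustrative userspace sketch, not ZFS code: derive a target txg
 * size from a hypothetical dp_dirty_peak the way the patch does.
 * All values below are assumptions chosen for the example.
 */
#include <stdio.h>
#include <stdint.h>

#define	MAX(a, b)	((a) > (b) ? (a) : (b))

int
main(void)
{
	uint64_t zfs_dirty_data_max = 4ULL << 30;	/* assume 4 GiB */
	uint64_t zfs_delay_min_dirty_percent = 60;	/* assumed default */
	uint64_t dp_dirty_peak = 3ULL << 30;		/* hypothetical peak */

	/* Floor: half of the delay threshold, as in the patch. */
	uint64_t target = zfs_dirty_data_max *
	    zfs_delay_min_dirty_percent / 100 / 2;

	/*
	 * Criterion 1: stay just under 50% of the peak, so two full
	 * txgs plus a partial one fit inside dp_dirty_peak.
	 * Criterion 2: take the larger of the two candidates, so each
	 * txg is as big as possible.
	 */
	target = MAX(dp_dirty_peak * 48 / 100, target);

	/* 4 GiB * 60% / 2 = 1.2 GiB floor; 3 GiB * 48% ~= 1.44 GiB. */
	printf("target txg size: %llu bytes\n",
	    (unsigned long long)target);
	return (0);
}

With these numbers the peak criterion wins: two 1.44 GiB txgs plus a
~0.12 GiB partial txg still fit inside the 3 GiB peak.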

Also includes some other style fixes from code review.

Signed-off-by: jxdking <[email protected]>
jxdking committed Apr 30, 2021
1 parent 184f4cd commit 632a5f2
Showing 4 changed files with 48 additions and 6 deletions.
include/sys/dsl_pool.h (1 addition, 0 deletions)
@@ -114,6 +114,7 @@ typedef struct dsl_pool {
 	kcondvar_t dp_spaceavail_cv;
 	uint64_t dp_dirty_pertxg[TXG_SIZE];
 	uint64_t dp_dirty_total;
+	uint64_t dp_dirty_peak;	/* historical peak of dp_dirty_total */
 	uint64_t dp_long_free_dirty_pertxg[TXG_SIZE];
 	uint64_t dp_mos_used_delta;
 	uint64_t dp_mos_compressed_delta;
module/zfs/dmu_tx.c (1 addition, 2 deletions)
@@ -1056,9 +1056,8 @@ dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
 
 	txg_rele_to_quiesce(&tx->tx_txgh);
 
-	if (dsl_pool_need_dirty_sync(tx->tx_pool, tx->tx_txg)) {
+	if (dsl_pool_need_dirty_sync(tx->tx_pool, tx->tx_txg))
 		txg_kick(tx->tx_pool, tx->tx_txg);
-	}
 	return (0);
 }
module/zfs/dsl_pool.c (44 additions, 3 deletions)
@@ -818,6 +818,16 @@ dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg)
 		dmu_buf_rele(ds->ds_dbuf, zilog);
 	}
 	ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg));
+
+	mutex_enter(&dp->dp_lock);
+	/*
+	 * An absolute peak of dp_dirty_total since the pool was loaded
+	 * may not represent the current workload.
+	 * Fade off dp_dirty_peak with the following formula each time
+	 * a txg is synced, so that it reflects the current workload.
+	 */
+	dp->dp_dirty_peak -= dp->dp_dirty_peak / 128;
+	mutex_exit(&dp->dp_lock);
 }

@@ -911,15 +921,45 @@ dsl_pool_need_dirty_delay(dsl_pool_t *dp)
 boolean_t
 dsl_pool_need_dirty_sync(dsl_pool_t *dp, uint64_t txg)
 {
-	uint64_t dirty;
 	uint64_t dirty_min_bytes =
 	    zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100;
 
 	mutex_enter(&dp->dp_lock);
-	dirty = dp->dp_dirty_pertxg[txg & TXG_MASK];
+	uint64_t dirty = dp->dp_dirty_pertxg[txg & TXG_MASK];
+	uint64_t total = dp->dp_dirty_total;
+	uint64_t peak = dp->dp_dirty_peak;
 	mutex_exit(&dp->dp_lock);
 
-	return (dirty > dirty_min_bytes);
+	if (dirty < dirty_min_bytes)
+		return (0);
+
+	if (dirty == total) {
+		/*
+		 * All dirty data is in the current txg, which suggests
+		 * that no dirty data is quiescing or syncing.
+		 * Since the dirty data is no less than dirty_min_bytes,
+		 * the current txg should be synced.
+		 */
+		return (1);
+	}
+
+	/*
+	 * Ensure target_txg_size is no less than half of the
+	 * zfs_delay_min_dirty_percent threshold.
+	 */
+	uint64_t target_txg_size =
+	    zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100 / 2;
+	/*
+	 * <peak> suggests the largest dp_dirty_total that the pool
+	 * can grow to.
+	 * We want to maximize target_txg_size to fully utilize each
+	 * txg, and we also want to fit 2 full txgs and 1 partial txg
+	 * within <peak> to fully utilize the pipeline.
+	 * Choosing target_txg_size slightly below 50% of <peak>
+	 * should be about right.
+	 */
+	target_txg_size = MAX(peak * 48 / 100, target_txg_size);
+	return (dirty >= target_txg_size);
 }

@@ -948,6 +988,7 @@ dsl_pool_undirty_space(dsl_pool_t *dp, int64_t space, uint64_t txg)
 	ASSERT3U(dp->dp_dirty_pertxg[txg & TXG_MASK], >=, space);
 	dp->dp_dirty_pertxg[txg & TXG_MASK] -= space;
 	ASSERT3U(dp->dp_dirty_total, >=, space);
+	dp->dp_dirty_peak = MAX(dp->dp_dirty_peak, dp->dp_dirty_total);
 	dsl_pool_dirty_delta(dp, -space);
 	mutex_exit(&dp->dp_lock);
 }
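
For intuition on the fade rate added to dsl_pool_sync_done above (a
back-of-the-envelope sketch, not part of the commit): subtracting
1/128 per synced txg multiplies dp_dirty_peak by 127/128 each time,
so a stale peak halves after roughly ln(2) / ln(128/127), i.e. about
89 synced txgs.

/*
 * Standalone sketch, not ZFS code: simulate the dp_dirty_peak decay
 * of one part in 128 per synced txg and count how many txgs it takes
 * for a stale peak to halve. The starting value is an assumption.
 */
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t peak = 1ULL << 30;	/* assume a 1 GiB historical peak */
	int txgs = 0;

	while (peak > (1ULL << 29)) {	/* run until the peak halves */
		peak -= peak / 128;	/* the decrement the patch applies */
		txgs++;
	}
	printf("peak halved after %d synced txgs\n", txgs);
	return (0);
}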
module/zfs/txg.c (2 additions, 1 deletion)
@@ -779,6 +779,7 @@ txg_wait_open(dsl_pool_t *dp, uint64_t txg, boolean_t should_quiesce)
  * If there isn't a txg quiescing in the pipeline, push the txg
  * through the pipeline by quiescing the open txg.
  * It is fine if there is a txg still syncing.
+ * Pass in the txg number of the transaction that should be closed and synced.
  */
 void
 txg_kick(dsl_pool_t *dp, uint64_t txg)
@@ -788,7 +789,7 @@ txg_kick(dsl_pool_t *dp, uint64_t txg)
 	ASSERT(!dsl_pool_config_held(dp));
 
 	mutex_enter(&tx->tx_sync_lock);
-	txg = txg == 0 ? tx->tx_open_txg : txg;
+	txg = (txg == 0 ? tx->tx_open_txg : txg);
 	if (txg == tx->tx_open_txg &&
 	    !txg_is_quiescing(dp) &&
 	    tx->tx_quiesce_txg_waiting <= tx->tx_open_txg &&
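
To make the new txg_kick contract concrete, here is a minimal
userspace model of the defaulting and guard logic visible in the hunk
above (assumptions throughout, not ZFS code; the real function also
checks conditions hidden in the collapsed region): passing 0 means
"kick the currently open txg", while a stale txg or one already
queued to quiesce is a no-op.

/*
 * Userspace model, not ZFS code: mimics the txg selection and the
 * guards visible in the txg_kick hunk above. The variables stand in
 * for tx_open_txg, tx_quiesce_txg_waiting, and txg_is_quiescing();
 * what happens on a successful kick is an assumption here, since the
 * body below the guard is collapsed in the diff.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

static uint64_t open_txg = 42;
static uint64_t quiesce_txg_waiting = 42;
static bool quiescing = false;

static void
txg_kick_model(uint64_t txg)
{
	txg = (txg == 0 ? open_txg : txg);	/* 0 defaults to the open txg */
	if (txg == open_txg && !quiescing &&
	    quiesce_txg_waiting <= open_txg) {
		quiesce_txg_waiting = open_txg + 1;	/* assumed push */
		printf("kicked txg %llu\n", (unsigned long long)txg);
	} else {
		printf("txg %llu: no-op\n", (unsigned long long)txg);
	}
}

int
main(void)
{
	txg_kick_model(41);	/* stale txg: no-op */
	txg_kick_model(0);	/* defaults to open txg 42: kicked */
	txg_kick_model(42);	/* already queued to quiesce: no-op */
	return (0);
}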
