Skip to content

Commit

Permalink
Freeing throttle should account for holes
Browse files Browse the repository at this point in the history
Deletion throttle currently does not account for holes in a file.
This means that it can activate when it shouldn't.
To fix it, we switch the throttle to be based on the number of
L1 indirect blocks we will have to dirty when freeing.

Reviewed by: Tom Caputi <[email protected]>
Reviewed by: Matt Ahrens <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Alek Pinchuk <[email protected]>
Closes #7725 
Closes #7888
  • Loading branch information
alek-p authored and behlendorf committed Feb 12, 2019
1 parent dcec0a1 commit 65282ee
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 16 deletions.
12 changes: 6 additions & 6 deletions man/man5/zfs-module-parameters.5
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
'\" te
.\" Copyright (c) 2013 by Turbo Fredriksson <[email protected]>. All rights reserved.
.\" Copyright (c) 2017 Datto Inc.
.\" Copyright (c) 2018 by Delphix. All rights reserved.
.\" Copyright (c) 2019 Datto Inc.
.\" The contents of this file are subject to the terms of the Common Development
.\" and Distribution License (the "License"). You may not use this file except
.\" in compliance with the License. You can obtain a copy of the license at
Expand All @@ -14,7 +14,7 @@
.\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your
.\" own identifying information:
.\" Portions Copyright [yyyy] [name of copyright owner]
.TH ZFS-MODULE-PARAMETERS 5 "Oct 28, 2017"
.TH ZFS-MODULE-PARAMETERS 5 "Feb 8, 2019"
.SH NAME
zfs\-module\-parameters \- ZFS module parameters
.SH DESCRIPTION
Expand Down Expand Up @@ -1970,12 +1970,12 @@ Default value: \fB52,428,800\fR.
\fBzfs_per_txg_dirty_frees_percent \fR (ulong)
.ad
.RS 12n
Tunable to control percentage of dirtied blocks from frees in one TXG.
After this threshold is crossed, additional dirty blocks from frees
wait until the next TXG.
Tunable to control percentage of dirtied indirect blocks from frees allowed
into one TXG. After this threshold is crossed, additional frees will wait until
the next TXG.
A value of zero will disable this throttle.
.sp
Default value: \fB30\fR and \fB0\fR to disable.
Default value: \fB5\fR, set to \fB0\fR to disable.
.RE

.sp
Expand Down
41 changes: 31 additions & 10 deletions module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2016, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
* Copyright (c) 2019 Datto Inc.
*/

#include <sys/dmu.h>
Expand Down Expand Up @@ -61,12 +62,12 @@
int zfs_nopwrite_enabled = 1;

/*
* Tunable to control percentage of dirtied blocks from frees in one TXG.
* After this threshold is crossed, additional dirty blocks from frees
* wait until the next TXG.
* Tunable to control percentage of dirtied L1 blocks from frees allowed into
* one TXG. After this threshold is crossed, additional dirty blocks from frees
* will wait until the next TXG.
* A value of zero will disable this throttle.
*/
unsigned long zfs_per_txg_dirty_frees_percent = 30;
unsigned long zfs_per_txg_dirty_frees_percent = 5;

/*
* Enable/disable forcing txg sync when dirty in dmu_offset_next.
Expand Down Expand Up @@ -709,11 +710,13 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
*
* On input, *start should be the first offset that does not need to be
* freed (e.g. "offset + length"). On return, *start will be the first
* offset that should be freed.
* offset that should be freed and l1blks is set to the number of level 1
* indirect blocks found within the chunk.
*/
static int
get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum)
get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum, uint64_t *l1blks)
{
uint64_t blks;
uint64_t maxblks = DMU_MAX_ACCESS >> (dn->dn_indblkshift + 1);
/* bytes of data covered by a level-1 indirect block */
uint64_t iblkrange =
Expand All @@ -723,11 +726,16 @@ get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum)

if (*start - minimum <= iblkrange * maxblks) {
*start = minimum;
/*
* Assume full L1 blocks and 128k recordsize to approximate the
* expected number of L1 blocks in this chunk
*/
*l1blks = minimum / (1024 * 128 * 1024);
return (0);
}
ASSERT(ISP2(iblkrange));

for (uint64_t blks = 0; *start > minimum && blks < maxblks; blks++) {
for (blks = 0; *start > minimum && blks < maxblks; blks++) {
int err;

/*
Expand All @@ -745,6 +753,7 @@ get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum)
*start = minimum;
break;
} else if (err != 0) {
*l1blks = blks;
return (err);
}

Expand All @@ -753,6 +762,7 @@ get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum)
}
if (*start < minimum)
*start = minimum;
*l1blks = blks;
return (0);
}

Expand Down Expand Up @@ -792,14 +802,15 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
dirty_frees_threshold =
zfs_per_txg_dirty_frees_percent * zfs_dirty_data_max / 100;
else
dirty_frees_threshold = zfs_dirty_data_max / 4;
dirty_frees_threshold = zfs_dirty_data_max / 20;

if (length == DMU_OBJECT_END || offset + length > object_size)
length = object_size - offset;

while (length != 0) {
uint64_t chunk_end, chunk_begin, chunk_len;
uint64_t long_free_dirty_all_txgs = 0;
uint64_t l1blks;
dmu_tx_t *tx;

if (dmu_objset_zfs_unmounting(dn->dn_objset))
Expand All @@ -808,7 +819,7 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
chunk_end = chunk_begin = offset + length;

/* move chunk_begin backwards to the beginning of this chunk */
err = get_next_chunk(dn, &chunk_begin, offset);
err = get_next_chunk(dn, &chunk_begin, offset, &l1blks);
if (err)
return (err);
ASSERT3U(chunk_begin, >=, offset);
Expand Down Expand Up @@ -849,9 +860,19 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
return (err);
}

/*
* In order to prevent unnecessary write throttling, for each
* TXG, we track the cumulative size of L1 blocks being dirtied
* in dnode_free_range() below. We compare this number to a
* tunable threshold, past which we prevent new L1 dirty freeing
* blocks from being added into the open TXG. See
* dmu_free_long_range_impl() for details. The threshold
* prevents write throttle activation due to dirty freeing L1
* blocks taking up a large percentage of zfs_dirty_data_max.
*/
mutex_enter(&dp->dp_lock);
dp->dp_long_free_dirty_pertxg[dmu_tx_get_txg(tx) & TXG_MASK] +=
chunk_len;
l1blks << dn->dn_indblkshift;
mutex_exit(&dp->dp_lock);
DTRACE_PROBE3(free__long__range,
uint64_t, long_free_dirty_all_txgs, uint64_t, chunk_len,
Expand Down

0 comments on commit 65282ee

Please sign in to comment.