Skip to content

Commit

Permalink
Illumos openzfs#4950 files sometimes can't be removed from a full fil…
Browse files Browse the repository at this point in the history
…esystem openzfs#2784

Reviewed by: Adam Leventhal [email protected]
Reviewed by: George Wilson [email protected]
Reviewed by: Sebastien Roy [email protected]
Reviewed by: Boris Protopopov [email protected]
Approved by: Dan McDonald [email protected]
Ported-by: Richard Yao [email protected]

Porting notes:
1. ZoL currently does not log discards to zvols, so the portion of this patch that
modifies the discard logging to mark it as freeing space has been discarded.

2. may_delete_now had been removed from zfs_remove() in ZoL. It has been reintroduced.
  • Loading branch information
ryao authored and kernelOfTruth committed Dec 13, 2014
1 parent d453f7d commit a691832
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 8 deletions.
1 change: 1 addition & 0 deletions include/sys/dmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,7 @@ void dmu_tx_abort(dmu_tx_t *tx);
int dmu_tx_assign(dmu_tx_t *tx, enum txg_how txg_how);
void dmu_tx_wait(dmu_tx_t *tx);
void dmu_tx_commit(dmu_tx_t *tx);
void dmu_tx_mark_netfree(dmu_tx_t *tx);

/*
* To register a commit callback, dmu_tx_callback_register() must be called.
Expand Down
7 changes: 7 additions & 0 deletions module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,12 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
tx = dmu_tx_create(os);
dmu_tx_hold_free(tx, dn->dn_object,
chunk_begin, chunk_end - chunk_begin);

/*
* Mark this transaction as typically resulting in a net
* reduction in space used.
*/
dmu_tx_mark_netfree(tx);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err) {
dmu_tx_abort(tx);
Expand Down Expand Up @@ -721,6 +727,7 @@ dmu_free_long_object(objset_t *os, uint64_t object)
tx = dmu_tx_create(os);
dmu_tx_hold_bonus(tx, object);
dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
dmu_tx_mark_netfree(tx);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err == 0) {
err = dmu_object_free(os, object, tx);
Expand Down
28 changes: 27 additions & 1 deletion module/zfs/dmu_tx.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
*/

#include <sys/dmu.h>
Expand Down Expand Up @@ -599,6 +599,32 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
txh->txh_space_tounref += unref;
}

/*
 * Flag a transaction as a "net free", i.e. one that is expected to release
 * space rather than consume it. As a consequence refquotas are not
 * enforced for the transaction, and it may draw on half of the pool space
 * overhead (see dsl_pool_adjustedsize()). Only use this for transactions
 * that are not expected to cause a net increase in the amount of space
 * used; it is acceptable for that expectation to be wrong occasionally.
 */
void
dmu_tx_mark_netfree(dmu_tx_t *tx)
{
	/*
	 * Claim that the operation frees 1GB. That should be big enough to
	 * cancel out the largest write, while staying far away from values
	 * such as UINT64_MAX that would overflow once these counters are
	 * used in arithmetic (e.g. in dmu_tx_try_assign()).
	 */
	const uint64_t netfree_bytes = 1024 * 1024 * 1024;
	dmu_tx_hold_t *hold;

	/* Add a THT_FREE hold, passing DMU_NEW_OBJECT as the object. */
	hold = dmu_tx_hold_object_impl(tx, tx->tx_objset, DMU_NEW_OBJECT,
	    THT_FREE, 0, 0);

	/* Record the pretend amount as both unreferenced and freed space. */
	hold->txh_space_tounref = netfree_bytes;
	hold->txh_space_tofree = netfree_bytes;
}

void
dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
{
Expand Down
3 changes: 2 additions & 1 deletion module/zfs/zfs_dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013, 2014 by Delphix. All rights reserved.
*/


Expand Down Expand Up @@ -578,6 +578,7 @@ zfs_purgedir(znode_t *dzp)
dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
/* Is this really needed ? */
zfs_sa_upgrade_txholds(tx, xzp);
dmu_tx_mark_netfree(tx);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
Expand Down
18 changes: 17 additions & 1 deletion module/zfs/zfs_vnops.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013, 2014 by Delphix. All rights reserved.
*/

/* Portions Copyright 2007 Jeremy Teo */
Expand Down Expand Up @@ -1508,6 +1508,9 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr)
uint64_t obj = 0;
zfs_dirlock_t *dl;
dmu_tx_t *tx;
#if defined(_KERNEL)
boolean_t may_delete_now;
#endif
boolean_t unlinked;
uint64_t txtype;
pathname_t *realnmp = NULL;
Expand Down Expand Up @@ -1567,6 +1570,11 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr)
dnlc_remove(dvp, name);
#endif /* HAVE_DNLC */

#if defined(_KERNEL) && defined(__linux__)
may_delete_now = atomic_read(&ip->i_count) == 1 &&
!(ip->i_state & I_DIRTY_PAGES);
#endif

/*
* We never delete the znode and always place it in the unlinked
* set. The dentry cache will always hold the last reference and
Expand All @@ -1592,6 +1600,14 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr)
/* charge as an update -- would be nice not to charge at all */
dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);

/*
* Mark this transaction as typically resulting in a net free of
* space, unless object removal will be delayed indefinitely
* (due to active holds on the vnode due to the file being open).
*/
if (may_delete_now)
dmu_tx_mark_netfree(tx);

error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
Expand Down
11 changes: 6 additions & 5 deletions module/zfs/zfs_znode.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
*/

/* Portions Copyright 2007 Jeremy Teo */
Expand Down Expand Up @@ -1280,7 +1280,7 @@ zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
* IN: zp - znode of file to free data in.
* end - new end-of-file
*
* RETURN: 0 on success, error code on failure
* RETURN: 0 on success, error code on failure
*/
static int
zfs_extend(znode_t *zp, uint64_t end)
Expand Down Expand Up @@ -1395,7 +1395,7 @@ zfs_zero_partial_page(znode_t *zp, uint64_t start, uint64_t len)
* off - start of section to free.
* len - length of section to free.
*
* RETURN: 0 on success, error code on failure
* RETURN: 0 on success, error code on failure
*/
static int
zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
Expand Down Expand Up @@ -1474,7 +1474,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
* IN: zp - znode of file to free data in.
* end - new end-of-file.
*
* RETURN: 0 on success, error code on failure
* RETURN: 0 on success, error code on failure
*/
static int
zfs_trunc(znode_t *zp, uint64_t end)
Expand Down Expand Up @@ -1507,6 +1507,7 @@ zfs_trunc(znode_t *zp, uint64_t end)
tx = dmu_tx_create(zsb->z_os);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
zfs_sa_upgrade_txholds(tx, zp);
dmu_tx_mark_netfree(tx);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
Expand Down Expand Up @@ -1541,7 +1542,7 @@ zfs_trunc(znode_t *zp, uint64_t end)
* flag - current file open mode flags.
* log - TRUE if this action should be logged
*
* RETURN: 0 on success, error code on failure
* RETURN: 0 on success, error code on failure
*/
int
zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
Expand Down
1 change: 1 addition & 0 deletions module/zfs/zvol.c
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ zvol_update_volsize(uint64_t volsize, objset_t *os)

tx = dmu_tx_create(os);
dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
dmu_tx_mark_netfree(tx);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
Expand Down

0 comments on commit a691832

Please sign in to comment.