Skip to content

Commit

Permalink
Handle zap_add() failures in mixed case mode
Browse files Browse the repository at this point in the history
With "casesensitivity=mixed", zap_add() could fail when the number of
files/directories with the same name (varying in case) exceed the
capacity of the leaf node of a Fatzap. This results in a ASSERT()
failure as zfs_link_create() does not expect zap_add() to fail. The fix
is to handle these failures and rollback the transactions.

Reviewed by: Matt Ahrens <[email protected]>
Reviewed-by: Chunwei Chen <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Sanjeev Bagewadi <[email protected]>
Closes #7011 
Closes #7054
  • Loading branch information
sanjeevbagewadi authored and behlendorf committed Feb 9, 2018
1 parent eb9c453 commit cc63068
Show file tree
Hide file tree
Showing 9 changed files with 290 additions and 32 deletions.
15 changes: 11 additions & 4 deletions include/sys/zap_leaf.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,15 @@ struct zap_stats;
* block size (1<<l->l_bs) - hash entry size (2) * number of hash
* entries - header space (2*chunksize)
*/
#define ZAP_LEAF_NUMCHUNKS(l) \
(((1<<(l)->l_bs) - 2*ZAP_LEAF_HASH_NUMENTRIES(l)) / \
#define ZAP_LEAF_NUMCHUNKS_BS(bs) \
(((1<<(bs)) - 2*ZAP_LEAF_HASH_NUMENTRIES_BS(bs)) / \
ZAP_LEAF_CHUNKSIZE - 2)

#define ZAP_LEAF_NUMCHUNKS(l) (ZAP_LEAF_NUMCHUNKS_BS(((l)->l_bs)))

#define ZAP_LEAF_NUMCHUNKS_DEF \
(ZAP_LEAF_NUMCHUNKS_BS(fzap_default_block_shift))

/*
* The amount of space within the chunk available for the array is:
* chunk size - space for type (1) - space for next pointer (2)
Expand All @@ -74,8 +79,10 @@ struct zap_stats;
* which is less than block size / CHUNKSIZE (24) / minimum number of
* chunks per entry (3).
*/
#define ZAP_LEAF_HASH_SHIFT(l) ((l)->l_bs - 5)
#define ZAP_LEAF_HASH_NUMENTRIES(l) (1 << ZAP_LEAF_HASH_SHIFT(l))
#define ZAP_LEAF_HASH_SHIFT_BS(bs) ((bs) - 5)
#define ZAP_LEAF_HASH_NUMENTRIES_BS(bs) (1 << ZAP_LEAF_HASH_SHIFT_BS(bs))
#define ZAP_LEAF_HASH_SHIFT(l) (ZAP_LEAF_HASH_SHIFT_BS(((l)->l_bs)))
#define ZAP_LEAF_HASH_NUMENTRIES(l) (ZAP_LEAF_HASH_NUMENTRIES_BS(((l)->l_bs)))

/*
* The chunks start immediately after the hash table. The end of the
Expand Down
25 changes: 24 additions & 1 deletion module/zfs/zap.c
Original file line number Diff line number Diff line change
Expand Up @@ -818,15 +818,19 @@ fzap_lookup(zap_name_t *zn,
return (err);
}

#define MAX_EXPAND_RETRIES 2

int
fzap_add_cd(zap_name_t *zn,
uint64_t integer_size, uint64_t num_integers,
const void *val, uint32_t cd, void *tag, dmu_tx_t *tx)
{
zap_leaf_t *l;
zap_leaf_t *prev_l = NULL;
int err;
zap_entry_handle_t zeh;
zap_t *zap = zn->zn_zap;
int expand_retries = 0;

ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
ASSERT(!zap->zap_ismicro);
Expand All @@ -850,10 +854,29 @@ fzap_add_cd(zap_name_t *zn,
if (err == 0) {
zap_increment_num_entries(zap, 1, tx);
} else if (err == EAGAIN) {
/*
* If the last two expansions did not help, there is no point
* trying to expand again
*/
if (expand_retries > MAX_EXPAND_RETRIES && prev_l == l) {
err = SET_ERROR(ENOSPC);
goto out;
}

err = zap_expand_leaf(zn, l, tag, tx, &l);
zap = zn->zn_zap; /* zap_expand_leaf() may change zap */
if (err == 0)
if (err == 0) {
prev_l = l;
expand_retries++;
goto retry;
} else if (err == ENOSPC) {
/*
* If we failed to expand the leaf, then bailout
* as there is no point trying
* zap_put_leaf_maybe_grow_ptrtbl().
*/
return (err);
}
}

out:
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/zap_leaf.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);
((h) >> \
(64 - ZAP_LEAF_HASH_SHIFT(l) - zap_leaf_phys(l)->l_hdr.lh_prefix_len)))

#define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])
#define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])

extern inline zap_leaf_phys_t *zap_leaf_phys(zap_leaf_t *l);

Expand Down
38 changes: 37 additions & 1 deletion module/zfs/zap_micro.c
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,41 @@ mze_find_unused_cd(zap_t *zap, uint64_t hash)
return (cd);
}

/*
* Each mzap entry requires at max : 4 chunks
* 3 chunks for names + 1 chunk for value.
*/
#define MZAP_ENT_CHUNKS (1 + ZAP_LEAF_ARRAY_NCHUNKS(MZAP_NAME_LEN) + \
ZAP_LEAF_ARRAY_NCHUNKS(sizeof (uint64_t)))

/*
* Check if the current entry keeps the colliding entries under the fatzap leaf
* size.
*/
static boolean_t
mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash)
{
zap_t *zap = zn->zn_zap;
mzap_ent_t mze_tofind;
mzap_ent_t *mze;
avl_index_t idx;
avl_tree_t *avl = &zap->zap_m.zap_avl;
uint32_t mzap_ents = 0;

mze_tofind.mze_hash = hash;
mze_tofind.mze_cd = 0;

for (mze = avl_find(avl, &mze_tofind, &idx);
mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
mzap_ents++;
}

/* Include the new entry being added */
mzap_ents++;

return (ZAP_LEAF_NUMCHUNKS_DEF > (mzap_ents * MZAP_ENT_CHUNKS));
}

static void
mze_remove(zap_t *zap, mzap_ent_t *mze)
{
Expand Down Expand Up @@ -1190,7 +1225,8 @@ zap_add_impl(zap_t *zap, const char *key,
err = fzap_add(zn, integer_size, num_integers, val, tag, tx);
zap = zn->zn_zap; /* fzap_add() may change zap */
} else if (integer_size != 8 || num_integers != 1 ||
strlen(key) >= MZAP_NAME_LEN) {
strlen(key) >= MZAP_NAME_LEN ||
!mze_canfit_fzap_leaf(zn, zn->zn_hash)) {
err = mzap_upgrade(&zn->zn_zap, tag, tx, 0);
if (err == 0) {
err = fzap_add(zn, integer_size, num_integers, val,
Expand Down
29 changes: 23 additions & 6 deletions module/zfs/zfs_dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,11 @@ zfs_dirent(znode_t *zp, uint64_t mode)
}

/*
* Link zp into dl. Can only fail if zp has been unlinked.
* Link zp into dl. Can fail in the following cases :
* - if zp has been unlinked.
* - if the number of entries with the same hash (aka. colliding entries)
* exceed the capacity of a leaf-block of fatzap and splitting of the
* leaf-block does not help.
*/
int
zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
Expand Down Expand Up @@ -776,6 +780,24 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
NULL, &links, sizeof (links));
}
}

value = zfs_dirent(zp, zp->z_mode);
error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, 8, 1,
&value, tx);

/*
* zap_add could fail to add the entry if it exceeds the capacity of the
* leaf-block and zap_leaf_split() failed to help.
* The caller of this routine is responsible for failing the transaction
* which will rollback the SA updates done above.
*/
if (error != 0) {
if (!(flag & ZRENAMING) && !(flag & ZNEW))
drop_nlink(ZTOI(zp));
mutex_exit(&zp->z_lock);
return (error);
}

SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
&dzp->z_id, sizeof (dzp->z_id));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
Expand Down Expand Up @@ -813,11 +835,6 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
ASSERT(error == 0);
mutex_exit(&dzp->z_lock);

value = zfs_dirent(zp, zp->z_mode);
error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name,
8, 1, &value, tx);
ASSERT(error == 0);

return (0);
}

Expand Down
74 changes: 56 additions & 18 deletions module/zfs/zfs_vnops.c
Original file line number Diff line number Diff line change
Expand Up @@ -1429,6 +1429,7 @@ zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl,
dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
0, acl_ids.z_aclp->z_acl_bytes);
}

error = dmu_tx_assign(tx,
(waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
if (error) {
Expand All @@ -1446,10 +1447,22 @@ zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl,
}
zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);

error = zfs_link_create(dl, zp, tx, ZNEW);
if (error != 0) {
/*
* Since, we failed to add the directory entry for it,
* delete the newly created dnode.
*/
zfs_znode_delete(zp, tx);
remove_inode_hash(ZTOI(zp));
zfs_acl_ids_free(&acl_ids);
dmu_tx_commit(tx);
goto out;
}

if (fuid_dirtied)
zfs_fuid_sync(zfsvfs, tx);

(void) zfs_link_create(dl, zp, tx, ZNEW);
txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
if (flag & FIGNORECASE)
txtype |= TX_CI;
Expand Down Expand Up @@ -2040,13 +2053,18 @@ zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap, struct inode **ipp,
*/
zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);

if (fuid_dirtied)
zfs_fuid_sync(zfsvfs, tx);

/*
* Now put new name in parent dir.
*/
(void) zfs_link_create(dl, zp, tx, ZNEW);
error = zfs_link_create(dl, zp, tx, ZNEW);
if (error != 0) {
zfs_znode_delete(zp, tx);
remove_inode_hash(ZTOI(zp));
goto out;
}

if (fuid_dirtied)
zfs_fuid_sync(zfsvfs, tx);

*ipp = ZTOI(zp);

Expand All @@ -2056,6 +2074,7 @@ zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap, struct inode **ipp,
zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp,
acl_ids.z_fuidp, vap);

out:
zfs_acl_ids_free(&acl_ids);

dmu_tx_commit(tx);
Expand All @@ -2065,10 +2084,14 @@ zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap, struct inode **ipp,
if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);

zfs_inode_update(dzp);
zfs_inode_update(zp);
if (error != 0) {
iput(ZTOI(zp));
} else {
zfs_inode_update(dzp);
zfs_inode_update(zp);
}
ZFS_EXIT(zfsvfs);
return (0);
return (error);
}

/*
Expand Down Expand Up @@ -3686,6 +3709,13 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
VERIFY3U(zfs_link_destroy(tdl, szp, tx,
ZRENAMING, NULL), ==, 0);
}
} else {
/*
* If we had removed the existing target, subsequent
* call to zfs_link_create() to add back the same entry
* but, the new dnode (szp) should not fail.
*/
ASSERT(tzp == NULL);
}
}

Expand Down Expand Up @@ -3856,25 +3886,33 @@ zfs_symlink(struct inode *dip, char *name, vattr_t *vap, char *link,
/*
* Insert the new object into the directory.
*/
(void) zfs_link_create(dl, zp, tx, ZNEW);

if (flags & FIGNORECASE)
txtype |= TX_CI;
zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
error = zfs_link_create(dl, zp, tx, ZNEW);
if (error != 0) {
zfs_znode_delete(zp, tx);
remove_inode_hash(ZTOI(zp));
} else {
if (flags & FIGNORECASE)
txtype |= TX_CI;
zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);

zfs_inode_update(dzp);
zfs_inode_update(zp);
zfs_inode_update(dzp);
zfs_inode_update(zp);
}

zfs_acl_ids_free(&acl_ids);

dmu_tx_commit(tx);

zfs_dirent_unlock(dl);

*ipp = ZTOI(zp);
if (error == 0) {
*ipp = ZTOI(zp);

if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
} else {
iput(ZTOI(zp));
}

ZFS_EXIT(zfsvfs);
return (error);
Expand Down
2 changes: 1 addition & 1 deletion tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ tags = ['functional', 'cachefile']
# 'mixed_none_lookup', 'mixed_none_lookup_ci', 'mixed_none_delete',
# 'mixed_formd_lookup', 'mixed_formd_lookup_ci', 'mixed_formd_delete']
[tests/functional/casenorm]
tests = ['case_all_values', 'norm_all_values']
tests = ['case_all_values', 'norm_all_values', 'mixed_create_failure']
tags = ['functional', 'casenorm']

[tests/functional/channel_program/lua_core]
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/tests/functional/casenorm/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ dist_pkgdata_SCRIPTS = \
insensitive_formd_lookup.ksh \
insensitive_none_delete.ksh \
insensitive_none_lookup.ksh \
mixed_create_failure.ksh \
mixed_formd_delete.ksh \
mixed_formd_lookup_ci.ksh \
mixed_formd_lookup.ksh \
Expand Down
Loading

0 comments on commit cc63068

Please sign in to comment.