From 1a5b96b8ee66da5dfdfbedcb6bc462f454b4a25d Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Wed, 30 May 2018 11:27:40 -0700 Subject: [PATCH] OpenZFS 9329 - panic in zap_leaf_lookup() due to concurrent zapification For the null pointer issue shown below, the solution is to initialize the contents of the object before changing its type, so that concurrent accessors will see it as non-zapified until it is ready for access via the ZAP. BAD TRAP: type=e (#pf Page fault) rp=ffffff00ff520440 addr=20 occurred in module "zfs" due to a NULL pointer dereference ffffff00ff520320 unix:die+df () ffffff00ff520430 unix:trap+dc0 () ffffff00ff520440 unix:cmntrap+e6 () ffffff00ff520590 zfs:zap_leaf_lookup+46 () ffffff00ff520640 zfs:fzap_lookup+a9 () ffffff00ff5206e0 zfs:zap_lookup_norm+111 () ffffff00ff520730 zfs:zap_contains+42 () ffffff00ff520760 zfs:dsl_dataset_has_resume_receive_state+47 () ffffff00ff520900 zfs:get_receive_resume_stats+3e () ffffff00ff520a90 zfs:dsl_dataset_stats+262 () ffffff00ff520ac0 zfs:dmu_objset_stats+2b () ffffff00ff520b10 zfs:zfs_ioc_objset_stats_impl+64 () ffffff00ff520b60 zfs:zfs_ioc_objset_stats+33 () ffffff00ff520bd0 zfs:zfs_ioc_dataset_list_next+140 () ffffff00ff520c80 zfs:zfsdev_ioctl+4d7 () ffffff00ff520cc0 genunix:cdev_ioctl+39 () ffffff00ff520d10 specfs:spec_ioctl+60 () ffffff00ff520da0 genunix:fop_ioctl+55 () ffffff00ff520ec0 genunix:ioctl+9b () ffffff00ff520f10 unix:brand_sys_sysenter+1c9 () Porting Notes: * DMU_OT_BYTESWAP conditional in zap_lockdir_impl() kept. Authored by: Matthew Ahrens Reviewed by: Pavel Zakharov Reviewed by: Brad Lewis Reviewed-by: George Melikov Approved by: Dan McDonald Ported-by: Brian Behlendorf OpenZFS-issue: https://illumos.org/issues/9329 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/e8e0f97 Closes #7578 --- module/zfs/dmu_object.c | 11 +++++++++-- module/zfs/zap_micro.c | 39 +++++++++++++++++++++++++++------------ 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c index 21e8e5a9475e..62ddea905afa 100644 --- a/module/zfs/dmu_object.c +++ b/module/zfs/dmu_object.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013, 2015 by Delphix. All rights reserved. + * Copyright (c) 2013, 2017 by Delphix. All rights reserved. * Copyright 2014 HybridCluster. All rights reserved. */ @@ -381,12 +381,19 @@ dmu_object_zapify(objset_t *mos, uint64_t object, dmu_object_type_t old_type, } ASSERT3U(dn->dn_type, ==, old_type); ASSERT0(dn->dn_maxblkid); + + /* + * We must initialize the ZAP data before changing the type, + * so that concurrent calls to *_is_zapified() can determine if + * the object has been completely zapified by checking the type. + */ + mzap_create_impl(mos, object, 0, 0, tx); + dn->dn_next_type[tx->tx_txg & TXG_MASK] = dn->dn_type = DMU_OTN_ZAP_METADATA; dnode_setdirty(dn, tx); dnode_rele(dn, FTAG); - mzap_create_impl(mos, object, 0, 0, tx); spa_feature_incr(dmu_objset_spa(mos), SPA_FEATURE_EXTENSIBLE_DATASET, tx); diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c index a628cb881b2e..920b529ca6bb 100644 --- a/module/zfs/zap_micro.c +++ b/module/zfs/zap_micro.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2016 by Delphix. All rights reserved. + * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2017 Nexenta Systems, Inc. */ @@ -501,6 +501,10 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) return (winner); } +/* + * This routine "consumes" the caller's hold on the dbuf, which must + * have the specified tag. + */ static int zap_lockdir_impl(dmu_buf_t *db, void *tag, dmu_tx_t *tx, krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp) @@ -586,6 +590,14 @@ zap_lockdir_by_dnode(dnode_t *dn, dmu_tx_t *tx, if (err != 0) { return (err); } +#ifdef ZFS_DEBUG + { + dmu_object_info_t doi; + dmu_object_info_from_db(db, &doi); + ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); + } +#endif + err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp); if (err != 0) { dmu_buf_rele(db, tag); @@ -602,6 +614,13 @@ zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, int err = dmu_buf_hold(os, obj, 0, tag, &db, DMU_READ_NO_PREFETCH); if (err != 0) return (err); +#ifdef ZFS_DEBUG + { + dmu_object_info_t doi; + dmu_object_info_from_db(db, &doi); + ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); + } +#endif err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp); if (err != 0) dmu_buf_rele(db, tag); @@ -687,28 +706,21 @@ mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags, VERIFY0(dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH)); -#ifdef ZFS_DEBUG - { - dmu_object_info_t doi; - dmu_object_info_from_db(db, &doi); - ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); - } -#endif - dmu_buf_will_dirty(db, tx); mzap_phys_t *zp = db->db_data; zp->mz_block_type = ZBT_MICRO; zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL; zp->mz_normflags = normflags; - dmu_buf_rele(db, FTAG); if (flags != 0) { zap_t *zap; /* Only fat zap supports flags; upgrade immediately. */ - VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER, - B_FALSE, B_FALSE, FTAG, &zap)); + VERIFY0(zap_lockdir_impl(db, FTAG, tx, RW_WRITER, + B_FALSE, B_FALSE, &zap)); VERIFY0(mzap_upgrade(&zap, FTAG, tx, flags)); zap_unlockdir(zap, FTAG); + } else { + dmu_buf_rele(db, FTAG); } } @@ -742,6 +754,7 @@ zap_create_claim_norm_dnsize(objset_t *os, uint64_t obj, int normflags, dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) { + ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP); int err = dmu_object_claim_dnsize(os, obj, ot, 0, bonustype, bonuslen, dnodesize, tx); if (err != 0) @@ -777,6 +790,7 @@ uint64_t zap_create_norm_dnsize(objset_t *os, int normflags, dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) { + ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP); uint64_t obj = dmu_object_alloc_dnsize(os, ot, 0, bonustype, bonuslen, dnodesize, tx); @@ -798,6 +812,7 @@ zap_create_flags_dnsize(objset_t *os, int normflags, zap_flags_t flags, dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) { + ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP); uint64_t obj = dmu_object_alloc_dnsize(os, ot, 0, bonustype, bonuslen, dnodesize, tx);