Skip to content

Commit

Permalink
Revert "Illumos 5056 - ZFS deadlock on db_mtx and dn_holds"
Browse files Browse the repository at this point in the history
This reverts commit 0c66c32.

It caused a runtime failure:

https://clusterhq.atlassian.net/browse/ZFS-37

The original deadlock must be resolved differently.

All Spectralogic copyright notices introduced in the reverted commit
have been retained because other changes depend on them: either
additional changes that we presently have, or additional changes that
we do not yet have.

Signed-off-by: Richard Yao <[email protected]>
  • Loading branch information
ryao committed Oct 1, 2015
1 parent 0ab82d7 commit b1d5794
Show file tree
Hide file tree
Showing 36 changed files with 306 additions and 613 deletions.
5 changes: 3 additions & 2 deletions include/sys/dbuf.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,9 @@ typedef struct dmu_buf_impl {

/* Data which is unique to data (leaf) blocks: */

/* User callback information. */
dmu_buf_user_t *db_user;
/* stuff we store for the user (see dmu_buf_set_user) */
void *db_user_ptr;
dmu_buf_evict_func_t *db_evict_func;

uint8_t db_immediate_evict;
uint8_t db_freed_in_flight;
Expand Down
132 changes: 23 additions & 109 deletions include/sys/dmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,11 @@
* dmu_spa.h.
*/

#include <sys/zfs_context.h>
#include <sys/inttypes.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/cred.h>
#include <sys/time.h>
#include <sys/fs/zfs.h>
#include <sys/uio.h>

Expand Down Expand Up @@ -288,6 +290,8 @@ typedef struct dmu_buf {
void *db_data; /* data in buffer */
} dmu_buf_t;

typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);

/*
* The names of zap entries in the DIRECTORY_OBJECT of the MOS.
*/
Expand Down Expand Up @@ -489,126 +493,36 @@ int dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset,
uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp);
void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag);

typedef void dmu_buf_evict_func_t(void *user_ptr);

/*
* A DMU buffer user object may be associated with a dbuf for the
* duration of its lifetime. This allows the user of a dbuf (client)
* to attach private data to a dbuf (e.g. in-core only data such as a
* dnode_children_t, zap_t, or zap_leaf_t) and be optionally notified
* when that dbuf has been evicted. Clients typically respond to the
* eviction notification by freeing their private data, thus ensuring
* the same lifetime for both dbuf and private data.
*
* The mapping from a dmu_buf_user_t to any client private data is the
* client's responsibility. All current consumers of the API with private
* data embed a dmu_buf_user_t as the first member of the structure for
* their private data. This allows conversions between the two types
* with a simple cast. Since the DMU buf user API never needs access
* to the private data, other strategies can be employed if necessary
* or convenient for the client (e.g. using container_of() to do the
* conversion for private data that cannot have the dmu_buf_user_t as
* its first member).
*
* Eviction callbacks are executed without the dbuf mutex held or any
* other type of mechanism to guarantee that the dbuf is still available.
* For this reason, users must assume the dbuf has already been freed
* and not reference the dbuf from the callback context.
*
* Users requesting "immediate eviction" are notified as soon as the dbuf
* is only referenced by dirty records (dirties == holds). Otherwise the
* notification occurs after eviction processing for the dbuf begins.
*/
/*
* Summary of members: a taskq entry, the eviction callback and, in
* ZFS_DEBUG builds only, the location of the client's dbuf pointer.
*/
typedef struct dmu_buf_user {
/*
* Asynchronous user eviction callback state.
* NOTE(review): presumably the taskq entry used to dispatch
* dbu_evict_func; the dispatch site is not visible here -- confirm.
*/
taskq_ent_t dbu_tqent;

/*
* This instance's eviction function pointer.
* Stored by dmu_buf_init_user(), which asserts it was NULL beforehand.
*/
dmu_buf_evict_func_t *dbu_evict_func;
#ifdef ZFS_DEBUG
/*
* Pointer to user's dbuf pointer. NULL for clients that do
* not associate a dbuf with their user data.
*
* The dbuf pointer is cleared upon eviction so as to catch
* use-after-evict bugs in clients.
*
* This member exists only when ZFS_DEBUG is defined, so the size of
* the structure differs between debug and non-debug builds.
*/
dmu_buf_t **dbu_clear_on_evict_dbufp;
#endif
} dmu_buf_user_t;

/*
 * Prepare a dmu_buf_user_t for use by recording evict_func as the
 * callback to invoke when the user is evicted.
 *
 *	dbu			user state to initialize; must be zeroed
 *				on entry
 *	evict_func		eviction callback; must not be NULL
 *	clear_on_evict_dbufp	recorded in ZFS_DEBUG builds only
 *
 * NOTE: Call this at most once per dmu_buf_user_t.  The assertion on
 * dbu_evict_func is what enforces the "already zeroed" requirement.
 */
#ifndef __lint
static inline void
dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func,
    dmu_buf_t **clear_on_evict_dbufp)
{
	/* A non-NULL slot means dbu was not zeroed or was set up before. */
	ASSERT(dbu->dbu_evict_func == NULL);
	ASSERT(evict_func != NULL);

#ifdef ZFS_DEBUG
	dbu->dbu_clear_on_evict_dbufp = clear_on_evict_dbufp;
#endif
	dbu->dbu_evict_func = evict_func;
}
#else /* __lint */
/* Very ugly, but it beats issuing suppression directives in many Makefiles. */
extern void
dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func,
    dmu_buf_t **clear_on_evict_dbufp);
#endif /* __lint */

/*
* Attach user data to a dbuf and mark it for normal (when the dbuf's
* data is cleared or its reference count goes to zero) eviction processing.
* Returns NULL on success, or the existing user ptr if it's already
* been set.
*
* Returns NULL on success, or the existing user if another user currently
* owns the buffer.
*/
void *dmu_buf_set_user(dmu_buf_t *db, dmu_buf_user_t *user);

/*
* Attach user data to a dbuf and mark it for immediate (its dirty and
* reference counts are equal) eviction processing.
* user_ptr is for use by the user and can be obtained via dmu_buf_get_user().
*
* Returns NULL on success, or the existing user if another user currently
* owns the buffer.
*/
void *dmu_buf_set_user_ie(dmu_buf_t *db, dmu_buf_user_t *user);

/*
* Replace the current user of a dbuf.
* If non-NULL, pageout func will be called when this buffer is being
* excised from the cache, so that you can clean up the data structure
* pointed to by user_ptr.
*
* If given the current user of a dbuf, replaces the dbuf's user with
* "new_user" and returns the user data pointer that was replaced.
* Otherwise returns the current, and unmodified, dbuf user pointer.
* dmu_evict_user() will call the pageout func for all buffers in a
* objset with a given pageout func.
*/
void *dmu_buf_replace_user(dmu_buf_t *db,
dmu_buf_user_t *old_user, dmu_buf_user_t *new_user);

void *dmu_buf_set_user(dmu_buf_t *db, void *user_ptr,
dmu_buf_evict_func_t *pageout_func);
/*
* Remove the specified user data for a DMU buffer.
*
* Returns the user that was removed on success, or the current user if
* another user currently owns the buffer.
* set_user_ie is the same as set_user, but requests immediate eviction
* when the hold count goes to zero.
*/
void *dmu_buf_remove_user(dmu_buf_t *db, dmu_buf_user_t *user);
void *dmu_buf_set_user_ie(dmu_buf_t *db, void *user_ptr,
dmu_buf_evict_func_t *pageout_func);
void *dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr,
void *user_ptr, dmu_buf_evict_func_t *pageout_func);
void dmu_evict_user(objset_t *os, dmu_buf_evict_func_t *func);

/*
* Returns the user data (dmu_buf_user_t *) associated with this dbuf.
* Returns the user_ptr set with dmu_buf_set_user(), or NULL if not set.
*/
void *dmu_buf_get_user(dmu_buf_t *db);

/* Block until any in-progress dmu buf user evictions complete. */
void dmu_buf_user_evict_wait(void);

/*
* Returns the blkptr associated with this dbuf, or NULL if not set.
*/
Expand Down
13 changes: 4 additions & 9 deletions include/sys/dmu_objset.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,25 +75,22 @@ struct objset {
arc_buf_t *os_phys_buf;
objset_phys_t *os_phys;
/*
* The following "special" dnodes have no parent, are exempt
* from dnode_move(), and are not recorded in os_dnodes, but they
* root their descendents in this objset using handles anyway, so
* that all access to dnodes from dbufs consistently uses handles.
* The following "special" dnodes have no parent and are exempt from
* dnode_move(), but they root their descendents in this objset using
* handles anyway, so that all access to dnodes from dbufs consistently
* uses handles.
*/
dnode_handle_t os_meta_dnode;
dnode_handle_t os_userused_dnode;
dnode_handle_t os_groupused_dnode;
zilog_t *os_zil;

list_node_t os_evicting_node;

/* can change, under dsl_dir's locks: */
enum zio_checksum os_checksum;
enum zio_compress os_compress;
uint8_t os_copies;
enum zio_checksum os_dedup_checksum;
boolean_t os_dedup_verify;
boolean_t os_evicting;
zfs_logbias_op_t os_logbias;
zfs_cache_type_t os_primary_cache;
zfs_cache_type_t os_secondary_cache;
Expand Down Expand Up @@ -175,8 +172,6 @@ int dmu_objset_userspace_upgrade(objset_t *os);
boolean_t dmu_objset_userspace_present(objset_t *os);
int dmu_fsname(const char *snapname, char *buf);

void dmu_objset_evict_done(objset_t *os);

/* Code for handling userspace interface */
extern const char *dmu_objset_types[];

Expand Down
3 changes: 1 addition & 2 deletions include/sys/dnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,6 @@ typedef struct dnode_handle {
} dnode_handle_t;

/*
* A counted, dynamically sized array of dnode handles, headed by the
* dmu_buf_user_t that carries its eviction data.
*/
typedef struct dnode_children {
dmu_buf_user_t dnc_dbu; /* User evict data */
size_t dnc_count; /* number of children */
/*
* Flexible array member: the handles are stored directly after the
* fixed part of the structure.
* NOTE(review): presumably holds dnc_count entries -- confirm at the
* allocation site.
*/
dnode_handle_t dnc_children[]; /* sized dynamically */
} dnode_children_t;
Expand All @@ -289,7 +288,7 @@ typedef struct free_range {
uint64_t fr_nblks;
} free_range_t;

void dnode_special_open(struct objset *dd, dnode_phys_t *dnp,
dnode_t *dnode_special_open(struct objset *dd, dnode_phys_t *dnp,
uint64_t object, dnode_handle_t *dnh);
void dnode_special_close(dnode_handle_t *dnh);

Expand Down
10 changes: 5 additions & 5 deletions include/sys/dsl_dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,14 +133,11 @@ typedef struct dsl_dataset_phys {
} dsl_dataset_phys_t;

typedef struct dsl_dataset {
dmu_buf_user_t ds_dbu;

/* Immutable: */
struct dsl_dir *ds_dir;
dmu_buf_t *ds_dbuf;
uint64_t ds_object;
uint64_t ds_fsid_guid;
boolean_t ds_is_snapshot;

/* only used in syncing context, only valid for non-snapshots: */
struct dsl_dataset *ds_prev;
Expand Down Expand Up @@ -201,8 +198,11 @@ dsl_dataset_phys(dsl_dataset_t *ds)
*/
#define MAX_TAG_PREFIX_LEN 17

#define dsl_dataset_is_snapshot(ds) \
(dsl_dataset_phys(ds)->ds_num_children != 0)
/*
 * Report whether ds is a snapshot, judged solely by its phys
 * structure having a non-zero ds_num_children.
 */
static inline boolean_t
dsl_dataset_is_snapshot(dsl_dataset_t *ds)
{
	/* 0 and 1 are exactly the values the "!= 0" comparison yields. */
	if (dsl_dataset_phys(ds)->ds_num_children == 0)
		return (0);

	return (1);
}

#define DS_UNIQUE_IS_ACCURATE(ds) \
((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
Expand Down
3 changes: 0 additions & 3 deletions include/sys/dsl_dir.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,6 @@ typedef struct dsl_dir_phys {
} dsl_dir_phys_t;

struct dsl_dir {
dmu_buf_user_t dd_dbu;

/* These are immutable; no lock needed: */
uint64_t dd_object;
dsl_pool_t *dd_pool;
Expand Down Expand Up @@ -125,7 +123,6 @@ struct dsl_dataset;
typedef struct dsl_dataset dsl_dataset_t;

void dsl_dir_rele(dsl_dir_t *dd, void *tag);
void dsl_dir_async_rele(dsl_dir_t *dd, void *tag);
int dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
dsl_dir_t **, const char **tail);
int dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
Expand Down
1 change: 1 addition & 0 deletions include/sys/sa.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ int sa_update_from_cb(sa_handle_t *, sa_attr_type_t,
uint32_t buflen, sa_data_locator_t *, void *userdata, dmu_tx_t *);
void sa_object_info(sa_handle_t *, dmu_object_info_t *);
void sa_object_size(sa_handle_t *, uint32_t *, u_longlong_t *);
void sa_update_user(sa_handle_t *, sa_handle_t *);
void *sa_get_userdata(sa_handle_t *);
void sa_set_userp(sa_handle_t *, void *);
dmu_buf_t *sa_get_db(sa_handle_t *);
Expand Down
3 changes: 1 addition & 2 deletions include/sys/sa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,12 +209,11 @@ typedef enum sa_data_op {
*/

/*
* Per-handle state: a lock, the bonus and spill dbufs, the owning objset,
* an opaque user pointer and the index tables for the bonus and spill
* buffers.
* NOTE(review): presumably the structure behind the sa_handle_t used by
* the sa.h prototypes -- the typedef is not visible here, confirm.
*/
struct sa_handle {
dmu_buf_user_t sa_dbu;
kmutex_t sa_lock;
dmu_buf_t *sa_bonus;
dmu_buf_t *sa_spill;
objset_t *sa_os;
/*
* NOTE(review): sa_userp is listed twice below. This looks like the
* removed and added sides of a whitespace-only diff hunk; a real
* header must declare it only once -- confirm against the source tree.
*/
void *sa_userp;
void *sa_userp;
sa_idx_tab_t *sa_bonus_tab; /* idx of bonus */
sa_idx_tab_t *sa_spill_tab; /* only present if spill activated */
};
Expand Down
4 changes: 0 additions & 4 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -690,7 +690,6 @@ extern spa_t *spa_next(spa_t *prev);
/* Refcount functions */
extern void spa_open_ref(spa_t *spa, void *tag);
extern void spa_close(spa_t *spa, void *tag);
extern void spa_async_close(spa_t *spa, void *tag);
extern boolean_t spa_refcount_zero(spa_t *spa);

#define SCL_NONE 0x00
Expand Down Expand Up @@ -801,9 +800,6 @@ extern uint64_t spa_version(spa_t *spa);
extern boolean_t spa_deflate(spa_t *spa);
extern metaslab_class_t *spa_normal_class(spa_t *spa);
extern metaslab_class_t *spa_log_class(spa_t *spa);
extern void spa_evicting_os_register(spa_t *, objset_t *os);
extern void spa_evicting_os_deregister(spa_t *, objset_t *os);
extern void spa_evicting_os_wait(spa_t *spa);
extern int spa_max_replication(spa_t *spa);
extern int spa_prev_software_version(spa_t *spa);
extern uint8_t spa_get_failmode(spa_t *spa);
Expand Down
3 changes: 0 additions & 3 deletions include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,6 @@ struct spa {
uint64_t spa_claim_max_txg; /* highest claimed birth txg */
timespec_t spa_loaded_ts; /* 1st successful open time */
objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */
kmutex_t spa_evicting_os_lock; /* Evicting objset list lock */
list_t spa_evicting_os_list; /* Objsets being evicted. */
kcondvar_t spa_evicting_os_cv; /* Objset Eviction Completion */
txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */
vdev_t *spa_root_vdev; /* top-level vdev container */
int spa_min_ashift; /* of vdevs in normal class */
Expand Down
3 changes: 1 addition & 2 deletions include/sys/zap_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@ typedef struct zap_phys {
typedef struct zap_table_phys zap_table_phys_t;

typedef struct zap {
dmu_buf_user_t zap_dbu;
objset_t *zap_objset;
uint64_t zap_object;
struct dmu_buf *zap_dbuf;
Expand Down Expand Up @@ -197,7 +196,7 @@ boolean_t zap_match(zap_name_t *zn, const char *matchname);
int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp);
void zap_unlockdir(zap_t *zap);
void zap_evict(void *dbu);
void zap_evict(dmu_buf_t *db, void *vmzap);
zap_name_t *zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt);
void zap_name_free(zap_name_t *zn);
int zap_hashbits(zap_t *zap);
Expand Down
1 change: 0 additions & 1 deletion include/sys/zap_leaf.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,6 @@ typedef union zap_leaf_chunk {
} zap_leaf_chunk_t;

typedef struct zap_leaf {
dmu_buf_user_t l_dbu;
krwlock_t l_rwlock;
uint64_t l_blkid; /* 1<<ZAP_BLOCK_SHIFT byte block off */
int l_bs; /* block size shift */
Expand Down
Loading

0 comments on commit b1d5794

Please sign in to comment.