Merge remote-tracking branch 'upstream/master'
rdolbeau committed May 9, 2015
2 parents 340c028 + 16fcdea commit 9bae3a1
Showing 4 changed files with 143 additions and 25 deletions.
1 change: 1 addition & 0 deletions include/sys/spa.h
@@ -830,6 +830,7 @@ extern boolean_t spa_has_slogs(spa_t *spa);
extern boolean_t spa_is_root(spa_t *spa);
extern boolean_t spa_writeable(spa_t *spa);
extern boolean_t spa_has_pending_synctask(spa_t *spa);
+extern void zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp);

extern int spa_mode(spa_t *spa);
extern uint64_t strtonum(const char *str, char **nptr);
32 changes: 20 additions & 12 deletions module/zfs/arc.c
@@ -1825,13 +1825,15 @@ arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle,
kmutex_t *hash_lock;
boolean_t have_lock;
void *stolen = NULL;
-    arc_buf_hdr_t marker = {{{ 0 }}};
+    arc_buf_hdr_t *marker;
int count = 0;

ASSERT(state == arc_mru || state == arc_mfu);

evicted_state = (state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost;

+    marker = kmem_zalloc(sizeof (arc_buf_hdr_t), KM_SLEEP);
+
top:
mutex_enter(&state->arcs_mtx);
mutex_enter(&evicted_state->arcs_mtx);
@@ -1866,14 +1868,14 @@ arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle,
* the hot code path, so don't sleep.
*/
if (!recycle && count++ > arc_evict_iterations) {
-            list_insert_after(list, ab, &marker);
+            list_insert_after(list, ab, marker);
mutex_exit(&evicted_state->arcs_mtx);
mutex_exit(&state->arcs_mtx);
kpreempt(KPREEMPT_SYNC);
mutex_enter(&state->arcs_mtx);
mutex_enter(&evicted_state->arcs_mtx);
-            ab_prev = list_prev(list, &marker);
-            list_remove(list, &marker);
+            ab_prev = list_prev(list, marker);
+            list_remove(list, marker);
count = 0;
continue;
}
@@ -1957,6 +1959,8 @@ arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle,
goto top;
}

+    kmem_free(marker, sizeof (arc_buf_hdr_t));
+
if (bytes_evicted < bytes)
dprintf("only evicted %lld bytes from %x\n",
(longlong_t)bytes_evicted, state->arcs_state);
@@ -1986,15 +1990,17 @@ arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes,
arc_buf_contents_t type)
{
arc_buf_hdr_t *ab, *ab_prev;
-    arc_buf_hdr_t marker;
+    arc_buf_hdr_t *marker;
list_t *list = &state->arcs_list[type];
kmutex_t *hash_lock;
uint64_t bytes_deleted = 0;
uint64_t bufs_skipped = 0;
int count = 0;

ASSERT(GHOST_STATE(state));
-    bzero(&marker, sizeof (marker));
+
+    marker = kmem_zalloc(sizeof (arc_buf_hdr_t), KM_SLEEP);
+
top:
mutex_enter(&state->arcs_mtx);
for (ab = list_tail(list); ab; ab = ab_prev) {
@@ -2020,12 +2026,12 @@ arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes,
* before reacquiring the lock.
*/
if (count++ > arc_evict_iterations) {
-            list_insert_after(list, ab, &marker);
+            list_insert_after(list, ab, marker);
mutex_exit(&state->arcs_mtx);
kpreempt(KPREEMPT_SYNC);
mutex_enter(&state->arcs_mtx);
-            ab_prev = list_prev(list, &marker);
-            list_remove(list, &marker);
+            ab_prev = list_prev(list, marker);
+            list_remove(list, marker);
count = 0;
continue;
}
@@ -2057,13 +2063,13 @@ arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes,
* hash lock to become available. Once its
* available, restart from where we left off.
*/
-            list_insert_after(list, ab, &marker);
+            list_insert_after(list, ab, marker);
mutex_exit(&state->arcs_mtx);
mutex_enter(hash_lock);
mutex_exit(hash_lock);
mutex_enter(&state->arcs_mtx);
-            ab_prev = list_prev(list, &marker);
-            list_remove(list, &marker);
+            ab_prev = list_prev(list, marker);
+            list_remove(list, marker);
} else {
bufs_skipped += 1;
}
@@ -2076,6 +2082,8 @@ arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes,
goto top;
}

+    kmem_free(marker, sizeof (arc_buf_hdr_t));
+
if (bufs_skipped) {
ARCSTAT_INCR(arcstat_mutex_miss, bufs_skipped);
ASSERT(bytes >= 0);
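
The arc.c hunks above keep the existing marker technique, which lets a walker drop the ARC state mutex mid-scan and resume where it left off; the only functional change is that the marker now comes from kmem_zalloc() instead of occupying a sizable arc_buf_hdr_t on the kernel stack. A minimal user-space sketch of the marker pattern itself, with hypothetical types and helpers (this is not the illumos list_t API, and error handling is elided):

#include <stdlib.h>
#include <pthread.h>
#include <sched.h>

struct node {
    struct node *prev, *next;
    int is_marker;          /* real consumers skip marker nodes */
};

/* Insert m between n and n->next in a circular list with a sentinel head. */
static void
insert_after(struct node *n, struct node *m)
{
    m->prev = n;
    m->next = n->next;
    n->next->prev = m;
    n->next = m;
}

static void
remove_node(struct node *m)
{
    m->prev->next = m->next;
    m->next->prev = m->prev;
}

/* Walk tail-to-head under the lock, yielding it every max_iter nodes. */
void
prune(struct node *head, pthread_mutex_t *lock, int max_iter)
{
    /* Heap allocation keeps the marker off the (small) kernel stack. */
    struct node *marker = calloc(1, sizeof (*marker));
    struct node *n, *n_prev;
    int count = 0;

    marker->is_marker = 1;
    pthread_mutex_lock(lock);
    for (n = head->prev; n != head; n = n_prev) {
        n_prev = n->prev;
        if (n->is_marker)
            continue;       /* another walker's marker */
        if (++count > max_iter) {
            /* Park the marker beside n, then yield the lock. */
            insert_after(n, marker);
            pthread_mutex_unlock(lock);
            sched_yield();
            pthread_mutex_lock(lock);
            /* Resume from whatever now precedes the marker. */
            n_prev = marker->prev;
            remove_node(marker);
            count = 0;
            continue;
        }
        /* ... examine and possibly evict n ... */
    }
    pthread_mutex_unlock(lock);
    free(marker);
}

The marker is the only stable reference across the unlock/relock window; neighbouring nodes may be evicted or inserted while the lock is dropped, which is why the walk resumes from marker->prev rather than from the saved n.
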
38 changes: 30 additions & 8 deletions module/zfs/zfs_vfsops.c
@@ -1137,8 +1137,28 @@ zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
* drain the iput_taskq to ensure all active references to the
* zfs_sb_t have been handled only then can it be safely destroyed.
*/
-    if (zsb->z_os)
-        taskq_wait(dsl_pool_iput_taskq(dmu_objset_pool(zsb->z_os)));
+    if (zsb->z_os) {
+        /*
+         * If we're unmounting we have to wait for the list to
+         * drain completely.
+         *
+         * If we're not unmounting there's no guarantee the list
+         * will drain completely, but iputs run from the taskq
+         * may add the parents of dir-based xattrs to the taskq
+         * so we want to wait for these.
+         *
+         * We can safely read z_nr_znodes without locking because the
+         * VFS has already blocked operations which add to the
+         * z_all_znodes list and thus increment z_nr_znodes.
+         */
+        int round = 0;
+        while (zsb->z_nr_znodes > 0) {
+            taskq_wait(dsl_pool_iput_taskq(dmu_objset_pool(
+                zsb->z_os)));
+            if (++round > 1 && !unmounting)
+                break;
+        }
+    }

rrw_enter(&zsb->z_teardown_lock, RW_WRITER, FTAG);

@@ -1182,13 +1202,15 @@ zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
*
* Release all holds on dbufs.
*/
-    mutex_enter(&zsb->z_znodes_lock);
-    for (zp = list_head(&zsb->z_all_znodes); zp != NULL;
-        zp = list_next(&zsb->z_all_znodes, zp)) {
-        if (zp->z_sa_hdl)
-            zfs_znode_dmu_fini(zp);
-    }
-    mutex_exit(&zsb->z_znodes_lock);
+    if (!unmounting) {
+        mutex_enter(&zsb->z_znodes_lock);
+        for (zp = list_head(&zsb->z_all_znodes); zp != NULL;
+            zp = list_next(&zsb->z_all_znodes, zp)) {
+            if (zp->z_sa_hdl)
+                zfs_znode_dmu_fini(zp);
+        }
+        mutex_exit(&zsb->z_znodes_lock);
+    }

/*
* If we are unmounting, set the unmounted flag and let new VFS ops
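
The new loop exists because a single taskq_wait() is not a full drain here: an iput executed from the taskq can itself queue further iputs (the parents of directory-based xattrs), so the queue can refill while being waited on. A generic sketch of this drain-until-quiescent pattern, with hypothetical names (workqueue_wait_empty, pending_items):

#include <stdatomic.h>
#include <stdbool.h>

/* Hypothetical primitives: a work queue whose tasks may enqueue more tasks. */
extern void workqueue_wait_empty(void);  /* block until queue momentarily empty */
extern atomic_long pending_items;        /* objects still awaiting release */

void
drain(bool unmounting)
{
    int round = 0;

    /*
     * A momentarily empty queue does not imply pending_items == 0,
     * because each completed task may have enqueued follow-up work.
     * On unmount, loop until fully drained; otherwise one extra
     * round catches the common follow-up case and then we give up.
     */
    while (atomic_load(&pending_items) > 0) {
        workqueue_wait_empty();
        if (++round > 1 && !unmounting)
            break;
    }
}
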
97 changes: 92 additions & 5 deletions module/zfs/zio.c
@@ -212,7 +212,7 @@ zio_buf_alloc(size_t size)
{
size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;

-    ASSERT3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
+    VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);

return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE));
}
@@ -228,7 +228,7 @@ zio_data_buf_alloc(size_t size)
{
size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;

-    ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
+    VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);

return (kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE));
}
@@ -238,7 +238,7 @@ zio_buf_free(void *buf, size_t size)
{
size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;

-    ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
+    VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);

kmem_cache_free(zio_buf_cache[c], buf);
}
@@ -248,7 +248,7 @@ zio_data_buf_free(void *buf, size_t size)
{
size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;

-    ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
+    VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);

kmem_cache_free(zio_data_buf_cache[c], buf);
}
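
The four hunks above all make the same change: ASSERT-style checks vanish from non-debug builds, whereas the VERIFY family is evaluated unconditionally, so an out-of-range size can no longer silently index past the end of the zio_buf_cache arrays in a production kernel. A sketch of the distinction, assuming a simple panic_on() helper rather than the actual SPL macros (the real three-operand macros also print both operand values on failure):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static void
panic_on(const char *expr, const char *file, int line)
{
    fprintf(stderr, "VERIFY(%s) failed at %s:%d\n", expr, file, line);
    abort();
}

/* VERIFY3U: checked in every build. */
#define VERIFY3U(l, op, r) \
    ((void)(((uint64_t)(l) op (uint64_t)(r)) || \
        (panic_on(#l " " #op " " #r, __FILE__, __LINE__), 0)))

/* ASSERT3U: compiled out unless DEBUG is defined. */
#ifdef DEBUG
#define ASSERT3U(l, op, r)  VERIFY3U(l, op, r)
#else
#define ASSERT3U(l, op, r)  ((void)0)
#endif
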
@@ -596,13 +596,99 @@ zio_root(spa_t *spa, zio_done_func_t *done, void *private, enum zio_flag flags)
return (zio_null(NULL, spa, NULL, done, private, flags));
}

+void
+zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp)
+{
+    int i;
+
+    if (!DMU_OT_IS_VALID(BP_GET_TYPE(bp))) {
+        zfs_panic_recover("blkptr at %p has invalid TYPE %llu",
+            bp, (longlong_t)BP_GET_TYPE(bp));
+    }
+    if (BP_GET_CHECKSUM(bp) >= ZIO_CHECKSUM_FUNCTIONS ||
+        BP_GET_CHECKSUM(bp) <= ZIO_CHECKSUM_ON) {
+        zfs_panic_recover("blkptr at %p has invalid CHECKSUM %llu",
+            bp, (longlong_t)BP_GET_CHECKSUM(bp));
+    }
+    if (BP_GET_COMPRESS(bp) >= ZIO_COMPRESS_FUNCTIONS ||
+        BP_GET_COMPRESS(bp) <= ZIO_COMPRESS_ON) {
+        zfs_panic_recover("blkptr at %p has invalid COMPRESS %llu",
+            bp, (longlong_t)BP_GET_COMPRESS(bp));
+    }
+    if (BP_GET_LSIZE(bp) > SPA_MAXBLOCKSIZE) {
+        zfs_panic_recover("blkptr at %p has invalid LSIZE %llu",
+            bp, (longlong_t)BP_GET_LSIZE(bp));
+    }
+    if (BP_GET_PSIZE(bp) > SPA_MAXBLOCKSIZE) {
+        zfs_panic_recover("blkptr at %p has invalid PSIZE %llu",
+            bp, (longlong_t)BP_GET_PSIZE(bp));
+    }
+
+    if (BP_IS_EMBEDDED(bp)) {
+        if (BPE_GET_ETYPE(bp) > NUM_BP_EMBEDDED_TYPES) {
+            zfs_panic_recover("blkptr at %p has invalid ETYPE %llu",
+                bp, (longlong_t)BPE_GET_ETYPE(bp));
+        }
+    }
+
+    /*
+     * Pool-specific checks.
+     *
+     * Note: it would be nice to verify that the blk_birth and
+     * BP_PHYSICAL_BIRTH() are not too large.  However, spa_freeze()
+     * allows the birth time of log blocks (and dmu_sync()-ed blocks
+     * that are in the log) to be arbitrarily large.
+     */
+    for (i = 0; i < BP_GET_NDVAS(bp); i++) {
+        uint64_t vdevid = DVA_GET_VDEV(&bp->blk_dva[i]);
+        vdev_t *vd;
+        uint64_t offset, asize;
+        if (vdevid >= spa->spa_root_vdev->vdev_children) {
+            zfs_panic_recover("blkptr at %p DVA %u has invalid "
+                "VDEV %llu",
+                bp, i, (longlong_t)vdevid);
+        }
+        vd = spa->spa_root_vdev->vdev_child[vdevid];
+        if (vd == NULL) {
+            zfs_panic_recover("blkptr at %p DVA %u has invalid "
+                "VDEV %llu",
+                bp, i, (longlong_t)vdevid);
+        }
+        if (vd->vdev_ops == &vdev_hole_ops) {
+            zfs_panic_recover("blkptr at %p DVA %u has hole "
+                "VDEV %llu",
+                bp, i, (longlong_t)vdevid);
+        }
+        if (vd->vdev_ops == &vdev_missing_ops) {
+            /*
+             * "missing" vdevs are valid during import, but we
+             * don't have their detailed info (e.g. asize), so
+             * we can't perform any more checks on them.
+             */
+            continue;
+        }
+        offset = DVA_GET_OFFSET(&bp->blk_dva[i]);
+        asize = DVA_GET_ASIZE(&bp->blk_dva[i]);
+        if (BP_IS_GANG(bp))
+            asize = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE);
+        if (offset + asize > vd->vdev_asize) {
+            zfs_panic_recover("blkptr at %p DVA %u has invalid "
+                "OFFSET %llu",
+                bp, i, (longlong_t)offset);
+        }
+    }
+}
+
zio_t *
zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
void *data, uint64_t size, zio_done_func_t *done, void *private,
zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb)
{
zio_t *zio;

+    zfs_blkptr_verify(spa, bp);
+
zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp,
data, size, done, private,
ZIO_TYPE_READ, priority, flags, NULL, 0, zb,
@@ -2879,7 +2965,8 @@ zio_checksum_verify(zio_t *zio)

if ((error = zio_checksum_error(zio, &info)) != 0) {
zio->io_error = error;
-        if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
+        if (error == ECKSUM &&
+            !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
zfs_ereport_start_checksum(zio->io_spa,
zio->io_vd, zio, zio->io_offset,
zio->io_size, NULL, &info);
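
In the final hunk, the ereport is now gated on error == ECKSUM because zio_checksum_error() can fail for reasons other than a data mismatch (for example, an unrecognized checksum function in the block pointer), and such failures should still set io_error without being reported as media corruption. A reduced sketch of that control flow, with hypothetical stand-ins for the checksum and event machinery:

#include <errno.h>
#include <stddef.h>

#ifndef ECKSUM
#define ECKSUM EBADE    /* the SPL maps Solaris ECKSUM to EBADE on Linux */
#endif

/* Hypothetical helpers standing in for the ZFS checksum/event machinery. */
extern int checksum_verify_block(const void *buf, size_t len);
extern void post_checksum_event(const void *buf, size_t len);

int
handle_read_done(const void *buf, size_t len, int speculative)
{
    int error = checksum_verify_block(buf, len);

    /*
     * Only a genuine mismatch (ECKSUM) on a non-speculative read
     * should raise a checksum event; other errnos still fail the
     * I/O but must not be counted as on-disk corruption.
     */
    if (error == ECKSUM && !speculative)
        post_checksum_event(buf, len);

    return (error);
}
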
