Skip to content

Commit

Permalink
Illumos #3086: unnecessarily setting DS_FLAG_INCONSISTENT on async
Browse files Browse the repository at this point in the history
3086 unnecessarily setting DS_FLAG_INCONSISTENT on async
destroyed datasets
Reviewed by: Christopher Siden <[email protected]>
Approved by: Eric Schrock <[email protected]>

References:
  illumos/illumos-gate@ce636f8
  illumos changeset: 13776:cd512c80fd75
  https://www.illumos.org/issues/3086

Ported-by: Brian Behlendorf <[email protected]>
  • Loading branch information
ahrens authored and behlendorf committed Jan 8, 2013
1 parent b9b24bb commit 29809a6
Show file tree
Hide file tree
Showing 12 changed files with 209 additions and 115 deletions.
3 changes: 3 additions & 0 deletions cmd/ztest/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -2277,6 +2277,7 @@ ztest_zil_remount(ztest_ds_t *zd, uint64_t id)
{
objset_t *os = zd->zd_os;

mutex_enter(&zd->zd_dirobj_lock);
(void) rw_enter(&zd->zd_zilog_lock, RW_WRITER);

/* zfs_sb_teardown() */
Expand All @@ -2287,6 +2288,7 @@ ztest_zil_remount(ztest_ds_t *zd, uint64_t id)
zil_replay(os, zd, ztest_replay_vector);

(void) rw_exit(&zd->zd_zilog_lock);
mutex_exit(&zd->zd_dirobj_lock);
}

/*
Expand Down Expand Up @@ -5743,6 +5745,7 @@ ztest_freeze(void)
*/
kernel_init(FREAD | FWRITE);
VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));
ASSERT(spa_freeze_txg(spa) == UINT64_MAX);
VERIFY3U(0, ==, ztest_dataset_open(0));
ztest_dataset_close(0);

Expand Down
7 changes: 6 additions & 1 deletion include/sys/dsl_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ typedef struct dsl_pool {

/* No lock needed - sync context only */
blkptr_t dp_meta_rootbp;
list_t dp_synced_datasets;
hrtime_t dp_read_overhead;
uint64_t dp_throughput; /* bytes per millisec */
uint64_t dp_write_limit;
Expand All @@ -104,13 +103,17 @@ typedef struct dsl_pool {
kmutex_t dp_lock;
uint64_t dp_space_towrite[TXG_SIZE];
uint64_t dp_tempreserved[TXG_SIZE];
uint64_t dp_mos_used_delta;
uint64_t dp_mos_compressed_delta;
uint64_t dp_mos_uncompressed_delta;
uint64_t dp_txg_history_size;
list_t dp_txg_history;


/* Has its own locking */
tx_state_t dp_tx;
txg_list_t dp_dirty_datasets;
txg_list_t dp_dirty_zilogs;
txg_list_t dp_dirty_dirs;
txg_list_t dp_sync_tasks;

Expand Down Expand Up @@ -150,6 +153,8 @@ int dsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp,
void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx);
void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx);
void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx);
void dsl_pool_mos_diduse_space(dsl_pool_t *dp,
int64_t used, int64_t comp, int64_t uncomp);

taskq_t *dsl_pool_iput_taskq(dsl_pool_t *dp);

Expand Down
5 changes: 4 additions & 1 deletion include/sys/txg.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
*/

#ifndef _SYS_TXG_H
#define _SYS_TXG_H
Expand Down Expand Up @@ -121,7 +124,7 @@ extern void txg_wait_callbacks(struct dsl_pool *dp);

extern void txg_list_create(txg_list_t *tl, size_t offset);
extern void txg_list_destroy(txg_list_t *tl);
extern int txg_list_empty(txg_list_t *tl, uint64_t txg);
extern boolean_t txg_list_empty(txg_list_t *tl, uint64_t txg);
extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg);
extern int txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg);
extern void *txg_list_remove(txg_list_t *tl, uint64_t txg);
Expand Down
2 changes: 2 additions & 0 deletions include/sys/zil.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/

/* Portions Copyright 2010 Robert Milkowski */
Expand Down Expand Up @@ -454,6 +455,7 @@ extern void zil_replay(objset_t *os, void *arg,
zil_replay_func_t *replay_func[TX_MAX_TYPE]);
extern boolean_t zil_replaying(zilog_t *zilog, dmu_tx_t *tx);
extern void zil_destroy(zilog_t *zilog, boolean_t keep_first);
extern void zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx);
extern void zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx);

extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize);
Expand Down
2 changes: 2 additions & 0 deletions include/sys/zil_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/

/* Portions Copyright 2010 Robert Milkowski */
Expand Down Expand Up @@ -131,6 +132,7 @@ struct zilog {
zil_header_t zl_old_header; /* debugging aid */
uint_t zl_prev_blks[ZIL_PREV_BLKS]; /* size - sector rounded */
uint_t zl_prev_rotor; /* rotor for zl_prev[] */
txg_node_t zl_dirty_link; /* protected by dp_dirty_zilogs list */
};

typedef struct zil_bp_node {
Expand Down
4 changes: 2 additions & 2 deletions module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -1953,15 +1953,15 @@ dmu_init(void)
dbuf_init();
zfetch_init();
dmu_tx_init();
arc_init();
l2arc_init();
arc_init();
}

void
dmu_fini(void)
{
l2arc_fini();
arc_fini();
l2arc_fini();
dmu_tx_fini();
zfetch_fini();
dbuf_fini();
Expand Down
7 changes: 0 additions & 7 deletions module/zfs/dmu_send.c
Original file line number Diff line number Diff line change
Expand Up @@ -1609,13 +1609,6 @@ dmu_recv_existing_end(dmu_recv_cookie_t *drc)
dsl_dataset_t *ds = drc->drc_logical_ds;
int err, myerr;

/*
* XXX hack; seems the ds is still dirty and dsl_pool_zil_clean()
* expects it to have a ds_user_ptr (and zil), but clone_swap()
* can close it.
*/
txg_wait_synced(ds->ds_dir->dd_pool, 0);

if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) {
err = dsl_dataset_clone_swap(drc->drc_real_ds, ds,
drc->drc_force);
Expand Down
108 changes: 47 additions & 61 deletions module/zfs/dsl_dataset.c
Original file line number Diff line number Diff line change
Expand Up @@ -105,14 +105,8 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
if (ds == NULL) {
/*
* Account for the meta-objset space in its placeholder
* dsl_dir.
*/
ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
used, compressed, uncompressed, tx);
dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
dsl_pool_mos_diduse_space(tx->tx_pool,
used, compressed, uncompressed);
return;
}
dmu_buf_will_dirty(ds->ds_dbuf, tx);
Expand Down Expand Up @@ -150,15 +144,9 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,

ASSERT(used > 0);
if (ds == NULL) {
/*
* Account for the meta-objset space in its placeholder
* dataset.
*/
dsl_free(tx->tx_pool, tx->tx_txg, bp);

dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
-used, -compressed, -uncompressed, tx);
dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
dsl_pool_mos_diduse_space(tx->tx_pool,
-used, -compressed, -uncompressed);
return (used);
}
ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
Expand Down Expand Up @@ -1074,26 +1062,26 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
dummy_ds->ds_dir = dd;
dummy_ds->ds_object = ds->ds_object;

/*
* Check for errors and mark this ds as inconsistent, in
* case we crash while freeing the objects.
*/
err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
dsl_dataset_destroy_begin_sync, ds, NULL, 0);
if (err)
goto out_free;

err = dmu_objset_from_ds(ds, &os);
if (err)
goto out_free;

/*
* If async destruction is not enabled try to remove all objects
* while in the open context so that there is less work to do in
* the syncing context.
*/
if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds),
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
/*
* Check for errors and mark this ds as inconsistent, in
* case we crash while freeing the objects.
*/
err = dsl_sync_task_do(dd->dd_pool,
dsl_dataset_destroy_begin_check,
dsl_dataset_destroy_begin_sync, ds, NULL, 0);
if (err)
goto out_free;

err = dmu_objset_from_ds(ds, &os);
if (err)
goto out_free;

/*
* Remove all objects while in the open context so that
* there is less work to do in the syncing context.
*/
for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
ds->ds_phys->ds_prev_snap_txg)) {
/*
Expand All @@ -1104,29 +1092,25 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
}
if (err != ESRCH)
goto out_free;
}

/*
* Only the ZIL knows how to free log blocks.
*/
zil_destroy(dmu_objset_zil(os), B_FALSE);

/*
* Sync out all in-flight IO.
*/
txg_wait_synced(dd->dd_pool, 0);
/*
* Sync out all in-flight IO.
*/
txg_wait_synced(dd->dd_pool, 0);

/*
* If we managed to free all the objects in open
* context, the user space accounting should be zero.
*/
if (ds->ds_phys->ds_bp.blk_fill == 0 &&
dmu_objset_userused_enabled(os)) {
ASSERTV(uint64_t count);
ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 ||
count == 0);
ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 ||
count == 0);
/*
* If we managed to free all the objects in open
* context, the user space accounting should be zero.
*/
if (ds->ds_phys->ds_bp.blk_fill == 0 &&
dmu_objset_userused_enabled(os)) {
ASSERTV(uint64_t count);

ASSERT(zap_count(os, DMU_USERUSED_OBJECT,
&count) != 0 || count == 0);
ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT,
&count) != 0 || count == 0);
}
}

rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
Expand Down Expand Up @@ -1878,6 +1862,7 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
} else {
zfeature_info_t *async_destroy =
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
objset_t *os;

/*
* There's no next snapshot, so this is a head dataset.
Expand All @@ -1889,6 +1874,8 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
ds->ds_phys->ds_deadlist_obj = 0;

VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os));

if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
err = old_synchronous_dataset_destroy(ds, tx);
} else {
Expand All @@ -1898,12 +1885,12 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
*/
uint64_t used, comp, uncomp;

ASSERT(err == 0 || err == EBUSY);
zil_destroy_sync(dmu_objset_zil(os), tx);

if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
spa_feature_incr(dp->dp_spa, async_destroy, tx);
dp->dp_bptree_obj = bptree_alloc(
dp->dp_meta_objset, tx);
VERIFY(zap_add(dp->dp_meta_objset,
dp->dp_bptree_obj = bptree_alloc(mos, tx);
VERIFY(zap_add(mos,
DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
&dp->dp_bptree_obj, tx) == 0);
Expand All @@ -1916,7 +1903,7 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
ds->ds_phys->ds_unique_bytes == used);

bptree_add(dp->dp_meta_objset, dp->dp_bptree_obj,
bptree_add(mos, dp->dp_bptree_obj,
&ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
used, comp, uncomp, tx);
dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
Expand Down Expand Up @@ -2203,7 +2190,6 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
dmu_buf_will_dirty(ds->ds_dbuf, tx);
ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;

dsl_dir_dirty(ds->ds_dir, tx);
dmu_objset_sync(ds->ds_objset, zio, tx);
}

Expand Down
10 changes: 3 additions & 7 deletions module/zfs/dsl_dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,6 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
kmem_free(dd, sizeof (dsl_dir_t));
dmu_buf_rele(dbuf, tag);
return (err);

}

void
Expand Down Expand Up @@ -223,7 +222,7 @@ dsl_dir_name(dsl_dir_t *dd, char *buf)
}
}

/* Calculate name legnth, avoiding all the strcat calls of dsl_dir_name */
/* Calculate name length, avoiding all the strcat calls of dsl_dir_name */
int
dsl_dir_namelen(dsl_dir_t *dd)
{
Expand Down Expand Up @@ -592,8 +591,6 @@ dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
{
ASSERT(dmu_tx_is_syncing(tx));

dmu_buf_will_dirty(dd->dd_dbuf, tx);

mutex_enter(&dd->dd_lock);
ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0);
dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
Expand Down Expand Up @@ -950,8 +947,6 @@ dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(type < DD_USED_NUM);

dsl_dir_dirty(dd, tx);

if (needlock)
mutex_enter(&dd->dd_lock);
accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used);
Expand All @@ -960,6 +955,7 @@ dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
dd->dd_phys->dd_compressed_bytes >= -compressed);
ASSERT(uncompressed >= 0 ||
dd->dd_phys->dd_uncompressed_bytes >= -uncompressed);
dmu_buf_will_dirty(dd->dd_dbuf, tx);
dd->dd_phys->dd_used_bytes += used;
dd->dd_phys->dd_uncompressed_bytes += uncompressed;
dd->dd_phys->dd_compressed_bytes += compressed;
Expand Down Expand Up @@ -1003,13 +999,13 @@ dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
if (delta == 0 || !(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN))
return;

dsl_dir_dirty(dd, tx);
if (needlock)
mutex_enter(&dd->dd_lock);
ASSERT(delta > 0 ?
dd->dd_phys->dd_used_breakdown[oldtype] >= delta :
dd->dd_phys->dd_used_breakdown[newtype] >= -delta);
ASSERT(dd->dd_phys->dd_used_bytes >= ABS(delta));
dmu_buf_will_dirty(dd->dd_dbuf, tx);
dd->dd_phys->dd_used_breakdown[oldtype] -= delta;
dd->dd_phys->dd_used_breakdown[newtype] += delta;
if (needlock)
Expand Down
Loading

0 comments on commit 29809a6

Please sign in to comment.