From f9111e8e6a44a234c5f5c1fb45968edaafb60638 Mon Sep 17 00:00:00 2001
From: Tom Caputi
Date: Mon, 19 Jun 2017 16:56:48 -0400
Subject: [PATCH] experimental change to avoid reading metadata twice

Signed-off-by: Tom Caputi
---
 include/sys/dsl_scan.h |   1 +
 module/zfs/dsl_scan.c  | 417 +++++++++++++++++++++++------------------
 2 files changed, 236 insertions(+), 182 deletions(-)

diff --git a/include/sys/dsl_scan.h b/include/sys/dsl_scan.h
index bb2afd541bd5..91c9e99e4b05 100644
--- a/include/sys/dsl_scan.h
+++ b/include/sys/dsl_scan.h
@@ -138,6 +138,7 @@ typedef struct dsl_scan {
 	uint64_t scn_segs_this_txg;
 	uint64_t scn_avg_zio_size_this_txg;
 	uint64_t scn_zios_this_txg;
+	uint64_t scn_dummy_zios_this_txg;
 
 	dsl_scan_phys_t scn_phys;
 	dsl_scan_phys_t scn_phys_cached;
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
index c763ef262b97..ef33dc08794e 100644
--- a/module/zfs/dsl_scan.c
+++ b/module/zfs/dsl_scan.c
@@ -108,8 +108,8 @@
  * the scan simply resumes from the last checkpoint.
  */
 
-typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *,
-    const zbookmark_phys_t *);
+typedef void (scan_cb_t)(dsl_pool_t *, const blkptr_t *,
+    const zbookmark_phys_t *, boolean_t io_done);
 
 static scan_cb_t dsl_scan_scrub_cb;
 
@@ -223,6 +223,7 @@ typedef struct scan_io {
 	int sio_flags;
 
 	/* members for queue sorting */
+	boolean_t sio_dummy;
 	union {
 		avl_node_t sio_addr_node; /* link into sequential queue */
 		list_node_t sio_list_node; /* list link for issuing */
@@ -264,6 +265,7 @@ struct dsl_scan_io_queue {
 	uint64_t q_segs_this_txg;
 	uint64_t q_avg_zio_size_this_txg;
 	uint64_t q_zios_this_txg;
+	uint64_t q_dummy_zios_this_txg;
 };
 
 /* private data for dsl_scan_prefetch_cb() */
@@ -344,6 +346,112 @@ bp2sio(const blkptr_t *bp, scan_io_t *sio, int dva_i)
 	sio->sio_cksum = bp->blk_cksum;
 }
 
+static void
+scan_io_queue_insert_impl(dsl_scan_io_queue_t *queue, scan_io_t *sio)
+{
+	avl_index_t idx;
+	scan_io_t *found_sio = NULL;
+	uint64_t offset = SCAN_IO_GET_OFFSET(sio);
+	uint64_t asize = sio->sio_asize;
+
+	ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock));
+
+	if ((found_sio = avl_find(&queue->q_zios_by_addr, sio, &idx)) != NULL) {
+		/* block is already scheduled for reading */
+		if (sio->sio_dummy)
+			found_sio->sio_dummy = B_TRUE;
+		mutex_enter(&queue->q_scn->scn_status_lock);
+		queue->q_scn->scn_bytes_pending -= sio->sio_asize;
+		mutex_exit(&queue->q_scn->scn_status_lock);
+		kmem_free(sio, sizeof (*sio));
+		return;
+	}
+	avl_insert(&queue->q_zios_by_addr, sio, idx);
+	atomic_add_64(&queue->q_zio_bytes, asize);
+
+	range_tree_set_gap(queue->q_exts_by_addr, zfs_scan_max_ext_gap);
+	range_tree_add_fill(queue->q_exts_by_addr, offset, asize, asize);
+}
+
+/*
+ * Given all the info we got from our metadata scanning process, we
+ * construct a scan_io_t and insert it into the scan sorting queue. The
+ * I/O must already be suitable for us to process. This is controlled
+ * by dsl_scan_enqueue().
+ */
+static void
+scan_io_queue_insert(dsl_scan_io_queue_t *queue, const blkptr_t *bp, int dva_i,
+    int zio_flags, const zbookmark_phys_t *zb, boolean_t dummy)
+{
+	scan_io_t *sio = kmem_zalloc(sizeof (*sio), KM_SLEEP);
+
+	ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock));
+
+	bp2sio(bp, sio, dva_i);
+	sio->sio_flags = zio_flags;
+	sio->sio_zb = *zb;
+	sio->sio_dummy = dummy;
+
+	/*
+	 * Increment the bytes pending counter now so that we can't
+	 * get an integer underflow in case the worker processes the
+	 * zio before we get to incrementing this counter.
+	 */
+	mutex_enter(&queue->q_scn->scn_status_lock);
+	queue->q_scn->scn_bytes_pending += sio->sio_asize;
+	mutex_exit(&queue->q_scn->scn_status_lock);
+
+	scan_io_queue_insert_impl(queue, sio);
+}
+
+static void
+dsl_scan_enqueue_impl(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
+    const zbookmark_phys_t *zb, boolean_t dummy)
+{
+	int i;
+	spa_t *spa = dp->dp_spa;
+
+	ASSERT(dp->dp_scan != NULL);
+	ASSERT(dp->dp_scan->scn_is_sorted);
+
+	for (i = 0; i < BP_GET_NDVAS(bp); i++) {
+		dva_t dva;
+		vdev_t *vdev;
+
+		dva = bp->blk_dva[i];
+		vdev = vdev_lookup_top(spa, DVA_GET_VDEV(&dva));
+		ASSERT(vdev != NULL);
+
+		mutex_enter(&vdev->vdev_scan_io_queue_lock);
+		if (vdev->vdev_scan_io_queue == NULL)
+			vdev->vdev_scan_io_queue = scan_io_queue_create(vdev);
+		scan_io_queue_insert(vdev->vdev_scan_io_queue, bp,
+		    i, zio_flags, zb, dummy);
+		mutex_exit(&vdev->vdev_scan_io_queue_lock);
+	}
+}
+
+/*
+ * Given a set of I/O parameters as discovered by the metadata traversal
+ * process, attempts to place the I/O into the sorted queues (if allowed),
+ * or immediately executes the I/O. The dummy flag can be set to indicate
+ * that the block has already been read (by the prefetch thread) and only
+ * needs to be accounted for, not read again.
+ */
+static void
+dsl_scan_enqueue(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
+    const zbookmark_phys_t *zb, boolean_t dummy)
+{
+	ASSERT(!BP_IS_EMBEDDED(bp));
+	if (!dp->dp_scan->scn_is_sorted) {
+		if (dummy)
+			return;
+
+		scan_exec_io(dp, bp, zio_flags, zb, NULL);
+		return;
+	}
+
+	dsl_scan_enqueue_impl(dp, bp, zio_flags, zb, dummy);
+}
+
 void
 dsl_scan_global_init(void)
 {
@@ -1104,7 +1212,7 @@ dsl_scan_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
 	SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET],
 	    ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
 
-	VERIFY(0 == scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb));
+	scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb, B_FALSE);
 
 	return (0);
 }
 
@@ -1137,7 +1245,7 @@ dsl_scan_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
 		    lr->lr_foid, ZB_ZIL_LEVEL,
 		    lr->lr_offset / BP_GET_LSIZE(bp));
 
-		VERIFY(0 == scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb));
+		scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb, B_FALSE);
 	}
 	return (0);
 }
 
@@ -1332,6 +1440,18 @@ dsl_scan_prefetch_cb(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
 	if (scn->scn_pausing)
 		goto out;
 
+	/*
+	 * If we read this block to do the prefetch we don't need to read it
+	 * again for the scrub. Instead, we enqueue a dummy sio to indicate
+	 * that we don't need to do the read at this point. We can't do this
+	 * optimization for non-sorted scans which must do all I/O in logical
+	 * order.
+	 */
+	if (scn->scn_is_sorted && zio != NULL && zio->io_error == 0 &&
+	    (zio->io_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))) {
+		dsl_scan_enqueue_impl(scn->scn_dp, bp, 0, zb, B_TRUE);
+	}
+
 	if (BP_GET_LEVEL(bp) > 0) {
 		int i;
 		blkptr_t *cbp;
@@ -1385,14 +1505,16 @@ dsl_scan_prefetch_thread(void *arg)
 
 	/* loop until we are told to stop */
 	while (!scn->scn_prefetch_stop) {
-		/*
-		 * XXX These are prefetches, but unfortunately the ARC
-		 * currently expects prefetches to not have a "done"
-		 * function, which we need.
- */ + int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD; arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_LONG_LIFE | ARC_FLAG_PREFETCH; + if (scn->scn_phys.scn_func == POOL_SCAN_SCRUB) { + zio_flags |= ZIO_FLAG_SCRUB; + } else if (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) { + zio_flags |= ZIO_FLAG_RESILVER; + } + mutex_enter(&scn->scn_prefetch_lock); /* @@ -1421,9 +1543,7 @@ dsl_scan_prefetch_thread(void *arg) /* issue the prefetch asynchronously */ (void) arc_read(scn->scn_zio_root, scn->scn_dp->dp_spa, &spic->spic_bp, dsl_scan_prefetch_cb, spic->spic_spc, - ZIO_PRIORITY_ASYNC_READ, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD, &flags, - &spic->spic_zb); + ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, &spic->spic_zb); kmem_free(spic, sizeof (scan_prefetch_issue_ctx_t)); } @@ -1489,13 +1609,21 @@ inline __attribute__((always_inline)) static void dsl_scan_visitdnode( */ inline __attribute__((always_inline)) static int dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, - dnode_phys_t *dnp, const blkptr_t *bp, - const zbookmark_phys_t *zb, dmu_tx_t *tx) + dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_phys_t *zb, + boolean_t *io_done, dmu_tx_t *tx) { dsl_pool_t *dp = scn->scn_dp; int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD; int err; + *io_done = B_FALSE; + + if (scn->scn_phys.scn_func == POOL_SCAN_SCRUB) { + zio_flags |= ZIO_FLAG_SCRUB; + } else if (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) { + zio_flags |= ZIO_FLAG_RESILVER; + } + if (BP_GET_LEVEL(bp) > 0) { arc_flags_t flags = ARC_FLAG_WAIT; int i; @@ -1519,6 +1647,8 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, ds, scn, ostype, tx); } arc_buf_destroy(buf, &buf); + *io_done = ((flags & ARC_FLAG_CACHED) == 0 && + (zio_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))); } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) { arc_flags_t flags = ARC_FLAG_WAIT; dnode_phys_t *cdnp; @@ -1540,6 +1670,8 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, } arc_buf_destroy(buf, &buf); + *io_done = ((flags & ARC_FLAG_CACHED) == 0 && + (zio_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))); } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { arc_flags_t flags = ARC_FLAG_WAIT; objset_phys_t *osp; @@ -1572,6 +1704,8 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, DMU_USERUSED_OBJECT, tx); } arc_buf_destroy(buf, &buf); + *io_done = ((flags & ARC_FLAG_CACHED) == 0 && + (zio_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))); } return (0); @@ -1613,6 +1747,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, { dsl_pool_t *dp = scn->scn_dp; blkptr_t *bp_toread = NULL; + boolean_t io_done = B_FALSE; /* ASSERT(pbuf == NULL || arc_released(pbuf)); */ @@ -1650,8 +1785,10 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, bp_toread = kmem_alloc(sizeof (blkptr_t), KM_SLEEP); *bp_toread = *bp; - if (dsl_scan_recurse(scn, ds, ostype, dnp, bp_toread, zb, tx) != 0) + if (dsl_scan_recurse(scn, ds, ostype, dnp, + bp_toread, zb, &io_done, tx) != 0) { goto out; + } /* * If dsl_scan_ddt() has already visited this block, it will have @@ -1676,7 +1813,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, goto out; } - scan_funcs[scn->scn_phys.scn_func](dp, bp, zb); + scan_funcs[scn->scn_phys.scn_func](dp, bp, zb, io_done); out: kmem_free(bp_toread, sizeof (blkptr_t)); @@ -2187,7 +2324,8 @@ dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, ddt_bp_create(checksum, ddk, ddp, &bp); 
 		scn->scn_visited_this_txg++;
-		scan_funcs[scn->scn_phys.scn_func](scn->scn_dp, &bp, &zb);
+		scan_funcs[scn->scn_phys.scn_func](scn->scn_dp, &bp,
+		    &zb, B_FALSE);
 	}
 }
 
@@ -2364,19 +2502,75 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
 }
 
 static void
-scan_io_queues_update_zio_stats(dsl_scan_io_queue_t *q, const blkptr_t *bp)
+count_block(dsl_scan_t *scn, zfs_all_blkstats_t *zab, const blkptr_t *bp)
 {
 	int i;
-	uint64_t cur_size = 0;
 
-	for (i = 0; i < BP_GET_NDVAS(bp); i++) {
-		cur_size += DVA_GET_ASIZE(&bp->blk_dva[i]);
+	for (i = 0; i < BP_GET_NDVAS(bp); i++)
+		atomic_add_64(&scn->scn_bytes_issued,
+		    DVA_GET_ASIZE(&bp->blk_dva[i]));
+
+	/*
+	 * If we resume after a reboot, zab will be NULL; don't record
+	 * incomplete stats in that case.
+	 */
+	if (zab == NULL)
+		return;
+
+	mutex_enter(&zab->zab_lock);
+
+	for (i = 0; i < 4; i++) {
+		int l = (i < 2) ? BP_GET_LEVEL(bp) : DN_MAX_LEVELS;
+		int t = (i & 1) ? BP_GET_TYPE(bp) : DMU_OT_TOTAL;
+		int equal;
+		zfs_blkstat_t *zb;
+
+		if (t & DMU_OT_NEWTYPE)
+			t = DMU_OT_OTHER;
+
+		zb = &zab->zab_type[l][t];
+		zb->zb_count++;
+		zb->zb_asize += BP_GET_ASIZE(bp);
+		zb->zb_lsize += BP_GET_LSIZE(bp);
+		zb->zb_psize += BP_GET_PSIZE(bp);
+		zb->zb_gangs += BP_COUNT_GANG(bp);
+
+		switch (BP_GET_NDVAS(bp)) {
+		case 2:
+			if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
+			    DVA_GET_VDEV(&bp->blk_dva[1]))
+				zb->zb_ditto_2_of_2_samevdev++;
+			break;
+		case 3:
+			equal = (DVA_GET_VDEV(&bp->blk_dva[0]) ==
+			    DVA_GET_VDEV(&bp->blk_dva[1])) +
+			    (DVA_GET_VDEV(&bp->blk_dva[0]) ==
+			    DVA_GET_VDEV(&bp->blk_dva[2])) +
+			    (DVA_GET_VDEV(&bp->blk_dva[1]) ==
+			    DVA_GET_VDEV(&bp->blk_dva[2]));
+			if (equal == 1)
+				zb->zb_ditto_2_of_3_samevdev++;
+			else if (equal == 3)
+				zb->zb_ditto_3_of_3_samevdev++;
+			break;
+		}
 	}
 
+	mutex_exit(&zab->zab_lock);
+}
+
+static void
+scan_io_queues_update_zio_stats(dsl_scan_io_queue_t *q, const scan_io_t *sio)
+{
+	uint64_t cur_size = sio->sio_asize;
+
 	q->q_avg_zio_size_this_txg = (cur_size +
 	    (q->q_avg_zio_size_this_txg * q->q_zios_this_txg)) /
 	    (q->q_zios_this_txg + 1);
 	q->q_zios_this_txg++;
+
+	if (sio->sio_dummy)
+		q->q_dummy_zios_this_txg++;
 }
 
 static void
@@ -2429,12 +2623,17 @@ scan_io_queue_issue(dsl_scan_io_queue_t *queue, list_t *io_list)
 		sio2bp(sio, &bp, queue->q_vd->vdev_id);
 		bytes_issued += sio->sio_asize;
-		scan_exec_io(scn->scn_dp, &bp, sio->sio_flags,
-		    &sio->sio_zb, queue);
+
+		if (!sio->sio_dummy) {
+			scan_exec_io(scn->scn_dp, &bp, sio->sio_flags,
+			    &sio->sio_zb, queue);
+		}
+
+		count_block(scn, scn->scn_dp->dp_blkstats, &bp);
 		(void) list_remove_head(io_list);
 		ASSERT(queue->q_num_issuing_zios > 0);
 		atomic_dec_64(&queue->q_num_issuing_zios);
-		scan_io_queues_update_zio_stats(queue, &bp);
+		scan_io_queues_update_zio_stats(queue, sio);
 		kmem_free(sio, sizeof (*sio));
 	}
 
@@ -2726,6 +2925,7 @@ dsl_scan_update_stats(dsl_scan_t *scn)
 	uint64_t i;
 	uint64_t seg_size_total = 0, zio_size_total = 0;
 	uint64_t seg_count_total = 0, zio_count_total = 0;
+	uint64_t zio_dummy_count = 0;
 
 	for (i = 0; i < spa->spa_root_vdev->vdev_children; i++) {
 		vdev_t *vd = spa->spa_root_vdev->vdev_child[i];
@@ -2740,6 +2940,7 @@ dsl_scan_update_stats(dsl_scan_t *scn)
 		    queue->q_avg_zio_size_this_txg * queue->q_zios_this_txg;
 		seg_count_total += queue->q_segs_this_txg;
 		zio_count_total += queue->q_zios_this_txg;
+		zio_dummy_count += queue->q_dummy_zios_this_txg;
 	}
 
 	if (seg_count_total == 0 || zio_count_total == 0) {
@@ -2754,6 +2955,7 @@ dsl_scan_update_stats(dsl_scan_t *scn)
 	scn->scn_avg_zio_size_this_txg = zio_size_total / zio_count_total;
 	scn->scn_segs_this_txg = seg_count_total;
 	scn->scn_zios_this_txg = zio_count_total;
+	scn->scn_dummy_zios_this_txg = zio_dummy_count;
 }
 
 boolean_t
@@ -2878,6 +3080,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
 		scn->scn_segs_this_txg = 0;
 		scn->scn_avg_zio_size_this_txg = 0;
 		scn->scn_zios_this_txg = 0;
+		scn->scn_dummy_zios_this_txg = 0;
 		scn->scn_pausing = B_FALSE;
 		scn->scn_sync_start_time = gethrtime();
 		spa->spa_scrub_active = B_TRUE;
@@ -3119,9 +3322,10 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
 		(void) dsl_scan_should_clear(scn);
 		dsl_scan_update_stats(scn);
 
-		zfs_dbgmsg("scrubbed %llu blocks (%llu segs) in %llums "
-		    "(avg_block_size = %llu, avg_seg_size = %llu)",
+		zfs_dbgmsg("scrubbed %llu blocks (%llu dummies, %llu segs) "
+		    "in %llums (avg_block_size = %llu, avg_seg_size = %llu)",
 		    (longlong_t)scn->scn_zios_this_txg,
+		    (longlong_t)scn->scn_dummy_zios_this_txg,
 		    (longlong_t)scn->scn_segs_this_txg,
 		    (longlong_t)NSEC2MSEC(gethrtime() -
 		    scn->scn_sync_start_time),
@@ -3140,155 +3344,8 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
 }
 
 static void
-count_block(dsl_scan_t *scn, zfs_all_blkstats_t *zab, const blkptr_t *bp)
-{
-	int i;
-
-	for (i = 0; i < BP_GET_NDVAS(bp); i++)
-		atomic_add_64(&scn->scn_bytes_issued,
-		    DVA_GET_ASIZE(&bp->blk_dva[i]));
-
-	/*
-	 * If we resume after a reboot, zab will be NULL; don't record
-	 * incomplete stats in that case.
-	 */
-	if (zab == NULL)
-		return;
-
-	mutex_enter(&zab->zab_lock);
-
-	for (i = 0; i < 4; i++) {
-		int l = (i < 2) ? BP_GET_LEVEL(bp) : DN_MAX_LEVELS;
-		int t = (i & 1) ? BP_GET_TYPE(bp) : DMU_OT_TOTAL;
-		int equal;
-		zfs_blkstat_t *zb;
-
-		if (t & DMU_OT_NEWTYPE)
-			t = DMU_OT_OTHER;
-
-		zb = &zab->zab_type[l][t];
-		zb->zb_count++;
-		zb->zb_asize += BP_GET_ASIZE(bp);
-		zb->zb_lsize += BP_GET_LSIZE(bp);
-		zb->zb_psize += BP_GET_PSIZE(bp);
-		zb->zb_gangs += BP_COUNT_GANG(bp);
-
-		switch (BP_GET_NDVAS(bp)) {
-		case 2:
-			if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
-			    DVA_GET_VDEV(&bp->blk_dva[1]))
-				zb->zb_ditto_2_of_2_samevdev++;
-			break;
-		case 3:
-			equal = (DVA_GET_VDEV(&bp->blk_dva[0]) ==
-			    DVA_GET_VDEV(&bp->blk_dva[1])) +
-			    (DVA_GET_VDEV(&bp->blk_dva[0]) ==
-			    DVA_GET_VDEV(&bp->blk_dva[2])) +
-			    (DVA_GET_VDEV(&bp->blk_dva[1]) ==
-			    DVA_GET_VDEV(&bp->blk_dva[2]));
-			if (equal == 1)
-				zb->zb_ditto_2_of_3_samevdev++;
-			else if (equal == 3)
-				zb->zb_ditto_3_of_3_samevdev++;
-			break;
-		}
-	}
-
-	mutex_exit(&zab->zab_lock);
-}
-
-static void
-scan_io_queue_insert_impl(dsl_scan_io_queue_t *queue, scan_io_t *sio)
-{
-	avl_index_t idx;
-	uint64_t offset = SCAN_IO_GET_OFFSET(sio);
-	uint64_t asize = sio->sio_asize;
-
-	ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock));
-
-	if (avl_find(&queue->q_zios_by_addr, sio, &idx) != NULL) {
-		/* block is already scheduled for reading */
-		mutex_enter(&queue->q_scn->scn_status_lock);
-		queue->q_scn->scn_bytes_pending -= sio->sio_asize;
-		mutex_exit(&queue->q_scn->scn_status_lock);
-		kmem_free(sio, sizeof (*sio));
-		return;
-	}
-	avl_insert(&queue->q_zios_by_addr, sio, idx);
-	atomic_add_64(&queue->q_zio_bytes, asize);
-
-	range_tree_set_gap(queue->q_exts_by_addr, zfs_scan_max_ext_gap);
-	range_tree_add_fill(queue->q_exts_by_addr, offset, asize, asize);
-}
-
-/*
- * Given all the info we got from our metadata scanning process, we
- * construct a scan_io_t and insert it into the scan sorting queue. The
- * I/O must already be suitable for us to process. This is controlled
- * by dsl_scan_enqueue().
- */
-static void
-scan_io_queue_insert(dsl_scan_io_queue_t *queue, const blkptr_t *bp, int dva_i,
-    int zio_flags, const zbookmark_phys_t *zb)
-{
-	scan_io_t *sio = kmem_zalloc(sizeof (*sio), KM_SLEEP);
-
-	ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock));
-
-	bp2sio(bp, sio, dva_i);
-	sio->sio_flags = zio_flags;
-	sio->sio_zb = *zb;
-
-	/*
-	 * Increment the bytes pending counter now so that we can't
-	 * get an integer underflow in case the worker processes the
-	 * zio before we get to incrementing this counter.
-	 */
-	mutex_enter(&queue->q_scn->scn_status_lock);
-	queue->q_scn->scn_bytes_pending += sio->sio_asize;
-	mutex_exit(&queue->q_scn->scn_status_lock);
-
-	scan_io_queue_insert_impl(queue, sio);
-}
-
-/*
- * Given a set of I/O parameters as discovered by the metadata traversal
- * process, attempts to place the I/O into the sorted queues (if allowed),
- * or immediately executes the I/O.
- */
-static void
-dsl_scan_enqueue(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
-    const zbookmark_phys_t *zb)
-{
-	spa_t *spa = dp->dp_spa;
-
-	ASSERT(!BP_IS_EMBEDDED(bp));
-	if (!dp->dp_scan->scn_is_sorted) {
-		scan_exec_io(dp, bp, zio_flags, zb, NULL);
-		return;
-	}
-
-	for (int i = 0; i < BP_GET_NDVAS(bp); i++) {
-		dva_t dva;
-		vdev_t *vdev;
-
-		dva = bp->blk_dva[i];
-		vdev = vdev_lookup_top(spa, DVA_GET_VDEV(&dva));
-		ASSERT(vdev != NULL);
-
-		mutex_enter(&vdev->vdev_scan_io_queue_lock);
-		if (vdev->vdev_scan_io_queue == NULL)
-			vdev->vdev_scan_io_queue = scan_io_queue_create(vdev);
-		ASSERT(dp->dp_scan != NULL);
-		scan_io_queue_insert(vdev->vdev_scan_io_queue, bp,
-		    i, zio_flags, zb);
-		mutex_exit(&vdev->vdev_scan_io_queue_lock);
-	}
-}
-
-static int
-dsl_scan_scrub_cb(dsl_pool_t *dp,
-    const blkptr_t *bp, const zbookmark_phys_t *zb)
+dsl_scan_scrub_cb(dsl_pool_t *dp, const blkptr_t *bp,
+    const zbookmark_phys_t *zb, boolean_t io_done)
 {
 	dsl_scan_t *scn = dp->dp_scan;
 	spa_t *spa = dp->dp_spa;
@@ -3300,11 +3357,11 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
 	if (phys_birth <= scn->scn_phys.scn_min_txg ||
 	    phys_birth >= scn->scn_phys.scn_max_txg)
-		return (0);
+		return;
 
 	if (BP_IS_EMBEDDED(bp)) {
 		count_block(scn, dp->dp_blkstats, bp);
-		return (0);
+		return;
 	}
 
 	ASSERT(DSL_SCAN_IS_SCRUB_RESILVER(scn));
@@ -3338,13 +3395,10 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
 	}
 
 	if (needs_io && !zfs_no_scrub_io) {
-		dsl_scan_enqueue(dp, bp, zio_flags, zb);
+		dsl_scan_enqueue(dp, bp, zio_flags, zb, io_done);
 	} else {
 		count_block(scn, dp->dp_blkstats, bp);
 	}
-
-	/* do not relocate this block */
-	return (0);
 }
 
 static void
@@ -3415,7 +3469,6 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
 		atomic_add_64(&spa->spa_scan_pass_work,
 		    DVA_GET_ASIZE(&bp->blk_dva[i]));
 
-	count_block(dp->dp_scan, dp->dp_blkstats, bp);
 	zio_nowait(zio_read(dp->dp_scan->scn_zio_root, spa, bp, data, size,
 	    dsl_scan_scrub_done, queue, ZIO_PRIORITY_SCRUB, zio_flags, zb));
 }
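
Editor's note (not part of the patch): the standalone sketch below is a minimal illustration of the dummy-I/O idea the patch implements, using hypothetical toy_* names instead of the real scan_io_t and dsl_scan_io_queue_t structures. A prefetch that has already read a block enqueues a dummy entry; when the sorted queue is drained, dummy entries are counted toward scan progress but no second read is issued. In the patch itself the equivalent merge happens in scan_io_queue_insert_impl(), where an existing AVL-tree entry absorbs the dummy flag rather than being inserted twice.

/* toy model only; compile with: cc -std=c99 toy_dummy_queue.c */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define	QUEUE_MAX 16

struct toy_sio {
	uint64_t offset;	/* on-disk offset of the block */
	uint64_t asize;		/* allocated size of the block */
	bool dummy;		/* true: already read by the prefetch pass */
};

struct toy_queue {
	struct toy_sio entries[QUEUE_MAX];
	int count;
	uint64_t bytes_verified;	/* progress accounting */
	uint64_t bytes_read;		/* actual reads issued */
};

/* Insert an I/O; merge with an existing entry for the same offset. */
static void
toy_enqueue(struct toy_queue *q, uint64_t offset, uint64_t asize, bool dummy)
{
	for (int i = 0; i < q->count; i++) {
		if (q->entries[i].offset == offset) {
			/* Already queued: a dummy marks it as already read. */
			if (dummy)
				q->entries[i].dummy = true;
			return;
		}
	}
	if (q->count < QUEUE_MAX) {
		q->entries[q->count++] =
		    (struct toy_sio){ offset, asize, dummy };
	}
}

/* Drain the queue: account for every block, but only read non-dummy ones. */
static void
toy_issue_all(struct toy_queue *q)
{
	for (int i = 0; i < q->count; i++) {
		struct toy_sio *sio = &q->entries[i];

		q->bytes_verified += sio->asize;
		if (!sio->dummy) {
			q->bytes_read += sio->asize;
			printf("read offset=%llu size=%llu\n",
			    (unsigned long long)sio->offset,
			    (unsigned long long)sio->asize);
		} else {
			printf("skip offset=%llu size=%llu (prefetched)\n",
			    (unsigned long long)sio->offset,
			    (unsigned long long)sio->asize);
		}
	}
	q->count = 0;
}

int
main(void)
{
	struct toy_queue q;

	memset(&q, 0, sizeof (q));

	toy_enqueue(&q, 4096, 4096, false);	/* block found by traversal */
	toy_enqueue(&q, 8192, 4096, true);	/* block read by the prefetch */
	toy_enqueue(&q, 4096, 4096, true);	/* prefetch also read the first block */
	toy_issue_all(&q);

	printf("verified=%llu read=%llu\n",
	    (unsigned long long)q.bytes_verified,
	    (unsigned long long)q.bytes_read);
	return (0);
}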