diff --git a/include/sys/zio.h b/include/sys/zio.h index 77c70b9b481c..c5a9fb6c8824 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -594,7 +594,7 @@ extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, uint64_t size, boolean_t *slog); -extern void zio_flush(zio_t *zio, vdev_t *vd); +extern void zio_flush(zio_t *zio, vdev_t *vd, boolean_t propagate); extern void zio_shrink(zio_t *zio, uint64_t size); extern int zio_wait(zio_t *zio); diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index ed592514fded..6f61d57b749d 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -1831,19 +1831,21 @@ vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags) for (int v = 0; v < svdcount; v++) { if (vdev_writeable(svd[v])) { - zio_flush(zio, svd[v]); + zio_flush(zio, svd[v], B_FALSE); } } if (spa->spa_aux_sync_uber) { spa->spa_aux_sync_uber = B_FALSE; for (int v = 0; v < spa->spa_spares.sav_count; v++) { if (vdev_writeable(spa->spa_spares.sav_vdevs[v])) { - zio_flush(zio, spa->spa_spares.sav_vdevs[v]); + zio_flush(zio, spa->spa_spares.sav_vdevs[v], + B_FALSE); } } for (int v = 0; v < spa->spa_l2cache.sav_count; v++) { if (vdev_writeable(spa->spa_l2cache.sav_vdevs[v])) { - zio_flush(zio, spa->spa_l2cache.sav_vdevs[v]); + zio_flush(zio, spa->spa_l2cache.sav_vdevs[v], + B_FALSE); } } } @@ -1981,7 +1983,7 @@ vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags) zio = zio_root(spa, NULL, NULL, flags); for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) - zio_flush(zio, vd); + zio_flush(zio, vd, B_FALSE); (void) zio_wait(zio); @@ -2056,7 +2058,7 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg) for (vdev_t *vd = txg_list_head(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)); vd != NULL; vd = txg_list_next(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg))) - zio_flush(zio, vd); + zio_flush(zio, vd, B_FALSE); (void) zio_wait(zio); diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index 15c8b8ca6016..187d3908ff50 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -4172,7 +4172,7 @@ raidz_reflow_scratch_sync(void *arg, dmu_tx_t *tx) goto io_error_exit; } pio = zio_root(spa, NULL, NULL, 0); - zio_flush(pio, raidvd); + zio_flush(pio, raidvd, B_FALSE); zio_wait(pio); zfs_dbgmsg("reflow: wrote %llu bytes (logical) to scratch area", @@ -4231,7 +4231,7 @@ raidz_reflow_scratch_sync(void *arg, dmu_tx_t *tx) goto io_error_exit; } pio = zio_root(spa, NULL, NULL, 0); - zio_flush(pio, raidvd); + zio_flush(pio, raidvd, B_FALSE); zio_wait(pio); zfs_dbgmsg("reflow: overwrote %llu bytes (logical) to real location", @@ -4339,7 +4339,7 @@ vdev_raidz_reflow_copy_scratch(spa_t *spa) } zio_wait(pio); pio = zio_root(spa, NULL, NULL, 0); - zio_flush(pio, raidvd); + zio_flush(pio, raidvd, B_FALSE); zio_wait(pio); zfs_dbgmsg("reflow recovery: overwrote %llu bytes (logical) " diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 12a5e952b59c..37b9287ad7b6 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -23,6 +23,7 @@ * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright (c) 2018 Datto Inc. + * Copyright (c) 2024, Klara, Inc. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -1478,12 +1479,6 @@ zil_lwb_flush_vdevs_done(zio_t *zio) * includes ZIO errors from either this LWB's write or * flush, as well as any errors from other dependent LWBs * (e.g. a root LWB ZIO that might be a child of this LWB). - * - * With that said, it's important to note that LWB flush - * errors are not propagated up to the LWB root ZIO. - * This is incorrect behavior, and results in VDEV flush - * errors not being handled correctly here. See the - * comment above the call to "zio_flush" for details. */ zcw->zcw_zio_error = zio->io_error; @@ -1633,17 +1628,8 @@ zil_lwb_write_done(zio_t *zio) while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) { vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev); - if (vd != NULL) { - /* - * The "ZIO_FLAG_DONT_PROPAGATE" is currently - * always used within "zio_flush". This means, - * any errors when flushing the vdev(s), will - * (unfortunately) not be handled correctly, - * since these "zio_flush" errors will not be - * propagated up to "zil_lwb_flush_vdevs_done". - */ - zio_flush(lwb->lwb_root_zio, vd); - } + if (vd != NULL) + zio_flush(lwb->lwb_root_zio, vd, B_TRUE); kmem_free(zv, sizeof (*zv)); } } diff --git a/module/zfs/zio.c b/module/zfs/zio.c index d68d5ababe79..278ff220e392 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -1624,10 +1624,10 @@ zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, abd_t *data, uint64_t size, * the flushes complete. */ void -zio_flush(zio_t *pio, vdev_t *vd) +zio_flush(zio_t *pio, vdev_t *vd, boolean_t propagate) { - const zio_flag_t flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | - ZIO_FLAG_DONT_RETRY; + const zio_flag_t flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_RETRY | + (propagate ? 0 : ZIO_FLAG_DONT_PROPAGATE); if (vd->vdev_nowritecache) return; @@ -1638,7 +1638,7 @@ zio_flush(zio_t *pio, vdev_t *vd) NULL, ZIO_STAGE_OPEN, ZIO_FLUSH_PIPELINE)); } else { for (uint64_t c = 0; c < vd->vdev_children; c++) - zio_flush(pio, vd->vdev_child[c]); + zio_flush(pio, vd->vdev_child[c], propagate); } }