diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 159b12f8ae6d..2f1210cd5f93 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -1007,8 +1007,16 @@ zpool_do_add(int argc, char **argv) *propval = '\0'; propval++; - if ((strcmp(optarg, ZPOOL_CONFIG_ASHIFT) != 0) || - (add_prop_list(optarg, propval, &props, B_TRUE))) + /* Only the following properties are allowed */ + if ((strcmp(optarg, + ZPOOL_CONFIG_ASHIFT) != 0) && + (strcmp(optarg, + ZPOOL_PROP_OBJ_ENDPOINT_URI) != 0) && + (strcmp(optarg, + ZPOOL_PROP_OBJ_REGION_NAME) != 0)) + usage(B_FALSE); + + if (add_prop_list(optarg, propval, &props, B_TRUE)) usage(B_FALSE); break; case 'P': diff --git a/contrib/pyzfs/libzfs_core/_constants.py b/contrib/pyzfs/libzfs_core/_constants.py index d4135041585a..b6fb30b9c20b 100644 --- a/contrib/pyzfs/libzfs_core/_constants.py +++ b/contrib/pyzfs/libzfs_core/_constants.py @@ -104,6 +104,8 @@ def enum(*sequential, **named): 'ZFS_ERR_NOT_USER_NAMESPACE', 'ZFS_ERR_RESUME_EXISTS', 'ZFS_ERR_CRYPTO_NOTSUP', + 'ZFS_ERR_OBJSTORE_EXISTS', + 'ZFS_ERR_OBJSTORE_SINGLE_ADD', ], {} ) diff --git a/include/libzfs.h b/include/libzfs.h index 029501f1c533..ed0fd6540eea 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -153,6 +153,8 @@ typedef enum zfs_error { EZFS_NOT_USER_NAMESPACE, /* a file is not a user namespace */ EZFS_CKSUM, /* insufficient replicas */ EZFS_RESUME_EXISTS, /* Resume on existing dataset without force */ + EZFS_OBJSTORE_EXISTS, /* pool is already backed by an object store */ + EZFS_OBJSTORE_ADD_ALONE, /* can't add extra vdevs with objstore */ EZFS_UNKNOWN } zfs_error_t; diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 0968f0d3562d..56775748b1f6 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -725,6 +725,17 @@ typedef struct zpool_load_policy { #define ZPOOL_CONFIG_VDEV_STATS "vdev_stats" /* not stored on disk */ #define ZPOOL_CONFIG_INDIRECT_SIZE "indirect_size" /* not stored on disk */ +/* + * These are 
user-visible property names that are specific to the object + * store. We declare those here because they happen to be temporarily + * added to the config in-memory, but they're never actually saved on + * disk with the config. + */ +#define ZPOOL_PROP_OBJ_PROTOCOL_NAME "object-protocol" +#define ZPOOL_PROP_OBJ_ENDPOINT_URI "object-endpoint" +#define ZPOOL_PROP_OBJ_REGION_NAME "object-region" +#define ZPOOL_PROP_OBJ_CRED_PROFILE_REF "object-credentials-profile" + /* container nvlist of extended stats */ #define ZPOOL_CONFIG_VDEV_STATS_EX "vdev_stats_ex" @@ -1571,6 +1582,8 @@ typedef enum { ZFS_ERR_NOT_USER_NAMESPACE, ZFS_ERR_RESUME_EXISTS, ZFS_ERR_CRYPTO_NOTSUP, + ZFS_ERR_OBJSTORE_EXISTS, + ZFS_ERR_OBJSTORE_ADD_ALONE, } zfs_errno_t; /* diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 674f68fc5265..c1c0aaf5224a 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -469,7 +469,8 @@ extern int param_set_deadman_failmode_common(const char *val); extern void spa_set_deadman_synctime(hrtime_t ns); extern void spa_set_deadman_ziotime(hrtime_t ns); extern const char *spa_history_zone(void); -extern void spa_set_pool_type(spa_t *); +extern void spa_set_pool_type_from_vdev_tree(spa_t *); +extern void spa_set_pool_type_from_vdev_metadata(spa_t *); #ifdef __cplusplus } diff --git a/include/sys/vdev.h b/include/sys/vdev.h index 4a6071742909..6606c871cd5d 100644 --- a/include/sys/vdev.h +++ b/include/sys/vdev.h @@ -69,6 +69,7 @@ extern int vdev_validate_aux(vdev_t *vd); extern zio_t *vdev_probe(vdev_t *vd, zio_t *pio); extern boolean_t vdev_is_concrete(vdev_t *vd); extern boolean_t vdev_is_object_based(vdev_t *vd); +extern boolean_t vdev_is_object_store(vdev_t *vd); extern boolean_t vdev_is_bootable(vdev_t *vd); extern vdev_t *vdev_lookup_top(spa_t *spa, uint64_t vdev); extern vdev_t *vdev_lookup_by_guid(vdev_t *vd, uint64_t guid); diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index b482cc3615d3..0cd319d5eec4 100644 --- 
a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -307,6 +307,12 @@ libzfs_error_description(libzfs_handle_t *hdl) case EZFS_RESUME_EXISTS: return (dgettext(TEXT_DOMAIN, "Resuming recv on existing " "dataset without force")); + case EZFS_OBJSTORE_EXISTS: + return (dgettext(TEXT_DOMAIN, "pool already has an " + "object store vdev")); + case EZFS_OBJSTORE_ADD_ALONE: + return (dgettext(TEXT_DOMAIN, "one vdev entry allowed " + "when adding an object store vdev")); case EZFS_UNKNOWN: return (dgettext(TEXT_DOMAIN, "unknown error")); default: @@ -753,6 +759,12 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) case ZFS_ERR_IOC_ARG_BADTYPE: zfs_verror(hdl, EZFS_IOC_NOTSUPPORTED, fmt, ap); break; + case ZFS_ERR_OBJSTORE_EXISTS: + zfs_verror(hdl, EZFS_OBJSTORE_EXISTS, fmt, ap); + break; + case ZFS_ERR_OBJSTORE_ADD_ALONE: + zfs_verror(hdl, EZFS_OBJSTORE_ADD_ALONE, fmt, ap); + break; default: zfs_error_aux(hdl, "%s", strerror(error)); zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap); diff --git a/lib/libzutil/zutil_import.c b/lib/libzutil/zutil_import.c index 5dfe63e41bde..3be238270ad6 100644 --- a/lib/libzutil/zutil_import.c +++ b/lib/libzutil/zutil_import.c @@ -1761,12 +1761,11 @@ zpool_find_import_cached(libpc_handle_t *hdl, importargs_t *iarg) return (NULL); } - nvlist_t *tree; - uint_t c, children; - nvlist_t **child; - - tree = fnvlist_lookup_nvlist(src, ZPOOL_CONFIG_VDEV_TREE); + nvlist_t *tree = + fnvlist_lookup_nvlist(src, ZPOOL_CONFIG_VDEV_TREE); + uint_t children; + nvlist_t **child; if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) { fprintf(stderr, gettext("cannot import '%s': invalid " @@ -1775,7 +1774,7 @@ zpool_find_import_cached(libpc_handle_t *hdl, importargs_t *iarg) } boolean_t object_store_pool_found = B_FALSE; - for (c = 0; c < children; c++) { + for (uint_t c = 0; c < children; c++) { const char *type; if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type) != 0) { @@ -1848,11 
+1847,14 @@ zpool_find_import_agent(libpc_handle_t *hdl, importargs_t *iarg, if (bucket == NULL && iarg->path != NULL) { bucket = iarg->path[0]; } - nvlist_lookup_string(iarg->props, "object-protocol", &protocol); - nvlist_lookup_string(iarg->props, "object-endpoint", &endpoint); - nvlist_lookup_string(iarg->props, "object-region", ®ion); - nvlist_lookup_string(iarg->props, "object-credentials-profile", - &profile); + nvlist_lookup_string(iarg->props, + ZPOOL_PROP_OBJ_PROTOCOL_NAME, &protocol); + nvlist_lookup_string(iarg->props, + ZPOOL_PROP_OBJ_ENDPOINT_URI, &endpoint); + nvlist_lookup_string(iarg->props, + ZPOOL_PROP_OBJ_REGION_NAME, ®ion); + nvlist_lookup_string(iarg->props, + ZPOOL_PROP_OBJ_CRED_PROFILE_REF, &profile); nvlist_t *msg = fnvlist_alloc(); fnvlist_add_string(msg, AGENT_REQUEST_TYPE, AGENT_TYPE_GET_POOLS); @@ -1911,14 +1913,17 @@ zpool_find_import_agent(libpc_handle_t *hdl, importargs_t *iarg, * them to the config here. */ if (region != NULL) - fnvlist_add_string(tree, "object-region", region); + fnvlist_add_string(tree, + ZPOOL_PROP_OBJ_REGION_NAME, region); if (endpoint != NULL) - fnvlist_add_string(tree, "object-endpoint", endpoint); + fnvlist_add_string(tree, + ZPOOL_PROP_OBJ_ENDPOINT_URI, endpoint); if (protocol != NULL) - fnvlist_add_string(tree, "object-protocol", protocol); + fnvlist_add_string(tree, + ZPOOL_PROP_OBJ_PROTOCOL_NAME, protocol); if (profile != NULL) - fnvlist_add_string(tree, "object-credentials-profile", - profile); + fnvlist_add_string(tree, + ZPOOL_PROP_OBJ_CRED_PROFILE_REF, profile); slice->rn_vdev_guid = guid; slice->rn_lock = lock; @@ -2161,11 +2166,14 @@ zoa_resume_destroy(void *hdl, importargs_t *iarg) if (bucket == NULL) { return (-1); } - nvlist_lookup_string(iarg->props, "object-protocol", &protocol); - nvlist_lookup_string(iarg->props, "object-endpoint", &endpoint); - nvlist_lookup_string(iarg->props, "object-region", ®ion); - nvlist_lookup_string(iarg->props, "object-credentials-profile", - &profile); + 
nvlist_lookup_string(iarg->props, + ZPOOL_PROP_OBJ_PROTOCOL_NAME, &protocol); + nvlist_lookup_string(iarg->props, + ZPOOL_PROP_OBJ_ENDPOINT_URI, &endpoint); + nvlist_lookup_string(iarg->props, + ZPOOL_PROP_OBJ_REGION_NAME, ®ion); + nvlist_lookup_string(iarg->props, + ZPOOL_PROP_OBJ_CRED_PROFILE_REF, &profile); // Resume destroy nvlist_t *msg = fnvlist_alloc(); diff --git a/module/os/freebsd/zfs/spa_os.c b/module/os/freebsd/zfs/spa_os.c index 8505b021398b..f94ad2b627ab 100644 --- a/module/os/freebsd/zfs/spa_os.c +++ b/module/os/freebsd/zfs/spa_os.c @@ -246,7 +246,7 @@ spa_import_rootpool(const char *name, bool checkpointrewind) spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = spa_config_parse(spa, &rvd, nvtop, NULL, 0, VDEV_ALLOC_ROOTPOOL); - spa_set_pool_type(spa); + spa_set_pool_type_from_vdev_metadata(spa); spa_config_exit(spa, SCL_ALL, FTAG); if (error) { mutex_exit(&spa_namespace_lock); diff --git a/module/os/linux/zfs/vdev_object_store.c b/module/os/linux/zfs/vdev_object_store.c index 83222066d2bb..182fe38c0221 100644 --- a/module/os/linux/zfs/vdev_object_store.c +++ b/module/os/linux/zfs/vdev_object_store.c @@ -2403,7 +2403,18 @@ vdev_object_store_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, vos->vos_send_thread = thread_create(NULL, 0, vdev_object_store_send_thread, vd, 0, &p0, TS_RUN, defclsyspri); - if (vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE) { + /* + * There are two cases where we want to create an "object-store pool": + * [1] We are creating a brand new ZFS pool on top of an object store + * (e.g. SPA_LOAD_CREATE). + * [2] We are about to add an object-based vdev as part of migrating a + * block-based ZFS pool to an object-based one (e.g. this is the + * spa_pending_vdev and the spa pool type is not object based yet). 
+ */ + spa_t *spa = vd->vdev_spa; + if ((spa->spa_load_state == SPA_LOAD_CREATE) || + (vdev_is_object_based(spa->spa_pending_vdev) && + !spa_is_object_based(spa))) { vos->vos_create_completed = B_FALSE; error = object_store_create_pool(vd); if (error != 0) { diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c index 625bdcc2bf2e..b501c62aeee1 100644 --- a/module/zcommon/zpool_prop.c +++ b/module/zcommon/zpool_prop.c @@ -82,16 +82,20 @@ zpool_prop_init(void) zprop_register_string(ZPOOL_PROP_COMPATIBILITY, "compatibility", "off", PROP_DEFAULT, ZFS_TYPE_POOL, " | off | legacy", "COMPATIBILITY", sfeatures); - zprop_register_string(ZPOOL_PROP_OBJ_PROTOCOL, "object-protocol", NULL, - PROP_DEFAULT, ZFS_TYPE_POOL, "", "OBJ_PROTOCOL", - sfeatures); - zprop_register_string(ZPOOL_PROP_OBJ_ENDPOINT, "object-endpoint", NULL, - PROP_DEFAULT, ZFS_TYPE_POOL, "", "OBJ_ENDPOINT", sfeatures); - zprop_register_string(ZPOOL_PROP_OBJ_REGION, "object-region", NULL, - PROP_DEFAULT, ZFS_TYPE_POOL, "", "OBJ_REGION", sfeatures); + zprop_register_string(ZPOOL_PROP_OBJ_PROTOCOL, + ZPOOL_PROP_OBJ_PROTOCOL_NAME, NULL, + PROP_DEFAULT, ZFS_TYPE_POOL, "", + "OBJ_PROTOCOL", sfeatures); + zprop_register_string(ZPOOL_PROP_OBJ_ENDPOINT, + ZPOOL_PROP_OBJ_ENDPOINT_URI, NULL, + PROP_DEFAULT, ZFS_TYPE_POOL, "", + "OBJ_ENDPOINT", sfeatures); + zprop_register_string(ZPOOL_PROP_OBJ_REGION, + ZPOOL_PROP_OBJ_REGION_NAME, NULL, PROP_DEFAULT, ZFS_TYPE_POOL, + "", "OBJ_REGION", sfeatures); zprop_register_string(ZPOOL_PROP_OBJ_CRED_PROFILE, - "object-credentials-profile", NULL, PROP_DEFAULT, ZFS_TYPE_POOL, - "", "OBJ_CRED_PROFILE", sfeatures); + ZPOOL_PROP_OBJ_CRED_PROFILE_REF, NULL, PROP_DEFAULT, + ZFS_TYPE_POOL, "", "OBJ_CRED_PROFILE", sfeatures); /* readonly number properties */ zprop_register_number(ZPOOL_PROP_SIZE, "size", 0, PROP_READONLY, diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 01e026e4e5e9..8f49c2a1c62d 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -3592,7 +3592,7 
@@ spa_ld_parse_config(spa_t *spa, spa_import_type_t type) parse = (type == SPA_IMPORT_EXISTING ? VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT); error = spa_config_parse(spa, &rvd, nvtree, NULL, 0, parse); - spa_set_pool_type(spa); + spa_set_pool_type_from_vdev_metadata(spa); spa_config_exit(spa, SCL_ALL, FTAG); if (error != 0) { @@ -3964,7 +3964,7 @@ spa_ld_trusted_config(spa_t *spa, spa_import_type_t type, error); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error)); } - spa_set_pool_type(spa); + spa_set_pool_type_from_vdev_metadata(spa); /* * Vdev paths in the MOS may be obsolete. If the untrusted config was @@ -4859,6 +4859,10 @@ spa_ld_checkpoint_rewind(spa_t *spa) if (svdcount == SPA_SYNC_MIN_VDEVS) break; } + + ASSERT(!spa_is_object_based(spa)); + ASSERT(!vdev_is_object_based(spa->spa_root_vdev)); + error = vdev_config_sync(svd, svdcount, spa->spa_first_txg); if (error == 0) spa->spa_last_synced_guid = rvd->vdev_guid; @@ -6068,7 +6072,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); if (error == 0) - spa_set_pool_type(spa); + spa_set_pool_type_from_vdev_tree(spa); ASSERT(error != 0 || rvd != NULL); ASSERT(error != 0 || spa->spa_root_vdev == rvd); @@ -6414,6 +6418,11 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) if (spa_writeable(spa)) { /* * Update the config cache to include the newly-imported pool. + * + * This may also create metaslabs for newly added vdevs (see + * `spa_vdev_add()`. This normally happens through the async + * thread request of `spa_ld_check_for_config_update()` calling + * `spa_config_update()` but this code path may get there first. */ spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); } @@ -6801,6 +6810,55 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot) spa->spa_pending_vdev = vd; /* spa_vdev_exit() will clear this */ + if (vdev_is_object_based(vd)) { + /* + * Can't add a second object-store vdev to object-based + * pool. 
+ * + * Note: we check if there is an object-store vdev under the + * root_vdev instead of using the `spa_is_object_based()` + * helper because it is possible that we just added an object + * store vdev in this pool but we haven't yet converted the + * pool's type. + */ + if (vdev_is_object_based(spa->spa_root_vdev)) { + return (spa_vdev_exit(spa, vd, txg, + SET_ERROR(ZFS_ERR_OBJSTORE_EXISTS))); + } + + /* + * If we are adding an object-store vdev to migrate a + * block-based pool to the object store, ensure that + * zpool-add just passed that vdev and nothing else. + * This is a lazy way of making sure that we don't add + * two object store vdevs at once. Note, it also + * prevents us from adding any other type of vdev within + * the same zpool-add invocation. + */ + if (vd->vdev_children > 1) { + return (spa_vdev_exit(spa, vd, txg, + SET_ERROR(ZFS_ERR_OBJSTORE_ADD_ALONE))); + } + + /* + * Pure object-based pools (e.g. not hybrid ones) do the + * checkpoint rewinding process in the object agent. This + * is a different mechanism from the storage pool checkpoint + * in block-based pools. Until we have the need to make those + * two mechanisms work well with each other we avoid any + * migrations to the object store while a zpool checkpoint + * is in effect. See `spa_ld_checkpoint_rewind()` usage in + * `spa_load_impl()` for more info. + */ + if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { + error = (spa_has_checkpoint(spa)) ? 
+ ZFS_ERR_CHECKPOINT_EXISTS : + ZFS_ERR_DISCARDING_CHECKPOINT; + return (spa_vdev_exit(spa, NULL, txg, + SET_ERROR(error))); + } + } + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) != 0) nspares = 0; @@ -8381,9 +8439,12 @@ spa_async_thread(void *arg) uint64_t old_space, new_space; mutex_enter(&spa_namespace_lock); + /* + * There is no point in introducing the object store class + * here as it would ruin any reasonable calculation for the + * history log message below and won't be helpful in any way. + */ old_space = metaslab_class_get_space(spa_normal_class(spa)); - old_space += - metaslab_class_get_space(spa_object_store_class(spa)); old_space += metaslab_class_get_space(spa_special_class(spa)); old_space += metaslab_class_get_space(spa_dedup_class(spa)); old_space += metaslab_class_get_space( @@ -9566,7 +9627,8 @@ spa_sync(spa_t *spa, uint64_t txg) VERIFY(spa_writeable(spa)); if (spa_is_object_based(spa)) { - object_store_begin_txg(spa->spa_root_vdev->vdev_child[0], txg); + object_store_begin_txg(vdev_find_leaf(spa->spa_root_vdev, + &vdev_object_store_ops), txg); } /* diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index 0d2f9fa28be9..fea58291a4cf 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -618,6 +618,22 @@ spa_config_update(spa_t *spa, int what) what != SPA_CONFIG_UPDATE_POOL); } + if (what == SPA_CONFIG_UPDATE_VDEVS) { + /* + * If we recently added an object store vdev we need + * to reset the pool's type. That needs to happen at + * a point when that vdev is allocatable so we do it + * here after creating its metaslab metadata above + * and waiting for one TXG for them to sync to disk. 
+ */ + spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); + if (vdev_is_object_based(spa->spa_root_vdev) && + !spa_is_object_based(spa)) { + spa_set_pool_type_from_vdev_metadata(spa); + } + spa_config_exit(spa, SCL_ALL, spa); + } + if (what == SPA_CONFIG_UPDATE_POOL) spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS); } diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index eae2f87468c2..fcff6d08a555 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -432,8 +432,19 @@ static int zfs_user_indirect_is_special = B_TRUE; */ static uint_t zfs_special_class_metadata_reserve_pct = 25; +/* + * Set the pool's type based on whether the config contains an + * object store vdev or not. It does not check whether the object + * store vdev is allocatable. This makes this wrapper suitable + * only for the scenario of pool creation where there is no + * allocation metadata (e.g metaslabs) initialized yet for any + * device in the pool. + * + * To set the pool's type during import or device addition use + * `spa_set_pool_type_from_vdev_metadata()` instead. + */ void -spa_set_pool_type(spa_t *spa) +spa_set_pool_type_from_vdev_tree(spa_t *spa) { ASSERT3P(spa->spa_root_vdev, !=, NULL); @@ -445,6 +456,32 @@ spa_set_pool_type(spa_t *spa) SPA_TYPE_OBJECT_STORE : SPA_TYPE_NORMAL; } +/* + * Set the pool's type based on whether the config contains an + * object store vdev AND whether that vdev is allocatable or not. + */ +void +spa_set_pool_type_from_vdev_metadata(spa_t *spa) +{ + ASSERT3P(spa->spa_root_vdev, !=, NULL); + + /* + * Must hold all of spa_config locks. + */ + ASSERT3U(spa_config_held(spa, SCL_ALL, RW_WRITER), ==, SCL_ALL); + vdev_t *object_store = + vdev_find_leaf(spa->spa_root_vdev, &vdev_object_store_ops); + + /* + * Even if there is an object store vdev, it may not have a + * metaslab array (i.e. it doesn't accept allocations yet). 
+ */ + if (object_store == NULL || object_store->vdev_ms_array == 0) + spa->spa_pool_type = SPA_TYPE_NORMAL; + else + spa->spa_pool_type = SPA_TYPE_OBJECT_STORE; +} + boolean_t spa_is_object_based(spa_t *spa) { diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 96069e44611f..5718987ba09f 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -1455,7 +1455,7 @@ vdev_metaslab_group_create(vdev_t *vd) } /* - * The spa ashift min/max only apply for the normal metaslab + * The spa ashift min/max only apply for the default metaslab * class. Class destination is late binding so ashift boundary * setting had to wait until now. */ @@ -5481,6 +5481,18 @@ vdev_is_object_based(vdev_t *vd) return (object_based); } +/* + * This predicate is similar to `vdev_is_object_based()` but only checks the + * vdev parameter passed to it and not any children under it. It also doesn't + * attempt to grab any config locks. + */ +boolean_t +vdev_is_object_store(vdev_t *vd) +{ + return ((vd != NULL) ? + (vd->vdev_ops == &vdev_object_store_ops) : B_FALSE); +} + /* * Determine if a log device has valid content. If the vdev was * removed or faulted in the MOS config then we know that diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index 289b13f164dd..7d5df1e99aa1 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -1715,6 +1715,12 @@ vdev_uberblock_sync(zio_t *zio, uint64_t *good_writes, if (!vd->vdev_ops->vdev_op_leaf) return; + /* + * Object store uberblock is written in `object_store_end_txg()`. + */ + if (vdev_is_object_store(vd)) + return; + if (!vdev_writeable(vd)) return; @@ -1751,15 +1757,33 @@ vdev_uberblock_sync(zio_t *zio, uint64_t *good_writes, abd_free(ub_abd); } -/* Sync the uberblocks to all vdevs in svd[] */ +/* + * Sync the uberblocks to the object store vdev (if it exists) + * and all vdevs in svd[]. 
+ */ static int vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags) { spa_t *spa = svd[0]->vdev_spa; - zio_t *zio; uint64_t good_writes = 0; - zio = zio_root(spa, NULL, NULL, flags); + /* + * If this pool is using object storage, then we need to notify the + * backend that we've completed the txg. This operation combines + * syncing the uberblock/labels/config to the object store. We + * always do this first as it's the object store's config that we'll + * use to open the pool. + */ + if (spa_is_object_based(spa)) { + vdev_t *vd_os = vdev_find_leaf(spa->spa_root_vdev, + &vdev_object_store_ops); + nvlist_t *label = spa_config_generate(spa, + vd_os, ub->ub_txg, B_FALSE); + object_store_end_txg(vd_os, label, ub->ub_txg); + good_writes++; + } + + zio_t *zio = zio_root(spa, NULL, NULL, flags); for (int v = 0; v < svdcount; v++) vdev_uberblock_sync(zio, &good_writes, ub, svd[v], flags); @@ -1881,21 +1905,25 @@ static int vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags) { list_t *dl = &spa->spa_config_dirty_list; - vdev_t *vd; - zio_t *zio; - int error; /* * Write the new labels to disk. */ - zio = zio_root(spa, NULL, NULL, flags); - - for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) { - uint64_t *good_writes; + zio_t *zio = zio_root(spa, NULL, NULL, flags); + for (vdev_t *vd = list_head(dl); vd != NULL; + vd = list_next(dl, vd)) { ASSERT(!vd->vdev_ishole); - good_writes = kmem_zalloc(sizeof (uint64_t), KM_SLEEP); + /* + * Object store label is updated + * in `object_store_end_txg()`. + */ + if (vdev_is_object_store(vd)) + continue; + + uint64_t *good_writes = + kmem_zalloc(sizeof (uint64_t), KM_SLEEP); zio_t *vio = zio_null(zio, spa, NULL, (vd->vdev_islog || vd->vdev_aux != NULL) ? 
vdev_label_sync_ignore_done : vdev_label_sync_top_done, @@ -1904,14 +1932,14 @@ vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags) zio_nowait(vio); } - error = zio_wait(zio); + int error = zio_wait(zio); /* * Flush the new labels to disk. */ zio = zio_root(spa, NULL, NULL, flags); - for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) + for (vdev_t *vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) zio_flush(zio, vd); (void) zio_wait(zio); @@ -1973,28 +2001,13 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg) return (0); /* - * If this pool is using object storage, then - * it only needs to notify the backend that - * we've completed the txg and return. + * Object-store pools may go past the spa_final_dirty_txg + * (see comment in ub_dp_mos_used_delta for more). */ - if (spa_is_object_based(spa)) { - /* - * XXX - Right now we don't update the labels on - * any slog devices if we're using object-based pools. - * This seems to be fine since the mos config object - * will have all the information we need. However, there - * might be some corner cases where need to look at the - * label on the slog device directly. If that is - * the case, then we will need to revisit this. - */ - nvlist_t *label = spa_config_generate(spa, - svd[0], txg, B_FALSE); - object_store_end_txg(svd[0], label, txg); - return (0); + if (!spa_is_object_based(spa)) { + ASSERT3U(txg, <=, spa_final_dirty_txg(spa)); } - ASSERT3U(txg, <=, spa_final_dirty_txg(spa)); - /* * Flush the write cache of every disk that's been written to * in this transaction group. This ensures that all blocks @@ -2029,9 +2042,10 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg) } /* - * Sync the uberblocks to all vdevs in svd[]. - * If the system dies in the middle of this step, there are two cases - * to consider, and the on-disk state is consistent either way: + * Sync the uberblocks to the object store vdev (if it exists) and + * all the vdevs in svd[]. 
If the system dies in the middle of this + * step, there are two cases to consider, and the on-disk state is + * consistent either way: * * (1) If none of the new uberblocks made it to disk, then the * previous uberblock will be the newest, and the odd labels @@ -2045,8 +2059,9 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg) */ if ((error = vdev_uberblock_sync_list(svd, svdcount, ub, flags)) != 0) { if ((flags & ZIO_FLAG_TRYHARD) != 0) { - zfs_dbgmsg("vdev_uberblock_sync_list() returned error " - "%d for pool '%s'", error, spa_name(spa)); + zfs_dbgmsg("vdev_uberblock_sync_list() " + "returned error %d for pool '%s'", + error, spa_name(spa)); } goto retry; } diff --git a/module/zfs/zil.c b/module/zfs/zil.c index ba106a49bc49..01de74200d31 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -1602,6 +1602,7 @@ zil_lwb_write_done(zio_t *zio) while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) { vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev); if (vd != NULL) { + ASSERT(!vdev_is_object_store(vd)); /* * The "ZIO_FLAG_DONT_PROPAGATE" is currently * always used within "zio_flush". This means, @@ -3324,6 +3325,14 @@ zil_commit(zilog_t *zilog, uint64_t foid) if (zilog->zl_sync == ZFS_SYNC_DISABLED) return; + /* + * If this is an object store pool and we have no slog devices skip + * committing this zil. Note, in a hybrid pool (e.g. backed by both + * an object store and block devices) we could fall back to + * allocating the zil from the normal/embedded_log class if there + * are no slog devices. This functionality is left as potential + * future work. 
+ */ if (spa_is_object_based(zilog->zl_spa) && !spa_has_slogs(zilog->zl_spa)) { ZIL_STAT_BUMP(zilog, zil_skip_zil_commit); diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 910ce2177af8..62b47f461ef8 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -1545,6 +1545,13 @@ zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, abd_t *data, uint64_t size, void zio_flush(zio_t *zio, vdev_t *vd) { + /* + * Object store vdevs have their own mechanism for flushing + * their changes to the backend - see `object_store_end_txg()`. + */ + if (vdev_is_object_store(vd)) + return; + zio_nowait(zio_ioctl(zio, zio->io_spa, vd, DKIOCFLUSHWRITECACHE, NULL, NULL, ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY)); @@ -3802,22 +3809,16 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, ASSERT3U(spa->spa_alloc_count, ==, metaslab_class_allocator_count(spa_embedded_log_class(spa))); - /* - * Object-based pools can only allocate zil block on the slog, - * so if we failed to allocate from the slog we need to return - * an error and not fallback to the normal class. 
- */ - boolean_t object_based = spa_is_object_based(spa); error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1, txg, NULL, flags, &io_alloc_list, NULL, allocator); - *slog = (error == 0 || object_based); + *slog = (error == 0); - if (error != 0 && !object_based) { + if (error != 0) { error = metaslab_alloc(spa, spa_embedded_log_class(spa), size, new_bp, 1, txg, NULL, flags, &io_alloc_list, NULL, allocator); } - if (error != 0 && !object_based) { + if (error != 0) { error = metaslab_alloc(spa, spa_normal_class(spa), size, new_bp, 1, txg, NULL, flags, &io_alloc_list, NULL, allocator); diff --git a/module/zfs/zio_inject.c b/module/zfs/zio_inject.c index 0850f3f92b88..9a3f9441ffa9 100644 --- a/module/zfs/zio_inject.c +++ b/module/zfs/zio_inject.c @@ -180,8 +180,9 @@ zio_handle_panic_injection(spa_t *spa, const char *tag, uint64_t type) if (handler->zi_record.zi_type == type && strcmp(tag, handler->zi_record.zi_func) == 0) { if (spa_is_object_based(spa)) { - vdev_t *vd = spa->spa_root_vdev->vdev_child[0]; - object_store_restart_agent(vd); + object_store_restart_agent( + vdev_find_leaf(spa->spa_root_vdev, + &vdev_object_store_ops)); } else { panic("Panic requested in function %s\n", tag); } diff --git a/tests/runfiles/object_store.run b/tests/runfiles/object_store.run index 0de43bc1fcfb..d835aef13689 100644 --- a/tests/runfiles/object_store.run +++ b/tests/runfiles/object_store.run @@ -54,6 +54,14 @@ tests = ['checkpoint_after_rewind', 'checkpoint_big_rewind', tags = ['functional', 'pool_checkpoint'] timeout = 1800 +[tests/functional/block_to_object] +pre = +post = +tests = ['zpool_add', 'zpool_add_args_neg', 'zpool_add_ckpoint', + 'zpool_add_existing', 'zpool_add_with_writes', + 'hybrid_pool_import_export'] +tags = ['functional', 'block_to_object'] + [tests/functional/chattr] tests = ['chattr_001_pos', 'chattr_002_neg'] tags = ['functional', 'chattr'] diff --git a/tests/zfs-tests/tests/functional/block_to_object/block_to_object.kshlib 
b/tests/zfs-tests/tests/functional/block_to_object/block_to_object.kshlib new file mode 100644 index 000000000000..9de88c9c7d3e --- /dev/null +++ b/tests/zfs-tests/tests/functional/block_to_object/block_to_object.kshlib @@ -0,0 +1,71 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2023 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/object_store.shlib +. $STF_SUITE/tests/functional/removal/removal.kshlib + +DATASET=$TESTPOOL/$TESTFS +TESTFILE0=/$DATASET/00 +TESTFILE1=/$DATASET/01 + +DISK="$(echo $DISKS | cut -d' ' -f1)" +EXTRADISK="$(echo $DISKS | cut -d' ' -f2)" + +function setup_testpool +{ + log_must zpool create -f $TESTPOOL $DISK + + log_must zfs create -o compression=lz4 -o recordsize=8k $DATASET + + # + # First file is created with writes so initial block-based + # pool has some data. + # + log_must mkfile -n 16M $TESTFILE0 + log_must randwritecomp $TESTFILE0 4096 + + # + # Second file is created so it can be used for writes specific + # to each test. + # + log_must mkfile -n 16M $TESTFILE1 +} + +function cleanup_testpool +{ + + # + # Some tests may start randwritecomp workloads + # in the background. Make sure you kill those + # instances. + # + pkill randwritecomp + + if poolexists $TESTPOOL; then + log_must zpool sync $TESTPOOL + log_must zpool destroy $TESTPOOL + fi + + # + # We always clear the labels of all disks + # between tests so imports from zpool + # or zdb do not get confused with leftover + # data from old pools. 
+ # + for disk in $DISKS; do + zpool labelclear -f $disk + done +} diff --git a/tests/zfs-tests/tests/functional/block_to_object/hybrid_pool_import_export.ksh b/tests/zfs-tests/tests/functional/block_to_object/hybrid_pool_import_export.ksh new file mode 100755 index 000000000000..44784b3f83c0 --- /dev/null +++ b/tests/zfs-tests/tests/functional/block_to_object/hybrid_pool_import_export.ksh @@ -0,0 +1,84 @@ +#!/bin/ksh -p + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2023 by Delphix. All rights reserved. +# + +. $STF_SUITE/tests/functional/block_to_object/block_to_object.kshlib + +# +# DESCRIPTION: +# Ensure that we can add an object store vdev to an existing +# block-based pool and that we can import and export it. Once +# the object-store vdev is added, we shouldn't be able to import +# the pool without the object-store vdev. +# +# STRATEGY: +# 1. Create block-based pool and populate it with some data. +# 2. Add object-store endpoint/vdev +# 3. Write some new data +# 4. Export the pool +# 5. Attempt to import the pool by only specifying the block +# device (this should fail). +# 6. Import the pool normally. +# + +verify_runnable "global" + +if ! use_object_store; then + log_unsupported "Need object store info to migrate block-based pool." 
+fi
+
+setup_testpool
+log_onexit cleanup_testpool
+
+#
+# Add object store
+#
+log_must zpool add -f -o object-endpoint=$ZTS_OBJECT_ENDPOINT \
+	-o object-region=$ZTS_REGION \
+	$TESTPOOL $ZTS_OBJECT_STORE $ZTS_BUCKET_NAME
+
+#
+# Write some data
+#
+log_must randwritecomp $TESTFILE1 4096
+
+#
+# Get pool guid (name the pool explicitly; without it every pool is listed)
+#
+typeset GUID=$(zpool get -Hp -o value guid $TESTPOOL)
+
+#
+# Export pool
+#
+log_must zpool export $TESTPOOL
+
+#
+# Attempt to import by only specifying the block vdev
+#
+log_mustnot zpool import -d ${DEVICE_DIR:-/dev} $TESTPOOL
+log_mustnot zpool import -d ${DEVICE_DIR:-/dev} $GUID
+
+#
+# Import pool using object-store parameters
+#
+log_must zpool import \
+	-o object-endpoint=$ZTS_OBJECT_ENDPOINT \
+	-o object-region=$ZTS_REGION \
+	-d $ZTS_BUCKET_NAME \
+	-d ${DEVICE_DIR:-/dev} \
+	$GUID
+
+log_pass "Successfully exported and imported pool with object store vdev."
diff --git a/tests/zfs-tests/tests/functional/block_to_object/zpool_add.ksh b/tests/zfs-tests/tests/functional/block_to_object/zpool_add.ksh
new file mode 100755
index 000000000000..a4ea94f4b9f4
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/block_to_object/zpool_add.ksh
@@ -0,0 +1,90 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2023 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/block_to_object/block_to_object.kshlib
+
+#
+# DESCRIPTION:
+# Ensure that we can add an object store vdev to an existing
+# block-based pool and do some writes on it without any
+# problems.
+#
+# STRATEGY:
+# 1. Create block-based pool and populate it with some data.
+# 2. Add object-store endpoint/vdev
+# 3. Write some new data
+# 4. Read data from both block-based devices and object-store.
+#
+
+verify_runnable "global"
+
+if ! use_object_store; then
+	log_unsupported "Need object store info to migrate block-based pool."
+fi
+
+setup_testpool
+log_onexit cleanup_testpool
+
+#
+# Add object store
+#
+log_must zpool add -f -o object-endpoint=$ZTS_OBJECT_ENDPOINT \
+	-o object-region=$ZTS_REGION \
+	$TESTPOOL $ZTS_OBJECT_STORE $ZTS_BUCKET_NAME
+
+#
+# Record allocated space in object store and the normal
+# disk before writes
+#
+log_must zpool sync $TESTPOOL
+OBJ_START=$(zpool list -Hp -o name,alloc -v | \
+	grep "$ZTS_BUCKET_NAME" | awk '{print $3}')
+DISK_START=$(zpool list -Hp -o name,alloc -v | \
+	grep "$DISK" | awk '{print $3}')
+
+
+#
+# Write some data that should end up in the object store
+#
+log_must randwritecomp $TESTFILE1 4096
+
+#
+# Record allocated space in object store and the normal
+# disk after writes
+#
+log_must zpool sync $TESTPOOL
+OBJ_END=$(zpool list -Hp -o name,alloc -v | \
+	grep "$ZTS_BUCKET_NAME" | awk '{print $3}')
+DISK_END=$(zpool list -Hp -o name,alloc -v | \
+	grep "$DISK" | awk '{print $3}')
+
+#
+# There should be some allocations in the object store by now.
+#
+log_must [ $OBJ_START -lt $OBJ_END ]
+
+#
+# There should be no new allocations in the normal disk.
+#
+log_must [ $DISK_START -ge $DISK_END ]
+
+#
+# Display information for manual checking
+#
+log_must zpool list -v $TESTPOOL
+
+log_pass "Successfully add object store vdev and write to it."
diff --git a/tests/zfs-tests/tests/functional/block_to_object/zpool_add_args_neg.ksh b/tests/zfs-tests/tests/functional/block_to_object/zpool_add_args_neg.ksh
new file mode 100755
index 000000000000..8c00e34401be
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/block_to_object/zpool_add_args_neg.ksh
@@ -0,0 +1,104 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2023 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/block_to_object/block_to_object.kshlib
+
+#
+# DESCRIPTION:
+# Test invalid arguments passed to zpool-add when adding an
+# object store vdev.
+#
+
+verify_runnable "global"
+
+if ! use_object_store; then
+	log_unsupported "Need object store info to migrate block-based pool."
+fi
+
+setup_testpool
+log_onexit cleanup_testpool
+
+#
+# Attempt to add object store without -f option (must fail)
+#
+log_mustnot zpool add -o object-endpoint=$ZTS_OBJECT_ENDPOINT \
+	-o object-region=$ZTS_REGION \
+	$TESTPOOL $ZTS_OBJECT_STORE $ZTS_BUCKET_NAME
+
+#
+# Attempt to add object store without bucket name
+#
+log_mustnot zpool add -f -o object-endpoint=$ZTS_OBJECT_ENDPOINT \
+	-o object-region=$ZTS_REGION \
+	$TESTPOOL $ZTS_OBJECT_STORE
+
+#
+# Attempt to add object store without endpoint property
+#
+log_mustnot zpool add -f \
+	-o object-region=$ZTS_REGION \
+	$TESTPOOL $ZTS_OBJECT_STORE $ZTS_BUCKET_NAME
+
+#
+# Attempt to add object store without region property
+#
+log_mustnot zpool add -f -o object-endpoint=$ZTS_OBJECT_ENDPOINT \
+	$TESTPOOL $ZTS_OBJECT_STORE $ZTS_BUCKET_NAME
+
+#
+# Attempt without both endpoint and region properties
+#
+log_mustnot zpool add -f \
+	$TESTPOOL $ZTS_OBJECT_STORE $ZTS_BUCKET_NAME
+
+# Attempt to add other vdevs together with object store; assumes
+# $EXTRADISK is a disk distinct from $DISK — verify $DISKS has >= 2
+# entries.
+#
+log_mustnot zpool add -f -o object-endpoint=$ZTS_OBJECT_ENDPOINT \
+	-o object-region=$ZTS_REGION \
+	$TESTPOOL $EXTRADISK $ZTS_OBJECT_STORE $ZTS_BUCKET_NAME
+
+#
+# Attempt to add two object stores at the same time
+#
+log_mustnot zpool add -f -o object-endpoint=$ZTS_OBJECT_ENDPOINT \
+	-o object-region=$ZTS_REGION \
+	$TESTPOOL $ZTS_OBJECT_STORE $ZTS_BUCKET_NAME \
+	$ZTS_OBJECT_STORE $ZTS_BUCKET_NAME
+
+#
+# Attempt with bogus bucket name
+#
+log_mustnot zpool add -f -o object-endpoint=$ZTS_OBJECT_ENDPOINT \
+	-o object-region=$ZTS_REGION \
+	$TESTPOOL $ZTS_OBJECT_STORE thisisbogus
+
+#
+# Attempt with bogus region
+#
+log_mustnot zpool add -f -o object-endpoint=$ZTS_OBJECT_ENDPOINT \
+	-o object-region=thisisbogus \
+	$TESTPOOL $ZTS_OBJECT_STORE $ZTS_BUCKET_NAME
+
+#
+# Attempt with bogus endpoint
+#
+log_mustnot zpool add -f -o object-endpoint=thisisbogus \
+	-o object-region=$ZTS_REGION \
+	$TESTPOOL $ZTS_OBJECT_STORE $ZTS_BUCKET_NAME
+
+log_pass "Fail as expected when adding object store with invalid args."
diff --git a/tests/zfs-tests/tests/functional/block_to_object/zpool_add_ckpoint.ksh b/tests/zfs-tests/tests/functional/block_to_object/zpool_add_ckpoint.ksh
new file mode 100755
index 000000000000..750438470cf1
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/block_to_object/zpool_add_ckpoint.ksh
@@ -0,0 +1,41 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2023 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/block_to_object/block_to_object.kshlib
+
+#
+# DESCRIPTION:
+# Ensure that we can't add an object store while the pool has
+# a checkpoint.
+#
+
+verify_runnable "global"
+
+if ! use_object_store; then
+	log_unsupported "Need object store info to migrate block-based pool."
+fi
+
+setup_testpool
+log_onexit cleanup_testpool
+
+log_must zpool checkpoint $TESTPOOL
+
+log_mustnot zpool add -f -o object-endpoint=$ZTS_OBJECT_ENDPOINT \
+	-o object-region=$ZTS_REGION \
+	$TESTPOOL $ZTS_OBJECT_STORE $ZTS_BUCKET_NAME
+
+log_pass "Cannot add object store when checkpointed."
diff --git a/tests/zfs-tests/tests/functional/block_to_object/zpool_add_existing.ksh b/tests/zfs-tests/tests/functional/block_to_object/zpool_add_existing.ksh
new file mode 100755
index 000000000000..4febe9f489e5
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/block_to_object/zpool_add_existing.ksh
@@ -0,0 +1,50 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2023 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/block_to_object/block_to_object.kshlib
+
+#
+# DESCRIPTION:
+# Ensure we can't add a second object store once the first one
+# has been added.
+#
+
+verify_runnable "global"
+
+if ! use_object_store; then
+	log_unsupported "Need object store info to migrate block-based pool."
+fi
+
+setup_testpool
+log_onexit cleanup_testpool
+
+#
+# Add first object store
+#
+log_must zpool add -f -o object-endpoint=$ZTS_OBJECT_ENDPOINT \
+	-o object-region=$ZTS_REGION \
+	$TESTPOOL $ZTS_OBJECT_STORE $ZTS_BUCKET_NAME
+
+#
+# Fail trying to add a second object store (only one is allowed)
+#
+log_mustnot zpool add -f -o object-endpoint=$ZTS_OBJECT_ENDPOINT \
+	-o object-region=$ZTS_REGION \
+	$TESTPOOL $ZTS_OBJECT_STORE $ZTS_BUCKET_NAME
+
+
+log_pass "Adding a second object store fails."
diff --git a/tests/zfs-tests/tests/functional/block_to_object/zpool_add_with_writes.ksh b/tests/zfs-tests/tests/functional/block_to_object/zpool_add_with_writes.ksh
new file mode 100755
index 000000000000..81f6193800bc
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/block_to_object/zpool_add_with_writes.ksh
@@ -0,0 +1,61 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2023 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/block_to_object/block_to_object.kshlib
+
+#
+# DESCRIPTION:
+# Ensure that we can add an object store vdev to an existing
+# block-based pool while writes are taking place without
+# any problem.
+#
+# STRATEGY:
+# 1. Create block-based pool and populate it with some data.
+# 2. Start writing data to a file in the pool
+# 3. Add object-store endpoint/vdev while writes are happening.
+# 4. Stop writes when object-store has been added.
+#
+
+verify_runnable "global"
+
+if ! use_object_store; then
+	log_unsupported "Need object store info to migrate block-based pool."
+fi
+
+setup_testpool
+log_onexit cleanup_testpool
+
+#
+# Start writing data in the background (killed in cleanup_testpool too)
+#
+randwritecomp $TESTFILE1 &
+randwritecomp $TESTFILE1 &
+
+#
+# Add object store
+#
+log_must zpool add -f -o object-endpoint=$ZTS_OBJECT_ENDPOINT \
+	-o object-region=$ZTS_REGION \
+	$TESTPOOL $ZTS_OBJECT_STORE $ZTS_BUCKET_NAME
+
+#
+# Sync writes and kill any writers
+#
+log_must zpool sync $TESTPOOL
+log_must pkill randwritecomp
+
+log_pass "Successfully add object store vdev and write to it."