From 1b6fd5bd2605e69834593193e09f37596570d4e5 Mon Sep 17 00:00:00 2001 From: mayank Date: Fri, 2 Mar 2018 13:20:56 +0530 Subject: [PATCH] - API to get condensed tree of modified blocks between two txg - changes in zvol zap update/read API for microzap checks Signed-off-by: mayank --- cmd/uzfs_test/Makefile.am | 3 +- cmd/uzfs_test/uzfs_test.c | 6 +- cmd/uzfs_test/uzfs_txg_diff.c | 320 +++++++++++++++++ cmd/uzfs_test/uzfs_zvol_zap.c | 51 +-- include/Makefile.am | 3 +- include/sys/uzfs_zvol.h | 6 + include/uzfs_mtree.h | 31 ++ include/uzfs_test.h | 4 + include/uzfs_zap.h | 6 +- lib/libzpool/Makefile.am | 1 + lib/libzpool/uzfs_mtree.c | 251 +++++++++++++ lib/libzpool/uzfs_zap.c | 20 +- module/zfs/spa_config.c | 592 ------------------------------- tests/cbtest/script/test_uzfs.sh | 4 +- 14 files changed, 672 insertions(+), 626 deletions(-) create mode 100644 cmd/uzfs_test/uzfs_txg_diff.c create mode 100644 include/uzfs_mtree.h create mode 100644 lib/libzpool/uzfs_mtree.c delete mode 100644 module/zfs/spa_config.c diff --git a/cmd/uzfs_test/Makefile.am b/cmd/uzfs_test/Makefile.am index 7444dd52aa04..98c18aff363a 100644 --- a/cmd/uzfs_test/Makefile.am +++ b/cmd/uzfs_test/Makefile.am @@ -14,7 +14,8 @@ sbin_PROGRAMS = uzfs_test uzfs_test_SOURCES = \ uzfs_test.c \ uzfs_test_sync.c \ - uzfs_zvol_zap.c + uzfs_zvol_zap.c \ + uzfs_txg_diff.c uzfs_test_LDADD = \ $(top_builddir)/lib/libnvpair/libnvpair.la \ diff --git a/cmd/uzfs_test/uzfs_test.c b/cmd/uzfs_test/uzfs_test.c index 8170b5ce9b12..8aff0bc83a64 100644 --- a/cmd/uzfs_test/uzfs_test.c +++ b/cmd/uzfs_test/uzfs_test.c @@ -39,6 +39,8 @@ uzfs_test_info_t uzfs_tests[] = { { uzfs_zvol_zap_operation, "uzfs zap operation test" }, { replay_fn, "zvol replay test" }, { unit_test_fn, "zvol read/write verification test"}, + { uzfs_zvol_txg_diff_blk_test, "uzfs modified blocks between two txg" }, + { uzfs_zvol_txg_mtree_test, "uzfs offset:len base tree test" }, }; uint64_t metaverify = 0; @@ -326,7 +328,7 @@ static void usage(int num) 
printf("uzfs_test -t -a " " -b -i -v -l(for log device)" - " -m " + " -m -n " " -s(for sync on) -S(for silent) -V " " -w(for write during replay) -T \n"); @@ -401,7 +403,7 @@ static void process_options(int argc, char **argv) uint64_t val = 0; uint64_t num_tests = sizeof (uzfs_tests) / sizeof (uzfs_tests[0]); - while ((opt = getopt(argc, argv, "a:b:i:lm:sSt:v:V:wT:n:")) != EOF) { + while ((opt = getopt(argc, argv, "a:b:i:lm:n:sSt:T:v:V:w")) != EOF) { if (optarg != NULL) val = nicenumtoull(optarg); switch (opt) { diff --git a/cmd/uzfs_test/uzfs_txg_diff.c b/cmd/uzfs_test/uzfs_txg_diff.c new file mode 100644 index 000000000000..cc8fe8005e8b --- /dev/null +++ b/cmd/uzfs_test/uzfs_txg_diff.c @@ -0,0 +1,320 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +extern int total_time_in_sec; +extern void populate_data(char *buf, uint64_t offset, int idx, + uint64_t block_size); +extern uint64_t block_size; + +static int del_from_mblktree(avl_tree_t *tree, uint64_t b_offset, + uint64_t b_len); +static int uzfs_search_mblktree(avl_tree_t *tree, uint64_t offset, + uint64_t *len); + +typedef struct wblkinfo { + uint64_t offset; + uint64_t len; + list_node_t link; +} wblkinfo_t; + +int +del_from_mblktree(avl_tree_t *tree, uint64_t offset, uint64_t len) +{ + uint64_t new_offset, new_len, b_end, b_offset, b_len; + uint64_t entry_len, entry_offset; + uzfs_zvol_blk_phy_t *entry, *new_entry, *b_entry; + uzfs_zvol_blk_phy_t f_entry; + avl_index_t where; + int err = 0; + + new_offset = offset; + new_len = len; + + f_entry.offset = new_offset; + f_entry.len = new_len; + entry = avl_find(tree, &f_entry, &where); + + if (entry != NULL) { + if (entry->len < new_len) { + err = -1; + goto done; + } + + entry_offset = entry->offset; + entry_len = entry->len; + avl_remove(tree, entry); + umem_free(entry, sizeof (*entry)); + + if (entry_len > new_len) { + new_entry = umem_alloc(sizeof (uzfs_zvol_blk_phy_t), + UMEM_NOFAIL); + new_entry->offset = entry_offset + new_len; + new_entry->len = (entry_len - new_len); + avl_add(tree, new_entry); + } + goto done; + } + + b_entry = avl_nearest(tree, where, AVL_BEFORE); + if (b_entry) { + b_end = (b_entry->offset + b_entry->len); + if (b_end < (new_offset + new_len)) { + err = -1; + goto done; + } + + b_offset = b_entry->offset; + b_len = b_entry->len; + avl_remove(tree, b_entry); + + umem_free(b_entry, sizeof (*b_entry)); + + new_entry = umem_alloc(sizeof (uzfs_zvol_blk_phy_t), + 
UMEM_NOFAIL); + new_entry->offset = b_offset; + new_entry->len = (new_offset - b_offset); + avl_add(tree, new_entry); + + if (b_end > (new_offset + new_len)) { + new_entry = umem_alloc(sizeof (uzfs_zvol_blk_phy_t), + UMEM_NOFAIL); + new_entry->offset = new_offset + new_len; + new_entry->len = (b_end - new_entry->offset); + avl_add(tree, new_entry); + } + goto done; + } else { + err = -1; + goto done; + } + +done: + return (err); +} + +int +uzfs_search_mblktree(avl_tree_t *tree, uint64_t offset, uint64_t *len) +{ + uzfs_zvol_blk_phy_t tofind; + avl_index_t where; + uzfs_zvol_blk_phy_t *entry; + + tofind.offset = offset; + tofind.len = 0; + + entry = avl_find(tree, &tofind, &where); + if (entry == NULL) + return (0); + + *len = entry->len; + return (1); +} + +void +uzfs_zvol_txg_diff_blk_test(void *arg) +{ + uzfs_test_info_t *test_info = (uzfs_test_info_t *)arg; + avl_tree_t *tree; + uint64_t first_txg, last_txg; + hrtime_t end, now; + uint64_t blk_offset, offset, vol_blocks; + uint64_t blksz = io_block_size, io_num = 0; + void *spa, *zvol; + char *buf; + int diff_txg = 5, count, i = 0; + list_t wlist; + wblkinfo_t *blk; + + setup_unit_test(); + unit_test_create_pool_ds(); + open_pool_ds(&spa, &zvol); + + vol_blocks = active_size / blksz; + buf = umem_alloc(block_size, UMEM_NOFAIL); + + list_create(&wlist, sizeof (wblkinfo_t), offsetof(wblkinfo_t, link)); + + now = gethrtime(); + end = now + (hrtime_t)(total_time_in_sec * (hrtime_t)(NANOSEC)); + + while (i++ < test_iterations) { + count = 0; + + txg_wait_synced(spa_get_dsl(spa), 0); + first_txg = spa_last_synced_txg(spa); + + while (count++ < diff_txg) { + io_num++; + blk_offset = uzfs_random(vol_blocks - 16); + offset = ((blk_offset * blksz + block_size) / + block_size) * block_size; + + populate_data(buf, offset, 0, block_size); + + if (uzfs_write_data(zvol, buf, offset, block_size, + &io_num)) + printf("IO error at offset: %lu len: %lu\n", + offset, block_size); + + blk = umem_alloc(sizeof (wblkinfo_t), 
UMEM_NOFAIL); + blk->offset = offset; + blk->len = block_size; + list_insert_tail(&wlist, blk); + blk = NULL; + } + + txg_wait_synced(spa_get_dsl(spa), 0); + last_txg = spa_last_synced_txg(spa); + + uzfs_txg_block_diff(zvol, first_txg, last_txg, (void **)&tree); + + while ((blk = list_remove_head(&wlist))) { + VERIFY0(del_from_mblktree(tree, blk->offset, blk->len)); + umem_free(blk, sizeof (*blk)); + blk = NULL; + } + VERIFY0(avl_numnodes(tree)); + printf("%s pass:%d\n", test_info->name, i); + umem_free(tree, sizeof (*tree)); + tree = NULL; + } + + uzfs_close_dataset(zvol); + uzfs_close_pool(spa); + list_destroy(&wlist); + umem_free(buf, block_size); +} + +static void +check_tree(avl_tree_t *tree, uint64_t offset, uint64_t len, uint64_t exp_off, + uint64_t exp_len, int exp_ret) +{ + int ret; + uint64_t len1 = 0; + + ret = uzfs_search_mblktree(tree, exp_off, &len1); + + VERIFY(ret == exp_ret); + + if (ret) + VERIFY(exp_len == len1); +} + +static void +add_and_check_tree(avl_tree_t *tree, uint64_t offset, uint64_t len, + uint64_t exp_off, uint64_t exp_len, int exp_ret) +{ + add_to_mblktree(tree, offset, len); + check_tree(tree, offset, len, exp_off, exp_len, exp_ret); +} + +static void +delete_and_check_tree(avl_tree_t *tree, uint64_t offset, uint64_t len, + uint64_t exp_off, uint64_t exp_len, int exp_ret) +{ + del_from_mblktree(tree, offset, len); + check_tree(tree, offset, len, exp_off, exp_len, exp_ret); +} + +void +uzfs_zvol_txg_mtree_test(void *arg) +{ + uzfs_test_info_t *test_info = (uzfs_test_info_t *)arg; + avl_tree_t *tree; + uint64_t blksz = io_block_size; + + uzfs_create_mblktree((void **)&tree); + + add_and_check_tree(tree, 100, 50, 100, 50, 1); + add_and_check_tree(tree, 150, 50, 100, 100, 1); + add_and_check_tree(tree, 5 * blksz, blksz, 5 * blksz, blksz, 1); + add_and_check_tree(tree, 4 * blksz, blksz, 4 * blksz, + 2 * blksz, 1); + check_tree(tree, 4 * blksz, blksz, 5 * blksz, blksz, 0); + add_and_check_tree(tree, 2 * blksz, blksz, 2 * blksz, blksz, 1); 
+ check_tree(tree, 2 * blksz, blksz, 4 * blksz, 2 * blksz, 1); + check_tree(tree, 2 * blksz, blksz, 5 * blksz, blksz, 0); + add_and_check_tree(tree, 2 * blksz, blksz, 2 * blksz, blksz, 1); + check_tree(tree, 2 * blksz, blksz, 4 * blksz, 2 * blksz, 1); + check_tree(tree, 2 * blksz, blksz, 5 * blksz, blksz, 0); + add_and_check_tree(tree, blksz, blksz, blksz, 2 * blksz, 1); + check_tree(tree, blksz, blksz, 2 * blksz, blksz, 0); + check_tree(tree, blksz, blksz, 4 * blksz, 2 * blksz, 1); + check_tree(tree, blksz, blksz, 5 * blksz, blksz, 0); + add_and_check_tree(tree, 3 * blksz, blksz, blksz, 5 * blksz, 1); + check_tree(tree, 3 * blksz, blksz, 2 * blksz, blksz, 0); + check_tree(tree, 3 * blksz, blksz, 3 * blksz, blksz, 0); + check_tree(tree, 3 * blksz, blksz, 4 * blksz, 2 * blksz, 0); + check_tree(tree, 3 * blksz, blksz, 5 * blksz, blksz, 0); + add_and_check_tree(tree, blksz, blksz, blksz, 5 * blksz, 1); + check_tree(tree, blksz, blksz, 2 * blksz, blksz, 0); + check_tree(tree, blksz, blksz, 3 * blksz, blksz, 0); + check_tree(tree, blksz, blksz, 4 * blksz, 2 * blksz, 0); + check_tree(tree, blksz, blksz, 5 * blksz, blksz, 0); + add_and_check_tree(tree, 3 * blksz, blksz, blksz, 5 * blksz, 1); + check_tree(tree, 3 * blksz, blksz, 2 * blksz, blksz, 0); + check_tree(tree, 3 * blksz, blksz, 3 * blksz, blksz, 0); + check_tree(tree, 3 * blksz, blksz, 4 * blksz, 2 * blksz, 0); + check_tree(tree, 3 * blksz, blksz, 5 * blksz, blksz, 0); + delete_and_check_tree(tree, blksz, blksz, 2 * blksz, + 4 * blksz, 1); + check_tree(tree, blksz, blksz, blksz, blksz, 0); + check_tree(tree, blksz, blksz, 3 * blksz, blksz, 0); + check_tree(tree, blksz, blksz, 4 * blksz, 2 * blksz, 0); + check_tree(tree, blksz, blksz, 5 * blksz, blksz, 0); + delete_and_check_tree(tree, 2 * blksz, blksz, 3 * blksz, + 3 * blksz, 1); + check_tree(tree, 2 * blksz, blksz, blksz, blksz, 0); + check_tree(tree, 2 * blksz, blksz, 2 * blksz, blksz, 0); + check_tree(tree, 2 * blksz, blksz, 4 * blksz, 2 * blksz, 0); + 
check_tree(tree, 2 * blksz, blksz, 5 * blksz, blksz, 0); + delete_and_check_tree(tree, 110, 10, 100, 10, 1); + check_tree(tree, 120, 30, 120, 80, 1); + add_and_check_tree(tree, 60, 20, 50, 0, 0); + add_and_check_tree(tree, 60, 20, 60, 20, 1); + delete_and_check_tree(tree, 60, 10, 70, 10, 1); + add_and_check_tree(tree, 80, 20, 70, 40, 1); + add_and_check_tree(tree, 80, 20, 90, 0, 0); + add_and_check_tree(tree, 80, 20, 100, 0, 0); + add_and_check_tree(tree, 200, 50, 200, 0, 0); + add_and_check_tree(tree, 200, 50, 70, 40, 1); + delete_and_check_tree(tree, 230, 20, 200, 0, 0); + add_and_check_tree(tree, 40, 45, 40, 70, 1); + add_and_check_tree(tree, 40, 45, 70, 40, 0); + add_and_check_tree(tree, 150, 50, 120, 110, 1); + add_and_check_tree(tree, 130, 140, 120, 150, 1); + add_and_check_tree(tree, 30, 270, 30, 270, 1); + add_and_check_tree(tree, 30, 270, 40, 0, 0); + add_and_check_tree(tree, 130, 140, 130, 140, 0); + add_and_check_tree(tree, 130, 140, 120, 150, 0); + + uzfs_destroy_mblktree((void *)tree); + printf("%s pass\n", test_info->name); +} diff --git a/cmd/uzfs_test/uzfs_zvol_zap.c b/cmd/uzfs_test/uzfs_zvol_zap.c index c633a6c4cb61..7e834bf406f1 100644 --- a/cmd/uzfs_test/uzfs_zvol_zap.c +++ b/cmd/uzfs_test/uzfs_zvol_zap.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -29,9 +30,8 @@ destroy_zap_entries(uzfs_zap_kv_t **kv_array, uint64_t zap_count) for (i = 0; i < zap_count; i++) { kv = kv_array[i]; - free(kv->key); - free(kv->value); - free(kv); + umem_free(kv->key, strlen(kv->key) + 1); + umem_free(kv, sizeof (*kv)); kv = NULL; } } @@ -41,22 +41,19 @@ fill_up_zap_entries(uzfs_zap_kv_t **array, uint64_t n) { int i = 0; uzfs_zap_kv_t *zap; - uint64_t key_len, value_len; + uint64_t key_len, value; for (i = 0; i < n; i++, zap = NULL) { - zap = malloc(sizeof (uzfs_zap_kv_t)); + zap = umem_alloc(sizeof (uzfs_zap_kv_t), UMEM_NOFAIL); key_len = uzfs_random(32); - value_len = uzfs_random(32); key_len = (key_len < 8) ? 
8 : key_len; - value_len = (value_len < 8) ? 8 : value_len; - zap->key = malloc(key_len); - zap->value = malloc(value_len); - zap->size = value_len; + zap->key = umem_alloc(key_len, UMEM_NOFAIL); + zap->value = uzfs_random(ULONG_MAX); + zap->size = sizeof (value); populate_string(zap->key, key_len); - populate_string(zap->value, value_len); array[i] = zap; } } @@ -69,7 +66,7 @@ update_zap_entries(uzfs_zap_kv_t **array, uint64_t n) for (i = 0; i < n; i++) { zap = array[i]; - populate_string(zap->value, zap->size); + zap->value = uzfs_random(ULONG_MAX); } } @@ -78,23 +75,20 @@ void verify_zap_entries(void *zvol, uzfs_zap_kv_t **key_array, uint64_t count) { uzfs_zap_kv_t *kv; - char *value, *temp_value; + uint64_t value; int i = 0, err; uzfs_zap_kv_t dummy_key; for (i = 0; i < count; i++) { kv = key_array[i]; - temp_value = kv->value; - kv->value = calloc(1, kv->size); + value = kv->value; + kv->value = 0; uzfs_read_zap_entry(zvol, kv); - VERIFY0(strncmp(kv->value, temp_value, kv->size)); - free(temp_value); - value = NULL; + VERIFY(kv->value == value); } - dummy_key.key = malloc(20); - dummy_key.value = malloc(20); - dummy_key.size = 20; + dummy_key.key = umem_alloc(20, UMEM_NOFAIL); + dummy_key.size = sizeof (dummy_key.value); dummy_key.key = "DUMMY"; err = uzfs_read_zap_entry(zvol, &dummy_key); @@ -125,7 +119,8 @@ uzfs_zvol_zap_operation(void *arg) while (i++ < test_iterations) { zap_count = uzfs_random(16) + 1; - kv_array = malloc(zap_count * sizeof (*kv_array)); + kv_array = umem_alloc(zap_count * sizeof (*kv_array), + UMEM_NOFAIL); fill_up_zap_entries(kv_array, zap_count); /* update key/value pair in ZAP entries */ @@ -142,8 +137,18 @@ uzfs_zvol_zap_operation(void *arg) verify_zap_entries(zvol, kv_array, zap_count); + uzfs_zap_kv_t *temp_kv; + temp_kv = kv_array[0]; + umem_free(temp_kv->key, strlen(temp_kv->key) + 1); + temp_kv->key = umem_alloc(MZAP_NAME_LEN + 4, UMEM_NOFAIL); + populate_string(temp_kv->key, MZAP_NAME_LEN + 4); + temp_kv->value = 2; + 
temp_kv->size = sizeof (temp_kv->value); + VERIFY(uzfs_update_zap_entries(zvol, + (const uzfs_zap_kv_t **) kv_array, zap_count) == EINVAL); + destroy_zap_entries(kv_array, zap_count); - free(kv_array); + umem_free(kv_array, zap_count * sizeof (*kv_array)); kv_array = NULL; printf("%s pass:%d\n", test_info->name, i); diff --git a/include/Makefile.am b/include/Makefile.am index 1f7d3de9eaf7..afad0ebd89dd 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -33,7 +33,8 @@ USER_H = \ $(top_srcdir)/include/rte_pause.h \ $(top_srcdir)/include/uzfs_zap.h \ $(top_srcdir)/include/uzfs.h \ - $(top_srcdir)/include/uzfs_task.h + $(top_srcdir)/include/uzfs_task.h \ + $(top_srcdir)/include/uzfs_mtree.h EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H) diff --git a/include/sys/uzfs_zvol.h b/include/sys/uzfs_zvol.h index 3a768d0ef983..4e9c12c2a1bc 100644 --- a/include/sys/uzfs_zvol.h +++ b/include/sys/uzfs_zvol.h @@ -103,5 +103,11 @@ uint64_t get_metadata_len(zvol_state_t *zv, uint64_t offset, uint64_t len); */ extern zil_replay_func_t zvol_replay_vector[TX_MAX_TYPE]; +typedef struct uzfs_zvol_blk_phy { + uint64_t offset; + uint64_t len; + avl_node_t uzb_link; +} uzfs_zvol_blk_phy_t; + #endif #endif diff --git a/include/uzfs_mtree.h b/include/uzfs_mtree.h new file mode 100644 index 000000000000..2a1e41e7d37a --- /dev/null +++ b/include/uzfs_mtree.h @@ -0,0 +1,31 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#ifndef _UZFS_MTREE_H +#define _UZFS_MTREE_H + +extern int uzfs_txg_block_diff(void *zv, uint64_t start_txg, + uint64_t end_txg, void **tree); +extern void dump_mblktree(void *tree); +extern void uzfs_create_mblktree(void **tree); +extern void uzfs_destroy_mblktree(void *tree); +extern int add_to_mblktree(void *tree, uint64_t offset, uint64_t size); +#endif diff --git a/include/uzfs_test.h b/include/uzfs_test.h index 59a11bb58285..bd800ef320d5 100644 --- a/include/uzfs_test.h +++ b/include/uzfs_test.h @@ -33,6 +33,8 @@ extern int write_op; extern int verify_err; extern int verify; extern int test_iterations; +extern uint64_t active_size; +extern uint64_t vol_size; extern unsigned long zfs_arc_max; extern unsigned long zfs_arc_min; @@ -58,4 +60,6 @@ typedef struct uzfs_test_info { void uzfs_zvol_zap_operation(void *arg); void unit_test_fn(void *arg); +void uzfs_zvol_txg_diff_blk_test(void *arg); +void uzfs_zvol_txg_mtree_test(void *arg); #endif diff --git a/include/uzfs_zap.h b/include/uzfs_zap.h index 3afdd25e92b5..fd84ec40f140 100644 --- a/include/uzfs_zap.h +++ b/include/uzfs_zap.h @@ -24,7 +24,7 @@ typedef struct { char *key; /* zap key to update */ - char *value; /* value to update against zap key */ + uint64_t value; /* value to update against zap key */ size_t size; /* size of value */ } uzfs_zap_kv_t; @@ -33,7 +33,9 @@ typedef struct { extern long long txg_update_interval_time; /* - * Here, allocation/freeing of kv_array needs to be handled by caller function. + * The caller is responsible for allocating and freeing kv_array. + * The uzfs_*_zap_entry functions handle only microzap-style entries: + * keys shorter than MZAP_NAME_LEN, each with a single uint64_t value.
*/ int uzfs_update_zap_entries(void *zv, const uzfs_zap_kv_t **kv_array, uint64_t n); diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index 1c3019e0c6e1..8e7ae4faee93 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -21,6 +21,7 @@ USER_C = \ uzfs_io.c \ uzfs_task.c \ uzfs_mgmt.c \ + uzfs_mtree.c \ uzfs_zap.c \ vdev_disk_aio.c diff --git a/lib/libzpool/uzfs_mtree.c b/lib/libzpool/uzfs_mtree.c new file mode 100644 index 000000000000..79175b1a1105 --- /dev/null +++ b/lib/libzpool/uzfs_mtree.c @@ -0,0 +1,251 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#include +#include +#include +#include +#include +#include +#include + +#define TXG_DIFF_SNAPNAME "tsnap" + +struct diff_txg_blk { + avl_tree_t *tree; + uint64_t start_txg; + uint64_t end_txg; +}; + +void +add_to_mblktree(avl_tree_t *tree, uint64_t boffset, uint64_t blen) +{ + uint64_t new_offset, new_len, b_end, a_end; + uzfs_zvol_blk_phy_t *entry, *new_node, *b_entry, *a_entry; + uzfs_zvol_blk_phy_t tofind; + avl_index_t where; + + new_offset = boffset; + new_len = blen; + +find: + tofind.offset = new_offset; + tofind.len = new_len; + entry = avl_find(tree, &tofind, &where); + + if (entry != NULL) { + if (entry->len >= new_len) { + return; + } else { + avl_remove(tree, entry); + umem_free(entry, sizeof (*entry)); + goto find; + } + } + + b_entry = avl_nearest(tree, where, AVL_BEFORE); + if (b_entry) { + b_end = (b_entry->offset + b_entry->len); + if (b_end < new_offset) + goto after; + + if (b_end == new_offset) { + new_len += (b_entry->len); + new_offset = b_entry->offset; + avl_remove(tree, b_entry); + umem_free(b_entry, sizeof (*b_entry)); + goto find; + } + + if (b_end < (new_offset + new_len)) { + new_len += (new_offset - b_entry->offset); + new_offset = b_entry->offset; + avl_remove(tree, b_entry); + umem_free(b_entry, sizeof (*b_entry)); + goto find; + } + + if (b_end >= (new_offset + new_len)) + return; + } + +after: + a_entry = avl_nearest(tree, where, AVL_AFTER); + + if (a_entry) { + a_end = (a_entry->offset + a_entry->len); + if ((new_offset + new_len) < a_entry->offset) + goto doadd; + + if ((new_offset + new_len) == a_entry->offset) { + new_len += a_entry->len; + avl_remove(tree, a_entry); + umem_free(a_entry, sizeof (*a_entry)); + goto find; + } + + if ((new_offset + new_len) <= (a_end)) { + new_len = 
(a_entry->len) + + (a_entry->offset - new_offset); + avl_remove(tree, a_entry); + umem_free(a_entry, sizeof (*a_entry)); + goto find; + } + + if ((new_offset + new_len) > (a_end)) { + avl_remove(tree, a_entry); + umem_free(a_entry, sizeof (*a_entry)); + goto find; + } + } + +doadd: + new_node = umem_alloc(sizeof (uzfs_zvol_blk_phy_t), UMEM_NOFAIL); + new_node->offset = new_offset; + new_node->len = new_len; + avl_insert(tree, new_node, where); +} + +void +dump_mblktree(avl_tree_t *tree) +{ + uzfs_zvol_blk_phy_t *blk; + + for (blk = avl_first(tree); blk; blk = AVL_NEXT(tree, blk)) { + printf("offset:%lu, length:%lu\n", blk->offset, blk->len); + } +} + +int +uzfs_changed_block_cb(spa_t *spa, zilog_t *zillog, const blkptr_t *bp, + const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) +{ + uint64_t blksz; + struct diff_txg_blk *diff_blk_info = (struct diff_txg_blk *)arg; + + if ((bp == NULL) || (BP_IS_HOLE(bp)) || (zb->zb_object != ZVOL_OBJ) || + (zb->zb_level != 0)) + return (0); + + if (bp->blk_birth > diff_blk_info->end_txg || + bp->blk_birth < diff_blk_info->start_txg) + return (0); + + blksz = BP_GET_LSIZE(bp); + add_to_mblktree(diff_blk_info->tree, zb->zb_blkid * blksz, blksz); + return (0); +} + +static int +zvol_blk_off_cmpr(const void *arg1, const void *arg2) +{ + uzfs_zvol_blk_phy_t *node1 = (uzfs_zvol_blk_phy_t *)arg1; + uzfs_zvol_blk_phy_t *node2 = (uzfs_zvol_blk_phy_t *)arg2; + + return (AVL_CMP(node1->offset, node2->offset)); +} + + +int +uzfs_txg_block_diff(zvol_state_t *zv, uint64_t start_txg, uint64_t end_txg, + avl_tree_t **tree) +{ + int error; + char snapname[ZFS_MAX_DATASET_NAME_LEN]; + struct diff_txg_blk diff_blk; + hrtime_t now; + dsl_pool_t *dp; + dsl_dataset_t *ds_snap; + + now = gethrtime(); + snprintf(snapname, sizeof (snapname), "%s%llu", TXG_DIFF_SNAPNAME, now); + + error = dmu_objset_snapshot_one(zv->zv_name, snapname); + if (error) { + printf("failed to create snapshot for %s\n", zv->zv_name); + return (error); + } + + 
memset(snapname, 0, sizeof (snapname)); + snprintf(snapname, sizeof (snapname), "%s@%s%llu", zv->zv_name, + TXG_DIFF_SNAPNAME, now); + + error = dsl_pool_hold(snapname, FTAG, &dp); + if (error != 0) + return (error); + + error = dsl_dataset_hold(dp, snapname, FTAG, &ds_snap); + if (error != 0) { + dsl_pool_rele(dp, FTAG); + return (error); + } + + memset(&diff_blk, 0, sizeof (diff_blk)); + + diff_blk.tree = umem_alloc(sizeof (avl_tree_t), UMEM_NOFAIL); + avl_create(diff_blk.tree, zvol_blk_off_cmpr, + sizeof (uzfs_zvol_blk_phy_t), + offsetof(uzfs_zvol_blk_phy_t, uzb_link)); + + diff_blk.start_txg = start_txg; + diff_blk.end_txg = end_txg; + + error = traverse_dataset(ds_snap, start_txg, + TRAVERSE_PRE, uzfs_changed_block_cb, &diff_blk); + + *tree = diff_blk.tree; + + dsl_dataset_rele(ds_snap, FTAG); + dsl_pool_rele(dp, FTAG); + + /* + * TODO: if we failed to destroy snapshot here then + * this should be handled separately from application. + */ + (void) dsl_destroy_snapshot(snapname, B_FALSE); + return (error); +} + +void +uzfs_create_mblktree(void **tree) +{ + avl_tree_t *temp_tree; + + temp_tree = umem_alloc(sizeof (avl_tree_t), UMEM_NOFAIL); + avl_create(temp_tree, zvol_blk_off_cmpr, sizeof (uzfs_zvol_blk_phy_t), + offsetof(uzfs_zvol_blk_phy_t, uzb_link)); + *tree = temp_tree; +} + +void +uzfs_destroy_mblktree(void *tree) +{ + avl_tree_t *temp_tree = tree; + uzfs_zvol_blk_phy_t *node; + void *cookie = NULL; + + while ((node = avl_destroy_nodes(temp_tree, &cookie)) != NULL) { + umem_free(node, sizeof (*node)); + } + + avl_destroy(temp_tree); + umem_free(temp_tree, sizeof (*temp_tree)); +} diff --git a/lib/libzpool/uzfs_zap.c b/lib/libzpool/uzfs_zap.c index 06727fd58802..39313abc43b8 100644 --- a/lib/libzpool/uzfs_zap.c +++ b/lib/libzpool/uzfs_zap.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -50,6 +51,17 @@ uzfs_update_zap_entries(void *zvol, const uzfs_zap_kv_t **array, int err; int i = 0; + /* + * check if key length is 
greater than MZAP_NAME_LEN. + * key with MZAP_NAME_LEN+ length will convert microzap + * to fatzap. + */ + for (i = 0; i < count; i++) { + kv = array[i]; + if (strlen(kv->key) >= MZAP_NAME_LEN) + return (EINVAL); + } + tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); @@ -61,8 +73,8 @@ uzfs_update_zap_entries(void *zvol, const uzfs_zap_kv_t **array, for (i = 0; i < count; i++) { kv = array[i]; - VERIFY0(zap_update(os, ZVOL_ZAP_OBJ, kv->key, 1, kv->size, - kv->value, tx)); + VERIFY0(zap_update(os, ZVOL_ZAP_OBJ, kv->key, kv->size, 1, + &kv->value, tx)); } dmu_tx_commit(tx); @@ -80,8 +92,8 @@ uzfs_read_zap_entry(void *zvol, uzfs_zap_kv_t *entry) objset_t *os = zv->zv_objset; int err; - err = zap_lookup(os, ZVOL_ZAP_OBJ, entry->key, 1, entry->size, - entry->value); + err = zap_lookup(os, ZVOL_ZAP_OBJ, entry->key, entry->size, 1, + &entry->value); if (err) return (SET_ERROR(err)); diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c deleted file mode 100644 index 5b792b868455..000000000000 --- a/module/zfs/spa_config.c +++ /dev/null @@ -1,592 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. 
All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2011, 2015 by Delphix. All rights reserved. - * Copyright 2017 Joyent, Inc. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef _KERNEL -#include -#include -#endif - -/* - * Pool configuration repository. - * - * Pool configuration is stored as a packed nvlist on the filesystem. By - * default, all pools are stored in /etc/zfs/zpool.cache and loaded on boot - * (when the ZFS module is loaded). Pools can also have the 'cachefile' - * property set that allows them to be stored in an alternate location until - * the control of external software. - * - * For each cache file, we have a single nvlist which holds all the - * configuration information. When the module loads, we read this information - * from /etc/zfs/zpool.cache and populate the SPA namespace. This namespace is - * maintained independently in spa.c. Whenever the namespace is modified, or - * the configuration of a pool is changed, we call spa_config_sync(), which - * walks through all the active pools and writes the configuration to disk. - */ - -static uint64_t spa_config_generation = 1; - -/* - * This can be overridden in userland to preserve an alternate namespace for - * userland pools when doing testing. - */ -char *spa_config_path = ZPOOL_CACHE; -int zfs_autoimport_disable = 1; - -/* - * Called when the module is first loaded, this routine loads the configuration - * file into the SPA namespace. It does not actually open or load the pools; it - * only populates the namespace. - */ -void -spa_config_load(void) -{ - void *buf = NULL; - nvlist_t *nvlist, *child; - nvpair_t *nvpair; - char *pathname; - struct _buf *file; - uint64_t fsize; - -#ifdef _KERNEL - if (zfs_autoimport_disable) - return; -#endif - - /* - * Open the configuration file. 
- */ - pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); - - (void) snprintf(pathname, MAXPATHLEN, "%s%s", - (rootdir != NULL) ? "./" : "", spa_config_path); - - file = kobj_open_file(pathname); - - kmem_free(pathname, MAXPATHLEN); - - if (file == (struct _buf *)-1) - return; - - if (kobj_get_filesize(file, &fsize) != 0) - goto out; - - buf = kmem_alloc(fsize, KM_SLEEP); - - /* - * Read the nvlist from the file. - */ - if (kobj_read_file(file, buf, fsize, 0) < 0) - goto out; - - /* - * Unpack the nvlist. - */ - if (nvlist_unpack(buf, fsize, &nvlist, KM_SLEEP) != 0) - goto out; - - /* - * Iterate over all elements in the nvlist, creating a new spa_t for - * each one with the specified configuration. - */ - mutex_enter(&spa_namespace_lock); - nvpair = NULL; - while ((nvpair = nvlist_next_nvpair(nvlist, nvpair)) != NULL) { - if (nvpair_type(nvpair) != DATA_TYPE_NVLIST) - continue; - - child = fnvpair_value_nvlist(nvpair); - - if (spa_lookup(nvpair_name(nvpair)) != NULL) - continue; - (void) spa_add(nvpair_name(nvpair), child, NULL); - } - mutex_exit(&spa_namespace_lock); - - nvlist_free(nvlist); - -out: - if (buf != NULL) - kmem_free(buf, fsize); - - kobj_close_file(file); -} - -static int -spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) -{ - size_t buflen; - char *buf; - vnode_t *vp; - int oflags = FWRITE | FTRUNC | FCREAT | FOFFMAX; - char *temp; - int err; - - /* - * If the nvlist is empty (NULL), then remove the old cachefile. - */ - if (nvl == NULL) { - err = vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE); - return (err); - } - - /* - * Pack the configuration into a buffer. - */ - buf = fnvlist_pack(nvl, &buflen); - temp = kmem_zalloc(MAXPATHLEN, KM_SLEEP); - -#if defined(__linux__) && defined(_KERNEL) - /* - * Write the configuration to disk. Due to the complexity involved - * in performing a rename from within the kernel the file is truncated - * and overwritten in place. 
In the event of an error the file is - * unlinked to make sure we always have a consistent view of the data. - */ - err = vn_open(dp->scd_path, UIO_SYSSPACE, oflags, 0644, &vp, 0, 0); - if (err == 0) { - err = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, - UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, NULL); - if (err == 0) - err = VOP_FSYNC(vp, FSYNC, kcred, NULL); - - (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL); - - if (err) - (void) vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE); - } -#else - /* - * Write the configuration to disk. We need to do the traditional - * 'write to temporary file, sync, move over original' to make sure we - * always have a consistent view of the data. - */ - (void) snprintf(temp, MAXPATHLEN, "%s.tmp", dp->scd_path); - - err = vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0); - if (err == 0) { - err = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE, - 0, RLIM64_INFINITY, kcred, NULL); - if (err == 0) - err = VOP_FSYNC(vp, FSYNC, kcred, NULL); - if (err == 0) - err = vn_rename(temp, dp->scd_path, UIO_SYSSPACE); - (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL); - } - - (void) vn_remove(temp, UIO_SYSSPACE, RMFILE); -#endif - - fnvlist_pack_free(buf, buflen); - kmem_free(temp, MAXPATHLEN); - return (err); -} - -/* - * Synchronize pool configuration to disk. This must be called with the - * namespace lock held. Synchronizing the pool cache is typically done after - * the configuration has been synced to the MOS. This exposes a window where - * the MOS config will have been updated but the cache file has not. If - * the system were to crash at that instant then the cached config may not - * contain the correct information to open the pool and an explicit import - * would be required. 
- */ -void -spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent) -{ - spa_config_dirent_t *dp, *tdp; - nvlist_t *nvl; - char *pool_name; - boolean_t ccw_failure; - int error = 0; - - ASSERT(MUTEX_HELD(&spa_namespace_lock)); - - if (rootdir == NULL || !(spa_mode_global & FWRITE)) - return; - - /* - * Iterate over all cachefiles for the pool, past or present. When the - * cachefile is changed, the new one is pushed onto this list, allowing - * us to update previous cachefiles that no longer contain this pool. - */ - ccw_failure = B_FALSE; - for (dp = list_head(&target->spa_config_list); dp != NULL; - dp = list_next(&target->spa_config_list, dp)) { - spa_t *spa = NULL; - if (dp->scd_path == NULL) - continue; - - /* - * Iterate over all pools, adding any matching pools to 'nvl'. - */ - nvl = NULL; - while ((spa = spa_next(spa)) != NULL) { - /* - * Skip over our own pool if we're about to remove - * ourselves from the spa namespace or any pool that - * is readonly. Since we cannot guarantee that a - * readonly pool would successfully import upon reboot, - * we don't allow them to be written to the cache file. 
- */ - if ((spa == target && removing) || - !spa_writeable(spa)) - continue; - - mutex_enter(&spa->spa_props_lock); - tdp = list_head(&spa->spa_config_list); - if (spa->spa_config == NULL || - tdp == NULL || - tdp->scd_path == NULL || - strcmp(tdp->scd_path, dp->scd_path) != 0) { - mutex_exit(&spa->spa_props_lock); - continue; - } - - if (nvl == NULL) - nvl = fnvlist_alloc(); - - if (spa->spa_import_flags & ZFS_IMPORT_TEMP_NAME) - pool_name = fnvlist_lookup_string( - spa->spa_config, ZPOOL_CONFIG_POOL_NAME); - else - pool_name = spa_name(spa); - - fnvlist_add_nvlist(nvl, pool_name, spa->spa_config); - mutex_exit(&spa->spa_props_lock); - } - - error = spa_config_write(dp, nvl); - if (error != 0) - ccw_failure = B_TRUE; - nvlist_free(nvl); - } - - if (ccw_failure) { - /* - * Keep trying so that configuration data is - * written if/when any temporary filesystem - * resource issues are resolved. - */ - if (target->spa_ccw_fail_time == 0) { - zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE, - target, NULL, NULL, 0, 0); - } - target->spa_ccw_fail_time = gethrtime(); - spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE); - } else { - /* - * Do not rate limit future attempts to update - * the config cache. - */ - target->spa_ccw_fail_time = 0; - } - - /* - * Remove any config entries older than the current one. - */ - dp = list_head(&target->spa_config_list); - while ((tdp = list_next(&target->spa_config_list, dp)) != NULL) { - list_remove(&target->spa_config_list, tdp); - if (tdp->scd_path != NULL) - spa_strfree(tdp->scd_path); - kmem_free(tdp, sizeof (spa_config_dirent_t)); - } - - spa_config_generation++; - - if (postsysevent) - spa_event_notify(target, NULL, NULL, ESC_ZFS_CONFIG_SYNC); -} - -/* - * Sigh. Inside a local zone, we don't have access to /etc/zfs/zpool.cache, - * and we don't want to allow the local zone to see all the pools anyway. 
- * So we have to invent the ZFS_IOC_CONFIG ioctl to grab the configuration - * information for all pool visible within the zone. - */ -nvlist_t * -spa_all_configs(uint64_t *generation) -{ - nvlist_t *pools; - spa_t *spa = NULL; - - if (*generation == spa_config_generation) - return (NULL); - - pools = fnvlist_alloc(); - - mutex_enter(&spa_namespace_lock); - while ((spa = spa_next(spa)) != NULL) { - if (INGLOBALZONE(curproc) || - zone_dataset_visible(spa_name(spa), NULL)) { - mutex_enter(&spa->spa_props_lock); - fnvlist_add_nvlist(pools, spa_name(spa), - spa->spa_config); - mutex_exit(&spa->spa_props_lock); - } - } - *generation = spa_config_generation; - mutex_exit(&spa_namespace_lock); - - return (pools); -} - -void -spa_config_set(spa_t *spa, nvlist_t *config) -{ - mutex_enter(&spa->spa_props_lock); - nvlist_free(spa->spa_config); - spa->spa_config = config; - mutex_exit(&spa->spa_props_lock); -} - -/* - * Generate the pool's configuration based on the current in-core state. - * - * We infer whether to generate a complete config or just one top-level config - * based on whether vd is the root vdev. - */ -nvlist_t * -spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) -{ - nvlist_t *config, *nvroot; - vdev_t *rvd = spa->spa_root_vdev; - unsigned long hostid = 0; - boolean_t locked = B_FALSE; - uint64_t split_guid; - char *pool_name; - int config_gen_flags = 0; - - if (vd == NULL) { - vd = rvd; - locked = B_TRUE; - spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); - } - - ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER) == - (SCL_CONFIG | SCL_STATE)); - - /* - * If txg is -1, report the current value of spa->spa_config_txg. - */ - if (txg == -1ULL) - txg = spa->spa_config_txg; - - /* - * Originally, users had to handle spa namespace collisions by either - * exporting the already imported pool or by specifying a new name for - * the pool with a conflicting name. 
In the case of root pools from - * virtual guests, neither approach to collision resolution is - * reasonable. This is addressed by extending the new name syntax with - * an option to specify that the new name is temporary. When specified, - * ZFS_IMPORT_TEMP_NAME will be set in spa->spa_import_flags to tell us - * to use the previous name, which we do below. - */ - if (spa->spa_import_flags & ZFS_IMPORT_TEMP_NAME) { - VERIFY0(nvlist_lookup_string(spa->spa_config, - ZPOOL_CONFIG_POOL_NAME, &pool_name)); - } else - pool_name = spa_name(spa); - - config = fnvlist_alloc(); - - fnvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, spa_version(spa)); - fnvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, pool_name); - fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, spa_state(spa)); - fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, txg); - fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, spa_guid(spa)); - fnvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA, spa->spa_errata); - if (spa->spa_comment != NULL) - fnvlist_add_string(config, ZPOOL_CONFIG_COMMENT, - spa->spa_comment); - - hostid = spa_get_hostid(); - if (hostid != 0) - fnvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, hostid); - fnvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, utsname()->nodename); - - if (vd != rvd) { - fnvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID, - vd->vdev_top->vdev_guid); - fnvlist_add_uint64(config, ZPOOL_CONFIG_GUID, - vd->vdev_guid); - if (vd->vdev_isspare) - fnvlist_add_uint64(config, - ZPOOL_CONFIG_IS_SPARE, 1ULL); - if (vd->vdev_islog) - fnvlist_add_uint64(config, - ZPOOL_CONFIG_IS_LOG, 1ULL); - vd = vd->vdev_top; /* label contains top config */ - } else { - /* - * Only add the (potentially large) split information - * in the mos config, and not in the vdev labels - */ - if (spa->spa_config_splitting != NULL) - fnvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT, - spa->spa_config_splitting); - - fnvlist_add_boolean(config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS); - - config_gen_flags |= 
VDEV_CONFIG_MOS; - } - - /* - * Add the top-level config. We even add this on pools which - * don't support holes in the namespace. - */ - vdev_top_config_generate(spa, config); - - /* - * If we're splitting, record the original pool's guid. - */ - if (spa->spa_config_splitting != NULL && - nvlist_lookup_uint64(spa->spa_config_splitting, - ZPOOL_CONFIG_SPLIT_GUID, &split_guid) == 0) { - fnvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID, split_guid); - } - - nvroot = vdev_config_generate(spa, vd, getstats, config_gen_flags); - fnvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot); - nvlist_free(nvroot); - - /* - * Store what's necessary for reading the MOS in the label. - */ - fnvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ, - spa->spa_label_features); - - if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) { - ddt_histogram_t *ddh; - ddt_stat_t *dds; - ddt_object_t *ddo; - - ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP); - ddt_get_dedup_histogram(spa, ddh); - fnvlist_add_uint64_array(config, - ZPOOL_CONFIG_DDT_HISTOGRAM, - (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)); - kmem_free(ddh, sizeof (ddt_histogram_t)); - - ddo = kmem_zalloc(sizeof (ddt_object_t), KM_SLEEP); - ddt_get_dedup_object_stats(spa, ddo); - fnvlist_add_uint64_array(config, - ZPOOL_CONFIG_DDT_OBJ_STATS, - (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)); - kmem_free(ddo, sizeof (ddt_object_t)); - - dds = kmem_zalloc(sizeof (ddt_stat_t), KM_SLEEP); - ddt_get_dedup_stats(spa, dds); - fnvlist_add_uint64_array(config, - ZPOOL_CONFIG_DDT_STATS, - (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)); - kmem_free(dds, sizeof (ddt_stat_t)); - } - - if (locked) - spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); - - return (config); -} - -/* - * Update all disk labels, generate a fresh config based on the current - * in-core state, and sync the global config cache (do not sync the config - * cache if this is a booting rootpool). 
- */ -void -spa_config_update(spa_t *spa, int what) -{ - vdev_t *rvd = spa->spa_root_vdev; - uint64_t txg; - int c; - - ASSERT(MUTEX_HELD(&spa_namespace_lock)); - - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - txg = spa_last_synced_txg(spa) + 1; - if (what == SPA_CONFIG_UPDATE_POOL) { - vdev_config_dirty(rvd); - } else { - /* - * If we have top-level vdevs that were added but have - * not yet been prepared for allocation, do that now. - * (It's safe now because the config cache is up to date, - * so it will be able to translate the new DVAs.) - * See comments in spa_vdev_add() for full details. - */ - for (c = 0; c < rvd->vdev_children; c++) { - vdev_t *tvd = rvd->vdev_child[c]; - if (tvd->vdev_ms_array == 0) - vdev_metaslab_set_size(tvd); - vdev_expand(tvd, txg); - } - } - spa_config_exit(spa, SCL_ALL, FTAG); - - /* - * Wait for the mosconfig to be regenerated and synced. - */ - txg_wait_synced(spa->spa_dsl_pool, txg); - - /* - * Update the global config cache to reflect the new mosconfig. 
- */ - if (!spa->spa_is_root) - spa_config_sync(spa, B_FALSE, what != SPA_CONFIG_UPDATE_POOL); - - if (what == SPA_CONFIG_UPDATE_POOL) - spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS); -} - -#if defined(_KERNEL) && defined(HAVE_SPL) -EXPORT_SYMBOL(spa_config_sync); -EXPORT_SYMBOL(spa_config_load); -EXPORT_SYMBOL(spa_all_configs); -EXPORT_SYMBOL(spa_config_set); -EXPORT_SYMBOL(spa_config_generate); -EXPORT_SYMBOL(spa_config_update); - -module_param(spa_config_path, charp, 0444); -MODULE_PARM_DESC(spa_config_path, "SPA config file (/etc/zfs/zpool.cache)"); - -module_param(zfs_autoimport_disable, int, 0644); -MODULE_PARM_DESC(zfs_autoimport_disable, "Disable pool import at module load"); - -#endif diff --git a/tests/cbtest/script/test_uzfs.sh b/tests/cbtest/script/test_uzfs.sh index ca6054d62079..0cea6076347c 100755 --- a/tests/cbtest/script/test_uzfs.sh +++ b/tests/cbtest/script/test_uzfs.sh @@ -555,7 +555,9 @@ run_uzfs_test() log_must $UZFS_TEST -s -i 8192 -b 65536 -T 2 log_must $UZFS_TEST -l -i 8192 -b 65536 -T 2 log_must $UZFS_TEST -s -l -i 8192 -b 65536 -T 2 - log_must $UZFS_TEST -t 10 -T 0 + log_must $UZFS_TEST -t 10 -T 0 -n 10 + log_must $UZFS_TEST -t 10 -T 3 -n 5 + log_must $UZFS_TEST -t 10 -T 4 -n 5 log_must . $UZFS_TEST_SYNC_SH