diff --git a/cmd/uzfs_test/Makefile.am b/cmd/uzfs_test/Makefile.am index 7444dd52aa04..98c18aff363a 100644 --- a/cmd/uzfs_test/Makefile.am +++ b/cmd/uzfs_test/Makefile.am @@ -14,7 +14,8 @@ sbin_PROGRAMS = uzfs_test uzfs_test_SOURCES = \ uzfs_test.c \ uzfs_test_sync.c \ - uzfs_zvol_zap.c + uzfs_zvol_zap.c \ + uzfs_txg_diff.c uzfs_test_LDADD = \ $(top_builddir)/lib/libnvpair/libnvpair.la \ diff --git a/cmd/uzfs_test/uzfs_test.c b/cmd/uzfs_test/uzfs_test.c index 33fb43fcdb76..61a898bae2e2 100644 --- a/cmd/uzfs_test/uzfs_test.c +++ b/cmd/uzfs_test/uzfs_test.c @@ -42,6 +42,9 @@ uzfs_test_info_t uzfs_tests[] = { { uzfs_zvol_zap_operation, "uzfs zap operation test" }, { replay_fn, "zvol replay test" }, { unit_test_fn, "zvol read/write verification test"}, + { uzfs_txg_diff_verifcation_test, + "test to verify modified blocks between two txg for zvol" }, + { uzfs_txg_diff_tree_test, "txg_diff_tree functionality test" }, }; uint64_t metaverify = 0; @@ -416,14 +419,10 @@ static void process_options(int argc, char **argv) val = nicenumtoull(optarg); break; } + switch (opt) { case 'a': active_size = val; - if (vol_size == 0) - vol_size = active_size; - else - active_size = (active_size < vol_size) - ? (active_size) : (vol_size); break; case 'b': block_size = val; @@ -457,11 +456,6 @@ static void process_options(int argc, char **argv) break; case 'v': vol_size = val; - if (active_size == 0) - active_size = vol_size; - else - active_size = (active_size < vol_size) - ? 
(active_size) : (vol_size); break; case 'V': verify = val; @@ -482,8 +476,14 @@ static void process_options(int argc, char **argv) usage(0); } } - if (active_size == 0 || vol_size == 0) - active_size = vol_size = 1024*1024*1024ULL; + if (active_size == 0) + active_size = 1024*1024*1024ULL; + + if (vol_size == 0) + vol_size = 1024*1024*1024ULL; + + if (active_size > vol_size) + vol_size = active_size; if (silent == 0) { printf("vol size: %lu active size: %lu create: %d\n", vol_size, diff --git a/cmd/uzfs_test/uzfs_txg_diff.c b/cmd/uzfs_test/uzfs_txg_diff.c new file mode 100644 index 000000000000..a5a209036897 --- /dev/null +++ b/cmd/uzfs_test/uzfs_txg_diff.c @@ -0,0 +1,350 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +extern int total_time_in_sec; +extern void populate_data(char *buf, uint64_t offset, int idx, + uint64_t block_size); +extern uint64_t block_size; + +static int del_from_txg_diff_tree(avl_tree_t *tree, uint64_t b_offset, + uint64_t b_len); +static int uzfs_search_txg_diff_tree(avl_tree_t *tree, uint64_t offset, + uint64_t *len); + +/* + * Delete entry (offset, len) from tree. 
+ * Note : As of now, this API is used in testing code only. To use it in + * library, move this API to library code. + */ +int +del_from_txg_diff_tree(avl_tree_t *tree, uint64_t offset, uint64_t len) +{ + uint64_t new_offset, new_len, b_end, b_offset, b_len; + uint64_t entry_len, entry_offset; + uzfs_zvol_blk_phy_t *entry, *new_entry, *b_entry; + uzfs_zvol_blk_phy_t f_entry; + avl_index_t where; + int err = 0; + + new_offset = offset; + new_len = len; + + f_entry.offset = new_offset; + f_entry.len = new_len; + entry = avl_find(tree, &f_entry, &where); + + // found entry whose offset matches with f_entry's offset + if (entry != NULL) { + // entry's len doesn't match with f_entry's len + if (entry->len < new_len) { + err = -1; + goto done; + } + + /* + * entry's length is not lesser than f_entry. + * If entry's len is greater than f_entry, then + * update entry's offset to (f_entry's end) and len to + * entry's len - f_entry's len. + */ + entry_offset = entry->offset; + entry_len = entry->len; + avl_remove(tree, entry); + umem_free(entry, sizeof (*entry)); + + if (entry_len > new_len) { + new_entry = umem_alloc(sizeof (uzfs_zvol_blk_phy_t), + UMEM_NOFAIL); + new_entry->offset = entry_offset + new_len; + new_entry->len = (entry_len - new_len); + avl_add(tree, new_entry); + } + goto done; + } + + /* + * Search for nearest entry whose offset is lesser than + * f_entry's offset + */ + b_entry = avl_nearest(tree, where, AVL_BEFORE); + if (b_entry) { + b_end = (b_entry->offset + b_entry->len); + + // b_entry ends before f_entry ends + if (b_end < (new_offset + new_len)) { + err = -1; + goto done; + } + + b_offset = b_entry->offset; + b_len = b_entry->len; + avl_remove(tree, b_entry); + + umem_free(b_entry, sizeof (*b_entry)); + + new_entry = umem_alloc(sizeof (uzfs_zvol_blk_phy_t), + UMEM_NOFAIL); + new_entry->offset = b_offset; + new_entry->len = (new_offset - b_offset); + avl_add(tree, new_entry); + + if (b_end > (new_offset + new_len)) { + new_entry = umem_alloc(sizeof 
(uzfs_zvol_blk_phy_t), + UMEM_NOFAIL); + new_entry->offset = new_offset + new_len; + new_entry->len = (b_end - new_entry->offset); + avl_add(tree, new_entry); + } + goto done; + } else { + err = -1; + goto done; + } + +done: + return (err); +} + +int +uzfs_search_txg_diff_tree(avl_tree_t *tree, uint64_t offset, uint64_t *len) +{ + uzfs_zvol_blk_phy_t tofind; + avl_index_t where; + uzfs_zvol_blk_phy_t *entry; + + tofind.offset = offset; + tofind.len = 0; + + entry = avl_find(tree, &tofind, &where); + if (entry == NULL) + return (0); + + *len = entry->len; + return (1); +} + +void +uzfs_txg_diff_verifcation_test(void *arg) +{ + uzfs_test_info_t *test_info = (uzfs_test_info_t *)arg; + avl_tree_t *modified_block_tree; + uint64_t first_txg, last_txg; + hrtime_t end, now; + uint64_t blk_offset, offset, vol_blocks; + uint64_t io_num = 0; + void *spa, *zvol, *cookie = NULL; + char *buf; + int max_io, count, i = 0; + avl_tree_t *write_io_tree; + avl_index_t where; + uzfs_zvol_blk_phy_t *blk_info, temp_blk_info; + + setup_unit_test(); + unit_test_create_pool_ds(); + open_pool_ds(&spa, &zvol); + + vol_blocks = active_size / block_size; + buf = umem_alloc(block_size, UMEM_NOFAIL); + + uzfs_create_txg_diff_tree((void **)&write_io_tree); + + now = gethrtime(); + end = now + (hrtime_t)(total_time_in_sec * (hrtime_t)(NANOSEC)); + + while (i++ < 5) { + count = 0; + cookie = NULL; + // Here, consider test_iterations as number of ios + max_io = test_iterations; + io_num = 0; + + txg_wait_synced(spa_get_dsl(spa), 0); + first_txg = spa_last_synced_txg(spa); + + while (count++ < max_io) { + where = 0; + blk_offset = uzfs_random(vol_blocks - 16); + /* + * make sure offset is aligned to block size + */ + offset = blk_offset * block_size; + + temp_blk_info.offset = offset; + if (avl_find(write_io_tree, &temp_blk_info, &where)) + continue; + + populate_data(buf, offset, 0, block_size); + + if (uzfs_write_data(zvol, buf, offset, uzfs_random(1) ? 
+ block_size : io_block_size, &io_num)) + printf("IO error at offset: %lu len: %lu\n", + offset, block_size); + + blk_info = umem_alloc(sizeof (uzfs_zvol_blk_phy_t), + UMEM_NOFAIL); + blk_info->offset = offset; + blk_info->len = block_size; + avl_insert(write_io_tree, blk_info, where); + blk_info = NULL; + io_num++; + } + + txg_wait_synced(spa_get_dsl(spa), 0); + last_txg = spa_last_synced_txg(spa); + + uzfs_get_txg_diff_tree(zvol, first_txg, last_txg, + (void **)&modified_block_tree); + + while ((blk_info = avl_destroy_nodes(write_io_tree, + &cookie)) != NULL) { + VERIFY0(del_from_txg_diff_tree(modified_block_tree, + blk_info->offset, blk_info->len)); + umem_free(blk_info, sizeof (uzfs_zvol_blk_phy_t)); + } + + VERIFY0(avl_numnodes(modified_block_tree)); + VERIFY0(avl_numnodes(write_io_tree)); + printf("%s : pass:%d\n", test_info->name, i); + umem_free(modified_block_tree, sizeof (*modified_block_tree)); + modified_block_tree = NULL; + } + + uzfs_close_dataset(zvol); + uzfs_close_pool(spa); + uzfs_destroy_txg_diff_tree(write_io_tree); + umem_free(buf, block_size); +} + +static void +check_tree(avl_tree_t *tree, uint64_t offset, uint64_t len, uint64_t exp_off, + uint64_t exp_len, int exp_ret) +{ + int ret; + uint64_t len1 = 0; + + ret = uzfs_search_txg_diff_tree(tree, exp_off, &len1); + + VERIFY(ret == exp_ret); + + if (ret) + VERIFY(exp_len == len1); +} + +static void +add_and_check_tree(avl_tree_t *tree, uint64_t offset, uint64_t len, + uint64_t exp_off, uint64_t exp_len, int exp_ret) +{ + add_to_txg_diff_tree(tree, offset, len); + check_tree(tree, offset, len, exp_off, exp_len, exp_ret); +} + +static void +delete_and_check_tree(avl_tree_t *tree, uint64_t offset, uint64_t len, + uint64_t exp_off, uint64_t exp_len, int exp_ret) +{ + del_from_txg_diff_tree(tree, offset, len); + check_tree(tree, offset, len, exp_off, exp_len, exp_ret); +} + +void +uzfs_txg_diff_tree_test(void *arg) +{ + uzfs_test_info_t *test_info = (uzfs_test_info_t *)arg; + avl_tree_t *tree; + 
uint64_t blksz = io_block_size; + + uzfs_create_txg_diff_tree((void **)&tree); + + add_and_check_tree(tree, 100, 50, 100, 50, 1); + add_and_check_tree(tree, 150, 50, 100, 100, 1); + add_and_check_tree(tree, 5 * blksz, blksz, 5 * blksz, blksz, 1); + add_and_check_tree(tree, 4 * blksz, blksz, 4 * blksz, + 2 * blksz, 1); + check_tree(tree, 4 * blksz, blksz, 5 * blksz, blksz, 0); + add_and_check_tree(tree, 2 * blksz, blksz, 2 * blksz, blksz, 1); + check_tree(tree, 2 * blksz, blksz, 4 * blksz, 2 * blksz, 1); + check_tree(tree, 2 * blksz, blksz, 5 * blksz, blksz, 0); + add_and_check_tree(tree, 2 * blksz, blksz, 2 * blksz, blksz, 1); + check_tree(tree, 2 * blksz, blksz, 4 * blksz, 2 * blksz, 1); + check_tree(tree, 2 * blksz, blksz, 5 * blksz, blksz, 0); + add_and_check_tree(tree, blksz, blksz, blksz, 2 * blksz, 1); + check_tree(tree, blksz, blksz, 2 * blksz, blksz, 0); + check_tree(tree, blksz, blksz, 4 * blksz, 2 * blksz, 1); + check_tree(tree, blksz, blksz, 5 * blksz, blksz, 0); + add_and_check_tree(tree, 3 * blksz, blksz, blksz, 5 * blksz, 1); + check_tree(tree, 3 * blksz, blksz, 2 * blksz, blksz, 0); + check_tree(tree, 3 * blksz, blksz, 3 * blksz, blksz, 0); + check_tree(tree, 3 * blksz, blksz, 4 * blksz, 2 * blksz, 0); + check_tree(tree, 3 * blksz, blksz, 5 * blksz, blksz, 0); + add_and_check_tree(tree, blksz, blksz, blksz, 5 * blksz, 1); + check_tree(tree, blksz, blksz, 2 * blksz, blksz, 0); + check_tree(tree, blksz, blksz, 3 * blksz, blksz, 0); + check_tree(tree, blksz, blksz, 4 * blksz, 2 * blksz, 0); + check_tree(tree, blksz, blksz, 5 * blksz, blksz, 0); + add_and_check_tree(tree, 3 * blksz, blksz, blksz, 5 * blksz, 1); + check_tree(tree, 3 * blksz, blksz, 2 * blksz, blksz, 0); + check_tree(tree, 3 * blksz, blksz, 3 * blksz, blksz, 0); + check_tree(tree, 3 * blksz, blksz, 4 * blksz, 2 * blksz, 0); + check_tree(tree, 3 * blksz, blksz, 5 * blksz, blksz, 0); + delete_and_check_tree(tree, blksz, blksz, 2 * blksz, + 4 * blksz, 1); + check_tree(tree, blksz, blksz, 
blksz, blksz, 0); + check_tree(tree, blksz, blksz, 3 * blksz, blksz, 0); + check_tree(tree, blksz, blksz, 4 * blksz, 2 * blksz, 0); + check_tree(tree, blksz, blksz, 5 * blksz, blksz, 0); + delete_and_check_tree(tree, 2 * blksz, blksz, 3 * blksz, + 3 * blksz, 1); + check_tree(tree, 2 * blksz, blksz, blksz, blksz, 0); + check_tree(tree, 2 * blksz, blksz, 2 * blksz, blksz, 0); + check_tree(tree, 2 * blksz, blksz, 4 * blksz, 2 * blksz, 0); + check_tree(tree, 2 * blksz, blksz, 5 * blksz, blksz, 0); + delete_and_check_tree(tree, 110, 10, 100, 10, 1); + check_tree(tree, 120, 30, 120, 80, 1); + add_and_check_tree(tree, 60, 20, 50, 0, 0); + add_and_check_tree(tree, 60, 20, 60, 20, 1); + delete_and_check_tree(tree, 60, 10, 70, 10, 1); + add_and_check_tree(tree, 80, 20, 70, 40, 1); + add_and_check_tree(tree, 80, 20, 90, 0, 0); + add_and_check_tree(tree, 80, 20, 100, 0, 0); + add_and_check_tree(tree, 200, 50, 200, 0, 0); + add_and_check_tree(tree, 200, 50, 70, 40, 1); + delete_and_check_tree(tree, 230, 20, 200, 0, 0); + add_and_check_tree(tree, 40, 45, 40, 70, 1); + add_and_check_tree(tree, 40, 45, 70, 40, 0); + add_and_check_tree(tree, 150, 50, 120, 110, 1); + add_and_check_tree(tree, 130, 140, 120, 150, 1); + add_and_check_tree(tree, 30, 270, 30, 270, 1); + add_and_check_tree(tree, 30, 270, 40, 0, 0); + add_and_check_tree(tree, 130, 140, 130, 140, 0); + add_and_check_tree(tree, 130, 140, 120, 150, 0); + + uzfs_destroy_txg_diff_tree((void *)tree); + printf("%s pass\n", test_info->name); +} diff --git a/cmd/uzfs_test/uzfs_zvol_zap.c b/cmd/uzfs_test/uzfs_zvol_zap.c index c633a6c4cb61..2bddb4beda38 100644 --- a/cmd/uzfs_test/uzfs_zvol_zap.c +++ b/cmd/uzfs_test/uzfs_zvol_zap.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -29,9 +30,8 @@ destroy_zap_entries(uzfs_zap_kv_t **kv_array, uint64_t zap_count) for (i = 0; i < zap_count; i++) { kv = kv_array[i]; - free(kv->key); - free(kv->value); - free(kv); + umem_free(kv->key, strlen(kv->key) + 1); + 
umem_free(kv, sizeof (*kv)); kv = NULL; } } @@ -41,22 +41,19 @@ fill_up_zap_entries(uzfs_zap_kv_t **array, uint64_t n) { int i = 0; uzfs_zap_kv_t *zap; - uint64_t key_len, value_len; + uint64_t key_len, value; for (i = 0; i < n; i++, zap = NULL) { - zap = malloc(sizeof (uzfs_zap_kv_t)); + zap = umem_alloc(sizeof (uzfs_zap_kv_t), UMEM_NOFAIL); key_len = uzfs_random(32); - value_len = uzfs_random(32); key_len = (key_len < 8) ? 8 : key_len; - value_len = (value_len < 8) ? 8 : value_len; - zap->key = malloc(key_len); - zap->value = malloc(value_len); - zap->size = value_len; + zap->key = umem_alloc(key_len, UMEM_NOFAIL); + zap->value = uzfs_random(ULONG_MAX); + zap->size = sizeof (value); populate_string(zap->key, key_len); - populate_string(zap->value, value_len); array[i] = zap; } } @@ -69,7 +66,7 @@ update_zap_entries(uzfs_zap_kv_t **array, uint64_t n) for (i = 0; i < n; i++) { zap = array[i]; - populate_string(zap->value, zap->size); + zap->value = uzfs_random(ULONG_MAX); } } @@ -78,30 +75,37 @@ void verify_zap_entries(void *zvol, uzfs_zap_kv_t **key_array, uint64_t count) { uzfs_zap_kv_t *kv; - char *value, *temp_value; + uint64_t value; int i = 0, err; - uzfs_zap_kv_t dummy_key; + uzfs_zap_kv_t *dummy_key; for (i = 0; i < count; i++) { kv = key_array[i]; - temp_value = kv->value; - kv->value = calloc(1, kv->size); + value = kv->value; + kv->value = 0; uzfs_read_zap_entry(zvol, kv); - VERIFY0(strncmp(kv->value, temp_value, kv->size)); - free(temp_value); - value = NULL; + VERIFY(kv->value == value); } - dummy_key.key = malloc(20); - dummy_key.value = malloc(20); - dummy_key.size = 20; + dummy_key = umem_alloc(sizeof (*dummy_key), UMEM_NOFAIL); + dummy_key->size = sizeof (dummy_key->value); - dummy_key.key = "DUMMY"; - err = uzfs_read_zap_entry(zvol, &dummy_key); + dummy_key->key = "DUMMY"; + err = uzfs_read_zap_entry(zvol, dummy_key); if (err == 0) { printf("read zap should fail..\n"); exit(1); } + + dummy_key->size = 16; + err = uzfs_update_zap_entries(zvol, + 
(const uzfs_zap_kv_t **) &dummy_key, 1); + if (err != EINVAL) { + printf("error in zap update\n"); + exit(1); + } + + umem_free(dummy_key, sizeof (*dummy_key)); } void @@ -125,7 +129,8 @@ uzfs_zvol_zap_operation(void *arg) while (i++ < test_iterations) { zap_count = uzfs_random(16) + 1; - kv_array = malloc(zap_count * sizeof (*kv_array)); + kv_array = umem_alloc(zap_count * sizeof (*kv_array), + UMEM_NOFAIL); fill_up_zap_entries(kv_array, zap_count); /* update key/value pair in ZAP entries */ @@ -142,8 +147,18 @@ uzfs_zvol_zap_operation(void *arg) verify_zap_entries(zvol, kv_array, zap_count); + uzfs_zap_kv_t *temp_kv; + temp_kv = kv_array[0]; + umem_free(temp_kv->key, strlen(temp_kv->key) + 1); + temp_kv->key = umem_alloc(MZAP_NAME_LEN + 4, UMEM_NOFAIL); + populate_string(temp_kv->key, MZAP_NAME_LEN + 4); + temp_kv->value = 2; + temp_kv->size = sizeof (temp_kv->value); + VERIFY(uzfs_update_zap_entries(zvol, + (const uzfs_zap_kv_t **) kv_array, zap_count) == EINVAL); + destroy_zap_entries(kv_array, zap_count); - free(kv_array); + umem_free(kv_array, zap_count * sizeof (*kv_array)); kv_array = NULL; printf("%s pass:%d\n", test_info->name, i); diff --git a/include/Makefile.am b/include/Makefile.am index 1f7d3de9eaf7..afad0ebd89dd 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -33,7 +33,8 @@ USER_H = \ $(top_srcdir)/include/rte_pause.h \ $(top_srcdir)/include/uzfs_zap.h \ $(top_srcdir)/include/uzfs.h \ - $(top_srcdir)/include/uzfs_task.h + $(top_srcdir)/include/uzfs_task.h \ + $(top_srcdir)/include/uzfs_mtree.h EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H) diff --git a/include/sys/uzfs_zvol.h b/include/sys/uzfs_zvol.h index 6efb65714071..eb27cba13243 100644 --- a/include/sys/uzfs_zvol.h +++ b/include/sys/uzfs_zvol.h @@ -102,5 +102,11 @@ uint64_t get_metadata_len(zvol_state_t *zv, uint64_t offset, uint64_t len); */ extern zil_replay_func_t zvol_replay_vector[TX_MAX_TYPE]; +typedef struct uzfs_zvol_blk_phy { + uint64_t offset; + uint64_t len; + 
#ifndef _UZFS_MTREE_H
#define _UZFS_MTREE_H

/*
 * Modified-block ("txg diff") tree API.
 *
 * Trees and zvol handles are passed as opaque void pointers here; the
 * implementation in uzfs_mtree.c works on avl_tree_t and zvol_state_t.
 */

/*
 * Build a tree describing the blocks of zvol 'zv' whose birth txg lies in
 * [start_txg, end_txg]. Returns 0 or an error number. On success '*tree'
 * receives a newly allocated tree that the caller releases with
 * uzfs_destroy_txg_diff_tree().
 */
extern int uzfs_get_txg_diff_tree(void *zv, uint64_t start_txg,
    uint64_t end_txg, void **tree);

/* Print every (offset, length) entry of 'tree' in ascending offset order. */
extern void dump_txg_diff_tree(void *tree);

/* Allocate an empty tree and store it in '*tree'. */
extern void uzfs_create_txg_diff_tree(void **tree);

/* Free every entry of 'tree' and then the tree itself. */
extern void uzfs_destroy_txg_diff_tree(void *tree);

/*
 * Add the range (offset, size) to 'tree', merging it with entries it
 * overlaps or touches. The definition returns nothing, so it is declared
 * void here rather than int.
 */
extern void add_to_txg_diff_tree(void *tree, uint64_t offset, uint64_t size);
#endif
uzfs_txg_diff_verifcation_test(void *arg); #endif diff --git a/include/uzfs_zap.h b/include/uzfs_zap.h index 3afdd25e92b5..fd84ec40f140 100644 --- a/include/uzfs_zap.h +++ b/include/uzfs_zap.h @@ -24,7 +24,7 @@ typedef struct { char *key; /* zap key to update */ - char *value; /* value to update against zap key */ + uint64_t value; /* value to update against zap key */ size_t size; /* size of value */ } uzfs_zap_kv_t; @@ -33,7 +33,9 @@ typedef struct { extern long long txg_update_interval_time; /* - * Here, allocation/freeing of kv_array needs to be handled by caller function. + * Here, allocation/freeing of kv_array needs to be handled by + * caller function. uzfs_*_zap_entry will handle only microzap + * entries or value with uint64_t entries. */ int uzfs_update_zap_entries(void *zv, const uzfs_zap_kv_t **kv_array, uint64_t n); diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index 0307661a19b1..33ed5096f6c3 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -21,6 +21,7 @@ USER_C = \ uzfs_io.c \ uzfs_task.c \ uzfs_mgmt.c \ + uzfs_mtree.c \ uzfs_test_mgmt.c \ uzfs_zap.c \ vdev_disk_aio.c diff --git a/lib/libzpool/uzfs_mtree.c b/lib/libzpool/uzfs_mtree.c new file mode 100644 index 000000000000..6cb5d2fbe7eb --- /dev/null +++ b/lib/libzpool/uzfs_mtree.c @@ -0,0 +1,304 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#include +#include +#include +#include +#include +#include +#include + +#define TXG_DIFF_SNAPNAME "tsnap" + +typedef struct uzfs_txg_diff_cb_args { + avl_tree_t *uzfs_txg_diff_tree; + uint64_t start_txg; + uint64_t end_txg; +} uzfs_txg_diff_cb_args_t; + +/* + * Add entry with (offset, len) to tree. + * Merge new entry with an existing entry if new entry overlaps with + * existing entry. + */ +void +add_to_txg_diff_tree(avl_tree_t *tree, uint64_t boffset, uint64_t blen) +{ + uint64_t new_offset, new_len, b_end, a_end; + uzfs_zvol_blk_phy_t *entry, *new_node, *b_entry, *a_entry; + uzfs_zvol_blk_phy_t tofind; + avl_index_t where; + + new_offset = boffset; + new_len = blen; + +find: + tofind.offset = new_offset; + tofind.len = new_len; + entry = avl_find(tree, &tofind, &where); + + /* + * new_offset is available in tree. + * If entry->len is greater than or equal to new_len then skip adding + * a new_entry else remove entry and search again for new entry. + */ + if (entry != NULL) { + if (entry->len >= new_len) { + return; + } else { + avl_remove(tree, entry); + umem_free(entry, sizeof (*entry)); + goto find; + } + } + + // search for nearest entry whose offset is lesser than new_offset + b_entry = avl_nearest(tree, where, AVL_BEFORE); + if (b_entry) { + b_end = (b_entry->offset + b_entry->len); + + /* + * If new entry doesn't overlap with new_entry then search + * for after and entry whose offset is greater than + * new_entry's offset + */ + if (b_end < new_offset) + goto after; + + /* + * If new_entry's offset and b_entry's end are same, then + * remove b_entry and add new entry whose offset = + * (b_entry's offset) and length = (b_entry's len + + * new entry's len). 
+ */ + if (b_end == new_offset) { + new_len += (b_entry->len); + new_offset = b_entry->offset; + avl_remove(tree, b_entry); + umem_free(b_entry, sizeof (*b_entry)); + goto find; + } + + /* + * If new_entry overlaps with b_entry, then remove b_entry and + * add new entry whose offset = (b_entry's offset) and len = + * ("b_entry's len" + "new_entry's len" - "overlap len"). + */ + if (b_end < (new_offset + new_len)) { + new_len += (new_offset - b_entry->offset); + new_offset = b_entry->offset; + avl_remove(tree, b_entry); + umem_free(b_entry, sizeof (*b_entry)); + goto find; + } + + // new_entry overlaps with b_entry completely + if (b_end >= (new_offset + new_len)) + return; + } + +after: + /* + * search for nearest entry whose offset is greater than new_offset + * Here, If we can not find any entry which overlaps with new_entry then + * we will add new_entry to tree else merge new_entry with nearest + * entry. + */ + a_entry = avl_nearest(tree, where, AVL_AFTER); + + if (a_entry) { + a_end = (a_entry->offset + a_entry->len); + + // new_entry doesn't overlap with a_entry + if ((new_offset + new_len) < a_entry->offset) + goto doadd; + + // new_entry's end and a_entry's offset are same + if ((new_offset + new_len) == a_entry->offset) { + new_len += a_entry->len; + avl_remove(tree, a_entry); + umem_free(a_entry, sizeof (*a_entry)); + goto find; + } + + /* + * new_entry overlaps with a_entry and new_entry's end is + * lesser or equal to a_entry's end + */ + if ((new_offset + new_len) <= (a_end)) { + new_len = (a_entry->len) + + (a_entry->offset - new_offset); + avl_remove(tree, a_entry); + umem_free(a_entry, sizeof (*a_entry)); + goto find; + } + + /* + * new_entry overlaps with a_entry and new_entry's end is + * greater than a_entry's end + */ + if ((new_offset + new_len) > (a_end)) { + avl_remove(tree, a_entry); + umem_free(a_entry, sizeof (*a_entry)); + goto find; + } + } + +doadd: + new_node = umem_alloc(sizeof (uzfs_zvol_blk_phy_t), UMEM_NOFAIL); + new_node->offset 
= new_offset; + new_node->len = new_len; + avl_insert(tree, new_node, where); +} + +void +dump_txg_diff_tree(avl_tree_t *tree) +{ + uzfs_zvol_blk_phy_t *blk; + + for (blk = avl_first(tree); blk; blk = AVL_NEXT(tree, blk)) { + printf("offset:%lu, length:%lu\n", blk->offset, blk->len); + } +} + +int +uzfs_txg_diff_cb(spa_t *spa, zilog_t *zillog, const blkptr_t *bp, + const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) +{ + uint64_t blksz; + uzfs_txg_diff_cb_args_t *diff_blk_info = (uzfs_txg_diff_cb_args_t *)arg; + + if ((bp == NULL) || (BP_IS_HOLE(bp)) || (zb->zb_object != ZVOL_OBJ) || + (zb->zb_level != 0)) + return (0); + + if (bp->blk_birth > diff_blk_info->end_txg || + bp->blk_birth < diff_blk_info->start_txg) + return (0); + + blksz = BP_GET_LSIZE(bp); + + add_to_txg_diff_tree(diff_blk_info->uzfs_txg_diff_tree, + zb->zb_blkid * blksz, blksz); + return (0); +} + +static int +uzfs_txg_diff_tree_compare(const void *arg1, const void *arg2) +{ + uzfs_zvol_blk_phy_t *node1 = (uzfs_zvol_blk_phy_t *)arg1; + uzfs_zvol_blk_phy_t *node2 = (uzfs_zvol_blk_phy_t *)arg2; + + return (AVL_CMP(node1->offset, node2->offset)); +} + + +int +uzfs_get_txg_diff_tree(zvol_state_t *zv, uint64_t start_txg, uint64_t end_txg, + avl_tree_t **tree) +{ + int error; + char snapname[ZFS_MAX_DATASET_NAME_LEN]; + uzfs_txg_diff_cb_args_t diff_blk; + hrtime_t now; + dsl_pool_t *dp; + dsl_dataset_t *ds_snap; + + now = gethrtime(); + snprintf(snapname, sizeof (snapname), "%s%llu", TXG_DIFF_SNAPNAME, now); + + error = dmu_objset_snapshot_one(zv->zv_name, snapname); + if (error) { + printf("failed to create snapshot for %s\n", zv->zv_name); + return (error); + } + + memset(snapname, 0, sizeof (snapname)); + snprintf(snapname, sizeof (snapname), "%s@%s%llu", zv->zv_name, + TXG_DIFF_SNAPNAME, now); + + error = dsl_pool_hold(snapname, FTAG, &dp); + if (error != 0) + return (error); + + error = dsl_dataset_hold(dp, snapname, FTAG, &ds_snap); + if (error != 0) { + dsl_pool_rele(dp, FTAG); + 
return (error); + } + + dsl_dataset_long_hold(ds_snap, FTAG); + + memset(&diff_blk, 0, sizeof (diff_blk)); + + diff_blk.uzfs_txg_diff_tree = umem_alloc(sizeof (avl_tree_t), + UMEM_NOFAIL); + avl_create(diff_blk.uzfs_txg_diff_tree, uzfs_txg_diff_tree_compare, + sizeof (uzfs_zvol_blk_phy_t), + offsetof(uzfs_zvol_blk_phy_t, uzb_link)); + + diff_blk.start_txg = start_txg; + diff_blk.end_txg = end_txg; + + error = traverse_dataset(ds_snap, start_txg, + TRAVERSE_PRE, uzfs_txg_diff_cb, &diff_blk); + + *tree = diff_blk.uzfs_txg_diff_tree; + + dsl_dataset_long_rele(ds_snap, FTAG); + dsl_dataset_rele(ds_snap, FTAG); + dsl_pool_rele(dp, FTAG); + + /* + * TODO: if we failed to destroy snapshot here then + * this should be handled separately from application. + */ + (void) dsl_destroy_snapshot(snapname, B_FALSE); + return (error); +} + +void +uzfs_create_txg_diff_tree(void **tree) +{ + avl_tree_t *temp_tree; + + temp_tree = umem_alloc(sizeof (avl_tree_t), UMEM_NOFAIL); + avl_create(temp_tree, uzfs_txg_diff_tree_compare, + sizeof (uzfs_zvol_blk_phy_t), + offsetof(uzfs_zvol_blk_phy_t, uzb_link)); + *tree = temp_tree; +} + +void +uzfs_destroy_txg_diff_tree(void *tree) +{ + avl_tree_t *temp_tree = tree; + uzfs_zvol_blk_phy_t *node; + void *cookie = NULL; + + while ((node = avl_destroy_nodes(temp_tree, &cookie)) != NULL) { + umem_free(node, sizeof (*node)); + } + + avl_destroy(temp_tree); + umem_free(temp_tree, sizeof (*temp_tree)); +} diff --git a/lib/libzpool/uzfs_zap.c b/lib/libzpool/uzfs_zap.c index 06727fd58802..78539ae0c084 100644 --- a/lib/libzpool/uzfs_zap.c +++ b/lib/libzpool/uzfs_zap.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -50,6 +51,24 @@ uzfs_update_zap_entries(void *zvol, const uzfs_zap_kv_t **array, int err; int i = 0; + /* + * micro zap will upgrade to fat-zap in following cases: + * 1. key length is greater or equal to MZAP_NAME_LEN + * 2. 
value size is greater than 8 + * To avoid this, update zap-entries only if key length < MZAP_NAME_LEN + * and value size == 8 (a single uint64_t). + */ + for (i = 0; i < count; i++) { + kv = array[i]; + /* + * checks to avoid fat zap upgrade and value size + */ + if (strlen(kv->key) >= MZAP_NAME_LEN) + return (EINVAL); + if (kv->size != 8) + return (EINVAL); + } + tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); @@ -61,8 +80,8 @@ uzfs_update_zap_entries(void *zvol, const uzfs_zap_kv_t **array, for (i = 0; i < count; i++) { kv = array[i]; - VERIFY0(zap_update(os, ZVOL_ZAP_OBJ, kv->key, 1, kv->size, - kv->value, tx)); + VERIFY0(zap_update(os, ZVOL_ZAP_OBJ, kv->key, kv->size, 1, + &kv->value, tx)); } dmu_tx_commit(tx); @@ -80,8 +99,8 @@ uzfs_read_zap_entry(void *zvol, uzfs_zap_kv_t *entry) objset_t *os = zv->zv_objset; int err; - err = zap_lookup(os, ZVOL_ZAP_OBJ, entry->key, 1, entry->size, - entry->value); + err = zap_lookup(os, ZVOL_ZAP_OBJ, entry->key, entry->size, 1, + &entry->value); if (err) return (SET_ERROR(err)); diff --git a/tests/cbtest/script/test_uzfs.sh b/tests/cbtest/script/test_uzfs.sh index c59bb198b2e7..4b322641d371 100755 --- a/tests/cbtest/script/test_uzfs.sh +++ b/tests/cbtest/script/test_uzfs.sh @@ -611,8 +611,16 @@ run_uzfs_test() log_must setup_uzfs_test log 65536 nosync log_must $UZFS_TEST -l -i 8192 -b 65536 -T 2 + K=1024 + M=$(( 1024 * 1024 )) + G=$(( 1024 * 1024 * 1024 )) + log_must setup_uzfs_test log 65536 sync log_must $UZFS_TEST -s -l -i 8192 -b 65536 -T 2 + log_must $UZFS_TEST -t 10 -a $(( 50 * 1024 * 1024 )) -T 3 -n 10000 + log_must $UZFS_TEST -t 10 -a $(( 100 * 1024 * 1024 )) -T 3 -n 10000 + log_must $UZFS_TEST -t 10 -a $(( 1000 * 1024 * 1024 )) -T 3 -n 10000 + log_must $UZFS_TEST -t 10 -T 4 log_must $UZFS_TEST -t 10 -T 0