From 4ca38c6b33edb5f55b632e708d40a2293f140324 Mon Sep 17 00:00:00 2001 From: Peter Fetros Date: Fri, 20 Mar 2020 22:49:03 +0000 Subject: [PATCH 1/4] DAOS-4350 Placement: Extended Placement Unit Test Greatly extended unit tests to cover many different object classes with placement, rebuild, and reintegration tests for each object class. Signed-off-by: Peter Fetros --- src/placement/jump_map.c | 57 +--- src/placement/tests/jump_map_place_obj.c | 373 ++++++++++++++--------- src/placement/tests/place_obj_common.c | 116 ++++++- src/placement/tests/place_obj_common.h | 19 +- src/placement/tests/ring_map_place_obj.c | 14 +- 5 files changed, 374 insertions(+), 205 deletions(-) diff --git a/src/placement/jump_map.c b/src/placement/jump_map.c index c72e623317a..9ece1a2dad8 100644 --- a/src/placement/jump_map.c +++ b/src/placement/jump_map.c @@ -336,22 +336,19 @@ uint32_t count_available_spares(struct pl_jump_map *jmap, struct pl_obj_layout *layout, uint32_t failed_in_layout) { - uint32_t num_failed; - uint32_t spares_left; + uint32_t unusable_tgts; + uint32_t num_targets; - spares_left = jmap->jmp_domain_nr; - num_failed = pool_map_get_failed_cnt(jmap->jmp_map.pl_poolmap, - jmap->min_redundant_dom); + num_targets = pool_map_find_target(jmap->jmp_map.pl_poolmap, + PO_COMP_ID_ALL, NULL); - if (spares_left + failed_in_layout < (num_failed + layout->ol_nr)) + /* we might not have any valid targets left at all */ + unusable_tgts = layout->ol_nr; + + if (unusable_tgts >= num_targets || layout->ol_grp_size == 1) return 0; - /* Add back the ones already counted as failed in the layout - * Or we would double count them. 
- */ - spares_left = spares_left - (num_failed + layout->ol_nr); - spares_left += failed_in_layout; - return spares_left; + return num_targets - unusable_tgts; } /** @@ -422,9 +419,7 @@ obj_remap_shards(struct pl_jump_map *jmap, struct daos_obj_md *md, get_target(root, &spare_tgt, crc(key, rebuild_key), dom_used, tgts_used, layout, shard_id); - - if (pool_target_unavail(spare_tgt, for_reint) == false) - spares_left--; + spares_left--; } determine_valid_spares(spare_tgt, md, spare_avail, &current, @@ -515,12 +510,10 @@ get_object_layout(struct pl_jump_map *jmap, struct pl_obj_layout *layout, { struct pool_target *target; struct pool_domain *root; - struct pool_domain *min_redundant_dom; daos_obj_id_t oid; uint8_t *dom_used; uint8_t *tgts_used; uint32_t dom_used_length; - uint32_t doms_left; uint64_t key; uint32_t fail_tgt_cnt; int i, j, k, rc; @@ -533,17 +526,8 @@ get_object_layout(struct pl_jump_map *jmap, struct pl_obj_layout *layout, fail_tgt_cnt = 0; oid = md->omd_id; key = oid.lo; - doms_left = jmap->jmp_domain_nr; target = NULL; - rc = pool_map_find_domain(jmap->jmp_map.pl_poolmap, - jmap->min_redundant_dom, - PO_COMP_ID_ALL, &min_redundant_dom); - if (rc == 0) { - D_ERROR("Could not find node type in pool map."); - return -DER_NONEXIST; - } - rc = pool_map_find_domain(jmap->jmp_map.pl_poolmap, PO_COMP_TP_ROOT, PO_COMP_ID_ALL, &root); if (rc == 0) { @@ -597,20 +581,8 @@ get_object_layout(struct pl_jump_map *jmap, struct pl_obj_layout *layout, k = 1; } - for (i = 0; i < jmop->jmop_grp_nr; i++) { - if (doms_left < jmop->jmop_grp_size) { - uint32_t start_dom; - uint32_t end_dom; - - doms_left = jmap->jmp_domain_nr; - start_dom = min_redundant_dom - root; - end_dom = start_dom + (doms_left - 1); - - clrbit_range(dom_used, start_dom, end_dom); - } - for (; j < jmop->jmop_grp_size; j++, k++) { uint32_t tgt_id; uint32_t fseq; @@ -633,13 +605,12 @@ get_object_layout(struct pl_jump_map *jmap, struct pl_obj_layout *layout, if (rc) D_GOTO(out, rc); } - doms_left--; } j = 0; } 
- rc = obj_remap_shards(jmap, md, layout, jmop, remap_list, for_reint, - tgts_used, dom_used, fail_tgt_cnt); + rc = obj_remap_shards(jmap, md, layout, jmop, remap_list, + for_reint, tgts_used, dom_used, fail_tgt_cnt); out: if (rc) { D_ERROR("jump_map_obj_layout_fill failed, rc "DF_RC"\n", @@ -971,8 +942,8 @@ jump_map_obj_find_reint(struct pl_map *map, struct daos_obj_md *md, } rc = remap_list_fill(map, md, shard_md, reint_ver, tgt_rank, shard_id, - array_size, myrank, &idx, layout, &reint_list); - + array_size, myrank, &idx, reint_layout, + &reint_list); out: remap_list_free_all(&reint_list); remap_list_free_all(&remap_list); diff --git a/src/placement/tests/jump_map_place_obj.c b/src/placement/tests/jump_map_place_obj.c index 332536808f3..6d83e223e42 100644 --- a/src/placement/tests/jump_map_place_obj.c +++ b/src/placement/tests/jump_map_place_obj.c @@ -24,6 +24,7 @@ #include #include +#include "daos_api.h" #include #include "place_obj_common.h" @@ -31,168 +32,266 @@ #define NODE_PER_DOM 1 #define VOS_PER_TARGET 4 #define SPARE_MAX_NUM (DOM_NR * 3) - #define COMPONENT_NR (DOM_NR + DOM_NR * NODE_PER_DOM + \ DOM_NR * NODE_PER_DOM * VOS_PER_TARGET) +#define NUM_TARGETS (DOM_NR * NODE_PER_DOM * VOS_PER_TARGET) + +#define TEST_PER_OC 1000 static bool pl_debug_msg; -int -main(int argc, char **argv) +void +placement_object_class(daos_oclass_id_t cid) { - int i; struct pool_map *po_map; - struct pl_obj_layout *lo_1; - struct pl_obj_layout *lo_2; - struct pl_obj_layout *lo_3; struct pl_map *pl_map; - uuid_t pl_uuid; - daos_obj_id_t oid; - uint32_t spare_tgt_candidate[SPARE_MAX_NUM]; - uint32_t spare_tgt_ranks[SPARE_MAX_NUM]; - uint32_t shard_ids[SPARE_MAX_NUM]; - uint32_t failed_tgts[SPARE_MAX_NUM]; - uint32_t reint_tgts[SPARE_MAX_NUM]; - static uint32_t po_ver; - unsigned int spare_cnt; - int rc; + struct pl_obj_layout *layout; + daos_obj_id_t oid; + int test_num; - po_ver = 1; - rc = daos_debug_init(NULL); - if (rc != 0) - return rc; + gen_pool_and_placement_map(DOM_NR, 
NODE_PER_DOM, + VOS_PER_TARGET, PL_TYPE_JUMP_MAP, + &po_map, &pl_map); + D_ASSERT(po_map != NULL); + D_ASSERT(pl_map != NULL); - rc = pl_init(); - if (rc != 0) { - daos_debug_fini(); - return rc; + srand(time(NULL)); + oid.hi = 5; + + for (test_num = 0; test_num < TEST_PER_OC; ++test_num) { + oid.lo = rand(); + daos_obj_generate_id(&oid, 0, cid, 0); + + plt_obj_place(oid, &layout, pl_map, false); + plt_obj_layout_check(layout, COMPONENT_NR, 0); + + pl_obj_layout_free(layout); } + free_pool_and_placement_map(po_map, pl_map); + D_PRINT("\tPlacement: OK\n"); +} + +void +rebuild_object_class(daos_oclass_id_t cid) +{ + struct pool_map *po_map; + struct pl_map *pl_map; + uint32_t spare_tgt_ranks[SPARE_MAX_NUM]; + uint32_t shard_ids[SPARE_MAX_NUM]; + daos_obj_id_t oid; + uuid_t pl_uuid; + struct daos_obj_md *md_arr; + struct daos_obj_md md = { 0 }; + struct pl_obj_layout *layout; + uint32_t po_ver; + int test_num; + int num_new_spares; + int fail_tgt; + int rc, i; + + uuid_generate(pl_uuid); + srand(time(NULL)); + oid.hi = 5; + po_ver = 1; + + D_ALLOC_ARRAY(md_arr, TEST_PER_OC); + D_ASSERT(md_arr != NULL); + gen_pool_and_placement_map(DOM_NR, NODE_PER_DOM, VOS_PER_TARGET, PL_TYPE_JUMP_MAP, &po_map, &pl_map); D_ASSERT(po_map != NULL); D_ASSERT(pl_map != NULL); - pool_map_print(po_map); - pl_map_print(pl_map); + + for (i = 0; i < TEST_PER_OC; ++i) { + oid.lo = rand(); + daos_obj_generate_id(&oid, 0, cid, 0); + dc_obj_fetch_md(oid, &md); + md.omd_ver = po_ver; + md_arr[i] = md; + } + + for (fail_tgt = 0; fail_tgt < NUM_TARGETS; ++fail_tgt) { + + /* Fail target and update the pool map */ + plt_fail_tgt(fail_tgt, &po_ver, po_map, pl_debug_msg); + pl_map_update(pl_uuid, po_map, false, PL_TYPE_JUMP_MAP); + pl_map = pl_map_find(pl_uuid, oid); + + for (test_num = 0; test_num < TEST_PER_OC; ++test_num) { + md_arr[test_num].omd_ver = po_ver; + + num_new_spares = pl_obj_find_rebuild(pl_map, + &md_arr[test_num], NULL, po_ver, + spare_tgt_ranks, shard_ids, + SPARE_MAX_NUM, -1); + + 
D_ASSERT(0 <= num_new_spares && num_new_spares < 2); + } + + + plt_fail_tgt_out(fail_tgt, &po_ver, po_map, pl_debug_msg); + pl_map_update(pl_uuid, po_map, false, PL_TYPE_JUMP_MAP); + pl_map = pl_map_find(pl_uuid, oid); + + for (test_num = 0; test_num < TEST_PER_OC; ++test_num) { + md_arr[test_num].omd_ver = po_ver; + + rc = pl_obj_place(pl_map, &md_arr[test_num], NULL, + &layout); + D_ASSERT(rc == 0); + + plt_obj_layout_check(layout, COMPONENT_NR, + layout->ol_nr); + pl_obj_layout_free(layout); + } + + } + + free_pool_and_placement_map(po_map, pl_map); + D_PRINT("\tRebuild: OK\n"); +} + +void +reint_object_class(daos_oclass_id_t cid) +{ + struct pool_map *po_map; + struct pl_map *pl_map; + uint32_t spare_tgt_ranks[SPARE_MAX_NUM]; + uint32_t shard_ids[SPARE_MAX_NUM]; + daos_obj_id_t oid; + uuid_t pl_uuid; + struct daos_obj_md *md_arr; + struct daos_obj_md md = { 0 }; + struct pl_obj_layout **layout; + struct pl_obj_layout *temp_layout; + uint32_t po_ver; + int test_num; + int num_reint; + int fail_tgt; + int rc, i; uuid_generate(pl_uuid); srand(time(NULL)); - oid.lo = rand(); oid.hi = 5; + po_ver = 1; + + D_ALLOC_ARRAY(md_arr, TEST_PER_OC); + D_ASSERT(md_arr != NULL); + D_ALLOC_ARRAY(layout, TEST_PER_OC); + D_ASSERT(layout != NULL); + + gen_pool_and_placement_map(DOM_NR, NODE_PER_DOM, + VOS_PER_TARGET, PL_TYPE_JUMP_MAP, + &po_map, &pl_map); + D_ASSERT(po_map != NULL); + D_ASSERT(pl_map != NULL); - /* initial placement when all nodes alive */ - daos_obj_generate_id(&oid, 0, OC_RP_4G2, 0); - D_PRINT("\ntest initial placement when no failed shard ...\n"); - plt_obj_place(oid, &lo_1, pl_map); - plt_obj_layout_check(lo_1, COMPONENT_NR); - - /* test plt_obj_place when some/all shards failed */ - D_PRINT("\ntest to fail all shards and new placement ...\n"); - for (i = 0; i < SPARE_MAX_NUM && i < lo_1->ol_nr; i++) - plt_fail_tgt(lo_1->ol_shards[i].po_target, &po_ver, po_map, - pl_debug_msg); - plt_obj_place(oid, &lo_2, pl_map); - plt_obj_layout_check(lo_2, COMPONENT_NR); - 
D_ASSERT(!pt_obj_layout_match(lo_1, lo_2, DOM_NR)); - D_PRINT("spare target candidate:"); - for (i = 0; i < SPARE_MAX_NUM && i < lo_1->ol_nr; i++) { - spare_tgt_candidate[i] = lo_2->ol_shards[i].po_target; - D_PRINT(" %d", spare_tgt_candidate[i]); + for (i = 0; i < TEST_PER_OC; ++i) { + oid.lo = rand(); + daos_obj_generate_id(&oid, 0, cid, 0); + dc_obj_fetch_md(oid, &md); + md.omd_ver = po_ver; + md_arr[i] = md; } - D_PRINT("\n"); - - D_PRINT("\ntest to add back all failed shards and new placement ...\n"); - for (i = 0; i < SPARE_MAX_NUM && i < lo_1->ol_nr; i++) - plt_add_tgt(lo_1->ol_shards[i].po_target, &po_ver, po_map, - pl_debug_msg); - plt_obj_place(oid, &lo_3, pl_map); - plt_obj_layout_check(lo_3, COMPONENT_NR); - D_ASSERT(pt_obj_layout_match(lo_1, lo_3, DOM_NR)); - - /* test pl_obj_find_rebuild */ - D_PRINT("\ntest pl_obj_find_rebuild to get correct spare tagets ...\n"); - failed_tgts[0] = lo_3->ol_shards[0].po_target; - failed_tgts[1] = lo_3->ol_shards[1].po_target; - D_PRINT("failed target %d[0], %d[1]\n, expected %d[0], %d[1]\n", - failed_tgts[0], failed_tgts[1], spare_tgt_candidate[0], - spare_tgt_candidate[1]); - plt_spare_tgts_get(pl_uuid, oid, failed_tgts, 2, spare_tgt_ranks, - pl_debug_msg, shard_ids, &spare_cnt, &po_ver, - PL_TYPE_JUMP_MAP, SPARE_MAX_NUM, po_map, pl_map); - plt_obj_rebuild_unique_check(shard_ids, spare_cnt, COMPONENT_NR); - D_ASSERT(spare_cnt == 2); - D_ASSERT(spare_tgt_ranks[0] == spare_tgt_candidate[0]); - D_ASSERT(spare_tgt_ranks[1] == spare_tgt_candidate[1]); - - /* test pl_obj_find_reint */ - D_PRINT("\ntest pl_obj_find_reint to get correct reintegration " - "tagets ...\n"); - failed_tgts[0] = lo_3->ol_shards[0].po_target; - failed_tgts[1] = lo_3->ol_shards[1].po_target; - reint_tgts[0] = lo_3->ol_shards[0].po_target; - plt_reint_tgts_get(pl_uuid, oid, failed_tgts, 2, reint_tgts, 1, - spare_tgt_ranks, shard_ids, &spare_cnt, PL_TYPE_JUMP_MAP, - SPARE_MAX_NUM, po_map, pl_map, &po_ver, pl_debug_msg); - D_PRINT("reintegrated target 
%d. expected target %d\n", - reint_tgts[0], lo_3->ol_shards[0].po_target); - - D_ASSERT(spare_cnt == 1); - D_ASSERT(shard_ids[0] == 0); - D_ASSERT(spare_tgt_ranks[0] == lo_3->ol_shards[0].po_target); - - /* fail the to-be-spare target and select correct next spare */ - failed_tgts[0] = lo_3->ol_shards[0].po_target; - failed_tgts[1] = lo_3->ol_shards[1].po_target; - failed_tgts[2] = spare_tgt_candidate[0]; - D_PRINT("\nfailed targets %d[1] %d %d[0], expected spare %d[1]\n", - failed_tgts[0], failed_tgts[1], failed_tgts[2], - spare_tgt_candidate[1]); - plt_spare_tgts_get(pl_uuid, oid, failed_tgts, 3, spare_tgt_ranks, - pl_debug_msg, shard_ids, &spare_cnt, &po_ver, - PL_TYPE_JUMP_MAP, SPARE_MAX_NUM, po_map, pl_map); - plt_obj_rebuild_unique_check(shard_ids, spare_cnt, COMPONENT_NR); - D_ASSERT(spare_cnt == 2); - D_ASSERT(shard_ids[0] == 1); - D_ASSERT(shard_ids[1] == 0); - D_ASSERT(spare_tgt_ranks[0] == spare_tgt_candidate[1]); - - /* test pl_obj_find_reint */ - D_PRINT("\ntest pl_obj_find_reint to get correct reintregation " - "tagets ...\n"); - failed_tgts[0] = lo_3->ol_shards[0].po_target; - failed_tgts[1] = spare_tgt_candidate[0]; - failed_tgts[2] = lo_3->ol_shards[1].po_target; - reint_tgts[0] = lo_3->ol_shards[0].po_target; - reint_tgts[1] = spare_tgt_candidate[0]; - plt_reint_tgts_get(pl_uuid, oid, failed_tgts, 3, reint_tgts, 2, - spare_tgt_ranks, shard_ids, &spare_cnt, PL_TYPE_JUMP_MAP, - SPARE_MAX_NUM, po_map, pl_map, &po_ver, pl_debug_msg); - D_PRINT("\n"); - D_ASSERT(spare_cnt >= 1); - D_ASSERT(shard_ids[spare_cnt-1] == 0); - D_ASSERT(spare_tgt_ranks[spare_cnt-1] == lo_3->ol_shards[0].po_target); - - failed_tgts[0] = spare_tgt_candidate[0]; - failed_tgts[1] = spare_tgt_candidate[1]; - failed_tgts[2] = lo_3->ol_shards[3].po_target; - failed_tgts[3] = lo_3->ol_shards[0].po_target; - failed_tgts[4] = lo_3->ol_shards[1].po_target; - D_PRINT("\nfailed targets %d %d %d[3] %d[0] %d[1]\n", - failed_tgts[0], failed_tgts[1], failed_tgts[2], failed_tgts[3], - 
failed_tgts[4]); - plt_spare_tgts_get(pl_uuid, oid, failed_tgts, 5, spare_tgt_ranks, - pl_debug_msg, shard_ids, &spare_cnt, &po_ver, - PL_TYPE_JUMP_MAP, SPARE_MAX_NUM, po_map, pl_map); - plt_obj_rebuild_unique_check(shard_ids, spare_cnt, COMPONENT_NR); - D_ASSERT(spare_cnt == 3); - - pl_obj_layout_free(lo_1); - pl_obj_layout_free(lo_2); - pl_obj_layout_free(lo_3); + + /* Generate layouts for later comparison*/ + for (test_num = 0; test_num < TEST_PER_OC; ++test_num) { + md_arr[test_num].omd_ver = po_ver; + + rc = pl_obj_place(pl_map, &md_arr[test_num], NULL, + &layout[test_num]); + D_ASSERT(rc == 0); + plt_obj_layout_check(layout[test_num], COMPONENT_NR, 0); + } + + /* fail all the targets */ + for (fail_tgt = 0; fail_tgt < NUM_TARGETS; ++fail_tgt) { + + plt_fail_tgt(fail_tgt, &po_ver, po_map, pl_debug_msg); + pl_map_update(pl_uuid, po_map, false, PL_TYPE_JUMP_MAP); + + plt_fail_tgt_out(fail_tgt, &po_ver, po_map, pl_debug_msg); + pl_map_update(pl_uuid, po_map, false, PL_TYPE_JUMP_MAP); + + } + + for (fail_tgt = 0; fail_tgt < NUM_TARGETS; ++fail_tgt) { + plt_reint_tgt(fail_tgt, &po_ver, po_map, pl_debug_msg); + pl_map_update(pl_uuid, po_map, false, PL_TYPE_JUMP_MAP); + pl_map = pl_map_find(pl_uuid, oid); + + for (test_num = 0; test_num < TEST_PER_OC; ++test_num) { + rc = pl_obj_place(pl_map, &md_arr[test_num], NULL, + &temp_layout); + D_ASSERT(rc == 0); + + num_reint = pl_obj_find_reint(pl_map, &md_arr[test_num], + NULL, po_ver, spare_tgt_ranks, + shard_ids, SPARE_MAX_NUM, -1); + + reint_check(layout[test_num], temp_layout, + spare_tgt_ranks, shard_ids, num_reint, + fail_tgt); + } + + plt_reint_tgt_up(fail_tgt, &po_ver, po_map, pl_debug_msg); + pl_map_update(pl_uuid, po_map, false, PL_TYPE_JUMP_MAP); + } + free_pool_and_placement_map(po_map, pl_map); + D_PRINT("\tReintegration: OK\n"); +} + +int +main(int argc, char **argv) +{ + struct daos_oclass_attr *obj_attr; + daos_obj_id_t oid; + daos_oclass_id_t *test_classes; + uint32_t num_test_oc; + char oclass_name[50]; + 
int oc_index; + int rc; + + rc = daos_debug_init(NULL); + if (rc != 0) + return rc; + + rc = pl_init(); + if (rc != 0) { + daos_debug_fini(); + return rc; + } + + num_test_oc = getObjectClasses(&test_classes); + + for (oc_index = 0; oc_index < num_test_oc; ++oc_index) { + + daos_oclass_id2name(test_classes[oc_index], oclass_name); + daos_obj_generate_id(&oid, 0, test_classes[oc_index], 0); + obj_attr = daos_oclass_attr_find(oid); + + /* Skip Erasure Codded objects for now. */ + if (obj_attr->ca_resil == DAOS_RES_EC) { + D_PRINT("Skipping erasure coded class: %s\n", + oclass_name); + continue; + } + + D_PRINT("Running oclass test: %s\n", oclass_name); + + placement_object_class(test_classes[oc_index]); + rebuild_object_class(test_classes[oc_index]); + reint_object_class(test_classes[oc_index]); + + } + + D_FREE(test_classes); + D_PRINT("all tests passed!\n"); + daos_debug_fini(); - D_PRINT("\nall tests passed!\n"); return 0; } diff --git a/src/placement/tests/place_obj_common.c b/src/placement/tests/place_obj_common.c index 8c49d4a31cb..8fdb443467d 100644 --- a/src/placement/tests/place_obj_common.c +++ b/src/placement/tests/place_obj_common.c @@ -26,10 +26,11 @@ #include #include #include "place_obj_common.h" +#include void plt_obj_place(daos_obj_id_t oid, struct pl_obj_layout **layout, - struct pl_map *pl_map) + struct pl_map *pl_map, bool print_layout) { struct daos_obj_md md; int i; @@ -39,19 +40,21 @@ plt_obj_place(daos_obj_id_t oid, struct pl_obj_layout **layout, md.omd_id = oid; md.omd_ver = 1; - D_PRINT("plt_obj_place\n"); rc = pl_obj_place(pl_map, &md, NULL, layout); D_ASSERT(rc == 0); - D_PRINT("Layout of object "DF_OID"\n", DP_OID(oid)); - for (i = 0; i < (*layout)->ol_nr; i++) - D_PRINT("%d ", (*layout)->ol_shards[i].po_target); + if (print_layout) { + D_PRINT("Layout of object "DF_OID"\n", DP_OID(oid)); + for (i = 0; i < (*layout)->ol_nr; i++) + printf("%d ", (*layout)->ol_shards[i].po_target); - D_PRINT("\n"); + printf("\n"); + } } void 
-plt_obj_layout_check(struct pl_obj_layout *layout, uint32_t pool_size) +plt_obj_layout_check(struct pl_obj_layout *layout, uint32_t pool_size, + int num_allowed_failures) { int i; int target_num; @@ -63,14 +66,49 @@ plt_obj_layout_check(struct pl_obj_layout *layout, uint32_t pool_size) for (i = 0; i < layout->ol_nr; i++) { target_num = layout->ol_shards[i].po_target; - D_ASSERT(target_num != -1); - D_ASSERT(target_set[target_num] != 1); - target_set[target_num] = 1; - } + if (target_num == -1) + num_allowed_failures--; + D_ASSERT(num_allowed_failures >= 0); + if (target_num != -1) { + D_ASSERT(target_set[target_num] != 1); + target_set[target_num] = 1; + } + } D_FREE(target_set); } +void +reint_check(struct pl_obj_layout *layout, struct pl_obj_layout *temp_layout, + uint32_t *spare_tgt_ranks, uint32_t *shard_ids, int num_reint, + uint32_t curr_fail_tgt) +{ + int i; + uint32_t original_target; + uint32_t reint_target; + + D_ASSERT(0 <= num_reint && num_reint < 2); + + /* can't rebuild non replicated date */ + if (temp_layout->ol_grp_size == 1) { + D_ASSERT(num_reint == 0); + if (layout->ol_shards[0].po_target == curr_fail_tgt) + D_ASSERT(temp_layout->ol_shards[0].po_target == -1); + return; + } + + for (i = 0; i < temp_layout->ol_nr; ++i) { + original_target = layout->ol_shards[i].po_target; + reint_target = temp_layout->ol_shards[i].po_target; + + if (original_target == curr_fail_tgt) { + D_ASSERT(num_reint == 1); + D_ASSERT(original_target == spare_tgt_ranks[0]); + D_ASSERT(reint_target != original_target); + } + } +} + void plt_obj_rebuild_unique_check(uint32_t *shard_ids, uint32_t num_shards, uint32_t pool_size) @@ -143,6 +181,7 @@ plt_set_tgt_status(uint32_t id, int status, uint32_t ver, id, target->ta_comp.co_rank, str, ver); target->ta_comp.co_status = status; target->ta_comp.co_fseq = ver; + pool_map_update_failed_cnt(po_map); rc = pool_map_set_version(po_map, ver); D_ASSERT(rc == 0); } @@ -155,6 +194,15 @@ plt_fail_tgt(uint32_t id, uint32_t *po_ver, struct 
pool_map *po_map, plt_set_tgt_status(id, PO_COMP_ST_DOWN, *po_ver, po_map, pl_debug_msg); } +void +plt_fail_tgt_out(uint32_t id, uint32_t *po_ver, struct pool_map *po_map, + bool pl_debug_msg) +{ + (*po_ver)++; + plt_set_tgt_status(id, PO_COMP_ST_DOWNOUT, *po_ver, po_map, + pl_debug_msg); +} + void plt_reint_tgt(uint32_t id, uint32_t *po_ver, struct pool_map *po_map, bool pl_debug_msg) @@ -164,7 +212,7 @@ plt_reint_tgt(uint32_t id, uint32_t *po_ver, struct pool_map *po_map, } void -plt_add_tgt(uint32_t id, uint32_t *po_ver, struct pool_map *po_map, +plt_reint_tgt_up(uint32_t id, uint32_t *po_ver, struct pool_map *po_map, bool pl_debug_msg) { (*po_ver)++; @@ -201,7 +249,7 @@ plt_spare_tgts_get(uuid_t pl_uuid, daos_obj_id_t oid, uint32_t *failed_tgts, pl_map_decref(pl_map); for (i = 0; i < failed_cnt; i++) - plt_add_tgt(failed_tgts[i], po_ver, po_map, pl_debug_msg); + plt_reint_tgt_up(failed_tgts[i], po_ver, po_map, pl_debug_msg); } void @@ -322,8 +370,46 @@ plt_reint_tgts_get(uuid_t pl_uuid, daos_obj_id_t oid, uint32_t *failed_tgts, pl_map_decref(pl_map); for (i = 0; i < reint_cnt; i++) - plt_add_tgt(reint_tgts[i], po_ver, po_map, pl_debug_msg); + plt_reint_tgt_up(reint_tgts[i], po_ver, po_map, pl_debug_msg); for (i = 0; i < failed_cnt; i++) - plt_add_tgt(failed_tgts[i], po_ver, po_map, pl_debug_msg); + plt_reint_tgt_up(failed_tgts[i], po_ver, po_map, pl_debug_msg); +} + +int +getObjectClasses(daos_oclass_id_t **oclass_id_pp) +{ + const uint32_t str_size = 2560; + char oclass_names[str_size]; + char oclass[64]; + daos_oclass_id_t *oclass_id; + uint32_t length = 0; + uint32_t num_oclass = 0; + uint32_t oclass_str_index = 0; + uint32_t i, oclass_index; + + length = daos_oclass_names_list(str_size, oclass_names); + + for (i = 0; i < length; ++i) { + if (oclass_names[i] == ',') + num_oclass++; + } + + D_ALLOC_ARRAY(*oclass_id_pp, num_oclass); + + for (i = 0, oclass_index = 0; i < length; ++i) { + if (oclass_names[i] == ',') { + oclass_id = 
&(*oclass_id_pp)[oclass_index]; + oclass[oclass_str_index] = 0; + *oclass_id = daos_oclass_name2id(oclass); + + oclass_index++; + oclass_str_index = 0; + } else if (oclass_names[i] != ' ') { + oclass[oclass_str_index] = oclass_names[i]; + oclass_str_index++; + } + } + + return num_oclass; } diff --git a/src/placement/tests/place_obj_common.h b/src/placement/tests/place_obj_common.h index 851ee22b67c..e9045e5706e 100644 --- a/src/placement/tests/place_obj_common.h +++ b/src/placement/tests/place_obj_common.h @@ -31,10 +31,16 @@ void plt_obj_place(daos_obj_id_t oid, struct pl_obj_layout **layout, - struct pl_map *pl_map); + struct pl_map *pl_map, bool print_layout); void -plt_obj_layout_check(struct pl_obj_layout *layout, uint32_t pool_size); +plt_obj_layout_check(struct pl_obj_layout *layout, uint32_t pool_size, + int num_allowed_failures); + +void +reint_check(struct pl_obj_layout *layout, struct pl_obj_layout *temp_layout, + uint32_t *spare_tgt_ranks, uint32_t *shard_ids, int num_reint, + uint32_t curr_fail_tgt); void plt_obj_rebuild_unique_check(uint32_t *shard_ids, uint32_t num_shards, @@ -52,12 +58,16 @@ void plt_fail_tgt(uint32_t id, uint32_t *po_ver, struct pool_map *po_map, bool pl_debug_msg); +void +plt_fail_tgt_out(uint32_t id, uint32_t *po_ver, struct pool_map *po_map, + bool pl_debug_msg); + void plt_reint_tgt(uint32_t id, uint32_t *po_ver, struct pool_map *po_map, bool pl_debug_msg); void -plt_add_tgt(uint32_t id, uint32_t *po_ver, struct pool_map *po_map, +plt_reint_tgt_up(uint32_t id, uint32_t *po_ver, struct pool_map *po_map, bool pl_debug_msg); void @@ -85,4 +95,7 @@ plt_reint_tgts_get(uuid_t pl_uuid, daos_obj_id_t oid, uint32_t *failed_tgts, uint32_t spare_max_nr, struct pool_map *po_map, struct pl_map *pl_map, uint32_t *po_ver, bool pl_debug_msg); +int +getObjectClasses(daos_oclass_id_t **oclass_id_pp); + #endif /* PL_MAP_COMMON_H */ diff --git a/src/placement/tests/ring_map_place_obj.c b/src/placement/tests/ring_map_place_obj.c index 
3441ec4dec9..e7626613fe9 100644 --- a/src/placement/tests/ring_map_place_obj.c +++ b/src/placement/tests/ring_map_place_obj.c @@ -84,16 +84,16 @@ main(int argc, char **argv) /* initial placement when all nodes alive */ daos_obj_generate_id(&oid, 0, OC_RP_4G2, 0); D_PRINT("\ntest initial placement when no failed shard ...\n"); - plt_obj_place(oid, &lo_1, pl_map); - plt_obj_layout_check(lo_1, COMPONENT_NR); + plt_obj_place(oid, &lo_1, pl_map, true); + plt_obj_layout_check(lo_1, COMPONENT_NR, 0); /* test plt_obj_place when some/all shards failed */ D_PRINT("\ntest to fail all shards and new placement ...\n"); for (i = 0; i < SPARE_MAX_NUM && i < lo_1->ol_nr; i++) plt_fail_tgt(lo_1->ol_shards[i].po_target, &po_ver, po_map, pl_debug_msg); - plt_obj_place(oid, &lo_2, pl_map); - plt_obj_layout_check(lo_2, COMPONENT_NR); + plt_obj_place(oid, &lo_2, pl_map, true); + plt_obj_layout_check(lo_2, COMPONENT_NR, 0); D_ASSERT(!pt_obj_layout_match(lo_1, lo_2, DOM_NR)); D_PRINT("spare target candidate:"); for (i = 0; i < SPARE_MAX_NUM && i < lo_1->ol_nr; i++) { @@ -104,10 +104,10 @@ main(int argc, char **argv) D_PRINT("\ntest to add back all failed shards and new placement ...\n"); for (i = 0; i < SPARE_MAX_NUM && i < lo_1->ol_nr; i++) - plt_add_tgt(lo_1->ol_shards[i].po_target, &po_ver, po_map, + plt_reint_tgt_up(lo_1->ol_shards[i].po_target, &po_ver, po_map, pl_debug_msg); - plt_obj_place(oid, &lo_3, pl_map); - plt_obj_layout_check(lo_3, COMPONENT_NR); + plt_obj_place(oid, &lo_3, pl_map, true); + plt_obj_layout_check(lo_3, COMPONENT_NR, 0); D_ASSERT(pt_obj_layout_match(lo_1, lo_3, DOM_NR)); /* test pl_obj_find_rebuild */ From 3c9d3b90e8c759f9abd5e73b0378bb09b3c66b6b Mon Sep 17 00:00:00 2001 From: Peter Fetros Date: Tue, 31 Mar 2020 22:53:58 +0000 Subject: [PATCH 2/4] Addressed Style Warnings. 
Signed-off-by: Peter Fetros --- src/placement/tests/jump_map_place_obj.c | 2 +- src/placement/tests/place_obj_common.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/placement/tests/jump_map_place_obj.c b/src/placement/tests/jump_map_place_obj.c index 6d83e223e42..eb55450b9b7 100644 --- a/src/placement/tests/jump_map_place_obj.c +++ b/src/placement/tests/jump_map_place_obj.c @@ -127,7 +127,7 @@ rebuild_object_class(daos_oclass_id_t cid) spare_tgt_ranks, shard_ids, SPARE_MAX_NUM, -1); - D_ASSERT(0 <= num_new_spares && num_new_spares < 2); + D_ASSERT(num_new_spares >= 0 && num_new_spares < 2); } diff --git a/src/placement/tests/place_obj_common.c b/src/placement/tests/place_obj_common.c index 8fdb443467d..71f16418d69 100644 --- a/src/placement/tests/place_obj_common.c +++ b/src/placement/tests/place_obj_common.c @@ -87,7 +87,7 @@ reint_check(struct pl_obj_layout *layout, struct pl_obj_layout *temp_layout, uint32_t original_target; uint32_t reint_target; - D_ASSERT(0 <= num_reint && num_reint < 2); + D_ASSERT(num_reint >= 0 && num_reint < 2); /* can't rebuild non replicated date */ if (temp_layout->ol_grp_size == 1) { From 32ede35eceb1b171c28990364120b28581021521 Mon Sep 17 00:00:00 2001 From: Peter Fetros Date: Tue, 7 Apr 2020 23:05:53 +0000 Subject: [PATCH 3/4] Re-enabled Erasure Coding Object Class Tests. Removed the code that skipped over the erasure coded objects. They seem to work, but need at least 18 ranks for the largest one. 
Signed-off-by: Peter Fetros --- src/placement/tests/jump_map_place_obj.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/src/placement/tests/jump_map_place_obj.c b/src/placement/tests/jump_map_place_obj.c index eb55450b9b7..15b76247de2 100644 --- a/src/placement/tests/jump_map_place_obj.c +++ b/src/placement/tests/jump_map_place_obj.c @@ -28,7 +28,7 @@ #include #include "place_obj_common.h" -#define DOM_NR 8 +#define DOM_NR 18 #define NODE_PER_DOM 1 #define VOS_PER_TARGET 4 #define SPARE_MAX_NUM (DOM_NR * 3) @@ -248,8 +248,7 @@ reint_object_class(daos_oclass_id_t cid) int main(int argc, char **argv) { - struct daos_oclass_attr *obj_attr; - daos_obj_id_t oid; +// struct daos_oclass_attr *obj_attr; daos_oclass_id_t *test_classes; uint32_t num_test_oc; char oclass_name[50]; @@ -271,16 +270,6 @@ main(int argc, char **argv) for (oc_index = 0; oc_index < num_test_oc; ++oc_index) { daos_oclass_id2name(test_classes[oc_index], oclass_name); - daos_obj_generate_id(&oid, 0, test_classes[oc_index], 0); - obj_attr = daos_oclass_attr_find(oid); - - /* Skip Erasure Codded objects for now. */ - if (obj_attr->ca_resil == DAOS_RES_EC) { - D_PRINT("Skipping erasure coded class: %s\n", - oclass_name); - continue; - } - D_PRINT("Running oclass test: %s\n", oclass_name); placement_object_class(test_classes[oc_index]); From 3c94800b29e8ad5a66feee38bf23103998ede87e Mon Sep 17 00:00:00 2001 From: Peter Fetros Date: Wed, 8 Apr 2020 20:50:49 +0000 Subject: [PATCH 4/4] Changed find_targets to find_nodes. 
Signed-off-by: Peter Fetros --- src/placement/jump_map.c | 4 ++-- src/placement/tests/jump_map_place_obj.c | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/placement/jump_map.c b/src/placement/jump_map.c index ebd139115d9..5428201dc5f 100644 --- a/src/placement/jump_map.c +++ b/src/placement/jump_map.c @@ -339,8 +339,8 @@ count_available_spares(struct pl_jump_map *jmap, struct pl_obj_layout *layout, uint32_t unusable_tgts; uint32_t num_targets; - num_targets = pool_map_find_target(jmap->jmp_map.pl_poolmap, - PO_COMP_ID_ALL, NULL); + num_targets = pool_map_find_domain(jmap->jmp_map.pl_poolmap, + jmap->min_redundant_dom, PO_COMP_ID_ALL, NULL); /* we might not have any valid targets left at all */ unusable_tgts = layout->ol_nr; diff --git a/src/placement/tests/jump_map_place_obj.c b/src/placement/tests/jump_map_place_obj.c index 15b76247de2..b7682809988 100644 --- a/src/placement/tests/jump_map_place_obj.c +++ b/src/placement/tests/jump_map_place_obj.c @@ -248,7 +248,6 @@ reint_object_class(daos_oclass_id_t cid) int main(int argc, char **argv) { -// struct daos_oclass_attr *obj_attr; daos_oclass_id_t *test_classes; uint32_t num_test_oc; char oclass_name[50];