Skip to content

Commit

Permalink
DAOS-7856 tests: add EC aggregation under degraded (daos-stack#5924)
Browse files Browse the repository at this point in the history
1. Add EC aggregation degrade test.

2. Do not return failure if the peer parity target is DOWN,
to avoid an unnecessary error message.

3. Fix typo from PR-5862.

Signed-off-by: Di Wang <[email protected]>
  • Loading branch information
wangdi authored Jun 14, 2021
1 parent 05ac48e commit 900a1c9
Show file tree
Hide file tree
Showing 7 changed files with 110 additions and 18 deletions.
2 changes: 1 addition & 1 deletion src/object/cli_shard.c
Original file line number Diff line number Diff line change
Expand Up @@ -1881,7 +1881,7 @@ obj_shard_query_recx_post(struct obj_query_key_cb_args *cb_args, uint32_t shard,
if (DAOS_RECX_END(*result_recx) > end[0] || changed)
*result_recx = recx[0];
} else {
if (DAOS_RECX_END(*result_recx) > end[0] || changed)
if (DAOS_RECX_END(*result_recx) > end[1] || changed)
*result_recx = recx[1];
}
}
Expand Down
8 changes: 4 additions & 4 deletions src/object/srv_ec_aggregate.c
Original file line number Diff line number Diff line change
Expand Up @@ -1582,12 +1582,11 @@ agg_peer_update(struct ec_agg_entry *entry, bool write_parity)
for (i = 0; i < failed_tgts_cnt; i++) {
if (targets[i].ta_comp.co_rank ==
peer_loc->sd_rank) {
D_ERROR(DF_UOID" peer parity tgt "
"failed rank %d, tgt_idx %d.\n",
DP_UOID(entry->ae_oid),
D_DEBUG(DB_EPC, DF_UOID" peer parity "
"tgt failed rank %d, tgt_idx "
"%d.\n", DP_UOID(entry->ae_oid),
peer_loc->sd_rank,
peer_loc->sd_tgt_idx);
rc = -1;
goto out;
}
}
Expand Down Expand Up @@ -2411,6 +2410,7 @@ agg_object(daos_handle_t ih, vos_iter_entry_t *entry,
if (rc == 1 && entry->ie_oid.id_shard >= oca.u.ec.e_k) {
D_DEBUG(DB_EPC, "oid:"DF_UOID" ec agg starting\n",
DP_UOID(entry->ie_oid));

agg_reset_entry(&agg_param->ap_agg_entry, entry, &oca);
rc = 0;
goto out;
Expand Down
10 changes: 6 additions & 4 deletions src/pool/srv_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -5696,17 +5696,16 @@ ds_pool_check_dtx_leader(struct ds_pool *pool, daos_unit_oid_t *oid,
return rc;
leader_shard = rc;

D_DEBUG(DB_TRACE, "get new leader tgt id %d\n", leader_tgt);
rc = pool_map_find_target(pool->sp_map, leader_tgt, &target);
if (rc < 0)
return rc;
D_GOTO(out, rc);

if (rc != 1)
return -DER_INVAL;
D_GOTO(out, rc = -DER_INVAL);

rc = crt_group_rank(NULL, &myrank);
if (rc < 0)
return rc;
D_GOTO(out, rc);

if (myrank != target->ta_comp.co_rank) {
rc = 0;
Expand All @@ -5716,6 +5715,9 @@ ds_pool_check_dtx_leader(struct ds_pool *pool, daos_unit_oid_t *oid,
rc = 0;
}

out:
D_DEBUG(DB_TRACE, DF_UOID" get new leader shard/tgtid %d/%d: %d\n",
DP_UOID(*oid), leader_shard, leader_tgt, rc);
return rc;
}

Expand Down
2 changes: 1 addition & 1 deletion src/tests/suite/daos_aggregate_ec.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ enum {
EC_SPECIFIED,
};

static bool
bool
oid_is_ec(daos_obj_id_t oid, struct daos_oclass_attr **attr)
{
struct daos_oclass_attr *oca;
Expand Down
55 changes: 55 additions & 0 deletions src/tests/suite/daos_degrade_ec.c
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,59 @@ degrade_multi_conts_agg(void **state)
test_teardown((void **)&args[i]);
}

#define EC_CELL_SIZE 1048576
/**
 * DEGRADE24: verify EC aggregation of partial updates on a degraded object.
 *
 * Writes ten EC_CELL_SIZE-byte extents at consecutive offsets to an
 * OC_EC_4P2G1 object (extent i is filled with the byte 'a' + i), takes down
 * the rank hosting shard 5 through rebuild_pools_ranks(), triggers EC
 * aggregation, and finally checks every extent with ec_verify_parity_data().
 *
 * The test is skipped (returns immediately) when test_runable(arg, 6) is
 * false. NOTE(review): 6 is presumably the minimum number of ranks/targets
 * needed by the 4+2 layout -- confirm against test_runable().
 *
 * \param[in] state	test state; *state is the test_arg_t for this run.
 */
static void
degrade_ec_partial_update_agg(void **state)
{
	test_arg_t	*arg = *state;
	struct ioreq	 req;
	daos_obj_id_t	 oid;
	d_rank_t	 rank;
	int		 i;
	char		*data;
	char		*verify_data;

	if (!test_runable(arg, 6))
		return;

	data = malloc(EC_CELL_SIZE);
	assert_true(data != NULL);
	verify_data = malloc(EC_CELL_SIZE);
	assert_true(verify_data != NULL);

	oid = daos_test_oid_gen(arg->coh, OC_EC_4P2G1, 0, 0, arg->myrank);
	ioreq_init(&req, arg->coh, oid, DAOS_IOD_ARRAY, arg);

	/* One cell-sized partial update per iteration, each with its own
	 * fill byte so the extents can be told apart during verification.
	 */
	for (i = 0; i < 10; i++) {
		daos_recx_t recx;

		req.iod_type = DAOS_IOD_ARRAY;
		recx.rx_nr = EC_CELL_SIZE;
		recx.rx_idx = i * EC_CELL_SIZE;
		memset(data, 'a' + i, EC_CELL_SIZE);
		insert_recxs("d_key", "a_key", 1, DAOS_TX_NONE, &recx, 1,
			     data, EC_CELL_SIZE, &req);
	}

	/* Kill the last parity shard, which is the aggregation leader, to
	 * verify that aggregation works in degraded mode.
	 */
	rank = get_rank_by_oid_shard(arg, oid, 5);
	rebuild_pools_ranks(&arg, 1, &rank, 1, false);

	/* Trigger aggregation */
	daos_pool_set_prop(arg->pool.pool_uuid, "reclaim", "time");
	trigger_and_wait_ec_aggreation(arg, &oid, 1);

	/* Rebuild the expected pattern for each extent and compare. */
	for (i = 0; i < 10; i++) {
		daos_off_t offset = i * EC_CELL_SIZE;

		memset(verify_data, 'a' + i, EC_CELL_SIZE);
		ec_verify_parity_data(&req, "d_key", "a_key", offset,
				      (daos_size_t)EC_CELL_SIZE, verify_data);
	}

	/* Release what ioreq_init() set up; without this the request's
	 * object handle is left open when the test returns.
	 */
	ioreq_fini(&req);
	free(data);
	free(verify_data);
}

/** create a new pool/container for each test */
static const struct CMUnitTest degrade_tests[] = {
{"DEGRADE0: degrade partial update with data tgt fail",
Expand Down Expand Up @@ -495,6 +548,8 @@ static const struct CMUnitTest degrade_tests[] = {
test_teardown},
{"DEGRADE23: degrade io with multi-containers and aggregation",
degrade_multi_conts_agg, degrade_sub_setup, test_teardown},
{"DEGRADE24: degrade ec aggregation partial update",
degrade_ec_partial_update_agg, degrade_sub_setup, test_teardown},
};

int
Expand Down
41 changes: 33 additions & 8 deletions src/tests/suite/daos_obj_ec.c
Original file line number Diff line number Diff line change
Expand Up @@ -289,23 +289,48 @@ void
trigger_and_wait_ec_aggreation(test_arg_t *arg, daos_obj_id_t *oids,
int oids_nr)
{
d_rank_t ec_agg_rank;
d_rank_t ec_agg_ranks[10];
int i;

for (i = 0; i < oids_nr; i++) {
get_killing_rank_by_oid(arg, oids[i], 0, 1, &ec_agg_rank, NULL);
daos_debug_set_params(arg->group, ec_agg_rank, DMG_KEY_FAIL_LOC,
DAOS_FORCE_EC_AGG | DAOS_FAIL_ALWAYS,
0, NULL);
struct daos_oclass_attr *oca;
int parity_nr;
int j;

assert_true(oid_is_ec(oids[i], &oca));
parity_nr = oca->u.ec.e_p;
assert_true(parity_nr < 10);

get_killing_rank_by_oid(arg, oids[i], 0, parity_nr,
ec_agg_ranks, NULL);
for (j = 0; j < parity_nr; j++)
daos_debug_set_params(arg->group, ec_agg_ranks[j],
DMG_KEY_FAIL_LOC,
DAOS_FORCE_EC_AGG |
DAOS_FAIL_ALWAYS, 0, NULL);
}

print_message("wait for 5 seconds for EC aggregation.\n");
sleep(5);
daos_debug_set_params(arg->group, ec_agg_rank, DMG_KEY_FAIL_LOC,
0, 0, NULL);

for (i = 0; i < oids_nr; i++) {
struct daos_oclass_attr *oca;
int parity_nr;
int j;

assert_true(oid_is_ec(oids[i], &oca));
parity_nr = oca->u.ec.e_p;
assert_true(parity_nr < 10);

get_killing_rank_by_oid(arg, oids[i], 0, parity_nr,
ec_agg_ranks, NULL);
for (j = 0; j < parity_nr; j++)
daos_debug_set_params(arg->group, ec_agg_ranks[j],
DMG_KEY_FAIL_LOC, 0, 0, NULL);
}
}

static void
void
ec_verify_parity_data(struct ioreq *req, char *dkey, char *akey,
daos_off_t offset, daos_size_t size,
char *verify_data)
Expand Down
10 changes: 10 additions & 0 deletions src/tests/suite/daos_test.h
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,14 @@ get_killing_rank_by_oid(test_arg_t *arg, daos_obj_id_t oid, int data,
d_rank_t
get_rank_by_oid_shard(test_arg_t *arg, daos_obj_id_t oid, uint32_t shard);

/**
 * Force EC aggregation for a set of objects and wait for it to run.
 *
 * As defined in daos_obj_ec.c: for every object it asserts the object is
 * erasure-coded with fewer than 10 parity shards, looks up the ranks via
 * get_killing_rank_by_oid() using the object's parity count, and sets
 * DMG_KEY_FAIL_LOC to DAOS_FORCE_EC_AGG | DAOS_FAIL_ALWAYS on each of them.
 * It then sleeps 5 seconds and resets DMG_KEY_FAIL_LOC to 0 on the same ranks.
 *
 * \param[in] arg	test argument; arg->group is passed to
 *			daos_debug_set_params().
 * \param[in] oid	array of object IDs to aggregate.
 * \param[in] oid_nr	number of entries in \a oid.
 */
void
trigger_and_wait_ec_aggreation(test_arg_t *arg, daos_obj_id_t *oid, int oid_nr);

/**
 * Verification helper defined in daos_obj_ec.c and exported so other test
 * files can call it.
 *
 * NOTE(review): judging by its name and its caller in daos_degrade_ec.c, it
 * presumably checks that \a size bytes at \a offset under dkey/akey match
 * \a verify_data when served from parity -- confirm against the definition.
 */
void
ec_verify_parity_data(struct ioreq *req, char *dkey, char *akey,
daos_off_t offset, daos_size_t size,
char *verify_data);

int run_daos_sub_tests(char *test_name, const struct CMUnitTest *tests,
int tests_size, int *sub_tests, int sub_tests_size,
test_setup_cb_t setup_cb, test_setup_cb_t teardown_cb);
Expand Down Expand Up @@ -489,6 +497,8 @@ void write_ec_partial_full(struct ioreq *req, int test_idx, daos_off_t off);
void verify_ec_full_partial(struct ioreq *req, int test_idx, daos_off_t off);
void make_buffer(char *buffer, char start, int total);

bool oid_is_ec(daos_obj_id_t oid, struct daos_oclass_attr **attr);

static inline void
daos_test_print(int rank, char *message)
{
Expand Down

0 comments on commit 900a1c9

Please sign in to comment.