-
Notifications
You must be signed in to change notification settings - Fork 306
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DAOS-4441 Placement: Modify Placement for OS Reintegration #2402
Changes from 4 commits
c1592ee
697e44a
39370a5
b334eab
1919e92
1f1edc7
a049d45
ddf39f9
e92b736
c597a4b
95976f6
aa7dc32
b35f976
90a9729
de9fc85
9eeb211
7479f24
e26270a
f995c80
381204b
8ffabac
8b983d1
810f09f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,6 +31,27 @@ | |
#include <daos/pool_map.h> | ||
#include <isa-l.h> | ||
|
||
|
||
/* | ||
* These ops determine whether extra information is calculated during | ||
* placement. | ||
* | ||
* PL_PLACE_EXTENDED calculates an extended layout for use when there | ||
* is a reintegration operation currently ongoing. | ||
* | ||
* PL_REINT calculates the post-reintegration layout for use during | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (style) trailing whitespace |
||
* reintegration, it treats the UP status targets as UP_IN. | ||
* | ||
* Currently the other OP types calculate a normal layout without extra info. | ||
*/ | ||
enum PL_OP_TYPE { | ||
PL_PLACE, | ||
PL_PLACE_EXTENDED, | ||
PL_REBUILD, | ||
PL_REINT, | ||
PL_ADD, | ||
}; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's better to put some comments to explain these ops, that'll be helpful for understanding the code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what is the difference between PL_PLACE_EXTENDED and PL_ADD? |
||
|
||
/** | ||
* Contains information related to object layout size. | ||
*/ | ||
|
@@ -374,8 +395,9 @@ count_available_spares(struct pl_jump_map *jmap, struct pl_obj_layout *layout, | |
static int | ||
obj_remap_shards(struct pl_jump_map *jmap, struct daos_obj_md *md, | ||
struct pl_obj_layout *layout, struct jm_obj_placement *jmop, | ||
d_list_t *remap_list, bool for_reint, uint8_t *tgts_used, | ||
uint8_t *dom_used, uint32_t failed_in_layout) | ||
d_list_t *remap_list, enum PL_OP_TYPE op_type, | ||
uint8_t *tgts_used, uint8_t *dom_used, | ||
uint32_t failed_in_layout, d_list_t *extend_list) | ||
{ | ||
struct failed_shard *f_shard; | ||
struct pl_obj_shard *l_shard; | ||
|
@@ -384,13 +406,15 @@ obj_remap_shards(struct pl_jump_map *jmap, struct daos_obj_md *md, | |
d_list_t *current; | ||
daos_obj_id_t oid; | ||
bool spare_avail = true; | ||
bool for_reint; | ||
uint64_t key; | ||
uint32_t spares_left; | ||
int rc; | ||
|
||
|
||
remap_dump(remap_list, md, "before remap:"); | ||
|
||
for_reint = (op_type == PL_REINT); | ||
current = remap_list->next; | ||
spare_tgt = NULL; | ||
oid = md->omd_id; | ||
|
@@ -405,10 +429,8 @@ obj_remap_shards(struct pl_jump_map *jmap, struct daos_obj_md *md, | |
uint64_t rebuild_key; | ||
uint32_t shard_id; | ||
|
||
f_shard = d_list_entry(current, struct failed_shard, | ||
fs_list); | ||
f_shard = d_list_entry(current, struct failed_shard, fs_list); | ||
shard_id = f_shard->fs_shard_idx; | ||
|
||
l_shard = &layout->ol_shards[f_shard->fs_shard_idx]; | ||
|
||
spare_avail = jump_map_remap_next_spare(jmap, jmop, | ||
|
@@ -422,11 +444,18 @@ obj_remap_shards(struct pl_jump_map *jmap, struct daos_obj_md *md, | |
spares_left--; | ||
} | ||
|
||
determine_valid_spares(spare_tgt, md, spare_avail, ¤t, | ||
remap_list, for_reint, f_shard, l_shard); | ||
if (op_type == PL_PLACE_EXTENDED && spare_avail && | ||
spare_tgt->ta_comp.co_status == PO_COMP_ST_UP) | ||
remap_alloc_one(extend_list, shard_id, spare_tgt, true); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you need check remap_alloc_one() return value? |
||
|
||
determine_valid_spares(spare_tgt, md, spare_avail, | ||
¤t, remap_list, for_reint, f_shard, | ||
l_shard); | ||
} | ||
|
||
if (op_type == PL_PLACE_EXTENDED) | ||
pl_map_extend(layout, extend_list); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. check return value? |
||
|
||
remap_dump(remap_list, md, "after remap:"); | ||
return 0; | ||
} | ||
|
@@ -506,16 +535,18 @@ jump_map_obj_spec_place_get(struct pl_jump_map *jmap, daos_obj_id_t oid, | |
static int | ||
get_object_layout(struct pl_jump_map *jmap, struct pl_obj_layout *layout, | ||
struct jm_obj_placement *jmop, d_list_t *remap_list, | ||
bool for_reint, struct daos_obj_md *md) | ||
enum PL_OP_TYPE op_type, struct daos_obj_md *md) | ||
{ | ||
struct pool_target *target; | ||
struct pool_domain *root; | ||
daos_obj_id_t oid; | ||
d_list_t extend_list; | ||
uint8_t *dom_used; | ||
uint8_t *tgts_used; | ||
uint32_t dom_used_length; | ||
uint64_t key; | ||
uint32_t fail_tgt_cnt; | ||
bool for_reint; | ||
int i, j, k, rc; | ||
|
||
/* Set the pool map version */ | ||
|
@@ -527,6 +558,7 @@ get_object_layout(struct pl_jump_map *jmap, struct pl_obj_layout *layout, | |
oid = md->omd_id; | ||
key = oid.hi ^ oid.lo; | ||
target = NULL; | ||
for_reint = (op_type == PL_REINT); | ||
|
||
rc = pool_map_find_domain(jmap->jmp_map.pl_poolmap, PO_COMP_TP_ROOT, | ||
PO_COMP_ID_ALL, &root); | ||
|
@@ -539,6 +571,7 @@ get_object_layout(struct pl_jump_map *jmap, struct pl_obj_layout *layout, | |
|
||
D_ALLOC_ARRAY(dom_used, (dom_used_length / 8) + 1); | ||
D_ALLOC_ARRAY(tgts_used, (root->do_target_nr / 8) + 1); | ||
D_INIT_LIST_HEAD(&extend_list); | ||
|
||
if (dom_used == NULL || tgts_used == NULL) | ||
D_GOTO(out, rc = -DER_NOMEM); | ||
|
@@ -572,6 +605,10 @@ get_object_layout(struct pl_jump_map *jmap, struct pl_obj_layout *layout, | |
rc = remap_alloc_one(remap_list, 0, target, false); | ||
if (rc) | ||
D_GOTO(out, rc); | ||
if (op_type == PL_PLACE_EXTENDED && | ||
target->ta_comp.co_status == PO_COMP_ST_UP) { | ||
remap_alloc_one(&extend_list, k, target, true); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. check return value? |
||
} | ||
} | ||
|
||
/** skip the first shard because it's been | ||
|
@@ -600,17 +637,30 @@ get_object_layout(struct pl_jump_map *jmap, struct pl_obj_layout *layout, | |
/** If target is failed queue it for remap*/ | ||
if (pool_target_unavail(target, for_reint)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. btw: return (status == PO_COMP_ST_DOWN) || There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've separated it out and added comments to the code to better clarify, The last line is used for calculating the data movement when reintegrating a target. When calculating reintegration we treat it as "UPIN" to determine what shards will move there. But during normal placement we treat it as unavailable reintegration hasn't completed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see. thanks for clarification. |
||
fail_tgt_cnt++; | ||
|
||
rc = remap_alloc_one(remap_list, k, target, | ||
false); | ||
if (rc) | ||
D_GOTO(out, rc); | ||
|
||
if (op_type == PL_PLACE_EXTENDED && | ||
target->ta_comp.co_status == PO_COMP_ST_UP) { | ||
remap_alloc_one(&extend_list, k, | ||
target, true); | ||
} | ||
|
||
} | ||
} | ||
|
||
j = 0; | ||
} | ||
|
||
rc = obj_remap_shards(jmap, md, layout, jmop, remap_list, | ||
for_reint, tgts_used, dom_used, fail_tgt_cnt); | ||
rc = 0; | ||
if (fail_tgt_cnt > 0) | ||
rc = obj_remap_shards(jmap, md, layout, jmop, remap_list, | ||
op_type, tgts_used, dom_used, fail_tgt_cnt, | ||
&extend_list); | ||
|
||
out: | ||
if (rc) { | ||
D_ERROR("jump_map_obj_layout_fill failed, rc "DF_RC"\n", | ||
|
@@ -698,8 +748,6 @@ jump_map_create(struct pool_map *poolmap, struct pl_map_init_attr *mia, | |
return rc; | ||
} | ||
|
||
|
||
|
||
static void | ||
jump_map_print(struct pl_map *map) | ||
{ | ||
|
@@ -753,7 +801,8 @@ jump_map_obj_place(struct pl_map *map, struct daos_obj_md *md, | |
|
||
/* Get root node of pool map */ | ||
D_INIT_LIST_HEAD(&remap_list); | ||
rc = get_object_layout(jmap, layout, &jmop, &remap_list, false, md); | ||
rc = get_object_layout(jmap, layout, &jmop, &remap_list, | ||
PL_PLACE_EXTENDED, md); | ||
if (rc < 0) { | ||
D_ERROR("Could not generate placement layout, rc "DF_RC"\n", | ||
DP_RC(rc)); | ||
|
@@ -841,7 +890,8 @@ jump_map_obj_find_rebuild(struct pl_map *map, struct daos_obj_md *md, | |
} | ||
|
||
D_INIT_LIST_HEAD(&remap_list); | ||
rc = get_object_layout(jmap, layout, &jmop, &remap_list, false, md); | ||
rc = get_object_layout(jmap, layout, &jmop, &remap_list, PL_REBUILD, | ||
md); | ||
|
||
if (rc < 0) { | ||
D_ERROR("Could not generate placement layout, rc "DF_RC"\n", | ||
|
@@ -916,7 +966,7 @@ jump_map_obj_find_reint(struct pl_map *map, struct daos_obj_md *md, | |
D_INIT_LIST_HEAD(&reint_list); | ||
|
||
/* Get original placement */ | ||
rc = get_object_layout(jmap, layout, &jop, &remap_list, false, md); | ||
rc = get_object_layout(jmap, layout, &jop, &remap_list, PL_PLACE, md); | ||
if (rc) | ||
goto out; | ||
|
||
|
@@ -925,7 +975,8 @@ jump_map_obj_find_reint(struct pl_map *map, struct daos_obj_md *md, | |
D_INIT_LIST_HEAD(&remap_list); | ||
|
||
/* Get placement after reintegration. */ | ||
rc = get_object_layout(jmap, reint_layout, &jop, &remap_list, true, md); | ||
rc = get_object_layout(jmap, reint_layout, &jop, &remap_list, PL_REINT, | ||
md); | ||
if (rc) | ||
goto out; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -93,7 +93,7 @@ remap_alloc_one(d_list_t *remap_list, unsigned int shard_idx, | |
remap_add_one(remap_list, f_new); | ||
} else { | ||
f_new->fs_tgt_id = tgt->ta_comp.co_id; | ||
d_list_add(&f_new->fs_list, remap_list); | ||
d_list_add_tail(&f_new->fs_list, remap_list); | ||
} | ||
|
||
return 0; | ||
|
@@ -399,4 +399,87 @@ determine_valid_spares(struct pool_target *spare_tgt, struct daos_obj_md *md, | |
(*current) = (*current)->next; | ||
} | ||
|
||
int | ||
pl_map_extend(struct pl_obj_layout *layout, d_list_t *extended_list) | ||
{ | ||
struct pl_obj_shard *new_shards; | ||
struct failed_shard *f_shard; | ||
d_list_t *current; | ||
uint32_t *grp_count; | ||
uint32_t max_fail_grp; | ||
uint8_t *grp_map; | ||
uint32_t new_group_size; | ||
uint32_t grp; | ||
uint32_t grp_idx; | ||
int i, j, k = 0; | ||
|
||
/* Empty list, no extension needed */ | ||
if (extended_list == extended_list->next || layout->ol_grp_size == 1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. d_list_empty(extended_list) |
||
goto out; | ||
|
||
D_ALLOC_ARRAY(grp_map, (layout->ol_nr / 8) + 1); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no big deal, but we normally use this: (layout->ol_nr + 1)/8, otherwise you got two bytes if nr==8. |
||
D_ALLOC_ARRAY(grp_count, layout->ol_grp_nr); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. check the allocation result? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. indeed |
||
i = 0; | ||
max_fail_grp = 0; | ||
|
||
current = extended_list->next; | ||
while (current != extended_list) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. d_list_for_each_safe? |
||
f_shard = d_list_entry(current, struct failed_shard, fs_list); | ||
grp = f_shard->fs_shard_idx / layout->ol_grp_size; | ||
|
||
if (isset(grp_map, f_shard->fs_tgt_id) == false) { | ||
setbit(grp_map, f_shard->fs_tgt_id); | ||
grp_count[grp]++; | ||
|
||
if (max_fail_grp < grp_count[grp]) | ||
max_fail_grp = grp_count[grp]; | ||
} else | ||
d_list_del_init(&f_shard->fs_list); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. style: use braces for all code blocks if any of code blocks of "if{}else" require braces |
||
|
||
current = current->next; | ||
} | ||
|
||
|
||
new_group_size = layout->ol_grp_size + max_fail_grp; | ||
D_ALLOC_ARRAY(new_shards, new_group_size * layout->ol_grp_nr); | ||
if (new_shards == NULL) | ||
return -DER_NOMEM; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. free grp_map and grp_count? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. indeed |
||
|
||
while (k < layout->ol_nr) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. easier to understand in this way? " for (i = k = 0; k < layout->ol_nr;)" |
||
for (j = 0; j < layout->ol_grp_size; ++j, ++k, ++i) | ||
new_shards[i] = layout->ol_shards[k]; | ||
for (; j < new_group_size; ++j, ++i) { | ||
new_shards[i].po_shard = -1; | ||
new_shards[i].po_target = -1; | ||
} | ||
} | ||
|
||
current = extended_list->next; | ||
while (current != extended_list) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. d_list_for_each_entry? |
||
f_shard = d_list_entry(current, struct failed_shard, fs_list); | ||
|
||
grp = f_shard->fs_shard_idx / layout->ol_grp_size; | ||
grp_idx = ((grp + 1) * layout->ol_grp_size) + grp; | ||
grp_count[grp]--; | ||
grp_idx += grp_count[grp]; | ||
|
||
new_shards[grp_idx].po_fseq = f_shard->fs_fseq; | ||
new_shards[grp_idx].po_shard = f_shard->fs_shard_idx; | ||
new_shards[grp_idx].po_target = f_shard->fs_tgt_id; | ||
new_shards[grp_idx].po_rebuilding = 1; | ||
|
||
current = current->next; | ||
} | ||
|
||
layout->ol_grp_size += max_fail_grp; | ||
layout->ol_nr = layout->ol_grp_size * layout->ol_grp_nr; | ||
|
||
D_FREE(layout->ol_shards); | ||
layout->ol_shards = new_shards; | ||
|
||
D_FREE(grp_count); | ||
D_FREE(grp_map); | ||
out: | ||
remap_list_free_all(extended_list); | ||
return 0; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(style) trailing whitespace