-
Notifications
You must be signed in to change notification settings - Fork 304
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DAOS-14908 vos: Reduce aggregation conflicts #14143
Changes from all commits
50b4d41
2c46207
cb03a97
6277b58
378dc33
f69b883
1ebff89
64605e7
0357fa2
3fcad48
1c5b6ef
728c35a
a350603
dd592f0
6b2871c
4f10858
15d95dd
7d0773c
095f422
4592e42
d196ca5
42ae560
c60f4fd
98d969d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -231,6 +231,15 @@ obj_metrics_alloc_internal(const char *path, int tgt_id, bool server) | |
if (rc) | ||
D_WARN("Failed to create EC partial update counter: " DF_RC "\n", DP_RC(rc)); | ||
|
||
/** Total number of times EC aggregation conflicts with discard or VOS | ||
* aggregation | ||
*/ | ||
rc = d_tm_add_metric(&metrics->opm_ec_agg_blocked, D_TM_COUNTER, | ||
"total number of EC agg pauses due to VOS discard or agg", NULL, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, I was thinking that EC agg never conflicts with VOS agg, so our goal was to fix the regression which makes them being serialized by vos_aggregation_enter() call. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. They are only serialized in this patch on the same object |
||
"%s/EC_agg/blocked%s", path, tgt_path); | ||
if (rc) | ||
D_WARN("Failed to create EC agg blocked counter: " DF_RC "\n", DP_RC(rc)); | ||
|
||
return metrics; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2608,10 +2608,12 @@ static int | |
cont_ec_aggregate_cb(struct ds_cont_child *cont, daos_epoch_range_t *epr, | ||
uint32_t flags, struct agg_param *agg_param) | ||
{ | ||
struct obj_pool_metrics *opm; | ||
struct ec_agg_param *ec_agg_param = agg_param->ap_data; | ||
vos_iter_param_t iter_param = { 0 }; | ||
struct vos_iter_anchors anchors = { 0 }; | ||
int rc = 0; | ||
int blocks = 0; | ||
|
||
/* | ||
* Avoid calling into vos_aggregate() when aborting aggregation | ||
|
@@ -2645,24 +2647,21 @@ cont_ec_aggregate_cb(struct ds_cont_child *cont, daos_epoch_range_t *epr, | |
goto update_hae; | ||
} | ||
|
||
rc = vos_aggregate_enter(cont->sc_hdl, epr); | ||
if (rc) | ||
goto update_hae; | ||
|
||
iter_param.ip_hdl = cont->sc_hdl; | ||
iter_param.ip_epr.epr_lo = epr->epr_lo; | ||
iter_param.ip_epr.epr_hi = epr->epr_hi; | ||
iter_param.ip_epc_expr = VOS_IT_EPC_RR; | ||
iter_param.ip_flags = VOS_IT_RECX_VISIBLE; | ||
iter_param.ip_flags = VOS_IT_RECX_VISIBLE | VOS_IT_FOR_AGG; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to serialize EC agg and VOS agg for same object? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we don't. But it will avoid any potential issue |
||
iter_param.ip_recx.rx_idx = 0ULL; | ||
iter_param.ip_recx.rx_nr = ~PARITY_INDICATOR; | ||
iter_param.ip_filter_cb = agg_filter; | ||
iter_param.ip_filter_arg = ec_agg_param; | ||
|
||
agg_reset_entry(&ec_agg_param->ap_agg_entry, NULL, NULL); | ||
|
||
rc = vos_iterate(&iter_param, VOS_ITER_OBJ, true, &anchors, | ||
agg_iterate_pre_cb, agg_iterate_post_cb, ec_agg_param, NULL); | ||
retry: | ||
rc = vos_iterate(&iter_param, VOS_ITER_OBJ, true, &anchors, agg_iterate_pre_cb, | ||
agg_iterate_post_cb, ec_agg_param, NULL); | ||
|
||
/* Post_cb may not being executed in some cases */ | ||
agg_clear_extents(&ec_agg_param->ap_agg_entry); | ||
|
@@ -2681,7 +2680,20 @@ cont_ec_aggregate_cb(struct ds_cont_child *cont, daos_epoch_range_t *epr, | |
sched_req_sleep(cont->sc_ec_agg_req, 5 * 1000); | ||
} | ||
|
||
vos_aggregate_exit(cont->sc_hdl); | ||
if (rc == -DER_BUSY) { | ||
/** Hit an object conflict VOS aggregation or discard. Rather than exiting, let's | ||
* yield and try again. | ||
*/ | ||
opm = cont->sc_pool->spc_metrics[DAOS_OBJ_MODULE]; | ||
d_tm_inc_counter(opm->opm_ec_agg_blocked, 1); | ||
blocks++; | ||
/** Warn once if it goes over 20 times */ | ||
D_CDEBUG(blocks == 20, DLOG_WARN, DB_EPC, | ||
"EC agg hit conflict with VOS agg or discard (nr=%d), retrying...\n", | ||
blocks); | ||
ec_aggregate_yield(ec_agg_param); | ||
goto retry; | ||
} | ||
|
||
update_hae: | ||
if (rc == 0) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks to me this "IT_FOR_PURGE" was only used for aggregation, I think we could just rename it to "IT_FOR_AGG" now?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably a good thing to do but can be deferred to another patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just looked again at this. The issue is we need a way to distinguish between iterating for EC agg vs VOS agg. The latter will delete stuff but EC agg does not so we probably want different INTENT behavior.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, I overlooked that EC agg and VOS agg are still mutual exclusive on same object in this PR. I'm wondering if it's necessary?
I was thinking that we only need to make EC agg or VOS agg being mutual exclusive with object discard, did I miss anything?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same comment as above. They are not supposed to overlap in range is my understanding but serializing them avoids some complication and potential to screw that up. I'd rather take small steps
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, sounds good to me.