From 50b4d41d457d527dc3014c57a5bc73dacd9b56a5 Mon Sep 17 00:00:00 2001 From: Jeff Olivier Date: Tue, 9 Apr 2024 09:22:15 -0600 Subject: [PATCH] DAOS-14908 vos: Reduce aggregation conflicts Rather than blocking vos_obj_discard entirely when discard or aggregation are running, let's block it only when there is an actual conflict on the object being discarded. Features: rebuild Allow-unstable-test: true Required-githooks: true Change-Id: I110dd2e37e299df25c002bba63776559d689b1cf Signed-off-by: Jeff Olivier --- src/pool/srv_target.c | 2 +- src/rebuild/scan.c | 2 +- src/vos/vos_aggregate.c | 43 +++++++++++++-------------- src/vos/vos_internal.h | 64 ++++++++++++++++++++--------------------- src/vos/vos_obj.c | 13 ++++++++- src/vos/vos_obj.h | 6 ++-- src/vos/vos_obj_cache.c | 23 +++++++++++++-- 7 files changed, 90 insertions(+), 63 deletions(-) diff --git a/src/pool/srv_target.c b/src/pool/srv_target.c index b716f8bfc11..ca69160c71e 100644 --- a/src/pool/srv_target.c +++ b/src/pool/srv_target.c @@ -2136,7 +2136,7 @@ obj_discard_cb(daos_handle_t ch, vos_iter_entry_t *ent, /* Inform the iterator and delete the object */ *acts |= VOS_ITER_CB_DELETE; rc = vos_discard(param->ip_hdl, &ent->ie_oid, &epr, NULL, NULL); - if (rc != -DER_BUSY && rc != -DER_INPROGRESS) + if (rc != -DER_UPDATE_AGAIN && rc != -DER_BUSY && rc != -DER_INPROGRESS) break; D_DEBUG(DB_REBUILD, "retry by "DF_RC"/"DF_UOID"\n", diff --git a/src/rebuild/scan.c b/src/rebuild/scan.c index 22d90eb025e..41ddc6d34ad 100644 --- a/src/rebuild/scan.c +++ b/src/rebuild/scan.c @@ -544,7 +544,7 @@ obj_reclaim(struct pl_map *map, uint32_t layout_ver, uint32_t new_layout_ver, /* Inform the iterator and delete the object */ *acts |= VOS_ITER_CB_DELETE; rc = vos_discard(param->ip_hdl, &oid, &discard_epr, NULL, NULL); - if (rc != -DER_BUSY && rc != -DER_INPROGRESS) + if (rc != -DER_UPDATE_AGAIN && rc != -DER_BUSY && rc != -DER_INPROGRESS) break; D_DEBUG(DB_REBUILD, "retry by "DF_RC"/"DF_UOID"\n", diff --git a/src/vos/vos_aggregate.c b/src/vos/vos_aggregate.c index b7bde4e9b39..fc0c7db64c4 100644 --- a/src/vos/vos_aggregate.c +++ b/src/vos/vos_aggregate.c @@ -1,10 +1,10 @@ /** - * (C) Copyright 2019-2023 Intel Corporation. + * (C) Copyright 2019-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ /** - * Implementation for aggregation and discard + * Implementation for aggregation and discard */ #define D_LOGFAC DD_FAC(vos) @@ -2496,7 +2496,7 @@ aggregate_enter(struct vos_container *cont, int agg_mode, daos_epoch_range_t *ep } if (cont->vc_obj_discard_count != 0) { - D_ERROR(DF_CONT": In object discard epr["DF_U64", "DF_U64"]\n", + D_ERROR(DF_CONT ": In object discard epr[" DF_U64 ", " DF_U64 "]\n", DP_CONT(cont->vc_pool->vp_id, cont->vc_id), cont->vc_epr_discard.epr_lo, cont->vc_epr_discard.epr_hi); return -DER_BUSY; @@ -2523,13 +2523,6 @@ aggregate_enter(struct vos_container *cont, int agg_mode, daos_epoch_range_t *ep return -DER_BUSY; } - if (cont->vc_obj_discard_count != 0) { - D_ERROR(DF_CONT": In object discard epr["DF_U64", "DF_U64"]\n", - DP_CONT(cont->vc_pool->vp_id, cont->vc_id), - cont->vc_epr_discard.epr_lo, cont->vc_epr_discard.epr_hi); - return -DER_BUSY; - } - if (cont->vc_in_discard && cont->vc_epr_discard.epr_lo <= epr->epr_hi) { D_ERROR(DF_CONT": Discard epr["DF_U64", "DF_U64"], " @@ -2549,21 +2542,18 @@ aggregate_enter(struct vos_container *cont, int agg_mode, daos_epoch_range_t *ep break; case AGG_MODE_OBJ_DISCARD: + /** Theoretically, this could overlap with vos_discard as well + * as aggregation but it makes the logic in vos_obj_hold more + * complicated so defer for now and just disallow it. We can + * conflict with aggregation, however without issues. + */ if (cont->vc_in_discard) { - D_ERROR(DF_CONT": In discard epr["DF_U64", "DF_U64"]\n", + D_ERROR(DF_CONT ": Already in discard epr[" DF_U64 ", " DF_U64 "]\n", DP_CONT(cont->vc_pool->vp_id, cont->vc_id), cont->vc_epr_discard.epr_lo, cont->vc_epr_discard.epr_hi); return -DER_BUSY; } - if (cont->vc_in_aggregation) { - D_DEBUG(DB_EPC, DF_CONT": In aggregation epr["DF_U64", "DF_U64"]\n", - DP_CONT(cont->vc_pool->vp_id, cont->vc_id), - cont->vc_epr_aggregation.epr_lo, cont->vc_epr_aggregation.epr_hi); - return -DER_BUSY; - } - - /** Allow discard from multiple objects */ cont->vc_obj_discard_count++; break; } @@ -2703,10 +2693,18 @@ vos_aggregate(daos_handle_t coh, daos_epoch_range_t *epr, ad->ad_agg_param.ap_flags = flags; ad->ad_iter_param.ip_flags |= VOS_IT_FOR_PURGE; +retry: rc = vos_iterate(&ad->ad_iter_param, VOS_ITER_OBJ, true, &ad->ad_anchors, vos_aggregate_pre_cb, vos_aggregate_post_cb, &ad->ad_agg_param, NULL); - if (rc != 0 || ad->ad_agg_param.ap_nospc_err) { + if (rc == -DER_UPDATE_AGAIN) { + /** Hit a conflict with obj_discard. Rather than exiting, let's + * yield and try again. + */ + close_merge_window(&ad->ad_agg_param.ap_window, rc); + vos_aggregate_yield(&ad->ad_agg_param); + goto retry; + } else if (rc != 0 || ad->ad_agg_param.ap_nospc_err) { close_merge_window(&ad->ad_agg_param.ap_window, rc); goto exit; } else if (ad->ad_agg_param.ap_csum_err) { @@ -2820,9 +2818,8 @@ vos_discard(daos_handle_t coh, daos_unit_oid_t *oidp, daos_epoch_range_t *epr, ad->ad_agg_param.ap_yield_arg = yield_arg; ad->ad_iter_param.ip_flags |= VOS_IT_FOR_DISCARD; - rc = vos_iterate(&ad->ad_iter_param, type, true, &ad->ad_anchors, - vos_aggregate_pre_cb, vos_aggregate_post_cb, - &ad->ad_agg_param, NULL); + rc = vos_iterate(&ad->ad_iter_param, type, true, &ad->ad_anchors, vos_aggregate_pre_cb, + vos_aggregate_post_cb, &ad->ad_agg_param, NULL); aggregate_exit(cont, mode); diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h index a813bc697de..6dc49e3bc95 100644 --- a/src/vos/vos_internal.h +++ b/src/vos/vos_internal.h @@ -28,40 +28,40 @@ #define VOS_MINOR_EPC_MAX EVT_MINOR_EPC_MAX -#define VOS_TX_LOG_FAIL(rc, ...) \ - do { \ - bool __is_err = true; \ - \ - if (rc >= 0) \ - break; \ - switch (rc) { \ - case -DER_TX_RESTART: \ - case -DER_INPROGRESS: \ - case -DER_EXIST: \ - case -DER_NONEXIST: \ - __is_err = false; \ - break; \ - } \ - D_CDEBUG(__is_err, DLOG_ERR, DB_IO, \ - __VA_ARGS__); \ +#define VOS_TX_LOG_FAIL(rc, ...) \ + do { \ + bool __is_err = true; \ + \ + if (rc >= 0) \ + break; \ + switch (rc) { \ + case -DER_TX_RESTART: \ + case -DER_INPROGRESS: \ + case -DER_UPDATE_AGAIN: \ + case -DER_EXIST: \ + case -DER_NONEXIST: \ + __is_err = false; \ + break; \ + } \ + D_CDEBUG(__is_err, DLOG_ERR, DB_IO, __VA_ARGS__); \ } while (0) -#define VOS_TX_TRACE_FAIL(rc, ...) \ - do { \ - bool __is_err = true; \ - \ - if (rc >= 0) \ - break; \ - switch (rc) { \ - case -DER_TX_RESTART: \ - case -DER_INPROGRESS: \ - case -DER_EXIST: \ - case -DER_NONEXIST: \ - __is_err = false; \ - break; \ - } \ - D_CDEBUG(__is_err, DLOG_ERR, DB_TRACE, \ - __VA_ARGS__); \ +#define VOS_TX_TRACE_FAIL(rc, ...) \ + do { \ + bool __is_err = true; \ + \ + if (rc >= 0) \ + break; \ + switch (rc) { \ + case -DER_TX_RESTART: \ + case -DER_INPROGRESS: \ + case -DER_UPDATE_AGAIN: \ + case -DER_EXIST: \ + case -DER_NONEXIST: \ + __is_err = false; \ + break; \ + } \ + D_CDEBUG(__is_err, DLOG_ERR, DB_TRACE, __VA_ARGS__); \ } while (0) #define VOS_CONT_ORDER 20 /* Order of container tree */ diff --git a/src/vos/vos_obj.c b/src/vos/vos_obj.c index bd2a1f51e2b..cab6590fa95 100644 --- a/src/vos/vos_obj.c +++ b/src/vos/vos_obj.c @@ -1747,6 +1747,9 @@ vos_obj_iter_prep(vos_iter_type_t type, vos_iter_param_t *param, D_GOTO(failed, rc); } + if (oiter->it_iter.it_for_purge) + oiter->it_obj->obj_aggregate = 1; + oiter->it_punched = oiter->it_obj->obj_ilog_info.ii_prior_punch; rc = obj_tree_init(oiter->it_obj); @@ -1902,6 +1905,9 @@ dkey_nested_iter_init(struct vos_obj_iter *oiter, struct vos_iter_info *info) return rc; } + if (oiter->it_iter.it_for_purge) + oiter->it_obj->obj_aggregate = 1; + rc = obj_tree_init(oiter->it_obj); if (rc != 0) @@ -1916,6 +1922,8 @@ dkey_nested_iter_init(struct vos_obj_iter *oiter, struct vos_iter_info *info) return 0; failed: + if (oiter->it_iter.it_for_purge) + oiter->it_obj->obj_aggregate = 0; vos_obj_release(vos_obj_cache_current(cont->vc_pool->vp_sysdb), oiter->it_obj, false); return rc; @@ -2213,9 +2221,12 @@ vos_obj_iter_fini(struct vos_iterator *iter) */ object = oiter->it_obj; if (oiter->it_flags != VOS_IT_KEY_TREE && object != NULL && - (iter->it_type == VOS_ITER_DKEY || !iter->it_from_parent)) + (iter->it_type == VOS_ITER_DKEY || !iter->it_from_parent)) { + if (oiter->it_iter.it_for_purge) + object->obj_aggregate = 0; vos_obj_release(vos_obj_cache_current(object->obj_cont->vc_pool->vp_sysdb), object, false); + } vos_ilog_fetch_finish(&oiter->it_ilog_info); D_FREE(oiter); diff --git a/src/vos/vos_obj.h b/src/vos/vos_obj.h index e4a8c11fd7a..818426e9c20 100644 --- a/src/vos/vos_obj.h +++ b/src/vos/vos_obj.h @@ -49,8 +49,10 @@ struct vos_object { struct vos_container *obj_cont; /** nobody should access this object */ bool obj_zombie; - /** Object is in discard */ - bool obj_discard; + /** Object is held for discard */ + uint32_t obj_discard : 1, + /** Object is held for aggregation */ + obj_aggregate : 1; }; enum { diff --git a/src/vos/vos_obj_cache.c b/src/vos/vos_obj_cache.c index a4577799053..2d4b99db281 100644 --- a/src/vos/vos_obj_cache.c +++ b/src/vos/vos_obj_cache.c @@ -237,8 +237,9 @@ vos_obj_discard_hold(struct daos_lru_cache *occ, struct vos_container *cont, dao return rc; D_ASSERTF(!obj->obj_discard, "vos_obj_hold should return an error if already in discard\n"); + D_ASSERTF(!obj->obj_aggregate, "vos_obj_hold should return an error if in aggregation\n"); - obj->obj_discard = true; + obj->obj_discard = 1; *objp = obj; return 0; @@ -247,7 +248,7 @@ vos_obj_discard_hold(struct daos_lru_cache *occ, struct vos_container *cont, dao void vos_obj_discard_release(struct daos_lru_cache *occ, struct vos_object *obj) { - obj->obj_discard = false; + obj->obj_discard = 0; vos_obj_release(occ, obj, false); } @@ -296,6 +297,22 @@ cache_object(struct daos_lru_cache *occ, struct vos_object **objp) return 0; } +static bool +vos_obj_op_conflict(struct vos_object *obj, bool obj_discard, uint32_t intent, bool create) +{ + if (obj->obj_discard) { + /** Mutually exclusive with create, discard and aggregation */ + if (create || intent == DAOS_INTENT_PURGE || obj_discard) + return true; + } else if (obj->obj_aggregate) { + /** Mutually exclusive with discard */ + if (obj_discard) + return true; + } + + return false; +} + int vos_obj_hold(struct daos_lru_cache *occ, struct vos_container *cont, daos_unit_oid_t oid, daos_epoch_range_t *epr, daos_epoch_t bound, @@ -406,7 +423,7 @@ vos_obj_hold(struct daos_lru_cache *occ, struct vos_container *cont, } check_object: - if (obj->obj_discard && (create || (flags & VOS_OBJ_DISCARD) != 0)) { + if (vos_obj_op_conflict(obj, (flags & VOS_OBJ_DISCARD) != 0, intent, create)) { /** Cleanup before assert so unit test that triggers doesn't corrupt the state */ vos_obj_release(occ, obj, false); /* Update request will retry with this error */