Skip to content

Commit

Permalink
DAOS-16445 client: Add function to cycle OIDs non-sequentially (#14999)
Browse files Browse the repository at this point in the history
We've noticed that with sequential order, object placement is poor.

We get 40% fill for 8GiB files with 25 ranks and 16 targets per rank
with EC_2P1G8. With this patch, we get a much better distribution.

This patch adds the following:

1. A function for cycling oid.hi by incrementing it by a large prime
2. For DFS, randomize the starting value
3. Modify DFS to cycle OIDs using the new function.

Signed-off-by: Jeff Olivier <[email protected]>
  • Loading branch information
jolivier23 authored Aug 30, 2024
1 parent 9662e98 commit d2f062a
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 16 deletions.
15 changes: 9 additions & 6 deletions src/client/dfs/dfs_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,6 @@
/** Max recursion depth for symlinks */
#define DFS_MAX_RECURSION 40

/** MAX value for the HI OID */
#define MAX_OID_HI ((1UL << 32) - 1)

typedef uint64_t dfs_magic_t;
typedef uint16_t dfs_sb_ver_t;
typedef uint16_t dfs_layout_ver_t;
Expand Down Expand Up @@ -164,6 +161,8 @@ struct dfs {
daos_handle_t coh;
/** refcount on cont handle that through the DFS API */
uint32_t coh_refcount;
/** The last oid.hi in the sequence */
uint32_t last_hi;
/** Transaction handle epoch. DAOS_EPOCH_MAX for DAOS_TX_NONE */
daos_epoch_t th_epoch;
/** Transaction handle */
Expand Down Expand Up @@ -343,20 +342,24 @@ oid_gen(dfs_t *dfs, daos_oclass_id_t oclass, bool file, daos_obj_id_t *oid)

D_MUTEX_LOCK(&dfs->lock);
/** If we ran out of local OIDs, alloc one from the container */
if (dfs->oid.hi >= MAX_OID_HI) {
if (dfs->oid.hi == dfs->last_hi) {
/** Allocate an OID for the namespace */
rc = daos_cont_alloc_oids(dfs->coh, 1, &dfs->oid.lo, NULL);
if (rc) {
D_ERROR("daos_cont_alloc_oids() Failed (%d)\n", rc);
D_MUTEX_UNLOCK(&dfs->lock);
return daos_der2errno(rc);
}
dfs->oid.hi = 0;
/** Start such that dfs->last_hi will be final value */
dfs->oid.hi = dfs->last_hi;
}

/** set oid and lo, bump the current hi value */
oid->lo = dfs->oid.lo;
oid->hi = dfs->oid.hi++;
daos_obj_oid_cycle(&dfs->oid);
if (unlikely(dfs->oid.lo == RESERVED_LO && dfs->oid.hi <= 1))
daos_obj_oid_cycle(&dfs->oid); /* Avoid reserved oids */
oid->hi = dfs->oid.hi;
D_MUTEX_UNLOCK(&dfs->lock);

/** if a regular file, use UINT64 typed dkeys for the array object */
Expand Down
18 changes: 9 additions & 9 deletions src/client/dfs/mnt.c
Original file line number Diff line number Diff line change
Expand Up @@ -685,20 +685,20 @@ dfs_mount(daos_handle_t poh, daos_handle_t coh, int flags, dfs_t **_dfs)

/** if RW, allocate an OID for the namespace */
if (amode == O_RDWR) {
dfs->last_hi = (unsigned int)d_rand();
/** Avoid potential conflict with SB or ROOT */
if (dfs->last_hi <= 1)
dfs->last_hi = 2;

rc = daos_cont_alloc_oids(coh, 1, &dfs->oid.lo, NULL);
if (rc) {
D_ERROR("daos_cont_alloc_oids() Failed, " DF_RC "\n", DP_RC(rc));
D_GOTO(err_root, rc = daos_der2errno(rc));
}

/*
* if this is the first time we allocate on this container,
* account 0 for SB, 1 for root obj.
*/
if (dfs->oid.lo == RESERVED_LO)
dfs->oid.hi = ROOT_HI + 1;
else
dfs->oid.hi = 0;
dfs->oid.hi = dfs->last_hi;
/** Increment so that dfs->last_hi is the last value */
daos_obj_oid_cycle(&dfs->oid);
}

dfs->mounted = DFS_MOUNT;
Expand Down Expand Up @@ -1023,7 +1023,7 @@ dfs_global2local(daos_handle_t poh, daos_handle_t coh, int flags, d_iov_t glob,

/** allocate a new oid on the next file or dir creation */
dfs->oid.lo = 0;
dfs->oid.hi = MAX_OID_HI;
dfs->oid.hi = dfs->last_hi;

rc = D_MUTEX_INIT(&dfs->lock, NULL);
if (rc != 0) {
Expand Down
16 changes: 15 additions & 1 deletion src/include/daos_obj.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2015-2023 Intel Corporation.
* (C) Copyright 2015-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -564,6 +564,20 @@ daos_obj_generate_oid(daos_handle_t coh, daos_obj_id_t *oid,
enum daos_otype_t type, daos_oclass_id_t cid,
daos_oclass_hints_t hints, uint32_t args);

/**
 * Advance oid->hi to the next value of a non-sequential cycle over the 32-bit
 * range.
 *
 * Each call adds a fixed large prime to oid->hi and keeps only the low 32 bits
 * of the sum. Because the stride is odd it is coprime with 2^32, so calling
 * this function 2^32 times sets oid->hi to every unique 32-bit value before
 * the sequence repeats. Only oid->hi is written; oid->lo is left untouched.
 *
 * The caller is responsible for setting the initial value, tracking the final
 * value, and avoiding any values that are otherwise reserved.
 *
 * \param[in, out]	oid	oid to cycle
 */
static inline void
daos_obj_oid_cycle(daos_obj_id_t *oid)
{
	/**
	 * Truncate explicitly to 32 bits rather than masking with UINT_MAX:
	 * the width of unsigned int is implementation-defined, while the
	 * cycle documented above is over exactly 2^32 values.
	 */
	oid->hi = (uint32_t)(oid->hi + 999999937);
}

/**
* Open an DAOS object.
*
Expand Down

0 comments on commit d2f062a

Please sign in to comment.