From d2f062a2c7bb8ef87c9ab32d56536608d694006a Mon Sep 17 00:00:00 2001 From: Jeff Olivier Date: Fri, 30 Aug 2024 15:55:27 -0600 Subject: [PATCH] DAOS-16445 client: Add function to cycle OIDs non-sequentially (#14999) We've noticed that with sequential order, object placement is poor. We get 40% fill for 8GiB files with 25 ranks and 16 targets per rank with EC_2P1G8. With this patch, we get a much better distribution. This patch adds the following: 1. A function for cycling oid.hi incrementing by a large prime 2. For DFS, randomize the starting value 3. Modify DFS to cycle OIDs using the new function. Signed-off-by: Jeff Olivier --- src/client/dfs/dfs_internal.h | 15 +++++++++------ src/client/dfs/mnt.c | 18 +++++++++--------- src/include/daos_obj.h | 16 +++++++++++++++- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/src/client/dfs/dfs_internal.h b/src/client/dfs/dfs_internal.h index 99f2fb8cde2..7425fc2f00d 100644 --- a/src/client/dfs/dfs_internal.h +++ b/src/client/dfs/dfs_internal.h @@ -99,9 +99,6 @@ /** Max recursion depth for symlinks */ #define DFS_MAX_RECURSION 40 -/** MAX value for the HI OID */ -#define MAX_OID_HI ((1UL << 32) - 1) - typedef uint64_t dfs_magic_t; typedef uint16_t dfs_sb_ver_t; typedef uint16_t dfs_layout_ver_t; @@ -164,6 +161,8 @@ struct dfs { daos_handle_t coh; /** refcount on cont handle that through the DFS API */ uint32_t coh_refcount; + /** The last oid.hi in the sequence */ + uint32_t last_hi; /** Transaction handle epoch. DAOS_EPOCH_MAX for DAOS_TX_NONE */ daos_epoch_t th_epoch; /** Transaction handle */ @@ -343,7 +342,7 @@ oid_gen(dfs_t *dfs, daos_oclass_id_t oclass, bool file, daos_obj_id_t *oid) D_MUTEX_LOCK(&dfs->lock); /** If we ran out of local OIDs, alloc one from the container */ - if (dfs->oid.hi >= MAX_OID_HI) { + if (dfs->oid.hi == dfs->last_hi) { /** Allocate an OID for the namespace */ rc = daos_cont_alloc_oids(dfs->coh, 1, &dfs->oid.lo, NULL); if (rc) { @@ -351,12 +350,16 @@ oid_gen(dfs_t *dfs, daos_oclass_id_t oclass, bool file, daos_obj_id_t *oid) D_MUTEX_UNLOCK(&dfs->lock); return daos_der2errno(rc); } - dfs->oid.hi = 0; + /** Start such that dfs->last_hi will be final value */ + dfs->oid.hi = dfs->last_hi; } /** set oid and lo, bump the current hi value */ oid->lo = dfs->oid.lo; - oid->hi = dfs->oid.hi++; + daos_obj_oid_cycle(&dfs->oid); + if (unlikely(dfs->oid.lo == RESERVED_LO && dfs->oid.hi <= 1)) + daos_obj_oid_cycle(&dfs->oid); /* Avoid reserved oids */ + oid->hi = dfs->oid.hi; D_MUTEX_UNLOCK(&dfs->lock); /** if a regular file, use UINT64 typed dkeys for the array object */ diff --git a/src/client/dfs/mnt.c b/src/client/dfs/mnt.c index a73fafe34df..d270be1e414 100644 --- a/src/client/dfs/mnt.c +++ b/src/client/dfs/mnt.c @@ -685,20 +685,20 @@ dfs_mount(daos_handle_t poh, daos_handle_t coh, int flags, dfs_t **_dfs) /** if RW, allocate an OID for the namespace */ if (amode == O_RDWR) { + dfs->last_hi = (unsigned int)d_rand(); + /** Avoid potential conflict with SB or ROOT */ + if (dfs->last_hi <= 1) + dfs->last_hi = 2; + rc = daos_cont_alloc_oids(coh, 1, &dfs->oid.lo, NULL); if (rc) { D_ERROR("daos_cont_alloc_oids() Failed, " DF_RC "\n", DP_RC(rc)); D_GOTO(err_root, rc = daos_der2errno(rc)); } - /* - * if this is the first time we allocate on this container, - * account 0 for SB, 1 for root obj. - */ - if (dfs->oid.lo == RESERVED_LO) - dfs->oid.hi = ROOT_HI + 1; - else - dfs->oid.hi = 0; + dfs->oid.hi = dfs->last_hi; + /** Increment so that dfs->last_hi is the last value */ + daos_obj_oid_cycle(&dfs->oid); } dfs->mounted = DFS_MOUNT; @@ -1023,7 +1023,7 @@ dfs_global2local(daos_handle_t poh, daos_handle_t coh, int flags, d_iov_t glob, /** allocate a new oid on the next file or dir creation */ dfs->oid.lo = 0; - dfs->oid.hi = MAX_OID_HI; + dfs->oid.hi = dfs->last_hi; rc = D_MUTEX_INIT(&dfs->lock, NULL); if (rc != 0) { diff --git a/src/include/daos_obj.h b/src/include/daos_obj.h index 316d1b5547b..52b15ade40b 100644 --- a/src/include/daos_obj.h +++ b/src/include/daos_obj.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2015-2023 Intel Corporation. + * (C) Copyright 2015-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -564,6 +564,20 @@ daos_obj_generate_oid(daos_handle_t coh, daos_obj_id_t *oid, enum daos_otype_t type, daos_oclass_id_t cid, daos_oclass_hints_t hints, uint32_t args); +/** + * This function, if called 2^32 times will set oid->hi to every unique 32-bit + * value. The caller is responsible for setting the initial value, tracking the + * final value, and avoiding any values that are otherwise reserved. + * + * \param[in, out] oid oid to cycle + */ +static inline void +daos_obj_oid_cycle(daos_obj_id_t *oid) +{ + /** Uses a large prime number to guarantee hitting every unique value */ + oid->hi = (oid->hi + 999999937) & UINT_MAX; +} + /** * Open an DAOS object. *