Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

patches series for dfuse #15645

Merged
merged 33 commits into from
Dec 29, 2024
Merged
Changes from 1 commit
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
1518ea8
DAOS-15682 dfuse: Perform reads in larger chunks. (#14212)
ashleypittman Sep 26, 2024
a34691b
DAOS-16729 dfuse: Remove deprecated single-threaded option. (#15345)
ashleypittman Nov 4, 2024
ded7560
DAOS-16736 dfuse: Add a common struct for active IE data. (#15362)
ashleypittman Nov 8, 2024
6abf25a
DAOS-16686 dfuse: Move pre-read code to inode from file handle. (#15488)
ashleypittman Nov 15, 2024
e644113
DAOS-16686 dfuse: Fix overlapping chunk reads
wangdi1 Dec 18, 2024
fd27521
DAOS-16686 dfuse: Detect matching reads to avoid network access
wangdi1 Dec 18, 2024
ae9b786
DAOS-15626 dfuse: Improve linear-read detection code
wangdi1 Dec 18, 2024
fbe5483
DAOS-16686 dfuse: avoid duplicate RPC between readahead and read
wangdi1 Dec 18, 2024
58a2513
DAOS-16686 dfuse: set time for new created entry
wangdi1 Dec 18, 2024
cc1b504
DAOS-16686 dfuse: optimize open
wangdi1 Dec 18, 2024
866f078
DAOS-16686 dfuse: use chan patch to avoid some contention
wangdi1 Dec 18, 2024
a331daa
DAOS-16686 dfuse: force readdir plus for all cases temporarily
wangdi1 Dec 18, 2024
3309425
DAOS-16686 dfuse: read from cache for readahead
wangdi1 Dec 18, 2024
a101c88
DAOS-16686 dfuse: fix style
wangdi1 Dec 18, 2024
463edf3
DAOS-16686 dfuse: fix style
wangdi1 Dec 18, 2024
fcdc58c
Merge remote-tracking branch 'refs/remotes/origin/wangdi/google_26_df…
wangdi1 Dec 18, 2024
683d317
DAOS-16686 dfuse: revert chan patch
wangdi1 Dec 18, 2024
687c74b
Run-GHA: true
wangdi1 Dec 20, 2024
ede7e64
DAOS-16686 dfuse: various fixes
wangdi1 Dec 21, 2024
40755d7
DAOS-16686 dfuse: various fixes
wangdi1 Dec 21, 2024
6f0eba8
Merge remote-tracking branch 'refs/remotes/origin/wangdi/google_26_df…
wangdi1 Dec 22, 2024
5a9b03b
DAOS-16686 dfuse: a few fixes for cleanup
wangdi1 Dec 23, 2024
8840964
DAOS-16686 dfuse: fix style
wangdi1 Dec 23, 2024
81f5ff4
DAOS-16686 dfuse: fix hang for chunk read
wangdi1 Dec 24, 2024
9d30f9f
DAOS-16686 dfuse: fix style
wangdi1 Dec 25, 2024
67e1e6c
DAOS-16686 dfuse: fix the typo in utils test
wangdi1 Dec 28, 2024
77643c9
DAOS-16686 dfuse: fix the typo in utils test
wangdi1 Dec 28, 2024
26f3696
Merge remote-tracking branch 'refs/remotes/origin/wangdi/google_26_df…
wangdi1 Dec 28, 2024
9d681a4
DAOS-16686 dfuse: fix the memory leak
wangdi1 Dec 28, 2024
651ba04
DAOS-16686 dfuse: fix style
wangdi1 Dec 28, 2024
c01a526
Merge branch 'wangdi/google_26' into wangdi/google_26_dfuse
wangdi1 Dec 28, 2024
8af7463
Merge branch 'wangdi/google_26' into wangdi/google_26_dfuse
wangdi1 Dec 28, 2024
c171530
Merge remote-tracking branch 'refs/remotes/origin/wangdi/google_26_df…
wangdi1 Dec 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
DAOS-16736 dfuse: Add a common struct for active IE data. (#15362)
Create an active_inode struct and allocate it for all inodes which have more than
one open handle. This allows us to share state/caching data across open handles
easier and to better support concurrent readers. Future work here will improve
performance for concurrent readers when caching is used, and allow us to make
the in-memory inode struct smaller which will save memory.

Signed-off-by: Ashley Pittman [email protected]
  • Loading branch information
ashleypittman authored and wangdi1 committed Dec 18, 2024

Verified

This commit was signed with the committer’s verified signature.
Loic-Dumas Loïc Dumas
commit ded7560de9fa7844592d0cf85e77617828a1029f
1 change: 1 addition & 0 deletions src/client/dfuse/SConscript
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@ DFUSE_SRC = ['dfuse_core.c',
'dfuse_main.c',
'dfuse_fuseops.c',
'inval.c',
'file.c',
'dfuse_cont.c',
'dfuse_thread.c',
'dfuse_pool.c']
19 changes: 18 additions & 1 deletion src/client/dfuse/dfuse.h
Original file line number Diff line number Diff line change
@@ -1009,12 +1009,29 @@ struct dfuse_inode_entry {
*/
ATOMIC bool ie_linear_read;

struct active_inode *ie_active;

/* Entry on the evict list */
d_list_t ie_evict_entry;
};

struct read_chunk_core *ie_chunk;
/* State attached to an inode entry through ie_active.  It is allocated by active_ie_init()
 * when the inode's open count goes from zero to one and freed again once the count drops back
 * to zero.
 */
struct active_inode {
/* List head, initialised empty by active_ie_init().
 * NOTE(review): presumably the list of read chunks for this inode - confirm against the
 * read code.
 */
d_list_t chunks;
/* Spinlock initialised by active_ie_init() and destroyed when the struct is freed.
 * NOTE(review): presumably guards the chunks list - confirm.
 */
pthread_spinlock_t lock;
};

/* Increase active count on inode. This takes a reference and allocates ie->ie_active as required */
int
active_ie_init(struct dfuse_inode_entry *ie);

/* Mark an oh as closing and drop the ref on inode active */
bool
active_oh_decref(struct dfuse_obj_hdl *oh);

/* Decrease active count on inode, called on error where there is no oh */
void
active_ie_decref(struct dfuse_inode_entry *ie);

/* Flush write-back cache writes to an inode. It does this by waiting for and then releasing an
* exclusive lock on the inode. Writes take a shared lock so this will block until all pending
* writes are complete.
1 change: 1 addition & 0 deletions src/client/dfuse/dfuse_core.c
Original file line number Diff line number Diff line change
@@ -1274,6 +1274,7 @@ dfuse_ie_close(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie)
atomic_load_relaxed(&ie->ie_il_count));
D_ASSERTF(atomic_load_relaxed(&ie->ie_open_count) == 0, "open_count is %d",
atomic_load_relaxed(&ie->ie_open_count));
D_ASSERT(!ie->ie_active);

if (ie->ie_obj) {
rc = dfs_release(ie->ie_obj);
94 changes: 94 additions & 0 deletions src/client/dfuse/file.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/**
* (C) Copyright 2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/

#include "dfuse_common.h"
#include "dfuse.h"

/* A lock is needed here, not for ie_open_count which is updated atomically here and elsewhere
 * but to ensure that ie_active is also atomically updated with the reference count.
 */
static pthread_mutex_t alock = PTHREAD_MUTEX_INITIALIZER;

/* Take an open reference on an inode, setting up its shared active data on first open.
 *
 * ie_open_count is incremented with alock held.  If the previous count was zero then
 * ie->ie_active is allocated and its spinlock and chunk list are initialised.
 *
 * If that setup fails the increment is undone before returning, so a failed call leaves the
 * open count unchanged and the caller must not call active_ie_decref() for it.
 *
 * Perhaps combine with dfuse_open_handle_init?
 *
 * \param[in] ie	Inode entry to take the reference on.
 *
 * \return		-DER_SUCCESS on success, -DER_NOMEM if the allocation fails, otherwise
 *			the error returned by D_SPIN_INIT().
 */
int
active_ie_init(struct dfuse_inode_entry *ie)
{
	uint32_t oc;
	int      rc = -DER_SUCCESS;

	D_MUTEX_LOCK(&alock);

	oc = atomic_fetch_add_relaxed(&ie->ie_open_count, 1);

	DFUSE_TRA_DEBUG(ie, "Addref to %d", oc + 1);

	/* Only the first reference needs to create the active data */
	if (oc != 0)
		goto out;

	D_ALLOC_PTR(ie->ie_active);
	if (!ie->ie_active)
		D_GOTO(out, rc = -DER_NOMEM);

	rc = D_SPIN_INIT(&ie->ie_active->lock, 0);
	if (rc != -DER_SUCCESS) {
		D_FREE(ie->ie_active);
		goto out;
	}
	D_INIT_LIST_HEAD(&ie->ie_active->chunks);
out:
	/* rc is only non-zero when the first-open setup above failed.  Callers do not decref on
	 * error so give the reference back here, still under alock, to keep ie_open_count and
	 * ie_active consistent.
	 */
	if (rc != -DER_SUCCESS)
		atomic_fetch_sub_relaxed(&ie->ie_open_count, 1);

	D_MUTEX_UNLOCK(&alock);
	return rc;
}

static void
ah_free(struct dfuse_inode_entry *ie)
{
D_SPIN_DESTROY(&ie->ie_active->lock);
D_FREE(ie->ie_active);
}

bool
active_oh_decref(struct dfuse_obj_hdl *oh)
{
uint32_t oc;
bool rcb = true;

D_MUTEX_LOCK(&alock);

oc = atomic_fetch_sub_relaxed(&oh->doh_ie->ie_open_count, 1);
D_ASSERTF(oc >= 1, "Invalid decref from %d on %p %p", oc, oh, oh->doh_ie);

DFUSE_TRA_DEBUG(oh->doh_ie, "Decref to %d", oc - 1);

if (oc != 1)
goto out;

rcb = read_chunk_close(oh->doh_ie);

ah_free(oh->doh_ie);
out:
D_MUTEX_UNLOCK(&alock);
return rcb;
}

/* Drop an open reference on an inode directly, without an open handle.
 *
 * The inode's ie_open_count is decremented with alock held and the active data is freed when
 * the last reference goes away.  Unlike active_oh_decref() this does not call
 * read_chunk_close().
 *
 * \param[in] ie	Inode entry to drop the reference on.
 */
void
active_ie_decref(struct dfuse_inode_entry *ie)
{
	uint32_t oc;

	D_MUTEX_LOCK(&alock);

	oc = atomic_fetch_sub_relaxed(&ie->ie_open_count, 1);
	D_ASSERTF(oc >= 1, "Invalid decref from %d on %p", oc, ie);

	DFUSE_TRA_DEBUG(ie, "Decref to %d", oc - 1);

	/* oc is the value before the decrement, so 1 means this was the final reference */
	if (oc == 1)
		ah_free(ie);

	D_MUTEX_UNLOCK(&alock);
}
12 changes: 8 additions & 4 deletions src/client/dfuse/ops/create.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2016-2023 Intel Corporation.
* (C) Copyright 2016-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
@@ -190,7 +190,7 @@ dfuse_cb_create(fuse_req_t req, struct dfuse_inode_entry *parent, const char *na
/** duplicate the file handle for the fuse handle */
rc = dfs_dup(dfs->dfs_ns, oh->doh_obj, O_RDWR, &ie->ie_obj);
if (rc)
D_GOTO(release, rc);
D_GOTO(drop_ie, rc);

oh->doh_writeable = true;

@@ -217,14 +217,18 @@ dfuse_cb_create(fuse_req_t req, struct dfuse_inode_entry *parent, const char *na

dfuse_compute_inode(dfs, &ie->ie_oid, &ie->ie_stat.st_ino);

atomic_fetch_add_relaxed(&ie->ie_open_count, 1);
rc = active_ie_init(ie);
if (rc != -DER_SUCCESS)
goto drop_oh;

/* Return the new inode data, and keep the parent ref */
dfuse_reply_entry(dfuse_info, ie, &fi_out, true, req);

return;
release:
drop_oh:
dfs_release(oh->doh_obj);
drop_ie:
dfs_release(ie->ie_obj);
err:
DFUSE_REPLY_ERR_RAW(parent, req, rc);
dfuse_oh_free(dfuse_info, oh);
12 changes: 12 additions & 0 deletions src/client/dfuse/ops/lookup.c
Original file line number Diff line number Diff line change
@@ -88,6 +88,18 @@ dfuse_reply_entry(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie,
D_GOTO(out_err, rc = EIO);
}

/* Make the inode active for the create case */
if (ie->ie_active) {
D_ASSERT(atomic_load_relaxed(&ie->ie_open_count) == 1);
active_ie_decref(ie);
rc = active_ie_init(inode);
if (rc != -DER_SUCCESS) {
atomic_fetch_sub_relaxed(&ie->ie_ref, 1);
dfuse_ie_close(dfuse_info, ie);
D_GOTO(out_err, rc);
}
}

DFUSE_TRA_DEBUG(inode,
"Maybe updating parent inode %#lx dfs_ino %#lx",
entry.ino, ie->ie_dfs->dfs_ino);
25 changes: 16 additions & 9 deletions src/client/dfuse/ops/open.c
Original file line number Diff line number Diff line change
@@ -93,19 +93,21 @@ dfuse_cb_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)

fi_out.fh = (uint64_t)oh;

rc = active_ie_init(ie);
if (rc)
goto err;

/*
* dfs_dup() just locally duplicates the file handle. If we have
* O_TRUNC flag, we need to truncate the file manually.
*/
if (fi->flags & O_TRUNC) {
rc = dfs_punch(ie->ie_dfs->dfs_ns, ie->ie_obj, 0, DFS_MAX_FSIZE);
if (rc)
D_GOTO(err, rc);
D_GOTO(decref, rc);
dfuse_dcache_evict(oh->doh_ie);
}

atomic_fetch_add_relaxed(&ie->ie_open_count, 1);

/* Enable this for files up to the max read size. */
if (prefetch && oh->doh_parent_dir &&
atomic_load_relaxed(&oh->doh_parent_dir->ie_linear_read) && ie->ie_stat.st_size > 0 &&
@@ -127,25 +129,32 @@ dfuse_cb_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
dfuse_pre_read(dfuse_info, oh);

return;
decref:
active_ie_decref(ie);
err:
dfuse_oh_free(dfuse_info, oh);
DFUSE_REPLY_ERR_RAW(ie, req, rc);
}

/* Release a file handle, called after close() by an application.
*
* Can be invoked concurrently on the same inode.
*/
void
dfuse_cb_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
{
struct dfuse_info *dfuse_info = fuse_req_userdata(req);
struct dfuse_obj_hdl *oh = (struct dfuse_obj_hdl *)fi->fh;
struct dfuse_inode_entry *ie = NULL;
int rc;
uint32_t oc;
uint32_t il_calls;

/* Perform the opposite of what the ioctl call does, always change the open handle count
* but the inode only tracks number of open handles with non-zero ioctl counts
*/

D_ASSERT(oh->doh_ie->ie_active);

DFUSE_TRA_DEBUG(oh, "Closing %d", oh->doh_caching);

DFUSE_IE_WFLUSH(oh->doh_ie);
@@ -207,17 +216,15 @@ dfuse_cb_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
if (il_calls != 0) {
atomic_fetch_sub_relaxed(&oh->doh_ie->ie_il_count, 1);
}
oc = atomic_fetch_sub_relaxed(&oh->doh_ie->ie_open_count, 1);
if (oc == 1) {
if (read_chunk_close(oh->doh_ie))
oh->doh_linear_read = true;
}

if (oh->doh_evict_on_close) {
ie = oh->doh_ie;
atomic_fetch_add_relaxed(&ie->ie_ref, 1);
}

if (active_oh_decref(oh))
oh->doh_linear_read = true;

rc = dfs_release(oh->doh_obj);
if (rc == 0) {
DFUSE_REPLY_ZERO_OH(oh, req);
14 changes: 9 additions & 5 deletions src/client/dfuse/ops/opendir.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2016-2023 Intel Corporation.
* (C) Copyright 2016-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
@@ -19,6 +19,10 @@ dfuse_cb_opendir(fuse_req_t req, struct dfuse_inode_entry *ie, struct fuse_file_
if (!oh)
D_GOTO(err, rc = ENOMEM);

rc = active_ie_init(ie);
if (rc != -DER_SUCCESS)
D_GOTO(free, rc = daos_der2errno(rc));

DFUSE_TRA_UP(oh, ie, "open handle");

dfuse_open_handle_init(dfuse_info, oh, ie);
@@ -35,12 +39,11 @@ dfuse_cb_opendir(fuse_req_t req, struct dfuse_inode_entry *ie, struct fuse_file_
fi_out.keep_cache = 1;
}

atomic_fetch_add_relaxed(&ie->ie_open_count, 1);

DFUSE_REPLY_OPEN_DIR(oh, req, &fi_out);
return;
err:
free:
D_FREE(oh);
err:
DFUSE_REPLY_ERR_RAW(ie, req, rc);
}

@@ -57,7 +60,8 @@ dfuse_cb_releasedir(fuse_req_t req, struct dfuse_inode_entry *ino, struct fuse_f

if (atomic_load_relaxed(&oh->doh_il_calls) != 0)
atomic_fetch_sub_relaxed(&oh->doh_ie->ie_il_count, 1);
atomic_fetch_sub_relaxed(&oh->doh_ie->ie_open_count, 1);

active_oh_decref(oh);

DFUSE_TRA_DEBUG(oh, "Kernel cache flags invalid %d started %d finished %d",
oh->doh_kreaddir_invalid, oh->doh_kreaddir_started,
Loading