Skip to content

Commit

Permalink
DAOS-14824 dfuse: Allow parallel readdir calls. (#13606)
Browse files Browse the repository at this point in the history
Remove the inode lock preventing parallel readdir calls.
Update readdir caching to be thread safe.
Required-githooks: true

Signed-off-by: Ashley Pittman <[email protected]>
  • Loading branch information
ashleypittman authored Feb 13, 2024
1 parent 8a058b7 commit 77d3e3a
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 12 deletions.
20 changes: 17 additions & 3 deletions src/client/dfuse/dfuse.h
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,23 @@ struct dfuse_readdir_hdl {
* readers from sharing the handle
*/
bool drh_valid;

/* Locking:
* There can be multiple readers from the same handle concurrently, to do this use
* read/write locks.
* Initially a read lock is taken and the cache is checked, if anything is present then the
* contents are returned and the lock dropped.
* Then a write lock is taken and the cache is checked, if anything is present then the lock
* is dropped and a read lock is taken, goto above.
* If there is nothing in the cache when the write lock is taken then the cache is extended.
* "plus" calls however require inode references and these may not be held by cache entries,
* therefore track how many cache entries do not hold hash table references and for
* readdir_plus calls where there are cache entries without references then hold a write
* lock from the start.
*/
pthread_rwlock_t drh_lock;

uint32_t drh_no_ref_count;
};

/* Drop a readdir handle from a open directory handle.
Expand Down Expand Up @@ -887,9 +904,6 @@ struct dfuse_inode_entry {

struct dfuse_cont *ie_dfs;

/* Lock, used to protect readdir calls */
pthread_mutex_t ie_lock;

/** Hash table of inodes
* All valid inodes are kept in a hash table, using the hash table locking.
*/
Expand Down
3 changes: 0 additions & 3 deletions src/client/dfuse/dfuse_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1161,7 +1161,6 @@ dfuse_ie_init(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie)
atomic_init(&ie->ie_il_count, 0);
atomic_fetch_add_relaxed(&dfuse_info->di_inode_count, 1);
D_INIT_LIST_HEAD(&ie->ie_evict_entry);
D_MUTEX_INIT(&ie->ie_lock, NULL);
}

void
Expand Down Expand Up @@ -1198,8 +1197,6 @@ dfuse_ie_close(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie)
d_hash_rec_decref(&dfp->dfp_cont_table, &dfc->dfs_entry);
}

D_MUTEX_DESTROY(&ie->ie_lock);

dfuse_ie_free(dfuse_info, ie);
}

Expand Down
3 changes: 3 additions & 0 deletions src/client/dfuse/dfuse_fuseops.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ dfuse_fuse_init(void *arg, struct fuse_conn_info *conn)
DFUSE_TRA_INFO(dfuse_info, "max write %#x", conn->max_write);
DFUSE_TRA_INFO(dfuse_info, "readahead %#x", conn->max_readahead);

if (conn->capable & FUSE_CAP_PARALLEL_DIROPS)
conn->want |= FUSE_CAP_PARALLEL_DIROPS;

DFUSE_TRA_INFO(dfuse_info, "kernel readdir cache support compiled in");

conn->want |= FUSE_CAP_READDIRPLUS;
Expand Down
46 changes: 40 additions & 6 deletions src/client/dfuse/ops/readdir.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,9 @@ _handle_init(struct dfuse_cont *dfc)

D_INIT_LIST_HEAD(&hdl->drh_cache_list);
atomic_init(&hdl->drh_ref, 1);

D_RWLOCK_INIT(&hdl->drh_lock, NULL);

hdl->drh_valid = true;
return hdl;
}
Expand Down Expand Up @@ -332,6 +335,7 @@ dfuse_do_readdir(struct dfuse_info *dfuse_info, fuse_req_t req, struct dfuse_obj
bool to_seek = false;
struct dfuse_readdir_hdl *hdl;
size_t size = *out_size;
bool wlock = false;

rc = ensure_rd_handle(dfuse_info, oh);
if (rc != 0)
Expand All @@ -350,6 +354,22 @@ dfuse_do_readdir(struct dfuse_info *dfuse_info, fuse_req_t req, struct dfuse_obj
oh->doh_kreaddir_started = true;
}

D_RWLOCK_RDLOCK(&hdl->drh_lock);

/* If this thread is doing readdir plus and not all entries in the cache have hash table
* references then reading from the cache also needs a write handle. This reader will
* take hash table references however so with multiple readers here only the first one
* through should need this, even in the case where the cache was populated without refs.
*/
if (plus && hdl->drh_no_ref_count != 0) {
DFUSE_TRA_DEBUG(hdl, "Relocking with write lock");
D_RWLOCK_UNLOCK(&hdl->drh_lock);
D_RWLOCK_WRLOCK(&hdl->drh_lock);
wlock = true;
}

restart:

DFUSE_TRA_DEBUG(oh, "plus %d offset %#lx idx %d idx_offset %#lx", plus, offset,
hdl->drh_dre_index, hdl->drh_dre[hdl->drh_dre_index].dre_offset);

Expand Down Expand Up @@ -473,6 +493,7 @@ dfuse_do_readdir(struct dfuse_info *dfuse_info, fuse_req_t req, struct dfuse_obj
drc->drc_stbuf = entry.attr;
d_hash_rec_addref(&dfuse_info->dpi_iet, rlink);
drc->drc_rlink = rlink;
hdl->drh_no_ref_count--;
}

set_entry_params(&entry, ie);
Expand Down Expand Up @@ -512,6 +533,14 @@ dfuse_do_readdir(struct dfuse_info *dfuse_info, fuse_req_t req, struct dfuse_obj
}
}

if (hdl->drh_caching && !wlock) {
DFUSE_TRA_DEBUG(hdl, "Relocking with write lock");
D_RWLOCK_UNLOCK(&hdl->drh_lock);
D_RWLOCK_WRLOCK(&hdl->drh_lock);
wlock = true;
goto restart;
}

if (!to_seek) {
if (hdl->drh_dre_last_index == 0) {
if (offset != hdl->drh_dre[hdl->drh_dre_index].dre_offset &&
Expand All @@ -537,11 +566,16 @@ dfuse_do_readdir(struct dfuse_info *dfuse_info, fuse_req_t req, struct dfuse_obj
/* Drop if shared */
if (oh->doh_rd->drh_caching) {
DFUSE_TRA_DEBUG(oh, "Switching to private handle");
D_RWLOCK_UNLOCK(&hdl->drh_lock);

dfuse_dre_drop(dfuse_info, oh);
oh->doh_rd = _handle_init(oh->doh_ie->ie_dfs);
hdl = oh->doh_rd;
if (oh->doh_rd == NULL)
D_GOTO(out_reset, rc = ENOMEM);

D_RWLOCK_WRLOCK(&hdl->drh_lock);

DFUSE_TRA_UP(oh->doh_rd, oh, "readdir");
} else {
dfuse_readdir_reset(hdl);
Expand Down Expand Up @@ -710,6 +744,7 @@ dfuse_do_readdir(struct dfuse_info *dfuse_info, fuse_req_t req, struct dfuse_obj
if (drc) {
drc->drc_stbuf.st_mode = stbuf.st_mode;
drc->drc_stbuf.st_ino = stbuf.st_ino;
hdl->drh_no_ref_count++;
}
}
if (written > size - buff_offset) {
Expand Down Expand Up @@ -773,11 +808,15 @@ dfuse_do_readdir(struct dfuse_info *dfuse_info, fuse_req_t req, struct dfuse_obj
D_GOTO(out_reset, rc);

*out_size = buff_offset;
D_RWLOCK_UNLOCK(&hdl->drh_lock);

return 0;

out_reset:
if (hdl)
if (hdl) {
dfuse_readdir_reset(hdl);
D_RWLOCK_UNLOCK(&hdl->drh_lock);
}
D_ASSERT(rc != 0);
return rc;
}
Expand All @@ -789,8 +828,6 @@ dfuse_cb_readdir(fuse_req_t req, struct dfuse_obj_hdl *oh, size_t size, off_t of
char *reply_buff = NULL;
int rc = EIO;

D_MUTEX_LOCK(&oh->doh_ie->ie_lock);

/* Handle the EOD case, the kernel will keep reading until it receives zero replies so
* reply early in this case.
*/
Expand All @@ -817,9 +854,6 @@ dfuse_cb_readdir(fuse_req_t req, struct dfuse_obj_hdl *oh, size_t size, off_t of
rc = dfuse_do_readdir(dfuse_info, req, oh, reply_buff, &size, offset, plus);

out:

D_MUTEX_UNLOCK(&oh->doh_ie->ie_lock);

if (rc)
DFUSE_REPLY_ERR_RAW(oh, req, rc);
else
Expand Down

0 comments on commit 77d3e3a

Please sign in to comment.