Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split dmu_zfetch() speculation and execution parts. #11652

Merged
merged 3 commits into from
Mar 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 16 additions & 7 deletions include/sys/dmu_zfetch.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,28 +49,37 @@ typedef struct zfetch {

typedef struct zstream {
uint64_t zs_blkid; /* expect next access at this blkid */
uint64_t zs_pf_blkid; /* next block to prefetch */
uint64_t zs_pf_blkid1; /* first block to prefetch */
uint64_t zs_pf_blkid; /* block to prefetch up to */
Comment on lines +52 to +53
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a fan of the blkid1->blkid naming convention for a range; how about something like zs_pf_first_blkid and zs_pf_last_blkid ?


/*
* We will next prefetch the L1 indirect block of this level-0
* block id.
*/
uint64_t zs_ipf_blkid;
uint64_t zs_ipf_blkid1; /* first block to prefetch */
uint64_t zs_ipf_blkid; /* block to prefetch up to */

kmutex_t zs_lock; /* protects stream */
hrtime_t zs_atime; /* time last prefetch issued */
hrtime_t zs_start_time; /* start of last prefetch */
list_node_t zs_node; /* link for zf_stream */
hrtime_t zs_atime; /* time last prefetch issued */
zfetch_t *zs_fetch; /* parent fetch */
zfs_refcount_t zs_blocks; /* number of pending blocks in the stream */
boolean_t zs_missed; /* stream saw cache misses */
zfs_refcount_t zs_callers; /* number of pending callers */
/*
* Number of stream references: dnode, callers and pending blocks.
* The stream memory is freed when the number returns to zero.
*/
zfs_refcount_t zs_refs;
} zstream_t;

void zfetch_init(void);
void zfetch_fini(void);

void dmu_zfetch_init(zfetch_t *, struct dnode *);
void dmu_zfetch_fini(zfetch_t *);
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t,
zstream_t *dmu_zfetch_prepare(zfetch_t *, uint64_t, uint64_t, boolean_t,
boolean_t);
void dmu_zfetch_run(zstream_t *, boolean_t, boolean_t);
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t, boolean_t,
boolean_t);


Expand Down
5 changes: 3 additions & 2 deletions module/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1640,7 +1640,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
mutex_exit(&db->db_mtx);
if (err == 0 && prefetch) {
dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE,
flags & DB_RF_HAVESTRUCT);
B_FALSE, flags & DB_RF_HAVESTRUCT);
}
DB_DNODE_EXIT(db);
DBUF_STAT_BUMP(hash_hits);
Expand All @@ -1662,6 +1662,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
*/
if (!err && prefetch) {
dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE,
db->db_state != DB_CACHED,
flags & DB_RF_HAVESTRUCT);
}

Expand Down Expand Up @@ -1691,7 +1692,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
mutex_exit(&db->db_mtx);
if (prefetch) {
dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE,
flags & DB_RF_HAVESTRUCT);
B_TRUE, flags & DB_RF_HAVESTRUCT);
}
DB_DNODE_EXIT(db);
DBUF_STAT_BUMP(hash_misses);
Expand Down
35 changes: 28 additions & 7 deletions module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -497,10 +497,12 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
boolean_t read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
{
dmu_buf_t **dbp;
zstream_t *zs = NULL;
uint64_t blkid, nblks, i;
uint32_t dbuf_flags;
int err;
zio_t *zio = NULL;
boolean_t missed = B_FALSE;

ASSERT(length <= DMU_MAX_ACCESS);

Expand Down Expand Up @@ -536,30 +538,49 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
zio = zio_root(dn->dn_objset->os_spa, NULL, NULL,
ZIO_FLAG_CANFAIL);
blkid = dbuf_whichblock(dn, 0, offset);
if ((flags & DMU_READ_NO_PREFETCH) == 0 &&
DNODE_META_IS_CACHEABLE(dn) && length <= zfetch_array_rd_sz) {
/*
* Prepare the zfetch before initiating the demand reads, so
* that if multiple threads block on same indirect block, we
* base predictions on the original less racy request order.
*/
zs = dmu_zfetch_prepare(&dn->dn_zfetch, blkid, nblks,
read && DNODE_IS_CACHEABLE(dn), B_TRUE);
ahrens marked this conversation as resolved.
Show resolved Hide resolved
}
for (i = 0; i < nblks; i++) {
dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag);
if (db == NULL) {
if (zs)
dmu_zfetch_run(zs, missed, B_TRUE);
ahrens marked this conversation as resolved.
Show resolved Hide resolved
rw_exit(&dn->dn_struct_rwlock);
dmu_buf_rele_array(dbp, nblks, tag);
if (read)
zio_nowait(zio);
return (SET_ERROR(EIO));
}

/* initiate async i/o */
if (read)
/*
* Initiate async demand data read.
* We check the db_state after calling dbuf_read() because
* (1) dbuf_read() may change the state to CACHED due to a
* hit in the ARC, and (2) on a cache miss, a child will
* have been added to "zio" but not yet completed, so the
* state will not yet be CACHED.
*/
if (read) {
(void) dbuf_read(db, zio, dbuf_flags);
if (db->db_state != DB_CACHED)
missed = B_TRUE;
ahrens marked this conversation as resolved.
Show resolved Hide resolved
}
dbp[i] = &db->db;
}

if (!read)
zfs_racct_write(length, nblks);

if ((flags & DMU_READ_NO_PREFETCH) == 0 &&
DNODE_META_IS_CACHEABLE(dn) && length <= zfetch_array_rd_sz) {
dmu_zfetch(&dn->dn_zfetch, blkid, nblks,
read && DNODE_IS_CACHEABLE(dn), B_TRUE);
}
if (zs)
dmu_zfetch_run(zs, missed, B_TRUE);
rw_exit(&dn->dn_struct_rwlock);

if (read) {
Expand Down
Loading