Skip to content

Commit

Permalink
[abd] remove dependency on linear abd
Browse files (browse the repository at this point in the history)
Stop assuming that ABD buffers are linear in:
- vdev_raidz
- zio, zio_checksum
- zfs_fm

Also change abd_alloc_for_io() to use abd_alloc().

Signed-off-by: Gvozden Neskovic <[email protected]>
  • Loading branch information
ironMann committed Sep 7, 2016
1 parent 9939ac0 commit e98a238
Show file tree
Hide file tree
Showing 8 changed files with 143 additions and 118 deletions.
9 changes: 7 additions & 2 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -3362,6 +3362,7 @@ zdb_read_block(char *thing, spa_t *spa)
void *lbuf, *buf;
char *s, *p, *dup, *vdev, *flagstr;
int i, error;
boolean_t borrowed = B_FALSE;

dup = strdup(thing);
s = strtok(dup, ":");
Expand Down Expand Up @@ -3432,7 +3433,7 @@ zdb_read_block(char *thing, spa_t *spa)
psize = size;
lsize = size;

pabd = abd_alloc_linear(SPA_MAXBLOCKSIZE, B_FALSE);
pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE);
lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);

BP_ZERO(bp);
Expand Down Expand Up @@ -3533,8 +3534,9 @@ zdb_read_block(char *thing, spa_t *spa)
buf = lbuf;
size = lsize;
} else {
buf = abd_to_buf(pabd);
size = psize;
buf = abd_borrow_buf_copy(pabd, size);
borrowed = B_TRUE;
}

if (flags & ZDB_FLAG_PRINT_BLKPTR)
Expand All @@ -3550,6 +3552,9 @@ zdb_read_block(char *thing, spa_t *spa)
else
zdb_dump_block(thing, buf, size, flags);

if (borrowed)
abd_return_buf_copy(pabd, buf, size);

out:
abd_free(pabd);
umem_free(lbuf, SPA_MAXBLOCKSIZE);
Expand Down
6 changes: 3 additions & 3 deletions include/sys/zio.h
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ typedef struct zio_prop {
typedef struct zio_cksum_report zio_cksum_report_t;

typedef void zio_cksum_finish_f(zio_cksum_report_t *rep,
const void *good_data);
const abd_t *good_data);
typedef void zio_cksum_free_f(void *cbdata, size_t size);

struct zio_bad_cksum; /* defined in zio_checksum.h */
Expand Down Expand Up @@ -573,14 +573,14 @@ extern hrtime_t zio_handle_io_delay(zio_t *zio);
extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, struct zio *zio,
uint64_t offset, uint64_t length, void *arg, struct zio_bad_cksum *info);
extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report,
const void *good_data, const void *bad_data, boolean_t drop_if_identical);
const abd_t *good_data, const abd_t *bad_data, boolean_t drop_if_identical);

extern void zfs_ereport_free_checksum(zio_cksum_report_t *report);

/* If we have the good data in hand, this function can be used */
extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
struct zio *zio, uint64_t offset, uint64_t length,
const void *good_data, const void *bad_data, struct zio_bad_cksum *info);
const abd_t *good_data, const abd_t *bad_data, struct zio_bad_cksum *info);

/* Called from spa_sync(), but primarily an injection handler */
extern void spa_handle_ignored_writes(spa_t *spa);
Expand Down
2 changes: 1 addition & 1 deletion include/sys/zio_checksum.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ extern int zio_checksum_equal(spa_t *, blkptr_t *, enum zio_checksum,
void *, uint64_t, uint64_t, zio_bad_cksum_t *);
extern void zio_checksum_compute(zio_t *, enum zio_checksum,
struct abd *, uint64_t);
extern int zio_checksum_error_impl(spa_t *, blkptr_t *, enum zio_checksum,
extern int zio_checksum_error_impl(spa_t *, const blkptr_t *, enum zio_checksum,
struct abd *, uint64_t, uint64_t, zio_bad_cksum_t *);
extern int zio_checksum_error(zio_t *zio, zio_bad_cksum_t *out);
extern enum zio_checksum spa_dedup_checksum(spa_t *spa);
Expand Down
6 changes: 1 addition & 5 deletions module/zfs/abd.c
Original file line number Diff line number Diff line change
Expand Up @@ -457,15 +457,11 @@ abd_alloc_sametype(abd_t *sabd, size_t size)
* This used to return linear ABDs (pending scatter/gather support in
* ldi_strategy()); it now simply calls vanilla abd_alloc(), so callers
* must not assume that the returned ABD is linear.
*
* LINUX ABD TODO - once vdev_disk.c has ABD page support change to vanilla
* - TODO vdev_disk.c now has ABD page support, but some disk label checksumming
* code still assumes linear ABD.
*/
abd_t *
abd_alloc_for_io(size_t size, boolean_t is_metadata)
{
return (abd_alloc_linear(size, is_metadata));
return (abd_alloc(size, is_metadata));
}

abd_t *
Expand Down
85 changes: 38 additions & 47 deletions module/zfs/vdev_raidz.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,7 @@ vdev_raidz_map_free(raidz_map_t *rm)
abd_free(rm->rm_col[c].rc_abd);

if (rm->rm_col[c].rc_gdata != NULL)
zio_buf_free(rm->rm_col[c].rc_gdata,
rm->rm_col[c].rc_size);
abd_free(rm->rm_col[c].rc_gdata);
}

size = 0;
Expand Down Expand Up @@ -158,14 +157,14 @@ vdev_raidz_cksum_free(void *arg, size_t ignored)
}

static void
vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data)
{
raidz_map_t *rm = zcr->zcr_cbdata;
size_t c = zcr->zcr_cbinfo;
size_t x;
const size_t c = zcr->zcr_cbinfo;
size_t x, offset;

const char *good = NULL;
char *bad;
const abd_t *good = NULL;
const abd_t *bad = rm->rm_col[c].rc_abd;

if (good_data == NULL) {
zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE);
Expand All @@ -180,8 +179,6 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
*/
if (rm->rm_col[0].rc_gdata == NULL) {
abd_t *bad_parity[VDEV_RAIDZ_MAXPARITY];
char *buf;
int offset;

/*
* Set up the rm_col[]s to generate the parity for
Expand All @@ -190,20 +187,21 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
*/
for (x = 0; x < rm->rm_firstdatacol; x++) {
bad_parity[x] = rm->rm_col[x].rc_abd;
rm->rm_col[x].rc_gdata =
zio_buf_alloc(rm->rm_col[x].rc_size);
rm->rm_col[x].rc_abd =
abd_get_from_buf(rm->rm_col[x].rc_gdata,
rm->rm_col[x].rc_size);
rm->rm_col[x].rc_gdata =
abd_alloc_for_io(rm->rm_col[x].rc_size,
B_FALSE);
}

/* fill in the data columns from good_data */
buf = (char *)good_data;
offset = 0;
for (; x < rm->rm_cols; x++) {
abd_put(rm->rm_col[x].rc_abd);
rm->rm_col[x].rc_abd = abd_get_from_buf(buf,
rm->rm_col[x].rc_size);
buf += rm->rm_col[x].rc_size;

rm->rm_col[x].rc_abd =
abd_get_offset_size((abd_t *)good_data,
offset, rm->rm_col[x].rc_size);
offset += rm->rm_col[x].rc_size;
}

/*
Expand All @@ -212,10 +210,8 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
vdev_raidz_generate_parity(rm);

/* restore everything back to its original state */
for (x = 0; x < rm->rm_firstdatacol; x++) {
abd_put(rm->rm_col[x].rc_abd);
for (x = 0; x < rm->rm_firstdatacol; x++)
rm->rm_col[x].rc_abd = bad_parity[x];
}

offset = 0;
for (x = rm->rm_firstdatacol; x < rm->rm_cols; x++) {
Expand All @@ -228,19 +224,19 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
}

ASSERT3P(rm->rm_col[c].rc_gdata, !=, NULL);
good = rm->rm_col[c].rc_gdata;
good = abd_get_offset(rm->rm_col[c].rc_gdata, 0);
} else {
/* adjust good_data to point at the start of our column */
good = good_data;

offset = 0;
for (x = rm->rm_firstdatacol; x < c; x++)
good += rm->rm_col[x].rc_size;
offset += rm->rm_col[x].rc_size;

good = abd_get_offset((abd_t *)good_data, offset);
}

bad = abd_borrow_buf_copy(rm->rm_col[c].rc_abd, rm->rm_col[c].rc_size);
/* we drop the ereport if it ends up that the data was good */
zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE);
abd_return_buf(rm->rm_col[c].rc_abd, bad, rm->rm_col[c].rc_size);
abd_put((abd_t *)good);
}

/*
Expand Down Expand Up @@ -283,15 +279,15 @@ vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg)
for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++)
size += rm->rm_col[c].rc_size;

rm->rm_abd_copy =
abd_alloc_sametype(rm->rm_col[rm->rm_firstdatacol].rc_abd, size);
rm->rm_abd_copy = abd_alloc_for_io(size, B_FALSE);

for (offset = 0, c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
raidz_col_t *col = &rm->rm_col[c];
abd_t *tmp = abd_get_offset_size(rm->rm_abd_copy, offset,
col->rc_size);

abd_copy(tmp, col->rc_abd, col->rc_size);

abd_put(col->rc_abd);
col->rc_abd = tmp;

Expand Down Expand Up @@ -712,9 +708,8 @@ vdev_raidz_io_start(zio_t *zio)
* Report a checksum error for a child of a RAID-Z device.
*/
static void
raidz_checksum_error(zio_t *zio, raidz_col_t *rc, void *bad_data)
raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
{
void *buf;
vdev_t *vd = zio->io_vd->vdev_child[rc->rc_devidx];

if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
Expand All @@ -728,11 +723,9 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc, void *bad_data)
zbc.zbc_has_cksum = 0;
zbc.zbc_injected = rm->rm_ecksuminjected;

buf = abd_borrow_buf_copy(rc->rc_abd, rc->rc_size);
zfs_ereport_post_checksum(zio->io_spa, vd, zio,
rc->rc_offset, rc->rc_size, buf, bad_data,
rc->rc_offset, rc->rc_size, rc->rc_abd, bad_data,
&zbc);
abd_return_buf(rc->rc_abd, buf, rc->rc_size);
}
}

Expand Down Expand Up @@ -765,16 +758,17 @@ raidz_checksum_verify(zio_t *zio)
static int
raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
{
void *orig[VDEV_RAIDZ_MAXPARITY];
abd_t *orig[VDEV_RAIDZ_MAXPARITY];
int c, ret = 0;
raidz_col_t *rc;

for (c = 0; c < rm->rm_firstdatacol; c++) {
rc = &rm->rm_col[c];
if (!rc->rc_tried || rc->rc_error != 0)
continue;
orig[c] = zio_buf_alloc(rc->rc_size);
abd_copy_to_buf(orig[c], rc->rc_abd, rc->rc_size);

orig[c] = abd_alloc_sametype(rc->rc_abd, rc->rc_size);
abd_copy(orig[c], rc->rc_abd, rc->rc_size);
}

vdev_raidz_generate_parity(rm);
Expand All @@ -783,12 +777,12 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
rc = &rm->rm_col[c];
if (!rc->rc_tried || rc->rc_error != 0)
continue;
if (bcmp(orig[c], abd_to_buf(rc->rc_abd), rc->rc_size) != 0) {
if (abd_cmp(orig[c], rc->rc_abd) != 0) {
raidz_checksum_error(zio, rc, orig[c]);
rc->rc_error = SET_ERROR(ECKSUM);
ret++;
}
zio_buf_free(orig[c], rc->rc_size);
abd_free(orig[c]);
}

return (ret);
Expand Down Expand Up @@ -818,7 +812,7 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
{
raidz_map_t *rm = zio->io_vsd;
raidz_col_t *rc;
void *orig[VDEV_RAIDZ_MAXPARITY];
abd_t *orig[VDEV_RAIDZ_MAXPARITY] = { NULL };
int tstore[VDEV_RAIDZ_MAXPARITY + 2];
int *tgts = &tstore[1];
int curr, next, i, c, n;
Expand Down Expand Up @@ -867,7 +861,7 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
ASSERT(orig[i] != NULL);
}

orig[n - 1] = zio_buf_alloc(rm->rm_col[0].rc_size);
orig[n - 1] = abd_alloc_for_io(rm->rm_col[0].rc_size, B_FALSE);

curr = 0;
next = tgts[curr];
Expand All @@ -886,8 +880,7 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
ASSERT3S(c, >=, 0);
ASSERT3S(c, <, rm->rm_cols);
rc = &rm->rm_col[c];
abd_copy_to_buf(orig[i], rc->rc_abd,
rc->rc_size);
abd_copy(orig[i], rc->rc_abd, rc->rc_size);
}

/*
Expand Down Expand Up @@ -917,8 +910,7 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
for (i = 0; i < n; i++) {
c = tgts[i];
rc = &rm->rm_col[c];
abd_copy_from_buf(rc->rc_abd, orig[i],
rc->rc_size);
abd_copy(rc->rc_abd, orig[i], rc->rc_size);
}

do {
Expand Down Expand Up @@ -955,9 +947,8 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
}
n--;
done:
for (i = 0; i < n; i++) {
zio_buf_free(orig[i], rm->rm_col[0].rc_size);
}
for (i = 0; i < n; i++)
abd_free(orig[i]);

return (ret);
}
Expand Down
23 changes: 16 additions & 7 deletions module/zfs/zfs_fm.c
Original file line number Diff line number Diff line change
Expand Up @@ -561,11 +561,11 @@ zei_range_total_size(zfs_ecksum_info_t *eip)

static zfs_ecksum_info_t *
annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
const uint8_t *goodbuf, const uint8_t *badbuf, size_t size,
const abd_t *goodabd, const abd_t *badabd, size_t size,
boolean_t drop_if_identical)
{
const uint64_t *good = (const uint64_t *)goodbuf;
const uint64_t *bad = (const uint64_t *)badbuf;
const uint64_t *good;
const uint64_t *bad;

uint64_t allset = 0;
uint64_t allcleared = 0;
Expand Down Expand Up @@ -609,13 +609,16 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
}
}

if (badbuf == NULL || goodbuf == NULL)
if (badabd == NULL || goodabd == NULL)
return (eip);

ASSERT3U(size, ==, nui64s * sizeof (uint64_t));
ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
ASSERT3U(size, <=, UINT32_MAX);

good = (const uint64_t *) abd_borrow_buf_copy((abd_t *)goodabd, size);
bad = (const uint64_t *) abd_borrow_buf_copy((abd_t *)badabd, size);

/* build up the range list by comparing the two buffers. */
for (idx = 0; idx < nui64s; idx++) {
if (good[idx] == bad[idx]) {
Expand Down Expand Up @@ -645,6 +648,8 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
*/
if (inline_size == 0 && drop_if_identical) {
kmem_free(eip, sizeof (*eip));
abd_return_buf((abd_t *)goodabd, (void *)good, size);
abd_return_buf((abd_t *)badabd, (void *)bad, size);
return (NULL);
}

Expand Down Expand Up @@ -685,6 +690,10 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
eip->zei_ranges[range].zr_start *= sizeof (uint64_t);
eip->zei_ranges[range].zr_end *= sizeof (uint64_t);
}

abd_return_buf((abd_t *)goodabd, (void *)good, size);
abd_return_buf((abd_t *)badabd, (void *)bad, size);

eip->zei_allowed_mingap *= sizeof (uint64_t);
inline_size *= sizeof (uint64_t);

Expand Down Expand Up @@ -781,8 +790,8 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
}

void
zfs_ereport_finish_checksum(zio_cksum_report_t *report,
const void *good_data, const void *bad_data, boolean_t drop_if_identical)
zfs_ereport_finish_checksum(zio_cksum_report_t *report, const abd_t *good_data,
const abd_t *bad_data, boolean_t drop_if_identical)
{
#ifdef _KERNEL
zfs_ecksum_info_t *info;
Expand Down Expand Up @@ -824,7 +833,7 @@ zfs_ereport_free_checksum(zio_cksum_report_t *rpt)
void
zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
struct zio *zio, uint64_t offset, uint64_t length,
const void *good_data, const void *bad_data, zio_bad_cksum_t *zbc)
const abd_t *good_data, const abd_t *bad_data, zio_bad_cksum_t *zbc)
{
#ifdef _KERNEL
nvlist_t *ereport = NULL;
Expand Down
Loading

0 comments on commit e98a238

Please sign in to comment.