Skip to content

Commit

Permalink
4757 ZFS embedded-data block pointers ("zero block compression")
Browse files Browse the repository at this point in the history
4913 zfs release should not be subject to space checks
Reviewed by: Adam Leventhal <[email protected]>
Reviewed by: Max Grossman <[email protected]>
Reviewed by: George Wilson <[email protected]>
Reviewed by: Christopher Siden <[email protected]>
Reviewed by: Dan McDonald <[email protected]>
Approved by: Dan McDonald <[email protected]>
  • Loading branch information
ahrens authored and Christopher Siden committed Jun 5, 2014
1 parent 107c18c commit 5d7b4d4
Show file tree
Hide file tree
Showing 50 changed files with 1,306 additions and 279 deletions.
4 changes: 2 additions & 2 deletions usr/src/cmd/mdb/common/mdb/mdb_ctf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1543,10 +1543,10 @@ vread_helper(mdb_ctf_id_t modid, char *modbuf,
*
* typedef struct mdb_zio {
* enum zio_type io_type;
* void *io_waiter;
* uintptr_t io_waiter;
* struct {
* struct {
* void *list_next;
* uintptr_t list_next;
* } list_head;
* } io_parent_list;
* int io_error;
Expand Down
8 changes: 4 additions & 4 deletions usr/src/cmd/truss/expound.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
/*
* Copyright 2012 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
*/

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
Expand Down Expand Up @@ -5071,9 +5071,9 @@ show_zfs_ioc(private_t *pri, long addr)
(void) printf(" zc_defer_destroy=%d\n",
(int)zc.zc_defer_destroy);
}
if (zc.zc_temphold) {
(void) printf(" zc_temphold=%d\n",
(int)zc.zc_temphold);
if (zc.zc_flags) {
(void) printf(" zc_flags=0x%x\n",
zc.zc_flags);
}
if (zc.zc_action_handle) {
(void) printf(" zc_action_handle=%llu\n",
Expand Down
65 changes: 52 additions & 13 deletions usr/src/cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -1032,8 +1032,17 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
return;
}

blkbuf[0] = '\0';
if (BP_IS_EMBEDDED(bp)) {
(void) sprintf(blkbuf,
"EMBEDDED et=%u %llxL/%llxP B=%llu",
(int)BPE_GET_ETYPE(bp),
(u_longlong_t)BPE_GET_LSIZE(bp),
(u_longlong_t)BPE_GET_PSIZE(bp),
(u_longlong_t)bp->blk_birth);
return;
}

blkbuf[0] = '\0';
for (int i = 0; i < ndvas; i++)
(void) snprintf(blkbuf + strlen(blkbuf),
buflen - strlen(blkbuf), "%llu:%llx:%llx ",
Expand All @@ -1051,7 +1060,7 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
"%llxL/%llxP F=%llu B=%llu/%llu",
(u_longlong_t)BP_GET_LSIZE(bp),
(u_longlong_t)BP_GET_PSIZE(bp),
(u_longlong_t)bp->blk_fill,
(u_longlong_t)BP_GET_FILL(bp),
(u_longlong_t)bp->blk_birth,
(u_longlong_t)BP_PHYSICAL_BIRTH(bp));
}
Expand All @@ -1064,8 +1073,10 @@ print_indirect(blkptr_t *bp, const zbookmark_t *zb,
char blkbuf[BP_SPRINTF_LEN];
int l;

ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
if (!BP_IS_EMBEDDED(bp)) {
ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
}

(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));

Expand Down Expand Up @@ -1119,10 +1130,10 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
err = visit_indirect(spa, dnp, cbp, &czb);
if (err)
break;
fill += cbp->blk_fill;
fill += BP_GET_FILL(cbp);
}
if (!err)
ASSERT3U(fill, ==, bp->blk_fill);
ASSERT3U(fill, ==, BP_GET_FILL(bp));
(void) arc_buf_remove_ref(buf, &buf);
}

Expand Down Expand Up @@ -1789,14 +1800,14 @@ dump_dir(objset_t *os)

if (dds.dds_type == DMU_OST_META) {
dds.dds_creation_txg = TXG_INITIAL;
usedobjs = os->os_rootbp->blk_fill;
usedobjs = BP_GET_FILL(os->os_rootbp);
refdbytes = os->os_spa->spa_dsl_pool->
dp_mos_dir->dd_phys->dd_used_bytes;
} else {
dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
}

ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill);
ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp));

zdb_nicenum(refdbytes, numbuf);

Expand Down Expand Up @@ -2107,6 +2118,9 @@ typedef struct zdb_cb {
zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
uint64_t zcb_dedup_asize;
uint64_t zcb_dedup_blocks;
uint64_t zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES];
uint64_t zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES]
[BPE_PAYLOAD_SIZE];
uint64_t zcb_start;
uint64_t zcb_lastprint;
uint64_t zcb_totalasize;
Expand Down Expand Up @@ -2161,6 +2175,13 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,

}

if (BP_IS_EMBEDDED(bp)) {
zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++;
zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)]
[BPE_GET_PSIZE(bp)]++;
return;
}

if (dump_opt['L'])
return;

Expand Down Expand Up @@ -2258,7 +2279,8 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,

is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));

if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
if (!BP_IS_EMBEDDED(bp) &&
(dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) {
size_t size = BP_GET_PSIZE(bp);
void *data = zio_data_buf_alloc(size);
int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
Expand Down Expand Up @@ -2450,7 +2472,7 @@ dump_block_stats(spa_t *spa)
zdb_blkstats_t *zb, *tzb;
uint64_t norm_alloc, norm_space, total_alloc, total_found;
int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
int leaks = 0;
boolean_t leaks = B_FALSE;

(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
(dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
Expand Down Expand Up @@ -2538,7 +2560,7 @@ dump_block_stats(spa_t *spa)
(u_longlong_t)total_alloc,
(dump_opt['L']) ? "unreachable" : "leaked",
(longlong_t)(total_alloc - total_found));
leaks = 1;
leaks = B_TRUE;
}

if (tzb->zb_count == 0)
Expand Down Expand Up @@ -2570,6 +2592,23 @@ dump_block_stats(spa_t *spa)
(void) printf("\tSPA allocated: %10llu used: %5.2f%%\n",
(u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);

for (bp_embedded_type_t i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
if (zcb.zcb_embedded_blocks[i] == 0)
continue;
(void) printf("\n");
(void) printf("\tadditional, non-pointer bps of type %u: "
"%10llu\n",
i, (u_longlong_t)zcb.zcb_embedded_blocks[i]);

if (dump_opt['b'] >= 3) {
(void) printf("\t number of (compressed) bytes: "
"number of bps\n");
dump_histogram(zcb.zcb_embedded_histogram[i],
sizeof (zcb.zcb_embedded_histogram[i]) /
sizeof (zcb.zcb_embedded_histogram[i][0]), 0);
}
}

if (tzb->zb_ditto_samevdev != 0) {
(void) printf("\tDittoed blocks on same vdev: %llu\n",
(longlong_t)tzb->zb_ditto_samevdev);
Expand Down Expand Up @@ -2682,14 +2721,14 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
avl_index_t where;
zdb_ddt_entry_t *zdde, zdde_search;

if (BP_IS_HOLE(bp))
if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
return (0);

if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
(void) printf("traversing objset %llu, %llu objects, "
"%lu blocks so far\n",
(u_longlong_t)zb->zb_objset,
(u_longlong_t)bp->blk_fill,
(u_longlong_t)BP_GET_FILL(bp),
avl_numnodes(t));
}

Expand Down
17 changes: 13 additions & 4 deletions usr/src/cmd/zfs/zfs_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -256,9 +256,9 @@ get_usage(zfs_help_t idx)
case HELP_ROLLBACK:
return (gettext("\trollback [-rRf] <snapshot>\n"));
case HELP_SEND:
return (gettext("\tsend [-DnPpRv] [-[iI] snapshot] "
return (gettext("\tsend [-DnPpRve] [-[iI] snapshot] "
"<snapshot>\n"
"\tsend [-i snapshot|bookmark] "
"\tsend [-e] [-i snapshot|bookmark] "
"<filesystem|volume|snapshot>\n"));
case HELP_SET:
return (gettext("\tset <property=value> "
Expand Down Expand Up @@ -573,6 +573,7 @@ finish_progress(char *done)
free(pt_header);
pt_header = NULL;
}

/*
* zfs clone [-p] [-o prop=value] ... <snap> <fs | vol>
*
Expand Down Expand Up @@ -3299,6 +3300,7 @@ rollback_check_dependent(zfs_handle_t *zhp, void *data)
zfs_close(zhp);
return (0);
}

/*
* Report any snapshots more recent than the one specified. Used when '-r' is
* not specified. We reuse this same callback for the snapshot dependents - if
Expand Down Expand Up @@ -3638,7 +3640,7 @@ zfs_do_send(int argc, char **argv)
boolean_t extraverbose = B_FALSE;

/* check options */
while ((c = getopt(argc, argv, ":i:I:RDpvnP")) != -1) {
while ((c = getopt(argc, argv, ":i:I:RDpvnPe")) != -1) {
switch (c) {
case 'i':
if (fromname)
Expand Down Expand Up @@ -3673,6 +3675,9 @@ zfs_do_send(int argc, char **argv)
case 'n':
flags.dryrun = B_TRUE;
break;
case 'e':
flags.embed_data = B_TRUE;
break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
Expand Down Expand Up @@ -3711,6 +3716,7 @@ zfs_do_send(int argc, char **argv)
if (strchr(argv[0], '@') == NULL ||
(fromname && strchr(fromname, '#') != NULL)) {
char frombuf[ZFS_MAXNAMELEN];
enum lzc_send_flags lzc_flags = 0;

if (flags.replicate || flags.doall || flags.props ||
flags.dedup || flags.dryrun || flags.verbose ||
Expand All @@ -3725,6 +3731,9 @@ zfs_do_send(int argc, char **argv)
if (zhp == NULL)
return (1);

if (flags.embed_data)
lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;

if (fromname != NULL &&
(fromname[0] == '#' || fromname[0] == '@')) {
/*
Expand All @@ -3738,7 +3747,7 @@ zfs_do_send(int argc, char **argv)
(void) strlcat(frombuf, fromname, sizeof (frombuf));
fromname = frombuf;
}
err = zfs_send_one(zhp, fromname, STDOUT_FILENO);
err = zfs_send_one(zhp, fromname, STDOUT_FILENO, lzc_flags);
zfs_close(zhp);
return (err != 0);
}
Expand Down
55 changes: 44 additions & 11 deletions usr/src/cmd/zstreamdump/zstreamdump.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
*/
#define DUMP_GROUPING 4

uint64_t drr_record_count[DRR_NUMTYPES];
uint64_t total_write_size = 0;
uint64_t total_stream_len = 0;
FILE *send_stream = 0;
Expand Down Expand Up @@ -123,7 +122,7 @@ print_block(char *buf, int length)
* Start printing ASCII characters at a constant offset, after
* the hex prints. Leave 3 characters per byte on a line (2 digit
* hex number plus 1 space) plus spaces between characters and
* groupings
* groupings.
*/
int ascii_start = BYTES_PER_LINE * 3 +
BYTES_PER_LINE / DUMP_GROUPING + 2;
Expand Down Expand Up @@ -160,6 +159,8 @@ int
main(int argc, char *argv[])
{
char *buf = malloc(INITIAL_BUFLEN);
uint64_t drr_record_count[DRR_NUMTYPES] = { 0 };
uint64_t total_records = 0;
dmu_replay_record_t thedrr;
dmu_replay_record_t *drr = &thedrr;
struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
Expand All @@ -170,6 +171,7 @@ main(int argc, char *argv[])
struct drr_write_byref *drrwbr = &thedrr.drr_u.drr_write_byref;
struct drr_free *drrf = &thedrr.drr_u.drr_free;
struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
char c;
boolean_t verbose = B_FALSE;
boolean_t first = B_TRUE;
Expand Down Expand Up @@ -264,6 +266,7 @@ main(int argc, char *argv[])
}

drr_record_count[drr->drr_type]++;
total_records++;

switch (drr->drr_type) {
case DRR_BEGIN:
Expand Down Expand Up @@ -376,8 +379,8 @@ main(int argc, char *argv[])
drro->drr_bonuslen);
}
if (drro->drr_bonuslen > 0) {
(void) ssread(buf, P2ROUNDUP(drro->drr_bonuslen,
8), &zc);
(void) ssread(buf,
P2ROUNDUP(drro->drr_bonuslen, 8), &zc);
if (dump) {
print_block(buf,
P2ROUNDUP(drro->drr_bonuslen, 8));
Expand Down Expand Up @@ -506,6 +509,38 @@ main(int argc, char *argv[])
print_block(buf, drrs->drr_length);
}
break;
case DRR_WRITE_EMBEDDED:
if (do_byteswap) {
drrwe->drr_object =
BSWAP_64(drrwe->drr_object);
drrwe->drr_offset =
BSWAP_64(drrwe->drr_offset);
drrwe->drr_length =
BSWAP_64(drrwe->drr_length);
drrwe->drr_toguid =
BSWAP_64(drrwe->drr_toguid);
drrwe->drr_lsize =
BSWAP_32(drrwe->drr_lsize);
drrwe->drr_psize =
BSWAP_32(drrwe->drr_psize);
}
if (verbose) {
(void) printf("WRITE_EMBEDDED object = %llu "
"offset = %llu length = %llu\n"
"toguid = %llx comp = %u etype = %u "
"lsize = %u psize = %u\n",
(u_longlong_t)drrwe->drr_object,
(u_longlong_t)drrwe->drr_offset,
(u_longlong_t)drrwe->drr_length,
(u_longlong_t)drrwe->drr_toguid,
drrwe->drr_compression,
drrwe->drr_etype,
drrwe->drr_lsize,
drrwe->drr_psize);
}
(void) ssread(buf,
P2ROUNDUP(drrwe->drr_psize, 8), &zc);
break;
}
pcksum = zc;
}
Expand All @@ -524,18 +559,16 @@ main(int argc, char *argv[])
(u_longlong_t)drr_record_count[DRR_FREEOBJECTS]);
(void) printf("\tTotal DRR_WRITE records = %lld\n",
(u_longlong_t)drr_record_count[DRR_WRITE]);
(void) printf("\tTotal DRR_WRITE_BYREF records = %lld\n",
(u_longlong_t)drr_record_count[DRR_WRITE_BYREF]);
(void) printf("\tTotal DRR_WRITE_EMBEDDED records = %lld\n",
(u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED]);
(void) printf("\tTotal DRR_FREE records = %lld\n",
(u_longlong_t)drr_record_count[DRR_FREE]);
(void) printf("\tTotal DRR_SPILL records = %lld\n",
(u_longlong_t)drr_record_count[DRR_SPILL]);
(void) printf("\tTotal records = %lld\n",
(u_longlong_t)(drr_record_count[DRR_BEGIN] +
drr_record_count[DRR_OBJECT] +
drr_record_count[DRR_FREEOBJECTS] +
drr_record_count[DRR_WRITE] +
drr_record_count[DRR_FREE] +
drr_record_count[DRR_SPILL] +
drr_record_count[DRR_END]));
(u_longlong_t)total_records);
(void) printf("\tTotal write size = %lld (0x%llx)\n",
(u_longlong_t)total_write_size, (u_longlong_t)total_write_size);
(void) printf("\tTotal stream length = %lld (0x%llx)\n",
Expand Down
Loading

0 comments on commit 5d7b4d4

Please sign in to comment.