Skip to content

Commit

Permalink
sstable: flush value blocks if 128 are buffered
Browse files Browse the repository at this point in the history
  • Loading branch information
dt committed Dec 28, 2023
1 parent 1cce3d0 commit 5aae780
Show file tree
Hide file tree
Showing 6 changed files with 417 additions and 29 deletions.
13 changes: 13 additions & 0 deletions sstable/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,16 @@ type WriterOptions struct {
// RequiredInPlaceValueBound mirrors
// Options.Experimental.RequiredInPlaceValueBound.
RequiredInPlaceValueBound UserKeyPrefixBound

// ValueBlockBufferLimit is the number of value blocks to buffer in-memory
// before flushing them to the underlying writer. Buffering these blocks and
// flushing them in groups, rather than interleaved block-by-block with data
// blocks, potentially improves locality of scans over data blocks in the
// presence of prefetching/read-ahead, page caching, etc.
//
// A value of 0 implies the default of max(8MB/BlockSize, 16) while a value of
// less than 0 disables buffering entirely.
ValueBlockBufferLimit int
}

func (o WriterOptions) ensureDefaults() WriterOptions {
Expand All @@ -288,6 +298,9 @@ func (o WriterOptions) ensureDefaults() WriterOptions {
if o.IndexBlockSize <= 0 {
o.IndexBlockSize = o.BlockSize
}
if o.ValueBlockBufferLimit == 0 {
o.ValueBlockBufferLimit = max(16, 8<<20/o.BlockSize)
}
if o.MergerName == "" {
o.MergerName = base.DefaultMerger.Name
}
Expand Down
315 changes: 315 additions & 0 deletions sstable/testdata/writer_value_blocks
Original file line number Diff line number Diff line change
Expand Up @@ -328,3 +328,318 @@ layout
787 version: 4
791 magic number: 0xf09faab3f09faab3
799 EOF

# Show value block buffering of 2 causing groups of 2 val blocks in the middle.
build block-size=8 buf-limit=2
blue@10.SET.20:blue10
blue@8.SET.18:blue8
blue@8.SET.16:blue8s
blue@6.SET.16:blue6isverylong
blue@6.SET.14:blue6isverylong1
blue@6.SET.12:blue6isverylong1
blue@6.SET.11:blue6isverylong1
blue@6.SET.10:blue6isverylong1
----
value-blocks: num-values 7, num-blocks: 6, size: 149

layout
----
0 data (33)
0 record (25 = 3 [0] + 15 + 7) [restart]
blue@10#20,1:blue10
25 [restart 0]
33 [trailer compression=none checksum=0x5fb0d551]
38 data (29)
38 record (21 = 3 [0] + 14 + 4) [restart]
blue@8#18,1:value handle {valueLen:5 blockNum:0 offsetInBlock:0}
59 [restart 38]
67 [trailer compression=none checksum=0x628e4a10]
72 data (29)
72 record (21 = 3 [0] + 14 + 4) [restart]
blue@8#16,1:value handle {valueLen:6 blockNum:0 offsetInBlock:5}
93 [restart 72]
101 [trailer compression=none checksum=0x4e65b9b6]
106 data (29)
106 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#16,1:value handle {valueLen:15 blockNum:1 offsetInBlock:0}
127 [restart 106]
135 [trailer compression=none checksum=0x9f60e629]
140 value-block (11)
156 value-block (15)
176 data (29)
176 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#14,1:value handle {valueLen:16 blockNum:2 offsetInBlock:0}
197 [restart 176]
205 [trailer compression=none checksum=0x62a8bb33]
210 data (29)
210 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#12,1:value handle {valueLen:16 blockNum:3 offsetInBlock:0}
231 [restart 210]
239 [trailer compression=none checksum=0x93d50e8f]
244 value-block (16)
265 value-block (16)
286 data (29)
286 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#11,1:value handle {valueLen:16 blockNum:4 offsetInBlock:0}
307 [restart 286]
315 [trailer compression=none checksum=0x681fe9ba]
320 data (29)
320 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#10,1:value handle {valueLen:16 blockNum:5 offsetInBlock:0}
341 [restart 320]
349 [trailer compression=none checksum=0x67e8a95e]
354 index (28)
354 block:0/33 [restart]
374 [restart 354]
382 [trailer compression=none checksum=0x32b37f08]
387 index (27)
387 block:38/29 [restart]
406 [restart 387]
414 [trailer compression=none checksum=0x21d27815]
419 index (30)
419 block:72/29 [restart]
441 [restart 419]
449 [trailer compression=none checksum=0xba0b26fe]
454 index (27)
454 block:106/29 [restart]
473 [restart 454]
481 [trailer compression=none checksum=0xccb5a649]
486 index (31)
486 block:176/29 [restart]
509 [restart 486]
517 [trailer compression=none checksum=0x1c2b03b0]
522 index (31)
522 block:210/29 [restart]
545 [restart 522]
553 [trailer compression=none checksum=0x16793618]
558 index (31)
558 block:286/29 [restart]
581 [restart 558]
589 [trailer compression=none checksum=0xee99d776]
594 index (26)
594 block:320/29 [restart]
612 [restart 594]
620 [trailer compression=none checksum=0x5d0eec20]
625 top-index (149)
625 block:354/28 [restart]
646 block:387/27 [restart]
666 block:419/30 [restart]
689 block:454/27 [restart]
709 block:486/31 [restart]
732 block:522/31 [restart]
755 block:558/31 [restart]
778 block:594/26 [restart]
796 [restart 625]
800 [restart 646]
804 [restart 666]
808 [restart 689]
812 [restart 709]
816 [restart 732]
820 [restart 755]
824 [restart 778]
774 [trailer compression=snappy checksum=0x513a3455]
779 value-block (16)
800 value-block (16)
821 value-index (24)
850 properties (678)
850 obsolete-key (16) [restart]
866 pebble.num.value-blocks (27)
893 pebble.num.values.in.value-blocks (21)
914 pebble.value-blocks.size (22)
936 rocksdb.block.based.table.index.type (43)
979 rocksdb.block.based.table.prefix.filtering (20)
999 rocksdb.block.based.table.whole.key.filtering (23)
1022 rocksdb.comparator (37)
1059 rocksdb.compression (16)
1075 rocksdb.compression_options (106)
1181 rocksdb.data.size (14)
1195 rocksdb.deleted.keys (15)
1210 rocksdb.external_sst_file.global_seqno (41)
1251 rocksdb.external_sst_file.version (14)
1265 rocksdb.filter.size (15)
1280 rocksdb.index.partitions (20)
1300 rocksdb.index.size (9)
1309 rocksdb.merge.operands (18)
1327 rocksdb.merge.operator (24)
1351 rocksdb.num.data.blocks (19)
1370 rocksdb.num.entries (11)
1381 rocksdb.num.range-deletions (19)
1400 rocksdb.prefix.extractor.name (31)
1431 rocksdb.property.collectors (34)
1465 rocksdb.raw.key.size (16)
1481 rocksdb.raw.value.size (14)
1495 rocksdb.top-level.index.size (25)
1520 [restart 850]
1528 [trailer compression=none checksum=0xe690121f]
1533 meta-index (64)
1533 pebble.value_index block:821/24 value-blocks-index-lengths: 1(num), 2(offset), 1(length) [restart]
1560 rocksdb.properties block:850/678 [restart]
1585 [restart 1533]
1589 [restart 1560]
1597 [trailer compression=none checksum=0xbb589b04]
1602 footer (53)
1602 checksum type: crc32c
1603 meta: offset=1533, length=64
1606 index: offset=625, length=149
1610 [padding]
1643 version: 4
1647 magic number: 0xf09faab3f09faab3
1655 EOF

# Show val block buffering limit of 1 flushing every block.
build block-size=8 buf-limit=1
blue@10.SET.20:blue10
blue@8.SET.18:blue8
blue@8.SET.16:blue8s
blue@6.SET.16:blue6isverylong
blue@6.SET.14:blue6isverylong1
blue@6.SET.12:blue6isverylong1
blue@6.SET.11:blue6isverylong1
blue@6.SET.10:blue6isverylong1
----
value-blocks: num-values 7, num-blocks: 6, size: 149


layout
----
0 data (33)
0 record (25 = 3 [0] + 15 + 7) [restart]
blue@10#20,1:blue10
25 [restart 0]
33 [trailer compression=none checksum=0x5fb0d551]
38 data (29)
38 record (21 = 3 [0] + 14 + 4) [restart]
blue@8#18,1:value handle {valueLen:5 blockNum:0 offsetInBlock:0}
59 [restart 38]
67 [trailer compression=none checksum=0x628e4a10]
72 data (29)
72 record (21 = 3 [0] + 14 + 4) [restart]
blue@8#16,1:value handle {valueLen:6 blockNum:0 offsetInBlock:5}
93 [restart 72]
101 [trailer compression=none checksum=0x4e65b9b6]
106 value-block (11)
122 data (29)
122 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#16,1:value handle {valueLen:15 blockNum:1 offsetInBlock:0}
143 [restart 122]
151 [trailer compression=none checksum=0x9f60e629]
156 value-block (15)
176 data (29)
176 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#14,1:value handle {valueLen:16 blockNum:2 offsetInBlock:0}
197 [restart 176]
205 [trailer compression=none checksum=0x62a8bb33]
210 value-block (16)
231 data (29)
231 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#12,1:value handle {valueLen:16 blockNum:3 offsetInBlock:0}
252 [restart 231]
260 [trailer compression=none checksum=0x93d50e8f]
265 value-block (16)
286 data (29)
286 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#11,1:value handle {valueLen:16 blockNum:4 offsetInBlock:0}
307 [restart 286]
315 [trailer compression=none checksum=0x681fe9ba]
320 data (29)
320 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#10,1:value handle {valueLen:16 blockNum:5 offsetInBlock:0}
341 [restart 320]
349 [trailer compression=none checksum=0x67e8a95e]
354 index (28)
354 block:0/33 [restart]
374 [restart 354]
382 [trailer compression=none checksum=0x32b37f08]
387 index (27)
387 block:38/29 [restart]
406 [restart 387]
414 [trailer compression=none checksum=0x21d27815]
419 index (30)
419 block:72/29 [restart]
441 [restart 419]
449 [trailer compression=none checksum=0xba0b26fe]
454 index (27)
454 block:122/29 [restart]
473 [restart 454]
481 [trailer compression=none checksum=0xf362eb28]
486 index (31)
486 block:176/29 [restart]
509 [restart 486]
517 [trailer compression=none checksum=0x1c2b03b0]
522 index (31)
522 block:231/29 [restart]
545 [restart 522]
553 [trailer compression=none checksum=0x7b2dd3bc]
558 index (31)
558 block:286/29 [restart]
581 [restart 558]
589 [trailer compression=none checksum=0xee99d776]
594 index (26)
594 block:320/29 [restart]
612 [restart 594]
620 [trailer compression=none checksum=0x5d0eec20]
625 top-index (149)
625 block:354/28 [restart]
646 block:387/27 [restart]
666 block:419/30 [restart]
689 block:454/27 [restart]
709 block:486/31 [restart]
732 block:522/31 [restart]
755 block:558/31 [restart]
778 block:594/26 [restart]
796 [restart 625]
800 [restart 646]
804 [restart 666]
808 [restart 689]
812 [restart 709]
816 [restart 732]
820 [restart 755]
824 [restart 778]
774 [trailer compression=snappy checksum=0x513a3455]
779 value-block (16)
800 value-block (16)
821 value-index (24)
850 properties (678)
850 obsolete-key (16) [restart]
866 pebble.num.value-blocks (27)
893 pebble.num.values.in.value-blocks (21)
914 pebble.value-blocks.size (22)
936 rocksdb.block.based.table.index.type (43)
979 rocksdb.block.based.table.prefix.filtering (20)
999 rocksdb.block.based.table.whole.key.filtering (23)
1022 rocksdb.comparator (37)
1059 rocksdb.compression (16)
1075 rocksdb.compression_options (106)
1181 rocksdb.data.size (14)
1195 rocksdb.deleted.keys (15)
1210 rocksdb.external_sst_file.global_seqno (41)
1251 rocksdb.external_sst_file.version (14)
1265 rocksdb.filter.size (15)
1280 rocksdb.index.partitions (20)
1300 rocksdb.index.size (9)
1309 rocksdb.merge.operands (18)
1327 rocksdb.merge.operator (24)
1351 rocksdb.num.data.blocks (19)
1370 rocksdb.num.entries (11)
1381 rocksdb.num.range-deletions (19)
1400 rocksdb.prefix.extractor.name (31)
1431 rocksdb.property.collectors (34)
1465 rocksdb.raw.key.size (16)
1481 rocksdb.raw.value.size (14)
1495 rocksdb.top-level.index.size (25)
1520 [restart 850]
1528 [trailer compression=none checksum=0xe690121f]
1533 meta-index (64)
1533 pebble.value_index block:821/24 value-blocks-index-lengths: 1(num), 2(offset), 1(length) [restart]
1560 rocksdb.properties block:850/678 [restart]
1585 [restart 1533]
1589 [restart 1560]
1597 [trailer compression=none checksum=0xbb589b04]
1602 footer (53)
1602 checksum type: crc32c
1603 meta: offset=1533, length=64
1606 index: offset=625, length=149
1610 [padding]
1643 version: 4
1647 magic number: 0xf09faab3f09faab3
1655 EOF
Loading

0 comments on commit 5aae780

Please sign in to comment.