Skip to content

Commit

Permalink
Make RocksDB checksum type configurable (apache#3793)
Browse files Browse the repository at this point in the history
### Motivation
Fix apache#3734 (comment)

We have two rocksDB tables, one for the ledger index, and another for the entry log location.
 - ledger index RocksDB table: Use the default table option, and the checksum is `kCRC32c`
 - entry log location RocksDB table: Use configured table option, and the checksum is `kxxHash`

When we upgrade RocksDB from 6.10.2 to 7.9.2, the default table checksum changes from `kCRC32c` to `kXXH3`, and `kXXH3` is only supported since RocksDB 6.27. Rolling back to RocksDB 6.10.2 will then fail because 6.10.2 doesn't support the `kXXH3` checksum type.

### Modifications
In this PR, I make the RocksDB checksum type configurable. Note that this changes the ledger index RocksDB table's checksum type from the default `kCRC32c` to `kxxHash`. I have tested the compatibility of the two checksum types within and across multiple RocksDB versions, and it works fine.

After setting both RocksDB tables' checksum type to `kxxHash`, upgrading RocksDB from 6.10.2 to 7.9.2 and then rolling back to 6.10.2 works fine.

### More to discuss
When writing the unit test to read the table checksum type from RocksDB configuration files, it failed. I found the related issue on RocksDB: facebook/rocksdb#5297
The related PR: facebook/rocksdb#10826

It means we still can't load RocksDB table options from configuration files. Maybe I missed some parts about reading RocksDB table options from the configuration file.

If this issue exists, we do **NOT** recommend users configure RocksDB configurations through configuration files.

@merlimat @eolivelli @dlg99 Please help take a look, thanks.

(cherry picked from commit 3844bf1)
  • Loading branch information
hangc0276 authored and dlg99 committed Jul 2, 2024
1 parent a4a7c5c commit 7198b03
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ public class KeyValueStorageRocksDB implements KeyValueStorage {
private static final String ROCKSDB_NUM_FILES_IN_LEVEL0 = "dbStorage_rocksDB_numFilesInLevel0";
private static final String ROCKSDB_MAX_SIZE_IN_LEVEL1_MB = "dbStorage_rocksDB_maxSizeInLevel1MB";
private static final String ROCKSDB_FORMAT_VERSION = "dbStorage_rocksDB_format_version";
private static final String ROCKSDB_CHECKSUM_TYPE = "dbStorage_rocksDB_checksum_type";

public KeyValueStorageRocksDB(String basePath, String subPath, DbConfigType dbConfigType, ServerConfiguration conf)
throws IOException {
Expand Down Expand Up @@ -174,6 +175,7 @@ private RocksDB initializeRocksDBWithBookieConf(String basePath, String subPath,
ServerConfiguration conf, boolean readOnly) throws IOException {
Options options = new Options();
options.setCreateIfMissing(true);
ChecksumType checksumType = ChecksumType.valueOf(conf.getString(ROCKSDB_CHECKSUM_TYPE, "kxxHash"));

if (dbConfigType == DbConfigType.EntryLocation) {
/* Set default RocksDB block-cache size to 10% / numberOfLedgers of direct memory, unless override */
Expand Down Expand Up @@ -214,7 +216,7 @@ private RocksDB initializeRocksDBWithBookieConf(String basePath, String subPath,
tableOptions.setBlockSize(blockSize);
tableOptions.setBlockCache(cache);
tableOptions.setFormatVersion(formatVersion);
tableOptions.setChecksumType(ChecksumType.kxxHash);
tableOptions.setChecksumType(checksumType);
if (bloomFilterBitsPerKey > 0) {
tableOptions.setFilterPolicy(new BloomFilter(bloomFilterBitsPerKey, false));
}
Expand All @@ -226,6 +228,9 @@ private RocksDB initializeRocksDBWithBookieConf(String basePath, String subPath,
options.setTableFormatConfig(tableOptions);
} else {
this.cache = null;
BlockBasedTableConfig tableOptions = new BlockBasedTableConfig();
tableOptions.setChecksumType(checksumType);
options.setTableFormatConfig(tableOptions);
}

// Configure file path
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
import java.util.List;
import org.apache.bookkeeper.conf.ServerConfiguration;
import org.junit.Test;
import org.rocksdb.BlockBasedTableConfig;
import org.rocksdb.ChecksumType;
import org.rocksdb.ColumnFamilyDescriptor;
import org.rocksdb.ColumnFamilyOptions;
import org.rocksdb.CompressionType;
Expand Down Expand Up @@ -78,4 +80,37 @@ public void testRocksDBInitiateWithConfigurationFile() throws Exception {
assertEquals(1, familyOptions.maxWriteBufferNumber());
rocksDB.close();
}

/**
 * Checks the table checksum type of an entry-location store whose options are exposed as a
 * plain {@link Options} object (no column family descriptors are reported).
 *
 * <p>No checksum type is set on the {@link ServerConfiguration}, so the expected value is
 * {@code kxxHash}.
 *
 * @throws Exception if the temporary directory or the RocksDB store cannot be created
 */
@Test
public void testReadChecksumTypeFromBookieConfiguration() throws Exception {
    ServerConfiguration configuration = new ServerConfiguration();
    // NOTE(review): presumably this relative path does not resolve to an existing options file,
    // which is why the store is expected to report no column family descriptors -- confirm.
    configuration.setEntryLocationRocksdbConf("entry_location_rocksdb.conf");
    File tmpDir = Files.createTempDirectory("bk-kv-rocksdbtest-conf").toFile();
    Files.createDirectory(Paths.get(tmpDir.toString(), "subDir"));
    KeyValueStorageRocksDB rocksDB = new KeyValueStorageRocksDB(tmpDir.toString(), "subDir",
            KeyValueStorageFactory.DbConfigType.EntryLocation, configuration);
    try {
        assertNull(rocksDB.getColumnFamilyDescriptors());

        Options options = (Options) rocksDB.getOptions();
        assertEquals(ChecksumType.kxxHash,
                ((BlockBasedTableConfig) options.tableFormatConfig()).checksumType());
    } finally {
        // Close the store even when an assertion fails, as the sibling test does on success.
        rocksDB.close();
    }
}

/**
 * Checks that the table checksum type declared in {@code test_entry_location_rocksdb.conf}
 * is visible on the column family options of a store initialized from that file.
 *
 * <p>Currently disabled (the {@code @Test} annotation is commented out): RocksDB cannot load
 * BlockBasedTableConfig from an options file, see
 * https://github.com/facebook/rocksdb/issues/5297. Re-enable once
 * https://github.com/facebook/rocksdb/pull/10826 is merged.
 *
 * @throws Exception if the temporary directory or the RocksDB store cannot be created
 */
//@Test
public void testReadChecksumTypeFromConfigurationFile() throws Exception {
    ServerConfiguration configuration = new ServerConfiguration();
    URL url = getClass().getClassLoader().getResource("test_entry_location_rocksdb.conf");
    configuration.setEntryLocationRocksdbConf(url.getPath());
    File tmpDir = Files.createTempDirectory("bk-kv-rocksdbtest-file").toFile();
    Files.createDirectory(Paths.get(tmpDir.toString(), "subDir"));
    KeyValueStorageRocksDB rocksDB = new KeyValueStorageRocksDB(tmpDir.toString(), "subDir",
            KeyValueStorageFactory.DbConfigType.EntryLocation, configuration);
    try {
        assertNotNull(rocksDB.getColumnFamilyDescriptors());

        List<ColumnFamilyDescriptor> columnFamilyDescriptorList = rocksDB.getColumnFamilyDescriptors();
        ColumnFamilyOptions familyOptions = columnFamilyDescriptorList.get(0).getOptions();
        // This is the assertion affected by the RocksDB options-file limitation described above.
        assertEquals(ChecksumType.kxxHash,
                ((BlockBasedTableConfig) familyOptions.tableFormatConfig()).checksumType());
    } finally {
        // Close the store even when an assertion fails.
        rocksDB.close();
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,19 @@
write_buffer_size=1024
# set by jni: options.setMaxWriteBufferNumber
max_write_buffer_number=1

[TableOptions/BlockBasedTable "default"]
# set by jni: tableOptions.setBlockSize
block_size=65536
# set by jni: tableOptions.setBlockCache
block_cache=206150041
# set by jni: tableOptions.setFormatVersion
format_version=2
# set by jni: tableOptions.setChecksumType
checksum=kxxHash
# set by jni: tableOptions.setFilterPolicy, bloomfilter:[bits_per_key]:[use_block_based_builder]
filter_policy=rocksdb.BloomFilter:10:false
# set by jni: tableOptions.setCacheIndexAndFilterBlocks
cache_index_and_filter_blocks=true
# set by jni: options.setLevelCompactionDynamicLevelBytes
level_compaction_dynamic_level_bytes=true
6 changes: 5 additions & 1 deletion conf/default_rocksdb.conf
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,8 @@

[CFOptions "default"]
# set by jni: options.setLogFileTimeToRoll
log_file_time_to_roll=86400
log_file_time_to_roll=86400

[TableOptions/BlockBasedTable "default"]
# set by jni: tableOptions.setChecksumType
checksum=kxxHash
6 changes: 5 additions & 1 deletion conf/ledger_metadata_rocksdb.conf
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,8 @@

[CFOptions "default"]
# set by jni: options.setLogFileTimeToRoll
log_file_time_to_roll=86400
log_file_time_to_roll=86400

[TableOptions/BlockBasedTable "default"]
# set by jni: tableOptions.setChecksumType
checksum=kxxHash

0 comments on commit 7198b03

Please sign in to comment.