Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adapt RocksDB 8.1.1 #112

Merged
merged 5 commits into from
May 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ libs/bzip2/libbz2.a
build/*
.vscode/c_cpp_properties.json
dist
.vscode/settings.json
8 changes: 4 additions & 4 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ CMAKE_REQUIRED_PARAMS="-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_INSTALL_PREF
snappy_version="1.1.10"
cd $BUILD_PATH && wget https://github.com/google/snappy/archive/${snappy_version}.tar.gz && tar xzf ${snappy_version}.tar.gz && cd snappy-${snappy_version} && \
mkdir -p build_place && cd build_place && \
CXXFLAGS='-fPIC -O3 -pipe -Wno-uninitialized -Werror,-Wno-sign-compare' cmake $CMAKE_REQUIRED_PARAMS -DSNAPPY_BUILD_TESTS=OFF -DSNAPPY_BUILD_BENCHMARKS=OFF .. && make install/strip -j16 && \
CXXFLAGS='-fPIC -O3 -pipe -Wno-uninitialized -Wno-sign-compare' cmake $CMAKE_REQUIRED_PARAMS -DSNAPPY_BUILD_TESTS=OFF -DSNAPPY_BUILD_BENCHMARKS=OFF .. && make install/strip -j16 && \
cd $BUILD_PATH && rm -rf *

export CFLAGS='-fPIC -O3 -pipe'
export CXXFLAGS='-fPIC -O3 -pipe -Wno-uninitialized'

zlib_version="1.2.11"
zlib_version="1.2.13"
cd $BUILD_PATH && wget https://github.com/madler/zlib/archive/v${zlib_version}.tar.gz && tar xzf v${zlib_version}.tar.gz && cd zlib-${zlib_version} && \
./configure --prefix=$INSTALL_PREFIX --static && make -j16 install && \
cd $BUILD_PATH && rm -rf *
Expand All @@ -27,7 +27,7 @@ cd $BUILD_PATH && wget https://github.com/lz4/lz4/archive/v${lz4_version}.tar.gz
cmake $CMAKE_REQUIRED_PARAMS -DLZ4_BUILD_LEGACY_LZ4C=OFF -DBUILD_SHARED_LIBS=OFF -DLZ4_POSITION_INDEPENDENT_LIB=ON && make -j16 install && \
cd $BUILD_PATH && rm -rf *

zstd_version="1.5.4"
zstd_version="1.5.5"
cd $BUILD_PATH && wget https://github.com/facebook/zstd/archive/v${zstd_version}.tar.gz && tar xzf v${zstd_version}.tar.gz && \
cd zstd-${zstd_version}/build/cmake && mkdir -p build_place && cd build_place && \
cmake $CMAKE_REQUIRED_PARAMS -DZSTD_BUILD_PROGRAMS=OFF -DZSTD_BUILD_CONTRIB=OFF -DZSTD_BUILD_STATIC=ON -DZSTD_BUILD_SHARED=OFF -DZSTD_BUILD_TESTS=OFF \
Expand All @@ -37,7 +37,7 @@ cd $BUILD_PATH && wget https://github.com/facebook/zstd/archive/v${zstd_version}
# Note: if you don't have a good reason, please do not set -DPORTABLE=ON
#
# This one is set here on purpose of compatibility with github action runtime processor
rocksdb_version="8.0.0"
rocksdb_version="8.1.1"
cd $BUILD_PATH && wget https://github.com/facebook/rocksdb/archive/v${rocksdb_version}.tar.gz && tar xzf v${rocksdb_version}.tar.gz && cd rocksdb-${rocksdb_version}/ && \
mkdir -p build_place && cd build_place && cmake -DCMAKE_BUILD_TYPE=Release $CMAKE_REQUIRED_PARAMS -DCMAKE_PREFIX_PATH=$INSTALL_PREFIX -DWITH_TESTS=OFF -DWITH_GFLAGS=OFF \
-DWITH_BENCHMARK_TOOLS=OFF -DWITH_TOOLS=OFF -DWITH_MD_LIBRARY=OFF -DWITH_RUNTIME_DEBUG=OFF -DROCKSDB_BUILD_SHARED=OFF -DWITH_SNAPPY=ON -DWITH_LZ4=ON -DWITH_ZLIB=ON -DWITH_LIBURING=OFF \
Expand Down
57 changes: 52 additions & 5 deletions c.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ typedef struct rocksdb_backup_engine_options_t rocksdb_backup_engine_options_t;
typedef struct rocksdb_restore_options_t rocksdb_restore_options_t;
typedef struct rocksdb_memory_allocator_t rocksdb_memory_allocator_t;
typedef struct rocksdb_lru_cache_options_t rocksdb_lru_cache_options_t;
typedef struct rocksdb_hyper_clock_cache_options_t
rocksdb_hyper_clock_cache_options_t;
typedef struct rocksdb_cache_t rocksdb_cache_t;
typedef struct rocksdb_compactionfilter_t rocksdb_compactionfilter_t;
typedef struct rocksdb_compactionfiltercontext_t
Expand Down Expand Up @@ -597,13 +599,14 @@ extern ROCKSDB_LIBRARY_API void rocksdb_release_snapshot(
extern ROCKSDB_LIBRARY_API char* rocksdb_property_value(rocksdb_t* db,
const char* propname);
/* returns 0 on success, -1 otherwise */
int rocksdb_property_int(rocksdb_t* db, const char* propname,
uint64_t* out_val);
extern ROCKSDB_LIBRARY_API int rocksdb_property_int(rocksdb_t* db,
linxGnu marked this conversation as resolved.
Show resolved Hide resolved
const char* propname,
uint64_t* out_val);

/* returns 0 on success, -1 otherwise */
int rocksdb_property_int_cf(rocksdb_t* db,
rocksdb_column_family_handle_t* column_family,
const char* propname, uint64_t* out_val);
extern ROCKSDB_LIBRARY_API int rocksdb_property_int_cf(
rocksdb_t* db, rocksdb_column_family_handle_t* column_family,
const char* propname, uint64_t* out_val);

extern ROCKSDB_LIBRARY_API char* rocksdb_property_value_cf(
rocksdb_t* db, rocksdb_column_family_handle_t* column_family,
Expand Down Expand Up @@ -662,6 +665,11 @@ extern ROCKSDB_LIBRARY_API void rocksdb_flush_cf(
rocksdb_t* db, const rocksdb_flushoptions_t* options,
rocksdb_column_family_handle_t* column_family, char** errptr);

extern ROCKSDB_LIBRARY_API void rocksdb_flush_cfs(
rocksdb_t* db, const rocksdb_flushoptions_t* options,
rocksdb_column_family_handle_t** column_family, int num_column_families,
char** errptr);

extern ROCKSDB_LIBRARY_API void rocksdb_flush_wal(rocksdb_t* db,
unsigned char sync,
char** errptr);
Expand Down Expand Up @@ -2012,6 +2020,29 @@ rocksdb_cache_get_usage(rocksdb_cache_t* cache);
extern ROCKSDB_LIBRARY_API size_t
rocksdb_cache_get_pinned_usage(rocksdb_cache_t* cache);

/* HyperClockCache */
extern ROCKSDB_LIBRARY_API rocksdb_hyper_clock_cache_options_t*
rocksdb_hyper_clock_cache_options_create(size_t capacity,
size_t estimated_entry_charge);
extern ROCKSDB_LIBRARY_API void rocksdb_hyper_clock_cache_options_destroy(
rocksdb_hyper_clock_cache_options_t*);
extern ROCKSDB_LIBRARY_API void rocksdb_hyper_clock_cache_options_set_capacity(
rocksdb_hyper_clock_cache_options_t*, size_t);
extern ROCKSDB_LIBRARY_API void
rocksdb_hyper_clock_cache_options_set_estimated_entry_charge(
rocksdb_hyper_clock_cache_options_t*, size_t);
extern ROCKSDB_LIBRARY_API void
rocksdb_hyper_clock_cache_options_set_num_shard_bits(
rocksdb_hyper_clock_cache_options_t*, int);
extern ROCKSDB_LIBRARY_API void
rocksdb_hyper_clock_cache_options_set_memory_allocator(
rocksdb_hyper_clock_cache_options_t*, rocksdb_memory_allocator_t*);

extern ROCKSDB_LIBRARY_API rocksdb_cache_t* rocksdb_cache_create_hyper_clock(
size_t capacity, size_t estimated_entry_charge);
extern ROCKSDB_LIBRARY_API rocksdb_cache_t*
rocksdb_cache_create_hyper_clock_opts(rocksdb_hyper_clock_cache_options_t*);

/* DBPath */

extern ROCKSDB_LIBRARY_API rocksdb_dbpath_t* rocksdb_dbpath_create(
Expand Down Expand Up @@ -2116,6 +2147,11 @@ rocksdb_ingestexternalfileoptions_set_allow_blocking_flush(
extern ROCKSDB_LIBRARY_API void
rocksdb_ingestexternalfileoptions_set_ingest_behind(
rocksdb_ingestexternalfileoptions_t* opt, unsigned char ingest_behind);
extern ROCKSDB_LIBRARY_API void
rocksdb_ingestexternalfileoptions_set_fail_if_not_bottommost_level(
rocksdb_ingestexternalfileoptions_t* opt,
unsigned char fail_if_not_bottommost_level);

extern ROCKSDB_LIBRARY_API void rocksdb_ingestexternalfileoptions_destroy(
rocksdb_ingestexternalfileoptions_t* opt);

Expand Down Expand Up @@ -2198,6 +2234,12 @@ extern ROCKSDB_LIBRARY_API void rocksdb_universal_compaction_options_destroy(
extern ROCKSDB_LIBRARY_API rocksdb_fifo_compaction_options_t*
rocksdb_fifo_compaction_options_create(void);
extern ROCKSDB_LIBRARY_API void
rocksdb_fifo_compaction_options_set_allow_compaction(
rocksdb_fifo_compaction_options_t* fifo_opts, unsigned char allow_compaction);
extern ROCKSDB_LIBRARY_API unsigned char
rocksdb_fifo_compaction_options_get_allow_compaction(
rocksdb_fifo_compaction_options_t* fifo_opts);
extern ROCKSDB_LIBRARY_API void
rocksdb_fifo_compaction_options_set_max_table_files_size(
rocksdb_fifo_compaction_options_t* fifo_opts, uint64_t size);
extern ROCKSDB_LIBRARY_API uint64_t
Expand Down Expand Up @@ -2622,6 +2664,11 @@ extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_flush_cf(
rocksdb_transactiondb_t* txn_db, const rocksdb_flushoptions_t* options,
rocksdb_column_family_handle_t* column_family, char** errptr);

extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_flush_cfs(
rocksdb_transactiondb_t* txn_db, const rocksdb_flushoptions_t* options,
rocksdb_column_family_handle_t** column_families, int num_column_families,
char** errptr);

extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_flush_wal(
rocksdb_transactiondb_t* txn_db, unsigned char sync, char** errptr);

Expand Down
98 changes: 98 additions & 0 deletions cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,18 @@ func NewLRUCacheWithOptions(opt *LRUCacheOptions) *Cache {
return newNativeCache(cCache)
}

// NewHyperClockCache creates a new hyper clock cache with the given
// capacity and estimated per-entry charge.
func NewHyperClockCache(capacity, estimatedEntryCharge int) *Cache {
	handle := C.rocksdb_cache_create_hyper_clock(
		C.size_t(capacity),
		C.size_t(estimatedEntryCharge),
	)
	return newNativeCache(handle)
}

// NewHyperClockCacheWithOpts creates a hyper clock cache from predefined options.
func NewHyperClockCacheWithOpts(opt *HyperClockCacheOptions) *Cache {
	return newNativeCache(C.rocksdb_cache_create_hyper_clock_opts(opt.c))
}

// NewNativeCache creates a Cache object.
func newNativeCache(c *C.rocksdb_cache_t) *Cache {
return &Cache{c: c}
Expand Down Expand Up @@ -90,3 +102,89 @@ func (l *LRUCacheOptions) SetNumShardBits(n int) {
func (l *LRUCacheOptions) SetMemoryAllocator(m *MemoryAllocator) {
C.rocksdb_lru_cache_options_set_memory_allocator(l.c, m.c)
}

// HyperClockCacheOptions are options for HyperClockCache.
//
// HyperClockCache is a lock-free Cache alternative for RocksDB block cache
// that offers much improved CPU efficiency vs. LRUCache under high parallel
// load or high contention, with some caveats:
//   - Not a general Cache implementation: can only be used for
//     BlockBasedTableOptions::block_cache, which RocksDB uses in a way that is
//     compatible with HyperClockCache.
//   - Requires an extra tuning parameter: see estimated_entry_charge below.
//     Similarly, substantially changing the capacity with SetCapacity could
//     harm efficiency.
//   - SecondaryCache is not yet supported.
//   - Cache priorities are less aggressively enforced, which could cause
//     cache dilution from long range scans (unless they use fill_cache=false).
//   - Can be worse for small caches, because if almost all of a cache shard is
//     pinned (more likely with non-partitioned filters), then CLOCK eviction
//     becomes very CPU intensive.
//
// See internal cache/clock_cache.h for full description.
type HyperClockCacheOptions struct {
	// c is the native RocksDB options handle; released by Destroy.
	c *C.rocksdb_hyper_clock_cache_options_t
}

// NewHyperClockCacheOptions creates new options for hyper clock cache,
// initialized with the given capacity and estimated entry charge.
func NewHyperClockCacheOptions(capacity, estimatedEntryCharge int) *HyperClockCacheOptions {
	cOpts := C.rocksdb_hyper_clock_cache_options_create(
		C.size_t(capacity),
		C.size_t(estimatedEntryCharge),
	)
	return &HyperClockCacheOptions{c: cOpts}
}

// SetCapacity sets the capacity of the cache.
func (h *HyperClockCacheOptions) SetCapacity(capacity int) {
	n := C.size_t(capacity)
	C.rocksdb_hyper_clock_cache_options_set_capacity(h.c, n)
}

// SetEstimatedEntryCharge sets the estimated average `charge` associated with cache entries.
//
// This is a critical configuration parameter for good performance from the hyper
// cache, because having a table size that is fixed at creation time greatly
// reduces the required synchronization between threads.
//   - If the estimate is substantially too low (e.g. less than half the true
//     average) then metadata space overhead with be substantially higher (e.g.
//     200 bytes per entry rather than 100). With kFullChargeCacheMetadata, this
//     can slightly reduce cache hit rates, and slightly reduce access times due
//     to the larger working memory size.
//   - If the estimate is substantially too high (e.g. 25% higher than the true
//     average) then there might not be sufficient slots in the hash table for
//     both efficient operation and capacity utilization (hit rate). The hyper
//     cache will evict entries to prevent load factors that could dramatically
//     affect lookup times, instead letting the hit rate suffer by not utilizing
//     the full capacity.
//
// A reasonable choice is the larger of block_size and metadata_block_size.
// When WriteBufferManager (and similar) charge memory usage to the block
// cache, this can lead to the same effect as estimate being too low, which
// is better than the opposite. Therefore, the general recommendation is to
// assume that other memory charged to block cache could be negligible, and
// ignore it in making the estimate.
//
// The best parameter choice based on a cache in use is given by
// GetUsage() / GetOccupancyCount(), ignoring metadata overheads such as
// with kDontChargeCacheMetadata. More precisely with
// kFullChargeCacheMetadata is (GetUsage() - 64 * GetTableAddressCount()) /
// GetOccupancyCount(). However, when the average value size might vary
// (e.g. balance between metadata and data blocks in cache), it is better
// to estimate toward the lower side than the higher side.
func (h *HyperClockCacheOptions) SetEstimatedEntryCharge(v int) {
	C.rocksdb_hyper_clock_cache_options_set_estimated_entry_charge(h.c, C.size_t(v))
}

// SetNumShardBits sets the number of shard bits (log2 of the shard count)
// used for this cache.
//
// NOTE: the original doc comment said "SetCapacity" — a copy-paste error;
// this method configures sharding, not capacity.
func (h *HyperClockCacheOptions) SetNumShardBits(n int) {
	C.rocksdb_hyper_clock_cache_options_set_num_shard_bits(h.c, C.int(n))
}

// SetMemoryAllocator sets the memory allocator used by this cache.
func (h *HyperClockCacheOptions) SetMemoryAllocator(m *MemoryAllocator) {
	alloc := m.c
	C.rocksdb_hyper_clock_cache_options_set_memory_allocator(h.c, alloc)
}

// Destroy releases the native options object. It is safe to call
// multiple times; calls after the first are no-ops.
func (h *HyperClockCacheOptions) Destroy() {
	// Guard against double-destroy: the original passed a nil handle to the
	// C destructor on a second call.
	if h.c != nil {
		C.rocksdb_hyper_clock_cache_options_destroy(h.c)
		h.c = nil
	}
}
32 changes: 30 additions & 2 deletions cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"github.com/stretchr/testify/require"
)

func TestCache(t *testing.T) {
func TestLRUCache(t *testing.T) {
cache := NewLRUCache(19)
defer cache.Destroy()

Expand All @@ -17,7 +17,18 @@ func TestCache(t *testing.T) {
cache.DisownData()
}

func TestCacheWithOpts(t *testing.T) {
// TestHyperClockCache verifies capacity get/set on a directly-constructed
// hyper clock cache.
func TestHyperClockCache(t *testing.T) {
	c := NewHyperClockCache(100, 10)
	defer c.Destroy()

	require.EqualValues(t, 100, c.GetCapacity())

	c.SetCapacity(128)
	require.EqualValues(t, 128, c.GetCapacity())

	c.DisownData()
}

func TestLRUCacheWithOpts(t *testing.T) {
opts := NewLRUCacheOptions()
opts.SetCapacity(19)
opts.SetNumShardBits(2)
Expand All @@ -32,3 +43,20 @@ func TestCacheWithOpts(t *testing.T) {

cache.DisownData()
}

// TestHyperClockCacheWithOpts verifies that options-based construction
// honors the capacity set via the options object.
func TestHyperClockCacheWithOpts(t *testing.T) {
	options := NewHyperClockCacheOptions(100, 10)
	defer options.Destroy()
	options.SetCapacity(19)
	options.SetEstimatedEntryCharge(10)
	options.SetNumShardBits(2)

	c := NewHyperClockCacheWithOpts(options)
	defer c.Destroy()

	require.EqualValues(t, 19, c.GetCapacity())

	c.SetCapacity(128)
	require.EqualValues(t, 128, c.GetCapacity())

	c.DisownData()
}
1 change: 1 addition & 0 deletions cf_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ func TestColumnFamilyBatchPutGet(t *testing.T) {

// trigger flush
require.Nil(t, db.FlushCF(cfh[0], NewDefaultFlushOptions()))
require.Nil(t, db.FlushCFs(cfh, NewDefaultFlushOptions()))

meta := db.GetColumnFamilyMetadataCF(cfh[0])
require.NotNil(t, meta)
Expand Down
33 changes: 33 additions & 0 deletions db.go
Original file line number Diff line number Diff line change
Expand Up @@ -1223,6 +1223,24 @@ func (db *DB) GetProperty(propName string) (value string) {
return
}

// GetIntProperty similar to `GetProperty`, but only works for a subset of
// properties whose return value is an integer. Returns the value and whether
// the lookup succeeded.
func (db *DB) GetIntProperty(propName string) (value uint64, success bool) {
	cProp := C.CString(propName)
	defer C.free(unsafe.Pointer(cProp))

	// The C API returns 0 on success, -1 otherwise.
	success = C.rocksdb_property_int(db.c, cProp, (*C.uint64_t)(&value)) == 0
	return
}

// GetIntPropertyCF similar to `GetProperty`, but only works for a subset of
// properties whose return value is an integer, scoped to the given column
// family. Returns the value and whether the lookup succeeded.
func (db *DB) GetIntPropertyCF(propName string, cf *ColumnFamilyHandle) (value uint64, success bool) {
	cProp := C.CString(propName)
	defer C.free(unsafe.Pointer(cProp))

	// The C API returns 0 on success, -1 otherwise.
	success = C.rocksdb_property_int_cf(db.c, cf.c, cProp, (*C.uint64_t)(&value)) == 0
	return
}

// GetPropertyCF returns the value of a database property.
func (db *DB) GetPropertyCF(propName string, cf *ColumnFamilyHandle) (value string) {
cProp := C.CString(propName)
Expand Down Expand Up @@ -1562,6 +1580,21 @@ func (db *DB) FlushCF(cf *ColumnFamilyHandle, opts *FlushOptions) (err error) {
return
}

// FlushCFs triggers a manual flush for the database on specific column
// families. Flushing an empty set of column families is a no-op.
func (db *DB) FlushCFs(cfs []*ColumnFamilyHandle, opts *FlushOptions) (err error) {
	n := len(cfs)
	if n == 0 {
		// Nothing to flush; matches the original's silent no-op.
		return
	}

	cHandles := make([]*C.rocksdb_column_family_handle_t, n)
	for i, cf := range cfs {
		cHandles[i] = cf.c
	}

	var cErr *C.char
	C.rocksdb_flush_cfs(db.c, opts.c, &cHandles[0], C.int(n), &cErr)
	err = fromCError(cErr)
	return
}

// FlushWAL flushes the WAL memory buffer to the file. If sync is true, it calls SyncWAL
// afterwards.
func (db *DB) FlushWAL(sync bool) (err error) {
Expand Down
4 changes: 4 additions & 0 deletions db_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@ import (
// TestOpenDb checks that a freshly-opened DB reports zero immutable
// memtables via both the string and integer property accessors.
func TestOpenDb(t *testing.T) {
	db := newTestDB(t, nil)
	defer db.Close()

	require.EqualValues(t, "0", db.GetProperty("rocksdb.num-immutable-mem-table"))

	numImm, ok := db.GetIntProperty("rocksdb.num-immutable-mem-table")
	require.True(t, ok)
	require.EqualValues(t, uint64(0), numImm)
}

func TestDBCRUD(t *testing.T) {
Expand Down
Loading