From 344d8306ef844100b0e92bdd0a96ad5aa0ebb951 Mon Sep 17 00:00:00 2001 From: Alexey Maykov Date: Wed, 17 Dec 2014 00:53:27 -0800 Subject: [PATCH] Implemented RocksDB::WriteOptions Summary: Setting WriteOptions through global system variables. The options are read-write. The WriteOptions object is copied for each use to avoid threading issues. Test Plan: Ran the test. Observed that sync option turns on and off WAL flushes per transaction (ie SQL statement). Stopped in the gdb and checked that WriteOptions had correct values. The diff is for this task: https://github.com/MariaDB/webscalesql-5.6/issues/15 Reviewers: mcallaghan, jonah Subscribers: webscalesql-eng@ Differential Revision: https://phabricator.fb.com/D1745210 --- mysql-test/suite/rocksdb/write_sync.result | 35 ++++++++++ mysql-test/suite/rocksdb/write_sync.test | 29 +++++++++ storage/rocksdb/ha_rocksdb.cc | 75 +++++++++++++++++++++- 3 files changed, 136 insertions(+), 3 deletions(-) create mode 100644 mysql-test/suite/rocksdb/write_sync.result create mode 100644 mysql-test/suite/rocksdb/write_sync.test diff --git a/mysql-test/suite/rocksdb/write_sync.result b/mysql-test/suite/rocksdb/write_sync.result new file mode 100644 index 000000000000..549851284ff7 --- /dev/null +++ b/mysql-test/suite/rocksdb/write_sync.result @@ -0,0 +1,35 @@ +SET GLOBAL rocksdb_write_disable_wal=false; +SET GLOBAL rocksdb_write_timeout_hint_us=555; +SET GLOBAL rocksdb_write_ignore_missing_column_families=true; +create table aaa (id int primary key, i int) engine rocksdb; +SET GLOBAL rocksdb_write_sync=off; +insert aaa(id, i) values(1,1); +show status like 'rocksdb_wal_synced'; +Variable_name Value +rocksdb_wal_synced 1 +insert aaa(id, i) values(2,1); +show status like 'rocksdb_wal_synced'; +Variable_name Value +rocksdb_wal_synced 1 +insert aaa(id, i) values(3,1); +show status like 'rocksdb_wal_synced'; +Variable_name Value +rocksdb_wal_synced 1 +SET GLOBAL rocksdb_write_sync=on_commit; +insert aaa(id, i) values(4,1); +show status 
like 'rocksdb_wal_synced'; +Variable_name Value +rocksdb_wal_synced 2 +insert aaa(id, i) values(5,1); +show status like 'rocksdb_wal_synced'; +Variable_name Value +rocksdb_wal_synced 3 +insert aaa(id, i) values(6,1); +show status like 'rocksdb_wal_synced'; +Variable_name Value +rocksdb_wal_synced 4 +drop table aaa; +SET GLOBAL rocksdb_write_sync=off; +SET GLOBAL rocksdb_write_disable_wal=false; +SET GLOBAL rocksdb_write_timeout_hint_us=0; +SET GLOBAL rocksdb_write_ignore_missing_column_families=false; diff --git a/mysql-test/suite/rocksdb/write_sync.test b/mysql-test/suite/rocksdb/write_sync.test new file mode 100644 index 000000000000..a3c0006e0749 --- /dev/null +++ b/mysql-test/suite/rocksdb/write_sync.test @@ -0,0 +1,29 @@ +SET GLOBAL rocksdb_write_disable_wal=false; +SET GLOBAL rocksdb_write_timeout_hint_us=555; +SET GLOBAL rocksdb_write_ignore_missing_column_families=true; + +create table aaa (id int primary key, i int) engine rocksdb; + +SET GLOBAL rocksdb_write_sync=off; +insert aaa(id, i) values(1,1); +show status like 'rocksdb_wal_synced'; +insert aaa(id, i) values(2,1); +show status like 'rocksdb_wal_synced'; +insert aaa(id, i) values(3,1); +show status like 'rocksdb_wal_synced'; + +SET GLOBAL rocksdb_write_sync=on_commit; +insert aaa(id, i) values(4,1); +show status like 'rocksdb_wal_synced'; +insert aaa(id, i) values(5,1); +show status like 'rocksdb_wal_synced'; +insert aaa(id, i) values(6,1); +show status like 'rocksdb_wal_synced'; + +# Cleanup +drop table aaa; +SET GLOBAL rocksdb_write_sync=off; +SET GLOBAL rocksdb_write_disable_wal=false; +SET GLOBAL rocksdb_write_timeout_hint_us=0; +SET GLOBAL rocksdb_write_ignore_missing_column_families=false; + diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 00f575af5e7e..c72a1c8ea843 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -117,6 +117,8 @@ rocksdb_cf_options_file_update(THD* thd, static long long rocksdb_block_cache_size; static uint64_t 
rocksdb_info_log_level;
 static uint64_t rocksdb_index_type;
+static uint64_t rocksdb_write_sync;
+static rocksdb::WriteOptions rocksdb_write_options;
 
 static rocksdb::DBOptions init_db_options() {
   rocksdb::DBOptions o;
@@ -156,6 +158,27 @@ static TYPELIB index_type_typelib = {
   nullptr
 };
 
+enum write_sync_options {  // indexes into write_sync_names[]
+  WRITE_SYNC_OFF,
+  WRITE_SYNC_ON_COMMIT,
+  WRITE_SYNC_BACKGROUND  // selectable, but not implemented yet
+};
+
+static const char* write_sync_names[] = {
+  "off",
+  "on_commit",
+  "background",
+  NullS
+};
+
+static TYPELIB write_sync_typelib = {
+  array_elements(write_sync_names) - 1,  // count excludes the NullS terminator
+  "write_sync_typelib",
+  write_sync_names,
+  nullptr
+};
+
+
 //TODO: 0 means don't wait at all, and we don't support it yet?
 static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
   "Number of seconds to wait for lock",
@@ -449,6 +472,32 @@ static MYSQL_SYSVAR_STR(cf_options_file, rocksdb_cf_options_file,
   rocksdb_cf_options_file_validate,
   rocksdb_cf_options_file_update, "");
 
+static MYSQL_SYSVAR_ENUM(write_sync,  // translated to WriteOptions::sync per write
+  rocksdb_write_sync,
+  PLUGIN_VAR_RQCMDARG,
+  "WriteOptions::write_sync for RocksDB",
+  NULL, NULL, WRITE_SYNC_OFF, &write_sync_typelib);
+
+static MYSQL_SYSVAR_BOOL(write_disable_wal,  // C++ bool exposed as my_bool
+  *reinterpret_cast<my_bool*>(&rocksdb_write_options.disableWAL),
+  PLUGIN_VAR_RQCMDARG,
+  "WriteOptions::disableWAL for RocksDB",
+  NULL, NULL, rocksdb_write_options.disableWAL);
+
+static MYSQL_SYSVAR_ULONG(write_timeout_hint_us,
+  rocksdb_write_options.timeout_hint_us,
+  PLUGIN_VAR_RQCMDARG,
+  "WriteOptions::timeout_hint_us for RocksDB",
+  NULL, NULL, rocksdb_write_options.timeout_hint_us,
+  /* min */ 0L, /* max */ LONG_MAX, 0);
+
+static MYSQL_SYSVAR_BOOL(write_ignore_missing_column_families,
+  *reinterpret_cast<my_bool*>(  // NOTE(review): assumes sizeof(bool) == sizeof(my_bool)
+    &rocksdb_write_options.ignore_missing_column_families),
+  PLUGIN_VAR_RQCMDARG,
+  "WriteOptions::ignore_missing_column_families for RocksDB",
+  NULL, NULL, rocksdb_write_options.ignore_missing_column_families);
+
 const longlong ROCKSDB_WRITE_BUFFER_SIZE_DEFAULT=4194304;
 
 static struct st_mysql_sys_var*
rocksdb_system_variables[]= {
@@ -503,9 +552,30 @@ static struct st_mysql_sys_var* rocksdb_system_variables[]= {
 
   MYSQL_SYSVAR(default_cf_options),
   MYSQL_SYSVAR(cf_options_file),
+  MYSQL_SYSVAR(write_sync),
+  MYSQL_SYSVAR(write_disable_wal),
+  MYSQL_SYSVAR(write_timeout_hint_us),
+  MYSQL_SYSVAR(write_ignore_missing_column_families),
+
   NULL
 };
 
+// Returns a by-value copy of the global rocksdb_write_options with `sync`
+// derived from rocksdb_write_sync; each caller gets its own options object.
+static rocksdb::WriteOptions get_write_options() {
+  rocksdb::WriteOptions opt(rocksdb_write_options);
+  switch (rocksdb_write_sync) {
+    case WRITE_SYNC_ON_COMMIT:
+      opt.sync = true;
+      break;
+    default:
+      // WRITE_SYNC_OFF, plus WRITE_SYNC_BACKGROUND which users can select
+      // but is not implemented yet: do not sync, and do not assert on input.
+      opt.sync = false;
+      break;
+  }
+  return opt;
+}
 
 ///////////////////////////////////////////////////////////////////////////////////////////
 
@@ -743,7 +813,7 @@ class Rdb_transaction
         }
       }
     }
-    rocksdb::Status s= rdb->Write(rocksdb::WriteOptions(), &batch);
+    rocksdb::Status s= rdb->Write(get_write_options(), &batch);
     res= !s.ok(); // we return true when something failed
     return res;
   }
@@ -1069,7 +1139,6 @@ static int rocksdb_init_func(void *p)
   DBUG_RETURN(0);
 }
 
-
 static int rocksdb_done_func(void *p)
 {
   int error= 0;
@@ -3367,7 +3436,7 @@ void ha_rocksdb::remove_rows(RDBSE_TABLE_DEF *tbl)
         rocksdb::Slice key= it->key();
         if (!tbl->key_descr[i]->covers_key(key.data(), key.size()))
           break;
-        rdb->Delete(rocksdb::WriteOptions(), key);
+        rdb->Delete(get_write_options(), key);
         it->Next();
       }
     }