Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: trie cache factory to allow variable cache sizes #7022

Merged
merged 11 commits into from
Jun 15, 2022
2 changes: 1 addition & 1 deletion chain/chain/src/test_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ impl KeyValueRuntime {
epoch_length: u64,
no_gc: bool,
) -> Self {
let tries = ShardTries::new(store.clone(), 0, num_shards);
let tries = ShardTries::test(store.clone(), num_shards);
let mut initial_amounts = HashMap::new();
for (i, validator) in validators.iter().flatten().enumerate() {
initial_amounts.insert(validator.clone(), (1000 + 100 * i) as u128);
Expand Down
2 changes: 1 addition & 1 deletion core/primitives/src/shard_layout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ fn is_top_level_account(top_account: &AccountId, account: &AccountId) -> bool {
}

/// ShardUId is a unique representation for shards from different shard layouts
#[derive(Hash, Clone, Debug, Copy, PartialEq, Eq, PartialOrd, Ord)]
#[derive(Serialize, Deserialize, Hash, Clone, Debug, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct ShardUId {
pub version: ShardVersion,
pub shard_id: u32,
Expand Down
118 changes: 73 additions & 45 deletions core/store/src/config.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use near_primitives::shard_layout::ShardUId;
use near_primitives::version::DbVersion;

const STORE_PATH: &str = "data";
Expand All @@ -16,7 +17,7 @@ pub struct StoreConfig {
pub enable_statistics: bool,

/// Re-export storage layer statistics as prometheus metrics.
#[serde(default = "default_enable_statistics_export")]
#[serde(default = "StoreConfig::default_enable_statistics_export")]
pub enable_statistics_export: bool,

/// Maximum number of store files being opened simultaneously.
Expand All @@ -26,69 +27,88 @@ pub struct StoreConfig {
/// needs.
/// Increasing this value up to a value higher than 1024 also requires setting `ulimit -n` in
/// Linux.
#[serde(default = "default_max_open_files")]
#[serde(default = "StoreConfig::default_max_open_files")]
pub max_open_files: u32,

/// Cache size for DBCol::State column.
/// Default value: 512MiB.
/// Increasing DBCol::State cache size helps make storage more efficient. On the other hand, we
/// don't want to hugely increase the requirements for running a node, so currently we use a small
/// default value for it.
#[serde(default = "default_col_state_cache_size")]
#[serde(default = "StoreConfig::default_col_state_cache_size")]
pub col_state_cache_size: bytesize::ByteSize,

/// Block size used internally in RocksDB.
/// Default value: 16KiB.
/// We're still experimenting with this parameter, and it seems decreasing its value can improve
/// the performance of the storage.
#[serde(default = "default_block_size")]
#[serde(default = "StoreConfig::default_block_size")]
pub block_size: bytesize::ByteSize,
}

fn default_enable_statistics_export() -> bool {
StoreConfig::DEFAULT.enable_statistics_export
/// Trie cache capacities.
/// Default value: ShardUId { version: 1, shard_id: 2 } -> 2_000_000 elements (may occupy about 2 GB).
/// We're still experimenting with this parameter, and it seems decreasing its value can improve
/// the performance of the storage.
#[serde(default = "StoreConfig::default_trie_cache_capacities")]
pub trie_cache_capacities: Vec<(ShardUId, usize)>,
Longarithm marked this conversation as resolved.
Show resolved Hide resolved
}

fn default_max_open_files() -> u32 {
StoreConfig::DEFAULT.max_open_files
}
impl StoreConfig {
/// Default limit on the number of database files kept open at once.
///
/// The limit used to be 512, but it was hit so often that the store kept
/// closing and reopening the same set of files: running state viewer on a
/// dense set of 500 blocks did almost 200k file opens while touching fewer
/// than 7K unique files (some of them were opened 400+ times). Raising
/// `max_open_files` to 10k improved performance by ~11%.
const fn default_max_open_files() -> u32 {
    const LIMIT: u32 = 10_000;
    LIMIT
}

fn default_col_state_cache_size() -> bytesize::ByteSize {
StoreConfig::DEFAULT.col_state_cache_size
}
/// Default cache size for the [`DBCol::State`] column: 512 MiB.
///
/// All columns used to share a single cache size of 32 MiB. After some
/// RocksDB inefficiencies were found, the [`DBCol::State`] cache was raised
/// to 512 MiB (on 13th of Nov 2021), and a further increase is being
/// considered. In tests, growing it to 25 GiB (a deliberately huge value,
/// used to estimate the performance improvement headroom) with
/// `max_open_files` at 10k improved state viewer performance by 60%.
const fn default_col_state_cache_size() -> bytesize::ByteSize {
    const SIZE: bytesize::ByteSize = bytesize::ByteSize::mib(512);
    SIZE
}

fn default_block_size() -> bytesize::ByteSize {
StoreConfig::DEFAULT.block_size
}
/// Default database block size: 16 KiB.
///
/// The value was originally taken from the Openethereum default parameter
/// and has been kept ever since.
const fn default_block_size() -> bytesize::ByteSize {
    const SIZE: bytesize::ByteSize = bytesize::ByteSize::kib(16);
    SIZE
}

impl StoreConfig {
/// We've used a value of 512 for max_open_files since 3 Dec 2019. As it turned out we were
/// hitting that limit and store had to constantly close/reopen the same set of files.
/// Running state viewer on a dense set of 500 blocks did almost 200K file opens (having less
/// than 7K unique files opened, some files were opened 400+ times).
/// Using 10K limit for max_open_files led to performance improvement of ~11%.
const DEFAULT_MAX_OPEN_FILES: u32 = 10_000;

/// We used to have the same cache size for all columns 32MB. When some RocksDB
/// inefficiencies were found DBCol::State cache size was increased up to 512MB.
/// This was done Nov 13 2021 and we consider increasing the value.
/// Tests have shown that increase of col_state_cache_size up to 25GB (we've used this big
/// value to estimate performance improvement headroom) having max_open_files=10K improved
/// performance of state viewer by 60%.
const DEFAULT_COL_STATE_CACHE_SIZE: bytesize::ByteSize = bytesize::ByteSize::mib(512);

/// Earlier this value was taken from the openethereum default parameter and we use it since
/// then.
const DEFAULT_BLOCK_SIZE: bytesize::ByteSize = bytesize::ByteSize::kib(16);

pub const DEFAULT: Self = Self {
path: None,
enable_statistics: false,
enable_statistics_export: true,
max_open_files: Self::DEFAULT_MAX_OPEN_FILES,
col_state_cache_size: Self::DEFAULT_COL_STATE_CACHE_SIZE,
block_size: Self::DEFAULT_BLOCK_SIZE,
};
/// Default for the `enable_statistics_export` setting.
///
/// Re-exporting storage layer statistics as prometheus metrics is switched
/// on unless the configuration says otherwise.
const fn default_enable_statistics_export() -> bool {
    const EXPORT_BY_DEFAULT: bool = true;
    EXPORT_BY_DEFAULT
}

/// Default trie cache capacities, as `(shard, capacity)` pairs.
///
/// Shards normally get `TRIE_DEFAULT_SHARD_CACHE_SIZE`. Shard 2 is the
/// exception for as long as it carries increased load: 2M elements are
/// reserved for it, which may result in 2GB occupied.
fn default_trie_cache_capacities() -> Vec<(ShardUId, usize)> {
    let loaded_shard = ShardUId { version: 1, shard_id: 2 };
    let reserved_elements: usize = 2_000_000;
    vec![(loaded_shard, reserved_elements)]
}

/// Builds the configuration used by tests.
///
/// Tests usually operate on far less data than a real node, so this starts
/// from the default configuration and trims resource use: the
/// `max_open_files` limit is lowered to 512, which helps when tests are run
/// in isolated environments with tighter resource limits.
pub fn test_config() -> Self {
    let defaults = Self::default();
    Self { max_open_files: 512, ..defaults }
}

/// Returns cache size for given column.
pub const fn col_cache_size(&self, col: crate::DBCol) -> bytesize::ByteSize {
Expand All @@ -101,7 +121,15 @@ impl StoreConfig {

impl Default for StoreConfig {
fn default() -> Self {
Self::DEFAULT
Self {
path: None,
enable_statistics: false,
enable_statistics_export: Self::default_enable_statistics_export(),
max_open_files: Self::default_max_open_files(),
col_state_cache_size: Self::default_col_state_cache_size(),
block_size: Self::default_block_size(),
trie_cache_capacities: Self::default_trie_cache_capacities(),
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion core/store/src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,7 @@ mod tests {
#[test]
fn test_prewrite_check() {
let tmp_dir = tempfile::Builder::new().prefix("prewrite_check").tempdir().unwrap();
let store = RocksDB::open(tmp_dir.path(), &StoreConfig::DEFAULT, false).unwrap();
let store = RocksDB::open(tmp_dir.path(), &StoreConfig::test_config(), false).unwrap();
store.pre_write_check().unwrap()
}

Expand Down
7 changes: 5 additions & 2 deletions core/store/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use std::{fmt, io};

use borsh::{BorshDeserialize, BorshSerialize};
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use once_cell::sync::Lazy;

pub use columns::DBCol;
pub use db::{
Expand Down Expand Up @@ -35,7 +36,8 @@ pub use crate::trie::iterator::TrieIterator;
pub use crate::trie::update::{TrieUpdate, TrieUpdateIterator, TrieUpdateValuePtr};
pub use crate::trie::{
estimator, split_state, ApplyStatePartResult, KeyForStateChanges, PartialStorage, ShardTries,
Trie, TrieCache, TrieCachingStorage, TrieChanges, TrieStorage, WrappedTrieChanges,
Trie, TrieCache, TrieCacheFactory, TrieCachingStorage, TrieChanges, TrieStorage,
WrappedTrieChanges,
};

mod columns;
Expand Down Expand Up @@ -67,8 +69,9 @@ impl Store {
/// Caller must hold the temporary directory returned as first element of
/// the tuple while the store is open.
pub fn tmp_opener() -> (tempfile::TempDir, StoreOpener<'static>) {
static CONFIG: Lazy<StoreConfig> = Lazy::new(StoreConfig::test_config);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure I understand why we need to cache the config here. Can we not always call test_config? Is calling test_config a very expensive operation or can it return different values when called again and again?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc @mina86, this is the kind of side effect that made me avoid lifetime parameters by default (echoing back #6973 (review)).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is one of the changes I took from #7027, and I don't fully understand the reasoning either. Is it fine to proceed with this PR and discuss this point in #7027?

let dir = tempfile::tempdir().unwrap();
let opener = Self::opener(dir.path(), &StoreConfig::DEFAULT);
let opener = Self::opener(dir.path(), &CONFIG);
(dir, opener)
}

Expand Down
6 changes: 4 additions & 2 deletions core/store/src/test_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use rand::seq::SliceRandom;
use rand::Rng;

use crate::db::TestDB;
use crate::{ShardTries, Store};
use crate::{ShardTries, Store, TrieCacheFactory};
use near_primitives::account::id::AccountId;
use near_primitives::hash::CryptoHash;
use near_primitives::receipt::{DataReceipt, Receipt, ReceiptEnum};
Expand All @@ -26,7 +26,9 @@ pub fn create_tries() -> ShardTries {

pub fn create_tries_complex(shard_version: ShardVersion, num_shards: NumShards) -> ShardTries {
let store = create_test_store();
ShardTries::new(store, shard_version, num_shards)
let trie_cache_factory =
TrieCacheFactory { capacities: Default::default(), shard_version, num_shards };
ShardTries::new(store, trie_cache_factory)
}

pub fn test_populate_trie(
Expand Down
16 changes: 9 additions & 7 deletions core/store/src/trie/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ use near_primitives::types::{StateRoot, StateRootNode};
use crate::trie::insert_delete::NodesStorage;
use crate::trie::iterator::TrieIterator;
use crate::trie::nibble_slice::NibbleSlice;
pub use crate::trie::shard_tries::{KeyForStateChanges, ShardTries, WrappedTrieChanges};
pub use crate::trie::shard_tries::{
KeyForStateChanges, ShardTries, TrieCacheFactory, WrappedTrieChanges,
};
pub use crate::trie::trie_storage::{TrieCache, TrieCachingStorage, TrieStorage};
use crate::trie::trie_storage::{TrieMemoryPartialStorage, TrieRecordingStorage};
use crate::StorageError;
Expand Down Expand Up @@ -1096,7 +1098,7 @@ mod tests {
#[test]
fn test_trie_restart() {
let store = create_test_store();
let tries = ShardTries::new(store.clone(), 0, 1);
let tries = ShardTries::test(store.clone(), 1);
let empty_root = Trie::empty_root();
let changes = vec![
(b"doge".to_vec(), Some(b"coin".to_vec())),
Expand All @@ -1108,7 +1110,7 @@ mod tests {
];
let root = test_populate_trie(&tries, &empty_root, ShardUId::single_shard(), changes);

let tries2 = ShardTries::new(store, 0, 1);
let tries2 = ShardTries::test(store, 1);
let trie2 = tries2.get_trie_for_shard(ShardUId::single_shard());
assert_eq!(trie2.get(&root, b"doge"), Ok(Some(b"coin".to_vec())));
}
Expand All @@ -1117,7 +1119,7 @@ mod tests {
#[test]
fn test_trie_recording_reads() {
let store = create_test_store();
let tries = ShardTries::new(store, 0, 1);
let tries = ShardTries::test(store, 1);
let empty_root = Trie::empty_root();
let changes = vec![
(b"doge".to_vec(), Some(b"coin".to_vec())),
Expand All @@ -1144,7 +1146,7 @@ mod tests {
#[test]
fn test_trie_recording_reads_update() {
let store = create_test_store();
let tries = ShardTries::new(store, 0, 1);
let tries = ShardTries::test(store, 1);
let empty_root = Trie::empty_root();
let changes = vec![
(b"doge".to_vec(), Some(b"coin".to_vec())),
Expand Down Expand Up @@ -1179,7 +1181,7 @@ mod tests {
#[test]
fn test_dump_load_trie() {
let store = create_test_store();
let tries = ShardTries::new(store.clone(), 0, 1);
let tries = ShardTries::test(store.clone(), 1);
let empty_root = Trie::empty_root();
let changes = vec![
(b"doge".to_vec(), Some(b"coin".to_vec())),
Expand All @@ -1190,7 +1192,7 @@ mod tests {
store.save_to_file(DBCol::State, &dir.path().join("test.bin")).unwrap();
let store2 = create_test_store();
store2.load_from_file(DBCol::State, &dir.path().join("test.bin")).unwrap();
let tries2 = ShardTries::new(store2, 0, 1);
let tries2 = ShardTries::test(store2, 1);
let trie2 = tries2.get_trie_for_shard(ShardUId::single_shard());
assert_eq!(trie2.get(&root, b"doge").unwrap().unwrap(), b"coin");
}
Expand Down
Loading