From 0d670751f9ac6b07caa94810f96e338c5c8f1819 Mon Sep 17 00:00:00 2001 From: Michal Kucharczyk <1728078+michalkucharczyk@users.noreply.github.com> Date: Tue, 24 Jan 2023 14:26:04 +0100 Subject: [PATCH] service: storage monitor added (#13082) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * service: storage monitor added Storage monitor added. It uses `notify` create to get notifications about any changes to monitored path (which is database path). Notifications are consumed in essential task which terminates when available storage space drops below given threshold. Closes: #12399 * Cargo.lock updated * misspell * fs events throttling added * minor updates * filter out non mutating events * misspell * ".git/.scripts/commands/fmt/fmt.sh" * Update client/service/src/storage_monitor.rs Co-authored-by: Anton * storage-monitor crate added * cleanup: configuration + service builder * storage_monitor in custom service (wip) * copy-paste bad desc fixed * notify removed * storage_monitor added to node * fix for clippy * publish = false * Update bin/node/cli/src/command.rs Co-authored-by: Dmitry Markin * Apply suggestions from code review Co-authored-by: Bastian Köcher * crate name: storage-monitor -> sc-storage-monitor * error handling improved * Apply suggestions from code review Co-authored-by: Bastian Köcher * publish=false removed Co-authored-by: command-bot <> Co-authored-by: Anton Co-authored-by: Dmitry Markin Co-authored-by: Bastian Köcher --- Cargo.lock | 31 +++++ Cargo.toml | 1 + bin/node/cli/Cargo.toml | 2 + bin/node/cli/src/cli.rs | 4 + bin/node/cli/src/command.rs | 3 +- bin/node/cli/src/service.rs | 19 ++- client/cli/src/params/database_params.rs | 4 +- client/service/Cargo.toml | 1 + client/service/src/error.rs | 3 + client/storage-monitor/Cargo.toml | 20 +++ client/storage-monitor/src/lib.rs | 149 +++++++++++++++++++++++ 11 files changed, 227 insertions(+), 10 deletions(-) create mode 100644 client/storage-monitor/Cargo.toml create mode 100644 client/storage-monitor/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 08948ffbd970b..214c4f72c7d0d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4665,6 +4665,20 @@ dependencies = [ "memoffset 0.6.5", ] +[[package]] +name = "nix" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46a58d1d356c6597d08cde02c2f09d785b09e28711837b1ed667dc652c08a694" +dependencies = [ + "bitflags", + "cfg-if", + "libc", + "memoffset 0.7.1", + "pin-utils", + "static_assertions", +] + [[package]] name = "node-bench" version = "0.9.0-dev" @@ -4752,6 +4766,7 @@ dependencies = [ "sc-rpc", "sc-service", "sc-service-test", + "sc-storage-monitor", "sc-sync-state-rpc", "sc-sysinfo", "sc-telemetry", @@ -8895,6 +8910,7 @@ dependencies = [ "sc-rpc", "sc-rpc-server", "sc-rpc-spec-v2", + "sc-storage-monitor", "sc-sysinfo", "sc-telemetry", "sc-tracing", @@ -8973,6 +8989,21 @@ dependencies = [ "sp-core", ] +[[package]] +name = "sc-storage-monitor" +version = "0.1.0" +dependencies = [ + "clap 4.0.32", + "futures", + "log", + "nix 0.26.1", + "sc-client-db", + "sc-utils", + "sp-core", + "thiserror", + "tokio", +] + [[package]] name = "sc-sync-state-rpc" version = "0.10.0-dev" diff --git a/Cargo.toml b/Cargo.toml index 8f55d8e527ecd..f7eedadd1e8d7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,6 +64,7 @@ members = [ "client/service", "client/service/test", "client/state-db", + "client/storage-monitor", "client/sysinfo", "client/sync-state-rpc", "client/telemetry", diff --git a/bin/node/cli/Cargo.toml b/bin/node/cli/Cargo.toml index 4d9279b857eed..8a883ee0119ec 100644 --- a/bin/node/cli/Cargo.toml +++ b/bin/node/cli/Cargo.toml @@ -81,6 +81,7 @@ sc-executor = { version = "0.10.0-dev", path = "../../../client/executor" } sc-authority-discovery = { version = "0.10.0-dev", path = "../../../client/authority-discovery" } sc-sync-state-rpc = { version = "0.10.0-dev", path = "../../../client/sync-state-rpc" } sc-sysinfo = { version = "6.0.0-dev", path = "../../../client/sysinfo" } +sc-storage-monitor = { version = "0.1.0", path = "../../../client/storage-monitor" } # frame dependencies frame-system = { version = "4.0.0-dev", path = "../../../frame/system" } @@ -138,6 +139,7 @@ substrate-frame-cli = { version = "4.0.0-dev", optional = true, path = "../../.. try-runtime-cli = { version = "0.10.0-dev", optional = true, path = "../../../utils/frame/try-runtime/cli" } sc-cli = { version = "0.10.0-dev", path = "../../../client/cli", optional = true } pallet-balances = { version = "4.0.0-dev", path = "../../../frame/balances" } +sc-storage-monitor = { version = "0.1.0", path = "../../../client/storage-monitor" } [features] default = ["cli"] diff --git a/bin/node/cli/src/cli.rs b/bin/node/cli/src/cli.rs index bb7f8a4c60aa9..7bea336c8e41a 100644 --- a/bin/node/cli/src/cli.rs +++ b/bin/node/cli/src/cli.rs @@ -36,6 +36,10 @@ pub struct Cli { /// telemetry, if telemetry is enabled. #[arg(long)] pub no_hardware_benchmarks: bool, + + #[allow(missing_docs)] + #[clap(flatten)] + pub storage_monitor: sc_storage_monitor::StorageMonitorParams, } /// Possible subcommands of the main binary. diff --git a/bin/node/cli/src/command.rs b/bin/node/cli/src/command.rs index fd464bbc914a5..2ed8a2c754018 100644 --- a/bin/node/cli/src/command.rs +++ b/bin/node/cli/src/command.rs @@ -87,8 +87,7 @@ pub fn run() -> Result<()> { None => { let runner = cli.create_runner(&cli.run)?; runner.run_node_until_exit(|config| async move { - service::new_full(config, cli.no_hardware_benchmarks) - .map_err(sc_cli::Error::Service) + service::new_full(config, cli).map_err(sc_cli::Error::Service) }) }, Some(Subcommand::Inspect(cmd)) => { diff --git a/bin/node/cli/src/service.rs b/bin/node/cli/src/service.rs index d77a333dfa220..e329087947c98 100644 --- a/bin/node/cli/src/service.rs +++ b/bin/node/cli/src/service.rs @@ -20,6 +20,7 @@ //! Service implementation. Specialized wrapper over substrate service. +use crate::Cli; use codec::Encode; use frame_benchmarking_cli::SUBSTRATE_REFERENCE_HARDWARE; use frame_system_rpc_runtime_api::AccountNonceApi; @@ -556,12 +557,18 @@ pub fn new_full_base( } /// Builds a new service for a full client. -pub fn new_full( - config: Configuration, - disable_hardware_benchmarks: bool, -) -> Result { - new_full_base(config, disable_hardware_benchmarks, |_, _| ()) - .map(|NewFullBase { task_manager, .. }| task_manager) +pub fn new_full(config: Configuration, cli: Cli) -> Result { + let database_source = config.database.clone(); + let task_manager = new_full_base(config, cli.no_hardware_benchmarks, |_, _| ()) + .map(|NewFullBase { task_manager, .. }| task_manager)?; + + sc_storage_monitor::StorageMonitorService::try_spawn( + cli.storage_monitor, + database_source, + &task_manager.spawn_essential_handle(), + )?; + + Ok(task_manager) } #[cfg(test)] diff --git a/client/cli/src/params/database_params.rs b/client/cli/src/params/database_params.rs index fdd3622580a6d..06a154fd60867 100644 --- a/client/cli/src/params/database_params.rs +++ b/client/cli/src/params/database_params.rs @@ -19,7 +19,7 @@ use crate::arg_enums::Database; use clap::Args; -/// Parameters for block import. +/// Parameters for database #[derive(Debug, Clone, PartialEq, Args)] pub struct DatabaseParams { /// Select database backend to use. @@ -32,7 +32,7 @@ pub struct DatabaseParams { } impl DatabaseParams { - /// Limit the memory the database cache can use. + /// Database backend pub fn database(&self) -> Option { self.database } diff --git a/client/service/Cargo.toml b/client/service/Cargo.toml index 6122895d2300c..b4ce3bbbb7f1c 100644 --- a/client/service/Cargo.toml +++ b/client/service/Cargo.toml @@ -73,6 +73,7 @@ sc-offchain = { version = "4.0.0-dev", path = "../offchain" } prometheus-endpoint = { package = "substrate-prometheus-endpoint", path = "../../utils/prometheus", version = "0.10.0-dev" } sc-tracing = { version = "4.0.0-dev", path = "../tracing" } sc-sysinfo = { version = "6.0.0-dev", path = "../sysinfo" } +sc-storage-monitor = { version = "0.1.0", path = "../storage-monitor" } tracing = "0.1.29" tracing-futures = { version = "0.2.4" } async-trait = "0.1.57" diff --git a/client/service/src/error.rs b/client/service/src/error.rs index 001a83922d776..ec2951193964c 100644 --- a/client/service/src/error.rs +++ b/client/service/src/error.rs @@ -48,6 +48,9 @@ pub enum Error { #[error(transparent)] Telemetry(#[from] sc_telemetry::Error), + #[error(transparent)] + Storage(#[from] sc_storage_monitor::Error), + #[error("Best chain selection strategy (SelectChain) is not provided.")] SelectChainRequired, diff --git a/client/storage-monitor/Cargo.toml b/client/storage-monitor/Cargo.toml new file mode 100644 index 0000000000000..2ba24f9e2e718 --- /dev/null +++ b/client/storage-monitor/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "sc-storage-monitor" +version = "0.1.0" +authors = ["Parity Technologies "] +edition = "2021" +license = "GPL-3.0-or-later WITH Classpath-exception-2.0" +repository = "https://github.com/paritytech/substrate" +description = "Storage monitor service for substrate" +homepage = "https://substrate.io" + +[dependencies] +clap = { version = "4.0.9", features = ["derive", "string"] } +futures = "0.3.21" +log = "0.4.17" +nix = { version = "0.26.1", features = ["fs"] } +sc-client-db = { version = "0.10.0-dev", default-features = false, path = "../db" } +sc-utils = { version = "4.0.0-dev", path = "../utils" } +sp-core = { version = "7.0.0", path = "../../primitives/core" } +tokio = "1.22.0" +thiserror = "1.0.30" diff --git a/client/storage-monitor/src/lib.rs b/client/storage-monitor/src/lib.rs new file mode 100644 index 0000000000000..39bd15675b350 --- /dev/null +++ b/client/storage-monitor/src/lib.rs @@ -0,0 +1,149 @@ +// This file is part of Substrate. + +// Copyright (C) 2022 Parity Technologies (UK) Ltd. +// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0 + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +use clap::Args; +use nix::{errno::Errno, sys::statvfs::statvfs}; +use sc_client_db::DatabaseSource; +use sp_core::traits::SpawnEssentialNamed; +use std::{ + path::{Path, PathBuf}, + time::Duration, +}; + +const LOG_TARGET: &str = "storage-monitor"; + +/// Error type used in this crate. +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("IO Error")] + IOError(#[from] Errno), + #[error("Out of storage space: available {0}MB, required {1}MB")] + StorageOutOfSpace(u64, u64), +} + +/// Parameters used to create the storage monitor. +#[derive(Default, Debug, Clone, Args)] +pub struct StorageMonitorParams { + /// Required available space on database storage. If available space for DB storage drops below + /// the given threshold, node will be gracefully terminated. If `0` is given monitoring will be + /// disabled. + #[arg(long = "db-storage-threshold", value_name = "MB", default_value_t = 1000)] + pub threshold: u64, + + /// How often available space is polled. + #[arg(long = "db-storage-polling-period", value_name = "SECONDS", default_value_t = 5, value_parser = clap::value_parser!(u32).range(1..))] + pub polling_period: u32, +} + +/// Storage monitor service: checks the available space for the filesystem for fiven path. +pub struct StorageMonitorService { + /// watched path + path: PathBuf, + /// number of megabytes that shall be free on the filesystem for watched path + threshold: u64, + /// storage space polling period (seconds) + polling_period: u32, +} + +impl StorageMonitorService { + /// Creates new StorageMonitorService for given client config + pub fn try_spawn( + parameters: StorageMonitorParams, + database: DatabaseSource, + spawner: &impl SpawnEssentialNamed, + ) -> Result<(), Error> { + Ok(match (parameters.threshold, database.path()) { + (0, _) => { + log::info!( + target: LOG_TARGET, + "StorageMonitorService: threshold `0` given, storage monitoring disabled", + ); + }, + (_, None) => { + log::warn!( + target: LOG_TARGET, + "StorageMonitorService: no database path to observe", + ); + }, + (threshold, Some(path)) => { + log::debug!( + target: LOG_TARGET, + "Initializing StorageMonitorService for db path: {:?}", + path, + ); + + Self::check_free_space(&path, threshold)?; + + let storage_monitor_service = StorageMonitorService { + path: path.to_path_buf(), + threshold, + polling_period: parameters.polling_period, + }; + + spawner.spawn_essential( + "storage-monitor", + None, + Box::pin(storage_monitor_service.run()), + ); + }, + }) + } + + /// Main monitoring loop, intended to be spawned as essential task. Quits if free space drop + /// below threshold. + async fn run(self) { + loop { + tokio::time::sleep(Duration::from_secs(self.polling_period.into())).await; + if Self::check_free_space(&self.path, self.threshold).is_err() { + break + }; + } + } + + /// Returns free space in MB, or error if statvfs failed. + fn free_space(path: &Path) -> Result { + statvfs(path) + .map(|stats| stats.blocks_available() * stats.block_size() / 1_000_000) + .map_err(Error::from) + } + + /// Checks if the amount of free space for given `path` is above given `threshold`. + /// If it dropped below, error is returned. + /// System errors are silently ignored. + fn check_free_space(path: &Path, threshold: u64) -> Result<(), Error> { + match StorageMonitorService::free_space(path) { + Ok(available_space) => { + log::trace!( + target: LOG_TARGET, + "free: {available_space} , threshold: {threshold}.", + ); + + if available_space < threshold { + log::error!(target: LOG_TARGET, "Available space {available_space}MB for path `{}` dropped below threshold: {threshold}MB , terminating...", path.display()); + Err(Error::StorageOutOfSpace(available_space, threshold)) + } else { + Ok(()) + } + }, + Err(e) => { + log::error!(target: LOG_TARGET, "Could not read available space: {:?}.", e); + Err(e) + }, + } + } +}