diff --git a/Cargo.lock b/Cargo.lock index b9923d34a8e8..e6e8c4902d04 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6251,7 +6251,7 @@ dependencies = [ "reth-payload-validator", "reth-primitives", "reth-provider", - "reth-prune-types", + "reth-prune", "reth-revm", "reth-rpc", "reth-rpc-api", @@ -8187,6 +8187,7 @@ dependencies = [ "reth-provider", "reth-prune-types", "reth-stages", + "reth-stages-types", "reth-static-file-types", "reth-storage-errors", "reth-testing-utils", diff --git a/bin/reth/Cargo.toml b/bin/reth/Cargo.toml index 1440922ee225..5e6a4ca6e06b 100644 --- a/bin/reth/Cargo.toml +++ b/bin/reth/Cargo.toml @@ -66,7 +66,7 @@ reth-node-builder.workspace = true reth-node-events.workspace = true reth-consensus.workspace = true reth-optimism-primitives.workspace = true -reth-prune-types.workspace = true +reth-prune.workspace = true # crypto alloy-rlp.workspace = true diff --git a/bin/reth/src/cli/mod.rs b/bin/reth/src/cli/mod.rs index ff5c4add541c..4dd567630756 100644 --- a/bin/reth/src/cli/mod.rs +++ b/bin/reth/src/cli/mod.rs @@ -8,7 +8,7 @@ use crate::{ commands::{ config_cmd, db, debug_cmd, dump_genesis, import, init_cmd, init_state, node::{self, NoArgs}, - p2p, recover, stage, test_vectors, + p2p, prune, recover, stage, test_vectors, }, version::{LONG_VERSION, SHORT_VERSION}, }; @@ -164,6 +164,7 @@ impl Cli { Commands::Config(command) => runner.run_until_ctrl_c(command.execute()), Commands::Debug(command) => runner.run_command_until_exit(|ctx| command.execute(ctx)), Commands::Recover(command) => runner.run_command_until_exit(|ctx| command.execute(ctx)), + Commands::Prune(command) => runner.run_until_ctrl_c(command.execute()), } } @@ -223,6 +224,9 @@ pub enum Commands { /// Scripts for node recovery #[command(name = "recover")] Recover(recover::Command), + /// Prune according to the configuration without any limits + #[command(name = "prune")] + Prune(prune::PruneCommand), } #[cfg(test)] diff --git a/bin/reth/src/commands/debug_cmd/execution.rs b/bin/reth/src/commands/debug_cmd/execution.rs index c1fd4cfa5fa2..9e39c90b39fc 100644 --- a/bin/reth/src/commands/debug_cmd/execution.rs +++ b/bin/reth/src/commands/debug_cmd/execution.rs @@ -26,7 +26,7 @@ use reth_primitives::{BlockHashOrNumber, BlockNumber, B256}; use reth_provider::{ BlockExecutionWriter, ChainSpecProvider, ProviderFactory, StageCheckpointReader, }; -use reth_prune_types::PruneModes; +use reth_prune::PruneModes; use reth_stages::{ sets::DefaultStages, stages::{ExecutionStage, ExecutionStageThresholds}, diff --git a/bin/reth/src/commands/debug_cmd/merkle.rs b/bin/reth/src/commands/debug_cmd/merkle.rs index 46e76d1da090..5244cbad316f 100644 --- a/bin/reth/src/commands/debug_cmd/merkle.rs +++ b/bin/reth/src/commands/debug_cmd/merkle.rs @@ -23,7 +23,7 @@ use reth_provider::{ BlockNumReader, BlockWriter, ChainSpecProvider, HeaderProvider, LatestStateProviderRef, OriginalValuesKnown, ProviderError, ProviderFactory, StateWriter, }; -use reth_prune_types::PruneModes; +use reth_prune::PruneModes; use reth_revm::database::StateProviderDatabase; use reth_stages::{ stages::{AccountHashingStage, MerkleStage, StorageHashingStage}, diff --git a/bin/reth/src/commands/debug_cmd/replay_engine.rs b/bin/reth/src/commands/debug_cmd/replay_engine.rs index 26c8a3558e73..7b8e7167eeeb 100644 --- a/bin/reth/src/commands/debug_cmd/replay_engine.rs +++ b/bin/reth/src/commands/debug_cmd/replay_engine.rs @@ -22,7 +22,7 @@ use reth_payload_builder::{PayloadBuilderHandle, PayloadBuilderService}; use reth_provider::{ providers::BlockchainProvider, CanonStateSubscriptions, ChainSpecProvider, ProviderFactory, }; -use reth_prune_types::PruneModes; +use reth_prune::PruneModes; use reth_stages::Pipeline; use reth_static_file::StaticFileProducer; use reth_tasks::TaskExecutor; diff --git a/bin/reth/src/commands/import.rs b/bin/reth/src/commands/import.rs index 25d1864a2434..71357e083aaf 100644 --- a/bin/reth/src/commands/import.rs +++ b/bin/reth/src/commands/import.rs @@ -27,7 +27,7 @@ use reth_provider::{ BlockNumReader, ChainSpecProvider, HeaderProvider, ProviderError, ProviderFactory, StageCheckpointReader, }; -use reth_prune_types::PruneModes; +use reth_prune::PruneModes; use reth_stages::{prelude::*, Pipeline, StageId, StageSet}; use reth_static_file::StaticFileProducer; use std::{path::PathBuf, sync::Arc}; diff --git a/bin/reth/src/commands/import_op.rs b/bin/reth/src/commands/import_op.rs index 646cd4f97232..f4b8716fe210 100644 --- a/bin/reth/src/commands/import_op.rs +++ b/bin/reth/src/commands/import_op.rs @@ -17,7 +17,7 @@ use reth_downloaders::file_client::{ }; use reth_optimism_primitives::bedrock_import::is_dup_tx; use reth_provider::StageCheckpointReader; -use reth_prune_types::PruneModes; +use reth_prune::PruneModes; use reth_stages::StageId; use reth_static_file::StaticFileProducer; use std::{path::PathBuf, sync::Arc}; diff --git a/bin/reth/src/commands/mod.rs b/bin/reth/src/commands/mod.rs index cd5a7e7ba6a3..0763ecc2203e 100644 --- a/bin/reth/src/commands/mod.rs +++ b/bin/reth/src/commands/mod.rs @@ -7,12 +7,11 @@ pub mod dump_genesis; pub mod import; pub mod import_op; pub mod import_receipts_op; - pub mod init_cmd; pub mod init_state; - pub mod node; pub mod p2p; +pub mod prune; pub mod recover; pub mod stage; pub mod test_vectors; diff --git a/bin/reth/src/commands/prune.rs b/bin/reth/src/commands/prune.rs new file mode 100644 index 000000000000..f3b0fcaab966 --- /dev/null +++ b/bin/reth/src/commands/prune.rs @@ -0,0 +1,43 @@ +//! Command that runs pruning without any limits. + +use crate::commands::common::{AccessRights, Environment, EnvironmentArgs}; +use clap::Parser; +use reth_prune::PrunerBuilder; +use reth_static_file::StaticFileProducer; +use tracing::info; + +/// Prunes according to the configuration without any limits +#[derive(Debug, Parser)] +pub struct PruneCommand { + #[command(flatten)] + env: EnvironmentArgs, +} + +impl PruneCommand { + /// Execute the `prune` command + pub async fn execute(self) -> eyre::Result<()> { + let Environment { config, provider_factory, .. } = self.env.init(AccessRights::RW)?; + let prune_config = config.prune.unwrap_or_default(); + + // Copy data from database to static files + info!(target: "reth::cli", "Copying data from database to static files..."); + let static_file_producer = + StaticFileProducer::new(provider_factory.clone(), prune_config.segments.clone()); + let lowest_static_file_height = static_file_producer.lock().copy_to_static_files()?.min(); + info!(target: "reth::cli", ?lowest_static_file_height, "Copied data from database to static files"); + + // Delete data which has been copied to static files. + if let Some(prune_tip) = lowest_static_file_height { + info!(target: "reth::cli", ?prune_tip, ?prune_config, "Pruning data from database..."); + // Run the pruner according to the configuration, and don't enforce any limits on it + let mut pruner = PrunerBuilder::new(prune_config) + .prune_delete_limit(usize::MAX) + .build(provider_factory); + + pruner.run(prune_tip)?; + info!(target: "reth::cli", "Pruned data from database"); + } + + Ok(()) + } +} diff --git a/bin/reth/src/commands/stage/dump/merkle.rs b/bin/reth/src/commands/stage/dump/merkle.rs index fa345bb474a4..adc2e61485c9 100644 --- a/bin/reth/src/commands/stage/dump/merkle.rs +++ b/bin/reth/src/commands/stage/dump/merkle.rs @@ -8,7 +8,7 @@ use reth_exex::ExExManagerHandle; use reth_node_core::dirs::{ChainPath, DataDirPath}; use reth_primitives::BlockNumber; use reth_provider::{providers::StaticFileProvider, ProviderFactory}; -use reth_prune_types::PruneModes; +use reth_prune::PruneModes; use reth_stages::{ stages::{ AccountHashingStage, ExecutionStage, ExecutionStageThresholds, MerkleStage, diff --git a/bin/reth/src/commands/stage/unwind.rs b/bin/reth/src/commands/stage/unwind.rs index 157f33bff7cb..b778758caacb 100644 --- a/bin/reth/src/commands/stage/unwind.rs +++ b/bin/reth/src/commands/stage/unwind.rs @@ -13,7 +13,7 @@ use reth_provider::{ BlockExecutionWriter, BlockNumReader, ChainSpecProvider, FinalizedBlockReader, FinalizedBlockWriter, ProviderFactory, StaticFileProviderFactory, }; -use reth_prune_types::PruneModes; +use reth_prune::PruneModes; use reth_stages::{ sets::DefaultStages, stages::{ExecutionStage, ExecutionStageThresholds}, diff --git a/book/SUMMARY.md b/book/SUMMARY.md index 499b6dd97f9a..ad1a54633674 100644 --- a/book/SUMMARY.md +++ b/book/SUMMARY.md @@ -72,6 +72,7 @@ - [`reth debug replay-engine`](./cli/reth/debug/replay-engine.md) - [`reth recover`](./cli/reth/recover.md) - [`reth recover storage-tries`](./cli/reth/recover/storage-tries.md) + - [`reth prune`](./cli/reth/prune.md) - [Developers](./developers/developers.md) - [Execution Extensions](./developers/exex/exex.md) - [How do ExExes work?](./developers/exex/how-it-works.md) diff --git a/book/cli/SUMMARY.md b/book/cli/SUMMARY.md index 089de1b65a67..9f9d0fdb1dc3 100644 --- a/book/cli/SUMMARY.md +++ b/book/cli/SUMMARY.md @@ -43,4 +43,5 @@ - [`reth debug replay-engine`](./reth/debug/replay-engine.md) - [`reth recover`](./reth/recover.md) - [`reth recover storage-tries`](./reth/recover/storage-tries.md) + - [`reth prune`](./reth/prune.md) diff --git a/book/cli/reth.md b/book/cli/reth.md index a4ba8f3d3d9c..b8ac550816d0 100644 --- a/book/cli/reth.md +++ b/book/cli/reth.md @@ -19,6 +19,7 @@ Commands: config Write config to stdout debug Various debug routines recover Scripts for node recovery + prune Prune according to the configuration without any limits help Print this message or the help of the given subcommand(s) Options: diff --git a/book/cli/reth/prune.md b/book/cli/reth/prune.md new file mode 100644 index 000000000000..77ea724abd88 --- /dev/null +++ b/book/cli/reth/prune.md @@ -0,0 +1,146 @@ +# reth prune + +Prune according to the configuration without any limits + +```bash +$ reth prune --help +Usage: reth prune [OPTIONS] + +Options: + --instance + Add a new instance of a node. + + Configures the ports of the node to avoid conflicts with the defaults. This is useful for running multiple nodes on the same machine. + + Max number of instances is 200. It is chosen in a way so that it's not possible to have port numbers that conflict with each other. + + Changes to the following port numbers: - `DISCOVERY_PORT`: default + `instance` - 1 - `AUTH_PORT`: default + `instance` * 100 - 100 - `HTTP_RPC_PORT`: default - `instance` + 1 - `WS_RPC_PORT`: default + `instance` * 2 - 2 + + [default: 1] + + -h, --help + Print help (see a summary with '-h') + +Datadir: + --datadir + The path to the data dir for all reth files and subdirectories. + + Defaults to the OS-specific data directory: + + - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/` + - Windows: `{FOLDERID_RoamingAppData}/reth/` + - macOS: `$HOME/Library/Application Support/reth/` + + [default: default] + + --datadir.static_files + The absolute path to store static files in. + + --config + The path to the configuration file to use + + --chain + The chain this node is running. + Possible values are either a built-in chain or the path to a chain specification file. + + Built-in chains: + mainnet, sepolia, goerli, holesky, dev + + [default: mainnet] + +Database: + --db.log-level + Database logging level. Levels higher than "notice" require a debug build + + Possible values: + - fatal: Enables logging for critical conditions, i.e. assertion failures + - error: Enables logging for error conditions + - warn: Enables logging for warning conditions + - notice: Enables logging for normal but significant condition + - verbose: Enables logging for verbose informational + - debug: Enables logging for debug-level messages + - trace: Enables logging for trace debug-level messages + - extra: Enables logging for extra debug-level messages + + --db.exclusive + Open environment in exclusive/monopolistic mode. Makes it possible to open a database on an NFS volume + + [possible values: true, false] + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /logs] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. If set to 0, background file logging is disabled + + [default: 5] + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + [default: always] + + Possible values: + - always: Colors on + - auto: Colors on + - never: Colors off + +Display: + -v, --verbosity... + Set the minimum log level. + + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) + + -q, --quiet + Silence all log output +``` \ No newline at end of file diff --git a/crates/stages/api/src/pipeline/mod.rs b/crates/stages/api/src/pipeline/mod.rs index 67ef53855b15..1be468702e43 100644 --- a/crates/stages/api/src/pipeline/mod.rs +++ b/crates/stages/api/src/pipeline/mod.rs @@ -13,7 +13,6 @@ use reth_provider::{ }; use reth_prune::PrunerBuilder; use reth_static_file::StaticFileProducer; -use reth_static_file_types::HighestStaticFiles; use reth_tokio_util::{EventSender, EventStream}; use std::pin::Pin; use tokio::sync::watch; @@ -248,26 +247,9 @@ where /// CAUTION: This method locks the static file producer Mutex, hence can block the thread if the /// lock is occupied. pub fn move_to_static_files(&self) -> RethResult<()> { - let static_file_producer = self.static_file_producer.lock(); - // Copies data from database to static files - let lowest_static_file_height = { - let provider = self.provider_factory.provider()?; - let stages_checkpoints = [StageId::Headers, StageId::Execution, StageId::Bodies] - .into_iter() - .map(|stage| { - provider.get_stage_checkpoint(stage).map(|c| c.map(|c| c.block_number)) - }) - .collect::, _>>()?; - - let targets = static_file_producer.get_static_file_targets(HighestStaticFiles { - headers: stages_checkpoints[0], - receipts: stages_checkpoints[1], - transactions: stages_checkpoints[2], - })?; - static_file_producer.run(targets)?; - stages_checkpoints.into_iter().min().expect("exists") - }; + let lowest_static_file_height = + self.static_file_producer.lock().copy_to_static_files()?.min(); // Deletes data which has been copied to static files. if let Some(prune_tip) = lowest_static_file_height { diff --git a/crates/static-file/static-file/Cargo.toml b/crates/static-file/static-file/Cargo.toml index 29a601f050d0..1a1921d58c5c 100644 --- a/crates/static-file/static-file/Cargo.toml +++ b/crates/static-file/static-file/Cargo.toml @@ -21,6 +21,7 @@ reth-nippy-jar.workspace = true reth-tokio-util.workspace = true reth-prune-types.workspace = true reth-static-file-types.workspace = true +reth-stages-types.workspace = true alloy-primitives.workspace = true diff --git a/crates/static-file/static-file/src/static_file_producer.rs b/crates/static-file/static-file/src/static_file_producer.rs index 44ea3a5c84b4..eb9422804495 100644 --- a/crates/static-file/static-file/src/static_file_producer.rs +++ b/crates/static-file/static-file/src/static_file_producer.rs @@ -5,8 +5,12 @@ use alloy_primitives::BlockNumber; use parking_lot::Mutex; use rayon::prelude::*; use reth_db_api::database::Database; -use reth_provider::{providers::StaticFileWriter, ProviderFactory, StaticFileProviderFactory}; +use reth_provider::{ + providers::StaticFileWriter, ProviderFactory, StageCheckpointReader as _, + StaticFileProviderFactory, +}; use reth_prune_types::PruneModes; +use reth_stages_types::StageId; use reth_static_file_types::HighestStaticFiles; use reth_storage_errors::provider::ProviderResult; use reth_tokio_util::{EventSender, EventStream}; @@ -56,7 +60,7 @@ pub struct StaticFileProducerInner { event_sender: EventSender, } -/// Static File targets, per data part, measured in [`BlockNumber`]. +/// Static File targets, per data segment, measured in [`BlockNumber`]. #[derive(Debug, Clone, Eq, PartialEq)] pub struct StaticFileTargets { headers: Option>, @@ -167,6 +171,28 @@ impl StaticFileProducerInner { Ok(targets) } + /// Copies data from database to static files according to + /// [stage checkpoints](reth_stages_types::StageCheckpoint). + /// + /// Returns highest block numbers for all static file segments. + pub fn copy_to_static_files(&self) -> ProviderResult { + let provider = self.provider_factory.provider()?; + let stages_checkpoints = [StageId::Headers, StageId::Execution, StageId::Bodies] + .into_iter() + .map(|stage| provider.get_stage_checkpoint(stage).map(|c| c.map(|c| c.block_number))) + .collect::, _>>()?; + + let highest_static_files = HighestStaticFiles { + headers: stages_checkpoints[0], + receipts: stages_checkpoints[1], + transactions: stages_checkpoints[2], + }; + let targets = self.get_static_file_targets(highest_static_files)?; + self.run(targets)?; + + Ok(highest_static_files) + } + /// Returns a static file targets at the provided finalized block numbers per segment. /// The target is determined by the check against highest `static_files` using /// [`reth_provider::providers::StaticFileProvider::get_highest_static_files`]. diff --git a/crates/static-file/types/src/lib.rs b/crates/static-file/types/src/lib.rs index f78d61f6961b..556ec8f90676 100644 --- a/crates/static-file/types/src/lib.rs +++ b/crates/static-file/types/src/lib.rs @@ -20,7 +20,7 @@ pub use segment::{SegmentConfig, SegmentHeader, SegmentRangeInclusive, StaticFil /// Default static file block count. pub const BLOCKS_PER_STATIC_FILE: u64 = 500_000; -/// Highest static file block numbers, per data part. +/// Highest static file block numbers, per data segment. #[derive(Debug, Clone, Copy, Default, Eq, PartialEq)] pub struct HighestStaticFiles { /// Highest static file block of headers, inclusive. @@ -53,6 +53,11 @@ impl HighestStaticFiles { } } + /// Returns the minimum block of all segments. + pub fn min(&self) -> Option { + [self.headers, self.transactions, self.receipts].iter().filter_map(|&option| option).min() + } + /// Returns the maximum block of all segments. pub fn max(&self) -> Option { [self.headers, self.transactions, self.receipts].iter().filter_map(|&option| option).max()