From 558fdbd8b1ecf4522d32965f0be06b219c865bcb Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Mon, 19 Aug 2024 11:53:22 +0100 Subject: [PATCH] [sui-tool] dump-packages uses GraphQL (#18337) ## Description Replace the original implementation of the dump-packages command (which requires access to an indexer database) with an implementation that reads from a GraphQL service. The former is not readily accessible, but the latter should be. The new tool is also able to run incrementally: Fetching only packages created before a certain checkpoint, or pick up where a previous invocation took off to fetch new packages that were introduced since. ## Test plan Ran a test invocation, on our experimental read replica. With a max page size of 200, I was able to fetch 17000 packages (all the packages at the time the read replica was created) in 3 minutes. ## Stack - #17543 - #17692 - #17693 - #17696 - #18287 - #18288 - #18336 --- ## Release notes Check each box that your changes affect. If none of the boxes relate to your changes, release notes aren't required. For each box you select, include information after the relevant heading that describes the impact of your changes that a user might notice and any actions they must take to implement updates. - [ ] Protocol: - [ ] Nodes (Validators and Full nodes): - [ ] Indexer: - [ ] JSON-RPC: - [ ] GraphQL: - [ ] CLI: - [ ] Rust SDK: --- Cargo.lock | 182 +++++++++++++++++-- Cargo.toml | 4 + crates/sui-package-dump/Cargo.toml | 22 +++ crates/sui-package-dump/build.rs | 10 + crates/sui-package-dump/src/client.rs | 39 ++++ crates/sui-package-dump/src/lib.rs | 251 ++++++++++++++++++++++++++ crates/sui-package-dump/src/query.rs | 105 +++++++++++ crates/sui-tool/Cargo.toml | 3 +- crates/sui-tool/src/commands.rs | 26 ++- crates/sui-tool/src/lib.rs | 1 - crates/sui-tool/src/pkg_dump.rs | 122 ------------- 11 files changed, 618 insertions(+), 147 deletions(-) create mode 100644 crates/sui-package-dump/Cargo.toml create mode 100644 crates/sui-package-dump/build.rs create mode 100644 crates/sui-package-dump/src/client.rs create mode 100644 crates/sui-package-dump/src/lib.rs create mode 100644 crates/sui-package-dump/src/query.rs delete mode 100644 crates/sui-tool/src/pkg_dump.rs diff --git a/Cargo.lock b/Cargo.lock index 4449deae1a1d5..37af0287befea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2947,6 +2947,15 @@ dependencies = [ "siphasher", ] +[[package]] +name = "counter" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d458e66999348f56fd3ffcfbb7f7951542075ca8359687c703de6500c1ddccd" +dependencies = [ + "num-traits", +] + [[package]] name = "cpp_demangle" version = "0.4.0" @@ -3288,6 +3297,62 @@ dependencies = [ "syn 1.0.107", ] +[[package]] +name = "cynic" +version = "3.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "478c02b53607e3f21c374f024c2cfc2154e554905bba478e8e09409f10ce3726" +dependencies = [ + "cynic-proc-macros", + "ref-cast", + "reqwest 0.12.5", + "serde", + "serde_json", + "static_assertions", + "thiserror", +] + +[[package]] +name = "cynic-codegen" +version = "3.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c0ec86f960a00ce087e96ff6f073f6ff28b6876d69ce8caa06c03fb4143981c" +dependencies = [ + "counter", + "cynic-parser", + "darling 0.20.3", + "once_cell", + "ouroboros 0.18.4", + "proc-macro2 1.0.78", + "quote 1.0.35", + "strsim 0.10.0", + "syn 2.0.48", + "thiserror", +] + +[[package]] +name = "cynic-parser" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "718f6cd8c54ae5249fd42b0c86639df0100b8a86eea2e5f1b915cde2e1481453" +dependencies = [ + "indexmap 2.2.6", + "lalrpop-util", + "logos", +] + +[[package]] +name = "cynic-proc-macros" +version = "3.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25a69ecdf4aa110fed1c0c8de290bc8ccb2835388733cf2f418f0abdf6ff3899" +dependencies = [ + "cynic-codegen", + "darling 0.20.3", + "quote 1.0.35", + "syn 2.0.48", +] + [[package]] name = "darling" version = "0.13.4" @@ -4496,7 +4561,7 @@ dependencies = [ "tokio", "tracing", "walkdir", - "yansi", + "yansi 0.5.1", ] [[package]] @@ -6630,6 +6695,39 @@ dependencies = [ "serde", ] +[[package]] +name = "logos" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff1ceb190eb9bdeecdd8f1ad6a71d6d632a50905948771718741b5461fb01e13" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-codegen" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90be66cb7bd40cb5cc2e9cfaf2d1133b04a3d93b72344267715010a466e0915a" +dependencies = [ + "beef", + "fnv", + "lazy_static", + "proc-macro2 1.0.78", + "quote 1.0.35", + "regex-syntax 0.8.2", + "syn 2.0.48", +] + +[[package]] +name = "logos-derive" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45154231e8e96586b39494029e58f12f8ffcb5ecf80333a603a13aa205ea8cbd" +dependencies = [ + "logos-codegen", +] + [[package]] name = "lru" version = "0.7.8" @@ -7308,7 +7406,7 @@ dependencies = [ "move-ir-to-bytecode-syntax", "move-ir-types", "move-symbol-pool", - "ouroboros", + "ouroboros 0.17.2", ] [[package]] @@ -8956,7 +9054,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2ba07320d39dfea882faa70554b4bd342a5f273ed59ba7c1c6b4c840492c954" dependencies = [ "aliasable", - "ouroboros_macro", + "ouroboros_macro 0.17.2", + "static_assertions", +] + +[[package]] +name = "ouroboros" +version = "0.18.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "944fa20996a25aded6b4795c6d63f10014a7a83f8be9828a11860b08c5fc4a67" +dependencies = [ + "aliasable", + "ouroboros_macro 0.18.4", "static_assertions", ] @@ -8973,6 +9082,20 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "ouroboros_macro" +version = "0.18.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39b0deead1528fd0e5947a8546a9642a9777c25f6e1e26f34c97b204bbb465bd" +dependencies = [ + "heck 0.4.1", + "itertools 0.12.0", + "proc-macro2 1.0.78", + "proc-macro2-diagnostics", + "quote 1.0.35", + "syn 2.0.48", +] + [[package]] name = "output_vt100" version = "0.1.3" @@ -9698,7 +9821,7 @@ dependencies = [ "ctor", "diff", "output_vt100", - "yansi", + "yansi 0.5.1", ] [[package]] @@ -9818,6 +9941,19 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2 1.0.78", + "quote 1.0.35", + "syn 2.0.48", + "version_check", + "yansi 1.0.1", +] + [[package]] name = "prometheus" version = "0.13.3" @@ -10378,22 +10514,22 @@ dependencies = [ [[package]] name = "ref-cast" -version = "1.0.14" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c78fb8c9293bcd48ef6fce7b4ca950ceaf21210de6e105a883ee280c0f7b9ed" +checksum = "ccf0a6f84d5f1d581da8b41b47ec8600871962f2a528115b542b362d4b744931" dependencies = [ "ref-cast-impl", ] [[package]] name = "ref-cast-impl" -version = "1.0.14" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f9c0c92af03644e4806106281fe2e068ac5bc0ae74a707266d06ea27bccee5f" +checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6" dependencies = [ "proc-macro2 1.0.78", "quote 1.0.35", - "syn 1.0.107", + "syn 2.0.48", ] [[package]] @@ -14036,6 +14172,23 @@ dependencies = [ "tracing", ] +[[package]] +name = "sui-package-dump" +version = "1.32.0" +dependencies = [ + "anyhow", + "bcs", + "cynic", + "cynic-codegen", + "fastcrypto", + "move-core-types", + "reqwest 0.12.5", + "serde", + "serde_json", + "sui-types", + "tracing", +] + [[package]] name = "sui-package-management" version = "1.32.0" @@ -14733,7 +14886,6 @@ dependencies = [ "clap", "colored", "comfy-table", - "diesel", "eyre", "fastcrypto", "futures", @@ -14754,8 +14906,8 @@ dependencies = [ "sui-archival", "sui-config", "sui-core", - "sui-indexer", "sui-network", + "sui-package-dump", "sui-protocol-config", "sui-replay", "sui-sdk 1.32.0", @@ -16328,7 +16480,7 @@ dependencies = [ "itertools 0.10.5", "msim", "once_cell", - "ouroboros", + "ouroboros 0.17.2", "proc-macro2 1.0.78", "prometheus", "quote 1.0.35", @@ -17275,6 +17427,12 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + [[package]] name = "yasna" version = "0.5.2" diff --git a/Cargo.toml b/Cargo.toml index 78b95b57b41e3..3b3d1836ba1c8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -130,6 +130,7 @@ members = [ "crates/sui-open-rpc", "crates/sui-open-rpc-macros", "crates/sui-oracle", + "crates/sui-package-dump", "crates/sui-package-management", "crates/sui-package-resolver", "crates/sui-proc-macros", @@ -305,6 +306,8 @@ criterion = { version = "0.5.0", features = [ ] } crossterm = "0.25.0" csv = "1.2.1" +cynic = { version = "3.7.3", features = ["http-reqwest"] } +cynic-codegen = "= 3.7.3" dashmap = "5.5.3" # datatest-stable = "0.1.2" datatest-stable = { git = "https://github.com/nextest-rs/datatest-stable.git", rev = "72db7f6d1bbe36a5407e96b9488a581f763e106f" } @@ -628,6 +631,7 @@ sui-network = { path = "crates/sui-network" } sui-node = { path = "crates/sui-node" } sui-open-rpc = { path = "crates/sui-open-rpc" } sui-open-rpc-macros = { path = "crates/sui-open-rpc-macros" } +sui-package-dump = { path = "crates/sui-package-dump" } sui-package-management = { path = "crates/sui-package-management" } sui-package-resolver = { path = "crates/sui-package-resolver" } sui-proc-macros = { path = "crates/sui-proc-macros" } diff --git a/crates/sui-package-dump/Cargo.toml b/crates/sui-package-dump/Cargo.toml new file mode 100644 index 0000000000000..92632519a9877 --- /dev/null +++ b/crates/sui-package-dump/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "sui-package-dump" +version.workspace = true +authors = ["Mysten Labs Result { + Ok(Self { + inner: reqwest::Client::builder() + .user_agent(concat!("sui-package-dump/", env!("CARGO_PKG_VERSION"))) + .build() + .context("Failed to create GraphQL client")?, + url: url.into_url().context("Invalid RPC URL")?, + }) + } + + pub(crate) async fn query(&self, query: Operation) -> Result + where + V: Serialize, + Q: DeserializeOwned + QueryBuilder + 'static, + { + self.inner + .post(self.url.clone()) + .run_graphql(query) + .await + .context("Failed to send GraphQL query")? + .data + .ok_or_else(|| anyhow!("Empty response to query")) + } +} diff --git a/crates/sui-package-dump/src/lib.rs b/crates/sui-package-dump/src/lib.rs new file mode 100644 index 0000000000000..8438a8bec7f88 --- /dev/null +++ b/crates/sui-package-dump/src/lib.rs @@ -0,0 +1,251 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +use std::{ + collections::BTreeMap, + fs, + path::{Path, PathBuf}, +}; + +use anyhow::{bail, ensure, Context, Result}; +use client::Client; +use fastcrypto::encoding::{Base64, Encoding}; +use query::{limits, packages, SuiAddress, UInt53}; +use sui_types::object::Object; +use tracing::info; + +mod client; +mod query; + +/// Ensure all packages created before `before_checkpoint` are written to the `output_dir`ectory, +/// from the GraphQL service at `rpc_url`. +/// +/// `output_dir` can be a path to a non-existent directory, in which case it will be created, or an +/// existing empty directory (in which case it will be filled), or an existing directory that has +/// been written to in the past (in which case this invocation will pick back up from where the +/// previous invocation left off). +pub async fn dump( + rpc_url: String, + output_dir: PathBuf, + before_checkpoint: Option, +) -> Result<()> { + ensure_output_directory(&output_dir)?; + + let client = Client::new(rpc_url)?; + let after_checkpoint = read_last_checkpoint(&output_dir)?; + let limit = max_page_size(&client).await?; + let (last_checkpoint, packages) = + fetch_packages(&client, limit, after_checkpoint, before_checkpoint).await?; + + for package in &packages { + let SuiAddress(address) = &package.address; + dump_package(&output_dir, package) + .with_context(|| format!("Failed to dump package {address}"))?; + } + + if let Some(last_checkpoint) = last_checkpoint { + write_last_checkpoint(&output_dir, last_checkpoint)?; + } + + Ok(()) +} + +/// Ensure the output directory exists, either because it already exists as a writable directory, or +/// by creating a new directory. +fn ensure_output_directory(path: impl Into) -> Result<()> { + let path: PathBuf = path.into(); + if !path.exists() { + fs::create_dir_all(&path).context("Making output directory")?; + return Ok(()); + } + + ensure!( + path.is_dir(), + "Output path is not a directory: {}", + path.display() + ); + + let metadata = fs::metadata(&path).context("Getting metadata for output path")?; + + ensure!( + !metadata.permissions().readonly(), + "Output directory is not writable: {}", + path.display() + ); + + Ok(()) +} + +/// Load the last checkpoint that was loaded by a previous run of the tool, if there is a previous +/// run. +fn read_last_checkpoint(output: &Path) -> Result> { + let path = output.join("last-checkpoint"); + if !path.exists() { + return Ok(None); + } + + let content = fs::read_to_string(&path).context("Failed to read last checkpoint")?; + let checkpoint: u64 = + serde_json::from_str(&content).context("Failed to parse last checkpoint")?; + + info!("Resuming download after checkpoint {checkpoint}"); + + Ok(Some(checkpoint)) +} + +/// Write the max checkpoint that we have seen a package from back to the output directory. +fn write_last_checkpoint(output: &Path, checkpoint: u64) -> Result<()> { + let path = output.join("last-checkpoint"); + let content = + serde_json::to_string(&checkpoint).context("Failed to serialize last checkpoint")?; + + fs::write(path, content).context("Failed to write last checkpoint")?; + Ok(()) +} + +/// Read the max page size supported by the GraphQL service. +async fn max_page_size(client: &Client) -> Result { + Ok(client + .query(limits::build()) + .await + .context("Failed to fetch max page size")? + .service_config + .max_page_size) +} + +/// Read all the packages between `after_checkpoint` and `before_checkpoint`, in batches of +/// `page_size` from the `client` connected to a GraphQL service. +/// +/// If `after_checkpoint` is not provided, packages will be read from genesis. If +/// `before_checkpoint` is not provided, packages will be read until the latest checkpoint. +/// +/// Returns the latest checkpoint that was read from in this fetch, and a list of all the packages +/// that were read. +async fn fetch_packages( + client: &Client, + page_size: i32, + after_checkpoint: Option, + before_checkpoint: Option, +) -> Result<(Option, Vec)> { + let packages::Query { + checkpoint: checkpoint_viewed_at, + packages: + packages::MovePackageConnection { + mut page_info, + mut nodes, + }, + } = client + .query(packages::build( + page_size, + None, + after_checkpoint.map(UInt53), + before_checkpoint.map(UInt53), + )) + .await + .with_context(|| "Failed to fetch page 1 of packages.")?; + + for i in 2.. { + if !page_info.has_next_page { + break; + } + + let packages = client + .query(packages::build( + page_size, + page_info.end_cursor, + after_checkpoint.map(UInt53), + before_checkpoint.map(UInt53), + )) + .await + .with_context(|| format!("Failed to fetch page {i} of packages."))? + .packages; + + nodes.extend(packages.nodes); + page_info = packages.page_info; + + info!( + "Fetched page {i} ({} package{} so far).", + nodes.len(), + if nodes.len() == 1 { "" } else { "s" }, + ); + } + + use packages::Checkpoint as C; + let last_checkpoint = match (checkpoint_viewed_at, before_checkpoint) { + ( + Some(C { + sequence_number: UInt53(v), + }), + Some(b), + ) if b > 0 => Some(v.min(b - 1)), + ( + Some(C { + sequence_number: UInt53(c), + }), + _, + ) + | (_, Some(c)) => Some(c), + _ => None, + }; + + Ok((last_checkpoint, nodes)) +} + +/// Write out `pkg` to the `output_dir`ectory, using the package's address and name as the directory +/// name. The following files are written for each directory: +/// +/// - `object.bcs` -- the BCS serialized form of the `Object` type containing the package. +/// +/// - `linkage.json` -- a JSON serialization of the package's linkage table, mapping dependency +/// original IDs to the version of the dependency being depended on and the ID of the object +/// on-chain that contains that version. +/// +/// - `origins.json` -- a JSON serialize of the type origin table, mapping type names contained in +/// this package to the version of the package that first introduced that type. +/// +/// - `*.mv` -- a BCS serialization of each compiled module in the package. +fn dump_package(output_dir: &Path, pkg: &packages::MovePackage) -> Result<()> { + let Some(query::Base64(bcs)) = &pkg.bcs else { + bail!("Missing BCS"); + }; + + let bytes = Base64::decode(bcs).context("Failed to decode BCS")?; + + let object = bcs::from_bytes::(&bytes).context("Failed to deserialize")?; + let id = object.id(); + let Some(package) = object.data.try_as_package() else { + bail!("Not a package"); + }; + + let origins: BTreeMap<_, _> = package + .type_origin_table() + .iter() + .map(|o| { + ( + format!("{}::{}", o.module_name, o.datatype_name), + o.package.to_string(), + ) + }) + .collect(); + + let package_dir = output_dir.join(format!("{}.{}", id, package.version().value())); + fs::create_dir(&package_dir).context("Failed to make output directory")?; + + let linkage_json = serde_json::to_string_pretty(package.linkage_table()) + .context("Failed to serialize linkage")?; + let origins_json = + serde_json::to_string_pretty(&origins).context("Failed to serialize type origins")?; + + fs::write(package_dir.join("object.bcs"), bytes).context("Failed to write object BCS")?; + fs::write(package_dir.join("linkage.json"), linkage_json).context("Failed to write linkage")?; + fs::write(package_dir.join("origins.json"), origins_json) + .context("Failed to write type origins")?; + + for (module_name, module_bytes) in package.serialized_module_map() { + let module_path = package_dir.join(format!("{module_name}.mv")); + fs::write(module_path, module_bytes) + .with_context(|| format!("Failed to write module: {module_name}"))? + } + + Ok(()) +} diff --git a/crates/sui-package-dump/src/query.rs b/crates/sui-package-dump/src/query.rs new file mode 100644 index 0000000000000..a0d2c0ae391d5 --- /dev/null +++ b/crates/sui-package-dump/src/query.rs @@ -0,0 +1,105 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +use cynic::Operation; +use cynic::QueryBuilder; + +#[cynic::schema("sui")] +mod schema {} + +#[derive(cynic::Scalar, Debug)] +pub(crate) struct SuiAddress(pub String); + +#[derive(cynic::Scalar, Debug)] +pub(crate) struct Base64(pub String); + +#[derive(cynic::Scalar, Debug)] +pub(crate) struct UInt53(pub u64); + +/// Query types related to GraphQL service limits. +pub(crate) mod limits { + use super::*; + + pub(crate) fn build() -> Operation { + Query::build(()) + } + + #[derive(cynic::QueryFragment, Debug)] + pub(crate) struct Query { + pub(crate) service_config: ServiceConfig, + } + + #[derive(cynic::QueryFragment, Debug)] + pub(crate) struct ServiceConfig { + pub(crate) max_page_size: i32, + } +} + +/// Query types related to fetching packages. +pub(crate) mod packages { + use super::*; + + pub(crate) fn build( + first: i32, + after: Option, + after_checkpoint: Option, + before_checkpoint: Option, + ) -> Operation { + Query::build(Vars { + first, + after, + filter: Some(MovePackageCheckpointFilter { + after_checkpoint, + before_checkpoint, + }), + }) + } + + #[derive(cynic::QueryVariables, Debug)] + pub(crate) struct Vars { + pub(crate) first: i32, + pub(crate) after: Option, + pub(crate) filter: Option, + } + + #[derive(cynic::InputObject, Debug)] + pub(crate) struct MovePackageCheckpointFilter { + pub(crate) after_checkpoint: Option, + pub(crate) before_checkpoint: Option, + } + + #[derive(cynic::QueryFragment, Debug)] + #[cynic(variables = "Vars")] + pub(crate) struct Query { + pub(crate) checkpoint: Option, + #[arguments( + first: $first, + after: $after, + filter: $filter, + )] + pub(crate) packages: MovePackageConnection, + } + + #[derive(cynic::QueryFragment, Debug)] + pub(crate) struct Checkpoint { + pub(crate) sequence_number: UInt53, + } + + #[derive(cynic::QueryFragment, Debug)] + pub(crate) struct MovePackageConnection { + pub(crate) page_info: PageInfo, + pub(crate) nodes: Vec, + } + + #[derive(cynic::QueryFragment, Debug)] + pub(crate) struct PageInfo { + pub(crate) has_next_page: bool, + pub(crate) end_cursor: Option, + } + + #[derive(cynic::QueryFragment, Debug)] + pub(crate) struct MovePackage { + pub(crate) address: SuiAddress, + pub(crate) bcs: Option, + } +} diff --git a/crates/sui-tool/Cargo.toml b/crates/sui-tool/Cargo.toml index 2db2c5395e241..a19bfaa194b94 100644 --- a/crates/sui-tool/Cargo.toml +++ b/crates/sui-tool/Cargo.toml @@ -13,7 +13,6 @@ bcs.workspace = true clap = { version = "4.1.4", features = ["derive"] } colored.workspace = true comfy-table.workspace = true -diesel.workspace = true eyre.workspace = true futures.workspace = true hex.workspace = true @@ -41,7 +40,6 @@ narwhal-storage.workspace = true narwhal-types.workspace = true sui-config.workspace = true sui-core.workspace = true -sui-indexer.workspace = true sui-network.workspace = true sui-snapshot.workspace = true sui-protocol-config.workspace = true @@ -50,4 +48,5 @@ sui-sdk.workspace = true sui-storage.workspace = true sui-types.workspace = true sui-archival.workspace = true +sui-package-dump.workspace = true bin-version.workspace = true diff --git a/crates/sui-tool/src/commands.rs b/crates/sui-tool/src/commands.rs index 95b39183c96f6..3871143671e3e 100644 --- a/crates/sui-tool/src/commands.rs +++ b/crates/sui-tool/src/commands.rs @@ -5,7 +5,7 @@ use crate::{ check_completed_snapshot, db_tool::{execute_db_tool_command, print_db_all_tables, DbToolCommand}, download_db_snapshot, download_formal_snapshot, dump_checkpoints_from_archive, - get_latest_available_epoch, get_object, get_transaction_block, make_clients, pkg_dump, + get_latest_available_epoch, get_object, get_transaction_block, make_clients, restore_from_db_checkpoint, verify_archive, verify_archive_by_checksum, ConciseObjectOutput, GroupedObjectOutput, SnapshotVerifyMode, VerboseObjectOutput, }; @@ -177,21 +177,26 @@ pub enum ToolCommand { max_content_length: usize, }, - /// Download all packages to the local filesystem from an indexer database. Each package gets - /// its own sub-directory, named for its ID on-chain, containing two metadata files - /// (linkage.json and origins.json) as well as a file for every module it contains. Each module - /// file is named for its module name, with a .mv suffix, and contains Move bytecode (suitable - /// for passing into a disassembler). + /// Download all packages to the local filesystem from a GraphQL service. Each package gets its + /// own sub-directory, named for its ID on-chain and version containing two metadata files + /// (linkage.json and origins.json), a file containing the overall object and a file for every + /// module it contains. Each module file is named for its module name, with a .mv suffix, and + /// contains Move bytecode (suitable for passing into a disassembler). #[command(name = "dump-packages")] DumpPackages { - /// Connection information for the Indexer's Postgres DB. + /// Connection information for a GraphQL service. #[clap(long, short)] - db_url: String, + rpc_url: String, /// Path to a non-existent directory that can be created and filled with package information. #[clap(long, short)] output_dir: PathBuf, + /// Only fetch packages that were created before this checkpoint (given by its sequence + /// number). + #[clap(long)] + before_checkpoint: Option, + /// If false (default), log level will be overridden to "off", and output will be reduced to /// necessary status information. #[clap(short, long = "verbose")] @@ -633,8 +638,9 @@ impl ToolCommand { } } ToolCommand::DumpPackages { - db_url, + rpc_url, output_dir, + before_checkpoint, verbose, } => { if !verbose { @@ -643,7 +649,7 @@ impl ToolCommand { .expect("Failed to update log level"); } - pkg_dump::dump(db_url, output_dir).await?; + sui_package_dump::dump(rpc_url, output_dir, before_checkpoint).await?; } ToolCommand::DumpValidators { genesis, concise } => { let genesis = Genesis::load(genesis).unwrap(); diff --git a/crates/sui-tool/src/lib.rs b/crates/sui-tool/src/lib.rs index 574eb87a64f1b..b4290dc48b510 100644 --- a/crates/sui-tool/src/lib.rs +++ b/crates/sui-tool/src/lib.rs @@ -71,7 +71,6 @@ use typed_store::rocks::MetricConf; pub mod commands; pub mod db_tool; -pub mod pkg_dump; #[derive( Clone, Serialize, Deserialize, Debug, PartialEq, Copy, PartialOrd, Ord, Eq, ValueEnum, Default, diff --git a/crates/sui-tool/src/pkg_dump.rs b/crates/sui-tool/src/pkg_dump.rs deleted file mode 100644 index bd78cbf2b4b87..0000000000000 --- a/crates/sui-tool/src/pkg_dump.rs +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright (c) Mysten Labs, Inc. -// SPDX-License-Identifier: Apache-2.0 - -use std::{ - collections::BTreeMap, - fs, - path::{Path, PathBuf}, - time::Duration, -}; - -use anyhow::{anyhow, ensure, Context, Result}; -use diesel::{ - r2d2::{ConnectionManager, Pool}, - PgConnection, RunQueryDsl, -}; -use sui_indexer::{models::packages::StoredPackage, schema::packages}; -use sui_types::{base_types::SuiAddress, move_package::MovePackage}; -use tracing::info; - -type PgPool = Pool>; - -pub(crate) async fn dump(db_url: String, output_dir: PathBuf) -> Result<()> { - ensure_output_directory(&output_dir)?; - - let conn = ConnectionManager::::new(db_url); - let pool = Pool::builder() - .max_size(1) - .connection_timeout(Duration::from_secs(30)) - .build(conn) - .context("Failed to create connection pool.")?; - - info!("Querying Indexer..."); - let pkgs = query_packages(&pool)?; - let total = pkgs.len(); - - let mut progress = 0; - for (i, pkg) in pkgs.into_iter().enumerate() { - let pct = (100 * i) / total; - if pct % 5 == 0 && pct > progress { - info!("Dumping packages ({total}): {pct: >3}%"); - progress = pct; - } - - let id = SuiAddress::from_bytes(&pkg.package_id).context("Parsing package ID")?; - dump_package(&output_dir, id, &pkg.move_package) - .with_context(|| format!("Dumping package: {id}"))?; - } - - info!("Dumping packages ({total}): 100%, Done."); - Ok(()) -} - -/// Ensure the output directory exists, either because it already exists as an empty, writable -/// directory, or by creating a new directory. -fn ensure_output_directory(path: impl Into) -> Result<()> { - let path: PathBuf = path.into(); - if path.exists() { - ensure!( - path.is_dir(), - "Output path is not a directory: {}", - path.display() - ); - ensure!( - path.read_dir().is_ok_and(|mut d| d.next().is_none()), - "Output directory is not empty: {}", - path.display(), - ); - - let metadata = fs::metadata(&path).context("Getting metadata for output path")?; - - ensure!( - !metadata.permissions().readonly(), - "Output directory is not writable: {}", - path.display() - ) - } else { - fs::create_dir_all(&path).context("Making output directory")?; - } - - Ok(()) -} - -fn query_packages(pool: &PgPool) -> Result> { - let mut conn = pool - .get() - .map_err(|e| anyhow!("Failed to get connection: {e}"))?; - Ok(packages::dsl::packages.load::(&mut conn)?) -} - -fn dump_package(output_dir: &Path, id: SuiAddress, pkg: &[u8]) -> Result<()> { - let package = bcs::from_bytes::(pkg).context("Deserializing")?; - let origins: BTreeMap<_, _> = package - .type_origin_table() - .iter() - .map(|o| { - ( - format!("{}::{}", o.module_name, o.datatype_name), - o.package.to_string(), - ) - }) - .collect(); - - let package_dir = output_dir.join(format!("{}.{}", id, package.version().value())); - fs::create_dir(&package_dir).context("Making output directory")?; - - let linkage_json = - serde_json::to_string_pretty(package.linkage_table()).context("Serializing linkage")?; - let origins_json = - serde_json::to_string_pretty(&origins).context("Serializing type origins")?; - - fs::write(package_dir.join("package.bcs"), pkg).context("Writing package BCS")?; - fs::write(package_dir.join("linkage.json"), linkage_json).context("Writing linkage")?; - fs::write(package_dir.join("origins.json"), origins_json).context("Writing type origins")?; - - for (module_name, module_bytes) in package.serialized_module_map() { - let module_path = package_dir.join(format!("{module_name}.mv")); - fs::write(module_path, module_bytes) - .with_context(|| format!("Writing module: {module_name}"))? - } - - Ok(()) -}