From fc57f20d047a43e156a167b44cc98f7ea5ba4739 Mon Sep 17 00:00:00 2001 From: Rain Date: Sat, 9 Nov 2024 01:18:54 +0000 Subject: [PATCH 1/3] [spr] initial version Created using spr 1.3.6-beta.1 --- Cargo.lock | 43 +- Cargo.toml | 8 + dev-tools/reconfigurator-cli/Cargo.toml | 3 +- dev-tools/reconfigurator-cli/src/main.rs | 713 ++++++++---------- .../tests/input/cmds-example.txt | 4 +- .../reconfigurator-cli/tests/input/cmds.txt | 2 +- .../tests/output/cmd-example-stdout | 13 +- .../tests/output/cmd-stdout | 144 ++-- .../reconfigurator-cli/tests/test_basic.rs | 15 +- nexus/reconfigurator/planning/src/example.rs | 10 +- nexus/reconfigurator/planning/src/planner.rs | 2 +- nexus/reconfigurator/simulation/Cargo.toml | 24 + nexus/reconfigurator/simulation/src/config.rs | 149 ++++ nexus/reconfigurator/simulation/src/errors.rs | 117 +++ nexus/reconfigurator/simulation/src/lib.rs | 61 ++ nexus/reconfigurator/simulation/src/rng.rs | 145 ++++ nexus/reconfigurator/simulation/src/sim.rs | 146 ++++ nexus/reconfigurator/simulation/src/state.rs | 510 +++++++++++++ nexus/reconfigurator/simulation/src/system.rs | 473 ++++++++++++ nexus/types/src/deployment/execution/dns.rs | 4 +- uuid-kinds/src/lib.rs | 1 + 21 files changed, 2099 insertions(+), 488 deletions(-) create mode 100644 nexus/reconfigurator/simulation/Cargo.toml create mode 100644 nexus/reconfigurator/simulation/src/config.rs create mode 100644 nexus/reconfigurator/simulation/src/errors.rs create mode 100644 nexus/reconfigurator/simulation/src/lib.rs create mode 100644 nexus/reconfigurator/simulation/src/rng.rs create mode 100644 nexus/reconfigurator/simulation/src/sim.rs create mode 100644 nexus/reconfigurator/simulation/src/state.rs create mode 100644 nexus/reconfigurator/simulation/src/system.rs diff --git a/Cargo.lock b/Cargo.lock index de46e4f49f..7dacc6e925 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5821,6 +5821,27 @@ dependencies = [ "slog-error-chain", ] +[[package]] +name = "nexus-reconfigurator-simulation" 
+version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "indexmap 2.6.0", + "nexus-inventory", + "nexus-reconfigurator-planning", + "nexus-types", + "omicron-common", + "omicron-uuid-kinds", + "omicron-workspace-hack", + "petname", + "slog", + "sync-ptr", + "thiserror", + "typed-rng", + "uuid", +] + [[package]] name = "nexus-saga-recovery" version = "0.1.0" @@ -8121,6 +8142,19 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "petname" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cd31dcfdbbd7431a807ef4df6edd6473228e94d5c805e8cf671227a21bad068" +dependencies = [ + "anyhow", + "itertools 0.13.0", + "proc-macro2", + "quote", + "rand", +] + [[package]] name = "phf" version = "0.11.2" @@ -9008,13 +9042,13 @@ dependencies = [ "dropshot", "expectorate", "humantime", - "indexmap 2.6.0", "internal-dns-types", "nexus-client", "nexus-db-queries", "nexus-inventory", "nexus-reconfigurator-planning", "nexus-reconfigurator-preparation", + "nexus-reconfigurator-simulation", "nexus-sled-agent-shared", "nexus-test-utils", "nexus-test-utils-macros", @@ -9035,7 +9069,6 @@ dependencies = [ "swrite", "tabled", "tokio", - "typed-rng", "uuid", ] @@ -11050,6 +11083,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync-ptr" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df0928969361c40d9ca9d3f12828adf628107fdbe31e8e17324923a90837249b" + [[package]] name = "sync_wrapper" version = "1.0.1" diff --git a/Cargo.toml b/Cargo.toml index 67fe816748..95be3b15b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -78,6 +78,7 @@ members = [ "nexus/reconfigurator/execution", "nexus/reconfigurator/planning", "nexus/reconfigurator/preparation", + "nexus/reconfigurator/simulation", "nexus/saga-recovery", "nexus/test-interface", "nexus/test-utils-macros", @@ -205,6 +206,7 @@ default-members = [ "nexus/reconfigurator/execution", "nexus/reconfigurator/planning", 
"nexus/reconfigurator/preparation", + "nexus/reconfigurator/simulation", "nexus/saga-recovery", "nexus/test-interface", "nexus/test-utils-macros", @@ -452,6 +454,7 @@ nexus-networking = { path = "nexus/networking" } nexus-reconfigurator-execution = { path = "nexus/reconfigurator/execution" } nexus-reconfigurator-planning = { path = "nexus/reconfigurator/planning" } nexus-reconfigurator-preparation = { path = "nexus/reconfigurator/preparation" } +nexus-reconfigurator-simulation = { path = "nexus/reconfigurator/simulation" } nexus-saga-recovery = { path = "nexus/saga-recovery" } nexus-sled-agent-shared = { path = "nexus-sled-agent-shared" } nexus-test-interface = { path = "nexus/test-interface" } @@ -511,6 +514,10 @@ paste = "1.0.15" percent-encoding = "2.3.1" peg = "0.8.4" pem = "3.0" +# petname's default features pull in clap for CLI parsing, which we don't need. +# Note that if you depend on petname, you must also set default-features = +# false: petname = { workspace = true, default-features = false }. 
+petname = { version = "2.0.2", default-features = false, features = ["default-rng", "default-words"] } petgraph = "0.6.5" postgres-protocol = "0.6.7" predicates = "3.1.2" @@ -601,6 +608,7 @@ strum = { version = "0.26", features = [ "derive" ] } subprocess = "0.2.9" supports-color = "3.0.1" swrite = "0.1.0" +sync-ptr = "0.1.1" libsw = { version = "3.3.1", features = ["tokio"] } syn = { version = "2.0" } tabled = "0.15.0" diff --git a/dev-tools/reconfigurator-cli/Cargo.toml b/dev-tools/reconfigurator-cli/Cargo.toml index 2aab2c2333..ad336e3939 100644 --- a/dev-tools/reconfigurator-cli/Cargo.toml +++ b/dev-tools/reconfigurator-cli/Cargo.toml @@ -18,10 +18,10 @@ chrono.workspace = true clap.workspace = true dropshot.workspace = true humantime.workspace = true -indexmap.workspace = true internal-dns-types.workspace = true nexus-inventory.workspace = true nexus-reconfigurator-planning.workspace = true +nexus-reconfigurator-simulation.workspace = true nexus-sled-agent-shared.workspace = true nexus-types.workspace = true omicron-common.workspace = true @@ -34,7 +34,6 @@ slog-error-chain.workspace = true slog.workspace = true swrite.workspace = true tabled.workspace = true -typed-rng.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index c23ad8e4be..777693f536 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -6,21 +6,20 @@ use anyhow::{anyhow, bail, Context}; use camino::Utf8PathBuf; -use chrono::Utc; use clap::CommandFactory; use clap::FromArgMatches; use clap::ValueEnum; use clap::{Args, Parser, Subcommand}; -use indexmap::IndexMap; use internal_dns_types::diff::DnsDiff; use nexus_inventory::CollectionBuilder; use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; use nexus_reconfigurator_planning::blueprint_builder::EnsureMultiple; use 
nexus_reconfigurator_planning::example::ExampleSystemBuilder; use nexus_reconfigurator_planning::planner::Planner; -use nexus_reconfigurator_planning::system::{ - SledBuilder, SledHwInventory, SystemDescription, -}; +use nexus_reconfigurator_planning::system::{SledBuilder, SystemDescription}; +use nexus_reconfigurator_simulation::SimState; +use nexus_reconfigurator_simulation::SimStateBuilder; +use nexus_reconfigurator_simulation::Simulator; use nexus_sled_agent_shared::inventory::ZoneKind; use nexus_types::deployment::execution; use nexus_types::deployment::execution::blueprint_external_dns_config; @@ -30,15 +29,13 @@ use nexus_types::deployment::OmicronZoneNic; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::SledFilter; use nexus_types::deployment::{Blueprint, UnstableReconfiguratorState}; -use nexus_types::internal_api::params::DnsConfigParams; -use nexus_types::inventory::Collection; use omicron_common::api::external::Generation; use omicron_common::api::external::Name; use omicron_common::policy::NEXUS_REDUNDANCY; -use omicron_uuid_kinds::CollectionKind; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneUuid; +use omicron_uuid_kinds::ReconfiguratorSimUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::VnicUuid; use reedline::{Reedline, Signal}; @@ -47,116 +44,47 @@ use std::collections::BTreeMap; use std::io::BufRead; use swrite::{swriteln, SWrite}; use tabled::Tabled; -use typed_rng::TypedUuidRng; use uuid::Uuid; /// REPL state #[derive(Debug)] struct ReconfiguratorSim { - /// describes the sleds in the system - /// - /// This resembles what we get from the `sled` table in a real system. It - /// also contains enough information to generate inventory collections that - /// describe the system. 
- system: SystemDescription, - - /// inventory collections created by the user - collections: IndexMap, - - /// blueprints created by the user - blueprints: IndexMap, - - /// internal DNS configurations - internal_dns: BTreeMap, - /// external DNS configurations - external_dns: BTreeMap, - - /// Set of silo names configured - /// - /// These are used to determine the contents of external DNS. - silo_names: Vec, - - /// External DNS zone name configured - external_dns_zone_name: String, - - /// RNG for collection IDs - collection_id_rng: TypedUuidRng, - - /// Policy overrides - num_nexus: Option, - + // The simulator currently being used. + sim: Simulator, + // The current state. + current: ReconfiguratorSimUuid, + // The current system state log: slog::Logger, } impl ReconfiguratorSim { - fn new(log: slog::Logger) -> Self { + fn new(log: slog::Logger, seed: Option) -> Self { Self { - system: SystemDescription::new(), - collections: IndexMap::new(), - blueprints: IndexMap::new(), - internal_dns: BTreeMap::new(), - external_dns: BTreeMap::new(), - silo_names: vec!["example-silo".parse().unwrap()], - external_dns_zone_name: String::from("oxide.example"), - collection_id_rng: TypedUuidRng::from_entropy(), - num_nexus: None, + sim: Simulator::new(&log, seed), + current: Simulator::ROOT_ID, log, } } - /// Returns true if the user has made local changes to the simulated - /// system. - /// - /// This is used when the user asks to load an example system. Doing that - /// basically requires a clean slate. - fn user_made_system_changes(&self) -> bool { - // Use this pattern to ensure that if a new field is added to - // ReconfiguratorSim, it will fail to compile until it's added here. - let Self { - system, - collections, - blueprints, - internal_dns, - external_dns, - // For purposes of this method, we let these policy parameters be - // set to any arbitrary value. This lets example systems be - // generated using these values. 
- silo_names: _, - external_dns_zone_name: _, - collection_id_rng: _, - num_nexus: _, - log: _, - } = self; - - system.has_sleds() - || !collections.is_empty() - || !blueprints.is_empty() - || !internal_dns.is_empty() - || !external_dns.is_empty() - } - - // Reset the state of the REPL. - fn wipe(&mut self) { - *self = Self::new(self.log.clone()); - } - - fn blueprint_lookup(&self, id: Uuid) -> Result<&Blueprint, anyhow::Error> { - self.blueprints - .get(&id) - .ok_or_else(|| anyhow!("no such blueprint: {}", id)) + fn current_state(&self) -> &SimState { + self.sim + .get_state(self.current) + .expect("current state should always exist") } - fn blueprint_insert_new(&mut self, blueprint: Blueprint) { - let previous = self.blueprints.insert(blueprint.id, blueprint); - assert!(previous.is_none()); + fn commit_and_bump(&mut self, description: String, state: SimStateBuilder) { + let new_id = state.commit(description, &mut self.sim); + self.current = new_id; } fn planning_input( &self, parent_blueprint: &Blueprint, ) -> anyhow::Result { - let mut builder = self - .system + let state = self.current_state(); + let mut builder = state + .system() + .description() .to_planning_input_builder() .context("generating planning input builder")?; @@ -218,6 +146,10 @@ impl ReconfiguratorSim { #[derive(Parser, Debug)] struct CmdReconfiguratorSim { input_file: Option, + + /// The RNG seed to initialize the simulator with. 
+ #[clap(long)] + seed: Option, } // REPL implementation @@ -231,7 +163,13 @@ fn main() -> anyhow::Result<()> { .to_logger("reconfigurator-sim") .context("creating logger")?; - let mut sim = ReconfiguratorSim::new(log); + let seed_provided = cmd.seed.is_some(); + let mut sim = ReconfiguratorSim::new(log, cmd.seed); + if seed_provided { + println!("using provided RNG seed: {}", sim.sim.initial_seed()); + } else { + println!("generated RNG seed: {}", sim.sim.initial_seed()); + } if let Some(input_file) = cmd.input_file { let file = std::fs::File::open(&input_file) @@ -348,7 +286,7 @@ fn process_entry(sim: &mut ReconfiguratorSim, entry: String) -> LoopResult { Commands::LoadExample(args) => cmd_load_example(sim, args), Commands::FileContents(args) => cmd_file_contents(args), Commands::Save(args) => cmd_save(sim, args), - Commands::Wipe => cmd_wipe(sim), + Commands::Wipe(args) => cmd_wipe(sim, args), }; match cmd_result { @@ -422,7 +360,7 @@ enum Commands { /// show information about what's in a saved file FileContents(FileContentsArgs), /// reset the state of the REPL - Wipe, + Wipe(WipeArgs), } #[derive(Debug, Args)] @@ -556,11 +494,11 @@ struct LoadArgs { struct LoadExampleArgs { /// Seed for the RNG that's used to generate the example system. /// - /// Setting this makes it possible for callers to get deterministic - /// results. In automated tests, the seed is typically the name of the - /// test. - #[clap(long, default_value = "reconfigurator_cli_example")] - seed: String, + /// If this is provided, the RNG is updated with this seed before the + /// example system is generated. If it's not provided, the existing RNG is + /// used. + #[clap(long)] + seed: Option, /// The number of sleds in the example system. 
#[clap(short = 's', long, default_value_t = ExampleSystemBuilder::DEFAULT_N_SLEDS)] @@ -591,13 +529,32 @@ struct SaveArgs { filename: Utf8PathBuf, } +#[derive(Debug, Args)] +struct WipeArgs { + /// What to wipe + #[clap(subcommand)] + command: WipeCommand, +} + +#[derive(Debug, Subcommand)] +enum WipeCommand { + /// Wipe everything + All, + /// Wipe the system + System, + /// Reset configuration to default + Config, + /// Reset RNG state + Rng, +} + // Command handlers fn cmd_silo_list( sim: &mut ReconfiguratorSim, ) -> anyhow::Result> { let mut s = String::new(); - for silo_name in &sim.silo_names { + for silo_name in sim.current_state().config().silo_names() { swriteln!(s, "{}", silo_name); } Ok(Some(s)) @@ -607,11 +564,10 @@ fn cmd_silo_add( sim: &mut ReconfiguratorSim, args: SiloAddRemoveArgs, ) -> anyhow::Result> { - if sim.silo_names.contains(&args.silo_name) { - bail!("silo already exists: {:?}", &args.silo_name); - } - - sim.silo_names.push(args.silo_name); + let mut state = sim.current_state().to_mut(); + let config = state.config_mut(); + config.add_silo(args.silo_name)?; + sim.commit_and_bump("reconfigurator-cli silo-add".to_owned(), state); Ok(None) } @@ -619,11 +575,10 @@ fn cmd_silo_remove( sim: &mut ReconfiguratorSim, args: SiloAddRemoveArgs, ) -> anyhow::Result> { - let size_before = sim.silo_names.len(); - sim.silo_names.retain(|n| *n != args.silo_name); - if sim.silo_names.len() == size_before { - bail!("no such silo: {:?}", &args.silo_name); - } + let mut state = sim.current_state().to_mut(); + let config = state.config_mut(); + config.remove_silo(args.silo_name)?; + sim.commit_and_bump("reconfigurator-cli silo-remove".to_owned(), state); Ok(None) } @@ -638,8 +593,10 @@ fn cmd_sled_list( subnet: String, } - let planning_input = sim - .system + let state = sim.current_state(); + let planning_input = state + .system() + .description() .to_planning_input_builder() .context("failed to generate planning input")? 
.build(); @@ -661,21 +618,26 @@ fn cmd_sled_add( sim: &mut ReconfiguratorSim, add: SledAddArgs, ) -> anyhow::Result> { - let mut new_sled = SledBuilder::new(); - if let Some(sled_id) = add.sled_id { - new_sled = new_sled.id(sled_id); - } + let mut state = sim.current_state().to_mut(); + let sled_id = add.sled_id.unwrap_or_else(|| state.rng_mut().next_sled_id()); + let new_sled = SledBuilder::new().id(sled_id); + state.system_mut().description_mut().sled(new_sled)?; + sim.commit_and_bump( + format!("reconfigurator-cli sled-add: {sled_id}"), + state, + ); - let _ = sim.system.sled(new_sled).context("adding sled")?; - Ok(Some(String::from("added sled"))) + Ok(Some(format!("added sled {}", sled_id))) } fn cmd_sled_show( sim: &mut ReconfiguratorSim, args: SledArgs, ) -> anyhow::Result> { - let planning_input = sim - .system + let state = sim.current_state(); + let planning_input = state + .system() + .description() .to_planning_input_builder() .context("failed to generate planning_input builder")? .build(); @@ -707,7 +669,8 @@ fn cmd_inventory_list( time_done: String, } - let rows = sim.collections.values().map(|collection| { + let state = sim.current_state(); + let rows = state.system().all_collections().map(|collection| { let id = collection.id; InventoryRow { id, @@ -728,21 +691,22 @@ fn cmd_inventory_list( fn cmd_inventory_generate( sim: &mut ReconfiguratorSim, ) -> anyhow::Result> { - let builder = - sim.system.to_collection_builder().context("generating inventory")?; + let mut state = sim.current_state().to_mut(); + let builder = state.to_collection_builder()?; - // sim.system carries around Omicron zones, which will make their way into + // The system carries around Omicron zones, which will make their way into // the inventory. - let mut inventory = builder.build(); - // Assign collection IDs from the RNG. This enables consistent results when - // callers have explicitly seeded the RNG (e.g., in tests). 
- inventory.id = sim.collection_id_rng.next(); + let inventory = builder.build(); let rv = format!( "generated inventory collection {} from configured sleds", inventory.id ); - sim.collections.insert(inventory.id, inventory); + state.system_mut().add_collection(inventory)?; + sim.commit_and_bump( + "reconfigurator-cli inventory-generate".to_owned(), + state, + ); Ok(Some(rv)) } @@ -757,7 +721,9 @@ fn cmd_blueprint_list( time_created: String, } - let mut rows = sim.blueprints.values().collect::>(); + let state = sim.current_state(); + + let mut rows = state.system().all_blueprints().collect::>(); rows.sort_unstable_by_key(|blueprint| blueprint.time_created); let rows = rows.into_iter().map(|blueprint| BlueprintRow { id: blueprint.id, @@ -781,13 +747,15 @@ fn cmd_blueprint_plan( sim: &mut ReconfiguratorSim, args: BlueprintPlanArgs, ) -> anyhow::Result> { + let mut state = sim.current_state().to_mut(); + let rng = state.rng_mut().next_planner_rng(); + let system = state.system_mut(); + let parent_blueprint_id = args.parent_blueprint_id; let collection_id = args.collection_id; - let parent_blueprint = sim.blueprint_lookup(parent_blueprint_id)?; - let collection = sim - .collections - .get(&collection_id) - .ok_or_else(|| anyhow!("no such collection: {}", collection_id))?; + let parent_blueprint = system.get_blueprint(parent_blueprint_id)?; + let collection = system.get_collection(collection_id)?; + let creator = "reconfigurator-sim"; let planning_input = sim.planning_input(parent_blueprint)?; let planner = Planner::new_based_on( @@ -797,13 +765,18 @@ fn cmd_blueprint_plan( creator, collection, ) - .context("creating planner")?; + .context("creating planner")? 
+ .with_rng(rng); + let blueprint = planner.plan().context("generating blueprint")?; let rv = format!( "generated blueprint {} based on parent blueprint {}", blueprint.id, parent_blueprint_id, ); - sim.blueprint_insert_new(blueprint); + system.add_blueprint(blueprint)?; + + sim.commit_and_bump("reconfigurator-cli blueprint-plan".to_owned(), state); + Ok(Some(rv)) } @@ -811,16 +784,23 @@ fn cmd_blueprint_edit( sim: &mut ReconfiguratorSim, args: BlueprintEditArgs, ) -> anyhow::Result> { + let mut state = sim.current_state().to_mut(); + let rng = state.rng_mut().next_planner_rng(); + let system = state.system_mut(); + let blueprint_id = args.blueprint_id; - let blueprint = sim.blueprint_lookup(blueprint_id)?; + let blueprint = system.get_blueprint(blueprint_id)?; let creator = args.creator.as_deref().unwrap_or("reconfigurator-cli"); let planning_input = sim.planning_input(blueprint)?; - let latest_collection = sim - .collections - .iter() - .max_by_key(|(_, c)| c.time_started) - .map(|(_, c)| c.clone()) + + // TODO: We may want to do something other than just using the latest + // collection -- add a way to specify which collection to use. 
+ let latest_collection = system + .all_collections() + .max_by_key(|c| c.time_started) + .map(|c| c.clone()) .unwrap_or_else(|| CollectionBuilder::new("sim").build()); + let mut builder = BlueprintBuilder::new_based_on( &sim.log, blueprint, @@ -829,6 +809,7 @@ fn cmd_blueprint_edit( creator, ) .context("creating blueprint builder")?; + builder.set_rng(rng); if let Some(comment) = args.comment { builder.comment(comment); @@ -892,7 +873,9 @@ fn cmd_blueprint_edit( "blueprint {} created from blueprint {}: {}", new_blueprint.id, blueprint_id, label ); - sim.blueprint_insert_new(new_blueprint); + system.add_blueprint(new_blueprint)?; + + sim.commit_and_bump("reconfigurator-cli blueprint-edit".to_owned(), state); Ok(Some(rv)) } @@ -900,7 +883,8 @@ fn cmd_blueprint_show( sim: &mut ReconfiguratorSim, args: BlueprintArgs, ) -> anyhow::Result> { - let blueprint = sim.blueprint_lookup(args.blueprint_id)?; + let state = sim.current_state(); + let blueprint = state.system().get_blueprint(args.blueprint_id)?; Ok(Some(format!("{}", blueprint.display()))) } @@ -911,8 +895,10 @@ fn cmd_blueprint_diff( let mut rv = String::new(); let blueprint1_id = args.blueprint1_id; let blueprint2_id = args.blueprint2_id; - let blueprint1 = sim.blueprint_lookup(blueprint1_id)?; - let blueprint2 = sim.blueprint_lookup(blueprint2_id)?; + + let state = sim.current_state(); + let blueprint1 = state.system().get_blueprint(blueprint1_id)?; + let blueprint2 = state.system().get_blueprint(blueprint2_id)?; let sled_diff = blueprint2.diff_since_blueprint(&blueprint1); swriteln!(rv, "{}", sled_diff.display()); @@ -920,7 +906,7 @@ fn cmd_blueprint_diff( // Diff'ing DNS is a little trickier. First, compute what DNS should be for // each blueprint. To do that we need to construct a list of sleds suitable // for the executor. 
- let sleds_by_id = make_sleds_by_id(&sim.system)?; + let sleds_by_id = make_sleds_by_id(state.system().description())?; let internal_dns_config1 = blueprint_internal_dns_config( &blueprint1, &sleds_by_id, @@ -935,15 +921,16 @@ fn cmd_blueprint_diff( .context("failed to assemble DNS diff")?; swriteln!(rv, "internal DNS:\n{}", dns_diff); + let external_dns_zone_name = state.config().external_dns_zone_name(); let external_dns_config1 = blueprint_external_dns_config( &blueprint1, - &sim.silo_names, - sim.external_dns_zone_name.clone(), + state.config().silo_names(), + external_dns_zone_name.to_owned(), ); let external_dns_config2 = blueprint_external_dns_config( &blueprint2, - &sim.silo_names, - sim.external_dns_zone_name.clone(), + state.config().silo_names(), + external_dns_zone_name.to_owned(), ); let dns_diff = DnsDiff::new(&external_dns_config1, &external_dns_config2) .context("failed to assemble external DNS diff")?; @@ -983,19 +970,22 @@ fn cmd_blueprint_diff_dns( let dns_group = args.dns_group; let dns_version = Generation::from(args.dns_version); let blueprint_id = args.blueprint_id; - let blueprint = sim.blueprint_lookup(blueprint_id)?; + + let state = sim.current_state(); + let blueprint = state.system().get_blueprint(blueprint_id)?; let existing_dns_config = match dns_group { - CliDnsGroup::Internal => sim.internal_dns.get(&dns_version), - CliDnsGroup::External => sim.external_dns.get(&dns_version), - } - .ok_or_else(|| { - anyhow!("no such {:?} DNS version: {}", dns_group, dns_version) - })?; + CliDnsGroup::Internal => { + state.system().get_internal_dns(dns_version)? + } + CliDnsGroup::External => { + state.system().get_external_dns(dns_version)? 
+ } + }; let blueprint_dns_zone = match dns_group { CliDnsGroup::Internal => { - let sleds_by_id = make_sleds_by_id(&sim.system)?; + let sleds_by_id = make_sleds_by_id(state.system().description())?; blueprint_internal_dns_config( blueprint, &sleds_by_id, @@ -1004,8 +994,8 @@ fn cmd_blueprint_diff_dns( } CliDnsGroup::External => blueprint_external_dns_config( blueprint, - &sim.silo_names, - sim.external_dns_zone_name.clone(), + state.config().silo_names(), + state.config().external_dns_zone_name().to_owned(), ), }; @@ -1021,10 +1011,10 @@ fn cmd_blueprint_diff_inventory( ) -> anyhow::Result> { let collection_id = args.collection_id; let blueprint_id = args.blueprint_id; - let collection = sim.collections.get(&collection_id).ok_or_else(|| { - anyhow!("no such inventory collection: {}", collection_id) - })?; - let blueprint = sim.blueprint_lookup(blueprint_id)?; + + let state = sim.current_state(); + let collection = state.system().get_collection(collection_id)?; + let blueprint = state.system().get_blueprint(blueprint_id)?; let diff = blueprint.diff_since_collection(&collection); Ok(Some(diff.display().to_string())) } @@ -1034,7 +1024,9 @@ fn cmd_blueprint_save( args: BlueprintSaveArgs, ) -> anyhow::Result> { let blueprint_id = args.blueprint_id; - let blueprint = sim.blueprint_lookup(blueprint_id)?; + + let state = sim.current_state(); + let blueprint = state.system().get_blueprint(blueprint_id)?; let output_path = &args.filename; let output_str = serde_json::to_string_pretty(&blueprint) @@ -1048,20 +1040,8 @@ fn cmd_save( sim: &mut ReconfiguratorSim, args: SaveArgs, ) -> anyhow::Result> { - let planning_input = sim - .system - .to_planning_input_builder() - .context("creating planning input builder")? 
- .build(); - let saved = UnstableReconfiguratorState { - planning_input, - collections: sim.collections.values().cloned().collect(), - blueprints: sim.blueprints.values().cloned().collect(), - internal_dns: sim.internal_dns.clone(), - external_dns: sim.external_dns.clone(), - silo_names: sim.silo_names.clone(), - external_dns_zone_names: vec![sim.external_dns_zone_name.clone()], - }; + let state = sim.current_state(); + let saved = state.to_serializable()?; let output_path = &args.filename; let output_str = @@ -1074,36 +1054,80 @@ fn cmd_save( ))) } -fn cmd_wipe(sim: &mut ReconfiguratorSim) -> anyhow::Result> { - sim.wipe(); - Ok(Some("wiped reconfigurator-sim state".to_string())) +fn cmd_wipe( + sim: &mut ReconfiguratorSim, + args: WipeArgs, +) -> anyhow::Result> { + let mut state = sim.current_state().to_mut(); + let output = match args.command { + WipeCommand::All => { + state.system_mut().wipe(); + state.config_mut().wipe(); + state.rng_mut().reset_state(); + format!( + "- wiped system, reconfigurator-sim config, and RNG state\n + - reset seed to {}", + state.rng_mut().seed() + ) + } + WipeCommand::System => { + state.system_mut().wipe(); + "wiped system".to_string() + } + WipeCommand::Config => { + state.config_mut().wipe(); + "wiped reconfigurator-sim config".to_string() + } + WipeCommand::Rng => { + // Don't allow wiping the RNG state if the system is non-empty. + // Wiping the RNG state is likely to cause duplicate IDs to be + // generated. 
+ if !state.system_mut().is_empty() { + bail!( + "cannot wipe RNG state with non-empty system: \ + run `wipe system` first" + ); + } + state.rng_mut().reset_state(); + format!( + "- wiped RNG state\n- reset seed to {}", + state.rng_mut().seed() + ) + } + }; + + sim.commit_and_bump(output.clone(), state); + Ok(Some(output)) } fn cmd_show(sim: &mut ReconfiguratorSim) -> anyhow::Result> { let mut s = String::new(); - do_print_properties(&mut s, sim); + let state = sim.current_state(); + do_print_properties(&mut s, state); swriteln!( s, "target number of Nexus instances: {}", - match sim.num_nexus { - Some(n) => n.to_string(), - None => String::from("default"), - } + state + .config() + .num_nexus() + .map_or_else(|| "default".to_owned(), |n| n.to_string()) ); Ok(Some(s)) } -fn do_print_properties(s: &mut String, sim: &ReconfiguratorSim) { +// TODO: consider moving this to a method on `SimState`. +fn do_print_properties(s: &mut String, state: &SimState) { swriteln!( s, "configured external DNS zone name: {}", - sim.external_dns_zone_name, + state.config().external_dns_zone_name(), ); swriteln!( s, "configured silo names: {}", - sim.silo_names - .iter() + state + .config() + .silo_names() .map(|s| s.as_str()) .collect::>() .join(", ") @@ -1111,18 +1135,20 @@ fn do_print_properties(s: &mut String, sim: &ReconfiguratorSim) { swriteln!( s, "internal DNS generations: {}", - sim.internal_dns - .keys() - .map(|s| s.to_string()) + state + .system() + .all_internal_dns() + .map(|params| params.generation.to_string()) .collect::>() .join(", "), ); swriteln!( s, "external DNS generations: {}", - sim.external_dns - .keys() - .map(|s| s.to_string()) + state + .system() + .all_external_dns() + .map(|params| params.generation.to_string()) .collect::>() .join(", "), ); @@ -1132,20 +1158,34 @@ fn cmd_set( sim: &mut ReconfiguratorSim, args: SetArgs, ) -> anyhow::Result> { - Ok(Some(match args { + let mut state = sim.current_state().to_mut(); + let rv = match args { SetArgs::NumNexus { 
num_nexus } => { - let rv = format!("{:?} -> {}", sim.num_nexus, num_nexus); - sim.num_nexus = Some(num_nexus); - sim.system.target_nexus_zone_count(usize::from(num_nexus)); + let rv = format!( + "target number of Nexus zones: {:?} -> {}", + state.config_mut().num_nexus(), + num_nexus + ); + state.config_mut().set_num_nexus(num_nexus); + state + .system_mut() + .description_mut() + .target_nexus_zone_count(usize::from(num_nexus)); rv } SetArgs::ExternalDnsZoneName { zone_name } => { - let rv = - format!("{:?} -> {:?}", sim.external_dns_zone_name, zone_name); - sim.external_dns_zone_name = zone_name; + let rv = format!( + "external DNS zone name: {:?} -> {:?}", + state.config_mut().external_dns_zone_name(), + zone_name + ); + state.config_mut().set_external_dns_zone_name(zone_name); rv } - })) + }; + + sim.commit_and_bump(format!("reconfigurator-cli set: {}", rv), state); + Ok(Some(rv)) } fn read_file( @@ -1162,167 +1202,44 @@ fn cmd_load( sim: &mut ReconfiguratorSim, args: LoadArgs, ) -> anyhow::Result> { - if sim.user_made_system_changes() { - bail!("changes made to simulated system: run `wipe` before loading"); + let mut state = sim.current_state().to_mut(); + if !state.system_mut().is_empty() { + bail!( + "changes made to simulated system: run `wipe system` before \ + loading" + ); } let input_path = args.filename; let collection_id = args.collection_id; let loaded = read_file(&input_path)?; - let mut s = String::new(); - - let collection_id = match collection_id { - Some(s) => s, - None => match loaded.collections.len() { - 1 => loaded.collections[0].id, - 0 => bail!( - "no collection_id specified and file contains 0 collections" - ), - count => bail!( - "no collection_id specified and file contains {} \ - collections: {}", - count, - loaded - .collections - .iter() - .map(|c| c.id.to_string()) - .collect::>() - .join(", ") - ), - }, - }; + let result = state.load_serialized(loaded, collection_id)?; - swriteln!( - s, - "using collection {} as source of sled 
inventory data", - collection_id + sim.commit_and_bump( + format!("reconfigurator-sim: load {:?}", input_path), + state, ); - let primary_collection = - loaded.collections.iter().find(|c| c.id == collection_id).ok_or_else( - || { - anyhow!( - "collection {} not found in file {:?}", - collection_id, - input_path - ) - }, - )?; - - for (sled_id, sled_details) in - loaded.planning_input.all_sleds(SledFilter::Commissioned) - { - let Some(inventory_sled_agent) = - primary_collection.sled_agents.get(&sled_id) - else { - swriteln!( - s, - "error: load sled {}: no inventory found for sled agent in \ - collection {}", - sled_id, - collection_id - ); - continue; - }; - - let inventory_sp = inventory_sled_agent.baseboard_id.as_ref().and_then( - |baseboard_id| { - let inv_sp = primary_collection.sps.get(baseboard_id); - let inv_rot = primary_collection.rots.get(baseboard_id); - if let (Some(inv_sp), Some(inv_rot)) = (inv_sp, inv_rot) { - Some(SledHwInventory { - baseboard_id: &baseboard_id, - sp: inv_sp, - rot: inv_rot, - }) - } else { - None - } - }, - ); - - let result = sim.system.sled_full( - sled_id, - sled_details.policy, - sled_details.state, - sled_details.resources.clone(), - inventory_sp, - inventory_sled_agent, - ); - match result { - Ok(_) => swriteln!(s, "sled {} loaded", sled_id), - Err(error) => { - swriteln!(s, "error: load sled {}: {:#}", sled_id, error) - } - }; - } + let mut s = String::new(); - for collection in loaded.collections { - match sim.collections.entry(collection.id) { - indexmap::map::Entry::Occupied(_) => { - // We started with an empty system, so the only way we can hit - // this is if the serialized state contains a duplicate - // collection ID. 
- swriteln!( - s, - "error: collection {} skipped (duplicate found)", - collection.id - ) - } - indexmap::map::Entry::Vacant(entry) => { - swriteln!(s, "collection {} loaded", collection.id); - entry.insert(collection); - } - } - } + swriteln!(s, "loaded data from {:?}", input_path); - for blueprint in loaded.blueprints { - match sim.blueprints.entry(blueprint.id) { - // We started with an empty system, so the only way we can hit this - // is if the serialized state contains a duplicate blueprint ID. - indexmap::map::Entry::Occupied(_) => { - swriteln!( - s, - "error: blueprint {} skipped (duplicate found)", - blueprint.id - ) - } - indexmap::map::Entry::Vacant(entry) => { - swriteln!(s, "blueprint {} loaded", blueprint.id); - entry.insert(blueprint); - } + if !result.warnings.is_empty() { + swriteln!(s, "warnings:"); + for warning in result.warnings { + swriteln!(s, " {}", warning); } } - sim.system.service_ip_pool_ranges( - loaded.planning_input.service_ip_pool_ranges().to_vec(), - ); - swriteln!( - s, - "loaded service IP pool ranges: {:?}", - loaded.planning_input.service_ip_pool_ranges() - ); - - sim.internal_dns = loaded.internal_dns; - sim.external_dns = loaded.external_dns; - sim.silo_names = loaded.silo_names; - - let nnames = loaded.external_dns_zone_names.len(); - if nnames > 0 { - if nnames > 1 { - swriteln!( - s, - "warn: found {} external DNS names; using only the first one", - nnames - ); + if !result.notices.is_empty() { + swriteln!(s, "notices:"); + for notice in result.notices { + swriteln!(s, " {}", notice); } - sim.external_dns_zone_name = - loaded.external_dns_zone_names.into_iter().next().unwrap(); } - do_print_properties(&mut s, sim); - swriteln!(s, "loaded data from {:?}", input_path); + do_print_properties(&mut s, sim.current_state()); Ok(Some(s)) } @@ -1330,18 +1247,46 @@ fn cmd_load_example( sim: &mut ReconfiguratorSim, args: LoadExampleArgs, ) -> anyhow::Result> { - if sim.user_made_system_changes() { - bail!("changes made to simulated 
system: run `wipe` before loading"); + let mut s = String::new(); + let mut state = sim.current_state().to_mut(); + if !state.system_mut().is_empty() { + bail!( + "changes made to simulated system: run `wipe system` before \ + loading" + ); } // Generate the example system. - let (example, blueprint) = ExampleSystemBuilder::new(&sim.log, &args.seed) - .nsleds(args.nsleds) - .ndisks_per_sled(args.ndisks_per_sled) - .nexus_count(sim.num_nexus.map_or(NEXUS_REDUNDANCY, |n| n.into())) - .create_zones(!args.no_zones) - .create_disks_in_blueprint(!args.no_disks_in_blueprint) - .build(); + match args.seed { + Some(seed) => { + // In this case, reset the RNG state to the provided seed. + swriteln!(s, "setting new RNG seed: {}", seed); + state.rng_mut().set_seed(seed); + } + None => { + // In this case, use the existing RNG state. + swriteln!( + s, + "using existing RNG state (seed: {})", + state.rng_mut().seed() + ); + } + }; + let rng = state.rng_mut().next_example_rng(); + + let (example, blueprint) = + ExampleSystemBuilder::new_with_rng(&sim.log, rng) + .nsleds(args.nsleds) + .ndisks_per_sled(args.ndisks_per_sled) + .nexus_count( + state + .config_mut() + .num_nexus() + .map_or(NEXUS_REDUNDANCY, |n| n.into()), + ) + .create_zones(!args.no_zones) + .create_disks_in_blueprint(!args.no_disks_in_blueprint) + .build(); // Generate the internal and external DNS configs based on the blueprint. let sleds_by_id = make_sleds_by_id(&example.system)?; @@ -1350,38 +1295,22 @@ fn cmd_load_example( &sleds_by_id, &Default::default(), )?; + let external_dns_zone_name = + state.config_mut().external_dns_zone_name().to_owned(); let external_dns = blueprint_external_dns_config( &blueprint, - &sim.silo_names, - sim.external_dns_zone_name.clone(), + state.config_mut().silo_names(), + external_dns_zone_name, ); - // No more fallible operations from here on out: set the system state. 
- let collection_id = example.collection.id; let blueprint_id = blueprint.id; - sim.system = example.system; - sim.collections.insert(collection_id, example.collection); - sim.internal_dns.insert( - blueprint.internal_dns_version, - DnsConfigParams { - generation: blueprint.internal_dns_version, - time_created: Utc::now(), - zones: vec![internal_dns], - }, - ); - sim.external_dns.insert( - blueprint.external_dns_version, - DnsConfigParams { - generation: blueprint.external_dns_version, - time_created: Utc::now(), - zones: vec![external_dns], - }, - ); - sim.blueprints - .insert(example.initial_blueprint.id, example.initial_blueprint); - sim.blueprints.insert(blueprint.id, blueprint); - sim.collection_id_rng = - TypedUuidRng::from_seed(&args.seed, "reconfigurator-cli"); + let collection_id = example.collection.id; + + state + .system_mut() + .load_example(example, blueprint, internal_dns, external_dns) + .expect("already checked non-empty state above"); + sim.commit_and_bump("reconfigurator-cli load-example".to_owned(), state); Ok(Some(format!( "loaded example system with:\n\ diff --git a/dev-tools/reconfigurator-cli/tests/input/cmds-example.txt b/dev-tools/reconfigurator-cli/tests/input/cmds-example.txt index b3143ac016..1bf52b1ff9 100644 --- a/dev-tools/reconfigurator-cli/tests/input/cmds-example.txt +++ b/dev-tools/reconfigurator-cli/tests/input/cmds-example.txt @@ -13,9 +13,9 @@ blueprint-show ade5749d-bdf3-4fab-a8ae-00bea01b3a5a blueprint-diff-inventory 9e187896-7809-46d0-9210-d75be1b3c4d4 ade5749d-bdf3-4fab-a8ae-00bea01b3a5a inventory-generate -blueprint-diff-inventory b32394d8-7d79-486f-8657-fd5219508181 ade5749d-bdf3-4fab-a8ae-00bea01b3a5a +blueprint-diff-inventory 972ca69a-384c-4a9c-a87d-c2cf21e114e0 ade5749d-bdf3-4fab-a8ae-00bea01b3a5a -wipe +wipe system load-example --seed test-basic --nsleds 1 --ndisks-per-sled 4 --no-zones sled-list diff --git a/dev-tools/reconfigurator-cli/tests/input/cmds.txt b/dev-tools/reconfigurator-cli/tests/input/cmds.txt index 
1a537ba524..9aa8c125ca 100644 --- a/dev-tools/reconfigurator-cli/tests/input/cmds.txt +++ b/dev-tools/reconfigurator-cli/tests/input/cmds.txt @@ -16,6 +16,6 @@ inventory-list save state.json load state.json -wipe +wipe system load state.json sled-show dde1c0e2-b10d-4621-b420-f179f7a7a00a diff --git a/dev-tools/reconfigurator-cli/tests/output/cmd-example-stdout b/dev-tools/reconfigurator-cli/tests/output/cmd-example-stdout index 48664c5e6d..eccf315e9f 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmd-example-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmd-example-stdout @@ -1,10 +1,11 @@ +using provided RNG seed: test_example > load-example --seed test-basic loaded example system with: - collection: 9e187896-7809-46d0-9210-d75be1b3c4d4 - blueprint: ade5749d-bdf3-4fab-a8ae-00bea01b3a5a > load-example --seed test-basic -error: changes made to simulated system: run `wipe` before loading +error: changes made to simulated system: run `wipe system` before loading > @@ -485,10 +486,10 @@ to: blueprint ade5749d-bdf3-4fab-a8ae-00bea01b3a5a > > inventory-generate -generated inventory collection b32394d8-7d79-486f-8657-fd5219508181 from configured sleds +generated inventory collection 972ca69a-384c-4a9c-a87d-c2cf21e114e0 from configured sleds -> blueprint-diff-inventory b32394d8-7d79-486f-8657-fd5219508181 ade5749d-bdf3-4fab-a8ae-00bea01b3a5a -from: collection b32394d8-7d79-486f-8657-fd5219508181 +> blueprint-diff-inventory 972ca69a-384c-4a9c-a87d-c2cf21e114e0 ade5749d-bdf3-4fab-a8ae-00bea01b3a5a +from: collection 972ca69a-384c-4a9c-a87d-c2cf21e114e0 to: blueprint ade5749d-bdf3-4fab-a8ae-00bea01b3a5a UNCHANGED SLEDS: @@ -774,8 +775,8 @@ to: blueprint ade5749d-bdf3-4fab-a8ae-00bea01b3a5a > -> wipe -wiped reconfigurator-sim state +> wipe system +wiped system > load-example --seed test-basic --nsleds 1 --ndisks-per-sled 4 --no-zones loaded example system with: diff --git a/dev-tools/reconfigurator-cli/tests/output/cmd-stdout 
b/dev-tools/reconfigurator-cli/tests/output/cmd-stdout index 40489caeb5..96b59715e3 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmd-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmd-stdout @@ -1,3 +1,4 @@ +using provided RNG seed: test_basic > sled-list ID NZPOOLS SUBNET @@ -9,62 +10,62 @@ ID PARENT TIME_CREATED > -> sled-show ..................... -error: sled ..................... was not found in the planning input +> sled-show dde1c0e2-b10d-4621-b420-f179f7a7a00a +error: sled dde1c0e2-b10d-4621-b420-f179f7a7a00a was not found in the planning input -> sled-add ..................... -added sled +> sled-add dde1c0e2-b10d-4621-b420-f179f7a7a00a +added sled dde1c0e2-b10d-4621-b420-f179f7a7a00a > sled-list ID NZPOOLS SUBNET -..................... 10 fd00:1122:3344:101::/64 +dde1c0e2-b10d-4621-b420-f179f7a7a00a 10 fd00:1122:3344:101::/64 -> sled-show ..................... -sled ..................... +> sled-show dde1c0e2-b10d-4621-b420-f179f7a7a00a +sled dde1c0e2-b10d-4621-b420-f179f7a7a00a subnet fd00:1122:3344:101::/64 zpools (10): - ..................... (zpool) - SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-....................." }, disk_id: ..................... (physical_disk), policy: InService, state: Active } - ..................... (zpool) - SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-....................." }, disk_id: ..................... (physical_disk), policy: InService, state: Active } - ..................... (zpool) - SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-....................." }, disk_id: ..................... (physical_disk), policy: InService, state: Active } - ..................... (zpool) - SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-....................." }, disk_id: ..................... 
(physical_disk), policy: InService, state: Active } - ..................... (zpool) - SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-....................." }, disk_id: ..................... (physical_disk), policy: InService, state: Active } - ..................... (zpool) - SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-....................." }, disk_id: ..................... (physical_disk), policy: InService, state: Active } - ..................... (zpool) - SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-....................." }, disk_id: ..................... (physical_disk), policy: InService, state: Active } - ..................... (zpool) - SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-....................." }, disk_id: ..................... (physical_disk), policy: InService, state: Active } - ..................... (zpool) - SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-....................." }, disk_id: ..................... (physical_disk), policy: InService, state: Active } - ..................... (zpool) - SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-....................." }, disk_id: ..................... (physical_disk), policy: InService, state: Active } - - -> sled-add ..................... -added sled - -> sled-add ..................... 
-added sled + 0f3f1de0-7e5a-4032-a73a-74fbdabbd2fa (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-0f3f1de0-7e5a-4032-a73a-74fbdabbd2fa" }, disk_id: 2dbf19d4-7b7d-48d5-9d1c-64ac2922093b (physical_disk), policy: InService, state: Active } + 104f891f-e018-4787-a346-3cfaa6cc7e9d (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-104f891f-e018-4787-a346-3cfaa6cc7e9d" }, disk_id: 301ab9e6-bdc1-4287-a37d-2604893712f8 (physical_disk), policy: InService, state: Active } + 111f7a4e-5696-4be8-b13d-8ef314bc83e0 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-111f7a4e-5696-4be8-b13d-8ef314bc83e0" }, disk_id: 1f77c099-8205-41b3-ac34-3807f3bbaf56 (physical_disk), policy: InService, state: Active } + 5a1786e9-770d-4ac9-b291-4501398170b5 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-5a1786e9-770d-4ac9-b291-4501398170b5" }, disk_id: b111a961-be34-4ede-80e2-ef92af5e0a1f (physical_disk), policy: InService, state: Active } + 658fef3f-c3cd-4e6d-8823-79f9a0bec4c0 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-658fef3f-c3cd-4e6d-8823-79f9a0bec4c0" }, disk_id: b3a01997-9894-4abd-83ad-e2d520d4c3a0 (physical_disk), policy: InService, state: Active } + 73ce66f5-a39a-4dd1-ad84-5647a5038d35 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-73ce66f5-a39a-4dd1-ad84-5647a5038d35" }, disk_id: 48568b33-8f21-4537-b330-666aa3334236 (physical_disk), policy: InService, state: Active } + 7480aa69-3a3d-478d-bbdb-ba1fb74752ef (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-7480aa69-3a3d-478d-bbdb-ba1fb74752ef" }, disk_id: 9a968677-4da7-40b3-9579-9c54a7620b58 (physical_disk), 
policy: InService, state: Active } + 9ff438c6-00bb-4daf-9013-87969c892b02 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-9ff438c6-00bb-4daf-9013-87969c892b02" }, disk_id: cc22404e-8a30-4b98-9552-790e84a162bd (physical_disk), policy: InService, state: Active } + ad0602bf-f577-401a-a28b-687c3d86f6bb (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-ad0602bf-f577-401a-a28b-687c3d86f6bb" }, disk_id: 32baf388-4cd9-4435-b70b-d8b2e515d918 (physical_disk), policy: InService, state: Active } + da6e6a21-8d32-46f9-a2b3-635f6700c3f0 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-da6e6a21-8d32-46f9-a2b3-635f6700c3f0" }, disk_id: 1e7ee543-fe10-4ba7-b8f3-d579e8e0803a (physical_disk), policy: InService, state: Active } + + +> sled-add 90c1102a-b9f5-4d88-92a2-60d54a2d98cc +added sled 90c1102a-b9f5-4d88-92a2-60d54a2d98cc + +> sled-add 04ef3330-c682-4a08-8def-fcc4bef31bcd +added sled 04ef3330-c682-4a08-8def-fcc4bef31bcd > sled-list ID NZPOOLS SUBNET -..................... 10 fd00:1122:3344:103::/64 -..................... 10 fd00:1122:3344:102::/64 -..................... 10 fd00:1122:3344:101::/64 +04ef3330-c682-4a08-8def-fcc4bef31bcd 10 fd00:1122:3344:103::/64 +90c1102a-b9f5-4d88-92a2-60d54a2d98cc 10 fd00:1122:3344:102::/64 +dde1c0e2-b10d-4621-b420-f179f7a7a00a 10 fd00:1122:3344:101::/64 > > inventory-generate -generated inventory collection ..................... from configured sleds +generated inventory collection 6e066695-94bc-4250-bd63-fd799c166cc1 from configured sleds > inventory-list ID NERRORS TIME_DONE -..................... 
0 +6e066695-94bc-4250-bd63-fd799c166cc1 0 > @@ -72,50 +73,51 @@ ID NERRORS TIME_DONE saved planning input, collections, and blueprints to "state.json" > load state.json -error: changes made to simulated system: run `wipe` before loading +error: changes made to simulated system: run `wipe system` before loading > -> wipe -wiped reconfigurator-sim state +> wipe system +wiped system > load state.json -using collection ..................... as source of sled inventory data -sled ..................... loaded -sled ..................... loaded -sled ..................... loaded -collection ..................... loaded -loaded service IP pool ranges: [V4(Ipv4Range { first: 192.0.2.2, last: 192.0.2.20 })] +loaded data from "state.json" +notices: + using collection 6e066695-94bc-4250-bd63-fd799c166cc1 as source of sled inventory data + sled 04ef3330-c682-4a08-8def-fcc4bef31bcd: loaded + sled 90c1102a-b9f5-4d88-92a2-60d54a2d98cc: loaded + sled dde1c0e2-b10d-4621-b420-f179f7a7a00a: loaded + collection 6e066695-94bc-4250-bd63-fd799c166cc1: loaded + loaded service IP pool ranges: [V4(Ipv4Range { first: 192.0.2.2, last: 192.0.2.20 })] configured external DNS zone name: oxide.example configured silo names: example-silo internal DNS generations: external DNS generations: -loaded data from "state.json" -> sled-show ..................... -sled ..................... +> sled-show dde1c0e2-b10d-4621-b420-f179f7a7a00a +sled dde1c0e2-b10d-4621-b420-f179f7a7a00a subnet fd00:1122:3344:101::/64 zpools (10): - ..................... (zpool) - SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-....................." }, disk_id: ..................... (physical_disk), policy: InService, state: Active } - ..................... (zpool) - SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-....................." }, disk_id: ..................... 
(physical_disk), policy: InService, state: Active } - ..................... (zpool) - SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-....................." }, disk_id: ..................... (physical_disk), policy: InService, state: Active } - ..................... (zpool) - SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-....................." }, disk_id: ..................... (physical_disk), policy: InService, state: Active } - ..................... (zpool) - SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-....................." }, disk_id: ..................... (physical_disk), policy: InService, state: Active } - ..................... (zpool) - SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-....................." }, disk_id: ..................... (physical_disk), policy: InService, state: Active } - ..................... (zpool) - SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-....................." }, disk_id: ..................... (physical_disk), policy: InService, state: Active } - ..................... (zpool) - SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-....................." }, disk_id: ..................... (physical_disk), policy: InService, state: Active } - ..................... (zpool) - SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-....................." }, disk_id: ..................... (physical_disk), policy: InService, state: Active } - ..................... (zpool) - SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-....................." }, disk_id: ..................... 
(physical_disk), policy: InService, state: Active } + 0f3f1de0-7e5a-4032-a73a-74fbdabbd2fa (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-0f3f1de0-7e5a-4032-a73a-74fbdabbd2fa" }, disk_id: 2dbf19d4-7b7d-48d5-9d1c-64ac2922093b (physical_disk), policy: InService, state: Active } + 104f891f-e018-4787-a346-3cfaa6cc7e9d (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-104f891f-e018-4787-a346-3cfaa6cc7e9d" }, disk_id: 301ab9e6-bdc1-4287-a37d-2604893712f8 (physical_disk), policy: InService, state: Active } + 111f7a4e-5696-4be8-b13d-8ef314bc83e0 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-111f7a4e-5696-4be8-b13d-8ef314bc83e0" }, disk_id: 1f77c099-8205-41b3-ac34-3807f3bbaf56 (physical_disk), policy: InService, state: Active } + 5a1786e9-770d-4ac9-b291-4501398170b5 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-5a1786e9-770d-4ac9-b291-4501398170b5" }, disk_id: b111a961-be34-4ede-80e2-ef92af5e0a1f (physical_disk), policy: InService, state: Active } + 658fef3f-c3cd-4e6d-8823-79f9a0bec4c0 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-658fef3f-c3cd-4e6d-8823-79f9a0bec4c0" }, disk_id: b3a01997-9894-4abd-83ad-e2d520d4c3a0 (physical_disk), policy: InService, state: Active } + 73ce66f5-a39a-4dd1-ad84-5647a5038d35 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-73ce66f5-a39a-4dd1-ad84-5647a5038d35" }, disk_id: 48568b33-8f21-4537-b330-666aa3334236 (physical_disk), policy: InService, state: Active } + 7480aa69-3a3d-478d-bbdb-ba1fb74752ef (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-7480aa69-3a3d-478d-bbdb-ba1fb74752ef" }, disk_id: 
9a968677-4da7-40b3-9579-9c54a7620b58 (physical_disk), policy: InService, state: Active } + 9ff438c6-00bb-4daf-9013-87969c892b02 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-9ff438c6-00bb-4daf-9013-87969c892b02" }, disk_id: cc22404e-8a30-4b98-9552-790e84a162bd (physical_disk), policy: InService, state: Active } + ad0602bf-f577-401a-a28b-687c3d86f6bb (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-ad0602bf-f577-401a-a28b-687c3d86f6bb" }, disk_id: 32baf388-4cd9-4435-b70b-d8b2e515d918 (physical_disk), policy: InService, state: Active } + da6e6a21-8d32-46f9-a2b3-635f6700c3f0 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-da6e6a21-8d32-46f9-a2b3-635f6700c3f0" }, disk_id: 1e7ee543-fe10-4ba7-b8f3-d579e8e0803a (physical_disk), policy: InService, state: Active } diff --git a/dev-tools/reconfigurator-cli/tests/test_basic.rs b/dev-tools/reconfigurator-cli/tests/test_basic.rs index f4c03cf5e8..bb31fb1871 100644 --- a/dev-tools/reconfigurator-cli/tests/test_basic.rs +++ b/dev-tools/reconfigurator-cli/tests/test_basic.rs @@ -38,7 +38,10 @@ fn path_to_cli() -> PathBuf { path_to_executable(env!("CARGO_BIN_EXE_reconfigurator-cli")) } -fn run_cli(file: impl AsRef) -> (ExitStatus, String, String) { +fn run_cli( + file: impl AsRef, + args: &[&str], +) -> (ExitStatus, String, String) { let file = file.as_ref(); // Turn the path into an absolute one, because we're going to set a custom @@ -49,7 +52,7 @@ fn run_cli(file: impl AsRef) -> (ExitStatus, String, String) { // Create a temporary directory for the CLI to use -- that will let it // read and write files in its own sandbox. 
let tmpdir = camino_tempfile::tempdir().expect("failed to create tmpdir"); - let exec = Exec::cmd(path_to_cli()).arg(file).cwd(tmpdir.path()); + let exec = Exec::cmd(path_to_cli()).arg(file).args(args).cwd(tmpdir.path()); run_command(exec) } @@ -57,9 +60,11 @@ fn run_cli(file: impl AsRef) -> (ExitStatus, String, String) { #[test] fn test_basic() { let (exit_status, stdout_text, stderr_text) = - run_cli("tests/input/cmds.txt"); + run_cli("tests/input/cmds.txt", &["--seed", "test_basic"]); assert_exit_code(exit_status, EXIT_SUCCESS, &stderr_text); - let stdout_text = Redactor::default().do_redact(&stdout_text); + + // Everything is deterministic, so we don't need to redact UUIDs. + let stdout_text = Redactor::default().uuids(false).do_redact(&stdout_text); assert_contents("tests/output/cmd-stdout", &stdout_text); assert_contents("tests/output/cmd-stderr", &stderr_text); } @@ -68,7 +73,7 @@ fn test_basic() { #[test] fn test_example() { let (exit_status, stdout_text, stderr_text) = - run_cli("tests/input/cmds-example.txt"); + run_cli("tests/input/cmds-example.txt", &["--seed", "test_example"]); assert_exit_code(exit_status, EXIT_SUCCESS, &stderr_text); // The example system uses a fixed seed, which means that UUIDs are diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index d1d36d8c47..aeaa82772d 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -63,6 +63,10 @@ impl SimRngState { } } + pub fn seed(&self) -> &str { + &self.seed + } + pub fn next_system_rng(&mut self) -> ExampleSystemRng { // Different behavior for the first system_rng_gen is a bit weird, but // it retains backwards compatibility with existing tests -- it means @@ -82,10 +86,8 @@ impl SimRngState { self.collection_rng_gen += 1; // We don't need to pass in extra bits unique to collections, because // `CollectionBuilderRng` adds its own. 
- CollectionBuilderRng::from_seed(( - self.seed.as_str(), - self.collection_rng_gen, - )) + let seed = (self.seed.as_str(), self.collection_rng_gen); + CollectionBuilderRng::from_seed(seed) } pub fn next_planner_rng(&mut self) -> PlannerRng { diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index a893163faa..88e9dad480 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -28,7 +28,6 @@ use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledState; use nexus_types::inventory::Collection; use omicron_uuid_kinds::SledUuid; -use rng::PlannerRng; use slog::error; use slog::{info, warn, Logger}; use std::collections::BTreeMap; @@ -38,6 +37,7 @@ use std::str::FromStr; pub(crate) use self::omicron_zone_placement::DiscretionaryOmicronZone; use self::omicron_zone_placement::OmicronZonePlacement; use self::omicron_zone_placement::OmicronZonePlacementSledState; +pub use self::rng::PlannerRng; mod omicron_zone_placement; pub(crate) mod rng; diff --git a/nexus/reconfigurator/simulation/Cargo.toml b/nexus/reconfigurator/simulation/Cargo.toml new file mode 100644 index 0000000000..c8d838149e --- /dev/null +++ b/nexus/reconfigurator/simulation/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "nexus-reconfigurator-simulation" +version = "0.1.0" +edition = "2021" + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +chrono.workspace = true +indexmap.workspace = true +nexus-inventory.workspace = true +nexus-reconfigurator-planning.workspace = true +nexus-types.workspace = true +omicron-common.workspace = true +omicron-uuid-kinds.workspace = true +omicron-workspace-hack.workspace = true +petname = { workspace = true, default-features = false } +slog.workspace = true +sync-ptr.workspace = true +thiserror.workspace = true +typed-rng.workspace = true +uuid.workspace = true diff --git 
a/nexus/reconfigurator/simulation/src/config.rs b/nexus/reconfigurator/simulation/src/config.rs new file mode 100644 index 0000000000..91b287e833 --- /dev/null +++ b/nexus/reconfigurator/simulation/src/config.rs @@ -0,0 +1,149 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use indexmap::IndexSet; +use omicron_common::api::external::Name; + +use crate::errors::{DuplicateError, KeyError}; + +/// Versioned simulator configuration. +/// +/// This is part of the state that is versioned and stored in the store. +#[derive(Clone, Debug)] +pub struct SimConfig { + /// Set of silo names configured + /// + /// These are used to determine the contents of external DNS. + silo_names: IndexSet, + + /// External DNS zone name configured + external_dns_zone_name: String, + + /// The number of Nexus zones to create. + /// + /// TODO: This doesn't quite fit in here because it's more of a policy + /// setting than a config option. But we can't set it in the + /// `SystemDescription` because we need to persist policy across system wipes. + /// So callers have to remember to set num_nexus twice: once in the config + /// and once in the policy. + /// + /// We can likely make this better after addressing + /// . + num_nexus: Option, +} + +impl SimConfig { + pub(crate) fn new() -> Self { + Self { + // We use "example-silo" here rather than "default-silo" to make it + // clear that we're in a test environment. 
+ silo_names: std::iter::once("example-silo".parse().unwrap()) + .collect(), + external_dns_zone_name: String::from("oxide.example"), + num_nexus: None, + } + } + + #[inline] + pub fn silo_names(&self) -> impl ExactSizeIterator { + self.silo_names.iter() + } + + #[inline] + pub fn external_dns_zone_name(&self) -> &str { + &self.external_dns_zone_name + } + + #[inline] + pub fn num_nexus(&self) -> Option { + self.num_nexus + } + + pub(crate) fn to_mut(&self) -> SimConfigBuilder { + SimConfigBuilder { config: self.clone(), log: Vec::new() } + } +} + +/// A [`SimConfig`] that can be changed to create new states. +/// +/// Returned by +/// [`SimStateBuilder::config_mut`](crate::SimStateBuilder::config_mut). +#[derive(Clone, Debug)] +pub struct SimConfigBuilder { + config: SimConfig, + log: Vec, +} + +impl SimConfigBuilder { + // These methods are duplicated from `SimConfig`. The forwarding is all + // valid because we don't cache pending changes in this struct, instead + // making them directly to the underlying config. If we did cache changes, + // we'd need to be more careful about how we forward these methods. 
+ + #[inline] + pub fn silo_names(&self) -> impl ExactSizeIterator { + self.config.silo_names() + } + + #[inline] + pub fn external_dns_zone_name(&self) -> &str { + self.config.external_dns_zone_name() + } + + #[inline] + pub fn num_nexus(&self) -> Option { + self.config.num_nexus() + } + + pub fn set_silo_names(&mut self, names: impl IntoIterator) { + self.config.silo_names = names.into_iter().collect(); + self.log.push(SimConfigLogEntry::SetSiloNames( + self.config.silo_names.clone(), + )); + } + + pub fn add_silo(&mut self, name: Name) -> Result<(), DuplicateError> { + if self.config.silo_names.contains(&name) { + return Err(DuplicateError::silo_name(name)); + } + self.config.silo_names.insert(name.clone()); + self.log.push(SimConfigLogEntry::AddSilo(name)); + Ok(()) + } + + pub fn remove_silo(&mut self, name: Name) -> Result<(), KeyError> { + if !self.config.silo_names.shift_remove(&name) { + return Err(KeyError::silo_name(name)); + } + self.log.push(SimConfigLogEntry::RemoveSilo(name)); + Ok(()) + } + + pub fn set_external_dns_zone_name(&mut self, name: String) { + self.config.external_dns_zone_name = name.clone(); + self.log.push(SimConfigLogEntry::SetExternalDnsZoneName(name)); + } + + pub fn set_num_nexus(&mut self, num_nexus: u16) { + self.config.num_nexus = Some(num_nexus); + } + + pub fn wipe(&mut self) { + self.config = SimConfig::new(); + self.log.push(SimConfigLogEntry::Wipe); + } + + pub(crate) fn into_parts(self) -> (SimConfig, Vec) { + (self.config, self.log) + } +} + +#[derive(Clone, Debug)] +pub enum SimConfigLogEntry { + AddSilo(Name), + RemoveSilo(Name), + SetSiloNames(IndexSet), + SetExternalDnsZoneName(String), + Wipe, +} diff --git a/nexus/reconfigurator/simulation/src/errors.rs b/nexus/reconfigurator/simulation/src/errors.rs new file mode 100644 index 0000000000..571bc90f43 --- /dev/null +++ b/nexus/reconfigurator/simulation/src/errors.rs @@ -0,0 +1,117 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, 
v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use omicron_common::api::external::{Generation, Name}; +use omicron_uuid_kinds::CollectionUuid; +use thiserror::Error; +use uuid::Uuid; + +/// The caller attempted to insert a duplicate key. +#[derive(Clone, Debug, Error)] +#[error("attempted to insert duplicate value: {}", self.id.to_error_string())] +pub struct DuplicateError { + id: ObjectId, +} + +impl DuplicateError { + pub fn id(&self) -> &ObjectId { + &self.id + } + + pub(crate) fn collection(id: CollectionUuid) -> Self { + Self { id: ObjectId::Collection(id) } + } + + pub(crate) fn blueprint(id: Uuid) -> Self { + Self { id: ObjectId::Blueprint(id) } + } + + pub(crate) fn internal_dns(generation: Generation) -> Self { + Self { id: ObjectId::InternalDns(generation) } + } + + pub(crate) fn external_dns(generation: Generation) -> Self { + Self { id: ObjectId::ExternalDns(generation) } + } + + pub(crate) fn silo_name(name: Name) -> Self { + Self { id: ObjectId::SiloName(name) } + } +} + +#[derive(Clone, Debug)] +pub enum ObjectId { + Collection(CollectionUuid), + Blueprint(Uuid), + InternalDns(Generation), + ExternalDns(Generation), + SiloName(Name), +} + +impl ObjectId { + fn to_error_string(&self) -> String { + match self { + ObjectId::Collection(id) => { + format!("collection ID {id}") + } + ObjectId::Blueprint(id) => { + format!("blueprint ID {id}") + } + ObjectId::InternalDns(generation) => { + format!("internal DNS at generation {generation}") + } + ObjectId::ExternalDns(generation) => { + format!("external DNS at generation {generation}") + } + ObjectId::SiloName(name) => { + format!("silo name {name}") + } + } + } +} + +/// The caller attempted to access a key that does not exist. 
+#[derive(Clone, Debug, Error)] +#[error("no such key: {}", self.id.to_error_string())] +pub struct KeyError { + id: ObjectId, +} + +impl KeyError { + pub fn id(&self) -> &ObjectId { + &self.id + } + + pub(crate) fn collection(id: CollectionUuid) -> Self { + Self { id: ObjectId::Collection(id) } + } + + pub(crate) fn blueprint(id: Uuid) -> Self { + Self { id: ObjectId::Blueprint(id) } + } + + pub(crate) fn internal_dns(generation: Generation) -> Self { + Self { id: ObjectId::InternalDns(generation) } + } + + pub(crate) fn external_dns(generation: Generation) -> Self { + Self { id: ObjectId::ExternalDns(generation) } + } + + pub(crate) fn silo_name(name: Name) -> Self { + Self { id: ObjectId::SiloName(name) } + } +} + +/// An operation that requires an empty system was performed on a non-empty +/// system. +#[derive(Clone, Debug, Error)] +#[error("operation requires an empty system")] +pub struct NonEmptySystemError {} + +impl NonEmptySystemError { + pub(crate) fn new() -> Self { + Self {} + } +} diff --git a/nexus/reconfigurator/simulation/src/lib.rs b/nexus/reconfigurator/simulation/src/lib.rs new file mode 100644 index 0000000000..c5f22ab273 --- /dev/null +++ b/nexus/reconfigurator/simulation/src/lib.rs @@ -0,0 +1,61 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Simulation of reconfigurator states. +//! +//! `nexus-reconfigurator-simulation` consists of facilities for: +//! +//! * Simulating successive system states in the face of reconfiguration +//! events. +//! * Tracking a history of states, allowing for states to be rewound and +//! new branches explored. +//! +//! `nexus-reconfigurator-simulation` is intended to be used in: +//! +//! * The reconfigurator CLI for interactive exploration. +//! * Example-based tests. +//! 
* More complex randomized (chaos) testing scenarios, such as randomly +//! generating failure conditions. +//! +//! # Usage +//! +//! The main entrypoint of the library is the [`Simulator`] type. This +//! simulator type stores a tree of states in a UUID-indexed store. This is +//! similar to Git and other source control systems, except we don't use a +//! Merkle tree for now. But we could in the future if there's a compelling +//! reason to do so. +//! +//! Each state is captured in a [`SimState`], and consists of: +//! +//! * The ID of the state. +//! * The ID of the parent state. +//! * Metadata about the state, including the generation number, a +//! description, and a log of changes made in that state. +//! * The contents of the state, which are: +//! * The system itself, as a [`SimSystem`]. +//! * Configuration and policy knobs, as a [`SimConfig`]. +//! * The RNG state, as a [`SimRng`]. +//! +//! Mutating states is done by calling [`SimState::to_mut`], which returns a +//! [`SimStateBuilder`]. Once changes are made, the state can be committed back +//! to the system with [`SimStateBuilder::commit`]. +//! +//! ## Determinism +//! +//! `nexus-reconfigurator-simulation` is structured to be fully deterministic, +//! so that simulations can be replayed. Internally, it uses a seeded RNG, and +//! the only source of non-determinism is the seed for the RNG. + +mod config; +pub mod errors; +mod rng; +mod sim; +mod state; +mod system; + +pub use config::*; +pub use rng::*; +pub use sim::*; +pub use state::*; +pub use system::*; diff --git a/nexus/reconfigurator/simulation/src/rng.rs b/nexus/reconfigurator/simulation/src/rng.rs new file mode 100644 index 0000000000..5926eeb24f --- /dev/null +++ b/nexus/reconfigurator/simulation/src/rng.rs @@ -0,0 +1,145 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Versioned random number generation for the simulator. + +use nexus_inventory::CollectionBuilderRng; +use nexus_reconfigurator_planning::{ + example::{ExampleSystemRng, SimRngState}, + planner::PlannerRng, +}; +use omicron_uuid_kinds::SledUuid; + +/// Versioned random number generator for the simulator. +/// +/// The simulator is designed to be as deterministic as possible, so that +/// simulations can be replayed and compared. To that end, this RNG is +/// versioned. +#[derive(Clone, Debug)] +pub struct SimRng { + // SimRngState is cheap to clone (just a string and a bunch of integers), so + // there's no need for Arc. + state: SimRngState, +} + +impl SimRng { + /// Create a new RNG. + pub fn from_entropy() -> Self { + let seed = seed_from_entropy(); + let state = SimRngState::from_seed(&seed); + Self { state } + } + + pub fn from_seed(seed: String) -> Self { + let state = SimRngState::from_seed(&seed); + Self { state } + } + + /// Obtain the current seed. + pub fn seed(&self) -> &str { + self.state.seed() + } + + pub(crate) fn to_mut(&self) -> SimRngBuilder { + SimRngBuilder { rng: self.clone(), log: Vec::new() } + } +} + +/// A [`SimRng`] that can be changed to create new states. +/// +/// Returned by [`SimStateBuilder::rng_mut`](crate::SimStateBuilder::rng_mut). +#[derive(Clone, Debug)] +pub struct SimRngBuilder { + rng: SimRng, + log: Vec, +} + +impl SimRngBuilder { + /// Obtain the current seed. + pub fn seed(&self) -> &str { + self.rng.seed() + } + + /// Set a new seed for the RNG, resetting internal state. + pub fn set_seed(&mut self, seed: String) { + self.rng = SimRng::from_seed(seed.clone()); + self.log.push(SimRngLogEntry::SetSeed(seed)); + } + + /// Reset internal state while keeping the same seed. + /// + /// RNGs are stateful, so it can be useful to reset them back to their + /// initial state. + /// + /// In general, it only makes sense to call this as part of a system wipe. 
+ /// If it is called outside of a system wipe, then duplicate IDs might be + /// generated. + pub fn reset_state(&mut self) { + let existing_seed = self.rng.seed().to_owned(); + self.rng = SimRng::from_seed(existing_seed.clone()); + } + + /// Regenerate a new seed for the RNG from entropy (not from the existing + /// seed!), resetting internal state. + /// + /// The seed is returned, and the caller may wish to log it. + #[must_use = "consider logging or displaying the new seed"] + pub fn regenerate_seed_from_entropy(&mut self) -> String { + let seed = seed_from_entropy(); + self.rng = SimRng::from_seed(seed.clone()); + self.log.push(SimRngLogEntry::RegenerateSeedFromEntropy(seed.clone())); + seed + } + + /// Get the next example system RNG. + pub fn next_example_rng(&mut self) -> ExampleSystemRng { + self.log.push(SimRngLogEntry::NextExampleRng); + self.rng.state.next_system_rng() + } + + /// Get the next collection RNG. + pub fn next_collection_rng(&mut self) -> CollectionBuilderRng { + self.log.push(SimRngLogEntry::NextCollectionRng); + self.rng.state.next_collection_rng() + } + + /// Get the next blueprint RNG. + pub fn next_planner_rng(&mut self) -> PlannerRng { + self.log.push(SimRngLogEntry::NextPlannerRng); + self.rng.state.next_planner_rng() + } + + /// Get the next sled ID. + #[must_use] + pub fn next_sled_id(&mut self) -> SledUuid { + let id = self.rng.state.next_sled_id_rng().next(); + self.log.push(SimRngLogEntry::NextSledId(id)); + id + } + + pub(crate) fn into_parts(self) -> (SimRng, Vec) { + (self.rng, self.log) + } +} + +#[derive(Clone, Debug)] +pub enum SimRngLogEntry { + SetSeed(String), + ResetState { existing_seed: String }, + RegenerateSeedFromEntropy(String), + NextExampleRng, + NextCollectionRng, + NextPlannerRng, + NextSledId(SledUuid), +} + +pub(crate) fn seed_from_entropy() -> String { + // Each of the word lists petname uses are drawn from a pool of roughly + // 1000 words, so 3 words gives us around 30 bits of entropy. 
That should + // hopefully be enough to explore the entire state space. But if necessary + // we could also increase the length or expand the word lists (petname has + // much bigger ones too). + petname::petname(3, "-") + .expect("non-zero length requested => cannot be empty") +} diff --git a/nexus/reconfigurator/simulation/src/sim.rs b/nexus/reconfigurator/simulation/src/sim.rs new file mode 100644 index 0000000000..65ac4153b5 --- /dev/null +++ b/nexus/reconfigurator/simulation/src/sim.rs @@ -0,0 +1,146 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A store of successive reconfigurator states: the main entrypoint for +//! reconfigurator simulation. + +use std::{collections::HashMap, sync::Arc}; + +use indexmap::IndexSet; +use omicron_uuid_kinds::{ReconfiguratorSimKind, ReconfiguratorSimUuid}; +use typed_rng::TypedUuidRng; + +use crate::{seed_from_entropy, SimState}; + +/// A store to track reconfigurator states: the main entrypoint for +/// reconfigurator simulation. +/// +/// This store has key-based storage for systems and their states, and allows +/// for append-only storage of new states. +/// +/// # Implementation notes +/// +/// We currently index by UUIDs, but we could index by the hash of the contents +/// and make it a Merkle tree just as well. (We'd have to hook up canonical +/// hashing etc; it's a bunch of work but not too difficult). If there's a +/// compelling reason to use Merkle trees, we should switch over to that. +/// +/// Currently, all data within is stored via `Arc` instances to enable cheap +/// cloning. In the future, it would be interesting to store data at the top +/// level, and only store references to it within each `SimState`. (The +/// references could be UUIDs, Rust `&` references, or content-addressed +/// hashes.) 
This does add some complexity, though, and there isn't a clear +/// benefit at the moment. For more, see the comment within the definition of +/// `system.rs`'s `SimSystem`. +#[derive(Clone, Debug)] +pub struct Simulator { + log: slog::Logger, + // In the future, it would be interesting to store a chain of every set of + // heads over time, similar to `jj op log`. That would let us implement undo + // and restore operations. + heads: IndexSet, + states: HashMap>, + // This state corresponds to `ROOT_ID`. + // + // Storing it in the Arc is extremely important! `SimStateBuilder` stores a + // pointer to the root state to ensure that there's no attempt to compare + // "unrelated histories". The Simulator struct itself can both be cloned and + // moved in memory, but because `root_state` is never changed, it always + // points to the same memory address. + root_state: Arc, + // Top-level (unversioned) RNG. + sim_uuid_rng: TypedUuidRng, +} + +impl Simulator { + /// The root ID of the store. + /// + /// This is always defined to be the nil UUID, and if queried will always + /// have a state associated with it. + pub const ROOT_ID: ReconfiguratorSimUuid = ReconfiguratorSimUuid::nil(); + + /// Create a new simulator with the given initial seed. + pub fn new(log: &slog::Logger, seed: Option) -> Self { + let seed = match seed { + Some(seed) => seed, + None => seed_from_entropy(), + }; + Self::new_inner(log, seed) + } + + fn new_inner(log: &slog::Logger, seed: String) -> Self { + let log = log.new(slog::o!("component" => "SimStore")); + let sim_uuid_rng = + TypedUuidRng::from_seed(&seed, "ReconfiguratorSimUuid"); + let root_state = SimState::new_root(seed); + Self { + log, + heads: IndexSet::new(), + states: HashMap::new(), + root_state, + sim_uuid_rng, + } + } + + /// Get the initial RNG seed. + /// + /// Versioned configurations start with this seed, though they may choose + /// to change it as they go along. 
+ pub fn initial_seed(&self) -> &str { + &self.root_state.rng().seed() + } + + /// Get the current heads of the store. + #[inline] + pub fn heads(&self) -> &IndexSet { + &self.heads + } + + /// Get the state for the given UUID. + pub fn get_state(&self, id: ReconfiguratorSimUuid) -> Option<&SimState> { + if id == Self::ROOT_ID { + return Some(&self.root_state); + } + Some(&**self.states.get(&id)?) + } + + /// Get the root state. + /// + /// This is equivalent to + /// [`Self::get_state`]`(`[`Self::ROOT_ID`]`).unwrap()`. + pub fn root_state(&self) -> &SimState { + &self.root_state + } + + #[inline] + pub(crate) fn next_sim_uuid(&mut self) -> ReconfiguratorSimUuid { + self.sim_uuid_rng.next() + } + + // Invariant: the ID should be not present in the store, having been + // generated by next_sim_uuid. + pub(crate) fn add_state(&mut self, state: Arc) { + let id = state.id(); + let parent = state.parent(); + if self.states.insert(id, state).is_some() { + panic!("ID {id} should be unique and generated by the store"); + } + + // Remove the parent if it exists as a head, and in any case add the + // new one. Unlike in source control we don't have a concept of + // "merges" here, so there's exactly one parent that may need to be + // removed. + if let Some(parent) = parent { + self.heads.shift_remove(&parent); + } + self.heads.insert(id); + + slog::debug!( + self.log, + "committed new state"; + "id" => %id, + "parent" => ?parent, + ); + } +} diff --git a/nexus/reconfigurator/simulation/src/state.rs b/nexus/reconfigurator/simulation/src/state.rs new file mode 100644 index 0000000000..5eccd23594 --- /dev/null +++ b/nexus/reconfigurator/simulation/src/state.rs @@ -0,0 +1,510 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use std::sync::Arc; + +use anyhow::{anyhow, bail, Context}; +use nexus_inventory::CollectionBuilder; +use nexus_reconfigurator_planning::system::SledHwInventory; +use nexus_types::deployment::{SledFilter, UnstableReconfiguratorState}; +use omicron_common::api::external::Generation; +use omicron_uuid_kinds::{CollectionUuid, ReconfiguratorSimUuid}; +use sync_ptr::SyncConstPtr; + +use crate::{ + config::SimConfig, errors::NonEmptySystemError, SimConfigBuilder, + SimConfigLogEntry, SimRng, SimRngBuilder, SimRngLogEntry, SimSystem, + SimSystemBuilder, SimSystemLogEntry, Simulator, +}; + +/// A top-level, versioned snapshot of reconfigurator state. +/// +/// This snapshot consists of a system, along with a policy and a stateful RNG. +#[derive(Clone, Debug)] +pub struct SimState { + // A pointer to the root state (self if the current state *is* the root + // state). This is used to check in `SimStateBuilder` that the simulator is + // the same as the one that created this state. + // + // We store the root state, not the simulator itself, because the root state + // is stored behind an `Arc`. This means that the address stays stable even + // if the `Simulator` struct is cloned or moved in memory. + root_state: SyncConstPtr, + id: ReconfiguratorSimUuid, + // The parent state that this state was derived from. + parent: Option, + // The state's generation, starting from 0. + // + // TODO: Should this be its own type to avoid confusion with other + // Generation instances? Generation numbers start from 1, but in our case 0 + // provides a better user experience. + generation: Generation, + description: String, + system: SimSystem, + config: SimConfig, + rng: SimRng, + // A log of changes in this state compared to the parent state. + log: SimStateLog, +} + +impl SimState { + pub(crate) fn new_root(seed: String) -> Arc { + Arc::new_cyclic(|state| { + Self { + // Store a pointer to the root state's allocation to ensure that + // unrelated histories aren't mixed up. 
+ // + // SAFETY: We only care about pointer equality, and never + // dereference this pointer. + root_state: unsafe { SyncConstPtr::new(state.as_ptr()) }, + id: Simulator::ROOT_ID, + parent: None, + // We don't normally use generation 0 in the production system, but + // having it here means that we can present a better user + // experience (first change is generation 1). + generation: Generation::from_u32(0), + description: "root state".to_string(), + system: SimSystem::new(), + config: SimConfig::new(), + rng: SimRng::from_seed(seed), + log: SimStateLog { + system: Vec::new(), + config: Vec::new(), + rng: Vec::new(), + }, + } + }) + } + + #[inline] + #[must_use] + pub fn id(&self) -> ReconfiguratorSimUuid { + self.id + } + + #[inline] + #[must_use] + pub fn parent(&self) -> Option { + self.parent + } + + #[inline] + #[must_use] + pub fn description(&self) -> &str { + &self.description + } + + #[inline] + #[must_use] + pub fn system(&self) -> &SimSystem { + &self.system + } + + #[inline] + #[must_use] + pub fn config(&self) -> &SimConfig { + &self.config + } + + #[inline] + #[must_use] + pub fn rng(&self) -> &SimRng { + &self.rng + } + + #[inline] + #[must_use] + pub fn log(&self) -> &SimStateLog { + &self.log + } + + /// Convert the state to a serializable form. + /// + /// Return a [`UnstableReconfiguratorState`] with information about the + /// current state. + pub fn to_serializable( + &self, + ) -> anyhow::Result { + let planning_input = self + .system() + .description() + .to_planning_input_builder() + .context("creating planning input builder")? 
+ .build(); + + Ok(UnstableReconfiguratorState { + planning_input, + collections: self.system.all_collections().cloned().collect(), + blueprints: self.system.all_blueprints().cloned().collect(), + internal_dns: self + .system + .all_internal_dns() + .map(|params| (params.generation, params.clone())) + .collect(), + external_dns: self + .system + .all_external_dns() + .map(|params| (params.generation, params.clone())) + .collect(), + silo_names: self.config.silo_names().cloned().collect(), + external_dns_zone_names: vec![self + .config + .external_dns_zone_name() + .to_owned()], + }) + } + + pub fn to_mut(&self) -> SimStateBuilder { + SimStateBuilder { + root_state: self.root_state, + parent: self.id, + parent_gen: self.generation, + system: self.system.to_mut(), + config: self.config.to_mut(), + rng: self.rng.to_mut(), + } + } +} + +/// A [`SimState`] that can be changed to create new states. +/// +/// Created by [`SimState::to_mut`]. +/// +/// `SimStateBuilder` is ephemeral, so it can be freely mutated without +/// affecting anything else about the system. To store it into a system, call +/// [`Self::commit`]. +#[derive(Clone, Debug)] +pub struct SimStateBuilder { + // Used to check that the simulator is the same as the one that created + // this state. + root_state: SyncConstPtr, + parent: ReconfiguratorSimUuid, + parent_gen: Generation, + system: SimSystemBuilder, + config: SimConfigBuilder, + rng: SimRngBuilder, +} + +impl SimStateBuilder { + #[inline] + #[must_use] + pub fn parent(&self) -> ReconfiguratorSimUuid { + self.parent + } + + #[inline] + #[must_use] + pub fn system_mut(&mut self) -> &mut SimSystemBuilder { + &mut self.system + } + + #[inline] + #[must_use] + pub fn config_mut(&mut self) -> &mut SimConfigBuilder { + &mut self.config + } + + #[inline] + #[must_use] + pub fn rng_mut(&mut self) -> &mut SimRngBuilder { + &mut self.rng + } + + /// Load a serialized state into an empty system. 
+ /// + /// If the primary collection ID is not provided, the serialized state must + /// only contain one collection. + pub fn load_serialized( + &mut self, + state: UnstableReconfiguratorState, + primary_collection_id: Option, + ) -> anyhow::Result { + if !self.system.is_empty() { + return Err(anyhow!(NonEmptySystemError::new())); + } + + let collection_id = + get_primary_collection_id(&state, primary_collection_id)?; + + // NOTE: If more error cases are added, ensure that they're checked + // before load_serialized_inner is called. This ensures that the system + // is not modified if there are errors. + let mut res = LoadResultBuilder::default(); + self.load_serialized_inner(state, collection_id, &mut res); + + Ok(LoadResult { + primary_collection_id: collection_id, + notices: res.notices, + warnings: res.warnings, + }) + } + + // This method MUST be infallible. It should only be called after checking + // the invariant: the primary collection ID is valid. + fn load_serialized_inner( + &mut self, + state: UnstableReconfiguratorState, + primary_collection_id: CollectionUuid, + res: &mut LoadResultBuilder, + ) { + res.notices.push(format!( + "using collection {} as source of sled inventory data", + primary_collection_id, + )); + let primary_collection = state + .collections + .iter() + .find(|c| c.id == primary_collection_id) + .expect("invariant: primary collection ID is valid"); + + for (sled_id, sled_details) in + state.planning_input.all_sleds(SledFilter::Commissioned) + { + let Some(inventory_sled_agent) = + primary_collection.sled_agents.get(&sled_id) + else { + res.warnings.push(format!( + "sled {}: skipped (no inventory found for sled agent in \ + collection {}", + sled_id, primary_collection_id + )); + continue; + }; + + let inventory_sp = inventory_sled_agent + .baseboard_id + .as_ref() + .and_then(|baseboard_id| { + let inv_sp = primary_collection.sps.get(baseboard_id); + let inv_rot = primary_collection.rots.get(baseboard_id); + if let (Some(inv_sp), 
Some(inv_rot)) = (inv_sp, inv_rot) { + Some(SledHwInventory { + baseboard_id: &baseboard_id, + sp: inv_sp, + rot: inv_rot, + }) + } else { + None + } + }); + + // XXX: Should this error ever happen? The only case where it + // errors is if the sled ID is already present, but we know that + // the system is empty, and the state's planning input is keyed by + // sled ID, so there should be no duplicates. + let result = self.system.description_mut().sled_full( + sled_id, + sled_details.policy, + sled_details.state, + sled_details.resources.clone(), + inventory_sp, + inventory_sled_agent, + ); + + match result { + Ok(_) => { + res.notices.push(format!("sled {}: loaded", sled_id)); + } + Err(error) => { + // Failing to load a sled shouldn't really happen, but if + // it does, it is a non-fatal error. + res.warnings.push(format!("sled {}: {:#}", sled_id, error)); + } + }; + } + + for collection in state.collections { + let collection_id = collection.id; + match self.system.add_collection(collection) { + Ok(_) => { + res.notices + .push(format!("collection {}: loaded", collection_id)); + } + Err(_) => { + res.warnings.push(format!( + "collection {}: skipped (duplicate found)", + collection_id, + )); + } + } + } + + for blueprint in state.blueprints { + let blueprint_id = blueprint.id; + match self.system.add_blueprint(blueprint) { + Ok(_) => { + res.notices + .push(format!("blueprint {}: loaded", blueprint_id)); + } + Err(_) => { + res.notices.push(format!( + "blueprint {}: skipped (duplicate found)", + blueprint_id, + )); + } + } + } + + self.system.description_mut().service_ip_pool_ranges( + state.planning_input.service_ip_pool_ranges().to_vec(), + ); + res.notices.push(format!( + // TODO: better output format? 
+ "loaded service IP pool ranges: {:?}", + state.planning_input.service_ip_pool_ranges() + )); + + self.system.set_internal_dns(state.internal_dns); + self.system.set_external_dns(state.external_dns); + + let nnames = state.external_dns_zone_names.len(); + if nnames > 0 { + if nnames > 1 { + res.warnings.push(format!( + "found {} external DNS names; using only the first one", + nnames + )); + } + self.config.set_external_dns_zone_name( + state.external_dns_zone_names[0].clone(), + ); + } + + // TODO: Currently this doesn't return notices for DNS and silo names. + // The only caller of this function prints them separately after + // committing this state. We may want to record this information in the + // MergeResult instead. + + // TODO: log what happened here. This is a cross-cutting change so we + // may want to log it as a single big entry (like + // SimSystemBuilder::load_example) rather than lots of little ones. + } + + /// Commit the current state to the store, returning the new state's UUID. + /// + /// # Panics + /// + /// Panics if `sim` is not the same simulator that created this state. This + /// should ordinarily never happen and always indicates a programming + /// error. + #[must_use = "callers should update their pointers with the returned UUID"] + pub fn commit( + self, + description: String, + sim: &mut Simulator, + ) -> ReconfiguratorSimUuid { + // Check for unrelated histories. 
+ if !std::ptr::eq(sim.root_state(), self.root_state.inner()) { + panic!( + "this state was created by a different simulator than the one \ + it is being committed to" + ); + } + + let id = sim.next_sim_uuid(); + let (system, system_log) = self.system.into_parts(); + let (config, config_log) = self.config.into_parts(); + let (rng, rng_log) = self.rng.into_parts(); + let log = SimStateLog { + system: system_log, + config: config_log, + rng: rng_log, + }; + let state = SimState { + root_state: self.root_state, + id, + description, + parent: Some(self.parent), + generation: self.parent_gen.next(), + system, + config, + rng, + log, + }; + sim.add_state(Arc::new(state)); + id + } + + // TODO: should probably enforce that RNG is set, maybe by hiding the + // SystemDescription struct? + pub fn to_collection_builder( + &mut self, + ) -> anyhow::Result { + let mut builder = self + .system + .description() + .to_collection_builder() + .context("generating inventory")?; + + let rng = self.rng.next_collection_rng(); + builder.set_rng(rng); + + Ok(builder) + } +} + +/// A log of changes made to a state compared to the parent. +#[derive(Clone, Debug)] +pub struct SimStateLog { + pub system: Vec, + pub config: Vec, + pub rng: Vec, +} + +/// The output of merging a serializable state into a mutable state. +#[derive(Clone, Debug)] +#[must_use] +pub struct LoadResult { + // TODO: Storing notices and warnings as strings is a carryover from + // reconfigurator-cli. We may wish to store data in a more structured form. + // For example, store a map of sled IDs to their statuses, etc. + /// The primary collection ID. + pub primary_collection_id: CollectionUuid, + + /// Notices for the caller to display. + pub notices: Vec, + + /// Non-fatal warnings that occurred. + pub warnings: Vec, +} + +/// Check and get the primary collection ID for a serialized state. 
+fn get_primary_collection_id( + state: &UnstableReconfiguratorState, + provided: Option, +) -> anyhow::Result { + match provided { + Some(id) => { + // Check that the collection ID is valid. + if state.collections.iter().any(|c| c.id == id) { + Ok(id) + } else { + bail!("collection {} not found in data", id) + } + } + None => match state.collections.len() { + 1 => Ok(state.collections[0].id), + 0 => bail!( + "no collection_id specified and file contains 0 collections" + ), + count => bail!( + "no collection_id specified and file contains {} \ + collections: {}", + count, + state + .collections + .iter() + .map(|c| c.id.to_string()) + .collect::>() + .join(", ") + ), + }, + } +} + +#[derive(Debug, Default)] +struct LoadResultBuilder { + notices: Vec, + warnings: Vec, +} diff --git a/nexus/reconfigurator/simulation/src/system.rs b/nexus/reconfigurator/simulation/src/system.rs new file mode 100644 index 0000000000..11fa24a3ac --- /dev/null +++ b/nexus/reconfigurator/simulation/src/system.rs @@ -0,0 +1,473 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A simulated reconfigurator system. + +use std::{collections::BTreeMap, sync::Arc}; + +use chrono::Utc; +use indexmap::IndexMap; +use nexus_reconfigurator_planning::{ + example::ExampleSystem, system::SystemDescription, +}; +use nexus_types::{ + deployment::Blueprint, + internal_api::params::{DnsConfigParams, DnsConfigZone}, + inventory::Collection, +}; +use omicron_common::api::external::Generation; +use omicron_uuid_kinds::CollectionUuid; +use uuid::Uuid; + +use crate::errors::{DuplicateError, KeyError, NonEmptySystemError}; + +/// A versioned, simulated reconfigurator system. 
+#[derive(Clone, Debug)] +pub struct SimSystem { + // Implementation note: an alternative way to store data would be for + // `Simulator` to carry a global store with it, and then each system only + // stores the presence of blueprints/collections/etc rather than the + // objects themselves. In other words, a simulator-wide object store. A few + // things become easier that way, such as being able to iterate over all + // known objects of a given type. + // + // However, there are a few issues with this approach in practice: + // + // 1. The blueprints and collections are not guaranteed to be unique per + // UUID. Unlike (say) source control commit hashes, UUIDs are not + // content-hashed, so the same UUID can be associated with different + // blueprints/collections. + // 2. DNS configs are absolutely not unique per generation! Again, not + // content-hashed. + // 3. The mutable system may wish to not add objects to the store until + // it's committed. This means that the mutable system would probably + // have to maintain a list of pending objects to add to the store. That + // complicates some of the internals, if not the API. + // 4. We'll have to figure out how to manage the store (so SimSystemBuilder + // can access existing blueprints/collections while it's in flight). + // Storing a &mut reference is not an option, and we probably want it to + // be thread-safe, so the options seem to be either `&Mutex` or + // `Arc>`. Our current approach is more simplistic, but + // also lock-free. + // + // None of these are insurmountable, but they do make the global store a + // bit less appealing than it might seem at first glance. + // + /// Describes the sleds in the system. + /// + /// This resembles what we get from the `sled` table in a real system. It + /// also contains enough information to generate inventory collections that + /// describe the system. + description: SystemDescription, + + /// Inventory collections created by the user. 
+ /// + /// Stored with `Arc` to allow cheap cloning. + collections: IndexMap>, + + /// Blueprints created by the user. + /// + /// Stored with `Arc` to allow cheap cloning. + blueprints: IndexMap>, + + /// Internal DNS configurations. + /// + /// Stored with `Arc` to allow cheap cloning. + internal_dns: BTreeMap>, + + /// External DNS configurations. + /// + /// Stored with `Arc` to allow cheap cloning. + external_dns: BTreeMap>, +} + +impl SimSystem { + pub fn new() -> Self { + Self { + description: SystemDescription::new(), + collections: IndexMap::new(), + blueprints: IndexMap::new(), + internal_dns: BTreeMap::new(), + external_dns: BTreeMap::new(), + } + } + + pub fn is_empty(&self) -> bool { + !self.description.has_sleds() + && self.collections.is_empty() + && self.blueprints.is_empty() + && self.internal_dns.is_empty() + && self.external_dns.is_empty() + } + + #[inline] + pub fn description(&self) -> &SystemDescription { + &self.description + } + + pub fn get_collection( + &self, + id: CollectionUuid, + ) -> Result<&Collection, KeyError> { + match self.collections.get(&id) { + Some(c) => Ok(&**c), + None => Err(KeyError::collection(id)), + } + } + + pub fn all_collections( + &self, + ) -> impl ExactSizeIterator { + self.collections.values().map(|c| &**c) + } + + pub fn get_blueprint(&self, id: Uuid) -> Result<&Blueprint, KeyError> { + match self.blueprints.get(&id) { + Some(b) => Ok(&**b), + None => Err(KeyError::blueprint(id)), + } + } + + pub fn all_blueprints(&self) -> impl ExactSizeIterator { + self.blueprints.values().map(|b| &**b) + } + + pub fn get_internal_dns( + &self, + generation: Generation, + ) -> Result<&DnsConfigParams, KeyError> { + self.internal_dns + .get(&generation) + .map(|d| &**d) + .ok_or_else(|| KeyError::internal_dns(generation)) + } + + pub fn all_internal_dns( + &self, + ) -> impl ExactSizeIterator { + self.internal_dns.values().map(|d| &**d) + } + + pub fn get_external_dns( + &self, + generation: Generation, + ) -> 
Result<&DnsConfigParams, KeyError> { + self.external_dns + .get(&generation) + .map(|d| &**d) + .ok_or_else(|| KeyError::external_dns(generation)) + } + + pub fn all_external_dns( + &self, + ) -> impl ExactSizeIterator { + self.external_dns.values().map(|d| &**d) + } + + pub(crate) fn to_mut(&self) -> SimSystemBuilder { + SimSystemBuilder { system: self.clone(), log: Vec::new() } + } +} + +/// A [`SimSystem`] that can be changed to create new states. +/// +/// Returned by +/// [`SimStateBuilder::system_mut`](crate::SimStateBuilder::system_mut). +#[derive(Clone, Debug)] +pub struct SimSystemBuilder { + // The underlying `SimSystem`. + system: SimSystem, + // Operation log on the system. + log: Vec, +} + +impl SimSystemBuilder { + // These methods are duplicated from `SimSystem`. The forwarding is all + // valid because we don't cache pending changes in this struct, instead + // making them directly to the underlying system. If we did cache changes, + // we'd need to be more careful about how we forward these methods. 
+
+    /// Returns true if the underlying system is empty.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.system.is_empty()
+    }
+
+    /// Returns the description of the underlying system.
+    #[inline]
+    pub fn description(&self) -> &SystemDescription {
+        // `SimSystem::description` already returns a reference, so no
+        // additional borrow is needed here.
+        self.system.description()
+    }
+
+    /// Looks up a collection by ID.
+    ///
+    /// Returns a [`KeyError`] if no collection with this ID is present.
+    #[inline]
+    pub fn get_collection(
+        &self,
+        id: CollectionUuid,
+    ) -> Result<&Collection, KeyError> {
+        self.system.get_collection(id)
+    }
+
+    /// Iterates over all collections in the underlying system.
+    #[inline]
+    pub fn all_collections(
+        &self,
+    ) -> impl ExactSizeIterator<Item = &Collection> {
+        self.system.all_collections()
+    }
+
+    /// Looks up a blueprint by ID.
+    ///
+    /// Returns a [`KeyError`] if no blueprint with this ID is present.
+    #[inline]
+    pub fn get_blueprint(&self, id: Uuid) -> Result<&Blueprint, KeyError> {
+        self.system.get_blueprint(id)
+    }
+
+    /// Iterates over all blueprints in the underlying system.
+    #[inline]
+    pub fn all_blueprints(&self) -> impl ExactSizeIterator<Item = &Blueprint> {
+        self.system.all_blueprints()
+    }
+
+    /// Looks up the internal DNS configuration for a generation.
+    ///
+    /// Returns a [`KeyError`] if this generation is not present.
+    #[inline]
+    pub fn get_internal_dns(
+        &self,
+        generation: Generation,
+    ) -> Result<&DnsConfigParams, KeyError> {
+        self.system.get_internal_dns(generation)
+    }
+
+    /// Iterates over all internal DNS configurations.
+    #[inline]
+    pub fn all_internal_dns(
+        &self,
+    ) -> impl ExactSizeIterator<Item = &DnsConfigParams> {
+        self.system.all_internal_dns()
+    }
+
+    /// Looks up the external DNS configuration for a generation.
+    ///
+    /// Returns a [`KeyError`] if this generation is not present.
+    #[inline]
+    pub fn get_external_dns(
+        &self,
+        generation: Generation,
+    ) -> Result<&DnsConfigParams, KeyError> {
+        self.system.get_external_dns(generation)
+    }
+
+    /// Iterates over all external DNS configurations.
+    #[inline]
+    pub fn all_external_dns(
+        &self,
+    ) -> impl ExactSizeIterator<Item = &DnsConfigParams> {
+        self.system.all_external_dns()
+    }
+
+    /// Returns a mutable reference to the underlying system description.
+    ///
+    /// Changes made through this reference are not recorded in the
+    /// operation log.
+    // TODO: track changes to the SystemDescription -- we'll probably want to
+    // have a separation between a type that represents a read-only system
+    // description and a type that can mutate it.
+    pub fn description_mut(&mut self) -> &mut SystemDescription {
+        &mut self.system.description
+    }
+
+    /// Loads an example system into an empty system.
+    ///
+    /// On success, the example's system description, its collection, its
+    /// initial blueprint, the provided `blueprint`, and one internal and one
+    /// external DNS configuration (keyed by the DNS versions recorded in
+    /// `blueprint`) are stored, and a single `LoadExample` log entry is
+    /// recorded.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`NonEmptySystemError`] if the system is not empty. In that
+    /// case the system is left unmodified.
+    pub fn load_example(
+        &mut self,
+        example: ExampleSystem,
+        blueprint: Blueprint,
+        internal_dns: DnsConfigZone,
+        external_dns: DnsConfigZone,
+    ) -> Result<(), NonEmptySystemError> {
+        if !self.system.is_empty() {
+            return Err(NonEmptySystemError::new());
+        }
+
+        // NOTE: If more error cases are added, ensure that they're checked
+        // before load_example_inner is called. This ensures that the system is
+        // not modified if there are errors.
+        self.load_example_inner(example, blueprint, internal_dns, external_dns);
+        Ok(())
+    }
+
+    // This method MUST be infallible. It should only be called after checking
+    // the invariant: the system must be empty.
+    fn load_example_inner(
+        &mut self,
+        example: ExampleSystem,
+        blueprint: Blueprint,
+        internal_dns: DnsConfigZone,
+        external_dns: DnsConfigZone,
+    ) {
+        // Record the operation first: the fields read here are moved into the
+        // system below.
+        self.log.push(SimSystemLogEntry::LoadExample {
+            collection_id: example.collection.id,
+            blueprint_id: blueprint.id,
+            internal_dns_version: blueprint.internal_dns_version,
+            external_dns_version: blueprint.external_dns_version,
+        });
+
+        self.system.description = example.system;
+        self.system
+            .collections
+            .insert(example.collection.id, Arc::new(example.collection));
+        self.system.internal_dns.insert(
+            blueprint.internal_dns_version,
+            Arc::new(DnsConfigParams {
+                generation: blueprint.internal_dns_version,
+                // TODO: probably want to make time controllable by the caller.
+                time_created: Utc::now(),
+                zones: vec![internal_dns],
+            }),
+        );
+        self.system.external_dns.insert(
+            blueprint.external_dns_version,
+            Arc::new(DnsConfigParams {
+                generation: blueprint.external_dns_version,
+                // TODO: probably want to make time controllable by the caller.
+                time_created: Utc::now(),
+                zones: vec![external_dns],
+            }),
+        );
+        self.system.blueprints.insert(
+            example.initial_blueprint.id,
+            Arc::new(example.initial_blueprint),
+        );
+        self.system.blueprints.insert(blueprint.id, Arc::new(blueprint));
+    }
+
+    /// Adds a collection to the system, recording an `AddCollection` log
+    /// entry on success.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`DuplicateError`] if a collection with the same ID is already
+    /// present; the system and the log are left unmodified.
+    pub fn add_collection(
+        &mut self,
+        collection: impl Into<Arc<Collection>>,
+    ) -> Result<(), DuplicateError> {
+        let collection = collection.into();
+        self.add_collection_inner(collection)
+    }
+
+    // Non-generic body of `add_collection`.
+    fn add_collection_inner(
+        &mut self,
+        collection: Arc<Collection>,
+    ) -> Result<(), DuplicateError> {
+        let collection_id = collection.id;
+        match self.system.collections.entry(collection_id) {
+            indexmap::map::Entry::Vacant(entry) => {
+                entry.insert(collection);
+                self.log.push(SimSystemLogEntry::AddCollection(collection_id));
+                Ok(())
+            }
+            indexmap::map::Entry::Occupied(_) => {
+                Err(DuplicateError::collection(collection_id))
+            }
+        }
+    }
+
+    /// Adds a blueprint to the system, recording an `AddBlueprint` log entry
+    /// on success.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`DuplicateError`] if a blueprint with the same ID is already
+    /// present; the system and the log are left unmodified.
+    pub fn add_blueprint(
+        &mut self,
+        blueprint: impl Into<Arc<Blueprint>>,
+    ) -> Result<(), DuplicateError> {
+        let blueprint = blueprint.into();
+        self.add_blueprint_inner(blueprint)
+    }
+
+    // Non-generic body of `add_blueprint`.
+    fn add_blueprint_inner(
+        &mut self,
+        blueprint: Arc<Blueprint>,
+    ) -> Result<(), DuplicateError> {
+        let blueprint_id = blueprint.id;
+        match self.system.blueprints.entry(blueprint_id) {
+            indexmap::map::Entry::Vacant(entry) => {
+                entry.insert(blueprint);
+                self.log.push(SimSystemLogEntry::AddBlueprint(blueprint_id));
+                Ok(())
+            }
+            indexmap::map::Entry::Occupied(_) => {
+                Err(DuplicateError::blueprint(blueprint_id))
+            }
+        }
+    }
+
+    /// Adds an internal DNS configuration, keyed by its generation.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`DuplicateError`] if a configuration for the same generation
+    /// is already present; the system is left unmodified.
+    pub fn add_internal_dns(
+        &mut self,
+        params: impl Into<Arc<DnsConfigParams>>,
+    ) -> Result<(), DuplicateError> {
+        let params = params.into();
+        self.add_internal_dns_inner(params)
+    }
+
+    // Non-generic body of `add_internal_dns`.
+    //
+    // NOTE(review): unlike `add_collection_inner` and `add_blueprint_inner`,
+    // no log entry is pushed here (`SimSystemLogEntry` has no DNS variant) --
+    // confirm this is intentional.
+    fn add_internal_dns_inner(
+        &mut self,
+        params: Arc<DnsConfigParams>,
+    ) -> Result<(), DuplicateError> {
+        let generation = params.generation;
+        match self.system.internal_dns.entry(generation) {
+            std::collections::btree_map::Entry::Vacant(entry) => {
+                entry.insert(params);
+                Ok(())
+            }
+            std::collections::btree_map::Entry::Occupied(_) => {
+                Err(DuplicateError::internal_dns(generation))
+            }
+        }
+    }
+
+    /// Adds an external DNS configuration, keyed by its generation.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`DuplicateError`] if a configuration for the same generation
+    /// is already present; the system is left unmodified.
+    pub fn add_external_dns(
+        &mut self,
+        params: impl Into<Arc<DnsConfigParams>>,
+    ) -> Result<(), DuplicateError> {
+        let params = params.into();
+        self.add_external_dns_inner(params)
+    }
+
+    // Non-generic body of `add_external_dns`.
+    //
+    // NOTE(review): unlike `add_collection_inner` and `add_blueprint_inner`,
+    // no log entry is pushed here (`SimSystemLogEntry` has no DNS variant) --
+    // confirm this is intentional.
+    fn add_external_dns_inner(
+        &mut self,
+        params: Arc<DnsConfigParams>,
+    ) -> Result<(), DuplicateError> {
+        let generation = params.generation;
+        match self.system.external_dns.entry(generation) {
+            std::collections::btree_map::Entry::Vacant(entry) => {
+                entry.insert(params);
+                Ok(())
+            }
+            std::collections::btree_map::Entry::Occupied(_) => {
+                Err(DuplicateError::external_dns(generation))
+            }
+        }
+    }
+
+    /// Replaces the underlying system with a new, empty [`SimSystem`] and
+    /// records a `Wipe` log entry.
+    pub fn wipe(&mut self) {
+        self.system = SimSystem::new();
+        self.log.push(SimSystemLogEntry::Wipe);
+    }
+
+    // Replaces all internal DNS configurations with the provided ones. No
+    // log entry is recorded.
+    //
+    // Not public: the only users that want to replace DNS wholesale are
+    // internal to this crate.
+    pub(crate) fn set_internal_dns(
+        &mut self,
+        dns: impl IntoIterator<Item = (Generation, DnsConfigParams)>,
+    ) {
+        let internal_dns = dns
+            .into_iter()
+            .map(|(generation, params)| (generation, Arc::new(params)))
+            .collect();
+        self.system.internal_dns = internal_dns;
+    }
+
+    // Replaces all external DNS configurations with the provided ones. No
+    // log entry is recorded.
+    //
+    // Not public: the only users that want to replace DNS wholesale are
+    // internal to this crate.
+    pub(crate) fn set_external_dns(
+        &mut self,
+        dns: impl IntoIterator<Item = (Generation, DnsConfigParams)>,
+    ) {
+        let external_dns = dns
+            .into_iter()
+            .map(|(generation, params)| (generation, Arc::new(params)))
+            .collect();
+        self.system.external_dns = external_dns;
+    }
+
+    // Consumes the builder, returning the modified system together with the
+    // log of operations performed on it.
+    pub(crate) fn into_parts(self) -> (SimSystem, Vec<SimSystemLogEntry>) {
+        (self.system, self.log)
+    }
+}
+
+/// A log entry corresponding to an individual operation on a
+/// [`SimSystemBuilder`].
+#[derive(Clone, Debug)] +pub enum SimSystemLogEntry { + LoadExample { + collection_id: CollectionUuid, + blueprint_id: Uuid, + internal_dns_version: Generation, + external_dns_version: Generation, + }, + AddCollection(CollectionUuid), + AddBlueprint(Uuid), + Wipe, +} diff --git a/nexus/types/src/deployment/execution/dns.rs b/nexus/types/src/deployment/execution/dns.rs index 66f47c8b5c..8225168dca 100644 --- a/nexus/types/src/deployment/execution/dns.rs +++ b/nexus/types/src/deployment/execution/dns.rs @@ -131,9 +131,9 @@ pub fn blueprint_internal_dns_config( Ok(dns_builder.build_zone()) } -pub fn blueprint_external_dns_config( +pub fn blueprint_external_dns_config<'a>( blueprint: &Blueprint, - silos: &[Name], + silos: impl IntoIterator, external_dns_zone_name: String, ) -> DnsConfigZone { let nexus_external_ips = blueprint_nexus_external_ips(blueprint); diff --git a/uuid-kinds/src/lib.rs b/uuid-kinds/src/lib.rs index 7947062a82..c83717ef2e 100644 --- a/uuid-kinds/src/lib.rs +++ b/uuid-kinds/src/lib.rs @@ -63,6 +63,7 @@ impl_typed_uuid_kind! 
{ Propolis => "propolis", RackInit => "rack_init", RackReset => "rack_reset", + ReconfiguratorSim => "reconfigurator_sim", Region => "region", Sled => "sled", TufRepo => "tuf_repo", From 84ff94c73f34bcad8f01ece4cf53a782ddd24700 Mon Sep 17 00:00:00 2001 From: Rain Date: Thu, 14 Nov 2024 03:56:51 +0000 Subject: [PATCH 2/3] cleaner output Created using spr 1.3.6-beta.1 --- dev-tools/reconfigurator-cli/src/main.rs | 18 +++++++++++---- .../tests/output/cmd-stdout | 22 +++++++++---------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index 6635a9c612..e32da7b181 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -1234,10 +1234,20 @@ fn cmd_load( } } - swriteln!(s, "result:"); - let mut writer = IndentWriter::new(" ", &mut s); - writeln!(writer, "{}", result.system)?; - writeln!(writer, "{}", result.config)?; + swriteln!(s, "result:\n system:"); + { + let mut writer = IndentWriter::new(" ", &mut s); + // It's assumed that the result.system Display impl always ends in a + // newline, so we use `write!` instead of `writeln!`. + write!(writer, "{}", result.system)?; + } + swriteln!(s, " config:"); + { + let mut writer = IndentWriter::new(" ", &mut s); + // It's assumed that the result.config Display impl always ends in a + // newline, so we use `write!` instead of `writeln!`. 
+ write!(writer, "{}", result.config)?; + } Ok(Some(s)) } diff --git a/dev-tools/reconfigurator-cli/tests/output/cmd-stdout b/dev-tools/reconfigurator-cli/tests/output/cmd-stdout index 0c98103343..2ded40b611 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmd-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmd-stdout @@ -83,17 +83,17 @@ wiped system > load state.json loaded data from "state.json" result: - using collection 6e066695-94bc-4250-bd63-fd799c166cc1 as source of sled inventory data - loaded sleds: 04ef3330-c682-4a08-8def-fcc4bef31bcd, 90c1102a-b9f5-4d88-92a2-60d54a2d98cc, dde1c0e2-b10d-4621-b420-f179f7a7a00a - loaded collections: 6e066695-94bc-4250-bd63-fd799c166cc1 - loaded blueprints: (none) - loaded service IP pool ranges: [V4(Ipv4Range { first: 192.0.2.2, last: 192.0.2.20 })] - loaded internal DNS generations: (none) - loaded external DNS generations: (none) - - configured external DNS zone name: oxide.example - configured silo names: example-silo - + system: + using collection 6e066695-94bc-4250-bd63-fd799c166cc1 as source of sled inventory data + loaded sleds: 04ef3330-c682-4a08-8def-fcc4bef31bcd, 90c1102a-b9f5-4d88-92a2-60d54a2d98cc, dde1c0e2-b10d-4621-b420-f179f7a7a00a + loaded collections: 6e066695-94bc-4250-bd63-fd799c166cc1 + loaded blueprints: (none) + loaded service IP pool ranges: [V4(Ipv4Range { first: 192.0.2.2, last: 192.0.2.20 })] + loaded internal DNS generations: (none) + loaded external DNS generations: (none) + config: + configured external DNS zone name: oxide.example + configured silo names: example-silo > sled-show dde1c0e2-b10d-4621-b420-f179f7a7a00a From 2dc6988fbf7f0ce433a44c659ab57bf28032f11e Mon Sep 17 00:00:00 2001 From: Rain Date: Thu, 14 Nov 2024 03:57:20 +0000 Subject: [PATCH 3/3] trailing whitespace Created using spr 1.3.6-beta.1 --- nexus/reconfigurator/simulation/src/sim.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/reconfigurator/simulation/src/sim.rs 
b/nexus/reconfigurator/simulation/src/sim.rs index fc41804a03..06e89288ad 100644 --- a/nexus/reconfigurator/simulation/src/sim.rs +++ b/nexus/reconfigurator/simulation/src/sim.rs @@ -38,7 +38,7 @@ pub struct Simulator { log: slog::Logger, // The set of terminal nodes in the tree -- all states are reachable from // one or more of these. - // + // // Similar to the list of Git branches or Jujutsu/Mercurial heads. // // In the future, it would be interesting to store a chain of every set of