From 1727f5fb1d26064baa94920aea5377ce0ee70019 Mon Sep 17 00:00:00 2001
From: James MacMahon
Date: Fri, 18 Mar 2022 15:24:12 -0400
Subject: [PATCH 01/16] Supply real volume data to propolis

Instance creation can now specify that existing disks be attached to the
instance (disk creation during instance creation is not yet implemented).

There are a few important changes here.

Nexus will instruct the sled agent to send volume construction requests
when it ensures the instance, and therefore has to build these requests.
This is done in sdc_regions_ensure as regions are allocated.

sic_create_instance_record no longer returns the runtime of an instance,
but the instance's name.

Importantly, the instance creation saga now calls Nexus'
instance_set_runtime instead of duplicating the logic to call
instance_put. Nexus' instance_set_runtime will populate the
InstanceHardware with NICs and disks.

Nexus' disk_set_runtime now *optionally* calls the sled agent's disk_put
if the instance is already running; otherwise the disk will be attached
as part of the instance ensure by specifying volume requests.

request_body_max_bytes had to be increased to 1M because the volume
requests could be arbitrarily large (and eventually, the cloud-init
bytes will add to the body size too).

Note: spawning an instance with this *will not work* unless the propolis
zone has a valid static IPv6 address. This PR has grown enough that I
think that work should be separate.
---
 Cargo.lock                                  | 232 +++++++++++++++-
 common/src/api/external/error.rs            |   6 +
 common/src/api/external/mod.rs              |   4 +
 nexus/Cargo.toml                            |   3 +-
 nexus/src/db/model.rs                       |  31 ++-
 nexus/src/external_api/params.rs            |  21 ++
 nexus/src/nexus.rs                          | 140 ++++++++--
 nexus/src/sagas.rs                          | 257 ++++++++++++++----
 nexus/test-utils/src/resource_helpers.rs    |   1 +
 nexus/tests/integration_tests/endpoints.rs  |   1 +
 nexus/tests/integration_tests/instances.rs  |  95 +++++++
 .../integration_tests/subnet_allocation.rs  |   1 +
 openapi/nexus.json                          |  74 +++++
 openapi/sled-agent.json                     | 177 ++++++++++++
 sled-agent/Cargo.toml                       |   4 +-
 sled-agent/src/bin/sled-agent-sim.rs        |   2 +-
 sled-agent/src/bin/sled-agent.rs            |   6 +-
 sled-agent/src/instance.rs                  |  13 +-
 sled-agent/src/instance_manager.rs          |   1 +
 sled-agent/src/params.rs                    |   1 +
 sled-agent/src/sim/storage.rs               |   1 -
 tools/oxapi_demo                            |  20 +-
 22 files changed, 1007 insertions(+), 84 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index b3e92df11d..b7cd2d86a8 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -8,6 +8,53 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
 
+[[package]]
+name = "aead"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b613b8e1e3cf911a086f53f03bf286f52fd7a7258e4fa606f0ef220d39d8877"
+dependencies = [
+ "generic-array 0.14.4",
+]
+
+[[package]]
+name = "aes"
+version = "0.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9e8b47f52ea9bae42228d07ec09eb676433d7c4ed1ebdf0f1d1c29ed446f1ab8"
+dependencies = [
+ "cfg-if",
+ "cipher",
+ "cpufeatures",
+ "opaque-debug 0.3.0",
+]
+
+[[package]]
+name = "aes-gcm-siv"
+version = "0.10.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "589c637f0e68c877bbd59a4599bbe849cac8e5f3e4b5a3ebae8f528cd218dcdc"
+dependencies = [
+ "aead",
+ "aes",
+ "cipher",
+ "ctr",
+ "polyval",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "ahash"
+version = "0.7.6"
+source =
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "0.7.18" @@ -305,6 +352,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "cipher" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ee52072ec15386f770805afd189a01c8841be8696bed250fa2f13c4c0d6dfb7" +dependencies = [ + "generic-array 0.14.4", +] + [[package]] name = "clap" version = "2.34.0" @@ -518,10 +574,42 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "crucible" +version = "0.0.1" +source = "git+https://github.com/oxidecomputer/crucible?rev=4090a023b77dcab7a5000057cf2c96cdbb0469b6#4090a023b77dcab7a5000057cf2c96cdbb0469b6" +dependencies = [ + "aes-gcm-siv", + "anyhow", + "base64", + "bytes", + "crucible-common", + "crucible-protocol", + "crucible-scope", + "dropshot", + "futures", + "futures-core", + "rand", + "rand_chacha", + "reqwest", + "ringbuffer", + "schemars", + "serde", + "serde_json", + "structopt", + "tokio", + "tokio-rustls", + "tokio-util 0.7.0", + "toml", + "tracing", + "usdt 0.3.1", + "uuid", +] + [[package]] name = "crucible-agent-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=3e7e49eeb88fa8ad74375b0642aabd4224b1f2cb#3e7e49eeb88fa8ad74375b0642aabd4224b1f2cb" +source = "git+https://github.com/oxidecomputer/crucible?rev=32e603bdc3b6e5eb6b880a2ddde7e05f043b5357#32e603bdc3b6e5eb6b880a2ddde7e05f043b5357" dependencies = [ "anyhow", "chrono", @@ -533,6 +621,53 @@ dependencies = [ "serde_json", ] +[[package]] +name = "crucible-common" +version = "0.0.0" +source = "git+https://github.com/oxidecomputer/crucible?rev=4090a023b77dcab7a5000057cf2c96cdbb0469b6#4090a023b77dcab7a5000057cf2c96cdbb0469b6" +dependencies = [ + "anyhow", + "rusqlite", + "rustls-pemfile 0.3.0", + "serde", + "serde_json", + "tempfile", + "thiserror", + "tokio-rustls", + "toml", + "twox-hash", + "uuid", +] + +[[package]] +name = "crucible-protocol" +version = "0.0.0" +source = "git+https://github.com/oxidecomputer/crucible?rev=4090a023b77dcab7a5000057cf2c96cdbb0469b6#4090a023b77dcab7a5000057cf2c96cdbb0469b6" +dependencies = [ + "anyhow", + "bincode", + "bytes", + "crucible-common", + "serde", + "tokio-util 0.7.0", + "uuid", +] + +[[package]] +name = "crucible-scope" +version = "0.0.0" +source = "git+https://github.com/oxidecomputer/crucible?rev=4090a023b77dcab7a5000057cf2c96cdbb0469b6#4090a023b77dcab7a5000057cf2c96cdbb0469b6" +dependencies = [ + "anyhow", + "futures", + "futures-core", + "serde", + "serde_json", + "tokio", + "tokio-util 0.7.0", + "toml", +] + [[package]] name = "crunchy" version = "0.2.2" @@ -592,6 +727,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "ctr" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "049bb91fb4aaf0e3c7efa6cd5ef877dbbbd15b39dad06d9948de4ec8a75761ea" +dependencies = [ + "cipher", +] + [[package]] name = "curve25519-dalek" version = "4.0.0-pre.1" @@ -929,6 +1073,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fastrand" version = "1.6.0" @@ -1251,6 +1401,18 @@ name = "hashbrown" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +dependencies = [ + "ahash", +] + +[[package]] +name = "hashlink" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7249a3129cbc1ffccd74857f81464a323a152173cdb134e0fd81bc803b29facf" +dependencies = [ + "hashbrown", +] [[package]] name = "headers" @@ -1603,6 +1765,16 @@ version = "0.2.119" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1bf2e165bb3457c8e098ea76f3e3bc9db55f87aa90d52d0e6be741470916aaa4" +[[package]] +name = "libsqlite3-sys" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb644c388dfaefa18035c12614156d285364769e818893da0dda9030c80ad2ba" +dependencies = [ + "pkg-config", + "vcpkg", +] + [[package]] name = "lock_api" version = "0.4.6" @@ -1973,6 +2145,7 @@ dependencies = [ "api_identity", "async-bb8-diesel", "async-trait", + "base64", "bb8", "chrono", "cookie", @@ -2697,6 +2870,18 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "polyval" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8419d2b623c7c0896ff2d5d96e2cb4ede590fed28fcc34934f4c33c036e620a1" +dependencies = [ + "cfg-if", + "cpufeatures", + "opaque-debug 0.3.0", + "universal-hash", +] + [[package]] name = "postgres-protocol" version = "0.6.3" @@ -2881,8 +3066,9 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=0e3798510ae190131f63b9df767ec01b2beacf91#0e3798510ae190131f63b9df767ec01b2beacf91" +source = "git+https://github.com/oxidecomputer/propolis?rev=832a86afce308d3210685af987ace1ba74c2ecd6#832a86afce308d3210685af987ace1ba74c2ecd6" dependencies = [ + "crucible", "reqwest", "ring", "schemars", @@ -3112,6 +3298,21 @@ dependencies = [ "array-init", ] +[[package]] +name = "rusqlite" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85127183a999f7db96d1a976a309eebbfb6ea3b0b400ddd8340190129de6eb7a" +dependencies = [ + "bitflags", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "memchr", + "smallvec", +] + [[package]] name = "rustc_version" version = "0.1.7" @@ -3742,6 +3943,12 @@ dependencies = [ "der", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "steno" version = "0.1.0" @@ -4299,6 +4506,17 @@ dependencies = [ "toml", ] +[[package]] +name = "twox-hash" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ee73e6e4924fe940354b8d4d98cad5231175d615cd855b758adc658c0aac6a0" +dependencies = [ + "cfg-if", + "rand", + "static_assertions", +] + [[package]] name = "typenum" version = "1.14.0" @@ -4400,6 +4618,16 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f14ee04d9415b52b3aeab06258a3f07093182b88ba0f9b8d203f211a7a7d41c7" +[[package]] +name = "universal-hash" +version = "0.4.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f214e8f697e925001e66ec2c6e37a4ef93f0f78c2eed7814394e10c62025b05" +dependencies = [ + "generic-array 0.14.4", + "subtle", +] + [[package]] name = "untrusted" version = "0.7.1" diff --git a/common/src/api/external/error.rs b/common/src/api/external/error.rs index f66de9cd30..7efeae4815 100644 --- a/common/src/api/external/error.rs +++ b/common/src/api/external/error.rs @@ -296,6 +296,12 @@ impl From> for Error { } } +impl From for Error { + fn from(e: serde_json::Error) -> Self { + Error::internal_error(&e.to_string()) + } +} + /// Like [`assert!`], except that instead of panicking, this function returns an /// `Err(Error::InternalError)` with an appropriate message if the given /// condition is not true. diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index f79e7a4b69..68a57161de 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -467,6 +467,10 @@ impl Generation { assert!(next_gen <= u64::try_from(i64::MAX).unwrap()); Generation(next_gen) } + + pub fn get(&self) -> u64 { + self.0 + } } impl Display for Generation { diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 87ad139102..7bdf38cb2d 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -11,9 +11,10 @@ path = "../rpaths" anyhow = "1.0" async-bb8-diesel = { git = "https://github.com/oxidecomputer/async-bb8-diesel", rev = "c849b717be" } async-trait = "0.1.51" +base64 = "0.13.0" bb8 = "0.7.1" cookie = "0.16" -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "3e7e49eeb88fa8ad74375b0642aabd4224b1f2cb" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "32e603bdc3b6e5eb6b880a2ddde7e05f043b5357" } # Tracking pending 2.0 version. diesel = { git = "https://github.com/diesel-rs/diesel", rev = "ce77c382", features = ["postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] } futures = "0.3.21" diff --git a/nexus/src/db/model.rs b/nexus/src/db/model.rs index ebc837db71..410380e157 100644 --- a/nexus/src/db/model.rs +++ b/nexus/src/db/model.rs @@ -646,7 +646,11 @@ impl Sled { pub fn address(&self) -> SocketAddr { // TODO: avoid this unwrap - SocketAddr::new(self.ip.ip(), u16::try_from(self.port).unwrap()) + self.address_with_port(u16::try_from(self.port).unwrap()) + } + + pub fn address_with_port(&self, port: u16) -> SocketAddr { + SocketAddr::new(self.ip.ip(), port) } } @@ -787,7 +791,11 @@ impl Dataset { pub fn address(&self) -> SocketAddr { // TODO: avoid this unwrap - SocketAddr::new(self.ip.ip(), u16::try_from(self.port).unwrap()) + self.address_with_port(u16::try_from(self.port).unwrap()) + } + + pub fn address_with_port(&self, port: u16) -> SocketAddr { + SocketAddr::new(self.ip.ip(), port) } } @@ -905,6 +913,10 @@ impl Volume { data, } } + + pub fn data(&self) -> &String { + &self.data + } } /// Describes an organization within the database. @@ -1266,6 +1278,10 @@ impl Disk { pub fn runtime(&self) -> DiskRuntimeState { self.runtime_state.clone() } + + pub fn id(&self) -> Uuid { + self.identity.id + } } /// Conversion to the external API type. 
@@ -1319,6 +1335,17 @@ impl DiskRuntimeState { } } + pub fn attach(self, instance_id: Uuid) -> Self { + Self { + disk_state: external::DiskState::Attached(instance_id) + .label() + .to_string(), + attach_instance_id: Some(instance_id), + gen: self.gen.next().into(), + time_updated: Utc::now(), + } + } + pub fn detach(self) -> Self { Self { disk_state: external::DiskState::Detached.label().to_string(), diff --git a/nexus/src/external_api/params.rs b/nexus/src/external_api/params.rs index 2775df390a..16734a4758 100644 --- a/nexus/src/external_api/params.rs +++ b/nexus/src/external_api/params.rs @@ -105,6 +105,23 @@ impl Default for InstanceNetworkInterfaceAttachment { } } +/// Describe the instance's disks at creation time +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum InstanceDiskAttachment { + /// During instance creation, create and attach disks + Create(DiskCreate), + + /// During instance creation, attach this disk + Attach(InstanceDiskAttach), +} + +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct InstanceDiskAttach { + /// A disk name to attach + pub disk: Name, +} + /// Create-time parameters for an [`Instance`](omicron_common::api::external::Instance) #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct InstanceCreate { @@ -117,6 +134,10 @@ pub struct InstanceCreate { /// The network interfaces to be created for this instance. #[serde(default)] pub network_interfaces: InstanceNetworkInterfaceAttachment, + + /// The disks to be created or attached for this instance. + #[serde(default)] + pub disks: Vec, } /// Migration parameters for an [`Instance`](omicron_common::api::external::Instance) diff --git a/nexus/src/nexus.rs b/nexus/src/nexus.rs index 96131ff6e9..383f6898ff 100644 --- a/nexus/src/nexus.rs +++ b/nexus/src/nexus.rs @@ -858,6 +858,8 @@ impl Nexus { let saga_params = Arc::new(sagas::ParamsInstanceCreate { serialized_authn: authn::saga::Serialized::for_opctx(opctx), + organization_name: organization_name.clone().into(), + project_name: project_name.clone().into(), project_id: authz_project.id(), create_params: params.clone(), }); @@ -1209,7 +1211,7 @@ impl Nexus { /// Modifies the runtime state of the Instance as requested. This generally /// means booting or halting the Instance. - async fn instance_set_runtime( + pub(crate) async fn instance_set_runtime( &self, opctx: &OpContext, authz_instance: &authz::Instance, @@ -1223,23 +1225,71 @@ impl Nexus { &requested, )?; - let sa = self.instance_sled(&db_instance).await?; + // Gather disk information and turn that into DiskRequests + let disks = self + .db_datastore + .instance_list_disks( + &opctx, + &authz_instance, + &DataPageParams { + marker: None, + direction: dropshot::PaginationOrder::Ascending, + // TODO: is there a limit to the number of disks an instance + // can have attached? + limit: std::num::NonZeroU32::new(8).unwrap(), + }, + ) + .await?; + + let mut disk_reqs = vec![]; + for (i, disk) in disks.iter().enumerate() { + let volume = self.db_datastore.volume_get(disk.volume_id).await?; + disk_reqs.push(sled_agent_client::types::DiskRequest { + name: disk.name().to_string(), + slot: sled_agent_client::types::Slot(i as u8), + // TODO offer ability to attach read-only? 
+ read_only: false, + device: "nvme".to_string(), + gen: disk.runtime_state.gen.0.get(), + volume_construction_request: serde_json::from_str( + &volume.data(), + )?, + }); + } + + let nics: Vec = self + .db_datastore + .instance_list_network_interfaces( + &opctx, + &authz_instance, + &DataPageParams { + marker: None, + direction: dropshot::PaginationOrder::Ascending, + // TODO: is there a limit to the number of NICs an instance + // can have attached? + limit: std::num::NonZeroU32::new(8).unwrap(), + }, + ) + .await? + .iter() + .map(|x| x.clone().into()) + .collect(); // Ask the sled agent to begin the state change. Then update the // database to reflect the new intermediate state. If this update is // not the newest one, that's fine. That might just mean the sled agent // beat us to it. - // TODO: Populate this with an appropriate NIC. - // See also: sic_create_instance_record in sagas.rs for a similar - // construction. let instance_hardware = sled_agent_client::types::InstanceHardware { runtime: sled_agent_client::types::InstanceRuntimeState::from( db_instance.runtime().clone(), ), - nics: vec![], + nics: nics.iter().map(|nic| nic.clone().into()).collect(), + disks: disk_reqs, }; + let sa = self.instance_sled(&db_instance).await?; + let new_runtime = sa .instance_put( &db_instance.id(), @@ -1381,6 +1431,7 @@ impl Nexus { opctx, &authz_disk, &db_disk, + &db_instance, self.instance_sled(&db_instance).await?, sled_agent_client::types::DiskStateRequested::Attached( *instance_id, @@ -1455,6 +1506,7 @@ impl Nexus { opctx, &authz_disk, &db_disk, + &db_instance, self.instance_sled(&db_instance).await?, sled_agent_client::types::DiskStateRequested::Detached, ) @@ -1469,30 +1521,74 @@ impl Nexus { opctx: &OpContext, authz_disk: &authz::Disk, db_disk: &db::model::Disk, + db_instance: &db::model::Instance, sa: Arc, requested: sled_agent_client::types::DiskStateRequested, ) -> Result<(), Error> { + use sled_agent_client::types::DiskStateRequested; + let runtime: DiskRuntimeState = db_disk.runtime().into(); opctx.authorize(authz::Action::Modify, authz_disk).await?; - // Ask the Sled Agent to begin the state change. Then update the - // database to reflect the new intermediate state. - let new_runtime = sa - .disk_put( - &authz_disk.id(), - &sled_agent_client::types::DiskEnsureBody { - initial_runtime: - sled_agent_client::types::DiskRuntimeState::from( - runtime, - ), - target: requested, - }, - ) - .await - .map_err(Error::from)?; + let new_runtime: DiskRuntimeState = match &db_instance + .runtime_state + .state + .state() + { + InstanceState::Running | InstanceState::Starting => { + /* + * If there's a propolis zone for this instnace, ask the Sled + * Agent to hot-plug or hot-remove disk. Then update the + * database to reflect the new intermediate state. + * + * TODO this will probably involve volume construction requests + * as well! + */ + let new_runtime = sa + .disk_put( + &authz_disk.id(), + &sled_agent_client::types::DiskEnsureBody { + initial_runtime: + sled_agent_client::types::DiskRuntimeState::from( + runtime, + ), + target: requested, + }, + ) + .await + .map_err(Error::from)?; + + new_runtime.into_inner().into() + } - let new_runtime: DiskRuntimeState = new_runtime.into_inner().into(); + InstanceState::Creating => { + // If we're still creating this instance, then the disks will be + // attached as part of the instance ensure by specifying volume + // construction requests. 
+ match requested { + DiskStateRequested::Detached => { + db_disk.runtime().detach().into() + } + DiskStateRequested::Attached(id) => { + db_disk.runtime().attach(id).into() + } + DiskStateRequested::Destroyed => { + todo!() + } + DiskStateRequested::Faulted => { + todo!() + } + } + } + + _ => { + todo!( + "implement state {:?}", + db_instance.runtime_state.state.state() + ); + } + }; self.db_datastore .disk_update_runtime(opctx, authz_disk, &new_runtime.into()) diff --git a/nexus/src/sagas.rs b/nexus/src/sagas.rs index 53197ecb60..1191b6e466 100644 --- a/nexus/src/sagas.rs +++ b/nexus/src/sagas.rs @@ -30,6 +30,7 @@ use omicron_common::api::external::Name; use omicron_common::api::external::NetworkInterface; use omicron_common::api::internal::nexus::InstanceRuntimeState; use omicron_common::backoff::{self, BackoffError}; +use rand::{rngs::StdRng, RngCore, SeedableRng}; use serde::Deserialize; use serde::Serialize; use sled_agent_client::types::InstanceEnsureBody; @@ -113,6 +114,8 @@ async fn saga_generate_uuid( #[derive(Debug, Deserialize, Serialize)] pub struct ParamsInstanceCreate { pub serialized_authn: authn::saga::Serialized, + pub organization_name: Name, + pub project_name: Name, pub project_id: Uuid, pub create_params: params::InstanceCreate, } @@ -149,7 +152,7 @@ pub fn saga_instance_create() -> SagaTemplate { ); template_builder.append( - "initial_runtime", + "instance_name", "CreateInstanceRecord", new_action_noop_undo(sic_create_instance_record), ); @@ -184,12 +187,22 @@ pub fn saga_instance_create() -> SagaTemplate { sic_create_network_interfaces_undo, ), ); + template_builder.append( "network_interfaces", "CreateNetworkInterfaces", new_action_noop_undo(sic_create_network_interfaces), ); + template_builder.append( + "attach_disks", + "AttachDisksToInstance", + ActionFunc::new_action( + sic_attach_disks_to_instance, + sic_attach_disks_to_instance_undo, + ), + ); + template_builder.append( "instance_ensure", "InstanceEnsure", @@ -475,9 +488,77 @@ async fn sic_create_network_interfaces_undo( Ok(()) } +async fn sic_attach_disks_to_instance( + sagactx: ActionContext, +) -> Result<(), ActionError> { + ensure_instance_disk_attach_state(sagactx, true).await +} + +async fn sic_attach_disks_to_instance_undo( + sagactx: ActionContext, +) -> Result<(), anyhow::Error> { + Ok(ensure_instance_disk_attach_state(sagactx, false).await?) 
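+    // Assumed steno contract: this undo runs only when a later node in the
+    // saga fails; it reuses ensure_instance_disk_attach_state with
+    // attached = false to detach whatever the forward action attached.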
+} + +async fn ensure_instance_disk_attach_state( + sagactx: ActionContext, + attached: bool, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let saga_params = sagactx.saga_params(); + let opctx = + OpContext::for_saga_action(&sagactx, &saga_params.serialized_authn); + + let saga_disks = &saga_params.create_params.disks; + let instance_name = sagactx.lookup::("instance_name")?; + + let organization_name: db::model::Name = + saga_params.organization_name.clone().into(); + let project_name: db::model::Name = saga_params.project_name.clone().into(); + + for disk in saga_disks { + match disk { + params::InstanceDiskAttachment::Create(_) => { + todo!(); + } + params::InstanceDiskAttachment::Attach(instance_disk_attach) => { + let disk_name: db::model::Name = + instance_disk_attach.disk.clone().into(); + + if attached { + osagactx + .nexus() + .instance_attach_disk( + &opctx, + &organization_name, + &project_name, + &instance_name, + &disk_name, + ) + .await + } else { + osagactx + .nexus() + .instance_detach_disk( + &opctx, + &organization_name, + &project_name, + &instance_name, + &disk_name, + ) + .await + } + .map_err(ActionError::action_failed)?; + } + } + } + + Ok(()) +} + async fn sic_create_instance_record( sagactx: ActionContext, -) -> Result { +) -> Result { let osagactx = sagactx.user_data(); let params = sagactx.saga_params(); let sled_uuid = sagactx.lookup::("server_id"); @@ -511,7 +592,7 @@ async fn sic_create_instance_record( .await .map_err(ActionError::action_failed)?; - Ok(instance.runtime().clone().into()) + Ok(instance.name().clone()) } async fn sic_instance_ensure( @@ -519,50 +600,39 @@ async fn sic_instance_ensure( ) -> Result<(), ActionError> { // TODO-correctness is this idempotent? let osagactx = sagactx.user_data(); + let params = sagactx.saga_params(); let runtime_params = InstanceRuntimeStateRequested { run_state: InstanceStateRequested::Running, migration_params: None, }; - let instance_id = sagactx.lookup::("instance_id")?; - let sled_uuid = sagactx.lookup::("server_id")?; - let nics = sagactx - .lookup::>>("network_interfaces")? - .unwrap_or_default(); - let runtime = sagactx.lookup::("initial_runtime")?; - let initial_hardware = InstanceHardware { - runtime: runtime.into(), - nics: nics.into_iter().map(|nic| nic.into()).collect(), - }; - let sa = osagactx - .sled_client(&sled_uuid) + + let instance_name = sagactx.lookup::("instance_name")?; + let opctx = OpContext::for_saga_action(&sagactx, ¶ms.serialized_authn); + + let authz_project = osagactx + .datastore() + .project_lookup_by_id(params.project_id) .await .map_err(ActionError::action_failed)?; - // Ask the sled agent to begin the state change. Then update the database - // to reflect the new intermediate state. If this update is not the newest - // one, that's fine. That might just mean the sled agent beat us to it. 
- let new_runtime_state = sa - .instance_put( - &instance_id, - &InstanceEnsureBody { - initial: initial_hardware, - target: runtime_params, - migrate: None, - }, - ) + let (authz_instance, instance) = osagactx + .datastore() + .instance_fetch(&opctx, &authz_project, &instance_name) .await - .map_err(omicron_common::api::external::Error::from) .map_err(ActionError::action_failed)?; - let new_runtime_state: InstanceRuntimeState = - new_runtime_state.into_inner().into(); - osagactx - .datastore() - .instance_update_runtime(&instance_id, &new_runtime_state.into()) + .nexus() + .instance_set_runtime( + &opctx, + &authz_instance, + &instance, + runtime_params, + ) .await - .map(|_| ()) - .map_err(ActionError::action_failed) + .map_err(ActionError::action_failed)?; + + Ok(()) } // "Migrate Instance" saga template @@ -670,6 +740,8 @@ async fn sim_instance_migrate( runtime: runtime.into(), // TODO: populate NICs nics: vec![], + // TODO: populate disks + disks: vec![], }; let target = InstanceRuntimeStateRequested { run_state: InstanceStateRequested::Migrating, @@ -879,7 +951,6 @@ async fn ensure_region_in_dataset( // TODO: Can we avoid casting from UUID to string? // NOTE: This'll require updating the crucible agent client. id: RegionId(region.id().to_string()), - volume_id: region.volume_id().to_string(), encrypted: region.encrypted(), cert_pem: None, key_pem: None, @@ -927,30 +998,120 @@ const MAX_CONCURRENT_REGION_REQUESTS: usize = 3; async fn sdc_regions_ensure( sagactx: ActionContext, -) -> Result<(), ActionError> { +) -> Result { let log = sagactx.user_data().log(); let datasets_and_regions = sagactx .lookup::>( "datasets_and_regions", )?; + let request_count = datasets_and_regions.len(); - futures::stream::iter(datasets_and_regions) + + // Allocate regions, and additionally return the dataset that the region was + // allocated in. + let datasets_and_regions: Vec<( + db::model::Dataset, + crucible_agent_client::types::Region, + )> = futures::stream::iter(datasets_and_regions) .map(|(dataset, region)| async move { - ensure_region_in_dataset(log, &dataset, ®ion).await + match ensure_region_in_dataset(log, &dataset, ®ion).await { + Ok(result) => Ok((dataset, result)), + Err(e) => Err(e), + } }) // Execute the allocation requests concurrently. .buffer_unordered(std::cmp::min( request_count, MAX_CONCURRENT_REGION_REQUESTS, )) - .collect::>>() + .collect::, + >>() .await .into_iter() - .collect::, _>>() + .collect::, + Error, + >>() .map_err(ActionError::action_failed)?; - // TODO: Region has a port value, we could store this in the DB? - Ok(()) + // Assert each region has the same block size, otherwise Volume creation + // will fail. + let all_region_have_same_block_size = datasets_and_regions + .windows(2) + .all(|w| w[0].1.block_size == w[1].1.block_size); + + if !all_region_have_same_block_size { + return Err(ActionError::new_subsaga( + // XXX wrong error type? + anyhow::anyhow!( + "volume creation will fail due to block size mismatch" + ), + )); + } + + let block_size = datasets_and_regions[0].1.block_size; + + // Store volume details in db + let mut rng = StdRng::from_entropy(); + let volume_construction_request = + sled_agent_client::types::VolumeConstructionRequest::Volume { + block_size, + sub_volumes: vec![ + // XXX allocation algorithm only supports one sub vol? + sled_agent_client::types::VolumeConstructionRequest::Region { + block_size, + // gen of 0 is here, these regions were just allocated. 
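+                    // (Assumed Crucible semantics: the generation number
+                    // arbitrates which upstairs connection is newest, so the
+                    // gen Nexus later sends at attach time, taken from
+                    // disk.runtime_state.gen, should be at least this
+                    // initial value.)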
+ gen: 0, + opts: sled_agent_client::types::CrucibleOpts { + target: datasets_and_regions + .iter() + .map(|(dataset, region)| { + dataset + .address_with_port(region.port_number) + .to_string() + }) + .collect(), + + lossy: false, + + // all downstairs will expect encrypted blocks + key: Some(base64::encode({ + // XXX the current encryption key + // requirement is 32 bytes, what if that + // changes? + let mut random_bytes: [u8; 32] = [0; 32]; + rng.fill_bytes(&mut random_bytes); + random_bytes + })), + + // TODO TLS, which requires sending X509 stuff during + // downstairs region allocation too. + cert_pem: None, + key_pem: None, + root_cert_pem: None, + + // TODO open a control socket for the whole volume, not + // in the sub volumes + control: None, + }, + }, + ], + read_only_parent: None, + }; + + let volume_data = serde_json::to_string(&volume_construction_request) + .map_err(|_| { + ActionError::new_subsaga( + // XXX wrong error type? + anyhow::anyhow!("serde_json::to_string"), + ) + })?; + + Ok(volume_data) } async fn delete_regions( @@ -1008,16 +1169,16 @@ async fn sdc_create_volume_record( let osagactx = sagactx.user_data(); let volume_id = sagactx.lookup::("volume_id")?; - let volume = db::model::Volume::new( - volume_id, - // TODO: Patch this up with serialized contents that Crucible can use. - "Some Data".to_string(), - ); + let volume_data = sagactx.lookup::("regions_ensure")?; + + let volume = db::model::Volume::new(volume_id, volume_data); + let volume_created = osagactx .datastore() .volume_create(volume) .await .map_err(ActionError::action_failed)?; + Ok(volume_created) } diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index 3f7068e77d..2896244be6 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -143,6 +143,7 @@ pub async fn create_instance( hostname: String::from("the_host"), network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + disks: vec![], }, ) .await diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 6af67d4f04..492ec4c4b8 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -166,6 +166,7 @@ lazy_static! 
{ hostname: String::from("demo-instance"), network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + disks: vec![], }; } diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index e166ad3fde..3e9ad1f32c 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -9,8 +9,12 @@ use http::StatusCode; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; +use nexus_test_utils::resource_helpers::create_disk; use nexus_test_utils::resource_helpers::objects_list_page_authz; +use nexus_test_utils::resource_helpers::DiskTest; use omicron_common::api::external::ByteCount; +use omicron_common::api::external::Disk; +use omicron_common::api::external::DiskState; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::Instance; use omicron_common::api::external::InstanceCpuCount; @@ -144,6 +148,7 @@ async fn test_instances_create_reboot_halt( hostname: instance.hostname.clone(), network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + disks: vec![], })) .expect_status(Some(StatusCode::BAD_REQUEST)), ) @@ -551,6 +556,7 @@ async fn test_instance_with_single_explicit_ip_address( memory: ByteCount::from_mebibytes_u32(4), hostname: String::from("nic-test"), network_interfaces: interface_params, + disks: vec![], }; let response = NexusRequest::objects_post(client, &url_instances, &instance_params) @@ -666,6 +672,7 @@ async fn test_instance_with_new_custom_network_interfaces( memory: ByteCount::from_mebibytes_u32(4), hostname: String::from("nic-test"), network_interfaces: interface_params, + disks: vec![], }; let response = NexusRequest::objects_post(client, &url_instances, &instance_params) @@ -758,6 +765,7 @@ async fn test_instance_create_delete_network_interface( memory: ByteCount::from_mebibytes_u32(4), hostname: String::from("nic-test"), network_interfaces: params::InstanceNetworkInterfaceAttachment::None, + disks: vec![], }; let response = NexusRequest::objects_post(client, &url_instances, &instance_params) @@ -940,6 +948,7 @@ async fn test_instance_with_multiple_nics_unwinds_completely( memory: ByteCount::from_mebibytes_u32(4), hostname: String::from("nic-test"), network_interfaces: interface_params, + disks: vec![], }; let builder = RequestBuilder::new(client, http::Method::POST, &url_instances) @@ -969,6 +978,92 @@ async fn test_instance_with_multiple_nics_unwinds_completely( ); } +/// Create a disk and attach during instance creation +#[nexus_test] +async fn test_attach_one_disk_to_instance(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + + const ORGANIZATION_NAME: &str = "bobs-barrel-of-bytes"; + const PROJECT_NAME: &str = "bit-barrel"; + + // Test pre-reqs + DiskTest::new(&cptestctx).await; + create_organization(&client, ORGANIZATION_NAME).await; + create_project(client, ORGANIZATION_NAME, PROJECT_NAME).await; + + // Create the "probablydata" disk + create_disk(&client, ORGANIZATION_NAME, PROJECT_NAME, "probablydata").await; + + // Verify disk is there and currently detached + let url_project_disks = format!( + "/organizations/{}/projects/{}/disks", + ORGANIZATION_NAME, PROJECT_NAME, + ); + let disks: Vec = NexusRequest::iter_collection_authn( + client, + &url_project_disks, + "", + None, + ) + .await + .expect("failed to list disks") + .all_items; + assert_eq!(disks.len(), 1); + 
assert_eq!(disks[0].state, DiskState::Detached); + + // Create the instance + let instance_params = params::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: Name::try_from(String::from("nfs")).unwrap(), + description: String::from("probably serving data"), + }, + ncpus: InstanceCpuCount::try_from(2).unwrap(), + memory: ByteCount::from_mebibytes_u32(4), + hostname: String::from("nfs"), + network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + disks: vec![params::InstanceDiskAttachment::Attach( + params::InstanceDiskAttach { + disk: Name::try_from(String::from("probablydata")).unwrap(), + }, + )], + }; + + let url_instances = format!( + "/organizations/{}/projects/{}/instances", + ORGANIZATION_NAME, PROJECT_NAME + ); + let builder = + RequestBuilder::new(client, http::Method::POST, &url_instances) + .body(Some(&instance_params)) + .expect_status(Some(http::StatusCode::CREATED)); + let response = NexusRequest::new(builder) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Expected instance creation to work!"); + + let instance = response.parsed_body::().unwrap(); + + // Verify disk is attached to the instance + let url_instance_disks = format!( + "/organizations/{}/projects/{}/instances/{}/disks", + ORGANIZATION_NAME, + PROJECT_NAME, + instance.identity.name.as_str(), + ); + let disks: Vec = NexusRequest::iter_collection_authn( + client, + &url_instance_disks, + "", + None, + ) + .await + .expect("failed to list disks") + .all_items; + assert_eq!(disks.len(), 1); + assert_eq!(disks[0].state, DiskState::Attached(instance.identity.id)); +} + async fn instance_get( client: &ClientTestContext, instance_url: &str, diff --git a/nexus/tests/integration_tests/subnet_allocation.rs b/nexus/tests/integration_tests/subnet_allocation.rs index 47a0e5ef72..9c8279d852 100644 --- a/nexus/tests/integration_tests/subnet_allocation.rs +++ b/nexus/tests/integration_tests/subnet_allocation.rs @@ -40,6 +40,7 @@ async fn create_instance_expect_failure( memory: ByteCount::from_mebibytes_u32(256), hostname: name.to_string(), network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + disks: vec![], }; NexusRequest::new( diff --git a/openapi/nexus.json b/openapi/nexus.json index e14c22ff71..97aea62914 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -4597,6 +4597,13 @@ "description": { "type": "string" }, + "disks": { + "description": "The disks to be created or attached for this instance.", + "type": "array", + "items": { + "$ref": "#/components/schemas/InstanceDiskAttachment" + } + }, "hostname": { "type": "string" }, @@ -4626,6 +4633,73 @@ "ncpus" ] }, + "InstanceDiskAttachment": { + "description": "Describe the instance's disks at creation time", + "oneOf": [ + { + "description": "During instance creation, create and attach disks", + "type": "object", + "properties": { + "description": { + "type": "string" + }, + "name": { + "$ref": "#/components/schemas/Name" + }, + "size": { + "description": "size of the Disk", + "allOf": [ + { + "$ref": "#/components/schemas/ByteCount" + } + ] + }, + "snapshot_id": { + "nullable": true, + "description": "id for snapshot from which the Disk should be created, if any", + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "create" + ] + } + }, + "required": [ + "description", + "name", + "size", + "type" + ] + }, + { + "description": "During instance creation, attach this disk", + "type": "object", + "properties": { + "disk": { + "description": "A disk name to 
attach", + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "attach" + ] + } + }, + "required": [ + "disk", + "type" + ] + } + ] + }, "InstanceMigrate": { "description": "Migration parameters for an [`Instance`](omicron_common::api::external::Instance)", "type": "object", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 2b5a3bc1b9..5d3fe21f20 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -199,6 +199,44 @@ "format": "uint64", "minimum": 0 }, + "CrucibleOpts": { + "type": "object", + "properties": { + "cert_pem": { + "nullable": true, + "type": "string" + }, + "control": { + "nullable": true, + "type": "string" + }, + "key": { + "nullable": true, + "type": "string" + }, + "key_pem": { + "nullable": true, + "type": "string" + }, + "lossy": { + "type": "boolean" + }, + "root_cert_pem": { + "nullable": true, + "type": "string" + }, + "target": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "lossy", + "target" + ] + }, "DatasetEnsureBody": { "description": "Used to request a new partition kind exists within a zpool.\n\nMany partition types are associated with services that will be instantiated when the partition is detected.", "type": "object", @@ -301,6 +339,39 @@ "target" ] }, + "DiskRequest": { + "type": "object", + "properties": { + "device": { + "type": "string" + }, + "gen": { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "name": { + "type": "string" + }, + "read_only": { + "type": "boolean" + }, + "slot": { + "$ref": "#/components/schemas/Slot" + }, + "volume_construction_request": { + "$ref": "#/components/schemas/VolumeConstructionRequest" + } + }, + "required": [ + "device", + "gen", + "name", + "read_only", + "slot", + "volume_construction_request" + ] + }, "DiskRuntimeState": { "description": "Runtime state of the Disk, which includes its attach state and some minimal metadata", "type": "object", @@ -594,6 +665,12 @@ "description": "Describes the instance hardware.", "type": "object", "properties": { + "disks": { + "type": "array", + "items": { + "$ref": "#/components/schemas/DiskRequest" + } + }, "nics": { "type": "array", "items": { @@ -605,6 +682,7 @@ } }, "required": [ + "disks", "nics", "runtime" ] @@ -895,6 +973,12 @@ "name" ] }, + "Slot": { + "description": "A stable index which is translated by Propolis into a PCI BDF, visible to the guest.", + "type": "integer", + "format": "uint8", + "minimum": 0 + }, "UpdateArtifact": { "description": "Description of a single update artifact.", "type": "object", @@ -922,6 +1006,99 @@ "enum": [ "zone" ] + }, + "VolumeConstructionRequest": { + "oneOf": [ + { + "type": "object", + "properties": { + "block_size": { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "read_only_parent": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/VolumeConstructionRequest" + } + ] + }, + "sub_volumes": { + "type": "array", + "items": { + "$ref": "#/components/schemas/VolumeConstructionRequest" + } + }, + "type": { + "type": "string", + "enum": [ + "Volume" + ] + } + }, + "required": [ + "block_size", + "sub_volumes", + "type" + ] + }, + { + "type": "object", + "properties": { + "block_size": { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "type": { + "type": "string", + "enum": [ + "Url" + ] + }, + "url": { + "type": "string" + } + }, + "required": [ + "block_size", + "type", + "url" + ] + }, + { + "type": "object", + "properties": { + 
"block_size": { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "gen": { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "opts": { + "$ref": "#/components/schemas/CrucibleOpts" + }, + "type": { + "type": "string", + "enum": [ + "Region" + ] + } + }, + "required": [ + "block_size", + "gen", + "opts", + "type" + ] + } + ] } } } diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 6e155cf282..ef1ce6abe4 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -12,7 +12,7 @@ bytes = "1.1" cfg-if = "1.0" chrono = { version = "0.4", features = [ "serde" ] } # Only used by the simulated sled agent. -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "3e7e49eeb88fa8ad74375b0642aabd4224b1f2cb" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "32e603bdc3b6e5eb6b880a2ddde7e05f043b5357" } dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } futures = "0.3.21" ipnetwork = "0.18" @@ -21,7 +21,7 @@ omicron-common = { path = "../common" } p256 = "0.9.0" percent-encoding = "2.1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "0e3798510ae190131f63b9df767ec01b2beacf91" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "832a86afce308d3210685af987ace1ba74c2ecd6" } rand = { version = "0.8.5", features = ["getrandom"] } reqwest = { version = "0.11.8", default-features = false, features = ["rustls-tls", "stream"] } schemars = { version = "0.8", features = [ "chrono", "uuid" ] } diff --git a/sled-agent/src/bin/sled-agent-sim.rs b/sled-agent/src/bin/sled-agent-sim.rs index cc7b3ddf7f..61adec0ce5 100644 --- a/sled-agent/src/bin/sled-agent-sim.rs +++ b/sled-agent/src/bin/sled-agent-sim.rs @@ -68,7 +68,7 @@ async fn do_run() -> Result<(), CmdError> { nexus_address: args.nexus_addr, dropshot: ConfigDropshot { bind_address: args.sled_agent_addr, - request_body_max_bytes: 2048, + request_body_max_bytes: 1024 * 1024, ..Default::default() }, log: ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Info }, diff --git a/sled-agent/src/bin/sled-agent.rs b/sled-agent/src/bin/sled-agent.rs index 4edfe39f85..6c24ed541a 100644 --- a/sled-agent/src/bin/sled-agent.rs +++ b/sled-agent/src/bin/sled-agent.rs @@ -81,8 +81,10 @@ async fn do_run() -> Result<(), CmdError> { } }, Args::Run { config_path } => { - let config = SledConfig::from_file(&config_path) + let mut config = SledConfig::from_file(&config_path) .map_err(|e| CmdError::Failure(e.to_string()))?; + config.dropshot.request_body_max_bytes = 1024 * 1024; + let config = config; // - Sled agent starts with the normal config file - typically // called "config.toml". 
@@ -116,7 +118,7 @@ async fn do_run() -> Result<(), CmdError> { id: config.id, dropshot: ConfigDropshot { bind_address: config.bootstrap_address, - request_body_max_bytes: 2048, + request_body_max_bytes: 1024 * 1024, ..Default::default() }, log: ConfigLogging::StderrTerminal { diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index fe202861cf..904b7c80b8 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -22,6 +22,7 @@ use futures::lock::{Mutex, MutexGuard}; use omicron_common::api::external::NetworkInterface; use omicron_common::api::internal::nexus::InstanceRuntimeState; use omicron_common::backoff; +use propolis_client::api::DiskRequest; use propolis_client::Client as PropolisClient; use slog::Logger; use std::net::SocketAddr; @@ -62,6 +63,9 @@ pub enum Error { #[error(transparent)] RunningZone(#[from] crate::illumos::running_zone::Error), + + #[error("serde_json failure: {0}")] + SerdeJsonError(#[from] serde_json::Error), } // Issues read-only, idempotent HTTP requests at propolis until it responds with @@ -181,6 +185,9 @@ struct InstanceInner { requested_nics: Vec, vlan: Option, + // Disk related properties + requested_disks: Vec, + // Internal State management state: InstanceStates, running_state: Option, @@ -284,9 +291,9 @@ impl InstanceInner { let request = propolis_client::api::InstanceEnsureRequest { properties: self.properties.clone(), nics, - // TODO: Actual disks need to be wired up here. - disks: vec![], + disks: self.requested_disks.clone(), migrate, + cloud_init_bytes: None, }; info!(self.log, "Sending ensure request to propolis: {:?}", request); @@ -416,6 +423,7 @@ impl Instance { }, vnic_allocator, requested_nics: initial.nics, + requested_disks: initial.disks, vlan, state: InstanceStates::new(initial.runtime), running_state: None, @@ -678,6 +686,7 @@ mod test { time_updated: Utc::now(), }, nics: vec![], + disks: vec![], } } diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index d8b7e946c8..af250d2f93 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -236,6 +236,7 @@ mod test { time_updated: Utc::now(), }, nics: vec![], + disks: vec![], } } diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index c691d90eac..23c8f739a1 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -50,6 +50,7 @@ pub struct DiskEnsureBody { pub struct InstanceHardware { pub runtime: InstanceRuntimeState, pub nics: Vec, + pub disks: Vec, } /// Sent to a sled agent to establish the runtime state of an Instance diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 7d9770a249..61cd59ec6a 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -52,7 +52,6 @@ impl CrucibleDataInner { let region = Region { id: params.id, - volume_id: params.volume_id, block_size: params.block_size, extent_size: params.extent_size, extent_count: params.extent_count, diff --git a/tools/oxapi_demo b/tools/oxapi_demo index e22ca9f221..0a3a4629a4 100755 --- a/tools/oxapi_demo +++ b/tools/oxapi_demo @@ -39,6 +39,7 @@ PROJECTS INSTANCES instance_create_demo ORGANIZATION_NAME PROJECT_NAME INSTANCE_NAME + instance_create_attach_disk_demo ORGANIZATION_NAME PROJECT_NAME INSTANCE_NAME DISK_NAME instance_get ORGANIZATION_NAME PROJECT_NAME INSTANCE_NAME instance_delete ORGANIZATION_NAME PROJECT_NAME INSTANCE_NAME instance_migrate ORGANIZATION_NAME PROJECT_NAME INSTANCE_NAME SLED_ID @@ -280,6 +281,22 @@ function 
cmd_instance_create_demo -X POST -T - } +function cmd_instance_create_attach_disk_demo +{ + [[ $# != 4 ]] && usage "expected ORGANIZATION_NAME PROJECT_NAME INSTANCE_NAME DISK_NAME" + do_curl_authn "/organizations/$1/projects/$2/instances" \ + -X POST -T - < Date: Mon, 21 Mar 2022 10:31:38 -0400 Subject: [PATCH 02/16] don't expose Generation as u64 --- common/src/api/external/mod.rs | 4 ---- nexus/src/nexus.rs | 3 ++- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 68a57161de..f79e7a4b69 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -467,10 +467,6 @@ impl Generation { assert!(next_gen <= u64::try_from(i64::MAX).unwrap()); Generation(next_gen) } - - pub fn get(&self) -> u64 { - self.0 - } } impl Display for Generation { diff --git a/nexus/src/nexus.rs b/nexus/src/nexus.rs index 383f6898ff..e194e3b682 100644 --- a/nexus/src/nexus.rs +++ b/nexus/src/nexus.rs @@ -1244,13 +1244,14 @@ impl Nexus { let mut disk_reqs = vec![]; for (i, disk) in disks.iter().enumerate() { let volume = self.db_datastore.volume_get(disk.volume_id).await?; + let gen: i64 = (&disk.runtime_state.gen.0).into(); disk_reqs.push(sled_agent_client::types::DiskRequest { name: disk.name().to_string(), slot: sled_agent_client::types::Slot(i as u8), // TODO offer ability to attach read-only? read_only: false, device: "nvme".to_string(), - gen: disk.runtime_state.gen.0.get(), + gen: gen as u64, volume_construction_request: serde_json::from_str( &volume.data(), )?, From 04b12c8751a3bcc9501a42999681676e93a3f567 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Mon, 21 Mar 2022 10:42:16 -0400 Subject: [PATCH 03/16] &str --- nexus/src/db/model.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/src/db/model.rs b/nexus/src/db/model.rs index 410380e157..948a315f80 100644 --- a/nexus/src/db/model.rs +++ b/nexus/src/db/model.rs @@ -914,7 +914,7 @@ impl Volume { } } - pub fn data(&self) -> &String { + pub fn data(&self) -> &str { &self.data } } From e6a7c8c850e3fedd70a443b291a1654589ecc011 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Mon, 21 Mar 2022 10:45:31 -0400 Subject: [PATCH 04/16] comments --- nexus/src/nexus.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/nexus/src/nexus.rs b/nexus/src/nexus.rs index e194e3b682..5d0cbaba7d 100644 --- a/nexus/src/nexus.rs +++ b/nexus/src/nexus.rs @@ -1538,14 +1538,12 @@ impl Nexus { .state() { InstanceState::Running | InstanceState::Starting => { - /* - * If there's a propolis zone for this instnace, ask the Sled - * Agent to hot-plug or hot-remove disk. Then update the - * database to reflect the new intermediate state. - * - * TODO this will probably involve volume construction requests - * as well! - */ + // If there's a propolis zone for this instnace, ask the Sled + // Agent to hot-plug or hot-remove disk. Then update the + // database to reflect the new intermediate state. + // + // TODO this will probably involve volume construction requests + // as well! 
let new_runtime = sa .disk_put( &authz_disk.id(), From a2ab203e9a85ddea43d6899529887d84b4579242 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Mon, 21 Mar 2022 10:49:14 -0400 Subject: [PATCH 05/16] instance typo --- nexus/src/nexus.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/src/nexus.rs b/nexus/src/nexus.rs index 5d0cbaba7d..6237f930b4 100644 --- a/nexus/src/nexus.rs +++ b/nexus/src/nexus.rs @@ -1538,7 +1538,7 @@ impl Nexus { .state() { InstanceState::Running | InstanceState::Starting => { - // If there's a propolis zone for this instnace, ask the Sled + // If there's a propolis zone for this instance, ask the Sled // Agent to hot-plug or hot-remove disk. Then update the // database to reflect the new intermediate state. // From 581bdeef1926b6e6af0c76fb32cb510f62a415f0 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Mon, 21 Mar 2022 11:22:43 -0400 Subject: [PATCH 06/16] hoist "If propolis zone" logic out of disk_set_runtime, eliminate todos --- nexus/src/nexus.rs | 149 +++++++++++++++++++++------------------------ 1 file changed, 71 insertions(+), 78 deletions(-) diff --git a/nexus/src/nexus.rs b/nexus/src/nexus.rs index 6237f930b4..82d0b37b9f 100644 --- a/nexus/src/nexus.rs +++ b/nexus/src/nexus.rs @@ -1428,17 +1428,36 @@ impl Nexus { } } - self.disk_set_runtime( - opctx, - &authz_disk, - &db_disk, - &db_instance, - self.instance_sled(&db_instance).await?, - sled_agent_client::types::DiskStateRequested::Attached( - *instance_id, - ), - ) - .await?; + match &db_instance.runtime_state.state.state() { + // If there's a propolis zone for this instance, ask the Sled Agent + // to hot-plug the disk. + // + // TODO this will probably involve volume construction requests as + // well! + InstanceState::Running | InstanceState::Starting => { + self.disk_set_runtime( + opctx, + &authz_disk, + &db_disk, + self.instance_sled(&db_instance).await?, + sled_agent_client::types::DiskStateRequested::Attached( + *instance_id, + ), + ) + .await?; + } + + _ => { + // If there is not a propolis zone, then disk attach only occurs + // in the DB. + let new_runtime = db_disk.runtime().attach(*instance_id); + + self.db_datastore + .disk_update_runtime(opctx, &authz_disk, &new_runtime) + .await?; + } + } + self.db_datastore.disk_refetch(opctx, &authz_disk).await } @@ -1503,15 +1522,31 @@ impl Nexus { DiskState::Attached(_) => (), } - self.disk_set_runtime( - opctx, - &authz_disk, - &db_disk, - &db_instance, - self.instance_sled(&db_instance).await?, - sled_agent_client::types::DiskStateRequested::Detached, - ) - .await?; + // If there's a propolis zone for this instance, ask the Sled + // Agent to hot-remove the disk. + match &db_instance.runtime_state.state.state() { + InstanceState::Running | InstanceState::Starting => { + self.disk_set_runtime( + opctx, + &authz_disk, + &db_disk, + self.instance_sled(&db_instance).await?, + sled_agent_client::types::DiskStateRequested::Detached, + ) + .await?; + } + + _ => { + // If there is not a propolis zone, then disk detach only occurs + // in the DB. 
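+                // (Assumption, following the commit message: the next
+                // instance ensure sends the full set of attached disks as
+                // volume construction requests, so recording the detach in
+                // the database alone is sufficient here.)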
+                let new_runtime = db_disk.runtime().detach();
+
+                self.db_datastore
+                    .disk_update_runtime(opctx, &authz_disk, &new_runtime)
+                    .await?;
+            }
+        }
+
         self.db_datastore.disk_refetch(opctx, &authz_disk).await
     }
 
@@ -1522,72 +1557,30 @@
         opctx: &OpContext,
         authz_disk: &authz::Disk,
         db_disk: &db::model::Disk,
-        db_instance: &db::model::Instance,
         sa: Arc,
         requested: sled_agent_client::types::DiskStateRequested,
     ) -> Result<(), Error> {
-        use sled_agent_client::types::DiskStateRequested;
-
         let runtime: DiskRuntimeState = db_disk.runtime().into();
 
         opctx.authorize(authz::Action::Modify, authz_disk).await?;
 
-        let new_runtime: DiskRuntimeState = match &db_instance
-            .runtime_state
-            .state
-            .state()
-        {
-            InstanceState::Running | InstanceState::Starting => {
-                // If there's a propolis zone for this instance, ask the Sled
-                // Agent to hot-plug or hot-remove disk. Then update the
-                // database to reflect the new intermediate state.
-                //
-                // TODO this will probably involve volume construction requests
-                // as well!
+        // Ask the Sled Agent to begin the state change. Then update the
+        // database to reflect the new intermediate state.
+ let new_runtime = sa + .disk_put( + &authz_disk.id(), + &sled_agent_client::types::DiskEnsureBody { + initial_runtime: + sled_agent_client::types::DiskRuntimeState::from( + runtime, + ), + target: requested, + }, + ) + .await + .map_err(Error::from)?; - _ => { - todo!( - "implement state {:?}", - db_instance.runtime_state.state.state() - ); - } - }; + let new_runtime: DiskRuntimeState = new_runtime.into_inner().into(); self.db_datastore .disk_update_runtime(opctx, authz_disk, &new_runtime.into()) From e8981d529e9f3718173e84a141060e18fa24b8b1 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Mon, 21 Mar 2022 11:37:24 -0400 Subject: [PATCH 07/16] mock up saga step for creating disks during instance create --- nexus/src/sagas.rs | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/nexus/src/sagas.rs b/nexus/src/sagas.rs index 1191b6e466..64e5fab77a 100644 --- a/nexus/src/sagas.rs +++ b/nexus/src/sagas.rs @@ -194,6 +194,15 @@ pub fn saga_instance_create() -> SagaTemplate { new_action_noop_undo(sic_create_network_interfaces), ); + template_builder.append( + "create_disks", + "CreateDisksForInstance", + ActionFunc::new_action( + sic_create_disks_for_instance, + sic_create_disks_for_instance_undo, + ), + ); + template_builder.append( "attach_disks", "AttachDisksToInstance", @@ -488,6 +497,38 @@ async fn sic_create_network_interfaces_undo( Ok(()) } +/// Create disks during instance creation, and return a list of disk names +// TODO implement +async fn sic_create_disks_for_instance( + sagactx: ActionContext, +) -> Result, ActionError> { + let saga_params = sagactx.saga_params(); + let saga_disks = &saga_params.create_params.disks; + + for disk in saga_disks { + match disk { + params::InstanceDiskAttachment::Create(_create_params) => { + return Err(ActionError::action_failed( + "Creating disk during instance create unsupported!" + .to_string(), + )); + } + + _ => {} + } + } + + Ok(vec![]) +} + +/// Undo disks created during instance creation +// TODO implement +async fn sic_create_disks_for_instance_undo( + _sagactx: ActionContext, +) -> Result<(), anyhow::Error> { + Ok(()) +} + async fn sic_attach_disks_to_instance( sagactx: ActionContext, ) -> Result<(), ActionError> { @@ -519,7 +560,7 @@ async fn ensure_instance_disk_attach_state( for disk in saga_disks { match disk { params::InstanceDiskAttachment::Create(_) => { - todo!(); + // TODO grab disks created in sic_create_disks_for_instance } params::InstanceDiskAttachment::Attach(instance_disk_attach) => { let disk_name: db::model::Name = From ea23469443d7806966f049ebb8680136fbe3b8e9 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Mon, 21 Mar 2022 11:50:35 -0400 Subject: [PATCH 08/16] update crucible rev --- nexus/Cargo.toml | 2 +- sled-agent/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 7bdf38cb2d..787832b36f 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -14,7 +14,7 @@ async-trait = "0.1.51" base64 = "0.13.0" bb8 = "0.7.1" cookie = "0.16" -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "32e603bdc3b6e5eb6b880a2ddde7e05f043b5357" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "945daedb88cefa790f1d994b3a038b8fa9ac514a" } # Tracking pending 2.0 version. 
diesel = { git = "https://github.com/diesel-rs/diesel", rev = "ce77c382", features = ["postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] } futures = "0.3.21" diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index ef1ce6abe4..5ba603a32d 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -12,7 +12,7 @@ bytes = "1.1" cfg-if = "1.0" chrono = { version = "0.4", features = [ "serde" ] } # Only used by the simulated sled agent. -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "32e603bdc3b6e5eb6b880a2ddde7e05f043b5357" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "945daedb88cefa790f1d994b3a038b8fa9ac514a" } dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } futures = "0.3.21" ipnetwork = "0.18" From 44a3f3ffda5c9c047f05ad91b07258ebbf589e07 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Wed, 23 Mar 2022 10:13:56 -0400 Subject: [PATCH 09/16] use internal_error for block size mismatch --- Cargo.lock | 2 +- nexus/src/sagas.rs | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b7cd2d86a8..68e6531f36 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -609,7 +609,7 @@ dependencies = [ [[package]] name = "crucible-agent-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=32e603bdc3b6e5eb6b880a2ddde7e05f043b5357#32e603bdc3b6e5eb6b880a2ddde7e05f043b5357" +source = "git+https://github.com/oxidecomputer/crucible?rev=945daedb88cefa790f1d994b3a038b8fa9ac514a#945daedb88cefa790f1d994b3a038b8fa9ac514a" dependencies = [ "anyhow", "chrono", diff --git a/nexus/src/sagas.rs b/nexus/src/sagas.rs index 64e5fab77a..46937e0f1b 100644 --- a/nexus/src/sagas.rs +++ b/nexus/src/sagas.rs @@ -1086,12 +1086,9 @@ async fn sdc_regions_ensure( .all(|w| w[0].1.block_size == w[1].1.block_size); if !all_region_have_same_block_size { - return Err(ActionError::new_subsaga( - // XXX wrong error type? - anyhow::anyhow!( - "volume creation will fail due to block size mismatch" - ), - )); + return Err(ActionError::action_failed(Error::internal_error( + "volume creation will fail due to block size mismatch", + ))); } let block_size = datasets_and_regions[0].1.block_size; From db559795be95f1983b10eb7a4587d9022b06dffb Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Wed, 23 Mar 2022 10:15:37 -0400 Subject: [PATCH 10/16] remove some XXX --- nexus/src/sagas.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nexus/src/sagas.rs b/nexus/src/sagas.rs index 46937e0f1b..010dc3c289 100644 --- a/nexus/src/sagas.rs +++ b/nexus/src/sagas.rs @@ -1099,7 +1099,6 @@ async fn sdc_regions_ensure( sled_agent_client::types::VolumeConstructionRequest::Volume { block_size, sub_volumes: vec![ - // XXX allocation algorithm only supports one sub vol? sled_agent_client::types::VolumeConstructionRequest::Region { block_size, // gen of 0 is here, these regions were just allocated. @@ -1118,7 +1117,7 @@ async fn sdc_regions_ensure( // all downstairs will expect encrypted blocks key: Some(base64::encode({ - // XXX the current encryption key + // TODO the current encryption key // requirement is 32 bytes, what if that // changes? 
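+ // (Note: the 32-byte buffer below is filled with random data, not + // shown in this hunk, and base64-encoded into the request's `key` + // field, so each disk gets its own 256-bit encryption key.)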
let mut random_bytes: [u8; 32] = [0; 32]; From 8c8324dbe97deb53b7bbd79a140aa46208df0986 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Wed, 23 Mar 2022 10:21:52 -0400 Subject: [PATCH 11/16] internal_error for serialization error --- nexus/src/sagas.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/nexus/src/sagas.rs b/nexus/src/sagas.rs index 010dc3c289..3143298272 100644 --- a/nexus/src/sagas.rs +++ b/nexus/src/sagas.rs @@ -1141,11 +1141,8 @@ async fn sdc_regions_ensure( }; let volume_data = serde_json::to_string(&volume_construction_request) - .map_err(|_| { - ActionError::new_subsaga( - // XXX wrong error type? - anyhow::anyhow!("serde_json::to_string"), - ) + .map_err(|e| { + ActionError::action_failed(Error::internal_error(&e.to_string())) })?; Ok(volume_data) From ad3390b21e4187028c7ab5220304b8fde4f93613 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Thu, 24 Mar 2022 13:30:31 -0400 Subject: [PATCH 12/16] Fix-up instance creation saga so it actually works I had an incorrect mental model of how distributed sagas work, and this caused problems: disks would not be detached when instance creation failed. I corrected the code in a slightly different way than was done for NICs: instead of pre-allocating IDs and having a previous saga node delete based on those IDs, I split the attach-disks-to-instance node into 8 separate nodes, each with its own undo action. This is now tested by trying to attach 8 disks, one of which is marked as faulted; the whole saga should unwind, and the test checks that the disks are rolled back to their original states. Also added tests for the new (arbitrary) 8 disk limit. This required bumping the request max body size because of the larger instance ensure bodies. --- nexus/src/db/model.rs | 9 + nexus/src/lib.rs | 1 + nexus/src/nexus.rs | 47 +++ nexus/src/sagas.rs | 161 ++++++---- nexus/test-utils/src/lib.rs | 2 +- nexus/tests/config.test.toml | 4 +- nexus/tests/integration_tests/instances.rs | 338 +++++++++++++++++++++ 7 files changed, 497 insertions(+), 65 deletions(-) diff --git a/nexus/src/db/model.rs b/nexus/src/db/model.rs index 948a315f80..d31f33b8d8 100644 --- a/nexus/src/db/model.rs +++ b/nexus/src/db/model.rs @@ -1366,6 +1366,15 @@ impl DiskRuntimeState { .unwrap(), ) } + + pub fn faulted(self) -> Self { + Self { + disk_state: external::DiskState::Faulted.label().to_string(), + attach_instance_id: None, + gen: self.gen.next().into(), + time_updated: Utc::now(), + } + } } /// Conversion from the internal API type. diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index bf24258464..bf7d2183c2 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -1,6 +1,7 @@ // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +#![feature(async_closure)] //!
Library interface to the Nexus, the heart of the control plane diff --git a/nexus/src/nexus.rs b/nexus/src/nexus.rs index 82d0b37b9f..6d93e1f96f 100644 --- a/nexus/src/nexus.rs +++ b/nexus/src/nexus.rs @@ -105,6 +105,8 @@ pub trait TestInterfaces { &self, session: db::model::ConsoleSession, ) -> CreateResult<db::model::ConsoleSession>; + + async fn set_disk_as_faulted(&self, disk_id: &Uuid) -> Result<bool, Error>; } pub static BASE_ARTIFACT_DIR: &str = "/var/tmp/oxide_artifacts"; @@ -856,6 +858,13 @@ impl Nexus { opctx.authorize(authz::Action::CreateChild, &authz_project).await?; + // Validate parameters + if params.disks.len() > 8 { + return Err(Error::invalid_request( + "cannot attach more than 8 disks to instance!", + )); + } + let saga_params = Arc::new(sagas::ParamsInstanceCreate { serialized_authn: authn::saga::Serialized::for_opctx(opctx), organization_name: organization_name.clone().into(), @@ -1359,6 +1368,27 @@ impl Nexus { .await?; let instance_id = &authz_instance.id(); + // Enforce attached disks limit + let attached_disks = self + .instance_list_disks( + opctx, + organization_name, + project_name, + instance_name, + &DataPageParams { + marker: None, + direction: dropshot::PaginationOrder::Ascending, + limit: std::num::NonZeroU32::new(8).unwrap(), + }, + ) + .await?; + + if attached_disks.len() == 8 { + return Err(Error::invalid_request( + "cannot attach more than 8 disks to instance!", + )); + } + fn disk_attachment_error( disk: &db::model::Disk, ) -> CreateResult<db::model::Disk> { @@ -3164,4 +3194,21 @@ impl TestInterfaces for Nexus { ) -> CreateResult<db::model::ConsoleSession> { Ok(self.db_datastore.session_create(session).await?) } + + async fn set_disk_as_faulted(&self, disk_id: &Uuid) -> Result<bool, Error> { + let opctx = OpContext::for_tests( + self.log.new(o!()), + Arc::clone(&self.db_datastore), + ); + + let authz_disk = self.db_datastore.disk_lookup_by_id(*disk_id).await?; + let db_disk = + self.db_datastore.disk_refetch(&opctx, &authz_disk).await?; + + let new_runtime = db_disk.runtime_state.faulted(); + + self.db_datastore + .disk_update_runtime(&opctx, &authz_disk, &new_runtime) + .await + } } diff --git a/nexus/src/sagas.rs b/nexus/src/sagas.rs index 3143298272..0c74ac8af6 100644 --- a/nexus/src/sagas.rs +++ b/nexus/src/sagas.rs @@ -194,23 +194,48 @@ pub fn saga_instance_create() -> SagaTemplate<SagaInstanceCreate> { new_action_noop_undo(sic_create_network_interfaces), ); - template_builder.append( - "create_disks", - "CreateDisksForInstance", - ActionFunc::new_action( - sic_create_disks_for_instance, - sic_create_disks_for_instance_undo, - ), - ); + // Saga actions must be atomic - they have to fully complete or fully abort. + // This is because Steno assumes that the saga actions are atomic and + // therefore undo actions are *not* run for the failing node. + // + // For this reason, each disk is created and attached with a separate saga + // node. If a saga node had a loop to attach or detach all disks, and one + // failed, any disks that were attached would not be detached because the + // corresponding undo action would not be run. Separate each disk create and + // attach to their own saga node and ensure that each function behaves + // atomically. + // + // Currently, instances can have a maximum of 8 disks attached. Create two + // saga nodes for each disk that will unconditionally run but contain + // conditional logic depending on if that disk index is going to be used. + // Steno does not currently support the saga node graph changing shape.
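+ // As a sketch (assuming the 8-disk limit), the nodes appended below + // form a fixed linear chain: create_disks0 -> attach_disks0 -> ... -> + // create_disks7 -> attach_disks7. Each node is a no-op when its index + // is beyond the number of disks actually requested, so the graph keeps + // the same shape for every request.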
+ for i in 0..8 { + template_builder.append( + &format!("create_disks{}", i), + "CreateDisksForInstance", + ActionFunc::new_action( + async move |sagactx| { + sic_create_disks_for_instance(sagactx, i).await + }, + async move |sagactx| { + sic_create_disks_for_instance_undo(sagactx, i).await + }, + ), + ); - template_builder.append( - "attach_disks", - "AttachDisksToInstance", - ActionFunc::new_action( - sic_attach_disks_to_instance, - sic_attach_disks_to_instance_undo, - ), - ); + template_builder.append( + &format!("attach_disks{}", i), + "AttachDisksToInstance", + ActionFunc::new_action( + async move |sagactx| { + sic_attach_disks_to_instance(sagactx, i).await + }, + async move |sagactx| { + sic_attach_disks_to_instance_undo(sagactx, i).await + }, + ), + ); + } template_builder.append( "instance_ensure", @@ -501,48 +526,56 @@ async fn sic_create_network_interfaces_undo( // TODO implement async fn sic_create_disks_for_instance( sagactx: ActionContext<SagaInstanceCreate>, -) -> Result<Vec<String>, ActionError> { + disk_index: usize, +) -> Result<Option<String>, ActionError> { let saga_params = sagactx.saga_params(); let saga_disks = &saga_params.create_params.disks; - for disk in saga_disks { - match disk { - params::InstanceDiskAttachment::Create(_create_params) => { - return Err(ActionError::action_failed( - "Creating disk during instance create unsupported!" - .to_string(), - )); - } + if disk_index >= saga_disks.len() { + return Ok(None); + } - _ => {} + let disk = &saga_disks[disk_index]; + + match disk { + params::InstanceDiskAttachment::Create(_create_params) => { + return Err(ActionError::action_failed( + "Creating disk during instance create unsupported!".to_string(), + )); } + + _ => {} } - Ok(vec![]) + Ok(None) } /// Undo disks created during instance creation // TODO implement async fn sic_create_disks_for_instance_undo( _sagactx: ActionContext<SagaInstanceCreate>, + _disk_index: usize, ) -> Result<(), anyhow::Error> { Ok(()) } async fn sic_attach_disks_to_instance( sagactx: ActionContext<SagaInstanceCreate>, + disk_index: usize, ) -> Result<(), ActionError> { - ensure_instance_disk_attach_state(sagactx, true).await + ensure_instance_disk_attach_state(sagactx, disk_index, true).await } async fn sic_attach_disks_to_instance_undo( sagactx: ActionContext<SagaInstanceCreate>, + disk_index: usize, ) -> Result<(), anyhow::Error> { - Ok(ensure_instance_disk_attach_state(sagactx, false).await?) + Ok(ensure_instance_disk_attach_state(sagactx, disk_index, false).await?)
} async fn ensure_instance_disk_attach_state( sagactx: ActionContext<SagaInstanceCreate>, + disk_index: usize, attached: bool, ) -> Result<(), ActionError> { let osagactx = sagactx.user_data(); @@ -553,44 +586,48 @@ async fn ensure_instance_disk_attach_state( let saga_disks = &saga_params.create_params.disks; let instance_name = sagactx.lookup::<db::model::Name>("instance_name")?; + if disk_index >= saga_disks.len() { + return Ok(()); + } + + let disk = &saga_disks[disk_index]; + let organization_name: db::model::Name = saga_params.organization_name.clone().into(); let project_name: db::model::Name = saga_params.project_name.clone().into(); - for disk in saga_disks { - match disk { - params::InstanceDiskAttachment::Create(_) => { - // TODO grab disks created in sic_create_disks_for_instance - } - params::InstanceDiskAttachment::Attach(instance_disk_attach) => { - let disk_name: db::model::Name = - instance_disk_attach.disk.clone().into(); - - if attached { - osagactx - .nexus() - .instance_attach_disk( - &opctx, - &organization_name, - &project_name, - &instance_name, - &disk_name, - ) - .await - } else { - osagactx - .nexus() - .instance_detach_disk( - &opctx, - &organization_name, - &project_name, - &instance_name, - &disk_name, - ) - .await - } - .map_err(ActionError::action_failed)?; + match disk { + params::InstanceDiskAttachment::Create(_) => { + // TODO grab disks created in sic_create_disks_for_instance + } + params::InstanceDiskAttachment::Attach(instance_disk_attach) => { + let disk_name: db::model::Name = + instance_disk_attach.disk.clone().into(); + + if attached { + osagactx + .nexus() + .instance_attach_disk( + &opctx, + &organization_name, + &project_name, + &instance_name, + &disk_name, + ) + .await + } else { + osagactx + .nexus() + .instance_detach_disk( + &opctx, + &organization_name, + &project_name, + &instance_name, + &disk_name, + ) + .await } + .map_err(ActionError::action_failed)?; } } diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 090f98ba41..be5e77c0c1 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -177,7 +177,7 @@ pub async fn start_sled_agent( nexus_address, dropshot: ConfigDropshot { bind_address: SocketAddr::new("127.0.0.1".parse().unwrap(), 0), - request_body_max_bytes: 2048, + request_body_max_bytes: 1024 * 1024, ..Default::default() }, // TODO-cleanup this is unused diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 4ded6ef38a..5ae440e662 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -31,12 +31,12 @@ url = "postgresql://root@127.0.0.1:0/omicron?sslmode=disable" # [dropshot_external] bind_address = "127.0.0.1:0" -request_body_max_bytes = 2048 +request_body_max_bytes = 1048576 # port must be 0.
see above [dropshot_internal] bind_address = "127.0.0.1:0" -request_body_max_bytes = 2048 +request_body_max_bytes = 1048576 # # NOTE: for the test suite, if mode = "file", the file path MUST be the sentinel diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 3e9ad1f32c..4293a853f6 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -1064,6 +1064,344 @@ async fn test_attach_one_disk_to_instance(cptestctx: &ControlPlaneTestContext) { assert_eq!(disks[0].state, DiskState::Attached(instance.identity.id)); } +// Test that attaching 8 disks to an instance is supported +#[nexus_test] +async fn test_attach_eight_disks_to_instance( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + const ORGANIZATION_NAME: &str = "bobs-barrel-of-bytes"; + const PROJECT_NAME: &str = "bit-barrel"; + + // Test pre-reqs + DiskTest::new(&cptestctx).await; + create_organization(&client, ORGANIZATION_NAME).await; + create_project(client, ORGANIZATION_NAME, PROJECT_NAME).await; + + // Make 8 disks + for i in 0..8 { + create_disk( + &client, + ORGANIZATION_NAME, + PROJECT_NAME, + &format!("probablydata{}", i,), + ) + .await; + } + + // Assert we created 8 disks + let url_project_disks = format!( + "/organizations/{}/projects/{}/disks", + ORGANIZATION_NAME, PROJECT_NAME, + ); + let disks: Vec<Disk> = NexusRequest::iter_collection_authn( + client, + &url_project_disks, + "", + None, + ) + .await + .expect("failed to list disks") + .all_items; + assert_eq!(disks.len(), 8); + + // Try to boot an instance that has 8 disks attached + let instance_params = params::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: Name::try_from(String::from("nfs")).unwrap(), + description: String::from("probably serving data"), + }, + ncpus: InstanceCpuCount::try_from(2).unwrap(), + memory: ByteCount::from_mebibytes_u32(4), + hostname: String::from("nfs"), + network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + disks: (0..8) + .map(|i| { + params::InstanceDiskAttachment::Attach( + params::InstanceDiskAttach { + disk: Name::try_from( + format!("probablydata{}", i).to_string(), + ) + .unwrap(), + }, + ) + }) + .collect(), + }; + + let url_instances = format!( + "/organizations/{}/projects/{}/instances", + ORGANIZATION_NAME, PROJECT_NAME + ); + + let builder = + RequestBuilder::new(client, http::Method::POST, &url_instances) + .body(Some(&instance_params)) + .expect_status(Some(http::StatusCode::CREATED)); + + let response = NexusRequest::new(builder) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Expected instance creation!"); + + let instance = response.parsed_body::<Instance>().unwrap(); + + // Assert disks are attached + let url_project_disks = format!( + "/organizations/{}/projects/{}/disks", + ORGANIZATION_NAME, PROJECT_NAME, + ); + let disks: Vec<Disk> = NexusRequest::iter_collection_authn( + client, + &url_project_disks, + "", + None, + ) + .await + .expect("failed to list disks") + .all_items; + assert_eq!(disks.len(), 8); + + for disk in disks { + assert_eq!(disk.state, DiskState::Attached(instance.identity.id)); + } +} + +// Test that disk attach limit is enforced +#[nexus_test] +async fn test_cannot_attach_nine_disks_to_instance( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + const ORGANIZATION_NAME: &str = "bobs-barrel-of-bytes"; + const PROJECT_NAME: &str = "bit-barrel"; + + // Test pre-reqs + DiskTest::new(&cptestctx).await; +
create_organization(&client, ORGANIZATION_NAME).await; + create_project(client, ORGANIZATION_NAME, PROJECT_NAME).await; + + // Make 9 disks + for i in 0..9 { + create_disk( + &client, + ORGANIZATION_NAME, + PROJECT_NAME, + &format!("probablydata{}", i,), + ) + .await; + } + + // Assert we created 9 disks + let url_project_disks = format!( + "/organizations/{}/projects/{}/disks", + ORGANIZATION_NAME, PROJECT_NAME, + ); + let disks: Vec<Disk> = NexusRequest::iter_collection_authn( + client, + &url_project_disks, + "", + None, + ) + .await + .expect("failed to list disks") + .all_items; + assert_eq!(disks.len(), 9); + + // Try to boot an instance that has 9 disks attached + let instance_params = params::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: Name::try_from(String::from("nfs")).unwrap(), + description: String::from("probably serving data"), + }, + ncpus: InstanceCpuCount::try_from(2).unwrap(), + memory: ByteCount::from_mebibytes_u32(4), + hostname: String::from("nfs"), + network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + disks: (0..9) + .map(|i| { + params::InstanceDiskAttachment::Attach( + params::InstanceDiskAttach { + disk: Name::try_from( + format!("probablydata{}", i).to_string(), + ) + .unwrap(), + }, + ) + }) + .collect(), + }; + + let url_instances = format!( + "/organizations/{}/projects/{}/instances", + ORGANIZATION_NAME, PROJECT_NAME + ); + + let builder = + RequestBuilder::new(client, http::Method::POST, &url_instances) + .body(Some(&instance_params)) + .expect_status(Some(http::StatusCode::BAD_REQUEST)); + + let _response = NexusRequest::new(builder) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Expected instance creation to fail with bad request!"); + + // Check that disks are still detached + let url_project_disks = format!( + "/organizations/{}/projects/{}/disks", + ORGANIZATION_NAME, PROJECT_NAME, + ); + let disks: Vec<Disk> = NexusRequest::iter_collection_authn( + client, + &url_project_disks, + "", + None, + ) + .await + .expect("failed to list disks") + .all_items; + assert_eq!(disks.len(), 9); + + for disk in disks { + assert_eq!(disk.state, DiskState::Detached); + } +} + +// Test that faulted disks cannot be attached +#[nexus_test] +async fn test_cannot_attach_faulted_disks(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + + const ORGANIZATION_NAME: &str = "bobs-barrel-of-bytes"; + const PROJECT_NAME: &str = "bit-barrel"; + + // Test pre-reqs + DiskTest::new(&cptestctx).await; + create_organization(&client, ORGANIZATION_NAME).await; + create_project(client, ORGANIZATION_NAME, PROJECT_NAME).await; + + // Make 8 disks + for i in 0..8 { + create_disk( + &client, + ORGANIZATION_NAME, + PROJECT_NAME, + &format!("probablydata{}", i,), + ) + .await; + } + + // Assert we created 8 disks + let url_project_disks = format!( + "/organizations/{}/projects/{}/disks", + ORGANIZATION_NAME, PROJECT_NAME, + ); + let disks: Vec<Disk> = NexusRequest::iter_collection_authn( + client, + &url_project_disks, + "", + None, + ) + .await + .expect("failed to list disks") + .all_items; + assert_eq!(disks.len(), 8); + + // Set the 7th to FAULTED + let apictx = &cptestctx.server.apictx; + let nexus = &apictx.nexus; + assert!(nexus.set_disk_as_faulted(&disks[6].identity.id).await.unwrap()); + + // Assert FAULTED + let disks: Vec<Disk> = NexusRequest::iter_collection_authn( + client, + &url_project_disks, + "", + None, + ) + .await + .expect("failed to list disks") + .all_items; + assert_eq!(disks.len(), 8); + + for
(i, disk) in disks.iter().enumerate() { + if i == 6 { + assert_eq!(disk.state, DiskState::Faulted); + } else { + assert_eq!(disk.state, DiskState::Detached); + } + } + + // Try to boot the instance + let instance_params = params::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: Name::try_from(String::from("nfs")).unwrap(), + description: String::from("probably serving data"), + }, + ncpus: InstanceCpuCount::try_from(2).unwrap(), + memory: ByteCount::from_mebibytes_u32(4), + hostname: String::from("nfs"), + network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + disks: (0..8) + .map(|i| { + params::InstanceDiskAttachment::Attach( + params::InstanceDiskAttach { + disk: Name::try_from( + format!("probablydata{}", i).to_string(), + ) + .unwrap(), + }, + ) + }) + .collect(), + }; + + let url_instances = format!( + "/organizations/{}/projects/{}/instances", + ORGANIZATION_NAME, PROJECT_NAME + ); + + let builder = + RequestBuilder::new(client, http::Method::POST, &url_instances) + .body(Some(&instance_params)) + .expect_status(Some(http::StatusCode::BAD_REQUEST)); + + let _response = NexusRequest::new(builder) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Expected instance creation to fail!"); + + // Assert disks are detached (except for the 7th) + let url_project_disks = format!( + "/organizations/{}/projects/{}/disks", + ORGANIZATION_NAME, PROJECT_NAME, + ); + let disks: Vec<Disk> = NexusRequest::iter_collection_authn( + client, + &url_project_disks, + "", + None, + ) + .await + .expect("failed to list disks") + .all_items; + assert_eq!(disks.len(), 8); + + for (i, disk) in disks.iter().enumerate() { + if i == 6 { + assert_eq!(disk.state, DiskState::Faulted); + } else { + assert_eq!(disk.state, DiskState::Detached); + } + } +} + async fn instance_get( client: &ClientTestContext, instance_url: &str, From 98cf0e6dd529545b82a70d8f76975369e5f31a03 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Thu, 24 Mar 2022 13:56:19 -0400 Subject: [PATCH 13/16] no more magic constant, use MAX_DISKS_PER_INSTANCE --- nexus/src/nexus.rs | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/nexus/src/nexus.rs b/nexus/src/nexus.rs index 6d93e1f96f..530edbd1fb 100644 --- a/nexus/src/nexus.rs +++ b/nexus/src/nexus.rs @@ -111,6 +111,8 @@ pub trait TestInterfaces { pub static BASE_ARTIFACT_DIR: &str = "/var/tmp/oxide_artifacts"; +const MAX_DISKS_PER_INSTANCE: u32 = 8; + /// Manages an Oxide fleet -- the heart of the control plane pub struct Nexus { /// uuid for this nexus instance. @@ -859,10 +861,11 @@ impl Nexus { opctx.authorize(authz::Action::CreateChild, &authz_project).await?; // Validate parameters - if params.disks.len() > 8 { - return Err(Error::invalid_request( - "cannot attach more than 8 disks to instance!", - )); + if params.disks.len() > MAX_DISKS_PER_INSTANCE as usize { + return Err(Error::invalid_request(&format!( + "cannot attach more than {} disks to instance!", + MAX_DISKS_PER_INSTANCE + ))); } let saga_params = Arc::new(sagas::ParamsInstanceCreate { serialized_authn: authn::saga::Serialized::for_opctx(opctx), @@ -1243,9 +1246,8 @@ impl Nexus { &DataPageParams { marker: None, direction: dropshot::PaginationOrder::Ascending, - // TODO: is there a limit to the number of disks an instance - // can have attached?
- limit: std::num::NonZeroU32::new(8).unwrap(), + limit: std::num::NonZeroU32::new(MAX_DISKS_PER_INSTANCE) + .unwrap(), }, ) .await?; @@ -1257,7 +1259,6 @@ disk_reqs.push(sled_agent_client::types::DiskRequest { name: disk.name().to_string(), slot: sled_agent_client::types::Slot(i as u8), - // TODO offer ability to attach read-only? read_only: false, device: "nvme".to_string(), gen: gen as u64, @@ -1378,15 +1379,17 @@ &DataPageParams { marker: None, direction: dropshot::PaginationOrder::Ascending, - limit: std::num::NonZeroU32::new(8).unwrap(), + limit: std::num::NonZeroU32::new(MAX_DISKS_PER_INSTANCE) + .unwrap(), }, ) .await?; - if attached_disks.len() == 8 { - return Err(Error::invalid_request( - "cannot attach more than 8 disks to instance!", - )); + if attached_disks.len() == MAX_DISKS_PER_INSTANCE as usize { + return Err(Error::invalid_request(&format!( + "cannot attach more than {} disks to instance!", + MAX_DISKS_PER_INSTANCE + ))); } fn disk_attachment_error( From 859d4a3304f83473a056ec8b0a80cb1cf4d57f49 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Thu, 24 Mar 2022 22:09:26 -0400 Subject: [PATCH 14/16] missed magic constant --- nexus/src/nexus.rs | 2 +- nexus/src/sagas.rs | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/nexus/src/nexus.rs b/nexus/src/nexus.rs index 530edbd1fb..c86934255b 100644 --- a/nexus/src/nexus.rs +++ b/nexus/src/nexus.rs @@ -111,7 +111,7 @@ pub trait TestInterfaces { pub static BASE_ARTIFACT_DIR: &str = "/var/tmp/oxide_artifacts"; -const MAX_DISKS_PER_INSTANCE: u32 = 8; +pub(crate) const MAX_DISKS_PER_INSTANCE: u32 = 8; /// Manages an Oxide fleet -- the heart of the control plane pub struct Nexus { /// uuid for this nexus instance. diff --git a/nexus/src/sagas.rs b/nexus/src/sagas.rs index 0c74ac8af6..a8c11718c3 100644 --- a/nexus/src/sagas.rs +++ b/nexus/src/sagas.rs @@ -209,16 +209,17 @@ pub fn saga_instance_create() -> SagaTemplate<SagaInstanceCreate> { // saga nodes for each disk that will unconditionally run but contain // conditional logic depending on if that disk index is going to be used. // Steno does not currently support the saga node graph changing shape.
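// (The loop bound becomes a u32 constant in the change below, hence the i as usize casts at each call site.)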
- for i in 0..8 { + for i in 0..crate::nexus::MAX_DISKS_PER_INSTANCE { template_builder.append( &format!("create_disks{}", i), "CreateDisksForInstance", ActionFunc::new_action( async move |sagactx| { - sic_create_disks_for_instance(sagactx, i).await + sic_create_disks_for_instance(sagactx, i as usize).await }, async move |sagactx| { - sic_create_disks_for_instance_undo(sagactx, i).await + sic_create_disks_for_instance_undo(sagactx, i as usize) + .await }, ), ); @@ -228,10 +229,10 @@ pub fn saga_instance_create() -> SagaTemplate { "AttachDisksToInstance", ActionFunc::new_action( async move |sagactx| { - sic_attach_disks_to_instance(sagactx, i).await + sic_attach_disks_to_instance(sagactx, i as usize).await }, async move |sagactx| { - sic_attach_disks_to_instance_undo(sagactx, i).await + sic_attach_disks_to_instance_undo(sagactx, i as usize).await }, ), ); From 959d49d92c885ddd94d739d77731b538dc40dc0b Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Thu, 24 Mar 2022 22:12:28 -0400 Subject: [PATCH 15/16] return unsupported error --- nexus/src/sagas.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nexus/src/sagas.rs b/nexus/src/sagas.rs index a8c11718c3..a688e81d3c 100644 --- a/nexus/src/sagas.rs +++ b/nexus/src/sagas.rs @@ -600,6 +600,9 @@ async fn ensure_instance_disk_attach_state( match disk { params::InstanceDiskAttachment::Create(_) => { // TODO grab disks created in sic_create_disks_for_instance + return Err(ActionError::action_failed(Error::invalid_request( + "creating disks while creating an instance not supported", + ))); } params::InstanceDiskAttachment::Attach(instance_disk_attach) => { let disk_name: db::model::Name = From 003d9f0fe2bc917dab5eb3efb9929350fe98252a Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Fri, 25 Mar 2022 16:05:02 -0400 Subject: [PATCH 16/16] update propolis-client so that builds work, also fix code after merge --- Cargo.lock | 70 +++++++++------------- nexus/src/sagas.rs | 26 +++++--- nexus/tests/integration_tests/instances.rs | 2 + sled-agent/Cargo.toml | 2 +- 4 files changed, 48 insertions(+), 52 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 80e068c250..6bfa1b9a07 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -14,7 +14,7 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b613b8e1e3cf911a086f53f03bf286f52fd7a7258e4fa606f0ef220d39d8877" dependencies = [ - "generic-array 0.14.4", + "generic-array 0.14.5", ] [[package]] @@ -358,7 +358,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ee52072ec15386f770805afd189a01c8841be8696bed250fa2f13c4c0d6dfb7" dependencies = [ - "generic-array 0.14.4", + "generic-array 0.14.5", ] [[package]] @@ -443,7 +443,7 @@ version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94d4706de1b0fa5b132270cddffa8585166037822e260a944fe161acd137ca05" dependencies = [ - "time 0.3.7", + "time 0.3.9", "version_check", ] @@ -616,7 +616,7 @@ dependencies = [ "tokio-util 0.7.0", "toml", "tracing", - "usdt 0.3.1", + "usdt 0.3.2", "uuid", ] @@ -642,7 +642,7 @@ source = "git+https://github.com/oxidecomputer/crucible?rev=4090a023b77dcab7a500 dependencies = [ "anyhow", "rusqlite", - "rustls-pemfile 0.3.0", + "rustls-pemfile", "serde", "serde_json", "tempfile", @@ -751,19 +751,6 @@ dependencies = [ "cipher", ] -[[package]] -name = "curve25519-dalek" -version = "4.0.0-pre.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4033478fbf70d6acf2655ac70da91ee65852d69daf7a67bf7a2f518fb47aafcf" -dependencies = [ - "byteorder", - "digest 0.9.0", - "rand_core", - "subtle", - "zeroize", -] - [[package]] name = "darling" version = "0.13.1" @@ -949,7 +936,7 @@ checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" [[package]] name = "dropshot" version = "0.6.1-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#cc7ac240249b7b7f793261962e5461adbf438d82" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#0590171143964e32d913113df85174771d4699d9" dependencies = [ "async-stream", "async-trait", @@ -987,7 +974,7 @@ dependencies = [ [[package]] name = "dropshot_endpoint" version = "0.6.1-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#cc7ac240249b7b7f793261962e5461adbf438d82" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#0590171143964e32d913113df85174771d4699d9" dependencies = [ "proc-macro2", "quote", @@ -1598,9 +1585,9 @@ dependencies = [ [[package]] name = "hyper" -version = "0.14.17" +version = "0.14.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "043f0e083e9901b6cc658a77d1eb86f4fc650bbb977a4337dd63192826aa85dd" +checksum = "b26ae0a80afebe130861d90abf98e3814a4f28a4c6ffeb5ab8ebb2be311e0ef2" dependencies = [ "bytes", "futures-channel", @@ -1822,9 +1809,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.14" +version = "0.4.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" +checksum = "6389c490849ff5bc16be905ae24bc913a9c8892e19b2341dbc175e14c341c2b8" dependencies = [ "cfg-if", ] @@ -3086,7 +3073,7 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=832a86afce308d3210685af987ace1ba74c2ecd6#832a86afce308d3210685af987ace1ba74c2ecd6" +source = "git+https://github.com/oxidecomputer/propolis?rev=3a4fd8fa54ce8e1117bfa259bea39bca87f8ea14#3a4fd8fa54ce8e1117bfa259bea39bca87f8ea14" dependencies = [ "crucible", "reqwest", @@ -3183,9 +3170,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8380fe0152551244f0747b1bf41737e0f8a74f97a14ccefd1148187271634f3c" +checksum = "8ae183fc1b06c149f0c1793e1eb447c8b04bfe46d48e9e48bfb8d2d7ed64ecf0" dependencies = [ "bitflags", ] @@ -3531,12 +3518,11 @@ dependencies = [ [[package]] name = "serde-big-array" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18b20e7752957bbe9661cff4e0bb04d183d0948cdab2ea58cdb9df36a61dfe62" +checksum = "cd31f59f6fe2b0c055371bb2f16d7f0aa7d8881676c04a55b1596d1a17cd10a4" dependencies = [ "serde", - "serde_derive", ] [[package]] @@ -3822,7 +3808,7 @@ dependencies = [ "hostname", "slog", "slog-json", - "time 0.3.7", + "time 0.3.9", ] [[package]] @@ -3840,14 +3826,14 @@ dependencies = [ [[package]] name = "slog-json" -version = "2.6.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70f825ce7346f40aa318111df5d3a94945a7fdca9081584cb9b05692fb3dfcb4" +checksum = "3e1e53f61af1e3c8b852eef0a9dee29008f55d6dd63794f3f12cef786cf0f219" dependencies = [ "serde", "serde_json", "slog", - "time 0.3.7", + "time 0.3.9", ] [[package]] @@ -3860,7 
+3846,7 @@ dependencies = [ "slog", "term", "thread_local", - "time 0.3.7", + "time 0.3.9", ] [[package]] @@ -4256,9 +4242,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.7" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "004cbc98f30fa233c61a38bc77e96a9106e65c88f2d3bef182ae952027e5753d" +checksum = "c2702e08a7a860f005826c6815dcac101b19b5eb330c27fe4a5928fec1d20ddd" dependencies = [ "itoa 1.0.1", "libc", @@ -4268,9 +4254,9 @@ dependencies = [ [[package]] name = "time-macros" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25eb0ca3468fc0acc11828786797f6ef9aa1555e4a211a60d64cc8e4d1be47d6" +checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792" [[package]] name = "tiny-keccak" @@ -4647,7 +4633,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f214e8f697e925001e66ec2c6e37a4ef93f0f78c2eed7814394e10c62025b05" dependencies = [ - "generic-array 0.14.4", + "generic-array 0.14.5", "subtle", ] @@ -5095,9 +5081,9 @@ dependencies = [ [[package]] name = "zeroize" -version = "1.4.3" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d68d9dcec5f9b43a30d38c49f91dfedfaac384cb8f085faca366c26207dd1619" +checksum = "4756f7db3f7b5574938c3eb1c117038b8e07f95ee6718c0efad4ac21508f1efd" dependencies = [ "zeroize_derive", ] diff --git a/nexus/src/sagas.rs b/nexus/src/sagas.rs index d6801b25c6..ec37b084aa 100644 --- a/nexus/src/sagas.rs +++ b/nexus/src/sagas.rs @@ -687,11 +687,22 @@ async fn sic_delete_instance_record( let params = sagactx.saga_params(); let opctx = OpContext::for_saga_action(&sagactx, &params.serialized_authn); let instance_id = sagactx.lookup::<Uuid>("instance_id")?; + let instance_name = sagactx.lookup::<db::model::Name>("instance_name")?; // We currently only support deleting an instance if it is stopped or // failed, so update the state accordingly to allow deletion. - let runtime_state = - sagactx.lookup::<InstanceRuntimeState>("initial_runtime")?; + let authz_project = osagactx + .datastore() + .project_lookup_by_id(params.project_id) + .await + .map_err(ActionError::action_failed)?; + + let (authz_instance, db_instance) = osagactx + .datastore() + .instance_fetch(&opctx, &authz_project, &instance_name) + .await + .map_err(ActionError::action_failed)?; + let runtime_state = db::model::InstanceRuntimeState { state: db::model::InstanceState::new(InstanceState::Failed), // Must update the generation, or the database query will fail. @@ -700,14 +711,10 @@ async fn sic_delete_instance_record( // of the successful completion of the saga, or in this action during // saga unwinding. So we're guaranteed that the cached generation in the // saga log is the most recent in the database.
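// Note: instance_update_runtime below performs a generation-guarded // update (in effect a compare-and-swap on the generation number), so a // stale generation matches no rows and `updated` comes back false.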
- gen: db::model::Generation::from(runtime_state.gen.next()), - ..db::model::InstanceRuntimeState::from(runtime_state) + gen: db::model::Generation::from(db_instance.runtime_state.gen.next()), + ..db_instance.runtime_state }; - let authz_instance = osagactx - .datastore() - .instance_lookup_by_id(instance_id) - .await - .map_err(ActionError::action_failed)?; + let updated = osagactx .datastore() .instance_update_runtime(&instance_id, &runtime_state) @@ -727,6 +734,7 @@ async fn sic_delete_instance_record( .project_delete_instance(&opctx, &authz_instance) .await .map_err(ActionError::action_failed)?; + Ok(()) } diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index e9c62bc32a..b70db9d192 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -565,6 +565,7 @@ async fn test_instance_create_saga_removes_instance_database_record( memory: ByteCount::from_mebibytes_u32(4), hostname: String::from("inst"), network_interfaces: interface_params.clone(), + disks: vec![], }; let response = NexusRequest::objects_post(client, &url_instances, &instance_params) @@ -585,6 +586,7 @@ async fn test_instance_create_saga_removes_instance_database_record( memory: ByteCount::from_mebibytes_u32(4), hostname: String::from("inst2"), network_interfaces: interface_params, + disks: vec![], }; let _ = NexusRequest::objects_post(client, &url_instances, &instance_params) diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 1e46ec57b4..5682d02d5e 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -21,7 +21,7 @@ omicron-common = { path = "../common" } p256 = "0.9.0" percent-encoding = "2.1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "832a86afce308d3210685af987ace1ba74c2ecd6" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "3a4fd8fa54ce8e1117bfa259bea39bca87f8ea14" } rand = { version = "0.8.5", features = ["getrandom"] } reqwest = { version = "0.11.8", default-features = false, features = ["rustls-tls", "stream"] } schemars = { version = "0.8", features = [ "chrono", "uuid" ] }