From 9b7206f655d218533e1955fed7f5c5f7c9cc2ee5 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 3 May 2023 18:50:46 +0000 Subject: [PATCH] [sled-agent] Encrypt U.2 zpool top-level datasets Encryption of U.2 devices relies on key generated by the `KeyManager` introduced in #2990. The `KeyManager` is parameterized by a `SecretRetriever`, which provides input key material (IKM) that the `KeyManager` can use to derive keys. In this commit we introduce a `LocalSecretRetriever` which provides secrets local to a sled. Right now the secret is hardcoded, but we intend to either store random secrets on the M.2s or retrieve them from VPD data in the short term. Longer term, we will replace the `LocalSecretRetriever` with a `TrustQuorumSecretRetriever` which will provide a rack secret recomputed from trust quorum key shares. The `KeyManger` has been extended to run in an tokio task and process requests for from different "requesters". Each requester only has the ability to request keys that it requires to do its job. Currently there is a single requester, `StorageKeyRequester`, that is owned by the `StorageManager` which loans it out to `sled_hardware::Disk` to request keys on demand. `sled_hardware::Disk` requests keys for both creation of encrypted datasets and mounting of existing encrypted datasets. In order to actually allow ZFS to use the generated keys, it creates keyfiles inside a RAMDisk. Attempts are made to limit the lifetime of these files as well as accidental leakage of secrets. Changes have also been made to `illumos_utils::Zfs` to support encrytpion. To support future secret reconfiguration with trust quorum we store an additional `epoch` property on each encrypted dataset, which allows the `KeyManager` to retrieve the right vesion of the secret from a `SecretRetriever`. --- Cargo.lock | 3 +- Cargo.toml | 1 + illumos-utils/src/zfs.rs | 95 ++++++++-- installinator/src/dispatch.rs | 8 +- installinator/src/hardware.rs | 17 +- key-manager/Cargo.toml | 1 - key-manager/src/lib.rs | 142 +++++++++++++-- sled-agent/Cargo.toml | 1 + sled-agent/src/bootstrap/agent.rs | 19 ++ sled-agent/src/bootstrap/hardware.rs | 19 +- sled-agent/src/bootstrap/mod.rs | 1 + sled-agent/src/bootstrap/secret_retriever.rs | 39 ++++ sled-agent/src/services.rs | 59 +++++- sled-agent/src/storage_manager.rs | 46 ++++- sled-hardware/Cargo.toml | 1 + sled-hardware/src/disk.rs | 179 +++++++++++++++++-- 16 files changed, 562 insertions(+), 69 deletions(-) create mode 100644 sled-agent/src/bootstrap/secret_retriever.rs diff --git a/Cargo.lock b/Cargo.lock index 61f9f3d62e..994a48a759 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3512,7 +3512,6 @@ dependencies = [ "hkdf", "secrecy", "sha3", - "sled-hardware", "thiserror", "tokio", "zeroize", @@ -4768,6 +4767,7 @@ dependencies = [ "internal-dns", "ipnetwork", "itertools", + "key-manager", "macaddr", "mockall", "nexus-client 0.1.0", @@ -7413,6 +7413,7 @@ dependencies = [ "futures", "illumos-devinfo", "illumos-utils", + "key-manager", "libc", "libefi-illumos", "macaddr", diff --git a/Cargo.toml b/Cargo.toml index 8e2e2430f1..6b8790d6de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -190,6 +190,7 @@ internal-dns = { path = "internal-dns" } ipcc-key-value = { path = "ipcc-key-value" } ipnetwork = "0.20" itertools = "0.10.5" +key-manager = { path = "key-manager" } lazy_static = "1.4.0" libc = "0.2.143" linear-map = "1.2.0" diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index 2b9bd67f00..35d90cccda 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -40,6 +40,9 @@ enum EnsureFilesystemErrorRaw { #[error("Unexpected output from ZFS commands: {0}")] Output(String), + + #[error("Failed to mount encrypted filesystem")] + MountEncryptedFsFailed(crate::ExecutionError), } /// Error returned by [`Zfs::ensure_filesystem`]. @@ -104,6 +107,22 @@ impl fmt::Display for Mountpoint { } } +/// This is the path for an encryption key used by ZFS +#[derive(Debug, Clone)] +pub struct Keypath(pub PathBuf); + +impl fmt::Display for Keypath { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0.display()) + } +} + +#[derive(Debug)] +pub struct EncryptionDetails { + pub keypath: Keypath, + pub epoch: u64, +} + #[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))] impl Zfs { /// Lists all datasets within a pool or existing dataset. @@ -142,22 +161,16 @@ impl Zfs { mountpoint: Mountpoint, zoned: bool, do_format: bool, + encryption_details: Option, ) -> Result<(), EnsureFilesystemError> { - // If the dataset exists, we're done. - let mut command = std::process::Command::new(ZFS); - let cmd = command.args(&["list", "-Hpo", "name,type,mountpoint", name]); - // If the list command returns any valid output, validate it. - if let Ok(output) = execute(cmd) { - let stdout = String::from_utf8_lossy(&output.stdout); - let values: Vec<&str> = stdout.trim().split('\t').collect(); - if values != &[name, "filesystem", &mountpoint.to_string()] { - return Err(EnsureFilesystemError { - name: name.to_string(), - mountpoint, - err: EnsureFilesystemErrorRaw::Output(stdout.to_string()), - }); + if Self::dataset_exists(name, &mountpoint)? { + if encryption_details.is_none() { + // If the dataset exists, we're done. + return Ok(()); + } else { + // We need to load the encryption key and mount the filesystem + return Self::mount_encrypted_dataset(name, &mountpoint); } - return Ok(()); } if !do_format { @@ -174,6 +187,21 @@ impl Zfs { if zoned { cmd.args(&["-o", "zoned=on"]); } + if let Some(details) = encryption_details { + let keyloc = + format!("keylocation=file://{}", details.keypath.to_string()); + let epoch = format!("oxide:epoch={}", details.epoch); + cmd.args(&[ + "-o", + "encryption=aes-256-gcm", + "-o", + "keyformat=raw", + "-o", + &keyloc, + "-o", + &epoch, + ]); + } cmd.args(&["-o", &format!("mountpoint={}", mountpoint), name]); execute(cmd).map_err(|err| EnsureFilesystemError { name: name.to_string(), @@ -183,6 +211,45 @@ impl Zfs { Ok(()) } + fn mount_encrypted_dataset( + name: &str, + mountpoint: &Mountpoint, + ) -> Result<(), EnsureFilesystemError> { + let mut command = std::process::Command::new(PFEXEC); + let cmd = command.args(&[ZFS, "mount", "-l", name]); + execute(cmd).map_err(|err| EnsureFilesystemError { + name: name.to_string(), + mountpoint: mountpoint.clone(), + err: EnsureFilesystemErrorRaw::MountEncryptedFsFailed(err), + })?; + Ok(()) + } + + // Return true if the dataset exists, with an optional epoch if there is one. + // Epochs are only written to encrypted root datasets + fn dataset_exists( + name: &str, + mountpoint: &Mountpoint, + ) -> Result { + let mut command = std::process::Command::new(ZFS); + let cmd = command.args(&["list", "-Hpo", "name,type,mountpoint", name]); + // If the list command returns any valid output, validate it. + if let Ok(output) = execute(cmd) { + let stdout = String::from_utf8_lossy(&output.stdout); + let values: Vec<&str> = stdout.trim().split('\t').collect(); + if values != &[name, "filesystem", &mountpoint.to_string()] { + return Err(EnsureFilesystemError { + name: name.to_string(), + mountpoint: mountpoint.clone(), + err: EnsureFilesystemErrorRaw::Output(stdout.to_string()), + }); + } + Ok(true) + } else { + Ok(false) + } + } + pub fn set_oxide_value( filesystem_name: &str, name: &str, diff --git a/installinator/src/dispatch.rs b/installinator/src/dispatch.rs index f9a21b0216..7376995523 100644 --- a/installinator/src/dispatch.rs +++ b/installinator/src/dispatch.rs @@ -114,7 +114,7 @@ impl DebugHardwareScan { // Finding the write destination from the gimlet hardware logs details // about what it's doing sufficiently for this subcommand; just create a // write destination and then discard it. - _ = WriteDestination::from_hardware(log)?; + _ = WriteDestination::from_hardware(log).await?; Ok(()) } } @@ -243,11 +243,7 @@ impl InstallOpts { |cx| async move { let destination = if self.install_on_gimlet { let log = log.clone(); - tokio::task::spawn_blocking(move || { - WriteDestination::from_hardware(&log) - }) - .await - .unwrap()? + WriteDestination::from_hardware(&log).await? } else { // clap ensures `self.destination` is not `None` if // `install_on_gimlet` is false. diff --git a/installinator/src/hardware.rs b/installinator/src/hardware.rs index 421fcdc786..ca2debbc23 100644 --- a/installinator/src/hardware.rs +++ b/installinator/src/hardware.rs @@ -22,7 +22,7 @@ pub struct Hardware { } impl Hardware { - pub fn scan(log: &Logger) -> Result { + pub async fn scan(log: &Logger) -> Result { let is_gimlet = sled_hardware::is_gimlet() .context("failed to detect whether host is a gimlet")?; ensure!(is_gimlet, "hardware scan only supported on gimlets"); @@ -64,13 +64,14 @@ impl Hardware { } DiskVariant::M2 => (), } - - Some( - Disk::new(log, disk) - .context("failed to instantiate Disk handle for M.2"), - ) - }) - .collect::>>()?; + DiskVariant::M2 => { + let disk = Disk::new(log, disk, None) + .await + .context("failed to instantiate Disk handle for M.2")?; + m2_disks.push(disk); + } + } + } Ok(Self { m2_disks }) } diff --git a/key-manager/Cargo.toml b/key-manager/Cargo.toml index 53c164d064..a51afab0aa 100644 --- a/key-manager/Cargo.toml +++ b/key-manager/Cargo.toml @@ -9,7 +9,6 @@ async-trait.workspace = true hkdf = "0.12.3" secrecy.workspace = true sha3.workspace = true -sled-hardware.workspace = true thiserror.workspace = true tokio.workspace = true zeroize.workspace = true diff --git a/key-manager/src/lib.rs b/key-manager/src/lib.rs index 614ea46108..23cdf6e409 100644 --- a/key-manager/src/lib.rs +++ b/key-manager/src/lib.rs @@ -11,7 +11,7 @@ use async_trait::async_trait; use hkdf::Hkdf; use secrecy::{ExposeSecret, Secret}; use sha3::Sha3_256; -use sled_hardware::DiskIdentity; +use tokio::sync::{mpsc, oneshot}; use zeroize::{Zeroize, ZeroizeOnDrop}; /// Input Key Material @@ -62,6 +62,8 @@ pub enum Error { #[derive(Zeroize, ZeroizeOnDrop, Default)] struct Aes256GcmDiskEncryptionKey(Box<[u8; 32]>); +/// A Disk encryption key for a given epoch to be used with ZFS datasets for +/// U.2 devices pub struct VersionedAes256GcmDiskEncryptionKey { epoch: u64, key: Secret, @@ -77,6 +79,70 @@ impl VersionedAes256GcmDiskEncryptionKey { } } +/// We have to define a separate type here to prevent a circular dependency +/// with the `sled-hardware` crate. `sled-hardware` uses `key-manager`, so we +/// can't have `key-manager` also depend on `sled-hardware` +/// +/// We provide `From for ` +/// in the `sled-hardware` crate, which is what uses `StorageKeyRequester` in the first +/// place. +#[derive(Debug, Clone)] +pub struct DiskIdentity { + pub vendor: String, + pub serial: String, + pub model: String, +} + +/// A request sent from a [`StorageKeyRequester`] to the [`KeyManager`]. +enum StorageKeyRequest { + GetKey { + epoch: u64, + disk_id: DiskIdentity, + responder: + oneshot::Sender>, + }, + LoadLatestSecret { + responder: oneshot::Sender>, + }, +} + +/// A client of [`KeyManager`] that can request generation of storage related keys +pub struct StorageKeyRequester { + tx: mpsc::Sender, +} + +impl StorageKeyRequester { + /// Get a disk encryption key from the [`KeyManager`] for the given epoch + pub async fn get_key( + &self, + epoch: u64, + disk_id: DiskIdentity, + ) -> Result { + let (tx, rx) = oneshot::channel(); + self.tx + .send(StorageKeyRequest::GetKey { epoch, disk_id, responder: tx }) + .await + .map_err(|e| e.to_string()) + .expect("Failed to send GetKey request to KeyManager"); + + rx.await.expect("KeyManager bug (dropped responder without responding)") + } + + /// Loads the rack secret for the latest epoch into the [`KeyManager`] + /// + /// Return the latest epoch on success + pub async fn load_latest_secret(&self) -> Result { + let (tx, rx) = oneshot::channel(); + self.tx + .send(StorageKeyRequest::LoadLatestSecret { responder: tx }) + .await + .map_err(|e| e.to_string()) + .expect("Failed to send LoadLatestSecret request to KeyManager"); + + rx.await.expect("KeyManager bug (dropped responder without responding)") + } +} + /// The main mechanism used to derive keys from a shared secret for the Oxide /// control plane. pub struct KeyManager { @@ -96,22 +162,65 @@ pub struct KeyManager { /// reconfigurations before a node is no longer part of the trust quorum. /// This should most likely be a small number like `3`. prks: BTreeMap>, + + // Receives requests from a `StorageKeyRequester`, which is expected to run + // in the `StorageWorker` task. + storage_rx: mpsc::Receiver, } impl KeyManager { - pub fn new(secret_retriever: S) -> KeyManager { - KeyManager { secret_retriever, prks: BTreeMap::new() } + pub fn new(secret_retriever: S) -> (KeyManager, StorageKeyRequester) { + // There are up to 10 U.2 drives per sleds, but only one request should + // come in at a time from a single worker. We leave a small buffer for + // the possibility of asynchronous requests without responses that we + // may want to send in series. + let (tx, rx) = mpsc::channel(10); + + let storage_key_requester = StorageKeyRequester { tx }; + let key_manager = KeyManager { + secret_retriever, + prks: BTreeMap::new(), + storage_rx: rx, + }; + + (key_manager, storage_key_requester) + } + + /// Run the main receive loop of the `KeyManager` + /// + /// This should be spawned into a tokio task + pub async fn run(&mut self) { + loop { + tokio::select! { + Some(request) = self.storage_rx.recv() => { + use StorageKeyRequest::*; + match request { + GetKey{epoch, disk_id, responder} => { + let rsp = self.disk_encryption_key(epoch, &disk_id).await; + let _ = responder.send(rsp); + } + LoadLatestSecret{responder} => { + let rsp = self.load_latest_secret().await; + let _ = responder.send(rsp); + } + } + } + } + } } /// Load latest version of the input key material into the key manager. - pub async fn load_latest_secret(&mut self) -> Result<(), Error> { + /// + /// Return the latest epoch on success + async fn load_latest_secret(&mut self) -> Result { let ikm = self.secret_retriever.get_latest().await?; + let epoch = ikm.epoch(); self.insert_prk(ikm); - Ok(()) + Ok(epoch) } /// Load input key material for the given epoch into the key manager. - pub async fn load_secret(&mut self, epoch: u64) -> Result<(), Error> { + async fn load_secret(&mut self, epoch: u64) -> Result<(), Error> { match self.secret_retriever.get(epoch).await? { SecretState::Current(ikm) => self.insert_prk(ikm), SecretState::Reconfiguration { old, new } => { @@ -122,8 +231,8 @@ impl KeyManager { Ok(()) } - /// Derive an encryption key for the given [`sled_hardware::DiskIdentity`] - pub async fn disk_encryption_key( + /// Derive an encryption key for the given [`DiskIdentity`] + async fn disk_encryption_key( &mut self, epoch: u64, disk_id: &DiskIdentity, @@ -153,7 +262,8 @@ impl KeyManager { } /// Return the epochs for all secrets which are loaded - pub fn loaded_epochs(&self) -> Vec { + #[cfg(test)] + fn loaded_epochs(&self) -> Vec { self.prks.keys().copied().collect() } @@ -162,7 +272,11 @@ impl KeyManager { /// TODO(AJS): From what I can tell, the internal PRKs inside /// Hkdf instances are not zeroized, and so will remain in memory /// after drop. We should fix this one way or another. - pub fn clear(&mut self) { + /// + /// TODO(AJS): We should have a timeout mechanism so that we can clear + /// unused secrets from memory. + #[allow(unused)] + fn clear(&mut self) { self.prks = BTreeMap::new(); } @@ -269,7 +383,7 @@ mod tests { #[tokio::test] async fn disk_encryption_key_epoch_0() { - let mut km = KeyManager::new(TestSecretRetriever::new()); + let (mut km, _) = KeyManager::new(TestSecretRetriever::new()); km.load_latest_secret().await.unwrap(); let disk_id = DiskIdentity { vendor: "a".to_string(), @@ -293,7 +407,7 @@ mod tests { #[tokio::test] async fn different_disks_produce_different_keys() { - let mut km = KeyManager::new(TestSecretRetriever::new()); + let (mut km, _) = KeyManager::new(TestSecretRetriever::new()); km.load_latest_secret().await.unwrap(); let id_1 = DiskIdentity { vendor: "a".to_string(), @@ -321,7 +435,7 @@ mod tests { // Load a distinct secret (IKM) for epoch 1 retriever.insert(1, [1u8; 32]); - let mut km = KeyManager::new(retriever); + let (mut km, _) = KeyManager::new(retriever); km.load_latest_secret().await.unwrap(); let disk_id = DiskIdentity { vendor: "a".to_string(), @@ -343,7 +457,7 @@ mod tests { // Load a distinct secret (IKM) for epoch 1 retriever.insert(1, [1u8; 32]); - let mut km = KeyManager::new(retriever); + let (mut km, _) = KeyManager::new(retriever); let disk_id = DiskIdentity { vendor: "a".to_string(), diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 15180aeeeb..486e3dd18c 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -31,6 +31,7 @@ illumos-utils.workspace = true internal-dns.workspace = true ipnetwork.workspace = true itertools.workspace = true +key-manager.workspace = true macaddr.workspace = true nexus-client.workspace = true omicron-common.workspace = true diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index b87043876b..9b5cedce51 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -290,6 +290,23 @@ impl Agent { err, })?; + // We expect this directory to exist for Key Management + // It's purposefully in the ramdisk and files only exist long enough + // to create and mount encrypted datasets. + info!( + log, "Ensuring zfs key directory exists"; + "path" => sled_hardware::disk::KEYPATH_ROOT, + ); + tokio::fs::create_dir_all(sled_hardware::disk::KEYPATH_ROOT) + .await + .map_err(|err| BootstrapError::Io { + message: format!( + "Creating zfs key directory {}", + sled_hardware::disk::KEYPATH_ROOT + ), + err, + })?; + let bootstrap_etherstub = bootstrap_etherstub()?; let bootstrap_etherstub_vnic = Dladm::ensure_etherstub_vnic( @@ -340,6 +357,7 @@ impl Agent { // currently part of the ramdisk. let zoned = true; let do_format = true; + let encryption_details = None; Zfs::ensure_filesystem( ZONE_ZFS_RAMDISK_DATASET, Mountpoint::Path(Utf8PathBuf::from( @@ -347,6 +365,7 @@ impl Agent { )), zoned, do_format, + encryption_details, )?; // Before we start monitoring for hardware, ensure we're running from a diff --git a/sled-agent/src/bootstrap/hardware.rs b/sled-agent/src/bootstrap/hardware.rs index a99661aab4..807852f2eb 100644 --- a/sled-agent/src/bootstrap/hardware.rs +++ b/sled-agent/src/bootstrap/hardware.rs @@ -4,10 +4,12 @@ //! The bootstrap agent's view into hardware +use crate::bootstrap::secret_retriever::LocalSecretRetriever; use crate::config::{Config as SledConfig, SledMode as SledModeConfig}; use crate::services::ServiceManager; use crate::storage_manager::StorageManager; use illumos_utils::dladm::{Etherstub, EtherstubVnic}; +use key_manager::KeyManager; use sled_hardware::{DendriteAsic, HardwareManager, SledMode}; use slog::Logger; use std::net::Ipv6Addr; @@ -184,7 +186,22 @@ impl HardwareMonitor { let hardware = HardwareManager::new(log, sled_mode) .map_err(|e| Error::Hardware(e))?; - let storage_manager = StorageManager::new(&log).await; + // Spawn the `KeyManager` which is needed by the the StorageManager to + // retrieve encryption keys. + // + // TODO: Eventually other things are going to want access to "key + // requesters" returned by the key manager. We should probably move the + // creation and initialization upwards in the call stack along with the + // call to `StorageManager::new`. + let (mut key_manager, storage_key_requester) = + KeyManager::new(LocalSecretRetriever {}); + + // TODO: Should we keep a handle to the key_manager? It shouldn't ever + // actually be stopped. + tokio::spawn(async move { key_manager.run().await }); + + let storage_manager = + StorageManager::new(&log, storage_key_requester).await; let service_manager = ServiceManager::new( log.clone(), diff --git a/sled-agent/src/bootstrap/mod.rs b/sled-agent/src/bootstrap/mod.rs index b7c0d5c2f4..55fbbc118e 100644 --- a/sled-agent/src/bootstrap/mod.rs +++ b/sled-agent/src/bootstrap/mod.rs @@ -12,6 +12,7 @@ mod http_entrypoints; mod maghemite; pub(crate) mod params; pub(crate) mod rss_handle; +mod secret_retriever; pub mod server; pub mod trust_quorum; mod views; diff --git a/sled-agent/src/bootstrap/secret_retriever.rs b/sled-agent/src/bootstrap/secret_retriever.rs new file mode 100644 index 0000000000..9901387082 --- /dev/null +++ b/sled-agent/src/bootstrap/secret_retriever.rs @@ -0,0 +1,39 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Key retrieval mechanisms for use by [`key-manager::KeyManager`] + +use async_trait::async_trait; +use key_manager::{ + SecretRetriever, SecretRetrieverError, SecretState, VersionedIkm, +}; + +/// A [`key-manager::SecretRetriever`] for use before trust quorum is production ready +/// +/// The local retriever only returns keys for epoch 0 +pub struct LocalSecretRetriever {} + +#[async_trait] +impl SecretRetriever for LocalSecretRetriever { + /// TODO: Figure out where we want our local key material to come from + /// See RFD 388 + async fn get_latest(&self) -> Result { + let epoch = 0; + let salt = [0u8; 32]; + let secret = [0x1d; 32]; + + Ok(VersionedIkm::new(epoch, salt, &secret)) + } + + /// We don't plan to do any key rotation before trust quorum is ready + async fn get( + &self, + epoch: u64, + ) -> Result { + if epoch != 0 { + return Err(SecretRetrieverError::NoSuchEpoch(epoch)); + } + Ok(SecretState::Current(self.get_latest().await?)) + } +} diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 32e8474f09..e686c5c1bb 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -2326,6 +2326,7 @@ impl ServiceManager { mod test { use super::*; use crate::params::{ServiceZoneService, ZoneType}; + use async_trait::async_trait; use illumos_utils::{ dladm::{ Etherstub, MockDladm, BOOTSTRAP_ETHERSTUB_NAME, @@ -2334,6 +2335,10 @@ mod test { svc, zone::MockZones, }; + use key_manager::{ + KeyManager, SecretRetriever, SecretRetrieverError, SecretState, + StorageKeyRequester, VersionedIkm, + }; use std::net::Ipv6Addr; use std::os::unix::process::ExitStatusExt; use uuid::Uuid; @@ -2473,6 +2478,39 @@ mod test { } } + pub struct TestSecretRetriever {} + + #[async_trait] + impl SecretRetriever for TestSecretRetriever { + async fn get_latest( + &self, + ) -> Result { + let epoch = 0; + let salt = [0u8; 32]; + let secret = [0x1d; 32]; + + Ok(VersionedIkm::new(epoch, salt, &secret)) + } + + async fn get( + &self, + epoch: u64, + ) -> Result { + if epoch != 0 { + return Err(SecretRetrieverError::NoSuchEpoch(epoch)); + } + Ok(SecretState::Current(self.get_latest().await?)) + } + } + + async fn spawn_key_manager() -> StorageKeyRequester { + let (mut key_manager, storage_key_requester) = + KeyManager::new(TestSecretRetriever {}); + + tokio::spawn(async move { key_manager.run().await }); + storage_key_requester + } + #[tokio::test] #[serial_test::serial] async fn test_ensure_service() { @@ -2480,6 +2518,7 @@ mod test { omicron_test_utils::dev::test_setup_log("test_ensure_service"); let log = logctx.log.clone(); let test_config = TestConfig::new().await; + let storage_key_requester = spawn_key_manager().await; let mgr = ServiceManager::new( log.clone(), @@ -2492,7 +2531,7 @@ mod test { "rev-test".to_string(), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageManager::new(&log).await, + StorageManager::new(&log, storage_key_requester).await, ) .await .unwrap(); @@ -2529,6 +2568,7 @@ mod test { ); let log = logctx.log.clone(); let test_config = TestConfig::new().await; + let storage_key_requester = spawn_key_manager().await; let mgr = ServiceManager::new( log.clone(), @@ -2541,7 +2581,7 @@ mod test { "rev-test".to_string(), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageManager::new(&log).await, + StorageManager::new(&log, storage_key_requester).await, ) .await .unwrap(); @@ -2579,6 +2619,7 @@ mod test { ); let log = logctx.log.clone(); let test_config = TestConfig::new().await; + let storage_key_requester = spawn_key_manager().await; // First, spin up a ServiceManager, create a new service, and tear it // down. @@ -2593,7 +2634,7 @@ mod test { "rev-test".to_string(), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageManager::new(&log).await, + StorageManager::new(&log, storage_key_requester).await, ) .await .unwrap(); @@ -2633,7 +2674,7 @@ mod test { "rev-test".to_string(), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageManager::new(&log).await, + StorageManager::new(&log, storage_key_requester).await, ) .await .unwrap(); @@ -2668,6 +2709,7 @@ mod test { ); let log = logctx.log.clone(); let test_config = TestConfig::new().await; + let storage_key_requester = spawn_key_manager().await; // First, spin up a ServiceManager, create a new service, and tear it // down. @@ -2682,7 +2724,7 @@ mod test { "rev-test".to_string(), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageManager::new(&log).await, + StorageManager::new(&log, storage_key_requester).await, ) .await .unwrap(); @@ -2714,6 +2756,11 @@ mod test { ) .unwrap(); + // We don't really have a need to make the StorageKeyRequester `Clone` + // and we want to keep the channel buffer size management simple. So + // for tests, just create another key manager and `storage_key_requester`. + // They all manage the same hardcoded test secrets and will derive the same keys. + let storage_key_requester = spawn_key_manager().await; // Observe that the old service is not re-initialized. let mgr = ServiceManager::new( logctx.log.clone(), @@ -2726,7 +2773,7 @@ mod test { "rev-test".to_string(), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageManager::new(&log).await, + StorageManager::new(&log, storage_key_requester).await, ) .await .unwrap(); diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 73ddd75cdd..e5ff7142b9 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -13,6 +13,7 @@ use futures::FutureExt; use futures::StreamExt; use illumos_utils::zpool::{ZpoolKind, ZpoolName}; use illumos_utils::{zfs::Mountpoint, zpool::ZpoolInfo}; +use key_manager::StorageKeyRequester; use nexus_client::types::PhysicalDiskDeleteRequest; use nexus_client::types::PhysicalDiskKind; use nexus_client::types::PhysicalDiskPutRequest; @@ -260,6 +261,10 @@ struct StorageWorker { nexus_notifications: FuturesOrdered, rx: mpsc::Receiver, underlay: Arc>>, + + // A mechanism for requesting disk encryption keys from the + // [`key_manager::KeyManager`] + key_requester: StorageKeyRequester, } #[derive(Clone, Debug)] @@ -281,11 +286,13 @@ impl StorageWorker { let zoned = true; let fs_name = &dataset_name.full(); let do_format = true; + let encryption_details = None; Zfs::ensure_filesystem( &dataset_name.full(), Mountpoint::Path(Utf8PathBuf::from("/data")), zoned, do_format, + encryption_details, )?; // Ensure the dataset has a usable UUID. if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") { @@ -380,7 +387,13 @@ impl StorageWorker { // Ensure all disks conform to the expected partition layout. for disk in unparsed_disks.into_iter() { - match sled_hardware::Disk::new(&self.log, disk).map_err(|err| { + match sled_hardware::Disk::new( + &self.log, + disk, + Some(&self.key_requester), + ) + .await + .map_err(|err| { warn!(self.log, "Could not ensure partitions: {err}"); err }) { @@ -473,11 +486,16 @@ impl StorageWorker { info!(self.log, "Upserting disk: {disk:?}"); // Ensure the disk conforms to an expected partition layout. - let disk = - sled_hardware::Disk::new(&self.log, disk).map_err(|err| { - warn!(self.log, "Could not ensure partitions: {err}"); - err - })?; + let disk = sled_hardware::Disk::new( + &self.log, + disk, + Some(&self.key_requester), + ) + .await + .map_err(|err| { + warn!(self.log, "Could not ensure partitions: {err}"); + err + })?; let mut disks = resources.disks.lock().await; let disk = DiskWrapper::Real { @@ -495,7 +513,18 @@ impl StorageWorker { info!(self.log, "Upserting synthetic disk for: {zpool_name:?}"); let mut disks = resources.disks.lock().await; - sled_hardware::Disk::ensure_zpool_ready(&self.log, &zpool_name)?; + let synthetic_id = DiskIdentity { + vendor: "fake_vendor".to_string(), + serial: "fake_serial".to_string(), + model: zpool_name.id().to_string(), + }; + sled_hardware::Disk::ensure_zpool_ready( + &self.log, + &zpool_name, + &synthetic_id, + Some(&self.key_requester), + ) + .await?; let disk = DiskWrapper::Synthetic { zpool_name }; self.upsert_disk_locked(resources, &mut disks, disk).await } @@ -761,7 +790,7 @@ pub struct StorageManager { impl StorageManager { /// Creates a new [`StorageManager`] which should manage local storage. - pub async fn new(log: &Logger) -> Self { + pub async fn new(log: &Logger, key_requester: StorageKeyRequester) -> Self { let log = log.new(o!("component" => "StorageManager")); let resources = StorageResources { disks: Arc::new(Mutex::new(HashMap::new())), @@ -780,6 +809,7 @@ impl StorageManager { nexus_notifications: FuturesOrdered::new(), rx, underlay: Arc::new(Mutex::new(None)), + key_requester, }; worker.do_work(resources).await diff --git a/sled-hardware/Cargo.toml b/sled-hardware/Cargo.toml index 941ac74a07..5550d8ac05 100644 --- a/sled-hardware/Cargo.toml +++ b/sled-hardware/Cargo.toml @@ -11,6 +11,7 @@ camino.workspace = true cfg-if.workspace = true futures.workspace = true illumos-utils.workspace = true +key-manager.workspace = true libc.workspace = true macaddr.workspace = true nexus-client.workspace = true diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index 3dae8b9953..eb84b1adaa 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -4,15 +4,20 @@ use camino::{Utf8Path, Utf8PathBuf}; use illumos_utils::fstyp::Fstyp; +use illumos_utils::zfs::EncryptionDetails; +use illumos_utils::zfs::Keypath; use illumos_utils::zfs::Mountpoint; use illumos_utils::zfs::Zfs; use illumos_utils::zpool::Zpool; use illumos_utils::zpool::ZpoolKind; use illumos_utils::zpool::ZpoolName; +use key_manager::StorageKeyRequester; use slog::Logger; use slog::{info, warn}; use uuid::Uuid; +pub const KEYPATH_ROOT: &str = "/var/run/oxide/"; + cfg_if::cfg_if! { if #[cfg(target_os = "illumos")] { use crate::illumos::*; @@ -41,6 +46,12 @@ pub enum DiskError { CannotFormatMissingDevPath { path: Utf8PathBuf }, #[error("Formatting M.2 devices is not yet implemented")] CannotFormatM2NotImplemented, + #[error("KeyManager error: {0}")] + KeyManager(#[from] key_manager::Error), + #[error("Missing StorageKeyRequester when creating U.2 disk")] + MissingStorageKeyRequester, + #[error("Encrypted filesystem '{0}' missing 'oxide:epoch' property")] + MissingEpochProperty(String), } /// A partition (or 'slice') of a disk. @@ -74,6 +85,29 @@ pub struct DiskIdentity { pub model: String, } +impl From for key_manager::DiskIdentity { + fn from(value: DiskIdentity) -> Self { + key_manager::DiskIdentity { + vendor: value.vendor, + serial: value.serial, + model: value.model, + } + } +} + +impl From<&DiskIdentity> for Keypath { + fn from(id: &DiskIdentity) -> Self { + let filename = format!( + "{}-{}-{}-zfs-aes-256-gcm.key", + id.vendor, id.serial, id.model + ); + let mut path = PathBuf::new(); + path.push(KEYPATH_ROOT); + path.push(filename); + Keypath(path) + } +} + impl DiskPaths { // Returns the "illumos letter-indexed path" for a device. fn partition_path(&self, index: usize, raw: bool) -> Option { @@ -194,6 +228,9 @@ pub const CLUSTER_DATASET: &'static str = "cluster"; pub const CONFIG_DATASET: &'static str = "config"; pub const ZONE_DATASET: &'static str = "zone"; +// This is the root dataset for all U.2 drives. Encryption is inherited. +pub const CRYPT_DATASET: &'static str = "crypt"; + const U2_EXPECTED_DATASET_COUNT: usize = 1; static U2_EXPECTED_DATASETS: [&'static str; U2_EXPECTED_DATASET_COUNT] = [ // Stores filesystems for zones @@ -222,9 +259,17 @@ static M2_EXPECTED_DATASETS: [&'static str; M2_EXPECTED_DATASET_COUNT] = [ ]; impl Disk { - pub fn new( + /// Create a new Disk + /// + /// WARNING: In all cases where a U.2 is a possible `DiskVariant`, a + /// `StorageKeyRequester` must be passed so that disk encryption can + /// be used. The `StorageManager` for the sled-agent always has a + /// `StorageKeyRequester` available, and so the only place we should pass + /// `None` is for the M.2s touched by the Installinator. + pub async fn new( log: &Logger, unparsed_disk: UnparsedDisk, + key_requester: Option<&StorageKeyRequester>, ) -> Result { let paths = &unparsed_disk.paths; let variant = unparsed_disk.variant; @@ -243,7 +288,13 @@ impl Disk { )?; let zpool_name = Self::ensure_zpool_exists(log, variant, &zpool_path)?; - Self::ensure_zpool_ready(log, &zpool_name)?; + Self::ensure_zpool_ready( + log, + &zpool_name, + &unparsed_disk.identity, + key_requester, + ) + .await?; Ok(Self { paths: unparsed_disk.paths, @@ -256,9 +307,11 @@ impl Disk { }) } - pub fn ensure_zpool_ready( + pub async fn ensure_zpool_ready( log: &Logger, zpool_name: &ZpoolName, + disk_identity: &DiskIdentity, + key_requester: Option<&StorageKeyRequester>, ) -> Result<(), DiskError> { Self::ensure_zpool_imported(log, &zpool_name)?; Self::ensure_zpool_failmode_is_continue(log, &zpool_name)?; @@ -341,23 +394,83 @@ impl Disk { // Ensure that the zpool contains all the datasets we would like it to // contain. - fn ensure_zpool_has_datasets( + async fn ensure_zpool_has_datasets( + log: &Logger, zpool_name: &ZpoolName, + disk_identity: &DiskIdentity, + key_requester: Option<&StorageKeyRequester>, ) -> Result<(), DiskError> { - let datasets = match zpool_name.kind().into() { - DiskVariant::M2 => M2_EXPECTED_DATASETS.iter(), - DiskVariant::U2 => U2_EXPECTED_DATASETS.iter(), + let (root, datasets) = match zpool_name.kind().into() { + DiskVariant::M2 => (None, M2_EXPECTED_DATASETS.iter()), + DiskVariant::U2 => { + (Some(CRYPT_DATASET), U2_EXPECTED_DATASETS.iter()) + } }; - for dataset in datasets.into_iter() { + + let zoned = false; + let do_format = true; + + // Ensure the root encrypted filesystem exists + // Datasets below this in the hierarchy will inherit encryption + if let Some(dataset) = root { + let Some(key_requester) = key_requester else { + return Err(DiskError::MissingStorageKeyRequester); + }; let mountpoint = zpool_name.dataset_mountpoint(dataset); + let keypath: Keypath = disk_identity.into(); + + let epoch = if let Ok(epoch_str) = + Zfs::get_oxide_value(, "epoch") + { + if let Ok(epoch) = epoch_str.parse::() { + epoch + } else { + return Err(DiskError::MissingEpochProperty( + dataset.to_string(), + )); + } + } else { + key_requester.load_latest_secret().await? + }; + + let key = key_requester + .get_key(epoch, disk_identity.clone().into()) + .await?; + + let mut keyfile = + KeyFile::create(keypath.clone(), key.expose_secret(), log) + .await + .map_err(|error| DiskError::IoError { + path: keypath.0.clone(), + error, + })?; + + let encryption_details = EncryptionDetails { keypath, epoch }; + + let result = Zfs::ensure_filesystem( + &format!("{}/{}", zpool_name, dataset), + Mountpoint::Path(mountpoint), + zoned, + do_format, + Some(encryption_details), + ); + + keyfile.zero_and_unlink().await.map_err(|error| { + DiskError::IoError { path: keyfile.path().0.clone(), error } + })?; - let zoned = false; - let do_format = true; + result?; + } + + for dataset in datasets.into_iter() { + let mountpoint = zpool_name.dataset_mountpoint(dataset); + let encryption_details = None; Zfs::ensure_filesystem( &format!("{}/{}", zpool_name, dataset), Mountpoint::Path(mountpoint), zoned, do_format, + encryption_details, )?; } Ok(()) @@ -415,6 +528,52 @@ impl From for DiskVariant { } } +/// A file that wraps a zfs encryption key. +/// +/// We put this in a RAM backed filesystem and zero and delete it when we are +/// done with it. Unfortunately we cannot do this inside `Drop` because there is no +/// equivalent async drop. +pub struct KeyFile { + path: Keypath, + file: File, + log: Logger, +} + +impl KeyFile { + pub async fn create( + path: Keypath, + key: &[u8; 32], + log: &Logger, + ) -> std::io::Result { + // TODO: fix this to not truncate + // We want to overwrite any existing contents. + // If we truncate we may leave dirty pages around + // containing secrets. + let mut file = tokio::fs::OpenOptions::new() + .create(true) + .write(true) + .open(&path.0) + .await?; + file.write_all(key).await?; + Ok(KeyFile { path, file, log: log.clone() }) + } + + // It'd be nice to `impl Drop for `KeyFile` and then call `zero` + // from within the drop handler, but async `Drop` isn't supported. + pub async fn zero_and_unlink(&mut self) -> std::io::Result<()> { + let zeroes = [0u8; 32]; + let _ = self.file.seek(SeekFrom::Start(0)).await?; + self.file.write_all(&zeroes).await?; + info!(self.log, "Zeroed keyfile {}", self.path); + remove_file(&self.path().0).await?; + Ok(()) + } + + pub fn path(&self) -> &Keypath { + &self.path + } +} + #[cfg(test)] mod test { use super::*;