From 988eb6ffe4a9a346074d12724d533e09e28a917a Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Tue, 2 May 2023 05:47:58 +0000 Subject: [PATCH 1/5] Introduce a `KeyManager` The `KeyManager` will be used by sled-agent to retrieve rack secrets via an implementation of a `SecretRetriever` and generate derived keys. Currently the only keys produced are those necessary for U.2 drive encryption, and follow the derivation scheme in section 4 of RFD 301. For initail implementation, input key material (IKM) will be retrieved from a `SecretRetriever` backed by a sled local mechanism. Future implementations of `SecretRetriever` will use the trust quorum rack secret retrieved from the bootstore. --- Cargo.lock | 23 +++ Cargo.toml | 3 + key-manager/Cargo.toml | 16 ++ key-manager/src/lib.rs | 344 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 386 insertions(+) create mode 100644 key-manager/Cargo.toml create mode 100644 key-manager/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index ed93f9366f..3a5de616ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3421,6 +3421,20 @@ dependencies = [ "cpufeatures", ] +[[package]] +name = "key-manager" +version = "0.1.0" +dependencies = [ + "async-trait", + "hkdf", + "secrecy", + "sha3", + "sled-hardware", + "thiserror", + "tokio", + "zeroize", +] + [[package]] name = "kstat-macro" version = "0.1.0" @@ -6670,6 +6684,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "secrecy" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bd1c54ea06cfd2f6b63219704de0b9b4f72dcc2b8fdef820be6cd799780e91e" +dependencies = [ + "zeroize", +] + [[package]] name = "security-framework" version = "2.8.2" diff --git a/Cargo.toml b/Cargo.toml index a37172286f..86ea8ec841 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ members = [ "installinator-common", "internal-dns", "ipcc-key-value", + "key-manager", "nexus", "nexus-client", "nexus/authz-macros", @@ -78,6 +79,7 @@ default-members = [ "installinator-common", "internal-dns", "ipcc-key-value", + "key-manager", "nexus", "nexus-client", "nexus/authz-macros", @@ -262,6 +264,7 @@ rustfmt-wrapper = "0.2" rustls = "0.21.0" samael = { git = "https://github.com/njaremko/samael", features = ["xmlsec"], branch = "master" } schemars = "0.8.12" +secrecy = "0.8.0" semver = { version = "1.0.17", features = ["std", "serde"] } serde = { version = "1.0", default-features = false, features = [ "derive" ] } serde_derive = "1.0" diff --git a/key-manager/Cargo.toml b/key-manager/Cargo.toml new file mode 100644 index 0000000000..53c164d064 --- /dev/null +++ b/key-manager/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "key-manager" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +async-trait.workspace = true +hkdf = "0.12.3" +secrecy.workspace = true +sha3.workspace = true +sled-hardware.workspace = true +thiserror.workspace = true +tokio.workspace = true +zeroize.workspace = true + diff --git a/key-manager/src/lib.rs b/key-manager/src/lib.rs new file mode 100644 index 0000000000..1f39bf1322 --- /dev/null +++ b/key-manager/src/lib.rs @@ -0,0 +1,344 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A crate used to derive keys useful for the Oxide control plane + +use std::collections::BTreeMap; + +use async_trait::async_trait; +use hkdf::Hkdf; +use secrecy::{ExposeSecret, Secret}; +use sha3::Sha3_256; +use sled_hardware::DiskIdentity; +use zeroize::{Zeroize, ZeroizeOnDrop}; + +/// Input Key Material +/// +/// This should never be used directly, and always wrapped in a `Secret` +/// upon construction. We sparate the two types, because a `Secret` must contain +/// `Zeroizable` data, and a `Box<[u8; 32]>` is not zeroizable on its own. +#[derive(Zeroize, ZeroizeOnDrop)] +pub struct Ikm(pub Box<[u8; 32]>); + +/// Secret Input Key Material for a given rack reconfiguration epoch +pub struct VersionedIkm { + pub epoch: u64, + pub salt: [u8; 32], + pub ikm: Secret, +} + +impl VersionedIkm { + pub fn new(epoch: u64, salt: [u8; 32], data: &[u8; 32]) -> VersionedIkm { + let ikm = Secret::new(Ikm(Box::new(*data))); + VersionedIkm { epoch, salt, ikm } + } +} + +/// An error returned by the [`KeyManager`] +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Secret not loaded for {epoch}")] + SecretNotLoaded { epoch: u64 }, + + #[error("Failed to retreive secret: {0}")] + SecretRetreival(#[from] SecretRetrieverError), +} + +/// Derived Disk Encryption key +#[derive(Zeroize, ZeroizeOnDrop, Default)] +pub struct Aes256GcmDiskEncryptionKey(pub Box<[u8; 32]>); + +pub struct VersionedAes256GcmDiskEncryptionKey { + pub epoch: u64, + pub key: Secret, +} + +/// The main mechanism used to derive keys from a shared secret for the Oxide +/// control plane. +pub struct KeyManager { + // A mechanism for retrieving input key material + secret_retriever: S, + + /// Pseudo-Random-Keys (PRKs) wrapped in an `Hkdf` structure so that we can + /// create keys from them with `HKDF-Expand` (`prk.expand_multi_info`). + /// + /// In the common case, we will only have a single PRK for everything. + /// + /// If there is an ongoing reconfiguration, we will have at least 2 PRKs. In + /// some cases of failure while multiple reconfigurations have taken place, + /// we may have multiple PRKs. Policy should limit the number of allowed + /// reconfigurations before a node is no longer part of the trust quorum. + /// This should most likely be a small number like `3`. + prks: BTreeMap>, +} + +impl KeyManager { + pub fn new(secret_retriever: S) -> KeyManager { + KeyManager { secret_retriever, prks: BTreeMap::new() } + } + + /// Load latest version of the input key material into the key manager. + pub async fn load_latest_secret(&mut self) -> Result<(), Error> { + let ikm = self.secret_retriever.get_latest().await?; + self.insert_prk(ikm); + Ok(()) + } + + /// Load input key material for the given epoch into the key manager. + pub async fn load_secret(&mut self, epoch: u64) -> Result<(), Error> { + match self.secret_retriever.get(epoch).await? { + SecretState::Current(ikm) => self.insert_prk(ikm), + SecretState::Reconfiguration { old, new } => { + self.insert_prk(old); + self.insert_prk(new); + } + } + Ok(()) + } + + /// Derive an encryption key for the given [`sled_hardware::DiskIdentity`] + pub async fn disk_encryption_key( + &mut self, + epoch: u64, + disk_id: &DiskIdentity, + ) -> Result { + let prk = if let Some(prk) = self.prks.get(&epoch) { + prk + } else { + self.load_secret(epoch).await?; + self.prks.get(&epoch).unwrap() + }; + + let mut key = Aes256GcmDiskEncryptionKey::default(); + + // Unwrap is safe because we know our buffer is large enough to hold the output key + prk.expand_multi_info( + &[ + b"U.2-zfs-", + disk_id.vendor.as_bytes(), + disk_id.model.as_bytes(), + disk_id.serial.as_bytes(), + ], + key.0.as_mut(), + ) + .unwrap(); + + Ok(VersionedAes256GcmDiskEncryptionKey { epoch, key: Secret::new(key) }) + } + + /// Return the epochs for all secrets which are loaded + pub fn loaded_secrets(&self) -> Vec { + self.prks.keys().copied().collect() + } + + /// Clear the PRKs + /// + /// TODO(AJS): From what I can tell, the internal PRKs inside + /// Hkdf instances are not zeroized, and so will remain in memory + /// after drop. We should fix this one way or another. + pub fn clear(&mut self) { + self.prks = BTreeMap::new(); + } + + fn insert_prk(&mut self, ikm: VersionedIkm) { + let prk = + Hkdf::new(Some(&ikm.salt), ikm.ikm.expose_secret().0.as_ref()); + self.prks.insert(ikm.epoch, prk); + } +} + +/// The current state returned from a [`SecretRetriever`] +pub enum SecretState { + /// A reconfiguration is not ongoing + Current(VersionedIkm), + + /// A reconfiguration is ongoing + Reconfiguration { old: VersionedIkm, new: VersionedIkm }, +} + +#[derive(thiserror::Error, Debug)] +pub enum SecretRetrieverError { + #[error("Secret does not exist for {0}")] + NoSuchEpoch(u64), +} + +/// A mechanism for retrieving a secrets to use as input key material to HKDF- +/// Extract. +#[async_trait] +pub trait SecretRetriever { + /// Return the latest secret + //// + /// This is useful when a new entity is being encrypted and there is no need + /// for a reconfiguration. When an entity is already encrypted, and needs to + /// be decrypted, the user should instead call the [`SecretRetriever::get`]. + async fn get_latest(&self) -> Result; + + /// Get the secret for the given epoch + /// + /// If the requested epoch is not the latest one, then return the secret for + /// the latest epoch, along with the secret for the requested epoch so that + /// the key can be rotated. Note that it is not necessary for the latest + /// epoch to be exactly 1 epoch greater than the requested epoch. Multiple + /// epochs can pass, without a reconfiguration taking place due to a node + /// being temporarily offline. + /// + /// Return an error if its not possible to recover the old secret given the + /// latest secret. + /// + /// TODO(AJS): Ensure that we store the epoch of the actual key protecting + /// data in a ZFS property for each drive. This will allow us to retrieve + /// the correct keys when needed. + async fn get( + &self, + epoch: u64, + ) -> Result; +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::BTreeMap; + + pub struct TestSecretRetriever { + ikms: BTreeMap, + } + + impl TestSecretRetriever { + pub fn new() -> TestSecretRetriever { + TestSecretRetriever { ikms: BTreeMap::from([(0, [0u8; 32])]) } + } + + pub fn insert(&mut self, epoch: u64, bytes: [u8; 32]) { + self.ikms.insert(epoch, bytes); + } + } + + #[async_trait] + impl SecretRetriever for TestSecretRetriever { + async fn get_latest( + &self, + ) -> Result { + let salt = [0u8; 32]; + let (epoch, bytes) = self.ikms.last_key_value().unwrap(); + Ok(VersionedIkm::new(*epoch, salt, bytes)) + } + + async fn get( + &self, + epoch: u64, + ) -> Result { + let salt = [0u8; 32]; + let bytes = self + .ikms + .get(&epoch) + .ok_or(SecretRetrieverError::NoSuchEpoch(epoch))?; + let ikm = VersionedIkm::new(epoch, salt, bytes); + let latest = self.get_latest().await.unwrap(); + if ikm.epoch != latest.epoch { + Ok(SecretState::Reconfiguration { old: ikm, new: latest }) + } else { + Ok(SecretState::Current(ikm)) + } + } + } + + #[tokio::test] + async fn disk_encryption_key_epoch_0() { + let mut km = KeyManager::new(TestSecretRetriever::new()); + km.load_latest_secret().await.unwrap(); + let disk_id = DiskIdentity { + vendor: "a".to_string(), + model: "b".to_string(), + serial: "c".to_string(), + }; + let epoch = 0; + let key = km.disk_encryption_key(epoch, &disk_id).await.unwrap(); + assert_eq!(key.epoch, epoch); + + // Key derivation is deterministic based on disk_id and loaded secrets + let key2 = km.disk_encryption_key(epoch, &disk_id).await.unwrap(); + assert_eq!(key.epoch, key2.epoch); + assert_eq!(key.key.expose_secret().0, key2.key.expose_secret().0); + + // There is no secret for epoch 1 + let epoch = 1; + assert!(km.disk_encryption_key(epoch, &disk_id).await.is_err()); + assert_eq!(vec![0], km.loaded_secrets()); + } + + #[tokio::test] + async fn different_disks_produce_different_keys() { + let mut km = KeyManager::new(TestSecretRetriever::new()); + km.load_latest_secret().await.unwrap(); + let id_1 = DiskIdentity { + vendor: "a".to_string(), + model: "b".to_string(), + serial: "c".to_string(), + }; + let id_2 = DiskIdentity { + vendor: "a".to_string(), + model: "b".to_string(), + serial: "d".to_string(), + }; + + let epoch = 0; + let key1 = km.disk_encryption_key(epoch, &id_1).await.unwrap(); + let key2 = km.disk_encryption_key(epoch, &id_2).await.unwrap(); + assert_eq!(key1.epoch, epoch); + assert_eq!(key2.epoch, epoch); + assert_ne!(key1.key.expose_secret().0, key2.key.expose_secret().0); + } + + #[tokio::test] + async fn different_ikm_produces_different_keys() { + let mut retriever = TestSecretRetriever::new(); + + // Load a distinct secret (IKM) for epoch 1 + retriever.insert(1, [1u8; 32]); + + let mut km = KeyManager::new(retriever); + km.load_latest_secret().await.unwrap(); + let disk_id = DiskIdentity { + vendor: "a".to_string(), + model: "b".to_string(), + serial: "c".to_string(), + }; + let epoch = 0; + let key0 = km.disk_encryption_key(epoch, &disk_id).await.unwrap(); + + let epoch = 1; + let key1 = km.disk_encryption_key(epoch, &disk_id).await.unwrap(); + assert_ne!(key0.key.expose_secret().0, key1.key.expose_secret().0); + } + + #[tokio::test] + async fn loading_key_for_old_epoch_loads_latest_epoch() { + let mut retriever = TestSecretRetriever::new(); + + // Load a distinct secret (IKM) for epoch 1 + retriever.insert(1, [1u8; 32]); + + let mut km = KeyManager::new(retriever); + + let disk_id = DiskIdentity { + vendor: "a".to_string(), + model: "b".to_string(), + serial: "c".to_string(), + }; + let epoch = 0; + + assert_eq!(0, km.loaded_secrets().len()); + + // This will load secrets if they are not already loaded. + // Note that we never called `km.load_latest_secret()` + let _ = km.disk_encryption_key(epoch, &disk_id).await.unwrap(); + assert_eq!(2, km.loaded_secrets().len()); + + // Loading just the latest secret will not load any other secrets + km.clear(); + let epoch = 1; + let _ = km.disk_encryption_key(epoch, &disk_id).await.unwrap(); + assert_eq!(1, km.loaded_secrets().len()); + } +} From 75d92cf2095a16ce622234f5c06c16d232e4a712 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 3 May 2023 18:55:29 +0000 Subject: [PATCH 2/5] Review Fixes --- key-manager/src/lib.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/key-manager/src/lib.rs b/key-manager/src/lib.rs index 1f39bf1322..41f4152c8d 100644 --- a/key-manager/src/lib.rs +++ b/key-manager/src/lib.rs @@ -60,10 +60,12 @@ pub struct KeyManager { // A mechanism for retrieving input key material secret_retriever: S, - /// Pseudo-Random-Keys (PRKs) wrapped in an `Hkdf` structure so that we can - /// create keys from them with `HKDF-Expand` (`prk.expand_multi_info`). + /// Rack configuration epochs mapped to Pseudo-Random-Keys (PRKs) wrapped + /// in an `Hkdf` structure so that we can create keys from them with `HKDF- + /// Expand` (`prk.expand_multi_info`). /// - /// In the common case, we will only have a single PRK for everything. + /// In the common case, we will only have a single PRK for everything, mapped + /// from the latest epoch. /// /// If there is an ongoing reconfiguration, we will have at least 2 PRKs. In /// some cases of failure while multiple reconfigurations have taken place, @@ -128,7 +130,7 @@ impl KeyManager { } /// Return the epochs for all secrets which are loaded - pub fn loaded_secrets(&self) -> Vec { + pub fn loaded_epochs(&self) -> Vec { self.prks.keys().copied().collect() } @@ -264,7 +266,7 @@ mod tests { // There is no secret for epoch 1 let epoch = 1; assert!(km.disk_encryption_key(epoch, &disk_id).await.is_err()); - assert_eq!(vec![0], km.loaded_secrets()); + assert_eq!(vec![0], km.loaded_epochs()); } #[tokio::test] @@ -328,17 +330,17 @@ mod tests { }; let epoch = 0; - assert_eq!(0, km.loaded_secrets().len()); + assert_eq!(0, km.loaded_epochs().len()); // This will load secrets if they are not already loaded. // Note that we never called `km.load_latest_secret()` let _ = km.disk_encryption_key(epoch, &disk_id).await.unwrap(); - assert_eq!(2, km.loaded_secrets().len()); + assert_eq!(2, km.loaded_epochs().len()); // Loading just the latest secret will not load any other secrets km.clear(); let epoch = 1; let _ = km.disk_encryption_key(epoch, &disk_id).await.unwrap(); - assert_eq!(1, km.loaded_secrets().len()); + assert_eq!(1, km.loaded_epochs().len()); } } From 6abfb00eab09d091fd0ddb659d2626c2dcea4294 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 3 May 2023 19:19:13 +0000 Subject: [PATCH 3/5] impl Debug for Ikm --- key-manager/src/lib.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/key-manager/src/lib.rs b/key-manager/src/lib.rs index 41f4152c8d..e675d826e5 100644 --- a/key-manager/src/lib.rs +++ b/key-manager/src/lib.rs @@ -5,6 +5,7 @@ //! A crate used to derive keys useful for the Oxide control plane use std::collections::BTreeMap; +use std::fmt::Debug; use async_trait::async_trait; use hkdf::Hkdf; @@ -21,6 +22,14 @@ use zeroize::{Zeroize, ZeroizeOnDrop}; #[derive(Zeroize, ZeroizeOnDrop)] pub struct Ikm(pub Box<[u8; 32]>); +/// We impl Debug here only to guarantee nobody re-implements this to actually +/// expose the secret. +impl Debug for Ikm { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Ikm(secret)") + } +} + /// Secret Input Key Material for a given rack reconfiguration epoch pub struct VersionedIkm { pub epoch: u64, From 79cf577eda5264af8622c8e48299c706b2a89a8e Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 3 May 2023 20:07:28 +0000 Subject: [PATCH 4/5] Make Ikm and Aes256GcmDiskEncryptionKey private --- key-manager/src/lib.rs | 54 ++++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/key-manager/src/lib.rs b/key-manager/src/lib.rs index e675d826e5..2f46ce378d 100644 --- a/key-manager/src/lib.rs +++ b/key-manager/src/lib.rs @@ -20,21 +20,13 @@ use zeroize::{Zeroize, ZeroizeOnDrop}; /// upon construction. We sparate the two types, because a `Secret` must contain /// `Zeroizable` data, and a `Box<[u8; 32]>` is not zeroizable on its own. #[derive(Zeroize, ZeroizeOnDrop)] -pub struct Ikm(pub Box<[u8; 32]>); - -/// We impl Debug here only to guarantee nobody re-implements this to actually -/// expose the secret. -impl Debug for Ikm { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Ikm(secret)") - } -} +struct Ikm(Box<[u8; 32]>); /// Secret Input Key Material for a given rack reconfiguration epoch pub struct VersionedIkm { - pub epoch: u64, - pub salt: [u8; 32], - pub ikm: Secret, + epoch: u64, + salt: [u8; 32], + ikm: Secret, } impl VersionedIkm { @@ -42,6 +34,18 @@ impl VersionedIkm { let ikm = Secret::new(Ikm(Box::new(*data))); VersionedIkm { epoch, salt, ikm } } + + pub fn epoch(&self) -> u64 { + self.epoch + } + + pub fn salt(&self) -> &[u8; 32] { + &self.salt + } + + pub fn expose_secret(&self) -> &[u8; 32] { + &self.ikm.expose_secret().0 + } } /// An error returned by the [`KeyManager`] @@ -56,11 +60,21 @@ pub enum Error { /// Derived Disk Encryption key #[derive(Zeroize, ZeroizeOnDrop, Default)] -pub struct Aes256GcmDiskEncryptionKey(pub Box<[u8; 32]>); +struct Aes256GcmDiskEncryptionKey(Box<[u8; 32]>); pub struct VersionedAes256GcmDiskEncryptionKey { - pub epoch: u64, - pub key: Secret, + epoch: u64, + key: Secret, +} + +impl VersionedAes256GcmDiskEncryptionKey { + pub fn epoch(&self) -> u64 { + self.epoch + } + + pub fn expose_secret(&self) -> &[u8; 32] { + &self.key.expose_secret().0 + } } /// The main mechanism used to derive keys from a shared secret for the Oxide @@ -246,7 +260,7 @@ mod tests { .ok_or(SecretRetrieverError::NoSuchEpoch(epoch))?; let ikm = VersionedIkm::new(epoch, salt, bytes); let latest = self.get_latest().await.unwrap(); - if ikm.epoch != latest.epoch { + if ikm.epoch() != latest.epoch() { Ok(SecretState::Reconfiguration { old: ikm, new: latest }) } else { Ok(SecretState::Current(ikm)) @@ -269,8 +283,8 @@ mod tests { // Key derivation is deterministic based on disk_id and loaded secrets let key2 = km.disk_encryption_key(epoch, &disk_id).await.unwrap(); - assert_eq!(key.epoch, key2.epoch); - assert_eq!(key.key.expose_secret().0, key2.key.expose_secret().0); + assert_eq!(key.epoch(), key2.epoch()); + assert_eq!(key.expose_secret(), key2.expose_secret()); // There is no secret for epoch 1 let epoch = 1; @@ -298,7 +312,7 @@ mod tests { let key2 = km.disk_encryption_key(epoch, &id_2).await.unwrap(); assert_eq!(key1.epoch, epoch); assert_eq!(key2.epoch, epoch); - assert_ne!(key1.key.expose_secret().0, key2.key.expose_secret().0); + assert_ne!(key1.expose_secret(), key2.expose_secret()); } #[tokio::test] @@ -320,7 +334,7 @@ mod tests { let epoch = 1; let key1 = km.disk_encryption_key(epoch, &disk_id).await.unwrap(); - assert_ne!(key0.key.expose_secret().0, key1.key.expose_secret().0); + assert_ne!(key0.expose_secret(), key1.expose_secret()); } #[tokio::test] From 8b9d7751cd8441799676163e9607bf11ba099c18 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 3 May 2023 20:19:15 +0000 Subject: [PATCH 5/5] Clean up usage of VersionedIkm --- key-manager/src/lib.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/key-manager/src/lib.rs b/key-manager/src/lib.rs index 2f46ce378d..614ea46108 100644 --- a/key-manager/src/lib.rs +++ b/key-manager/src/lib.rs @@ -167,9 +167,8 @@ impl KeyManager { } fn insert_prk(&mut self, ikm: VersionedIkm) { - let prk = - Hkdf::new(Some(&ikm.salt), ikm.ikm.expose_secret().0.as_ref()); - self.prks.insert(ikm.epoch, prk); + let prk = Hkdf::new(Some(ikm.salt()), ikm.expose_secret()); + self.prks.insert(ikm.epoch(), prk); } }