From 6fa27a335a58181262e85824922bdf0dc68cfc9b Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 10 Nov 2022 15:11:47 -0500 Subject: [PATCH] [sled-agent] Monitor for Tofino driver as factor in 'are we scrimlet' decision (#1918) - Adds `hardware` module to Sled Agent for minimally monitoring `devinfo` output. This will certainly evolve, but this PR includes a "bare minimum" real example of tracking the tofino driver. - Stop relying on `rss-config.toml` to decide if we're running on a scrimlet. Instead... - (General case) Rely on monitoring hardware - (Testing, manual case) Provide a `force-scrimlet` option to make the sled agent assume that it is a scrimlet Fixes https://github.com/oxidecomputer/minimum-upgradable-product/issues/19 Part of https://github.com/oxidecomputer/omicron/issues/1917 Part of https://github.com/oxidecomputer/omicron/issues/823 Pre-requisite for https://github.com/oxidecomputer/minimum-upgradable-product/issues/16 Pre-requisite for https://github.com/oxidecomputer/minimum-upgradable-product/issues/18 --- Cargo.lock | 11 + sled-agent/Cargo.toml | 1 + sled-agent/src/bootstrap/agent.rs | 30 +-- sled-agent/src/bootstrap/server.rs | 2 +- sled-agent/src/config.rs | 3 + sled-agent/src/hardware/illumos/mod.rs | 228 +++++++++++++++++++++ sled-agent/src/hardware/mod.rs | 28 +++ sled-agent/src/hardware/non_illumos/mod.rs | 37 ++++ sled-agent/src/lib.rs | 1 + sled-agent/src/nexus.rs | 43 ++++ sled-agent/src/server.rs | 61 +----- sled-agent/src/sled_agent.rs | 219 ++++++++++++++++++-- smf/sled-agent/config.toml | 9 + 13 files changed, 576 insertions(+), 97 deletions(-) create mode 100644 sled-agent/src/hardware/illumos/mod.rs create mode 100644 sled-agent/src/hardware/mod.rs create mode 100644 sled-agent/src/hardware/non_illumos/mod.rs diff --git a/Cargo.lock b/Cargo.lock index b4d9873d73..ccfd503648 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2450,6 +2450,16 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "illumos-devinfo" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/illumos-devinfo?rev=8fca0709e5137a3758374cb41ab1bfc60b03e6a9#8fca0709e5137a3758374cb41ab1bfc60b03e6a9" +dependencies = [ + "anyhow", + "libc", + "num_enum", +] + [[package]] name = "illumos-sys-hdrs" version = "0.1.0" @@ -3455,6 +3465,7 @@ dependencies = [ "expectorate", "futures", "http", + "illumos-devinfo", "internal-dns-client", "ipnetwork", "libc", diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index c0fd3cc29c..44691d9bb7 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -59,6 +59,7 @@ vsss-rs = { version = "2.0.0", default-features = false, features = ["std"] } zone = "0.1" [target.'cfg(target_os = "illumos")'.dependencies] +illumos-devinfo = { git = "https://github.com/oxidecomputer/illumos-devinfo", rev = "8fca0709e5137a3758374cb41ab1bfc60b03e6a9" } opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "23fdf5856f10f23e2d26865d2d7e2d3bc537bca3" } [dev-dependencies] diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index b5b964edb0..252b3789d5 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -272,19 +272,11 @@ impl Agent { *self.share.lock().await = Some(share); } - // TODO(https://github.com/oxidecomputer/omicron/issues/823): - // Currently, the prescence or abscence of RSS is our signal - // for "is this a scrimlet or not". 
- // Longer-term, we should make this call based on the underlying - // hardware. - let is_scrimlet = self.rss.lock().await.is_some(); - // Server does not exist, initialize it. let server = SledServer::start( &self.sled_config, self.parent_log.clone(), sled_address, - is_scrimlet, request.clone(), ) .await @@ -464,9 +456,13 @@ impl Agent { Ok(rack_secret) } - // Initializes the Rack Setup Service. - async fn start_rss(&self, config: &Config) -> Result<(), BootstrapError> { + /// Initializes the Rack Setup Service, if requested by `config`. + pub async fn start_rss( + &self, + config: &Config, + ) -> Result<(), BootstrapError> { if let Some(rss_config) = &config.rss_config { + info!(&self.log, "bootstrap service initializing RSS"); let rss = RssHandle::start_rss( &self.parent_log, rss_config.clone(), @@ -484,20 +480,6 @@ impl Agent { } Ok(()) } - - /// Performs device initialization: - /// - /// - Verifies, unpacks, and launches other services. - pub async fn initialize( - &self, - config: &Config, - ) -> Result<(), BootstrapError> { - info!(&self.log, "bootstrap service initializing"); - - self.start_rss(config).await?; - - Ok(()) - } } // We intentionally DO NOT derive `Debug` or `Serialize`; both provide avenues diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 35193f6c33..d79d82e47b 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -113,7 +113,7 @@ impl Server { // This ordering allows the bootstrap agent to communicate with // other bootstrap agents on the rack during the initialization // process. - if let Err(e) = server.bootstrap_agent.initialize(&config).await { + if let Err(e) = server.bootstrap_agent.start_rss(&config).await { server.inner.abort(); return Err(e.to_string()); } diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index e1042f3b4b..7986b3b647 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -19,6 +19,9 @@ pub struct Config { pub id: Uuid, /// Configuration for the sled agent debug log pub log: ConfigLogging, + /// Optionally force the sled to self-identify as a scrimlet (or gimlet, + /// if set to false). + pub stub_scrimlet: Option, /// Optional VLAN ID to be used for tagging guest VNICs. pub vlan: Option, /// Optional list of zpools to be used as "discovered disks". diff --git a/sled-agent/src/hardware/illumos/mod.rs b/sled-agent/src/hardware/illumos/mod.rs new file mode 100644 index 0000000000..e9a54cfc77 --- /dev/null +++ b/sled-agent/src/hardware/illumos/mod.rs @@ -0,0 +1,228 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use illumos_devinfo::DevInfo; +use slog::Logger; +use std::sync::Arc; +use std::sync::Mutex; +use tokio::sync::broadcast; +use tokio::task::JoinHandle; + +// A snapshot of information about the underlying Tofino device +struct TofinoSnapshot { + exists: bool, + driver_loaded: bool, +} + +impl TofinoSnapshot { + fn new() -> Self { + Self { exists: false, driver_loaded: false } + } +} + +// A snapshot of information about the underlying hardware +struct HardwareSnapshot { + tofino: TofinoSnapshot, +} + +impl HardwareSnapshot { + fn new() -> Self { + Self { tofino: TofinoSnapshot::new() } + } +} + +// Describes a view of the Tofino switch. +enum TofinoView { + // The view of the Tofino switch exactly matches the snapshot of hardware. 
+ Real(TofinoSnapshot), + // The Tofino switch has been "stubbed out", and the underlying hardware is + // being ignored. + Stub { active: bool }, +} + +// A cached copy of "our latest view of what hardware exists". +// +// This struct can be expanded arbitrarily, as it's useful for the Sled Agent +// to perceive hardware. +// +// Q: Why bother caching this information at all? Why not rely on devinfo for +// all queries? +// A: By keeping an in-memory representation, we can "diff" with the information +// reported from libdevinfo to decide when to send notifications and change +// which services are currently executing. +struct HardwareView { + tofino: TofinoView, + // TODO: Add U.2s, M.2s, other devices. +} + +impl HardwareView { + fn new() -> Self { + Self { tofino: TofinoView::Real(TofinoSnapshot::new()) } + } + + fn new_stub_tofino(active: bool) -> Self { + Self { tofino: TofinoView::Stub { active } } + } +} + +const TOFINO_SUBSYSTEM_VID: i32 = 0x1d1c; +const TOFINO_SUBSYSTEM_ID: i32 = 0x100; + +fn node_name(subsystem_vid: i32, subsystem_id: i32) -> String { + format!("pci{subsystem_vid:x},{subsystem_id:x}") +} + +// Performs a single walk of the device info tree, updating our view of hardware +// and sending notifications to any subscribers. +fn poll_device_tree( + log: &Logger, + inner: &Arc>, + tx: &broadcast::Sender, +) -> Result<(), String> { + // Construct a view of hardware by walking the device tree. + let mut device_info = DevInfo::new().map_err(|e| e.to_string())?; + let mut node_walker = device_info.walk_node(); + let mut polled_hw = HardwareSnapshot::new(); + while let Some(node) = + node_walker.next().transpose().map_err(|e| e.to_string())? + { + if node.node_name() + == node_name(TOFINO_SUBSYSTEM_VID, TOFINO_SUBSYSTEM_ID) + { + polled_hw.tofino.exists = true; + polled_hw.tofino.driver_loaded = + node.driver_name().as_deref() == Some("tofino"); + } + } + + // After inspecting the device tree, diff with the old view, and provide + // necessary updates. + let mut updates = vec![]; + { + let mut inner = inner.lock().unwrap(); + match inner.tofino { + TofinoView::Real(TofinoSnapshot { driver_loaded, exists }) => { + use super::HardwareUpdate::*; + // Identify if the Tofino device changed power states. + if exists != polled_hw.tofino.exists { + updates.push(TofinoDeviceChange); + } + + // Identify if the Tofino driver was recently loaded/unloaded. + match (driver_loaded, polled_hw.tofino.driver_loaded) { + (false, true) => updates.push(TofinoLoaded), + (true, false) => updates.push(TofinoUnloaded), + _ => (), + }; + + // Update our view of the underlying hardware + inner.tofino = TofinoView::Real(polled_hw.tofino); + } + TofinoView::Stub { .. } => (), + } + }; + + for update in updates.into_iter() { + info!(log, "Update from polling device tree: {:?}", update); + let _ = tx.send(update); + } + + Ok(()) +} + +async fn hardware_tracking_task( + log: Logger, + inner: Arc>, + tx: broadcast::Sender, +) { + loop { + if let Err(err) = poll_device_tree(&log, &inner, &tx) { + warn!(log, "Failed to query device tree: {err}"); + } + tokio::time::sleep(tokio::time::Duration::from_secs(5)).await; + } +} + +/// A representation of the underlying hardware. +/// +/// This structure provides interfaces for both querying and for receiving new +/// events. 
+pub struct HardwareManager { + log: Logger, + inner: Arc>, + tx: broadcast::Sender, + _worker: JoinHandle<()>, +} + +impl HardwareManager { + /// Creates a new representation of the underlying hardware, and initializes + /// a task which periodically updates that representation. + /// + /// Arguments: + /// - `stub_scrimlet`: Identifies if we should ignore the attached Tofino + /// device, and assume the device is a scrimlet (true) or gimlet (false). + /// If this argument is not supplied, we assume the device is a gimlet until + /// device scanning informs us otherwise. + pub fn new( + log: Logger, + stub_scrimlet: Option, + ) -> Result { + let log = log.new(o!("component" => "HardwareManager")); + + // The size of the broadcast channel is arbitrary, but bounded. + // If the channel fills up, old notifications will be dropped, and the + // receiver will receive a tokio::sync::broadcast::error::RecvError::Lagged + // error, indicating they should re-scan the hardware themselves. + let (tx, _) = broadcast::channel(1024); + let hw = match stub_scrimlet { + None => HardwareView::new(), + Some(active) => HardwareView::new_stub_tofino(active), + }; + let inner = Arc::new(Mutex::new(hw)); + + // Force the device tree to be polled at least once before returning. + // This mitigates issues where the Sled Agent could try to propagate + // an "empty" view of hardware to other consumers before the first + // query. + poll_device_tree(&log, &inner, &tx) + .map_err(|err| format!("Failed to poll device tree: {err}"))?; + + let log2 = log.clone(); + let inner2 = inner.clone(); + let tx2 = tx.clone(); + let _worker = tokio::task::spawn(async move { + hardware_tracking_task(log2, inner2, tx2).await + }); + + Ok(Self { log, inner, tx, _worker }) + } + + pub fn is_scrimlet(&self) -> bool { + let inner = self.inner.lock().unwrap(); + match inner.tofino { + TofinoView::Real(TofinoSnapshot { exists, .. }) => exists, + TofinoView::Stub { active } => active, + } + } + + pub fn is_scrimlet_driver_loaded(&self) -> bool { + let inner = self.inner.lock().unwrap(); + match inner.tofino { + TofinoView::Real(TofinoSnapshot { driver_loaded, .. }) => { + driver_loaded + } + TofinoView::Stub { active } => active, + } + } + + pub fn monitor(&self) -> broadcast::Receiver { + info!(self.log, "Monitoring for hardware updates"); + self.tx.subscribe() + // TODO: Do we want to send initial messages, based on the existing + // state? Or should we leave this responsibility to the caller, to + // start monitoring, and then query for the initial state? + // + // This could simplify the `SledAgent::monitor` function? + } +} diff --git a/sled-agent/src/hardware/mod.rs b/sled-agent/src/hardware/mod.rs new file mode 100644 index 0000000000..b678087a8e --- /dev/null +++ b/sled-agent/src/hardware/mod.rs @@ -0,0 +1,28 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +cfg_if::cfg_if! { + if #[cfg(target_os = "illumos")] { + mod illumos; + pub(crate) use illumos::*; + } else { + mod non_illumos; + pub(crate) use non_illumos::*; + } +} + +/// Provides information from the underlying hardware about updates +/// which may require action on behalf of the Sled Agent. +/// +/// These updates should generally be "non-opinionated" - the higher +/// layers of the sled agent can make the call to ignore these updates +/// or not. 
+#[derive(Clone, Debug)] +#[allow(dead_code)] +pub enum HardwareUpdate { + TofinoDeviceChange, + TofinoLoaded, + TofinoUnloaded, + // TODO: Notify about disks being added / removed, etc. +} diff --git a/sled-agent/src/hardware/non_illumos/mod.rs b/sled-agent/src/hardware/non_illumos/mod.rs new file mode 100644 index 0000000000..18e89c4dba --- /dev/null +++ b/sled-agent/src/hardware/non_illumos/mod.rs @@ -0,0 +1,37 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use slog::Logger; +use tokio::sync::broadcast; + +/// An unimplemented, stub representation of the underlying hardware. +/// +/// This is intended for non-illumos systems to have roughly the same interface +/// as illumos systems - it allows compilation to "work" on non-illumos +/// platforms, which can be handy for editor support. +/// +/// If you're actually trying to run the Sled Agent on non-illumos platforms, +/// use the simulated sled agent, which does not attempt to abstract hardware. +pub struct HardwareManager {} + +impl HardwareManager { + pub fn new( + _log: Logger, + _stub_scrimlet: Option, + ) -> Result { + unimplemented!("Accessing hardware unsupported on non-illumos"); + } + + pub fn is_scrimlet(&self) -> bool { + unimplemented!("Accessing hardware unsupported on non-illumos"); + } + + pub fn is_scrimlet_driver_loaded(&self) -> bool { + unimplemented!("Accessing hardware unsupported on non-illumos"); + } + + pub fn monitor(&self) -> broadcast::Receiver { + unimplemented!("Accessing hardware unsupported on non-illumos"); + } +} diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index d36880b06d..73dcdca062 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -21,6 +21,7 @@ pub mod common; // Modules for the non-simulated sled agent. pub mod bootstrap; pub mod config; +mod hardware; mod http_entrypoints; pub mod illumos; mod instance; diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 738371efd4..9f9206545a 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -13,8 +13,12 @@ use internal_dns_client::{ }; use omicron_common::address::NEXUS_INTERNAL_PORT; use slog::Logger; +use std::future::Future; use std::net::Ipv6Addr; +use std::pin::Pin; use std::sync::Arc; +use tokio::sync::mpsc; +use tokio::task::JoinHandle; struct Inner { log: Logger, @@ -59,3 +63,42 @@ impl LazyNexusClient { )) } } + +type NexusRequestFut = dyn Future + Send; +type NexusRequest = Pin>; + +/// A queue of futures which represent requests to Nexus. +pub struct NexusRequestQueue { + tx: mpsc::UnboundedSender, + _worker: JoinHandle<()>, +} + +impl NexusRequestQueue { + /// Creates a new request queue, along with a worker which executes + /// any incoming tasks. + pub fn new() -> Self { + // TODO(https://github.com/oxidecomputer/omicron/issues/1917): + // In the future, this should basically just be a wrapper around a + // generation number, and we shouldn't be serializing requests to Nexus. + // + // In the meanwhile, we're using an unbounded_channel for simplicity, so + // that we don't need to cope with dropped notifications / + // retransmissions. + let (tx, mut rx) = mpsc::unbounded_channel(); + + let _worker = tokio::spawn(async move { + while let Some(fut) = rx.recv().await { + fut.await; + } + }); + + Self { tx, _worker } + } + + /// Gets access to the sending portion of the request queue. 
+ /// + /// Callers can use this to add their own requests. + pub fn sender(&self) -> &mpsc::UnboundedSender { + &self.tx + } +} diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 4319f7019a..4aa9078890 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -9,9 +9,6 @@ use super::http_entrypoints::api as http_api; use super::sled_agent::SledAgent; use crate::bootstrap::params::SledAgentRequest; use crate::nexus::LazyNexusClient; -use omicron_common::backoff::{ - internal_service_policy_with_max, retry_notify, BackoffError, -}; use slog::Logger; use std::net::{SocketAddr, SocketAddrV6}; use uuid::Uuid; @@ -21,7 +18,6 @@ use uuid::Uuid; pub struct Server { /// Dropshot server for the API. http_server: dropshot::HttpServer, - _nexus_notifier_handle: tokio::task::JoinHandle<()>, } impl Server { @@ -38,7 +34,6 @@ impl Server { config: &Config, log: Logger, addr: SocketAddrV6, - is_scrimlet: bool, request: SledAgentRequest, ) -> Result { info!(log, "setting up sled agent server"); @@ -71,61 +66,7 @@ impl Server { .map_err(|error| format!("initializing server: {}", error))? .start(); - let sled_address = http_server.local_addr(); - let sled_id = config.id; - let nexus_notifier_handle = tokio::task::spawn(async move { - // Notify the control plane that we're up, and continue trying this - // until it succeeds. We retry with an randomized, capped exponential - // backoff. - // - // TODO-robustness if this returns a 400 error, we probably want to - // return a permanent error from the `notify_nexus` closure. - let notify_nexus = || async { - info!( - log, - "contacting server nexus, registering sled: {}", sled_id - ); - let role = if is_scrimlet { - nexus_client::types::SledRole::Scrimlet - } else { - nexus_client::types::SledRole::Gimlet - }; - - let nexus_client = lazy_nexus_client - .get() - .await - .map_err(|err| BackoffError::transient(err.to_string()))?; - nexus_client - .sled_agent_put( - &sled_id, - &nexus_client::types::SledAgentStartupInfo { - sa_address: sled_address.to_string(), - role, - }, - ) - .await - .map_err(|err| BackoffError::transient(err.to_string())) - }; - let log_notification_failure = |err, delay| { - warn!( - log, - "failed to notify nexus about sled agent: {}, will retry in {:?}", err, delay; - ); - }; - retry_notify( - internal_service_policy_with_max( - std::time::Duration::from_secs(1), - ), - notify_nexus, - log_notification_failure, - ) - .await - .expect("Expected an infinite retry loop contacting Nexus"); - }); - Ok(Server { - http_server, - _nexus_notifier_handle: nexus_notifier_handle, - }) + Ok(Server { http_server }) } /// Wait for the given server to shut down diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index c5e11a1f4b..4cc5d65ad7 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -6,6 +6,7 @@ use crate::bootstrap::params::SledAgentRequest; use crate::config::Config; +use crate::hardware::HardwareManager; use crate::illumos::vnic::VnicKind; use crate::illumos::zfs::{ Mountpoint, ZONE_ZFS_DATASET, ZONE_ZFS_DATASET_MOUNTPOINT, @@ -13,7 +14,7 @@ use crate::illumos::zfs::{ use crate::illumos::zone::IPADM; use crate::illumos::{execute, PFEXEC}; use crate::instance_manager::InstanceManager; -use crate::nexus::LazyNexusClient; +use crate::nexus::{LazyNexusClient, NexusRequestQueue}; use crate::params::{ DatasetKind, DiskStateRequested, InstanceHardware, InstanceMigrateParams, InstanceRuntimeStateRequested, InstanceSerialConsoleData, @@ -27,9 +28,13 @@ use 
omicron_common::api::{ internal::nexus::DiskRuntimeState, internal::nexus::InstanceRuntimeState, internal::nexus::UpdateArtifact, }; +use omicron_common::backoff::{ + internal_service_policy_with_max, retry_notify, BackoffError, +}; use slog::Logger; use std::net::SocketAddrV6; use std::process::Command; +use std::sync::Arc; use uuid::Uuid; use crucible_client_types::VolumeConstructionRequest; @@ -92,6 +97,9 @@ pub enum Error { #[error("Error managing guest networking: {0}")] Opte(#[from] crate::opte::Error), + #[error("Error monitoring hardware: {0}")] + Hardware(String), + #[error("Error resolving DNS name: {0}")] ResolveError(#[from] internal_dns_client::multiclient::ResolveError), } @@ -140,20 +148,35 @@ impl From for dropshot::HttpError { /// Describes an executing Sled Agent object. /// /// Contains both a connection to the Nexus, as well as managed instances. -pub struct SledAgent { +struct SledAgentInner { // ID of the Sled id: Uuid, + // Sled underlay address + addr: SocketAddrV6, + // Component of Sled Agent responsible for storage and dataset management. storage: StorageManager, // Component of Sled Agent responsible for managing Propolis instances. instances: InstanceManager, - lazy_nexus_client: LazyNexusClient, + // Component of Sled Agent responsible for monitoring hardware. + hardware: HardwareManager, // Other Oxide-controlled services running on this Sled. services: ServiceManager, + + // Lazily-acquired connection to Nexus. + lazy_nexus_client: LazyNexusClient, + + // A serialized request queue for operations interacting with Nexus. + nexus_request_queue: NexusRequestQueue, +} + +#[derive(Clone)] +pub struct SledAgent { + inner: Arc, } impl SledAgent { @@ -298,6 +321,11 @@ impl SledAgent { gateway_address: request.gateway.address, ..Default::default() }; + + let hardware = + HardwareManager::new(parent_log.clone(), config.stub_scrimlet) + .map_err(|e| Error::Hardware(e))?; + let services = ServiceManager::new( parent_log.clone(), etherstub.clone(), @@ -309,11 +337,170 @@ impl SledAgent { ) .await?; - Ok(SledAgent { id, storage, instances, lazy_nexus_client, services }) + let sled_agent = SledAgent { + inner: Arc::new(SledAgentInner { + id, + addr: sled_address, + storage, + instances, + hardware, + services, + lazy_nexus_client, + + // TODO(https://github.com/oxidecomputer/omicron/issues/1917): + // Propagate usage of this request queue throughout the Sled Agent. + // + // Also, we could maybe de-dup some of the backoff code in the request queue? + nexus_request_queue: NexusRequestQueue::new(), + }), + }; + + // We immediately add a notification to the request queue about our + // existence. If inspection of the hardware later informs us that we're + // actually running on a scrimlet, that's fine, the updated value will + // be received by Nexus eventually. + sled_agent.notify_nexus_about_self(&log); + + // Begin monitoring the underlying hardware, and reacting to changes. + let sa = sled_agent.clone(); + tokio::spawn(async move { + sa.monitor(log).await; + }); + + Ok(sled_agent) + } + + // Observe the current hardware state manually. + // + // We use this when we're monitoring hardware for the first + // time, and if we miss notifications. 
+ async fn full_hardware_scan(&self, log: &Logger) { + info!(log, "Performing full hardware scan"); + self.notify_nexus_about_self(log); + if self.inner.hardware.is_scrimlet_driver_loaded() { + self.ensure_scrimlet_services_active(&log).await; + } else { + self.ensure_scrimlet_services_deactive(&log).await; + } + } + + async fn monitor(&self, log: Logger) { + // Start monitoring the hardware for changes + let mut hardware_updates = self.inner.hardware.monitor(); + + // Scan the system manually for events we have have missed + // before we started monitoring. + self.full_hardware_scan(&log).await; + + // Rely on monitoring for tracking all future updates. + loop { + use tokio::sync::broadcast::error::RecvError; + match hardware_updates.recv().await { + Ok(update) => match update { + crate::hardware::HardwareUpdate::TofinoDeviceChange => { + // Inform Nexus that we're now a scrimlet, instead of a Gimlet. + // + // This won't block on Nexus responding; it may take while before + // Nexus actually comes online. + self.notify_nexus_about_self(&log); + } + crate::hardware::HardwareUpdate::TofinoLoaded => { + self.ensure_scrimlet_services_active(&log).await; + } + crate::hardware::HardwareUpdate::TofinoUnloaded => { + self.ensure_scrimlet_services_deactive(&log).await; + } + }, + Err(RecvError::Lagged(count)) => { + warn!(log, "Hardware monitor missed {count} messages"); + self.full_hardware_scan(&log).await; + } + Err(RecvError::Closed) => { + warn!(log, "Hardware monitor receiver closed; exiting"); + return; + } + } + } + } + + async fn ensure_scrimlet_services_active(&self, log: &Logger) { + // TODO(https://github.com/oxidecomputer/omicron/issues/823): Launch the switch zone, with + // Dendrite, MGS, and any other services we want to enable. + warn!(log, "Activating scrimlet services not yet implemented"); + } + + async fn ensure_scrimlet_services_deactive(&self, log: &Logger) { + // TODO(https://github.com/oxidecomputer/omicron/issues/823): Terminate the switch zone. + warn!(log, "Deactivating scrimlet services not yet implemented"); } pub fn id(&self) -> Uuid { - self.id + self.inner.id + } + + // Sends a request to Nexus informing it that the current sled exists. + fn notify_nexus_about_self(&self, log: &Logger) { + let sled_id = self.inner.id; + let lazy_nexus_client = self.inner.lazy_nexus_client.clone(); + let sled_address = self.inner.addr; + let is_scrimlet = self.inner.hardware.is_scrimlet(); + let log = log.clone(); + let fut = async move { + // Notify the control plane that we're up, and continue trying this + // until it succeeds. We retry with an randomized, capped exponential + // backoff. + // + // TODO-robustness if this returns a 400 error, we probably want to + // return a permanent error from the `notify_nexus` closure. 
+ let notify_nexus = || async { + info!( + log, + "contacting server nexus, registering sled: {}", sled_id + ); + let role = if is_scrimlet { + nexus_client::types::SledRole::Scrimlet + } else { + nexus_client::types::SledRole::Gimlet + }; + + let nexus_client = lazy_nexus_client + .get() + .await + .map_err(|err| BackoffError::transient(err.to_string()))?; + nexus_client + .sled_agent_put( + &sled_id, + &nexus_client::types::SledAgentStartupInfo { + sa_address: sled_address.to_string(), + role, + }, + ) + .await + .map_err(|err| BackoffError::transient(err.to_string())) + }; + let log_notification_failure = |err, delay| { + warn!( + log, + "failed to notify nexus about sled agent: {}, will retry in {:?}", err, delay; + ); + }; + retry_notify( + internal_service_policy_with_max( + std::time::Duration::from_secs(1), + ), + notify_nexus, + log_notification_failure, + ) + .await + .expect("Expected an infinite retry loop contacting Nexus"); + }; + self.inner + .nexus_request_queue + .sender() + .send(Box::pin(fut)) + .unwrap_or_else(|err| { + panic!("Failed to send future to request queue: {err}"); + }); } /// Ensures that particular services should be initialized. @@ -324,7 +511,7 @@ impl SledAgent { &self, requested_services: ServiceEnsureBody, ) -> Result<(), Error> { - self.services.ensure(requested_services).await?; + self.inner.services.ensure(requested_services).await?; Ok(()) } @@ -335,7 +522,8 @@ impl SledAgent { dataset_kind: DatasetKind, address: SocketAddrV6, ) -> Result<(), Error> { - self.storage + self.inner + .storage .upsert_filesystem(zpool_uuid, dataset_kind, address) .await?; Ok(()) @@ -349,7 +537,8 @@ impl SledAgent { target: InstanceRuntimeStateRequested, migrate: Option, ) -> Result { - self.instances + self.inner + .instances .ensure(instance_id, initial, target, migrate) .await .map_err(|e| Error::Instance(e)) @@ -373,7 +562,7 @@ impl SledAgent { &self, artifact: UpdateArtifact, ) -> Result<(), Error> { - let nexus_client = self.lazy_nexus_client.get().await?; + let nexus_client = self.inner.lazy_nexus_client.get().await?; crate::updates::download_artifact(artifact, &nexus_client).await?; Ok(()) } @@ -384,7 +573,8 @@ impl SledAgent { byte_offset: ByteOffset, max_bytes: Option, ) -> Result { - self.instances + self.inner + .instances .instance_serial_console_buffer_data( instance_id, byte_offset, @@ -401,7 +591,8 @@ impl SledAgent { disk_id: Uuid, snapshot_id: Uuid, ) -> Result<(), Error> { - self.instances + self.inner + .instances .instance_issue_disk_snapshot_request( instance_id, disk_id, @@ -430,7 +621,11 @@ impl SledAgent { _vpc_id: Uuid, rules: &[VpcFirewallRule], ) -> Result<(), Error> { - self.instances.firewall_rules_ensure(rules).await.map_err(Error::from) + self.inner + .instances + .firewall_rules_ensure(rules) + .await + .map_err(Error::from) } } diff --git a/smf/sled-agent/config.toml b/smf/sled-agent/config.toml index 9af1db6f2e..cf082e7dfc 100644 --- a/smf/sled-agent/config.toml +++ b/smf/sled-agent/config.toml @@ -2,6 +2,15 @@ id = "fb0f7546-4d46-40ca-9d56-cbb810684ca7" +# Identifies if the sled should be unconditionally treated as a scrimlet. +# +# If this is set to "true", the sled agent treats itself as a scrimlet. +# If this is set to "false", the sled agent treats itself as a gimlet. +# If this is unset: +# - On illumos, the sled automatically detects whether or not it is a scrimlet. +# - On all other platforms, the sled assumes it is a gimlet. 
+# stub_scrimlet = true + # A file-backed zpool can be manually created with the following: # # truncate -s 10GB testpool.vdev # # zpool create oxp_d462a7f7-b628-40fe-80ff-4e4189e2d62b "$PWD/testpool.vdev"
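As a sketch of the consumption pattern this patch sets up (using the `HardwareManager`, `HardwareUpdate`, and slog names introduced above; the wrapper function below is hypothetical, not part of the change): a caller subscribes via `monitor()` *before* querying the initial state, so no update can slip in between, and it handles a `Lagged` error by re-scanning rather than trusting dropped messages — the same pattern `SledAgent::monitor` follows.

```rust
// Hypothetical consumer of the hardware monitor. `HardwareManager`,
// `HardwareUpdate`, and the broadcast error types are as defined in this
// patch; this function itself is illustrative only.
use crate::hardware::{HardwareManager, HardwareUpdate};
use slog::{info, warn, Logger};
use tokio::sync::broadcast::error::RecvError;

async fn watch_hardware(log: Logger, hardware: HardwareManager) {
    // Subscribe before the initial query so no update can be missed
    // between "read the current state" and "start listening".
    let mut updates = hardware.monitor();

    // The monitor does not replay prior state to new subscribers, so
    // establish the initial view manually.
    info!(log, "initial view"; "scrimlet" => hardware.is_scrimlet());

    loop {
        match updates.recv().await {
            Ok(HardwareUpdate::TofinoDeviceChange) => {
                info!(log, "tofino device changed power state");
            }
            Ok(HardwareUpdate::TofinoLoaded) => {
                info!(log, "tofino driver loaded");
            }
            Ok(HardwareUpdate::TofinoUnloaded) => {
                info!(log, "tofino driver unloaded");
            }
            // The channel is bounded; if this receiver lagged, re-query
            // the full state instead of relying on dropped messages.
            Err(RecvError::Lagged(missed)) => {
                warn!(log, "missed {missed} hardware updates; re-scanning");
                info!(log, "current view"; "scrimlet" => hardware.is_scrimlet());
            }
            Err(RecvError::Closed) => return,
        }
    }
}
```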