From 29bbfc22ad0ec65dc39187dfed60e7cb2cd99043 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 30 Oct 2022 15:22:40 -0400 Subject: [PATCH] [package] Destroy datasets as a part of uninstallation Major changes: - Uninstallation now collects all Zpool and Zone based datasets, prompts the user, and destroys them during the uninstall process. - Adds a "-f / --force" option to "omicron-package", allowing callers to skip the new confirmation prompt. - Adds a "deactivate" command to "omicron-package". This allows callers to remove Zones and disable services, but does not delete durable configurations and storage. A caller should be able to call "deactivate" -> "activate" -> "deactivate" repeatedly without losing durable state. Minor changes: - Updates documentation for omicron-package - Improves handling of addresses deleted from `cleanup_networking_resources`, especially in cases of duplicates - Rename "filesystem" to "dataset" in functions where it's more appropriate to be generic in the context of ZFS. Fixes https://github.com/oxidecomputer/omicron/issues/1884 Part of https://github.com/oxidecomputer/omicron/issues/1119 Part of https://github.com/oxidecomputer/omicron/issues/1313 --- package/src/bin/omicron-package.rs | 136 ++++++++++++++++++++++++----- package/src/lib.rs | 70 ++++++++++----- sled-agent/src/illumos/zfs.rs | 38 +++++--- sled-agent/src/illumos/zpool.rs | 43 ++++++++- sled-agent/src/lib.rs | 2 + sled-agent/src/sled_agent.rs | 12 ++- sled-agent/src/storage_manager.rs | 4 +- 7 files changed, 242 insertions(+), 63 deletions(-) diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs index 4907d2e7bb..c72cdac9b5 100644 --- a/package/src/bin/omicron-package.rs +++ b/package/src/bin/omicron-package.rs @@ -10,7 +10,7 @@ use futures::stream::{self, StreamExt, TryStreamExt}; use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; use omicron_package::{parse, BuildCommand, DeployCommand}; use omicron_sled_agent::cleanup_networking_resources; -use omicron_sled_agent::zone; +use omicron_sled_agent::{zfs, zone, zpool}; use omicron_zone_package::config::Config as PackageConfig; use omicron_zone_package::package::{Package, PackageOutput, PackageSource}; use omicron_zone_package::progress::Progress; @@ -24,6 +24,7 @@ use slog::Drain; use slog::Logger; use std::env; use std::fs::create_dir_all; +use std::io::Write; use std::path::{Path, PathBuf}; use std::sync::Arc; use tokio::io::{AsyncReadExt, AsyncWriteExt, BufReader}; @@ -71,6 +72,15 @@ struct Args { )] target: Target, + #[clap( + short, + long, + help = "Skip confirmation prompt for destructive operations", + action, + default_value_t = false + )] + force: bool, + #[clap(subcommand)] subcommand: SubCommand, } @@ -311,7 +321,7 @@ async fn do_package(config: &Config, output_directory: &Path) -> Result<()> { Ok(()) } -fn do_unpack( +async fn do_unpack( config: &Config, artifact_dir: &Path, install_dir: &Path, @@ -335,15 +345,20 @@ fn do_unpack( "src" => %src.to_string_lossy(), "dst" => %dst.to_string_lossy(), ); - std::fs::copy(&src, &dst)?; + std::fs::copy(&src, &dst).map_err(|err| { + anyhow!( + "Failed to copy {src} to {dst}: {err}", + src = src.display(), + dst = dst.display() + ) + })?; Ok(()) }, )?; if env::var("OMICRON_NO_UNINSTALL").is_err() { // Ensure we start from a clean slate - remove all zones & packages. - uninstall_all_packages(config); - uninstall_all_omicron_zones()?; + do_uninstall(config).await?; } // Extract all global zone services. @@ -395,12 +410,12 @@ fn do_activate(config: &Config, install_dir: &Path) -> Result<()> { Ok(()) } -fn do_install( +async fn do_install( config: &Config, artifact_dir: &Path, install_dir: &Path, ) -> Result<()> { - do_unpack(config, artifact_dir, install_dir)?; + do_unpack(config, artifact_dir, install_dir).await?; do_activate(config, install_dir) } @@ -412,6 +427,46 @@ fn uninstall_all_omicron_zones() -> Result<()> { Ok(()) } +fn get_all_omicron_datasets() -> Result> { + let mut datasets = vec![]; + + // Collect all datasets within Oxide zpools. + // + // This includes cockroachdb, clickhouse, and crucible datasets. + let zpools = zpool::Zpool::list()?; + for pool in &zpools { + let pool = pool.to_string(); + for dataset in &zfs::Zfs::list_datasets(&pool)? { + datasets.push(format!("{pool}/{dataset}")); + } + } + + // Collect all datasets for Oxide zones. + for dataset in &zfs::Zfs::list_datasets(&zfs::ZONE_ZFS_DATASET)? { + datasets.push(format!("{}/{dataset}", zfs::ZONE_ZFS_DATASET)); + } + + Ok(datasets) +} + +fn uninstall_all_omicron_datasets(config: &Config) -> Result<()> { + let datasets = get_all_omicron_datasets()?; + if datasets.is_empty() { + return Ok(()); + } + + config.confirm(&format!( + "About to delete the following datasets: {:#?}", + datasets + ))?; + for dataset in &datasets { + info!(config.log, "Deleting dataset: {dataset}"); + zfs::Zfs::destroy_dataset(dataset)?; + } + + Ok(()) +} + // Attempts to both disable and delete all requested packages. fn uninstall_all_packages(config: &Config) { for (_, package) in config @@ -426,7 +481,9 @@ fn uninstall_all_packages(config: &Config) { .run(smf::AdmSelection::ByPattern(&[&package.service_name])); let _ = smf::Config::delete().force().run(&package.service_name); } +} +fn uninstall_omicron_config() { // Once all packages have been removed, also remove any locally-stored // configuration. remove_all_unless_already_removed(omicron_common::OMICRON_CONFIG_PATH) @@ -479,7 +536,7 @@ fn remove_all_except>( Ok(()) } -async fn do_uninstall(config: &Config) -> Result<()> { +async fn do_deactivate(config: &Config) -> Result<()> { info!(&config.log, "Removing all Omicron zones"); uninstall_all_omicron_zones()?; info!(config.log, "Uninstalling all packages"); @@ -489,11 +546,21 @@ async fn do_uninstall(config: &Config) -> Result<()> { Ok(()) } +async fn do_uninstall(config: &Config) -> Result<()> { + do_deactivate(config).await?; + info!(config.log, "Uninstalling Omicron configuration"); + uninstall_omicron_config(); + info!(config.log, "Removing datasets"); + uninstall_all_omicron_datasets(config)?; + Ok(()) +} + async fn do_clean( config: &Config, artifact_dir: &Path, install_dir: &Path, ) -> Result<()> { + do_uninstall(&config).await?; info!( config.log, "Removing artifacts from {}", @@ -590,6 +657,27 @@ struct Config { package_config: PackageConfig, // Description of the target we're trying to operate on. target: Target, + // True if we should skip confirmations for destructive operations. + force: bool, +} + +impl Config { + /// Prompts the user for input before proceeding with an operation. + fn confirm(&self, prompt: &str) -> Result<()> { + if self.force { + return Ok(()); + } + + print!("{prompt}\n[yY to confirm] >> "); + let _ = std::io::stdout().flush(); + + let mut input = String::new(); + std::io::stdin().read_line(&mut input)?; + match input.as_str().trim() { + "y" | "Y" => Ok(()), + _ => bail!("Aborting"), + } + } } #[tokio::main] @@ -604,8 +692,12 @@ async fn main() -> Result<()> { debug!(log, "target: {:?}", args.target); - let config = - Config { log: log.clone(), package_config, target: args.target }; + let config = Config { + log: log.clone(), + package_config, + target: args.target, + force: args.force, + }; // Use a CWD that is the root of the Omicron repository. if let Ok(manifest) = env::var("CARGO_MANIFEST_DIR") { @@ -623,26 +715,28 @@ async fn main() -> Result<()> { artifact_dir, install_dir, }) => { - do_install(&config, &artifact_dir, &install_dir)?; - } - SubCommand::Deploy(DeployCommand::Uninstall) => { - do_uninstall(&config).await?; + do_install(&config, &artifact_dir, &install_dir).await?; } - SubCommand::Deploy(DeployCommand::Clean { + SubCommand::Deploy(DeployCommand::Unpack { artifact_dir, install_dir, }) => { + do_unpack(&config, &artifact_dir, &install_dir).await?; + } + SubCommand::Deploy(DeployCommand::Activate { install_dir }) => { + do_activate(&config, &install_dir)?; + } + SubCommand::Deploy(DeployCommand::Deactivate) => { + do_deactivate(&config).await?; + } + SubCommand::Deploy(DeployCommand::Uninstall) => { do_uninstall(&config).await?; - do_clean(&config, &artifact_dir, &install_dir).await?; } - SubCommand::Deploy(DeployCommand::Unpack { + SubCommand::Deploy(DeployCommand::Clean { artifact_dir, install_dir, }) => { - do_unpack(&config, &artifact_dir, &install_dir)?; - } - SubCommand::Deploy(DeployCommand::Activate { install_dir }) => { - do_activate(&config, &install_dir)?; + do_clean(&config, &artifact_dir, &install_dir).await?; } } diff --git a/package/src/lib.rs b/package/src/lib.rs index 2d927d4933..bc0b2e5b71 100644 --- a/package/src/lib.rs +++ b/package/src/lib.rs @@ -30,8 +30,8 @@ pub fn parse, C: DeserializeOwned>( /// Commands which should execute on a host building packages. #[derive(Debug, Subcommand)] pub enum BuildCommand { - /// Builds the packages specified in a manifest, and places them into a target - /// directory. + /// Builds the packages specified in a manifest, and places them into an + /// 'out' directory. Package { /// The output directory, where artifacts should be placed. /// @@ -39,16 +39,15 @@ pub enum BuildCommand { #[clap(long = "out", default_value = "out", action)] artifact_dir: PathBuf, }, - /// Checks the packages specified in a manifest, without building. + /// Checks the packages specified in a manifest, without building them. Check, } /// Commands which should execute on a host installing packages. #[derive(Debug, Subcommand)] pub enum DeployCommand { - /// Installs the packages to a target machine and starts the sled-agent - /// - /// This is a combination of `Unpack` and `Activate` + /// Installs the packages and starts the sled-agent. Shortcut for `unpack` + /// and `activate`. Install { /// The directory from which artifacts will be pulled. /// @@ -62,40 +61,63 @@ pub enum DeployCommand { #[clap(long = "out", default_value = "/opt/oxide", action)] install_dir: PathBuf, }, - /// Uninstalls the packages from the target machine. - Uninstall, - /// Uninstalls and removes the packages from the target machine. - Clean { - /// The directory from which artifacts were be pulled. + /// Unpacks the files created by `package` to an install directory. + /// Issues the `uninstall` command. + /// + /// This command performs uninstallation by default as a safety measure, + /// to ensure that we are not swapping packages underneath running services, + /// which may result in unexpected behavior. + /// The "uninstall before unpack" behavior can be disabled by setting + /// the environment variable OMICRON_NO_INSTALL. + /// + /// `unpack` does not actually start any services, but it prepares services + /// to be launched with the `activate` command. + Unpack { + /// The directory from which artifacts will be pulled. /// /// Should match the format from the Package subcommand. #[clap(long = "in", default_value = "out", action)] artifact_dir: PathBuf, - /// The directory to which artifacts were installed. + /// The directory to which artifacts will be installed. /// /// Defaults to "/opt/oxide". #[clap(long = "out", default_value = "/opt/oxide", action)] install_dir: PathBuf, }, - - /// Unpacks the package files on the target machine - Unpack { - /// The directory from which artifacts will be pulled. - /// - /// Should match the format from the Package subcommand. - #[clap(long = "in", default_value = "out", action)] - artifact_dir: PathBuf, - + /// Imports and starts the sled-agent illumos service + /// + /// The necessary packages must exist in the installation directory + /// already; this can be done with the `unpack` command. + Activate { /// The directory to which artifacts will be installed. /// /// Defaults to "/opt/oxide". #[clap(long = "out", default_value = "/opt/oxide", action)] install_dir: PathBuf, }, - /// Installs the sled-agent illumos service and starts it - Activate { - /// The directory to which artifacts will be installed. + /// Deletes all Omicron zones and stops all services. + /// + /// This command may be used to stop the currently executing Omicron + /// services, such that they could be restarted later. + Deactivate, + /// Uninstalls packages and deletes durable Omicron storage. Issues the + /// `deactivate` command. + /// + /// This command deletes all state used by Omicron services, but leaves + /// the packages in the installation directory. This means that a later + /// call to `activate` could re-install Omicron services. + Uninstall, + /// Uninstalls packages and removes them from the installation directory. + /// Issues the `uninstall` command. + Clean { + /// The directory from which artifacts were be pulled. + /// + /// Should match the format from the Package subcommand. + #[clap(long = "in", default_value = "out", action)] + artifact_dir: PathBuf, + + /// The directory to which artifacts were installed. /// /// Defaults to "/opt/oxide". #[clap(long = "out", default_value = "/opt/oxide", action)] diff --git a/sled-agent/src/illumos/zfs.rs b/sled-agent/src/illumos/zfs.rs index 7ffe7c6143..0dc65124e1 100644 --- a/sled-agent/src/illumos/zfs.rs +++ b/sled-agent/src/illumos/zfs.rs @@ -12,10 +12,19 @@ pub const ZONE_ZFS_DATASET_MOUNTPOINT: &str = "/zone"; pub const ZONE_ZFS_DATASET: &str = "rpool/zone"; const ZFS: &str = "/usr/sbin/zfs"; -/// Error returned by [`Zfs::list_filesystems`]. +/// Error returned by [`Zfs::list_datasets`]. #[derive(thiserror::Error, Debug)] -#[error("Could not list filesystems within zpool {name}: {err}")] -pub struct ListFilesystemsError { +#[error("Could not list datasets within zpool {name}: {err}")] +pub struct ListDatasetsError { + name: String, + #[source] + err: crate::illumos::ExecutionError, +} + +/// Error returned by [`Zfs::destroy_dataset`]. +#[derive(thiserror::Error, Debug)] +#[error("Could not destroy dataset {name}: {err}")] +pub struct DestroyDatasetError { name: String, #[source] err: crate::illumos::ExecutionError, @@ -97,17 +106,13 @@ impl fmt::Display for Mountpoint { #[cfg_attr(test, mockall::automock, allow(dead_code))] impl Zfs { - /// Lists all filesystems within a zpool. - pub fn list_filesystems( - name: &str, - ) -> Result, ListFilesystemsError> { + /// Lists all datasets within a pool or existing dataset. + pub fn list_datasets(name: &str) -> Result, ListDatasetsError> { let mut command = std::process::Command::new(ZFS); let cmd = command.args(&["list", "-d", "1", "-rHpo", "name", name]); - let output = execute(cmd).map_err(|err| ListFilesystemsError { - name: name.to_string(), - err, - })?; + let output = execute(cmd) + .map_err(|err| ListDatasetsError { name: name.to_string(), err })?; let stdout = String::from_utf8_lossy(&output.stdout); let filesystems: Vec = stdout .trim() @@ -120,6 +125,17 @@ impl Zfs { Ok(filesystems) } + /// Destroys a dataset. + pub fn destroy_dataset(name: &str) -> Result<(), DestroyDatasetError> { + let mut command = std::process::Command::new(PFEXEC); + let cmd = command.args(&[ZFS, "destroy", "-r", name]); + execute(cmd).map_err(|err| DestroyDatasetError { + name: name.to_string(), + err, + })?; + Ok(()) + } + /// Creates a new ZFS filesystem named `name`, unless one already exists. pub fn ensure_zoned_filesystem( name: &str, diff --git a/sled-agent/src/illumos/zpool.rs b/sled-agent/src/illumos/zpool.rs index b1d528c05d..40d1281894 100644 --- a/sled-agent/src/illumos/zpool.rs +++ b/sled-agent/src/illumos/zpool.rs @@ -9,6 +9,7 @@ use serde::{Deserialize, Deserializer}; use std::str::FromStr; use uuid::Uuid; +const ZPOOL_PREFIX: &str = "oxp_"; const ZPOOL: &str = "/usr/sbin/zpool"; #[derive(thiserror::Error, Debug, PartialEq, Eq)] @@ -24,6 +25,14 @@ enum Error { Parse(#[from] ParseError), } +#[derive(thiserror::Error, Debug)] +#[error("Failed to list zpools: {err}")] +pub struct ListError { + #[from] + #[source] + err: Error, +} + #[derive(thiserror::Error, Debug)] #[error("Failed to get info for zpool '{name}': {err}")] pub struct GetInfoError { @@ -143,8 +152,22 @@ impl FromStr for ZpoolInfo { /// Wraps commands for interacting with ZFS pools. pub struct Zpool {} -#[cfg_attr(test, mockall::automock, allow(dead_code))] +#[cfg_attr(test, mockall::automock)] impl Zpool { + pub fn list() -> Result, ListError> { + let mut command = std::process::Command::new(ZPOOL); + let cmd = command.args(&["list", "-Hpo", "name"]); + + let output = execute(cmd).map_err(Error::from)?; + let stdout = String::from_utf8_lossy(&output.stdout); + let zpool = stdout + .lines() + .filter_map(|line| line.parse::().ok()) + .collect(); + Ok(zpool) + } + + #[cfg_attr(test, allow(dead_code))] pub fn get_info(name: &str) -> Result { let mut command = std::process::Command::new(ZPOOL); let cmd = command.args(&[ @@ -190,7 +213,7 @@ impl<'de> Deserialize<'de> for ZpoolName { D: Deserializer<'de>, { let s = String::deserialize(deserializer)?; - let s = s.strip_prefix("oxp_").ok_or_else(|| { + let s = s.strip_prefix(ZPOOL_PREFIX).ok_or_else(|| { serde::de::Error::custom( "Bad zpool prefix - must start with 'oxp_'", ) @@ -200,9 +223,21 @@ impl<'de> Deserialize<'de> for ZpoolName { } } +impl FromStr for ZpoolName { + type Err = String; + + fn from_str(s: &str) -> Result { + let s = s.strip_prefix(ZPOOL_PREFIX).ok_or_else(|| { + format!("Bad zpool name {}; must start with {}", s, ZPOOL_PREFIX) + })?; + let id = Uuid::from_str(&s).map_err(|e| e.to_string())?; + Ok(ZpoolName(id)) + } +} + impl ToString for ZpoolName { fn to_string(&self) -> String { - format!("oxp_{}", self.0) + format!("{}{}", ZPOOL_PREFIX, self.0) } } @@ -228,7 +263,7 @@ mod test { fn test_parse_zpool_name() { let uuid: Uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b".parse().unwrap(); - let good_name = format!("oxp_{}", uuid); + let good_name = format!("{}{}", ZPOOL_PREFIX, uuid); let name = parse_name(&good_name).expect("Cannot parse as ZpoolName"); assert_eq!(uuid, name.id()); diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index 602293cab2..1381076ceb 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -37,7 +37,9 @@ pub mod sp; mod storage_manager; mod updates; +pub use illumos::zfs; pub use illumos::zone; +pub use illumos::zpool; pub use sled_agent::cleanup_networking_resources; #[cfg(test)] diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index c803f90e91..c5e11a1f4b 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -458,7 +458,17 @@ fn delete_addresses_matching_prefixes( let mut cmd = Command::new(PFEXEC); let cmd = cmd.args(&[IPADM, "show-addr", "-p", "-o", "ADDROBJ"]); let output = execute(cmd)?; - for addrobj in output.stdout.lines().flatten() { + + // `ipadm show-addr` can return multiple addresses with the same name, but + // multiple values. Collecting to a set ensures that only a single name is + // used. + let addrobjs = output + .stdout + .lines() + .flatten() + .collect::>(); + + for addrobj in addrobjs { if prefixes.iter().any(|prefix| addrobj.starts_with(prefix)) { warn!( log, diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index e9cfa6f6b4..492c26fbbe 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -51,7 +51,7 @@ const CRUCIBLE_AGENT_DEFAULT_SVC: &str = "svc:/oxide/crucible/agent:default"; pub enum Error { // TODO: We could add the context of "why are we doint this op", maybe? #[error(transparent)] - ZfsListFilesystems(#[from] crate::illumos::zfs::ListFilesystemsError), + ZfsListDataset(#[from] crate::illumos::zfs::ListDatasetsError), #[error(transparent)] ZfsEnsureFilesystem(#[from] crate::illumos::zfs::EnsureFilesystemError), @@ -848,7 +848,7 @@ impl StorageWorker { // If we find filesystems within our datasets, ensure their // zones are up-and-running. let mut datasets = vec![]; - let existing_filesystems = Zfs::list_filesystems(&pool_name.to_string())?; + let existing_filesystems = Zfs::list_datasets(&pool_name.to_string())?; for fs_name in existing_filesystems { info!(&self.log, "StorageWorker loading fs {} on zpool {}", fs_name, pool_name.to_string()); // We intentionally do not exit on error here -