-
Notifications
You must be signed in to change notification settings - Fork 40
/
zfs.rs
608 lines (545 loc) · 20 KB
/
zfs.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
//! Utilities for poking at ZFS.
use crate::{execute, PFEXEC};
use camino::Utf8PathBuf;
use omicron_common::disk::DiskIdentity;
use std::fmt;
// These locations in the ramdisk must only be used by the switch zone.
//
// We need the switch zone online before we can create the U.2 drives and
// encrypt the zpools during rack initialization. Without the switch zone we
// cannot get the rack initialization request from wicketd in RSS which allows
// us to initialize the trust quorum and derive the encryption keys needed for
// the U.2 disks.
/// Mountpoint of the ramdisk dataset reserved for the switch zone.
pub const ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT: &str = "/zone";
/// Name of the ramdisk dataset reserved for the switch zone.
pub const ZONE_ZFS_RAMDISK_DATASET: &str = "rpool/zone";
/// Absolute path of the `zfs` executable run by the helpers in this module.
pub const ZFS: &str = "/usr/sbin/zfs";
/// Root directory under which ZFS encryption key files are placed, unless
/// the `tmp_keypath` feature is enabled.
///
/// This path is intentionally on a `tmpfs` to prevent copy-on-write behavior
/// and to ensure it goes away on power off.
///
/// We want to minimize the time the key files are in memory, and so we
/// rederive the keys and recreate the files on demand when creating and
/// mounting encrypted filesystems. We then zero them and unlink them.
pub const KEYPATH_ROOT: &str = "/var/run/oxide/";
/// Key file root used in place of [`KEYPATH_ROOT`] when the `tmp_keypath`
/// feature is enabled.
// Use /tmp so we don't have to worry about running tests with pfexec
pub const TEST_KEYPATH_ROOT: &str = "/tmp";
/// Error returned by [`Zfs::list_datasets`].
///
/// [`Zfs::get_dataset_name`] reports its failures with this type as well.
#[derive(thiserror::Error, Debug)]
#[error("Could not list datasets within zpool {name}: {err}")]
pub struct ListDatasetsError {
/// The name that was handed to the failing `zfs` invocation.
name: String,
/// The command execution failure that caused this error.
#[source]
err: crate::ExecutionError,
}
/// The specific reason a dataset could not be destroyed.
#[derive(thiserror::Error, Debug)]
pub enum DestroyDatasetErrorVariant {
/// The destroy command failed and its stderr contained "does not exist".
#[error("Dataset not found")]
NotFound,
/// Any other failure while executing the destroy command.
#[error(transparent)]
Other(crate::ExecutionError),
}
/// Error returned by [`Zfs::destroy_dataset`].
#[derive(thiserror::Error, Debug)]
#[error("Could not destroy dataset {name}: {err}")]
pub struct DestroyDatasetError {
/// Name of the dataset that could not be destroyed.
name: String,
/// Why the destroy failed; public so callers can tell a missing dataset
/// apart from other failures.
#[source]
pub err: DestroyDatasetErrorVariant,
}
/// The underlying cause carried inside an [`EnsureFilesystemError`].
#[derive(thiserror::Error, Debug)]
enum EnsureFilesystemErrorRaw {
/// A `zfs` command (creating the filesystem or setting a property) failed.
#[error("ZFS execution error: {0}")]
Execution(#[from] crate::ExecutionError),
/// The filesystem was not found and the caller did not ask for it to be
/// created.
#[error("Filesystem does not exist, and formatting was not requested")]
NotFoundNotFormatted,
/// `zfs list` succeeded but its output did not describe the expected
/// filesystem; the payload is the raw output.
#[error("Unexpected output from ZFS commands: {0}")]
Output(String),
/// `zfs mount -l` failed for an existing encrypted filesystem.
#[error("Failed to mount encrypted filesystem: {0}")]
MountEncryptedFsFailed(crate::ExecutionError),
/// `zfs mount -O` failed.
#[error("Failed to mount overlay filesystem: {0}")]
MountOverlayFsFailed(crate::ExecutionError),
}
/// Error returned by [`Zfs::ensure_filesystem`].
///
/// [`Zfs::mount_overlay_dataset`] reports its failures with this type as well.
#[derive(thiserror::Error, Debug)]
#[error(
"Failed to ensure filesystem '{name}' exists at '{mountpoint:?}': {err}"
)]
pub struct EnsureFilesystemError {
/// Name of the filesystem the operation was acting on.
name: String,
/// Mountpoint that was requested for the filesystem.
mountpoint: Mountpoint,
/// The underlying cause of the failure.
#[source]
err: EnsureFilesystemErrorRaw,
}
/// Error returned by [`Zfs::set_oxide_value`]
#[derive(thiserror::Error, Debug)]
#[error(
"Failed to set value '{name}={value}' on filesystem {filesystem}: {err}"
)]
pub struct SetValueError {
/// Filesystem on which the property was being set.
filesystem: String,
/// Full property name as passed to `zfs set`.
name: String,
/// Value that was being assigned to the property.
value: String,
/// The command execution failure that caused this error.
err: crate::ExecutionError,
}
/// The underlying cause carried inside a [`GetValueError`].
#[derive(thiserror::Error, Debug)]
enum GetValueErrorRaw {
/// The `zfs get` command itself failed.
#[error(transparent)]
Execution(#[from] crate::ExecutionError),
/// The command succeeded but printed `-` instead of a value.
#[error("No value found with that name")]
MissingValue,
}
/// Error returned by [`Zfs::get_oxide_value`].
///
/// [`Zfs::get_value`] and [`Snapshot::full_path`] report their failures with
/// this type as well.
#[derive(thiserror::Error, Debug)]
#[error("Failed to get value '{name}' from filesystem {filesystem}: {err}")]
pub struct GetValueError {
/// Filesystem that was queried.
filesystem: String,
/// Full property name as passed to `zfs get`.
name: String,
/// The underlying cause of the failure.
err: GetValueErrorRaw,
}
/// Error returned by [`Zfs::list_snapshots`]; wraps the failure of the
/// underlying `zfs list` command.
#[derive(Debug, thiserror::Error)]
#[error("Failed to list snapshots: {0}")]
pub struct ListSnapshotsError(#[from] crate::ExecutionError);
/// Error returned by [`Zfs::create_snapshot`].
#[derive(Debug, thiserror::Error)]
#[error("Failed to create snapshot '{snap_name}' from filesystem '{filesystem}': {err}")]
pub struct CreateSnapshotError {
/// Filesystem that was being snapshotted.
filesystem: String,
/// Name requested for the new snapshot (the part after `@`).
snap_name: String,
/// The command execution failure that caused this error.
err: crate::ExecutionError,
}
/// Error returned by [`Zfs::destroy_snapshot`].
#[derive(Debug, thiserror::Error)]
#[error("Failed to delete snapshot '{filesystem}@{snap_name}': {err}")]
pub struct DestroySnapshotError {
/// Filesystem the snapshot belongs to.
filesystem: String,
/// Name of the snapshot (the part after `@`).
snap_name: String,
/// The command execution failure that caused this error.
err: crate::ExecutionError,
}
/// Wraps commands for interacting with ZFS.
///
/// The type holds no state; every operation is an associated function that
/// runs the `zfs` executable.
pub struct Zfs {}
/// Describes a mountpoint for a ZFS filesystem.
///
/// The [`fmt::Display`] output is the text placed after `mountpoint=` when a
/// filesystem is created.
#[derive(Debug, Clone)]
pub enum Mountpoint {
/// Displayed as the literal string `legacy`.
#[allow(dead_code)]
Legacy,
/// Displayed as the contained path.
Path(Utf8PathBuf),
}
impl fmt::Display for Mountpoint {
    /// Writes `legacy` for [`Mountpoint::Legacy`] and the path text for
    /// [`Mountpoint::Path`].
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rendered: &str = match self {
            Mountpoint::Legacy => "legacy",
            Mountpoint::Path(path) => path.as_str(),
        };
        f.write_str(rendered)
    }
}
/// This is the path for an encryption key used by ZFS
///
/// A `Keypath` can be derived from a [`DiskIdentity`] via `From`; its
/// [`fmt::Display`] output is the wrapped path.
#[derive(Debug, Clone)]
pub struct Keypath(pub Utf8PathBuf);
impl fmt::Display for Keypath {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
#[cfg(not(feature = "tmp_keypath"))]
impl From<&DiskIdentity> for Keypath {
fn from(id: &DiskIdentity) -> Self {
build_keypath(id, KEYPATH_ROOT)
}
}
#[cfg(feature = "tmp_keypath")]
impl From<&DiskIdentity> for Keypath {
fn from(id: &DiskIdentity) -> Self {
build_keypath(id, TEST_KEYPATH_ROOT)
}
}
fn build_keypath(id: &DiskIdentity, root: &str) -> Keypath {
let filename =
format!("{}-{}-{}-zfs-aes-256-gcm.key", id.vendor, id.serial, id.model);
let path: Utf8PathBuf = [root, &filename].iter().collect();
Keypath(path)
}
/// Encryption settings applied when [`Zfs::ensure_filesystem`] creates a
/// filesystem.
#[derive(Debug)]
pub struct EncryptionDetails {
/// Location of the key file; passed to ZFS as `keylocation=file://<keypath>`.
pub keypath: Keypath,
/// Recorded on the new filesystem as the `oxide:epoch` property.
pub epoch: u64,
}
/// Optional `quota` and `compression` properties that
/// [`Zfs::ensure_filesystem`] applies to a filesystem.
#[derive(Debug, Default)]
pub struct SizeDetails {
/// Value for the `quota` property, in bytes; left untouched when `None`.
pub quota: Option<usize>,
/// Value for the `compression` property; left untouched when `None`.
pub compression: Option<&'static str>,
}
#[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))]
impl Zfs {
/// Lists all datasets within a pool or existing dataset.
///
/// Runs `zfs list -d 1 -rHpo name <name>` and returns every listed entry
/// beneath `name`, with the leading `<name>/` removed. The entry for `name`
/// itself is not returned.
///
/// Output lines that do not start with `<name>/` (including empty output)
/// are skipped rather than causing a panic.
///
/// # Errors
///
/// Returns [`ListDatasetsError`] if the `zfs` command cannot be executed or
/// exits unsuccessfully.
pub fn list_datasets(name: &str) -> Result<Vec<String>, ListDatasetsError> {
    let mut command = std::process::Command::new(ZFS);
    let cmd = command.args(&["list", "-d", "1", "-rHpo", "name", name]);
    let output = execute(cmd)
        .map_err(|err| ListDatasetsError { name: name.to_string(), err })?;
    let stdout = String::from_utf8_lossy(&output.stdout);

    // Built once instead of once per output line.
    let prefix = format!("{}/", name);
    let filesystems: Vec<String> = stdout
        .trim()
        .split('\n')
        // The line for `name` itself carries no `<name>/` prefix, so this
        // also drops the parent entry.
        .filter_map(|line| line.strip_prefix(prefix.as_str()))
        .map(String::from)
        .collect();
    Ok(filesystems)
}
/// Return the name of a dataset for a ZFS object.
///
/// `object` is either a dataset name or a path; a path is resolved to the
/// _mounted_ ZFS dataset that contains it. The name is taken from the
/// trimmed output of `zfs get -Hpo value name <object>`.
pub fn get_dataset_name(object: &str) -> Result<String, ListDatasetsError> {
    let mut zfs = std::process::Command::new(ZFS);
    let query = zfs.args(&["get", "-Hpo", "value", "name", object]);
    match execute(query) {
        Ok(output) => {
            let stdout = String::from_utf8_lossy(&output.stdout);
            Ok(stdout.trim().to_string())
        }
        Err(err) => Err(ListDatasetsError { name: object.to_string(), err }),
    }
}
/// Destroys a dataset.
///
/// Runs `zfs destroy -r <name>` through `pfexec`. A command failure whose
/// stderr contains "does not exist" is reported as
/// [`DestroyDatasetErrorVariant::NotFound`]; every other failure is
/// reported as [`DestroyDatasetErrorVariant::Other`].
pub fn destroy_dataset(name: &str) -> Result<(), DestroyDatasetError> {
    let mut pfexec = std::process::Command::new(PFEXEC);
    let invocation = pfexec.args(&[ZFS, "destroy", "-r", name]);
    match execute(invocation) {
        Ok(_) => Ok(()),
        Err(err) => {
            let missing = matches!(
                &err,
                crate::ExecutionError::CommandFailure(info)
                    if info.stderr.contains("does not exist")
            );
            let variant = if missing {
                DestroyDatasetErrorVariant::NotFound
            } else {
                DestroyDatasetErrorVariant::Other(err)
            };
            Err(DestroyDatasetError { name: name.to_string(), err: variant })
        }
    }
}
/// Creates a new ZFS filesystem named `name`, unless one already exists.
///
/// Applies an optional quota, provided _in bytes_.
#[allow(clippy::too_many_arguments)]
pub fn ensure_filesystem(
name: &str,
mountpoint: Mountpoint,
zoned: bool,
do_format: bool,
encryption_details: Option<EncryptionDetails>,
size_details: Option<SizeDetails>,
additional_options: Option<Vec<String>>,
) -> Result<(), EnsureFilesystemError> {
let (exists, mounted) = Self::dataset_exists(name, &mountpoint)?;
if exists {
if let Some(SizeDetails { quota, compression }) = size_details {
// apply quota and compression mode (in case they've changed across
// sled-agent versions since creation)
Self::apply_properties(name, &mountpoint, quota, compression)?;
}
if encryption_details.is_none() {
// If the dataset exists, we're done. Unencrypted datasets are
// automatically mounted.
return Ok(());
} else {
if mounted {
// The dataset exists and is mounted
return Ok(());
}
// We need to load the encryption key and mount the filesystem
return Self::mount_encrypted_dataset(name, &mountpoint);
}
}
if !do_format {
return Err(EnsureFilesystemError {
name: name.to_string(),
mountpoint,
err: EnsureFilesystemErrorRaw::NotFoundNotFormatted,
});
}
// If it doesn't exist, make it.
let mut command = std::process::Command::new(PFEXEC);
let cmd = command.args(&[ZFS, "create"]);
if zoned {
cmd.args(&["-o", "zoned=on"]);
}
if let Some(details) = encryption_details {
let keyloc = format!("keylocation=file://{}", details.keypath);
let epoch = format!("oxide:epoch={}", details.epoch);
cmd.args(&[
"-o",
"encryption=aes-256-gcm",
"-o",
"keyformat=raw",
"-o",
&keyloc,
"-o",
&epoch,
]);
}
if let Some(opts) = additional_options {
for o in &opts {
cmd.args(&["-o", &o]);
}
}
cmd.args(&["-o", &format!("mountpoint={}", mountpoint), name]);
execute(cmd).map_err(|err| EnsureFilesystemError {
name: name.to_string(),
mountpoint: mountpoint.clone(),
err: err.into(),
})?;
if let Some(SizeDetails { quota, compression }) = size_details {
// Apply any quota and compression mode.
Self::apply_properties(name, &mountpoint, quota, compression)?;
}
Ok(())
}
/// Sets the `quota` and then the `compression` property on `name`,
/// skipping whichever of the two is `None`.
///
/// Stops at the first property that fails to be set; `mountpoint` is only
/// used to fill in the returned error.
fn apply_properties(
    name: &str,
    mountpoint: &Mountpoint,
    quota: Option<usize>,
    compression: Option<&'static str>,
) -> Result<(), EnsureFilesystemError> {
    let quota_text = quota.map(|bytes| bytes.to_string());
    let requested = [
        ("quota", quota_text.as_deref()),
        ("compression", compression),
    ];
    for &(property, value) in &requested {
        if let Some(value) = value {
            Self::set_value(name, property, value).map_err(|failure| {
                EnsureFilesystemError {
                    name: name.to_string(),
                    mountpoint: mountpoint.clone(),
                    // Take the execution error from the SetValueError
                    err: failure.err.into(),
                }
            })?;
        }
    }
    Ok(())
}
/// Runs `zfs mount -l <name>` through `pfexec`.
///
/// `mountpoint` is only used to fill in the returned error.
fn mount_encrypted_dataset(
    name: &str,
    mountpoint: &Mountpoint,
) -> Result<(), EnsureFilesystemError> {
    let mut pfexec = std::process::Command::new(PFEXEC);
    let mount = pfexec.args(&[ZFS, "mount", "-l", name]);
    match execute(mount) {
        Ok(_) => Ok(()),
        Err(err) => Err(EnsureFilesystemError {
            name: name.to_string(),
            mountpoint: mountpoint.clone(),
            err: EnsureFilesystemErrorRaw::MountEncryptedFsFailed(err),
        }),
    }
}
/// Runs `zfs mount -O <name>` through `pfexec`.
///
/// `mountpoint` is only used to fill in the returned error.
pub fn mount_overlay_dataset(
    name: &str,
    mountpoint: &Mountpoint,
) -> Result<(), EnsureFilesystemError> {
    let mut pfexec = std::process::Command::new(PFEXEC);
    let mount = pfexec.args(&[ZFS, "mount", "-O", name]);
    match execute(mount) {
        Ok(_) => Ok(()),
        Err(err) => Err(EnsureFilesystemError {
            name: name.to_string(),
            mountpoint: mountpoint.clone(),
            err: EnsureFilesystemErrorRaw::MountOverlayFsFailed(err),
        }),
    }
}
/// Checks whether `name` exists as a filesystem with the expected
/// mountpoint.
///
/// Returns `(true, mounted)` if the dataset exists, where `mounted` tells
/// whether it is currently mounted, and `(false, false)` otherwise.
///
/// # Errors
///
/// Returns an `Output` error when `zfs list` succeeds but its output does
/// not consist of the expected name, the type `filesystem`, the expected
/// mountpoint and a mounted flag. Truncated or empty output is reported the
/// same way rather than causing a panic.
fn dataset_exists(
    name: &str,
    mountpoint: &Mountpoint,
) -> Result<(bool, bool), EnsureFilesystemError> {
    let mut command = std::process::Command::new(ZFS);
    let cmd = command.args(&[
        "list",
        "-Hpo",
        "name,type,mountpoint,mounted",
        name,
    ]);
    // Any failure of the list command is treated as "does not exist".
    let output = match execute(cmd) {
        Ok(output) => output,
        Err(_) => return Ok((false, false)),
    };

    // The list command produced output: validate it.
    let stdout = String::from_utf8_lossy(&output.stdout);
    let expected_mountpoint = mountpoint.to_string();
    let values: Vec<&str> = stdout.trim().split('\t').collect();
    match values.as_slice() {
        [found_name, "filesystem", found_mountpoint, mounted, ..]
            if *found_name == name
                && *found_mountpoint == expected_mountpoint =>
        {
            Ok((true, *mounted == "yes"))
        }
        // Wrong name/type/mountpoint, or fewer than four fields.
        _ => Err(EnsureFilesystemError {
            name: name.to_string(),
            mountpoint: mountpoint.clone(),
            err: EnsureFilesystemErrorRaw::Output(stdout.to_string()),
        }),
    }
}
/// Set the value of an Oxide-managed ZFS property.
///
/// `name` is prefixed with `oxide:` to form the full property name.
pub fn set_oxide_value(
    filesystem_name: &str,
    name: &str,
    value: &str,
) -> Result<(), SetValueError> {
    let property = format!("oxide:{name}");
    Zfs::set_value(filesystem_name, &property, value)
}
/// Runs `zfs set <name>=<value> <filesystem_name>` through `pfexec`.
fn set_value(
    filesystem_name: &str,
    name: &str,
    value: &str,
) -> Result<(), SetValueError> {
    let assignment = format!("{name}={value}");
    let mut pfexec = std::process::Command::new(PFEXEC);
    let invocation =
        pfexec.args(&[ZFS, "set", assignment.as_str(), filesystem_name]);
    match execute(invocation) {
        Ok(_) => Ok(()),
        Err(err) => Err(SetValueError {
            filesystem: filesystem_name.to_string(),
            name: name.to_string(),
            value: value.to_string(),
            err,
        }),
    }
}
/// Get the value of an Oxide-managed ZFS property.
///
/// `name` is prefixed with `oxide:` to form the full property name.
pub fn get_oxide_value(
    filesystem_name: &str,
    name: &str,
) -> Result<String, GetValueError> {
    let property = format!("oxide:{name}");
    Zfs::get_value(filesystem_name, &property)
}
pub fn get_value(
filesystem_name: &str,
name: &str,
) -> Result<String, GetValueError> {
let mut command = std::process::Command::new(PFEXEC);
let cmd =
command.args(&[ZFS, "get", "-Ho", "value", &name, filesystem_name]);
let output = execute(cmd).map_err(|err| GetValueError {
filesystem: filesystem_name.to_string(),
name: name.to_string(),
err: err.into(),
})?;
let stdout = String::from_utf8_lossy(&output.stdout);
let value = stdout.trim();
if value == "-" {
return Err(GetValueError {
filesystem: filesystem_name.to_string(),
name: name.to_string(),
err: GetValueErrorRaw::MissingValue,
});
}
Ok(value.to_string())
}
/// List all extant snapshots.
///
/// Runs `zfs list -H -o name -t snapshot` and splits each output line at
/// its first `@` into the filesystem and the snapshot name. Lines without
/// an `@` are skipped rather than causing a panic.
///
/// # Errors
///
/// Returns [`ListSnapshotsError`] if the `zfs` command cannot be executed
/// or exits unsuccessfully.
pub fn list_snapshots() -> Result<Vec<Snapshot>, ListSnapshotsError> {
    let mut command = std::process::Command::new(ZFS);
    let cmd = command.args(&["list", "-H", "-o", "name", "-t", "snapshot"]);
    let output = execute(cmd).map_err(ListSnapshotsError::from)?;
    let stdout = String::from_utf8_lossy(&output.stdout);
    let snapshots = stdout
        .trim()
        .lines()
        .filter_map(|line| line.split_once('@'))
        .map(|(filesystem, snap_name)| Snapshot {
            filesystem: filesystem.to_string(),
            snap_name: snap_name.to_string(),
        })
        .collect();
    Ok(snapshots)
}
/// Create a snapshot of a filesystem.
///
/// Each `(name, value)` pair in `properties` is passed to `zfs snapshot` as
/// `-o name=value`, so the properties are set directly on the snapshot. The
/// snapshot is created as `<filesystem>@<snap_name>`.
pub fn create_snapshot<'a>(
    filesystem: &'a str,
    snap_name: &'a str,
    properties: &'a [(&'a str, &'a str)],
) -> Result<(), CreateSnapshotError> {
    let mut zfs = std::process::Command::new(ZFS);
    let cmd = zfs.arg("snapshot");
    for (key, value) in properties {
        let assignment = format!("{key}={value}");
        cmd.arg("-o").arg(assignment);
    }
    cmd.arg(format!("{filesystem}@{snap_name}"));
    match execute(cmd) {
        Ok(_) => Ok(()),
        Err(err) => Err(CreateSnapshotError {
            filesystem: filesystem.to_string(),
            snap_name: snap_name.to_string(),
            err,
        }),
    }
}
/// Destroy a named snapshot of a filesystem.
///
/// Runs `zfs destroy <filesystem>@<snap_name>`.
pub fn destroy_snapshot(
    filesystem: &str,
    snap_name: &str,
) -> Result<(), DestroySnapshotError> {
    let target = format!("{filesystem}@{snap_name}");
    let mut zfs = std::process::Command::new(ZFS);
    let cmd = zfs.args(&["destroy", target.as_str()]);
    match execute(cmd) {
        Ok(_) => Ok(()),
        Err(err) => Err(DestroySnapshotError {
            filesystem: filesystem.to_string(),
            snap_name: snap_name.to_string(),
            err,
        }),
    }
}
}
/// A read-only snapshot of a ZFS filesystem.
///
/// Displayed as `<filesystem>@<snap_name>`.
#[derive(Clone, Debug)]
pub struct Snapshot {
/// Name of the filesystem the snapshot was taken of (the part before `@`).
pub filesystem: String,
/// Name of the snapshot itself (the part after `@`).
pub snap_name: String,
}
impl Snapshot {
    /// Return the full path to the snapshot directory within the filesystem.
    ///
    /// The path is `<mountpoint>/.zfs/snapshot/<snap_name>`, where the
    /// mountpoint is read from the filesystem's `mountpoint` property.
    pub fn full_path(&self) -> Result<Utf8PathBuf, GetValueError> {
        Zfs::get_value(&self.filesystem, "mountpoint").map(|mountpoint| {
            let relative = format!(".zfs/snapshot/{}", self.snap_name);
            Utf8PathBuf::from(mountpoint).join(relative)
        })
    }
}
impl fmt::Display for Snapshot {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}@{}", self.filesystem, self.snap_name)
}
}
/// Returns all datasets managed by Omicron
///
/// The result holds the full name (`<parent>/<child>`) of every dataset
/// directly beneath each zpool and beneath the ramdisk zone dataset, except
/// for the crash, backing and swap datasets of internal pools.
pub fn get_all_omicron_datasets_for_delete() -> anyhow::Result<Vec<String>> {
    // Avoid erasing crashdump, backing data and swap datasets on internal
    // pools. The swap device may be in use.
    const PRESERVED_ON_INTERNAL: [&str; 3] = ["crash", "backing", "swap"];

    let mut datasets = vec![];

    // Collect all datasets within Oxide zpools.
    //
    // This includes cockroachdb, clickhouse, and crucible datasets.
    let zpools = crate::zpool::Zpool::list()?;
    for pool in &zpools {
        let internal = pool.kind() == crate::zpool::ZpoolKind::Internal;
        let pool = pool.to_string();
        let children = Zfs::list_datasets(&pool)?;
        let deletable = children.iter().filter(|dataset| {
            let preserved = PRESERVED_ON_INTERNAL.contains(&dataset.as_str())
                || dataset.starts_with("backing/");
            !(internal && preserved)
        });
        datasets.extend(deletable.map(|dataset| format!("{pool}/{dataset}")));
    }

    // Collect all datasets for ramdisk-based Oxide zones, if any exist.
    if let Ok(ramdisk_datasets) = Zfs::list_datasets(ZONE_ZFS_RAMDISK_DATASET)
    {
        datasets.extend(
            ramdisk_datasets
                .iter()
                .map(|dataset| format!("{}/{dataset}", ZONE_ZFS_RAMDISK_DATASET)),
        );
    }

    Ok(datasets)
}