Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lib: log vCPU diagnostics on triple fault and for some unhandled exit types #795

Merged
merged 11 commits into from
Oct 22, 2024
13 changes: 8 additions & 5 deletions bin/propolis-server/src/lib/migrate/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use bitvec::prelude::{BitSlice, Lsb0};
use futures::{SinkExt, StreamExt};
use propolis::common::{GuestAddr, PAGE_SIZE};
use propolis::common::{GuestAddr, GuestData, PAGE_SIZE};
use propolis::migrate::{
MigrateCtx, MigrateStateError, Migrator, PayloadOutputs,
};
Expand Down Expand Up @@ -657,9 +657,12 @@ impl<'vm, T: MigrateConn> RonV0Runner<'vm, T> {
info!(self.log(), "ram_push: xfer RAM between {start:#x} and {end:#x}",);
self.send_msg(memx::make_mem_xfer(start, end, bits)).await?;
for addr in PageIter::new(start, end, bits) {
let mut bytes = [0u8; PAGE_SIZE];
self.read_guest_mem(GuestAddr(addr), &mut bytes).await?;
self.send_msg(codec::Message::Page(bytes.into())).await?;
let mut byte_buffer = [0u8; PAGE_SIZE];
{
let mut bytes = GuestData::from(byte_buffer.as_mut_slice());
self.read_guest_mem(GuestAddr(addr), &mut bytes).await?;
}
self.send_msg(codec::Message::Page(byte_buffer.into())).await?;
probes::migrate_xfer_ram_page!(|| (addr, PAGE_SIZE as u64));
}
Ok(())
Expand Down Expand Up @@ -919,7 +922,7 @@ impl<'vm, T: MigrateConn> RonV0Runner<'vm, T> {
async fn read_guest_mem(
&mut self,
addr: GuestAddr,
buf: &mut [u8],
buf: &mut GuestData<&mut [u8]>,
) -> Result<(), MigrateError> {
let objects = self.vm.lock_shared().await;
let memctx = objects.access_mem().unwrap();
Expand Down
79 changes: 78 additions & 1 deletion bin/propolis-server/src/lib/vcpu_tasks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ impl VcpuTasks {
VmEntry::Run
}
VmExitKind::Suspended(SuspendDetail { kind, when }) => {
use propolis::vcpu::Diagnostics;
match kind {
exits::Suspend::Halt => {
event_handler.suspend_halt_event(when);
Expand All @@ -167,6 +168,13 @@ impl VcpuTasks {
event_handler.suspend_reset_event(when);
}
exits::Suspend::TripleFault(vcpuid) => {
slog::info!(
&log,
"triple fault on vcpu {}",
vcpu.id;
"state" => %Diagnostics::capture(vcpu)
);

if vcpuid == -1 || vcpuid == vcpu.id {
event_handler
.suspend_triple_fault_event(vcpu.id, when);
Expand All @@ -187,7 +195,76 @@ impl VcpuTasks {
task.force_hold();
VmEntry::Run
}
_ => {
VmExitKind::InstEmul(inst) => {
let diag = propolis::vcpu::Diagnostics::capture(vcpu);
error!(log,
"instruction emulation exit on vCPU {}",
vcpu.id;
"context" => ?inst,
"vcpu_state" => %diag);

event_handler.unhandled_vm_exit(vcpu.id, exit.kind);
VmEntry::Run
}
VmExitKind::Unknown(code) => {
error!(log,
"unrecognized exit code on vCPU {}",
vcpu.id;
"code" => code);

event_handler.unhandled_vm_exit(vcpu.id, exit.kind);
VmEntry::Run
}
// Bhyve emits the `Bogus` exit kind when there is no actual
// guest exit for user space to handle, but circumstances
// nevertheless dictate that the kernel VMM should exit to
// user space (e.g. a caller requested that all vCPUs be
// forced to exit to user space so their threads can
// rendezvous there).
//
// `process_vmexit` should always successfully handle this
// exit, since it never entails any work that could fail to
// be completed.
VmExitKind::Bogus => {
unreachable!(
"propolis-lib always handles VmExitKind::Bogus"
);
}
VmExitKind::Debug => {
error!(log,
"lib returned debug exit from vCPU {}",
vcpu.id);

event_handler.unhandled_vm_exit(vcpu.id, exit.kind);
VmEntry::Run
}
VmExitKind::VmxError(detail) => {
error!(log,
"unclassified VMX exit on vCPU {}",
vcpu.id;
"detail" => ?detail);

event_handler.unhandled_vm_exit(vcpu.id, exit.kind);
VmEntry::Run
}
VmExitKind::SvmError(detail) => {
error!(log,
"unclassified SVM exit on vCPU {}",
vcpu.id;
"detail" => ?detail);

event_handler.unhandled_vm_exit(vcpu.id, exit.kind);
VmEntry::Run
}
VmExitKind::Paging(gpa, fault_type) => {
let diag = propolis::vcpu::Diagnostics::capture(vcpu);
error!(log,
"unhandled paging exit on vCPU {}",
vcpu.id;
"gpa" => gpa,
"fault_type" => fault_type,
"vcpu_state" => %diag);

event_handler.unhandled_vm_exit(vcpu.id, exit.kind);
VmEntry::Run
}
Expand Down
7 changes: 7 additions & 0 deletions bin/propolis-server/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,13 @@ fn run_server(
Err(e).context("API version checks")?;
}

// If this is a development image being run outside of an Omicron zone,
// enable the display (in logs, panic messages, and the like) of diagnostic
// data that may have originated in the guest.
#[cfg(not(feature = "omicron-build"))]
propolis::common::DISPLAY_GUEST_DATA
.store(true, std::sync::atomic::Ordering::SeqCst);

let use_reservoir = config::reservoir_decide(&log);

let context = server::DropshotEndpointContext::new(
Expand Down
3 changes: 3 additions & 0 deletions bin/propolis-standalone/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1421,6 +1421,9 @@ fn main() -> anyhow::Result<ExitCode> {
// Check that vmm and viona device version match what we expect
api_version_checks(&log).context("API version checks")?;

propolis::common::DISPLAY_GUEST_DATA
.store(true, std::sync::atomic::Ordering::SeqCst);

// Load/parse the config first, since it's required to size the tokio runtime
// used to run the instance.
let config = if restore {
Expand Down
79 changes: 79 additions & 0 deletions lib/propolis/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,88 @@
use std::ops::{Add, BitAnd};
use std::ops::{Bound::*, RangeBounds};
use std::slice::SliceIndex;
use std::sync::atomic::{AtomicBool, Ordering};

use crate::vmm::SubMapping;

/// Controls whether items wrapped in a [`GuestData`] are displayed or redacted
/// when the wrappers are printed via their `Display` or `Debug` impls.
//
// The Propolis server binary should only link the Propolis lib once (any
// structure that links the lib multiple times means something is very odd about
// its dependency graph), so there should never be any ambiguity about what
// `DISPLAY_GUEST_DATA` refers to when linking. But to be maximally cautious,
// label this static as `no_mangle` so that pulling in multiple Propolis
// libraries will break the build instead of possibly resolving ambiguously.
#[no_mangle]
gjcolombo marked this conversation as resolved.
Show resolved Hide resolved
pub static DISPLAY_GUEST_DATA: AtomicBool = AtomicBool::new(false);

/// A wrapper type denoting that the contained `T` was obtained from the guest
/// (e.g. by reading the guest's memory). This type implements various traits
/// (`Deref`, `DerefMut`, and `Borrow`) that allow it to be treated in most
/// cases as just another instance of a `T`. The main difference is that this
/// wrapper has custom `Display` and `Debug` implementations that redact the
/// wrapped value unless the program has set the [`DISPLAY_GUEST_DATA`] flag.
///
/// NOTE: This wrapper type is not airtight: owners of a wrapper can always
/// dereference it and invoke the Display/Debug impls directly on the resulting
/// reference to the wrapped value. If `T` is `Clone`, they can also clone the
/// dereferenced value and display the clone. (This comes with the territory
/// here: users need to be able to get at the wrapped value to be able to do
/// anything useful with it!)
///
/// NOTE: This type does not provide any other security guarantees (e.g. it does
/// not ensure that the wrapped memory will be zeroed on drop).
#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct GuestData<T: ?Sized>(T);

impl<T: std::fmt::Display> std::fmt::Display for GuestData<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if DISPLAY_GUEST_DATA.load(Ordering::Relaxed) {
write!(f, "{}", self.0)
} else {
write!(f, "<guest data redacted>")
}
}
}

impl<T: std::fmt::Debug> std::fmt::Debug for GuestData<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if DISPLAY_GUEST_DATA.load(Ordering::Relaxed) {
write!(f, "{:?}", self.0)
} else {
write!(f, "<guest data redacted>")
}
}
}

impl<T> std::ops::Deref for GuestData<T> {
type Target = T;

fn deref(&self) -> &Self::Target {
&self.0
}
}

impl<T> std::ops::DerefMut for GuestData<T> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}

impl<T> From<T> for GuestData<T> {
fn from(value: T) -> Self {
Self(value)
}
}

impl<T> std::borrow::Borrow<T> for GuestData<T> {
fn borrow(&self) -> &T {
&self.0
}
}

fn numeric_bounds(
bound: impl RangeBounds<usize>,
len: usize,
Expand Down
8 changes: 6 additions & 2 deletions lib/propolis/src/exits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ use bhyve_api::{
vm_suspend_how,
};

use crate::common::GuestData;

/// Describes the reason for exiting execution of a vCPU.
pub struct VmExit {
/// The instruction pointer of the guest at the time of exit.
Expand Down Expand Up @@ -95,17 +97,19 @@ impl From<&bhyve_api::vm_exit_vmx> for VmxDetail {

#[derive(Copy, Clone, Debug)]
pub struct InstEmul {
pub inst_data: [u8; 15],
pub inst_data: GuestData<[u8; 15]>,
pub len: u8,
}

impl InstEmul {
pub fn bytes(&self) -> &[u8] {
&self.inst_data[..usize::min(self.inst_data.len(), self.len as usize)]
}
}
impl From<&bhyve_api::vm_inst_emul> for InstEmul {
fn from(raw: &bhyve_api::vm_inst_emul) -> Self {
let mut res = Self { inst_data: [0u8; 15], len: raw.num_valid };
let mut res =
Self { inst_data: GuestData::from([0u8; 15]), len: raw.num_valid };
assert!(res.len as usize <= res.inst_data.len());
res.inst_data.copy_from_slice(&raw.inst[..]);

Expand Down
24 changes: 14 additions & 10 deletions lib/propolis/src/hw/nvme/cmds.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,14 @@ pub enum AdminCmd {
/// Asynchronous Event Request Command
AsyncEventReq,
/// An unknown admin command
Unknown(#[allow(dead_code)] SubmissionQueueEntry),
Unknown(#[allow(dead_code)] GuestData<SubmissionQueueEntry>),
}

impl AdminCmd {
/// Try to parse an `AdminCmd` out of a raw Submission Entry.
pub fn parse(raw: SubmissionQueueEntry) -> Result<Self, ParseErr> {
pub fn parse(
raw: GuestData<SubmissionQueueEntry>,
) -> Result<Self, ParseErr> {
let cmd = match raw.opcode() {
bits::ADMIN_OPC_DELETE_IO_SQ => {
AdminCmd::DeleteIOSubQ(raw.cdw10 as u16)
Expand Down Expand Up @@ -645,12 +647,14 @@ pub enum NvmCmd {
/// Read data and metadata
Read(ReadCmd),
/// An unknown NVM command
Unknown(SubmissionQueueEntry),
Unknown(GuestData<SubmissionQueueEntry>),
}

impl NvmCmd {
/// Try to parse an `NvmCmd` out of a raw Submission Entry.
pub fn parse(raw: SubmissionQueueEntry) -> Result<Self, ParseErr> {
pub fn parse(
raw: GuestData<SubmissionQueueEntry>,
) -> Result<Self, ParseErr> {
let _fuse = match (raw.cdw0 >> 8) & 0b11 {
0b00 => Ok(()), // Normal (non-fused) operation
0b01 => Err(ParseErr::Fused), // First fused op
Expand Down Expand Up @@ -882,29 +886,29 @@ impl PrpIter<'_> {
PrpNext::List(base, idx) => {
assert!(idx <= PRP_LIST_MAX);
let entry_addr = base + u64::from(idx) * 8;
let entry: u64 = self
let entry: GuestData<u64> = self
.mem
.read(GuestAddr(entry_addr))
.ok_or_else(|| "Unable to read PRP list entry")?;
probes::nvme_prp_entry!(
|| (self as *const Self as u64, entry,)
);

if entry & PAGE_OFFSET as u64 != 0 {
if *entry & PAGE_OFFSET as u64 != 0 {
return Err("Inappropriate PRP list entry offset");
}

if self.remain <= PAGE_SIZE as u64 {
(entry, self.remain, PrpNext::Done)
(*entry, self.remain, PrpNext::Done)
} else if idx != PRP_LIST_MAX {
(entry, PAGE_SIZE as u64, PrpNext::List(base, idx + 1))
(*entry, PAGE_SIZE as u64, PrpNext::List(base, idx + 1))
} else {
// The last PRP in this list chains to another
// (page-aligned) list with the next PRP.
self.next = PrpNext::List(entry, 0);
self.next = PrpNext::List(*entry, 0);
probes::nvme_prp_list!(|| (
self as *const Self as u64,
entry,
*entry,
0,
));
return self.get_next();
Expand Down
2 changes: 1 addition & 1 deletion lib/propolis/src/hw/nvme/queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ impl SubQueue {
pub fn pop(
self: &Arc<SubQueue>,
mem: &MemCtx,
) -> Option<(SubmissionQueueEntry, Permit, u16)> {
) -> Option<(GuestData<SubmissionQueueEntry>, Permit, u16)> {
// Attempt to reserve an entry on the Completion Queue
let permit = self.cq.reserve_entry(&self)?;
let mut state = self.state.lock();
Expand Down
12 changes: 6 additions & 6 deletions lib/propolis/src/hw/qemu/fwcfg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@ impl FwCfg {

let mem_guard = self.acc_mem.access().expect("usable mem accessor");
let mem = mem_guard.deref();
let req: FwCfgDmaAccess =
let req: GuestData<FwCfgDmaAccess> =
mem.read(GuestAddr(addr)).ok_or(FwCfgErr::BadAddr)?;

fn dma_write_result(
Expand Down Expand Up @@ -632,7 +632,7 @@ impl FwCfg {
fn dma_operation(
&self,
state: &mut MutexGuard<State>,
req: FwCfgDmaAccess,
req: GuestData<FwCfgDmaAccess>,
mem: &MemCtx,
) -> Result<(), FwCfgErr> {
let opts = FwCfgDmaCtrl::from_bits_truncate(req.ctrl.get());
Expand Down Expand Up @@ -995,9 +995,9 @@ mod test {
submit_dma_req(&dev, req_addr);

// DMA should have successfully completed now
assert_eq!(mem.read::<u32>(GuestAddr(req_addr)).unwrap(), 0);
assert_eq!(*mem.read::<u32>(GuestAddr(req_addr)).unwrap(), 0);
let data = mem.read::<[u8; 4]>(GuestAddr(dma_addr)).unwrap();
assert_eq!(&data, "QEMU".as_bytes());
assert_eq!(&*data, "QEMU".as_bytes());
}

#[test]
Expand All @@ -1019,9 +1019,9 @@ mod test {
submit_dma_req(&dev, req_addr);

// DMA should have successfully completed now
assert_eq!(mem.read::<u32>(GuestAddr(req_addr)).unwrap(), 0);
assert_eq!(*mem.read::<u32>(GuestAddr(req_addr)).unwrap(), 0);
let data = mem.read::<[u8; 4]>(GuestAddr(dma_addr)).unwrap();
assert_eq!(data, [0u8; 4]);
assert_eq!(*data, [0u8; 4]);
}

#[test]
Expand Down
Loading
Loading