KVM: SEV: Allow for VMPL level specification in AP create
Update AP creation to support ADD/DESTROY of VMSAs at levels other than
VMPL0 in order to run under an SVSM at VMPL1 or lower. To maintain
backwards compatibility, the VMPL is specified in bits 16 to 19 of the
AP Creation request in SW_EXITINFO1 of the GHCB.
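
For illustration only, a hypervisor-side decode of this encoding looks roughly like the
following minimal sketch, using the mask/shift definitions added below (exit_info_1 stands
in for the raw SW_EXITINFO1 value):

	/* Sketch: split an AP Creation request into its fields */
	u32 lo               = lower_32_bits(exit_info_1);
	unsigned int vmpl    = (lo & SVM_VMGEXIT_AP_VMPL_MASK) >> SVM_VMGEXIT_AP_VMPL_SHIFT;
	unsigned int request = lo & ~SVM_VMGEXIT_AP_VMPL_MASK;	/* CREATE_ON_INIT/CREATE/DESTROY */
	unsigned int apic_id = upper_32_bits(exit_info_1);	/* target vCPU APIC ID */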

In order to track the VMSAs at different levels, create arrays for the
VMSAs, GHCBs, registered GHCBs and others. When switching VMPL levels,
these entries will be used to set the VMSA and GHCB physical addresses
in the VMCB for the VMPL level.
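
The header changes that define this per-VMPL state are in files not loaded in this excerpt;
based on the accessors used in the diff (vmpl_vmsa(), vmpl_vmsa_hpa(), vmpl_vmsa_gpa(),
cur_vmpl(), tgt_vmpl(), and the vmsa_info[]/ghcb_gpa[] arrays), the shape is roughly the
following hypothetical sketch, not the actual layout:

	/* Hypothetical per-VMPL tracking, indexed by SVM_SEV_VMPL0..SVM_SEV_VMPL3 */
	struct sev_vmsa_info {
		struct sev_es_save_area *vmsa;	/* VMSA page for this VMPL */
		hpa_t hpa;			/* VMSA HPA programmed into vmcb->control.vmsa_pa */
		gpa_t gpa;			/* guest-supplied VMSA GPA from AP create */
		bool ap_waiting_for_reset;
		bool has_guest_vmsa;
	};

	struct vcpu_sev_es_state {
		struct sev_vmsa_info vmsa_info[SVM_SEV_VMPL_MAX];
		u64 ghcb_gpa[SVM_SEV_VMPL_MAX];
		u64 ghcb_registered_gpa[SVM_SEV_VMPL_MAX];
		unsigned int snp_current_vmpl;	/* backs cur_vmpl(svm) */
		unsigned int snp_target_vmpl;	/* backs tgt_vmpl(svm) */
		/* ... existing SEV-ES fields ... */
	};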

In order to ensure that the proper responses are returned in the proper GHCB,
the GHCB must be unmapped at the current level and saved for restoration
later when switching back to that VMPL level.

Additional checks are applied to prevent a non-VMPL0 vCPU from performing
an AP creation request at VMPL0. A vCPU also cannot replace its own VMSA.

Signed-off-by: Tom Lendacky <[email protected]>
tlendacky authored and roy-hopkins committed Oct 1, 2024
1 parent c957831 commit 641ca6d
Showing 6 changed files with 169 additions and 48 deletions.
9 changes: 9 additions & 0 deletions arch/x86/include/asm/svm.h
@@ -294,6 +294,15 @@ static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_
(SVM_SEV_FEAT_RESTRICTED_INJECTION | \
SVM_SEV_FEAT_ALTERNATE_INJECTION)

enum {
SVM_SEV_VMPL0 = 0,
SVM_SEV_VMPL1,
SVM_SEV_VMPL2,
SVM_SEV_VMPL3,

SVM_SEV_VMPL_MAX
};

struct vmcb_seg {
u16 selector;
u16 attrib;
2 changes: 2 additions & 0 deletions arch/x86/include/uapi/asm/svm.h
@@ -115,6 +115,8 @@
#define SVM_VMGEXIT_AP_CREATE_ON_INIT 0
#define SVM_VMGEXIT_AP_CREATE 1
#define SVM_VMGEXIT_AP_DESTROY 2
#define SVM_VMGEXIT_AP_VMPL_MASK GENMASK(19, 16)
#define SVM_VMGEXIT_AP_VMPL_SHIFT 16
#define SVM_VMGEXIT_GET_APIC_IDS 0x80000017
#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018
#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
146 changes: 107 additions & 39 deletions arch/x86/kvm/svm/sev.c
@@ -807,7 +807,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
struct sev_es_save_area *save = svm->sev_es.vmsa;
struct sev_es_save_area *save = vmpl_vmsa(svm, SVM_SEV_VMPL0);
struct xregs_state *xsave;
const u8 *s;
u8 *d;
@@ -920,11 +920,11 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
* the VMSA memory content (i.e it will write the same memory region
* with the guest's key), so invalidate it first.
*/
clflush_cache_range(svm->sev_es.vmsa, PAGE_SIZE);
clflush_cache_range(vmpl_vmsa(svm, SVM_SEV_VMPL0), PAGE_SIZE);

vmsa.reserved = 0;
vmsa.handle = to_kvm_sev_info(kvm)->handle;
vmsa.address = __sme_pa(svm->sev_es.vmsa);
vmsa.address = __sme_pa(vmpl_vmsa(svm, SVM_SEV_VMPL0));
vmsa.len = PAGE_SIZE;
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
if (ret)
@@ -2453,7 +2453,7 @@ static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)

kvm_for_each_vcpu(i, vcpu, kvm) {
struct vcpu_svm *svm = to_svm(vcpu);
u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;
u64 pfn = __pa(vmpl_vmsa(svm, SVM_SEV_VMPL0)) >> PAGE_SHIFT;

ret = sev_es_sync_vmsa(svm);
if (ret)
Expand All @@ -2465,7 +2465,7 @@ static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
return ret;

/* Issue the SNP command to encrypt the VMSA */
data.address = __sme_pa(svm->sev_es.vmsa);
data.address = __sme_pa(vmpl_vmsa(svm, SVM_SEV_VMPL0));
ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_LAUNCH_UPDATE,
&data, &argp->error);
if (ret) {
@@ -3179,16 +3179,16 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
* releasing it back to the system.
*/
if (sev_snp_guest(vcpu->kvm)) {
u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;
u64 pfn = __pa(vmpl_vmsa(svm, SVM_SEV_VMPL0)) >> PAGE_SHIFT;

if (kvm_rmp_make_shared(vcpu->kvm, pfn, PG_LEVEL_4K))
goto skip_vmsa_free;
}

if (vcpu->arch.guest_state_protected)
sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa);
sev_flush_encrypted_page(vcpu, vmpl_vmsa(svm, SVM_SEV_VMPL0));

__free_page(virt_to_page(svm->sev_es.vmsa));
__free_page(virt_to_page(vmpl_vmsa(svm, SVM_SEV_VMPL0)));

skip_vmsa_free:
if (svm->sev_es.ghcb_sa_free)
@@ -3386,13 +3386,19 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
if (!kvm_ghcb_sw_scratch_is_valid(svm))
goto vmgexit_err;
break;
case SVM_VMGEXIT_AP_CREATION:
case SVM_VMGEXIT_AP_CREATION: {
unsigned int request;

if (!sev_snp_guest(vcpu->kvm))
goto vmgexit_err;
if (lower_32_bits(control->exit_info_1) != SVM_VMGEXIT_AP_DESTROY)

request = lower_32_bits(control->exit_info_1);
request &= ~SVM_VMGEXIT_AP_VMPL_MASK;
if (request != SVM_VMGEXIT_AP_DESTROY)
if (!kvm_ghcb_rax_is_valid(svm))
goto vmgexit_err;
break;
}
case SVM_VMGEXIT_GET_APIC_IDS:
if (!kvm_ghcb_rax_is_valid(svm))
goto vmgexit_err;
@@ -3851,9 +3857,10 @@ static int __sev_snp_update_protected_guest_state(struct kvm_vcpu *vcpu)

/* Clear use of the VMSA */
svm->vmcb->control.vmsa_pa = INVALID_PAGE;
tgt_vmpl_vmsa_hpa(svm) = INVALID_PAGE;

if (VALID_PAGE(svm->sev_es.snp_vmsa_gpa)) {
gfn_t gfn = gpa_to_gfn(svm->sev_es.snp_vmsa_gpa);
if (VALID_PAGE(tgt_vmpl_vmsa_gpa(svm))) {
gfn_t gfn = gpa_to_gfn(tgt_vmpl_vmsa_gpa(svm));
struct kvm_memory_slot *slot;
kvm_pfn_t pfn;

@@ -3871,32 +3878,54 @@ static int __sev_snp_update_protected_guest_state(struct kvm_vcpu *vcpu)
/*
* From this point forward, the VMSA will always be a
* guest-mapped page rather than the initial one allocated
* by KVM in svm->sev_es.vmsa. In theory, svm->sev_es.vmsa
* could be free'd and cleaned up here, but that involves
* cleanups like wbinvd_on_all_cpus() which would ideally
* be handled during teardown rather than guest boot.
* Deferring that also allows the existing logic for SEV-ES
* VMSAs to be re-used with minimal SNP-specific changes.
* by KVM in svm->sev_es.vmsa_info[vmpl].vmsa. In theory,
* svm->sev_es.vmsa_info[vmpl].vmsa could be free'd and cleaned
* up here, but that involves cleanups like wbinvd_on_all_cpus()
* which would ideally be handled during teardown rather than
* guest boot. Deferring that also allows the existing logic for
* SEV-ES VMSAs to be re-used with minimal SNP-specific changes.
*/
svm->sev_es.snp_has_guest_vmsa = true;
tgt_vmpl_has_guest_vmsa(svm) = true;

/* Use the new VMSA */
svm->vmcb->control.vmsa_pa = pfn_to_hpa(pfn);
tgt_vmpl_vmsa_hpa(svm) = pfn_to_hpa(pfn);

/*
* Since the vCPU may not have gone through the LAUNCH_UPDATE_VMSA path,
* be sure to mark the guest state as protected and enable LBR virtualization.
*/
vcpu->arch.guest_state_protected = true;
svm_enable_lbrv(vcpu);

/* Mark the vCPU as runnable */
vcpu->arch.pv.pv_unhalted = false;
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;

svm->sev_es.snp_vmsa_gpa = INVALID_PAGE;
tgt_vmpl_vmsa_gpa(svm) = INVALID_PAGE;

/*
* gmem pages aren't currently migratable, but if this ever
* changes then care should be taken to ensure
* svm->sev_es.vmsa is pinned through some other means.
* svm->sev_es.vmsa_info[vmpl].vmsa is pinned through some other
* means.
*/
kvm_release_pfn_clean(pfn);
}

if (cur_vmpl(svm) != tgt_vmpl(svm)) {
/* Unmap the current GHCB */
sev_es_unmap_ghcb(svm);

/* Save the GHCB GPA of the current VMPL */
svm->sev_es.ghcb_gpa[cur_vmpl(svm)] = svm->vmcb->control.ghcb_gpa;

/* Set the GHCB_GPA for the target VMPL and make it the current VMPL */
svm->vmcb->control.ghcb_gpa = svm->sev_es.ghcb_gpa[tgt_vmpl(svm)];

cur_vmpl(svm) = tgt_vmpl(svm);
}

/*
* When replacing the VMSA during SEV-SNP AP creation,
* mark the VMCB dirty so that full state is always reloaded.
@@ -3919,10 +3948,10 @@ void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu)

mutex_lock(&svm->sev_es.snp_vmsa_mutex);

if (!svm->sev_es.snp_ap_waiting_for_reset)
if (!tgt_vmpl_ap_waiting_for_reset(svm))
goto unlock;

svm->sev_es.snp_ap_waiting_for_reset = false;
tgt_vmpl_ap_waiting_for_reset(svm) = false;

ret = __sev_snp_update_protected_guest_state(vcpu);
if (ret)
@@ -3940,12 +3969,24 @@ static int sev_snp_ap_creation(struct vcpu_svm *svm)
struct vcpu_svm *target_svm;
unsigned int request;
unsigned int apic_id;
unsigned int vmpl;
bool kick;
int ret;

request = lower_32_bits(svm->vmcb->control.exit_info_1);
apic_id = upper_32_bits(svm->vmcb->control.exit_info_1);

vmpl = (request & SVM_VMGEXIT_AP_VMPL_MASK) >> SVM_VMGEXIT_AP_VMPL_SHIFT;
request &= ~SVM_VMGEXIT_AP_VMPL_MASK;

/* Validate the requested VMPL level */
if (vmpl >= SVM_SEV_VMPL_MAX) {
vcpu_unimpl(vcpu, "vmgexit: invalid VMPL level [%u] from guest\n",
vmpl);
return -EINVAL;
}
vmpl = array_index_nospec(vmpl, SVM_SEV_VMPL_MAX);

/* Validate the APIC ID */
target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, apic_id);
if (!target_vcpu) {
@@ -3967,13 +4008,22 @@ static int sev_snp_ap_creation(struct vcpu_svm *svm)

mutex_lock(&target_svm->sev_es.snp_vmsa_mutex);

target_svm->sev_es.snp_vmsa_gpa = INVALID_PAGE;
target_svm->sev_es.snp_ap_waiting_for_reset = true;
vmpl_vmsa_gpa(target_svm, vmpl) = INVALID_PAGE;
vmpl_ap_waiting_for_reset(target_svm, vmpl) = true;

/* Interrupt injection mode shouldn't change for AP creation */
/* VMPL0 can only be replaced by another vCPU running VMPL0 */
if (vmpl == SVM_SEV_VMPL0 &&
(vcpu == target_vcpu ||
vmpl_vmsa_hpa(svm, SVM_SEV_VMPL0) != svm->vmcb->control.vmsa_pa)) {
ret = -EINVAL;
goto out;
}

/* Perform common AP creation validation */
if (request < SVM_VMGEXIT_AP_DESTROY) {
u64 sev_features;

/* Interrupt injection mode shouldn't change for AP creation */
sev_features = vcpu->arch.regs[VCPU_REGS_RAX];
sev_features ^= sev->vmsa_features;

@@ -3983,13 +4033,8 @@ static int sev_snp_ap_creation(struct vcpu_svm *svm)
ret = -EINVAL;
goto out;
}
}

switch (request) {
case SVM_VMGEXIT_AP_CREATE_ON_INIT:
kick = false;
fallthrough;
case SVM_VMGEXIT_AP_CREATE:
/* Validate the input VMSA page */
if (!page_address_valid(vcpu, svm->vmcb->control.exit_info_2)) {
vcpu_unimpl(vcpu, "vmgexit: invalid AP VMSA address [%#llx] from guest\n",
svm->vmcb->control.exit_info_2);
@@ -4011,8 +4056,17 @@ static int sev_snp_ap_creation(struct vcpu_svm *svm)
ret = -EINVAL;
goto out;
}
}

target_svm->sev_es.snp_vmsa_gpa = svm->vmcb->control.exit_info_2;
switch (request) {
case SVM_VMGEXIT_AP_CREATE_ON_INIT:
/* Delay switching to the new VMSA */
kick = false;
fallthrough;
case SVM_VMGEXIT_AP_CREATE:
/* Switch to new VMSA on the next VMRUN */
target_svm->sev_es.snp_target_vmpl = vmpl;
vmpl_vmsa_gpa(target_svm, vmpl) = svm->vmcb->control.exit_info_2 & PAGE_MASK;
break;
case SVM_VMGEXIT_AP_DESTROY:
break;
@@ -4299,7 +4353,7 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
gfn = get_ghcb_msr_bits(svm, GHCB_MSR_GPA_VALUE_MASK,
GHCB_MSR_GPA_VALUE_POS);

svm->sev_es.ghcb_registered_gpa = gfn_to_gpa(gfn);
svm->sev_es.ghcb_registered_gpa[cur_vmpl(svm)] = gfn_to_gpa(gfn);

set_ghcb_msr_bits(svm, gfn, GHCB_MSR_GPA_VALUE_MASK,
GHCB_MSR_GPA_VALUE_POS);
@@ -4580,8 +4634,8 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
* the VMSA will be NULL if this vCPU is the destination for intrahost
* migration, and will be copied later.
*/
if (svm->sev_es.vmsa && !svm->sev_es.snp_has_guest_vmsa)
svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
if (cur_vmpl_vmsa(svm) && !cur_vmpl_has_guest_vmsa(svm))
svm->vmcb->control.vmsa_pa = __pa(cur_vmpl_vmsa(svm));

/* Can't intercept CR register access, HV can't modify CR registers */
svm_clr_intercept(svm, INTERCEPT_CR0_READ);
@@ -4644,16 +4698,30 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
unsigned int i;
u64 sev_info;

/*
* Set the GHCB MSR value as per the GHCB specification when emulating
* vCPU RESET for an SEV-ES guest.
*/
set_ghcb_msr(svm, GHCB_MSR_SEV_INFO((__u64)sev->ghcb_version,
GHCB_VERSION_MIN,
sev_enc_bit));
sev_info = GHCB_MSR_SEV_INFO((__u64)sev->ghcb_version, GHCB_VERSION_MIN,
sev_enc_bit);
set_ghcb_msr(svm, sev_info);
svm->sev_es.ghcb_gpa[SVM_SEV_VMPL0] = sev_info;

mutex_init(&svm->sev_es.snp_vmsa_mutex);

/*
* When not running under SNP, the "current VMPL" tracking for a guest
* is always 0 and the base tracking of GPAs and SPAs will be as before
* multiple VMPL support. However, under SNP, multiple VMPL levels can
* be run, so initialize these values appropriately.
*/
for (i = 1; i < SVM_SEV_VMPL_MAX; i++) {
svm->sev_es.vmsa_info[i].hpa = INVALID_PAGE;
svm->sev_es.ghcb_gpa[i] = sev_info;
}
}

void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa)
6 changes: 4 additions & 2 deletions arch/x86/kvm/svm/svm.c
@@ -1463,8 +1463,10 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT);
svm_switch_vmcb(svm, &svm->vmcb01);

if (vmsa_page)
svm->sev_es.vmsa = page_address(vmsa_page);
if (vmsa_page) {
vmpl_vmsa(svm, SVM_SEV_VMPL0) = page_address(vmsa_page);
vmpl_vmsa_hpa(svm, SVM_SEV_VMPL0) = __pa(page_address(vmsa_page));
}

svm->guest_state_loaded = false;

(Diffs for the remaining two changed files were not loaded.)
