Skip to content

Commit

Permalink
Merge tag 'kvmarm-fixes-6.14-1' of git://git.kernel.org/pub/scm/linux…
Browse files Browse the repository at this point in the history
…/kernel/git/kvmarm/kvmarm into HEAD

KVM/arm64 fixes for 6.14, take #1

- Correctly clean the BSS to the PoC before allowing EL2 to access it
  on nVHE/hVHE/protected configurations

- Propagate ownership of debug registers in protected mode after
  the rework that landed in 6.14-rc1

- Stop pretending that we can run the protected mode without a GICv3
  being present on the host

- Fix a use-after-free situation that can occur if a vcpu fails to
  initialise the NV shadow S2 MMU contexts

- Always evaluate the need to arm a background timer for fully emulated
  guest timers

- Fix the emulation of EL1 timers in the absence of FEAT_ECV

- Correctly handle the EL2 virtual timer, specially when HCR_EL2.E2H==0
  • Loading branch information
bonzini committed Feb 4, 2025
2 parents 35441cd + 0e45981 commit 5e21d0c
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 45 deletions.
49 changes: 11 additions & 38 deletions arch/arm64/kvm/arch_timer.c
Original file line number Diff line number Diff line change
Expand Up @@ -471,10 +471,8 @@ static void timer_emulate(struct arch_timer_context *ctx)

trace_kvm_timer_emulate(ctx, should_fire);

if (should_fire != ctx->irq.level) {
if (should_fire != ctx->irq.level)
kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);
return;
}

kvm_timer_update_status(ctx, should_fire);

Expand Down Expand Up @@ -761,21 +759,6 @@ static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
timer_irq(map->direct_ptimer),
&arch_timer_irq_ops);
WARN_ON_ONCE(ret);

/*
* The virtual offset behaviour is "interesting", as it
* always applies when HCR_EL2.E2H==0, but only when
* accessed from EL1 when HCR_EL2.E2H==1. So make sure we
* track E2H when putting the HV timer in "direct" mode.
*/
if (map->direct_vtimer == vcpu_hvtimer(vcpu)) {
struct arch_timer_offset *offs = &map->direct_vtimer->offset;

if (vcpu_el2_e2h_is_set(vcpu))
offs->vcpu_offset = NULL;
else
offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
}
}
}

Expand Down Expand Up @@ -976,31 +959,21 @@ void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
* which allows trapping of the timer registers even with NV2.
* Still, this is still worse than FEAT_NV on its own. Meh.
*/
if (!vcpu_el2_e2h_is_set(vcpu)) {
if (cpus_have_final_cap(ARM64_HAS_ECV))
return;

/*
* A non-VHE guest hypervisor doesn't have any direct access
* to its timers: the EL2 registers trap (and the HW is
* fully emulated), while the EL0 registers access memory
* despite the access being notionally direct. Boo.
*
* We update the hardware timer registers with the
* latest value written by the guest to the VNCR page
* and let the hardware take care of the rest.
*/
write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CTL_EL0), SYS_CNTV_CTL);
write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0), SYS_CNTV_CVAL);
write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CTL_EL0), SYS_CNTP_CTL);
write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0), SYS_CNTP_CVAL);
} else {
if (!cpus_have_final_cap(ARM64_HAS_ECV)) {
/*
* For a VHE guest hypervisor, the EL2 state is directly
* stored in the host EL1 timers, while the emulated EL0
* stored in the host EL1 timers, while the emulated EL1
* state is stored in the VNCR page. The latter could have
* been updated behind our back, and we must reset the
* emulation of the timers.
*
* A non-VHE guest hypervisor doesn't have any direct access
* to its timers: the EL2 registers trap despite being
* notionally direct (we use the EL1 HW, as for VHE), while
* the EL1 registers access memory.
*
* In both cases, process the emulated timers on each guest
* exit. Boo.
*/
struct timer_map map;
get_timer_map(vcpu, &map);
Expand Down
20 changes: 20 additions & 0 deletions arch/arm64/kvm/arm.c
Original file line number Diff line number Diff line change
Expand Up @@ -2290,6 +2290,19 @@ static int __init init_subsystems(void)
break;
case -ENODEV:
case -ENXIO:
/*
* No VGIC? No pKVM for you.
*
* Protected mode assumes that VGICv3 is present, so no point
* in trying to hobble along if vgic initialization fails.
*/
if (is_protected_kvm_enabled())
goto out;

/*
* Otherwise, userspace could choose to implement a GIC for its
* guest on non-cooperative hardware.
*/
vgic_present = false;
err = 0;
break;
Expand Down Expand Up @@ -2400,6 +2413,13 @@ static void kvm_hyp_init_symbols(void)
kvm_nvhe_sym(id_aa64smfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64SMFR0_EL1);
kvm_nvhe_sym(__icache_flags) = __icache_flags;
kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits;

/*
* Flush entire BSS since part of its data containing init symbols is read
* while the MMU is off.
*/
kvm_flush_dcache_to_poc(kvm_ksym_ref(__hyp_bss_start),
kvm_ksym_ref(__hyp_bss_end) - kvm_ksym_ref(__hyp_bss_start));
}

static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
Expand Down
24 changes: 24 additions & 0 deletions arch/arm64/kvm/hyp/nvhe/hyp-main.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,34 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
}

static void flush_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu)
{
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;

hyp_vcpu->vcpu.arch.debug_owner = host_vcpu->arch.debug_owner;

if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu))
hyp_vcpu->vcpu.arch.vcpu_debug_state = host_vcpu->arch.vcpu_debug_state;
else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu))
hyp_vcpu->vcpu.arch.external_debug_state = host_vcpu->arch.external_debug_state;
}

static void sync_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu)
{
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;

if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu))
host_vcpu->arch.vcpu_debug_state = hyp_vcpu->vcpu.arch.vcpu_debug_state;
else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu))
host_vcpu->arch.external_debug_state = hyp_vcpu->vcpu.arch.external_debug_state;
}

static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
{
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;

fpsimd_sve_flush();
flush_debug_state(hyp_vcpu);

hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt;

Expand Down Expand Up @@ -123,6 +146,7 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
unsigned int i;

fpsimd_sve_sync(&hyp_vcpu->vcpu);
sync_debug_state(hyp_vcpu);

host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt;

Expand Down
9 changes: 5 additions & 4 deletions arch/arm64/kvm/nested.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,26 +67,27 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
if (!tmp)
return -ENOMEM;

swap(kvm->arch.nested_mmus, tmp);

/*
* If we went through a realocation, adjust the MMU back-pointers in
* the previously initialised kvm_pgtable structures.
*/
if (kvm->arch.nested_mmus != tmp)
for (int i = 0; i < kvm->arch.nested_mmus_size; i++)
tmp[i].pgt->mmu = &tmp[i];
kvm->arch.nested_mmus[i].pgt->mmu = &kvm->arch.nested_mmus[i];

for (int i = kvm->arch.nested_mmus_size; !ret && i < num_mmus; i++)
ret = init_nested_s2_mmu(kvm, &tmp[i]);
ret = init_nested_s2_mmu(kvm, &kvm->arch.nested_mmus[i]);

if (ret) {
for (int i = kvm->arch.nested_mmus_size; i < num_mmus; i++)
kvm_free_stage2_pgd(&tmp[i]);
kvm_free_stage2_pgd(&kvm->arch.nested_mmus[i]);

return ret;
}

kvm->arch.nested_mmus_size = num_mmus;
kvm->arch.nested_mmus = tmp;

return 0;
}
Expand Down
16 changes: 13 additions & 3 deletions arch/arm64/kvm/sys_regs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1452,6 +1452,16 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
return true;
}

static bool access_hv_timer(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (!vcpu_el2_e2h_is_set(vcpu))
return undef_access(vcpu, p, r);

return access_arch_timer(vcpu, p, r);
}

static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp,
s64 new, s64 cur)
{
Expand Down Expand Up @@ -3103,9 +3113,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(CNTHP_CTL_EL2, access_arch_timer, reset_val, 0),
EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0),

{ SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer },
EL2_REG(CNTHV_CTL_EL2, access_arch_timer, reset_val, 0),
EL2_REG(CNTHV_CVAL_EL2, access_arch_timer, reset_val, 0),
{ SYS_DESC(SYS_CNTHV_TVAL_EL2), access_hv_timer },
EL2_REG(CNTHV_CTL_EL2, access_hv_timer, reset_val, 0),
EL2_REG(CNTHV_CVAL_EL2, access_hv_timer, reset_val, 0),

{ SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 },

Expand Down

0 comments on commit 5e21d0c

Please sign in to comment.