diff options
| author | Paolo Bonzini <pbonzini@redhat.com> | 2025-09-30 13:23:28 -0400 |
|---|---|---|
| committer | Paolo Bonzini <pbonzini@redhat.com> | 2025-09-30 13:23:28 -0400 |
| commit | 924ebaefcec28289c210cad92551ae900e8fc220 (patch) | |
| tree | 1c372899d06a5e0c15d8bd3edc0442d1b14bf1ac | |
| parent | 8cbb0df2945a0fcb1f0b4384e65f13ec727baef4 (diff) | |
| parent | 10fd0285305d0b48e8a3bf15d4f17fc4f3d68cb6 (diff) | |
Merge tag 'kvmarm-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 updates for 6.18
- Add support for FF-A 1.2 as the secure memory conduit for pKVM,
allowing more registers to be used as part of the message payload.
- Change the way pKVM allocates its VM handles, making sure that the
privileged hypervisor is never tricked into using uninitialised
data.
- Speed up MMIO range registration by avoiding unnecessary RCU
synchronisation, which results in VMs starting much quicker.
- Add the dump of the instruction stream when panic-ing in the EL2
payload, just like the rest of the kernel has always done. This will
hopefully help debugging non-VHE setups.
- Add 52bit PA support to the stage-1 page-table walker, and make use
of it to populate the fault level reported to the guest on failing
to translate a stage-1 walk.
- Add NV support to the GICv3-on-GICv5 emulation code, ensuring
feature parity for guests, irrespective of the host platform.
- Fix some really ugly architecture problems when dealing with debug
in a nested VM. This has some bad performance impacts, but is at
least correct.
- Add enough infrastructure to be able to disable EL2 features and
give effective values to the EL2 control registers. This then allows
a bunch of features to be turned off, which helps cross-host
migration.
- Large rework of the selftest infrastructure to allow most tests to
transparently run at EL2. This is the first step towards enabling
NV testing.
- Various fixes and improvements all over the map, including one BE
fix, just in time for the removal of the feature.
72 files changed, 1696 insertions, 688 deletions
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index bec227f9500a..9da54d4ee49e 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -81,6 +81,8 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, __KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs, __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs, + __KVM_HOST_SMCCC_FUNC___pkvm_reserve_vm, + __KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm, __KVM_HOST_SMCCC_FUNC___pkvm_init_vm, __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu, __KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm, diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index fa8a08a1ccd5..c9eab316398e 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -220,6 +220,20 @@ static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu) static inline bool vcpu_el2_amo_is_set(const struct kvm_vcpu *vcpu) { + /* + * DDI0487L.b Known Issue D22105 + * + * When executing at EL2 and HCR_EL2.{E2H,TGE} = {1, 0} it is + * IMPLEMENTATION DEFINED whether the effective value of HCR_EL2.AMO + * is the value programmed or 1. + * + * Make the implementation choice of treating the effective value as 1 as + * we cannot subsequently catch changes to TGE or AMO that would + * otherwise lead to the SError becoming deliverable. + */ + if (vcpu_is_el2(vcpu) && vcpu_el2_e2h_is_set(vcpu) && !vcpu_el2_tge_is_set(vcpu)) + return true; + return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_AMO; } @@ -511,21 +525,29 @@ static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) if (vcpu_mode_is_32bit(vcpu)) { *vcpu_cpsr(vcpu) |= PSR_AA32_E_BIT; } else { - u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); + enum vcpu_sysreg r; + u64 sctlr; + + r = vcpu_has_nv(vcpu) ? SCTLR_EL2 : SCTLR_EL1; + + sctlr = vcpu_read_sys_reg(vcpu, r); sctlr |= SCTLR_ELx_EE; - vcpu_write_sys_reg(vcpu, sctlr, SCTLR_EL1); + vcpu_write_sys_reg(vcpu, sctlr, r); } } static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) { + enum vcpu_sysreg r; + u64 bit; + if (vcpu_mode_is_32bit(vcpu)) return !!(*vcpu_cpsr(vcpu) & PSR_AA32_E_BIT); - if (vcpu_mode_priv(vcpu)) - return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_EE); - else - return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_EL1_E0E); + r = is_hyp_ctxt(vcpu) ? SCTLR_EL2 : SCTLR_EL1; + bit = vcpu_mode_priv(vcpu) ? SCTLR_ELx_EE : SCTLR_EL1_E0E; + + return vcpu_read_sys_reg(vcpu, r) & bit; } static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0ee4f6fa3a17..b763293281c8 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -252,7 +252,8 @@ struct kvm_protected_vm { pkvm_handle_t handle; struct kvm_hyp_memcache teardown_mc; struct kvm_hyp_memcache stage2_teardown_mc; - bool enabled; + bool is_protected; + bool is_created; }; struct kvm_mpidr_data { @@ -1442,7 +1443,7 @@ struct kvm *kvm_arch_alloc_vm(void); #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE -#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.enabled) +#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.is_protected) #define vcpu_is_protected(vcpu) kvm_vm_is_protected((vcpu)->kvm) diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 7fd76f41c296..f7c06a840963 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -83,6 +83,8 @@ extern void check_nested_vcpu_requests(struct kvm_vcpu *vcpu); extern void kvm_nested_flush_hwstate(struct kvm_vcpu *vcpu); extern void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu); +extern void kvm_nested_setup_mdcr_el2(struct kvm_vcpu *vcpu); + struct kvm_s2_trans { phys_addr_t output; unsigned long block_size; @@ -265,7 +267,7 @@ static inline u64 decode_range_tlbi(u64 val, u64 *range, u16 *asid) return base; } -static inline unsigned int ps_to_output_size(unsigned int ps) +static inline unsigned int ps_to_output_size(unsigned int ps, bool pa52bit) { switch (ps) { case 0: return 32; @@ -273,7 +275,10 @@ static inline unsigned int ps_to_output_size(unsigned int ps) case 2: return 40; case 3: return 42; case 4: return 44; - case 5: + case 5: return 48; + case 6: if (pa52bit) + return 52; + fallthrough; default: return 48; } @@ -285,13 +290,28 @@ enum trans_regime { TR_EL2, }; +struct s1_walk_info; + +struct s1_walk_context { + struct s1_walk_info *wi; + u64 table_ipa; + int level; +}; + +struct s1_walk_filter { + int (*fn)(struct s1_walk_context *, void *); + void *priv; +}; + struct s1_walk_info { + struct s1_walk_filter *filter; u64 baddr; enum trans_regime regime; unsigned int max_oa_bits; unsigned int pgshift; unsigned int txsz; int sl; + u8 sh; bool as_el0; bool hpd; bool e0poe; @@ -299,6 +319,7 @@ struct s1_walk_info { bool pan; bool be; bool s2; + bool pa52bit; }; struct s1_walk_result { @@ -334,6 +355,8 @@ struct s1_walk_result { int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, struct s1_walk_result *wr, u64 va); +int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, + int *level); /* VNCR management */ int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index ea58282f59bb..08be89c95466 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -18,6 +18,7 @@ int pkvm_init_host_vm(struct kvm *kvm); int pkvm_create_hyp_vm(struct kvm *kvm); +bool pkvm_hyp_vm_is_created(struct kvm *kvm); void pkvm_destroy_hyp_vm(struct kvm *kvm); int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index e3e8944a71c3..e92e4a0e48fc 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -36,6 +36,7 @@ int kasan_brk_handler(struct pt_regs *regs, unsigned long esr); int ubsan_brk_handler(struct pt_regs *regs, unsigned long esr); int early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs); +void dump_kernel_instr(unsigned long kaddr); /* * Move regs->pc to next instruction and do necessary setup before it diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h index f6ec500ad3fa..c2485a862e69 100644 --- a/arch/arm64/include/asm/vncr_mapping.h +++ b/arch/arm64/include/asm/vncr_mapping.h @@ -94,6 +94,8 @@ #define VNCR_PMSICR_EL1 0x838 #define VNCR_PMSIRR_EL1 0x840 #define VNCR_PMSLATFR_EL1 0x848 +#define VNCR_PMSNEVFR_EL1 0x850 +#define VNCR_PMSDSFR_EL1 0x858 #define VNCR_TRFCR_EL1 0x880 #define VNCR_MPAM1_EL1 0x900 #define VNCR_MPAMHCR_EL2 0x930 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index ef269a5a37e1..770a41fa7214 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2539,6 +2539,15 @@ test_has_mpam_hcr(const struct arm64_cpu_capabilities *entry, int scope) return idr & MPAMIDR_EL1_HAS_HCR; } +static bool +test_has_gicv5_legacy(const struct arm64_cpu_capabilities *entry, int scope) +{ + if (!this_cpu_has_cap(ARM64_HAS_GICV5_CPUIF)) + return false; + + return !!(read_sysreg_s(SYS_ICC_IDR0_EL1) & ICC_IDR0_EL1_GCIE_LEGACY); +} + static const struct arm64_cpu_capabilities arm64_features[] = { { .capability = ARM64_ALWAYS_BOOT, @@ -3156,6 +3165,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, GCIE, IMP) }, + { + .desc = "GICv5 Legacy vCPU interface", + .type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE, + .capability = ARM64_HAS_GICV5_LEGACY, + .matches = test_has_gicv5_legacy, + }, {}, }; diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 714b0b5ec5ac..5369763606e7 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -105,6 +105,9 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors); KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); +/* Static key indicating whether GICv3 has GICv2 compatibility */ +KVM_NVHE_ALIAS(vgic_v3_has_v2_compat); + /* Static key which is set if CNTVOFF_EL2 is unusable */ KVM_NVHE_ALIAS(broken_cntvoff_key); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index f528b6041f6a..83e6d1409e1f 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -149,19 +149,18 @@ pstate_check_t * const aarch32_opcode_cond_checks[16] = { int show_unhandled_signals = 0; -static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) +void dump_kernel_instr(unsigned long kaddr) { - unsigned long addr = instruction_pointer(regs); char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str; int i; - if (user_mode(regs)) + if (!is_ttbr1_addr(kaddr)) return; for (i = -4; i < 1; i++) { unsigned int val, bad; - bad = aarch64_insn_read(&((u32 *)addr)[i], &val); + bad = aarch64_insn_read(&((u32 *)kaddr)[i], &val); if (!bad) p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val); @@ -169,7 +168,7 @@ static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) p += sprintf(p, i == 0 ? "(????????) " : "???????? "); } - printk("%sCode: %s\n", lvl, str); + printk(KERN_EMERG "Code: %s\n", str); } #define S_SMP " SMP" @@ -178,6 +177,7 @@ static int __die(const char *str, long err, struct pt_regs *regs) { static int die_counter; int ret; + unsigned long addr = instruction_pointer(regs); pr_emerg("Internal error: %s: %016lx [#%d] " S_SMP "\n", str, err, ++die_counter); @@ -190,7 +190,10 @@ static int __die(const char *str, long err, struct pt_regs *regs) print_modules(); show_regs(regs); - dump_kernel_instr(KERN_EMERG, regs); + if (user_mode(regs)) + return ret; + + dump_kernel_instr(addr); return ret; } diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index bd6b6a620a09..fa79744290f3 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -170,10 +170,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) return ret; - ret = pkvm_init_host_vm(kvm); - if (ret) - goto err_unshare_kvm; - if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL_ACCOUNT)) { ret = -ENOMEM; goto err_unshare_kvm; @@ -184,6 +180,16 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto err_free_cpumask; + if (is_protected_kvm_enabled()) { + /* + * If any failures occur after this is successful, make sure to + * call __pkvm_unreserve_vm to unreserve the VM in hyp. + */ + ret = pkvm_init_host_vm(kvm); + if (ret) + goto err_free_cpumask; + } + kvm_vgic_early_init(kvm); kvm_timer_init_vm(kvm); @@ -2317,8 +2323,9 @@ static int __init init_subsystems(void) } if (kvm_mode == KVM_MODE_NV && - !(vgic_present && kvm_vgic_global_state.type == VGIC_V3)) { - kvm_err("NV support requires GICv3, giving up\n"); + !(vgic_present && (kvm_vgic_global_state.type == VGIC_V3 || + kvm_vgic_global_state.has_gcie_v3_compat))) { + kvm_err("NV support requires GICv3 or GICv5 with legacy support, giving up\n"); err = -EINVAL; goto out; } diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index d71ca4ddc9d1..20bb9af125b1 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -28,9 +28,57 @@ static int get_ia_size(struct s1_walk_info *wi) /* Return true if the IPA is out of the OA range */ static bool check_output_size(u64 ipa, struct s1_walk_info *wi) { + if (wi->pa52bit) + return wi->max_oa_bits < 52 && (ipa & GENMASK_ULL(51, wi->max_oa_bits)); return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits)); } +static bool has_52bit_pa(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, u64 tcr) +{ + switch (BIT(wi->pgshift)) { + case SZ_64K: + default: /* IMPDEF: treat any other value as 64k */ + if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, PARANGE, 52)) + return false; + return ((wi->regime == TR_EL2 ? + FIELD_GET(TCR_EL2_PS_MASK, tcr) : + FIELD_GET(TCR_IPS_MASK, tcr)) == 0b0110); + case SZ_16K: + if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT)) + return false; + break; + case SZ_4K: + if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT)) + return false; + break; + } + + return (tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS)); +} + +static u64 desc_to_oa(struct s1_walk_info *wi, u64 desc) +{ + u64 addr; + + if (!wi->pa52bit) + return desc & GENMASK_ULL(47, wi->pgshift); + + switch (BIT(wi->pgshift)) { + case SZ_4K: + case SZ_16K: + addr = desc & GENMASK_ULL(49, wi->pgshift); + addr |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, desc) << 50; + break; + case SZ_64K: + default: /* IMPDEF: treat any other value as 64k */ + addr = desc & GENMASK_ULL(47, wi->pgshift); + addr |= FIELD_GET(KVM_PTE_ADDR_51_48, desc) << 48; + break; + } + + return addr; +} + /* Return the translation regime that applies to an AT instruction */ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op) { @@ -50,21 +98,26 @@ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 o } } +static u64 effective_tcr2(struct kvm_vcpu *vcpu, enum trans_regime regime) +{ + if (regime == TR_EL10) { + if (vcpu_has_nv(vcpu) && + !(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En)) + return 0; + + return vcpu_read_sys_reg(vcpu, TCR2_EL1); + } + + return vcpu_read_sys_reg(vcpu, TCR2_EL2); +} |
