From f414269392443f666bd8bef0cb3f0b53d5147be3 Mon Sep 17 00:00:00 2001 From: Per Larsen Date: Wed, 20 Aug 2025 01:10:05 +0000 Subject: KVM: arm64: Correct return value on host version downgrade attempt Once the hypervisor negotiates the FF-A version with the host, it should remain locked-in. However, it is possible to load FF-A as a module first supporting version 1.1 and then 1.0. Without this patch, the FF-A 1.0 driver will use 1.0 data structures to make calls which the hypervisor will incorrectly interpret as 1.1 data structures. With this patch, negotiation will fail. This patch does not change existing functionality in the case where a FF-A 1.2 driver is loaded after a 1.1 driver; the 1.2 driver will need to use 1.1 in order to proceed. Acked-by: Will Deacon Signed-off-by: Per Larsen Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/ffa.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 3369dd0c4009..2c199d40811e 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -712,7 +712,10 @@ static void do_ffa_version(struct arm_smccc_res *res, hyp_spin_lock(&version_lock); if (has_version_negotiated) { - res->a0 = hyp_ffa_version; + if (FFA_MINOR_VERSION(ffa_req_version) < FFA_MINOR_VERSION(hyp_ffa_version)) + res->a0 = FFA_RET_NOT_SUPPORTED; + else + res->a0 = hyp_ffa_version; goto unlock; } -- cgit v1.2.3 From 6f4c348b1d5c08f1105e645700962cc4353a8ac9 Mon Sep 17 00:00:00 2001 From: Per Larsen Date: Wed, 20 Aug 2025 01:10:06 +0000 Subject: KVM: arm64: Use SMCCC 1.2 for FF-A initialization and in host handler SMCCC 1.1 and prior allows four registers to be sent back as a result of an FF-A interface. SMCCC 1.2 increases the number of results that can be sent back to 8 and 16 for 32-bit and 64-bit SMC/HVCs respectively. FF-A 1.0 references SMCCC 1.2 (reference [4] on page xi) and FF-A 1.2 explicitly requires SMCCC 1.2 so it should be safe to use this version unconditionally. Moreover, it is simpler to implement FF-A features without having to worry about compatibility with SMCCC 1.1 and older. SMCCC 1.2 requires that SMC32/HVC32 from aarch64 mode preserves x8-x30 but given that there is no reliable way to distinguish 32-bit/64-bit calls, we assume SMC64 unconditionally. This has the benefit of being consistent with the handling of calls that are passed through, i.e., not proxied. (A cleaner solution will become available in FF-A 1.3.) Update the FF-A initialization and host handler code to use SMCCC 1.2. Signed-off-by: Per Larsen Acked-by: Will Deacon Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/Makefile | 1 + arch/arm64/kvm/hyp/nvhe/ffa.c | 193 +++++++++++++++++++++++++-------------- 2 files changed, 125 insertions(+), 69 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 0b0a68b663d4..a244ec25f8c5 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -27,6 +27,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o +hyp-obj-y += ../../../kernel/smccc-call.o hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o hyp-obj-y += $(lib-objs) diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 2c199d40811e..8290396384a2 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -71,36 +71,68 @@ static u32 hyp_ffa_version; static bool has_version_negotiated; static hyp_spinlock_t version_lock; -static void ffa_to_smccc_error(struct arm_smccc_res *res, u64 ffa_errno) +static void ffa_to_smccc_error(struct arm_smccc_1_2_regs *res, u64 ffa_errno) { - *res = (struct arm_smccc_res) { + *res = (struct arm_smccc_1_2_regs) { .a0 = FFA_ERROR, .a2 = ffa_errno, }; } -static void ffa_to_smccc_res_prop(struct arm_smccc_res *res, int ret, u64 prop) +static void ffa_to_smccc_res_prop(struct arm_smccc_1_2_regs *res, int ret, u64 prop) { if (ret == FFA_RET_SUCCESS) { - *res = (struct arm_smccc_res) { .a0 = FFA_SUCCESS, - .a2 = prop }; + *res = (struct arm_smccc_1_2_regs) { .a0 = FFA_SUCCESS, + .a2 = prop }; } else { ffa_to_smccc_error(res, ret); } } -static void ffa_to_smccc_res(struct arm_smccc_res *res, int ret) +static void ffa_to_smccc_res(struct arm_smccc_1_2_regs *res, int ret) { ffa_to_smccc_res_prop(res, ret, 0); } static void ffa_set_retval(struct kvm_cpu_context *ctxt, - struct arm_smccc_res *res) + struct arm_smccc_1_2_regs *res) { cpu_reg(ctxt, 0) = res->a0; cpu_reg(ctxt, 1) = res->a1; cpu_reg(ctxt, 2) = res->a2; cpu_reg(ctxt, 3) = res->a3; + cpu_reg(ctxt, 4) = res->a4; + cpu_reg(ctxt, 5) = res->a5; + cpu_reg(ctxt, 6) = res->a6; + cpu_reg(ctxt, 7) = res->a7; + + /* + * DEN0028C 2.6: SMC32/HVC32 call from aarch64 must preserve x8-x30. + * + * In FF-A 1.2, we cannot rely on the function ID sent by the caller to + * detect 32-bit calls because the CPU cycle management interfaces (e.g. + * FFA_MSG_WAIT, FFA_RUN) are 32-bit only but can have 64-bit responses. + * + * FFA-1.3 introduces 64-bit variants of the CPU cycle management + * interfaces. Moreover, FF-A 1.3 clarifies that SMC32 direct requests + * complete with SMC32 direct reponses which *should* allow us use the + * function ID sent by the caller to determine whether to return x8-x17. + * + * Note that we also cannot rely on function IDs in the response. + * + * Given the above, assume SMC64 and send back x0-x17 unconditionally + * as the passthrough code (__kvm_hyp_host_forward_smc) does the same. + */ + cpu_reg(ctxt, 8) = res->a8; + cpu_reg(ctxt, 9) = res->a9; + cpu_reg(ctxt, 10) = res->a10; + cpu_reg(ctxt, 11) = res->a11; + cpu_reg(ctxt, 12) = res->a12; + cpu_reg(ctxt, 13) = res->a13; + cpu_reg(ctxt, 14) = res->a14; + cpu_reg(ctxt, 15) = res->a15; + cpu_reg(ctxt, 16) = res->a16; + cpu_reg(ctxt, 17) = res->a17; } static bool is_ffa_call(u64 func_id) @@ -113,82 +145,92 @@ static bool is_ffa_call(u64 func_id) static int ffa_map_hyp_buffers(u64 ffa_page_count) { - struct arm_smccc_res res; + struct arm_smccc_1_2_regs res; - arm_smccc_1_1_smc(FFA_FN64_RXTX_MAP, - hyp_virt_to_phys(hyp_buffers.tx), - hyp_virt_to_phys(hyp_buffers.rx), - ffa_page_count, - 0, 0, 0, 0, - &res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_FN64_RXTX_MAP, + .a1 = hyp_virt_to_phys(hyp_buffers.tx), + .a2 = hyp_virt_to_phys(hyp_buffers.rx), + .a3 = ffa_page_count, + }, &res); return res.a0 == FFA_SUCCESS ? FFA_RET_SUCCESS : res.a2; } static int ffa_unmap_hyp_buffers(void) { - struct arm_smccc_res res; + struct arm_smccc_1_2_regs res; - arm_smccc_1_1_smc(FFA_RXTX_UNMAP, - HOST_FFA_ID, - 0, 0, 0, 0, 0, 0, - &res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_RXTX_UNMAP, + .a1 = HOST_FFA_ID, + }, &res); return res.a0 == FFA_SUCCESS ? FFA_RET_SUCCESS : res.a2; } -static void ffa_mem_frag_tx(struct arm_smccc_res *res, u32 handle_lo, +static void ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res, u32 handle_lo, u32 handle_hi, u32 fraglen, u32 endpoint_id) { - arm_smccc_1_1_smc(FFA_MEM_FRAG_TX, - handle_lo, handle_hi, fraglen, endpoint_id, - 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_MEM_FRAG_TX, + .a1 = handle_lo, + .a2 = handle_hi, + .a3 = fraglen, + .a4 = endpoint_id, + }, res); } -static void ffa_mem_frag_rx(struct arm_smccc_res *res, u32 handle_lo, +static void ffa_mem_frag_rx(struct arm_smccc_1_2_regs *res, u32 handle_lo, u32 handle_hi, u32 fragoff) { - arm_smccc_1_1_smc(FFA_MEM_FRAG_RX, - handle_lo, handle_hi, fragoff, HOST_FFA_ID, - 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_MEM_FRAG_RX, + .a1 = handle_lo, + .a2 = handle_hi, + .a3 = fragoff, + .a4 = HOST_FFA_ID, + }, res); } -static void ffa_mem_xfer(struct arm_smccc_res *res, u64 func_id, u32 len, +static void ffa_mem_xfer(struct arm_smccc_1_2_regs *res, u64 func_id, u32 len, u32 fraglen) { - arm_smccc_1_1_smc(func_id, len, fraglen, - 0, 0, 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = func_id, + .a1 = len, + .a2 = fraglen, + }, res); } -static void ffa_mem_reclaim(struct arm_smccc_res *res, u32 handle_lo, +static void ffa_mem_reclaim(struct arm_smccc_1_2_regs *res, u32 handle_lo, u32 handle_hi, u32 flags) { - arm_smccc_1_1_smc(FFA_MEM_RECLAIM, - handle_lo, handle_hi, flags, - 0, 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_MEM_RECLAIM, + .a1 = handle_lo, + .a2 = handle_hi, + .a3 = flags, + }, res); } -static void ffa_retrieve_req(struct arm_smccc_res *res, u32 len) +static void ffa_retrieve_req(struct arm_smccc_1_2_regs *res, u32 len) { - arm_smccc_1_1_smc(FFA_FN64_MEM_RETRIEVE_REQ, - len, len, - 0, 0, 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_FN64_MEM_RETRIEVE_REQ, + .a1 = len, + .a2 = len, + }, res); } -static void ffa_rx_release(struct arm_smccc_res *res) +static void ffa_rx_release(struct arm_smccc_1_2_regs *res) { - arm_smccc_1_1_smc(FFA_RX_RELEASE, - 0, 0, - 0, 0, 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_RX_RELEASE, + }, res); } -static void do_ffa_rxtx_map(struct arm_smccc_res *res, +static void do_ffa_rxtx_map(struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(phys_addr_t, tx, ctxt, 1); @@ -267,7 +309,7 @@ err_unmap: goto out_unlock; } -static void do_ffa_rxtx_unmap(struct arm_smccc_res *res, +static void do_ffa_rxtx_unmap(struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, id, ctxt, 1); @@ -368,7 +410,7 @@ static int ffa_host_unshare_ranges(struct ffa_mem_region_addr_range *ranges, return ret; } -static void do_ffa_mem_frag_tx(struct arm_smccc_res *res, +static void do_ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, handle_lo, ctxt, 1); @@ -427,7 +469,7 @@ out: } static void __do_ffa_mem_xfer(const u64 func_id, - struct arm_smccc_res *res, + struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, len, ctxt, 1); @@ -521,7 +563,7 @@ err_unshare: __do_ffa_mem_xfer((fid), (res), (ctxt)); \ } while (0); -static void do_ffa_mem_reclaim(struct arm_smccc_res *res, +static void do_ffa_mem_reclaim(struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, handle_lo, ctxt, 1); @@ -634,7 +676,7 @@ static bool ffa_call_supported(u64 func_id) return true; } -static bool do_ffa_features(struct arm_smccc_res *res, +static bool do_ffa_features(struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, id, ctxt, 1); @@ -666,17 +708,21 @@ out_handled: static int hyp_ffa_post_init(void) { size_t min_rxtx_sz; - struct arm_smccc_res res; + struct arm_smccc_1_2_regs res; - arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){ + .a0 = FFA_ID_GET, + }, &res); if (res.a0 != FFA_SUCCESS) return -EOPNOTSUPP; if (res.a2 != HOST_FFA_ID) return -EINVAL; - arm_smccc_1_1_smc(FFA_FEATURES, FFA_FN64_RXTX_MAP, - 0, 0, 0, 0, 0, 0, &res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){ + .a0 = FFA_FEATURES, + .a1 = FFA_FN64_RXTX_MAP, + }, &res); if (res.a0 != FFA_SUCCESS) return -EOPNOTSUPP; @@ -700,7 +746,7 @@ static int hyp_ffa_post_init(void) return 0; } -static void do_ffa_version(struct arm_smccc_res *res, +static void do_ffa_version(struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, ffa_req_version, ctxt, 1); @@ -724,9 +770,10 @@ static void do_ffa_version(struct arm_smccc_res *res, * first if TEE supports it. */ if (FFA_MINOR_VERSION(ffa_req_version) < FFA_MINOR_VERSION(hyp_ffa_version)) { - arm_smccc_1_1_smc(FFA_VERSION, ffa_req_version, 0, - 0, 0, 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_VERSION, + .a1 = ffa_req_version, + }, res); if (res->a0 == FFA_RET_NOT_SUPPORTED) goto unlock; @@ -743,7 +790,7 @@ unlock: hyp_spin_unlock(&version_lock); } -static void do_ffa_part_get(struct arm_smccc_res *res, +static void do_ffa_part_get(struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, uuid0, ctxt, 1); @@ -759,9 +806,14 @@ static void do_ffa_part_get(struct arm_smccc_res *res, goto out_unlock; } - arm_smccc_1_1_smc(FFA_PARTITION_INFO_GET, uuid0, uuid1, - uuid2, uuid3, flags, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_PARTITION_INFO_GET, + .a1 = uuid0, + .a2 = uuid1, + .a3 = uuid2, + .a4 = uuid3, + .a5 = flags, + }, res); if (res->a0 != FFA_SUCCESS) goto out_unlock; @@ -794,7 +846,7 @@ out_unlock: bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) { - struct arm_smccc_res res; + struct arm_smccc_1_2_regs res; /* * There's no way we can tell what a non-standard SMC call might @@ -863,13 +915,16 @@ out_handled: int hyp_ffa_init(void *pages) { - struct arm_smccc_res res; + struct arm_smccc_1_2_regs res; void *tx, *rx; if (kvm_host_psci_config.smccc_version < ARM_SMCCC_VERSION_1_2) return 0; - arm_smccc_1_1_smc(FFA_VERSION, FFA_VERSION_1_1, 0, 0, 0, 0, 0, 0, &res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_VERSION, + .a1 = FFA_VERSION_1_1, + }, &res); if (res.a0 == FFA_RET_NOT_SUPPORTED) return 0; -- cgit v1.2.3 From 79195f342417ff773048515964707aba3bfe0e41 Mon Sep 17 00:00:00 2001 From: Per Larsen Date: Wed, 20 Aug 2025 01:10:07 +0000 Subject: KVM: arm64: Mark FFA_NOTIFICATION_* calls as unsupported Prevent FFA_NOTIFICATION_* interfaces from being passed through to TZ. Acked-by: Will Deacon Signed-off-by: Per Larsen Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/ffa.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 8290396384a2..40a785b6ed06 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -670,6 +670,14 @@ static bool ffa_call_supported(u64 func_id) case FFA_RXTX_MAP: case FFA_MEM_DONATE: case FFA_MEM_RETRIEVE_REQ: + /* Optional notification interfaces added in FF-A 1.1 */ + case FFA_NOTIFICATION_BITMAP_CREATE: + case FFA_NOTIFICATION_BITMAP_DESTROY: + case FFA_NOTIFICATION_BIND: + case FFA_NOTIFICATION_UNBIND: + case FFA_NOTIFICATION_SET: + case FFA_NOTIFICATION_GET: + case FFA_NOTIFICATION_INFO_GET: return false; } -- cgit v1.2.3 From 8d24683e3e0f93b4bfdb558df50923514042817b Mon Sep 17 00:00:00 2001 From: Per Larsen Date: Wed, 20 Aug 2025 01:10:08 +0000 Subject: KVM: arm64: Mark optional FF-A 1.2 interfaces as unsupported Mark FF-A 1.2 interfaces as unsupported lest they get proxied. Acked-by: Will Deacon Signed-off-by: Per Larsen Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/ffa.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 40a785b6ed06..96109aa99c48 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -678,6 +678,11 @@ static bool ffa_call_supported(u64 func_id) case FFA_NOTIFICATION_SET: case FFA_NOTIFICATION_GET: case FFA_NOTIFICATION_INFO_GET: + /* Optional interfaces added in FF-A 1.2 */ + case FFA_MSG_SEND_DIRECT_REQ2: /* Optional per 7.5.1 */ + case FFA_MSG_SEND_DIRECT_RESP2: /* Optional per 7.5.1 */ + case FFA_CONSOLE_LOG: /* Optional per 13.1: not in Table 13.1 */ + case FFA_PARTITION_INFO_GET_REGS: /* Optional for virtual instances per 13.1 */ return false; } -- cgit v1.2.3 From 3f5952917498e7bb9d227812d4349668f62c413b Mon Sep 17 00:00:00 2001 From: Per Larsen Date: Wed, 20 Aug 2025 01:10:09 +0000 Subject: KVM: arm64: Mask response to FFA_FEATURE call The minimum size and alignment boundary for FFA_RXTX_MAP is returned in bit[1:0]. Mask off any other bits in w2 when reading the minimum buffer size in hyp_ffa_post_init. Acked-by: Will Deacon Signed-off-by: Per Larsen Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/ffa.c | 2 +- include/linux/arm_ffa.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 96109aa99c48..a8ec1a94f3f8 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -739,7 +739,7 @@ static int hyp_ffa_post_init(void) if (res.a0 != FFA_SUCCESS) return -EOPNOTSUPP; - switch (res.a2) { + switch (res.a2 & FFA_FEAT_RXTX_MIN_SZ_MASK) { case FFA_FEAT_RXTX_MIN_SZ_4K: min_rxtx_sz = SZ_4K; break; diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index e1634897e159..cd7ee4df9045 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -128,6 +128,7 @@ #define FFA_FEAT_RXTX_MIN_SZ_4K 0 #define FFA_FEAT_RXTX_MIN_SZ_64K 1 #define FFA_FEAT_RXTX_MIN_SZ_16K 2 +#define FFA_FEAT_RXTX_MIN_SZ_MASK GENMASK(1, 0) /* FFA Bus/Device/Driver related */ struct ffa_device { -- cgit v1.2.3 From 162190f2ccdc5964efa2a26e9fc3e56cf80fc29b Mon Sep 17 00:00:00 2001 From: Per Larsen Date: Wed, 20 Aug 2025 01:10:10 +0000 Subject: KVM: arm64: Bump the supported version of FF-A to 1.2 FF-A version 1.2 introduces the DIRECT_REQ2 ABI. Bump the FF-A version preferred by the hypervisor to enable implementation of the 1.2-only FFA_MSG_SEND_DIRECT_REQ2 and FFA_MSG_SEND_RESP2 messaging interfaces. Co-developed-by: Ayrton Munoz Signed-off-by: Ayrton Munoz Acked-by: Will Deacon Signed-off-by: Per Larsen Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/ffa.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index a8ec1a94f3f8..4e16f9b96f63 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -936,7 +936,7 @@ int hyp_ffa_init(void *pages) arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { .a0 = FFA_VERSION, - .a1 = FFA_VERSION_1_1, + .a1 = FFA_VERSION_1_2, }, &res); if (res.a0 == FFA_RET_NOT_SUPPORTED) return 0; @@ -957,10 +957,10 @@ int hyp_ffa_init(void *pages) if (FFA_MAJOR_VERSION(res.a0) != 1) return -EOPNOTSUPP; - if (FFA_MINOR_VERSION(res.a0) < FFA_MINOR_VERSION(FFA_VERSION_1_1)) + if (FFA_MINOR_VERSION(res.a0) < FFA_MINOR_VERSION(FFA_VERSION_1_2)) hyp_ffa_version = res.a0; else - hyp_ffa_version = FFA_VERSION_1_1; + hyp_ffa_version = FFA_VERSION_1_2; tx = pages; pages += KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE; -- cgit v1.2.3 From 8673e5b22e1e114213d3ca74f415034aed45e528 Mon Sep 17 00:00:00 2001 From: Wei-Lin Chang Date: Sat, 9 Aug 2025 21:53:56 +0800 Subject: KVM: arm64: ptdump: Don't test PTE_VALID alongside other attributes The attribute masks and test values in the ptdump code are meant for individual attributes, however for stage-2 ptdump we included PTE_VALID while testing for R, W, X, and AF. This led to some confusion and the flipped output for the executable attribute. Remove PTE_VALID from all attribute masks and values so that each test matches only the relevant bits. Additionally, the executable attribute printing is updated to align with stage-1 ptdump, printing "NX" for non-executable regions and "x " for executable ones. Suggested-by: Anshuman Khandual Suggested-by: Mark Rutland Suggested-by: Sebastian Ene Signed-off-by: Wei-Lin Chang Acked-by: Will Deacon Reviewed-by: Anshuman Khandual Signed-off-by: Marc Zyngier --- arch/arm64/kvm/ptdump.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c index 098416d7e5c2..dc5acfb00af9 100644 --- a/arch/arm64/kvm/ptdump.c +++ b/arch/arm64/kvm/ptdump.c @@ -32,23 +32,23 @@ static const struct ptdump_prot_bits stage2_pte_bits[] = { .set = " ", .clear = "F", }, { - .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID, - .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID, + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R, + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R, .set = "R", .clear = " ", }, { - .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID, - .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID, + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, .set = "W", .clear = " ", }, { - .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID, - .val = PTE_VALID, - .set = " ", - .clear = "X", + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN, + .val = KVM_PTE_LEAF_ATTR_HI_S2_XN, + .set = "NX", + .clear = "x ", }, { - .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID, - .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID, + .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF, + .val = KVM_PTE_LEAF_ATTR_LO_S2_AF, .set = "AF", .clear = " ", }, { -- cgit v1.2.3 From 2ba972bf71cb71d2127ec6c3db1ceb6dd0c73173 Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Fri, 15 Aug 2025 17:26:55 +0100 Subject: KVM: arm64: Fix debug checking for np-guests using huge mappings When running with transparent huge pages and CONFIG_NVHE_EL2_DEBUG then the debug checking in assert_host_shared_guest() fails on the launch of an np-guest. This WARN_ON() causes a panic and generates the stack below. In __pkvm_host_relax_perms_guest() the debug checking assumes the mapping is a single page but it may be a block map. Update the checking so that the size is not checked and just assumes the correct size. While we're here make the same fix in __pkvm_host_mkyoung_guest(). Info: # lkvm run -k /share/arch/arm64/boot/Image -m 704 -c 8 --name guest-128 Info: Removed ghost socket file "/.lkvm//guest-128.sock". [ 1406.521757] kvm [141]: nVHE hyp BUG at: arch/arm64/kvm/hyp/nvhe/mem_protect.c:1088! [ 1406.521804] kvm [141]: nVHE call trace: [ 1406.521828] kvm [141]: [] __kvm_nvhe_hyp_panic+0xb4/0xe8 [ 1406.521946] kvm [141]: [] __kvm_nvhe_assert_host_shared_guest+0xb0/0x10c [ 1406.522049] kvm [141]: [] __kvm_nvhe___pkvm_host_relax_perms_guest+0x48/0x104 [ 1406.522157] kvm [141]: [] __kvm_nvhe_handle___pkvm_host_relax_perms_guest+0x64/0x7c [ 1406.522250] kvm [141]: [] __kvm_nvhe_handle_trap+0x8c/0x1a8 [ 1406.522333] kvm [141]: [] __kvm_nvhe___skip_pauth_save+0x4/0x4 [ 1406.522454] kvm [141]: ---[ end nVHE call trace ]--- [ 1406.522477] kvm [141]: Hyp Offset: 0xfffece8013600000 [ 1406.522554] Kernel panic - not syncing: HYP panic: [ 1406.522554] PS:834003c9 PC:0000b1806db6d170 ESR:00000000f2000800 [ 1406.522554] FAR:ffff8000804be420 HPFAR:0000000000804be0 PAR:0000000000000000 [ 1406.522554] VCPU:0000000000000000 [ 1406.523337] CPU: 3 UID: 0 PID: 141 Comm: kvm-vcpu-0 Not tainted 6.16.0-rc7 #97 PREEMPT [ 1406.523485] Hardware name: FVP Base RevC (DT) [ 1406.523566] Call trace: [ 1406.523629] show_stack+0x18/0x24 (C) [ 1406.523753] dump_stack_lvl+0xd4/0x108 [ 1406.523899] dump_stack+0x18/0x24 [ 1406.524040] panic+0x3d8/0x448 [ 1406.524184] nvhe_hyp_panic_handler+0x10c/0x23c [ 1406.524325] kvm_handle_guest_abort+0x68c/0x109c [ 1406.524500] handle_exit+0x60/0x17c [ 1406.524630] kvm_arch_vcpu_ioctl_run+0x2e0/0x8c0 [ 1406.524794] kvm_vcpu_ioctl+0x1a8/0x9cc [ 1406.524919] __arm64_sys_ioctl+0xac/0x104 [ 1406.525067] invoke_syscall+0x48/0x10c [ 1406.525189] el0_svc_common.constprop.0+0x40/0xe0 [ 1406.525322] do_el0_svc+0x1c/0x28 [ 1406.525441] el0_svc+0x38/0x120 [ 1406.525588] el0t_64_sync_handler+0x10c/0x138 [ 1406.525750] el0t_64_sync+0x1ac/0x1b0 [ 1406.525876] SMP: stopping secondary CPUs [ 1406.525965] Kernel Offset: disabled [ 1406.526032] CPU features: 0x0000,00000080,8e134ca1,9446773f [ 1406.526130] Memory Limit: none [ 1406.959099] ---[ end Kernel panic - not syncing: HYP panic: [ 1406.959099] PS:834003c9 PC:0000b1806db6d170 ESR:00000000f2000800 [ 1406.959099] FAR:ffff8000804be420 HPFAR:0000000000804be0 PAR:0000000000000000 [ 1406.959099] VCPU:0000000000000000 ] Signed-off-by: Ben Horgan Fixes: f28f1d02f4eaa ("KVM: arm64: Add a range to __pkvm_host_unshare_guest()") Cc: Vincent Donnefort Cc: Quentin Perret Cc: Ryan Roberts Cc: stable@vger.kernel.org Reviewed-by: Vincent Donnefort Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 8957734d6183..ddc8beb55eee 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -1010,9 +1010,12 @@ static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ip return ret; if (!kvm_pte_valid(pte)) return -ENOENT; - if (kvm_granule_size(level) != size) + if (size && kvm_granule_size(level) != size) return -E2BIG; + if (!size) + size = kvm_granule_size(level); + state = guest_get_page_state(pte, ipa); if (state != PKVM_PAGE_SHARED_BORROWED) return -EPERM; @@ -1100,7 +1103,7 @@ int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_ if (prot & ~KVM_PGTABLE_PROT_RWX) return -EINVAL; - assert_host_shared_guest(vm, ipa, PAGE_SIZE); + assert_host_shared_guest(vm, ipa, 0); guest_lock_component(vm); ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0); guest_unlock_component(vm); @@ -1156,7 +1159,7 @@ int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu) if (pkvm_hyp_vm_is_protected(vm)) return -EPERM; - assert_host_shared_guest(vm, ipa, PAGE_SIZE); + assert_host_shared_guest(vm, ipa, 0); guest_lock_component(vm); kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0); guest_unlock_component(vm); -- cgit v1.2.3 From f9ac33e45d57bc4aa365363ffb650c830e5bb325 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 9 Sep 2025 08:24:28 +0100 Subject: KVM: arm64: Add build-time check for duplicate DECLARE_REG use The DECLARE_REG() macro provides a convenient way to create a local variable initialized from a cpu context in the hyp trap handlers. However, a common error is to use the macro multiple times in the same scope with the same register index, but for different logical purposes. This results in valid C code that compiles without error, but introduces subtle bugs where a developer expects two different variables to hold values from two different registers, when in fact they are both sourced from the same one. To prevent this entire class of bugs, modify the DECLARE_REG() macro to declare a dummy variable whose name is derived from the register index. If the macro is used again with the same index in the same scope, the compiler will fail with a "redeclaration of variable" error, turning a subtle runtime bug into an obvious build-time failure. Signed-off-by: Fuad Tabba Tested-by: Mark Brown Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/include/nvhe/trap_handler.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h index 1e6d995968a1..ba5382c12787 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h +++ b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h @@ -12,7 +12,8 @@ #include #define cpu_reg(ctxt, r) (ctxt)->regs.regs[r] -#define DECLARE_REG(type, name, ctxt, reg) \ +#define DECLARE_REG(type, name, ctxt, reg) \ + __always_unused int ___check_reg_ ## reg; \ type name = (type)cpu_reg(ctxt, (reg)) #endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */ -- cgit v1.2.3 From 58dfb66b1e4cfb998db9e71437a2c0d9b83a93c0 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 9 Sep 2025 08:24:29 +0100 Subject: KVM: arm64: Rename pkvm.enabled to pkvm.is_protected The 'pkvm.enabled' field in struct kvm_protected_vm is confusingly named. Its purpose is to indicate whether a VM is a _protected_ VM under pKVM, and not whether the VM itself is enabled or running. For a non-protected VM, the VM can be fully active and running, yet this field would be false. This ambiguity can lead to incorrect assumptions about the VM's operational state and makes the code harder to reason about. Rename the field to 'is_protected' to make it unambiguous that the flag tracks the protected status of the VM. No functional change intended. Reviewed-by: Kunwu Chan Signed-off-by: Fuad Tabba Reviewed-by: Kunwu Chan Tested-by: Mark Brown Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 4 ++-- arch/arm64/kvm/hyp/nvhe/pkvm.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2f2394cce24e..a4289c2f13f5 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -252,7 +252,7 @@ struct kvm_protected_vm { pkvm_handle_t handle; struct kvm_hyp_memcache teardown_mc; struct kvm_hyp_memcache stage2_teardown_mc; - bool enabled; + bool is_protected; }; struct kvm_mpidr_data { @@ -1548,7 +1548,7 @@ struct kvm *kvm_arch_alloc_vm(void); #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE -#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.enabled) +#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.is_protected) #define vcpu_is_protected(vcpu) kvm_vm_is_protected((vcpu)->kvm) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 338505cb0171..6198c1d27b5b 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -406,7 +406,7 @@ static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm, hyp_vm->host_kvm = host_kvm; hyp_vm->kvm.created_vcpus = nr_vcpus; hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr; - hyp_vm->kvm.arch.pkvm.enabled = READ_ONCE(host_kvm->arch.pkvm.enabled); + hyp_vm->kvm.arch.pkvm.is_protected = READ_ONCE(host_kvm->arch.pkvm.is_protected); hyp_vm->kvm.arch.flags = 0; pkvm_init_features_from_host(hyp_vm, host_kvm); } -- cgit v1.2.3 From 604a5032b454bde03200d755f6ecc3f724511c6a Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 9 Sep 2025 08:24:30 +0100 Subject: KVM: arm64: Rename 'host_kvm' to 'kvm' in pKVM host code In hypervisor (EL2) code, it is important to distinguish between the host's 'struct kvm' and a protected VM's 'struct kvm'. Using 'host_kvm' as variable name in that context makes this distinction clear. However, in the host kernel code (EL1), there is no such ambiguity. The code is only ever concerned with the host's own 'struct kvm' instance. The 'host_' prefix is therefore redundant and adds unnecessary verbosity. Simplify the code by renaming the 'host_kvm' parameter to 'kvm' in all functions within host-side kernel code (EL1). This improves readability and makes the naming consistent with other host-side kernel code. No functional change intended. Signed-off-by: Fuad Tabba Tested-by: Mark Brown Signed-off-by: Marc Zyngier --- arch/arm64/kvm/pkvm.c | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index fcd70bfe44fb..7aaeb66e3f39 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -85,16 +85,16 @@ void __init kvm_hyp_reserve(void) hyp_mem_base); } -static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm) +static void __pkvm_destroy_hyp_vm(struct kvm *kvm) { - if (host_kvm->arch.pkvm.handle) { + if (kvm->arch.pkvm.handle) { WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm, - host_kvm->arch.pkvm.handle)); + kvm->arch.pkvm.handle)); } - host_kvm->arch.pkvm.handle = 0; - free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc); - free_hyp_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc); + kvm->arch.pkvm.handle = 0; + free_hyp_memcache(&kvm->arch.pkvm.teardown_mc); + free_hyp_memcache(&kvm->arch.pkvm.stage2_teardown_mc); } static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu) @@ -129,16 +129,16 @@ static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu) * * Return 0 on success, negative error code on failure. */ -static int __pkvm_create_hyp_vm(struct kvm *host_kvm) +static int __pkvm_create_hyp_vm(struct kvm *kvm) { size_t pgd_sz, hyp_vm_sz; void *pgd, *hyp_vm; int ret; - if (host_kvm->created_vcpus < 1) + if (kvm->created_vcpus < 1) return -EINVAL; - pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr); + pgd_sz = kvm_pgtable_stage2_pgd_size(kvm->arch.mmu.vtcr); /* * The PGD pages will be reclaimed using a hyp_memcache which implies @@ -152,7 +152,7 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm) /* Allocate memory to donate to hyp for vm and vcpu pointers. */ hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE, size_mul(sizeof(void *), - host_kvm->created_vcpus))); + kvm->created_vcpus))); hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT); if (!hyp_vm) { ret = -ENOMEM; @@ -160,12 +160,12 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm) } /* Donate the VM memory to hyp and let hyp initialize it. */ - ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd); + ret = kvm_call_hyp_nvhe(__pkvm_init_vm, kvm, hyp_vm, pgd); if (ret < 0) goto free_vm; - host_kvm->arch.pkvm.handle = ret; - host_kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2; + kvm->arch.pkvm.handle = ret; + kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2; kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE); return 0; @@ -176,14 +176,14 @@ free_pgd: return ret; } -int pkvm_create_hyp_vm(struct kvm *host_kvm) +int pkvm_create_hyp_vm(struct kvm *kvm) { int ret = 0; - mutex_lock(&host_kvm->arch.config_lock); - if (!host_kvm->arch.pkvm.handle) - ret = __pkvm_create_hyp_vm(host_kvm); - mutex_unlock(&host_kvm->arch.config_lock); + mutex_lock(&kvm->arch.config_lock); + if (!kvm->arch.pkvm.handle) + ret = __pkvm_create_hyp_vm(kvm); + mutex_unlock(&kvm->arch.config_lock); return ret; } @@ -200,14 +200,14 @@ int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu) return ret; } -void pkvm_destroy_hyp_vm(struct kvm *host_kvm) +void pkvm_destroy_hyp_vm(struct kvm *kvm) { - mutex_lock(&host_kvm->arch.config_lock); - __pkvm_destroy_hyp_vm(host_kvm); - mutex_unlock(&host_kvm->arch.config_lock); + mutex_lock(&kvm->arch.config_lock); + __pkvm_destroy_hyp_vm(kvm); + mutex_unlock(&kvm->arch.config_lock); } -int pkvm_init_host_vm(struct kvm *host_kvm) +int pkvm_init_host_vm(struct kvm *kvm) { return 0; } -- cgit v1.2.3 From 070362648f5f546018747a9a1857c1597594934e Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 9 Sep 2025 08:24:31 +0100 Subject: KVM: arm64: Clarify comments to distinguish pKVM mode from protected VMs The hypervisor code for protected KVM contains comments that are imprecise and at times flat-out wrong. They often refer to a "protected VM" in contexts where the code or data structure applies to _any_ VM managed by the hypervisor when pKVM is enabled. For instance, the 'vm_table' holds handles for all VMs known to the hypervisor, not exclusively for those that are configured as protected. This inaccurate terminology can make the code scope harder to understand for future (and current) developers. Clarify the comments throughout the pKVM hypervisor code to make a clear distinction between the pKVM feature itself (i.e., "protected mode") and the VMs that are specifically configured to be protected. This involves replacing ambiguous uses of "protected VM" with more accurate phrasing. No functional change intended. Signed-off-by: Fuad Tabba Tested-by: Mark Brown Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 2 +- arch/arm64/kvm/hyp/nvhe/pkvm.c | 25 +++++++++++-------------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h index ce31d3b73603..4540324b5657 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -29,7 +29,7 @@ struct pkvm_hyp_vcpu { }; /* - * Holds the relevant data for running a protected vm. + * Holds the relevant data for running a vm in protected mode. */ struct pkvm_hyp_vm { struct kvm kvm; diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 6198c1d27b5b..abe173406c88 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -23,8 +23,8 @@ unsigned int kvm_arm_vmid_bits; unsigned int kvm_host_sve_max_vl; /* - * The currently loaded hyp vCPU for each physical CPU. Used only when - * protected KVM is enabled, but for both protected and non-protected VMs. + * The currently loaded hyp vCPU for each physical CPU. Used in protected mode + * for both protected and non-protected VMs. */ static DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, loaded_hyp_vcpu); @@ -135,7 +135,7 @@ static int pkvm_check_pvm_cpu_features(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; - /* Protected KVM does not support AArch32 guests. */ + /* No AArch32 support for protected guests. */ if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL0, AARCH32) || kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL1, AARCH32)) return -EINVAL; @@ -210,8 +210,8 @@ static pkvm_handle_t idx_to_vm_handle(unsigned int idx) DEFINE_HYP_SPINLOCK(vm_table_lock); /* - * The table of VM entries for protected VMs in hyp. - * Allocated at hyp initialization and setup. + * A table that tracks all VMs in protected mode. + * Allocated during hyp initialization and setup. */ static struct pkvm_hyp_vm **vm_table; @@ -495,7 +495,7 @@ static int find_free_vm_table_entry(struct kvm *host_kvm) /* * Allocate a VM table entry and insert a pointer to the new vm. * - * Return a unique handle to the protected VM on success, + * Return a unique handle to the VM on success, * negative error code on failure. */ static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm, @@ -594,10 +594,8 @@ static void unmap_donated_memory_noclear(void *va, size_t size) } /* - * Initialize the hypervisor copy of the protected VM state using the - * memory donated by the host. - * - * Unmaps the donated memory from the host at stage 2. + * Initialize the hypervisor copy of the VM state using host-donated memory. + * Unmap the donated memory from the host at stage 2. * * host_kvm: A pointer to the host's struct kvm. * vm_hva: The host va of the area being donated for the VM state. @@ -606,7 +604,7 @@ static void unmap_donated_memory_noclear(void *va, size_t size) * the VM. Must be page aligned. Its size is implied by the VM's * VTCR. * - * Return a unique handle to the protected VM on success, + * Return a unique handle to the VM on success, * negative error code on failure. */ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, @@ -668,10 +666,9 @@ err_unpin_kvm: } /* - * Initialize the hypervisor copy of the protected vCPU state using the - * memory donated by the host. + * Initialize the hypervisor copy of the vCPU state using host-donated memory. * - * handle: The handle for the protected vm. + * handle: The hypervisor handle for the vm. * host_vcpu: A pointer to the corresponding host vcpu. * vcpu_hva: The host va of the area being donated for the vcpu state. * Must be page aligned. The size of the area must be equal to -- cgit v1.2.3 From 3c45b67625357ac680ee2508493b697cdcd78128 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 9 Sep 2025 08:24:32 +0100 Subject: KVM: arm64: Decouple hyp VM creation state from its handle Currently, the presence of a pKVM handle (pkvm.handle != 0) is used to determine if the corresponding hypervisor (EL2) VM has been created and initialized. This couples the handle's lifecycle with the VM's creation state. This coupling will become problematic with upcoming changes that will allocate the pKVM handle earlier in the VM's life, before the VM is instantiated at the hypervisor. To prepare for this and make the state tracking explicit, decouple the two concepts. Introduce a new boolean flag, 'pkvm.is_created', to track whether the hypervisor-side VM has been created and initialized. A new helper, pkvm_hyp_vm_is_created(), is added to check this flag. All call sites that previously checked for the handle's existence are converted to use the new, explicit check. The 'is_created' flag is set to true upon successful creation in the hypervisor (EL2) and cleared upon destruction. Signed-off-by: Fuad Tabba Tested-by: Mark Brown Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/include/asm/kvm_pkvm.h | 1 + arch/arm64/kvm/hyp/nvhe/pkvm.c | 1 + arch/arm64/kvm/pkvm.c | 11 +++++++++-- 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index a4289c2f13f5..bc57749e3fb9 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -253,6 +253,7 @@ struct kvm_protected_vm { struct kvm_hyp_memcache teardown_mc; struct kvm_hyp_memcache stage2_teardown_mc; bool is_protected; + bool is_created; }; struct kvm_mpidr_data { diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index ea58282f59bb..08be89c95466 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -18,6 +18,7 @@ int pkvm_init_host_vm(struct kvm *kvm); int pkvm_create_hyp_vm(struct kvm *kvm); +bool pkvm_hyp_vm_is_created(struct kvm *kvm); void pkvm_destroy_hyp_vm(struct kvm *kvm); int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index abe173406c88..969f6b293234 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -407,6 +407,7 @@ static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm, hyp_vm->kvm.created_vcpus = nr_vcpus; hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr; hyp_vm->kvm.arch.pkvm.is_protected = READ_ONCE(host_kvm->arch.pkvm.is_protected); + hyp_vm->kvm.arch.pkvm.is_created = true; hyp_vm->kvm.arch.flags = 0; pkvm_init_features_from_host(hyp_vm, host_kvm); } diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index 7aaeb66e3f39..45d699bba96a 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -87,12 +87,13 @@ void __init kvm_hyp_reserve(void) static void __pkvm_destroy_hyp_vm(struct kvm *kvm) { - if (kvm->arch.pkvm.handle) { + if (pkvm_hyp_vm_is_created(kvm)) { WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm, kvm->arch.pkvm.handle)); } kvm->arch.pkvm.handle = 0; + kvm->arch.pkvm.is_created = false; free_hyp_memcache(&kvm->arch.pkvm.teardown_mc); free_hyp_memcache(&kvm->arch.pkvm.stage2_teardown_mc); } @@ -165,6 +166,7 @@ static int __pkvm_create_hyp_vm(struct kvm *kvm) goto free_vm; kvm->arch.pkvm.handle = ret; + kvm->arch.pkvm.is_created = true; kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2; kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE); @@ -176,12 +178,17 @@ free_pgd: return ret; } +bool pkvm_hyp_vm_is_created(struct kvm *kvm) +{ + return READ_ONCE(kvm->arch.pkvm.is_created); +} + int pkvm_create_hyp_vm(struct kvm *kvm) { int ret = 0; mutex_lock(&kvm->arch.config_lock); - if (!kvm->arch.pkvm.handle) + if (!pkvm_hyp_vm_is_created(kvm)) ret = __pkvm_create_hyp_vm(kvm); mutex_unlock(&kvm->arch.config_lock); -- cgit v1.2.3 From 1abc1ad52989fcc45a0de68bc49656d9fd0c2d74 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 9 Sep 2025 08:24:33 +0100 Subject: KVM: arm64: Separate allocation and insertion of pKVM VM table entries The current insert_vm_table_entry() function performs two actions at once: it finds a free slot in the pKVM VM table and populates it with the pkvm_hyp_vm pointer. Refactor this function as a preparatory step for future work that will require reserving a VM slot and its corresponding handle earlier in the VM lifecycle, before the pkvm_hyp_vm structure is initialized and ready to be inserted. Split the function into a two-phase process: - A new allocate_vm_table_entry() function finds an empty slot, marks it as reserved with a RESERVED_ENTRY placeholder, and returns a handle derived from the slot's index. - The insert_vm_table_entry() function is repurposed to take the handle, validate that the corresponding slot is in the reserved state, and then populate it with the pkvm_hyp_vm pointer. Signed-off-by: Fuad Tabba Tested-by: Mark Brown Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/pkvm.c | 52 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 969f6b293234..64b760d30d05 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -192,6 +192,11 @@ static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu) */ #define HANDLE_OFFSET 0x1000 +/* + * Marks a reserved but not yet used entry in the VM table. + */ +#define RESERVED_ENTRY ((void *)0xa110ca7ed) + static unsigned int vm_handle_to_idx(pkvm_handle_t handle) { return handle - HANDLE_OFFSET; @@ -231,6 +236,10 @@ static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle) if (unlikely(idx >= KVM_MAX_PVMS)) return NULL; + /* A reserved entry doesn't represent an initialized VM. */ + if (unlikely(vm_table[idx] == RESERVED_ENTRY)) + return NULL; + return vm_table[idx]; } @@ -481,7 +490,7 @@ done: return ret; } -static int find_free_vm_table_entry(struct kvm *host_kvm) +static int find_free_vm_table_entry(void) { int i; @@ -494,15 +503,13 @@ static int find_free_vm_table_entry(struct kvm *host_kvm) } /* - * Allocate a VM table entry and insert a pointer to the new vm. + * Reserve a VM table entry. * * Return a unique handle to the VM on success, * negative error code on failure. */ -static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm, - struct pkvm_hyp_vm *hyp_vm) +static int allocate_vm_table_entry(void) { - struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu; int idx; hyp_assert_lock_held(&vm_table_lock); @@ -515,10 +522,30 @@ static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm, if (unlikely(!vm_table)) return -EINVAL; - idx = find_free_vm_table_entry(host_kvm); - if (idx < 0) + idx = find_free_vm_table_entry(); + if (unlikely(idx < 0)) return idx; + vm_table[idx] = RESERVED_ENTRY; + + return idx; +} + +/* + * Insert a pointer to the new VM into the VM table. + * + * Return 0 on success, or negative error code on failure. + */ +static int insert_vm_table_entry(struct kvm *host_kvm, + struct pkvm_hyp_vm *hyp_vm, + pkvm_handle_t handle) +{ + struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu; + unsigned int idx; + + hyp_assert_lock_held(&vm_table_lock); + + idx = vm_handle_to_idx(handle); hyp_vm->kvm.arch.pkvm.handle = idx_to_vm_handle(idx); /* VMID 0 is reserved for the host */ @@ -528,7 +555,7 @@ static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm, mmu->pgt = &hyp_vm->pgt; vm_table[idx] = hyp_vm; - return hyp_vm->kvm.arch.pkvm.handle; + return 0; } /* @@ -614,6 +641,7 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, struct pkvm_hyp_vm *hyp_vm = NULL; size_t vm_size, pgd_size; unsigned int nr_vcpus; + pkvm_handle_t handle; void *pgd = NULL; int ret; @@ -643,10 +671,16 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus); hyp_spin_lock(&vm_table_lock); - ret = insert_vm_table_entry(host_kvm, hyp_vm); + ret = allocate_vm_table_entry(); if (ret < 0) goto err_unlock; + handle = idx_to_vm_handle(ret); + + ret = insert_vm_table_entry(host_kvm, hyp_vm, handle); + if (ret) + goto err_unlock; + ret = kvm_guest_prepare_stage2(hyp_vm, pgd); if (ret) goto err_remove_vm_table_entry; -- cgit v1.2.3 From 814fd6beacf3c105ab8c8796be07d740952899fe Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 9 Sep 2025 08:24:34 +0100 Subject: KVM: arm64: Consolidate pKVM hypervisor VM initialization logic The insert_vm_table_entry() function was performing tasks beyond its primary responsibility. In addition to inserting a VM pointer into the vm_table, it was also initializing several fields within 'struct pkvm_hyp_vm', such as the VMID and stage-2 MMU pointers. This mixing of concerns made the code harder to follow. As another preparatory step towards allowing a VM table entry to be reserved before the VM is fully created, this logic must be cleaned up. By separating table insertion from state initialization, we can control the timing of the initialization step more precisely in subsequent patches. Refactor the code to consolidate all initialization logic into init_pkvm_hyp_vm(): - Move the initialization of the handle, VMID, and MMU fields from insert_vm_table_entry() to init_pkvm_hyp_vm(). - Simplify insert_vm_table_entry() to perform only one action: placing the provided pkvm_hyp_vm pointer into the vm_table. - Update the calling sequence in __pkvm_init_vm() to first allocate an entry in the VM table, initialize the VM, and then insert the VM into the VM table. This is all protected by the vm_table_lock for now. Subsequent patches will adjust the sequence and not hold the vm_table_lock while initializing the VM at the hypervisor (init_pkvm_hyp_vm()). Signed-off-by: Fuad Tabba Tested-by: Mark Brown Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/pkvm.c | 47 +++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 64b760d30d05..a9abbeb530f0 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -410,15 +410,26 @@ static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[], } static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm, - unsigned int nr_vcpus) + unsigned int nr_vcpus, pkvm_handle_t handle) { + struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu; + int idx = vm_handle_to_idx(handle); + + hyp_vm->kvm.arch.pkvm.handle = handle; + hyp_vm->host_kvm = host_kvm; hyp_vm->kvm.created_vcpus = nr_vcpus; - hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr; hyp_vm->kvm.arch.pkvm.is_protected = READ_ONCE(host_kvm->arch.pkvm.is_protected); hyp_vm->kvm.arch.pkvm.is_created = true; hyp_vm->kvm.arch.flags = 0; pkvm_init_features_from_host(hyp_vm, host_kvm); + + /* VMID 0 is reserved for the host */ + atomic64_set(&mmu->vmid.id, idx + 1); + + mmu->vtcr = host_mmu.arch.mmu.vtcr; + mmu->arch = &hyp_vm->kvm.arch; + mmu->pgt = &hyp_vm->pgt; } static int pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu) @@ -532,29 +543,19 @@ static int allocate_vm_table_entry(void) } /* - * Insert a pointer to the new VM into the VM table. + * Insert a pointer to the initialized VM into the VM table. * * Return 0 on success, or negative error code on failure. */ -static int insert_vm_table_entry(struct kvm *host_kvm, - struct pkvm_hyp_vm *hyp_vm, - pkvm_handle_t handle) +static int insert_vm_table_entry(pkvm_handle_t handle, + struct pkvm_hyp_vm *hyp_vm) { - struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu; unsigned int idx; hyp_assert_lock_held(&vm_table_lock); - idx = vm_handle_to_idx(handle); - hyp_vm->kvm.arch.pkvm.handle = idx_to_vm_handle(idx); - - /* VMID 0 is reserved for the host */ - atomic64_set(&mmu->vmid.id, idx + 1); - - mmu->arch = &hyp_vm->kvm.arch; - mmu->pgt = &hyp_vm->pgt; - vm_table[idx] = hyp_vm; + return 0; } @@ -668,8 +669,6 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, if (!pgd) goto err_remove_mappings; - init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus); - hyp_spin_lock(&vm_table_lock); ret = allocate_vm_table_entry(); if (ret < 0) @@ -677,19 +676,21 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, handle = idx_to_vm_handle(ret); - ret = insert_vm_table_entry(host_kvm, hyp_vm, handle); - if (ret) - goto err_unlock; + init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus, handle); ret = kvm_guest_prepare_stage2(hyp_vm, pgd); + if (ret) + goto err_remove_vm_table_entry; + + ret = insert_vm_table_entry(handle, hyp_vm); if (ret) goto err_remove_vm_table_entry; hyp_spin_unlock(&vm_table_lock); - return hyp_vm->kvm.arch.pkvm.handle; + return handle; err_remove_vm_table_entry: - remove_vm_table_entry(hyp_vm->kvm.arch.pkvm.handle); + remove_vm_table_entry(handle); err_unlock: hyp_spin_unlock(&vm_table_lock); err_remove_mappings: -- cgit v1.2.3 From 256b4668cd890b741c54f83dbbef76ba847c23be Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 9 Sep 2025 08:24:35 +0100 Subject: KVM: arm64: Introduce separate hypercalls for pKVM VM reservation and initialization The existing __pkvm_init_vm hypercall performs both the reservation of a VM table entry and the initialization of the hypervisor VM state in a single operation. This design prevents the host from obtaining a VM handle from the hypervisor until all preparation for the creation and the initialization of the VM is done, which is on the first vCPU run operation. To support more flexible VM lifecycle management, the host needs the ability to reserve a handle early, before the first vCPU run. Refactor the hypercall interface to enable this, splitting the single hypercall into a two-stage process: - __pkvm_reserve_vm: A new hypercall that allocates a slot in the hypervisor's vm_table, marks it as reserved, and returns a unique handle to the host. - __pkvm_unreserve_vm: A corresponding cleanup hypercall to safely release the reservation if the host fails to proceed with full initialization. - __pkvm_init_vm: The existing hypercall is modified to no longer allocate a slot. It now expects a pre-reserved handle and commits the donated VM memory to that slot. For now, the host-side code in __pkvm_create_hyp_vm calls the new reserve and init hypercalls back-to-back to maintain existing behavior. This paves the way for subsequent patches to separate the reservation and initialization steps in the VM's lifecycle. Signed-off-by: Fuad Tabba Tested-by: Mark Brown Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_asm.h | 2 + arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 2 + arch/arm64/kvm/hyp/nvhe/hyp-main.c | 14 +++++ arch/arm64/kvm/hyp/nvhe/pkvm.c | 102 ++++++++++++++++++++++++++------- arch/arm64/kvm/pkvm.c | 12 +++- 5 files changed, 108 insertions(+), 24 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index bec227f9500a..9da54d4ee49e 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -81,6 +81,8 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, __KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs, __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs, + __KVM_HOST_SMCCC_FUNC___pkvm_reserve_vm, + __KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm, __KVM_HOST_SMCCC_FUNC___pkvm_init_vm, __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu, __KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm, diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h index 4540324b5657..184ad7a39950 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -67,6 +67,8 @@ static inline bool pkvm_hyp_vm_is_protected(struct pkvm_hyp_vm *hyp_vm) void pkvm_hyp_vm_table_init(void *tbl); +int __pkvm_reserve_vm(void); +void __pkvm_unreserve_vm(pkvm_handle_t handle); int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, unsigned long pgd_hva); int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 3206b2c07f82..29430c031095 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -546,6 +546,18 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize(); } +static void handle___pkvm_reserve_vm(struct kvm_cpu_context *host_ctxt) +{ + cpu_reg(host_ctxt, 1) = __pkvm_reserve_vm(); +} + +static void handle___pkvm_unreserve_vm(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); + + __pkvm_unreserve_vm(handle); +} + static void handle___pkvm_init_vm(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1); @@ -606,6 +618,8 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__kvm_timer_set_cntvoff), HANDLE_FUNC(__vgic_v3_save_vmcr_aprs), HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs), + HANDLE_FUNC(__pkvm_reserve_vm), + HANDLE_FUNC(__pkvm_unreserve_vm), HANDLE_FUNC(__pkvm_init_vm), HANDLE_FUNC(__pkvm_init_vcpu), HANDLE_FUNC(__pkvm_teardown_vm), diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index a9abbeb530f0..05774aed09cb 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -542,6 +542,33 @@ static int allocate_vm_table_entry(void) return idx; } +static int __insert_vm_table_entry(pkvm_handle_t handle, + struct pkvm_hyp_vm *hyp_vm) +{ + unsigned int idx; + + hyp_assert_lock_held(&vm_table_lock); + + /* + * Initializing protected state might have failed, yet a malicious + * host could trigger this function. Thus, ensure that 'vm_table' + * exists. + */ + if (unlikely(!vm_table)) + return -EINVAL; + + idx = vm_handle_to_idx(handle); + if (unlikely(idx >= KVM_MAX_PVMS)) + return -EINVAL; + + if (unlikely(vm_table[idx] != RESERVED_ENTRY)) + return -EINVAL; + + vm_table[idx] = hyp_vm; + + return 0; +} + /* * Insert a pointer to the initialized VM into the VM table. * @@ -550,13 +577,13 @@ static int allocate_vm_table_entry(void) static int insert_vm_table_entry(pkvm_handle_t handle, struct pkvm_hyp_vm *hyp_vm) { - unsigned int idx; + int ret; - hyp_assert_lock_held(&vm_table_lock); - idx = vm_handle_to_idx(handle); - vm_table[idx] = hyp_vm; + hyp_spin_lock(&vm_table_lock); + ret = __insert_vm_table_entry(handle, hyp_vm); + hyp_spin_unlock(&vm_table_lock); - return 0; + return ret; } /* @@ -622,8 +649,45 @@ static void unmap_donated_memory_noclear(void *va, size_t size) __unmap_donated_memory(va, size); } +/* + * Reserves an entry in the hypervisor for a new VM in protected mode. + * + * Return a unique handle to the VM on success, negative error code on failure. + */ +int __pkvm_reserve_vm(void) +{ + int ret; + + hyp_spin_lock(&vm_table_lock); + ret = allocate_vm_table_entry(); + hyp_spin_unlock(&vm_table_lock); + + if (ret < 0) + return ret; + + return idx_to_vm_handle(ret); +} + +/* + * Removes a reserved entry, but only if is hasn't been used yet. + * Otherwise, the VM needs to be destroyed. + */ +void __pkvm_unreserve_vm(pkvm_handle_t handle) +{ + unsigned int idx = vm_handle_to_idx(handle); + + if (unlikely(!vm_table)) + return; + + hyp_spin_lock(&vm_table_lock); + if (likely(idx < KVM_MAX_PVMS && vm_table[idx] == RESERVED_ENTRY)) + remove_vm_table_entry(handle); + hyp_spin_unlock(&vm_table_lock); +} + /* * Initialize the hypervisor copy of the VM state using host-donated memory. + * * Unmap the donated memory from the host at stage 2. * * host_kvm: A pointer to the host's struct kvm. @@ -633,8 +697,7 @@ static void unmap_donated_memory_noclear(void *va, size_t size) * the VM. Must be page aligned. Its size is implied by the VM's * VTCR. * - * Return a unique handle to the VM on success, - * negative error code on failure. + * Return 0 success, negative error code on failure. */ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, unsigned long pgd_hva) @@ -656,6 +719,12 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, goto err_unpin_kvm; } + handle = READ_ONCE(host_kvm->arch.pkvm.handle); + if (unlikely(handle < HANDLE_OFFSET)) { + ret = -EINVAL; + goto err_unpin_kvm; + } + vm_size = pkvm_get_hyp_vm_size(nr_vcpus); pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.mmu.vtcr); @@ -669,30 +738,19 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, if (!pgd) goto err_remove_mappings; - hyp_spin_lock(&vm_table_lock); - ret = allocate_vm_table_entry(); - if (ret < 0) - goto err_unlock; - - handle = idx_to_vm_handle(ret); - init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus, handle); ret = kvm_guest_prepare_stage2(hyp_vm, pgd); if (ret) - goto err_remove_vm_table_entry; + goto err_remove_mappings; + /* Must be called last since this publishes the VM. */ ret = insert_vm_table_entry(handle, hyp_vm); if (ret) - goto err_remove_vm_table_entry; - hyp_spin_unlock(&vm_table_lock); + goto err_remove_mappings; - return handle; + return 0; -err_remove_vm_table_entry: - remove_vm_table_entry(handle); -err_unlock: - hyp_spin_unlock(&vm_table_lock); err_remove_mappings: unmap_donated_memory(hyp_vm, vm_size); unmap_donated_memory(pgd, pgd_size); diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index 45d699bba96a..082bc15f436c 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -160,17 +160,25 @@ static int __pkvm_create_hyp_vm(struct kvm *kvm) goto free_pgd; } - /* Donate the VM memory to hyp and let hyp initialize it. */ - ret = kvm_call_hyp_nvhe(__pkvm_init_vm, kvm, hyp_vm, pgd); + /* Reserve the VM in hyp and obtain a hyp handle for the VM. */ + ret = kvm_call_hyp_nvhe(__pkvm_reserve_vm); if (ret < 0) goto free_vm; kvm->arch.pkvm.handle = ret; + + /* Donate the VM memory to hyp and let hyp initialize it. */ + ret = kvm_call_hyp_nvhe(__pkvm_init_vm, kvm, hyp_vm, pgd); + if (ret) + goto unreserve_vm; + kvm->arch.pkvm.is_created = true; kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2; kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE); return 0; +unreserve_vm: + kvm_call_hyp_nvhe(__pkvm_unreserve_vm, kvm->arch.pkvm.handle); free_vm: free_pages_exact(hyp_vm, hyp_vm_sz); free_pgd: -- cgit v1.2.3 From 07aeb70707b1d52968f4959a5dba321ea4219c8a Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 9 Sep 2025 08:24:36 +0100 Subject: KVM: arm64: Reserve pKVM handle during pkvm_init_host_vm() When a pKVM guest is active, TLB invalidations triggered by host MMU notifiers require a valid hypervisor handle. Currently, this handle is only allocated when the first vCPU is run. However, the guest's memory is associated with the host MMU much earlier, during kvm_arch_init_vm(). This creates a window where an MMU invalidation could occur after the kvm_pgtable pointer checked by the notifi