aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu
diff options
context:
space:
mode:
authorAndrew Martin <andrew.martin@amd.com>2026-05-28 12:54:39 -0400
committerAlex Deucher <alexander.deucher@amd.com>2026-06-03 14:54:46 -0400
commit352ea59028ea48a6fff77f19ae28f98f71946a80 (patch)
treed7395bfaef2d808b5516283902cbe0eac286124f /drivers/gpu
parent2bd550b547deabef98bd3b017ff743b7c34d3a6d (diff)
drm/amdkfd: Fix buffer overflow in SDMA queue checkpoint/restore on GFX11
The v11 MQD manager incorrectly assigned the CP-compute variants of checkpoint_mqd/restore_mqd for KFD_MQD_TYPE_SDMA queues. These functions use sizeof(struct v11_compute_mqd) (2048 bytes) instead of sizeof(struct v11_sdma_mqd) (512 bytes), causing a 1536-byte overflow. During CRIU checkpoint of an SDMA queue on Navi3x: - checkpoint_mqd() reads 2048 bytes from a 512-byte SDMA MQD buffer, leaking 1536 bytes of adjacent GTT memory to userspace During CRIU restore: - restore_mqd() writes 2048 bytes into a 512-byte SDMA MQD buffer, corrupting 1536 bytes of adjacent GTT memory (often the ring buffer or neighboring MQDs) This is a copy-paste regression unique to v11. All other ASIC backends (cik, vi, v9, v10, v12) correctly use the SDMA-specific variants. Add checkpoint_mqd_sdma() and restore_mqd_sdma() functions that properly handle the smaller v11_sdma_mqd structure, matching the pattern used in other MQD managers. Fixes: cc009e613de6 ("drm/amdkfd: Add KFD support for soc21 v3") Assisted-by: Claude:Sonnet 4-5 Signed-off-by: Andrew Martin <andrew.martin@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> (cherry picked from commit 6fa41db7ffdec97d62433adf03b7b9b759af8c2c) Cc: stable@vger.kernel.org
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c49
1 files changed, 41 insertions, 8 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index a1e3cf2384dd..527c531676e4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -320,8 +320,7 @@ static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, voi
static void restore_mqd(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
- struct queue_properties *qp,
- const void *mqd_src,
+ struct queue_properties *qp, const void *mqd_src,
const void *ctl_stack_src, const u32 ctl_stack_size)
{
uint64_t addr;
@@ -337,14 +336,48 @@ static void restore_mqd(struct mqd_manager *mm, void **mqd,
*gart_addr = addr;
m->cp_hqd_pq_doorbell_control =
- qp->doorbell_off <<
- CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
- pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
- m->cp_hqd_pq_doorbell_control);
+ qp->doorbell_off << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", m->cp_hqd_pq_doorbell_control);
qp->is_active = 0;
}
+static void checkpoint_mqd_sdma(struct mqd_manager *mm,
+ void *mqd,
+ void *mqd_dst,
+ void *ctl_stack_dst)
+{
+ struct v11_sdma_mqd *m;
+
+ m = get_sdma_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct v11_sdma_mqd));
+}
+
+static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src,
+ const u32 ctl_stack_size)
+{
+ uint64_t addr;
+ struct v11_sdma_mqd *m;
+
+ m = (struct v11_sdma_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ m->sdmax_rlcx_doorbell_offset =
+ qp->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ qp->is_active = 0;
+}
static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
@@ -529,8 +562,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
mqd->update_mqd = update_mqd_sdma;
mqd->destroy_mqd = kfd_destroy_mqd_sdma;
mqd->is_occupied = kfd_is_occupied_sdma;
- mqd->checkpoint_mqd = checkpoint_mqd;
- mqd->restore_mqd = restore_mqd;
+ mqd->checkpoint_mqd = checkpoint_mqd_sdma;
+ mqd->restore_mqd = restore_mqd_sdma;
mqd->mqd_size = sizeof(struct v11_sdma_mqd);
mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)