aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu
diff options
context:
space:
mode:
author Alex Deucher <alexander.deucher@amd.com> 2025-12-01 14:46:53 -0500
committer Alex Deucher <alexander.deucher@amd.com> 2025-12-08 14:14:38 -0500
commit d3ff65243a52afa85166abaa8d00a44c17691dbd (patch)
tree 6ae38427430b7ce215e3435ba9e734ef9202743e /drivers/gpu
parent a50d32c41fb25d772cc1c47b6abed8fb811d58c2 (diff)
drm/amdgpu: add a helper for processing recoverable GPUVM faults
Add a common helper to remove the repeated logic from each gmc module.

Suggested-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 48
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 6
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 23
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 23
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c 23
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 43
6 files changed, 69 insertions, 97 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 4abed753fc2d..8ac92e7bed31 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -524,6 +524,54 @@ void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
} while (fault->timestamp < tmp);
}
+int amdgpu_gmc_handle_retry_fault(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry,
+ u64 addr,
+ u32 cam_index,
+ u32 node_id,
+ bool write_fault)
+{ /* Common handling of a recoverable (retry) GPUVM fault, shared by the
+ * gmc_v9_0/v10_0/v11_0/v12_0 process_interrupt handlers.
+ *
+ * adev: device; adev->irq.retry_cam_enabled selects which path runs.
+ * entry: IV entry of the fault; ih, pasid, vmid and timestamp are read.
+ * addr: faulting address, used for filtering and passed to
+ * amdgpu_vm_handle_fault().
+ * cam_index: value written to the retry CAM doorbell; only used on the
+ * retry_cam_enabled path.
+ * node_id, write_fault: forwarded unchanged to amdgpu_vm_handle_fault().
+ *
+ * Returns 1 when the entry was filtered out, delegated to another IH
+ * ring, or amdgpu_vm_handle_fault() returned non-zero; returns 0
+ * otherwise. The callers return 1 from their interrupt handler in that
+ * case, which also prevents sending the IV to the KFD.
+ */
+ int ret;
+
+ if (adev->irq.retry_cam_enabled) {
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it. An entry that is still on the primary ring
+ * (adev->irq.ih) is handed off and not processed here.
+ * NOTE(review): 8 is presumably the number of IV dwords passed
+ * to amdgpu_irq_delegate() - confirm against its definition.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
+
+ ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
+ addr, entry->timestamp, write_fault);
+ WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); /* written whether or not ret is set */
+ if (ret)
+ return 1;
+ } else {
+ /* Process it only if it's the first fault for this address.
+ * Entries that arrive on the soft ring (adev->irq.ih_soft)
+ * skip the filter; presumably they were already filtered
+ * before being delegated - confirm.
+ */
+ if (entry->ih != &adev->irq.ih_soft &&
+ amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
+ entry->timestamp))
+ return 1;
+
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it. Same hand-off as on the retry CAM path,
+ * but only reached after the filter above let the entry
+ * through.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
+
+ /* Try to handle the recoverable page faults by filling page
+ * tables. A non-zero result ends processing of this entry
+ * (presumably meaning the fault was handled - confirm against
+ * amdgpu_vm_handle_fault()).
+ * NOTE(review): the gmc_v10_0/v11_0/v12_0 code replaced by
+ * this helper passed 0 as vmid here, whereas entry->vmid is
+ * forwarded now - confirm this is intended for those parts.
+ */
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
+ addr, entry->timestamp, write_fault))
+ return 1;
+ }
+ return 0; /* nothing above consumed the entry; the caller keeps processing it */
+}
+
int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev)
{
int r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index b62fa7e92c79..e8e8bfa098c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -425,6 +425,12 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
uint16_t pasid, uint64_t timestamp);
void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
uint16_t pasid);
+int amdgpu_gmc_handle_retry_fault(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry,
+ u64 addr,
+ u32 cam_index,
+ u32 node_id,
+ bool write_fault); /* Returns 1 when the retry fault was filtered,
+ * delegated to another IH ring, or amdgpu_vm_handle_fault() returned
+ * non-zero; returns 0 otherwise.
+ */
int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 47558e572553..0b385a15194d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -115,27 +115,10 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
if (retry_fault) {
+ int ret = amdgpu_gmc_handle_retry_fault(adev, entry, addr, 0, 0,
+ write_fault);
/* Returning 1 here also prevents sending the IV to the KFD */
-
- /* Process it only if it's the first fault for this address */
- if (entry->ih != &adev->irq.ih_soft &&
- amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
- entry->timestamp))
- return 1;
-
- /* Delegate it to a different ring if the hardware hasn't
- * already done it.
- */
- if (entry->ih == &adev->irq.ih) {
- amdgpu_irq_delegate(adev, entry, 8);
- return 1;
- }
-
- /* Try to handle the recoverable page faults by filling page
- * tables
- */
- if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
- entry->timestamp, write_fault))
+ if (ret == 1)
return 1;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index ba59ee8e398a..7a1f0742754a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -114,27 +114,10 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev,
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
if (retry_fault) {
+ int ret = amdgpu_gmc_handle_retry_fault(adev, entry, addr, 0, 0,
+ write_fault);
/* Returning 1 here also prevents sending the IV to the KFD */
-
- /* Process it only if it's the first fault for this address */
- if (entry->ih != &adev->irq.ih_soft &&
- amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
- entry->timestamp))
- return 1;
-
- /* Delegate it to a different ring if the hardware hasn't
- * already done it.
- */
- if (entry->ih == &adev->irq.ih) {
- amdgpu_irq_delegate(adev, entry, 8);
- return 1;
- }
-
- /* Try to handle the recoverable page faults by filling page
- * tables
- */
- if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
- entry->timestamp, write_fault))
+ if (ret == 1)
return 1;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index dfb06baea1ff..145fcefd1c78 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -110,27 +110,10 @@ static int gmc_v12_0_process_interrupt(struct amdgpu_device *adev,
hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
if (retry_fault) {
+ int ret = amdgpu_gmc_handle_retry_fault(adev, entry, addr, 0, 0,
+ write_fault);
/* Returning 1 here also prevents sending the IV to the KFD */
-
- /* Process it only if it's the first fault for this address */
- if (entry->ih != &adev->irq.ih_soft &&
- amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
- entry->timestamp))
- return 1;
-
- /* Delegate it to a different ring if the hardware hasn't
- * already done it.
- */
- if (entry->ih == &adev->irq.ih) {
- amdgpu_irq_delegate(adev, entry, 8);
- return 1;
- }
-
- /* Try to handle the recoverable page faults by filling page
- * tables
- */
- if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
- entry->timestamp, write_fault))
+ if (ret == 1)
return 1;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 778ad7ac6d08..97a04e3171f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -583,44 +583,13 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
hub = &adev->vmhub[vmhub];
if (retry_fault) {
- if (adev->irq.retry_cam_enabled) {
- /* Delegate it to a different ring if the hardware hasn't
- * already done it.
- */
- if (entry->ih == &adev->irq.ih) {
- amdgpu_irq_delegate(adev, entry, 8);
- return 1;
- }
-
- cam_index = entry->src_data[2] & 0x3ff;
+ cam_index = entry->src_data[2] & 0x3ff;
- ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
- addr, entry->timestamp, write_fault);
- WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
- if (ret)
- return 1;
- } else {
- /* Process it only if it's the first fault for this address */
- if (entry->ih != &adev->irq.ih_soft &&
- amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
- entry->timestamp))
- return 1;
-
- /* Delegate it to a different ring if the hardware hasn't
- * already done it.
- */
- if (entry->ih == &adev->irq.ih) {
- amdgpu_irq_delegate(adev, entry, 8);
- return 1;
- }
-
- /* Try to handle the recoverable page faults by filling page
- * tables
- */
- if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
- addr, entry->timestamp, write_fault))
- return 1;
- }
+ ret = amdgpu_gmc_handle_retry_fault(adev, entry, addr, cam_index, node_id,
+ write_fault);
+ /* Returning 1 here also prevents sending the IV to the KFD */
+ if (ret == 1)
+ return 1;
}
if (kgd2kfd_vmfault_fast_path(adev, entry, retry_fault))