aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichal Wajdeczko <michal.wajdeczko@intel.com>2026-01-12 19:37:16 +0100
committerMichal Wajdeczko <michal.wajdeczko@intel.com>2026-01-14 16:02:50 +0100
commitdef675cf3f107ba8da78ca0b8650997fdf667538 (patch)
tree9eaa27752619cbed23963efc9c3ed40e08c43349
parent6b2ff1d7c57ef49cd17ffe132173e05ab11a5213 (diff)
drm/xe/mert: Improve handling of MERT CAT errors
All MERT catastrophic errors but VF's LMTT fault are serious, so we shouldn't limit our handling only to print debug messages. Change CATERR message to error level and then declare the device as wedged to match expectation from the design document. For the LMTT faults, add a note about adding tracking of this unexpected VF activity. While at it, rename register fields definitions to match the BSpec. Also drop trailing include guard name from the regs.h file. BSpec: 74625 Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com> Cc: Lukasz Laguna <lukasz.laguna@intel.com> Reviewed-by: Lukasz Laguna <lukasz.laguna@intel.com> Link: https://patch.msgid.link/20260112183716.28700-1-michal.wajdeczko@intel.com
-rw-r--r--drivers/gpu/drm/xe/regs/xe_mert_regs.h10
-rw-r--r--drivers/gpu/drm/xe/xe_mert.c43
2 files changed, 39 insertions, 14 deletions
diff --git a/drivers/gpu/drm/xe/regs/xe_mert_regs.h b/drivers/gpu/drm/xe/regs/xe_mert_regs.h
index c345e11ceea8..99e5a26da657 100644
--- a/drivers/gpu/drm/xe/regs/xe_mert_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_mert_regs.h
@@ -11,11 +11,13 @@
#define MERT_LMEM_CFG XE_REG(0x1448b0)
#define MERT_TLB_CT_INTR_ERR_ID_PORT XE_REG(0x145190)
-#define MERT_TLB_CT_VFID_MASK REG_GENMASK(16, 9)
-#define MERT_TLB_CT_ERROR_MASK REG_GENMASK(5, 0)
-#define MERT_TLB_CT_LMTT_FAULT 0x05
+#define CATERR_VFID REG_GENMASK(16, 9)
+#define CATERR_CODES REG_GENMASK(5, 0)
+#define CATERR_NO_ERROR 0x00
+#define CATERR_UNMAPPED_GGTT 0x01
+#define CATERR_LMTT_FAULT 0x05
#define MERT_TLB_INV_DESC_A XE_REG(0x14cf7c)
#define MERT_TLB_INV_DESC_A_VALID REG_BIT(0)
-#endif /* _XE_MERT_REGS_H_ */
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mert.c b/drivers/gpu/drm/xe/xe_mert.c
index fc027d2d7a5e..f637df95418b 100644
--- a/drivers/gpu/drm/xe/xe_mert.c
+++ b/drivers/gpu/drm/xe/xe_mert.c
@@ -9,6 +9,7 @@
#include "xe_device.h"
#include "xe_mert.h"
#include "xe_mmio.h"
+#include "xe_sriov_printk.h"
#include "xe_tile.h"
/**
@@ -55,6 +56,37 @@ int xe_mert_invalidate_lmtt(struct xe_device *xe)
return 0;
}
+static void mert_handle_cat_error(struct xe_device *xe)
+{
+ struct xe_tile *tile = xe_device_get_root_tile(xe);
+ u32 reg_val, vfid, code;
+
+ reg_val = xe_mmio_read32(&tile->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT);
+ if (!reg_val)
+ return;
+ xe_mmio_write32(&tile->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT, 0);
+
+ vfid = FIELD_GET(CATERR_VFID, reg_val);
+ code = FIELD_GET(CATERR_CODES, reg_val);
+
+ switch (code) {
+ case CATERR_NO_ERROR:
+ break;
+ case CATERR_UNMAPPED_GGTT:
+ xe_sriov_err(xe, "MERT: CAT_ERR: Access to an unmapped GGTT!\n");
+ xe_device_declare_wedged(xe);
+ break;
+ case CATERR_LMTT_FAULT:
+ xe_sriov_dbg(xe, "MERT: CAT_ERR: VF%u LMTT fault!\n", vfid);
+ /* XXX: track/report malicious VF activity */
+ break;
+ default:
+ xe_sriov_err(xe, "MERT: Unexpected CAT_ERR code=%#x!\n", code);
+ xe_device_declare_wedged(xe);
+ break;
+ }
+}
+
/**
* xe_mert_irq_handler - Handler for MERT interrupts
* @xe: the &xe_device
@@ -68,20 +100,11 @@ void xe_mert_irq_handler(struct xe_device *xe, u32 master_ctl)
struct xe_mert *mert = &tile->mert;
unsigned long flags;
u32 reg_val;
- u8 err;
if (!(master_ctl & SOC_H2DMEMINT_IRQ))
return;
- reg_val = xe_mmio_read32(&tile->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT);
- xe_mmio_write32(&tile->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT, 0);
-
- err = FIELD_GET(MERT_TLB_CT_ERROR_MASK, reg_val);
- if (err == MERT_TLB_CT_LMTT_FAULT)
- drm_dbg(&xe->drm, "MERT catastrophic error: LMTT fault (VF%u)\n",
- FIELD_GET(MERT_TLB_CT_VFID_MASK, reg_val));
- else if (err)
- drm_dbg(&xe->drm, "MERT catastrophic error: Unexpected fault (0x%x)\n", err);
+ mert_handle_cat_error(xe);
spin_lock_irqsave(&mert->lock, flags);
if (mert->tlb_inv_triggered) {