From 192bb40f030a41ca95c5cff8c9340b725bc7ba8b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 17 Nov 2022 09:33:58 -0800 Subject: drm/i915/gt: Manage uncore->lock while waiting on MCR register The GT MCR code currently relies on uncore->lock to avoid race conditions on the steering control register during MCR operations. The *_fw() versions of MCR operations expect the caller to already hold uncore->lock, while the non-fw variants manage the lock internally. However the sole callsite of intel_gt_mcr_wait_for_reg_fw() does not currently obtain the forcewake lock, allowing a potential race condition (and triggering an assertion on lockdep builds). Furthermore, since 'wait for register value' requests may not return immediately, it is undesirable to hold a fundamental lock like uncore->lock for the entire wait and block all other MMIO for the duration; rather the lock is only needed around the MCR read operations and can be released during the delays. Convert intel_gt_mcr_wait_for_reg_fw() to a non-fw variant that will manage uncore->lock internally. This does have the side effect of causing an unnecessary lookup in the forcewake table on each read operation, but since the caller is still holding the relevant forcewake domain, this will ultimately just incremenent the reference count and won't actually cause any additional MMIO traffic. In the future we plan to switch to a dedicated MCR lock to protect the steering critical section rather than using the overloaded and high-traffic uncore->lock; on MTL and beyond the new lock can be implemented on top of the hardware-provided synchonization mechanism for steering. Fixes: 3068bec83eea ("drm/i915/gt: Add intel_gt_mcr_wait_for_reg_fw()") Cc: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20221117173358.1980230-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 6 +++--- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 18 ++++++++++-------- drivers/gpu/drm/i915/gt/intel_gt_mcr.h | 12 ++++++------ 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 04908f1d6564..ca3210dd7b22 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -1034,9 +1034,9 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb) { if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) - return intel_gt_mcr_wait_for_reg_fw(gt, rb.mcr_reg, rb.bit, 0, - TLB_INVAL_TIMEOUT_US, - TLB_INVAL_TIMEOUT_MS); + return intel_gt_mcr_wait_for_reg(gt, rb.mcr_reg, rb.bit, 0, + TLB_INVAL_TIMEOUT_US, + TLB_INVAL_TIMEOUT_MS); else return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0, TLB_INVAL_TIMEOUT_US, diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 830edffe88cc..d9a8ff9e5e57 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -730,17 +730,19 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, * * Return: 0 if the register matches the desired condition, or -ETIMEDOUT. */ -int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt, - i915_mcr_reg_t reg, - u32 mask, - u32 value, - unsigned int fast_timeout_us, - unsigned int slow_timeout_ms) +int intel_gt_mcr_wait_for_reg(struct intel_gt *gt, + i915_mcr_reg_t reg, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms) { - u32 reg_value = 0; -#define done (((reg_value = intel_gt_mcr_read_any_fw(gt, reg)) & mask) == value) int ret; + lockdep_assert_not_held(>->uncore->lock); + +#define done ((intel_gt_mcr_read_any(gt, reg) & mask) == value) + /* Catch any overuse of this function */ might_sleep_if(slow_timeout_ms); GEM_BUG_ON(fast_timeout_us > 20000); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h index 3fb0502bff22..ae93b20e1c17 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h @@ -37,12 +37,12 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, unsigned int *group, unsigned int *instance); -int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt, - i915_mcr_reg_t reg, - u32 mask, - u32 value, - unsigned int fast_timeout_us, - unsigned int slow_timeout_ms); +int intel_gt_mcr_wait_for_reg(struct intel_gt *gt, + i915_mcr_reg_t reg, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms); /* * Helper for for_each_ss_steering loop. On pre-Xe_HP platforms, subslice -- cgit v1.2.3 From 71feb6f901ecba962177a0a029dc545c91a4b396 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 18 Nov 2022 11:52:49 +0000 Subject: drm/i915: Fix workarounds on Gen2-3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In 3653727560d0 ("drm/i915: Simplify internal helper function signature") I broke the old platforms by not noticing engine workaround init does not initialize the list on old platforms. Fix it by always initializing which already does the right thing by mostly not doing anything if there aren't any workarounds on the list. Signed-off-by: Tvrtko Ursulin Fixes: 3653727560d0 ("drm/i915: Simplify internal helper function signature") Reported-by: Ville Syrjälä Cc: Mika Kuoppala Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20221118115249.2683946-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index c5f1edfeece2..e39ce0bf91b3 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -3010,7 +3010,7 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li static void engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - if (I915_SELFTEST_ONLY(GRAPHICS_VER(engine->i915) < 4)) + if (GRAPHICS_VER(engine->i915) < 4) return; engine_fake_wa_init(engine, wal); @@ -3035,9 +3035,6 @@ void intel_engine_init_workarounds(struct intel_engine_cs *engine) { struct i915_wa_list *wal = &engine->wa_list; - if (GRAPHICS_VER(engine->i915) < 4) - return; - wa_init_start(wal, engine->gt, "engine", engine->name); engine_init_workarounds(engine, wal); wa_init_finish(wal); -- cgit v1.2.3 From 67b5655b2e717b8b681f8acd9cbddd2d687d5d4e Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Sat, 19 Nov 2022 00:03:54 +0530 Subject: drm/i915/mtl: Enable Idle Messaging for GSC CS By defaut idle messaging is disabled for GSC CS so to unblock RC6 entry on media tile idle messaging need to be enabled. v2: - Fix review comments (Vinay) - Set GSC idle hysteresis as per spec (Badal) v3: - Fix review comments (Rodrigo) Bspec: 71496 Cc: Daniele Ceraolo Spurio Signed-off-by: Vinay Belgaumkar Signed-off-by: Badal Nilawar Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20221118183354.1047829-1-badal.nilawar@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 18 ++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt_regs.h | 4 ++++ 2 files changed, 22 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index b0a4a2dbe3ee..e971b153fda9 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -15,6 +15,22 @@ #include "intel_rc6.h" #include "intel_ring.h" #include "shmem_utils.h" +#include "intel_gt_regs.h" + +static void intel_gsc_idle_msg_enable(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + + if (IS_METEORLAKE(i915) && engine->id == GSC0) { + intel_uncore_write(engine->gt->uncore, + RC_PSMI_CTRL_GSCCS, + _MASKED_BIT_DISABLE(IDLE_MSG_DISABLE)); + /* hysteresis 0xA=5us as recommended in spec*/ + intel_uncore_write(engine->gt->uncore, + PWRCTX_MAXCNT_GSCCS, + 0xA); + } +} static void dbg_poison_ce(struct intel_context *ce) { @@ -275,6 +291,8 @@ void intel_engine_init__pm(struct intel_engine_cs *engine) intel_wakeref_init(&engine->wakeref, rpm, &wf_ops); intel_engine_init_heartbeat(engine); + + intel_gsc_idle_msg_enable(engine); } /** diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index bd3848bd7b6a..3658e12db74b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -916,6 +916,10 @@ #define MSG_IDLE_FW_MASK REG_GENMASK(13, 9) #define MSG_IDLE_FW_SHIFT 9 +#define RC_PSMI_CTRL_GSCCS _MMIO(0x11a050) +#define IDLE_MSG_DISABLE REG_BIT(0) +#define PWRCTX_MAXCNT_GSCCS _MMIO(0x11a054) + #define FORCEWAKE_MEDIA_GEN9 _MMIO(0xa270) #define FORCEWAKE_RENDER_GEN9 _MMIO(0xa278) -- cgit v1.2.3 From e746f84b8e813816951b63485134927ed6763a1b Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 10 Nov 2022 17:19:12 +0000 Subject: i915/uncore: Acquire fw before loop in intel_uncore_read64_2x32 PMU reads the GT timestamp as a 2x32 mmio read and since upper and lower 32 bit registers are read in a loop, there is a latency involved between getting the GT timestamp and the CPU timestamp. As part of the resolution, refactor intel_uncore_read64_2x32 to acquire forcewake and uncore lock prior to reading upper and lower regs. Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Tvrtko Ursulin Reviewed-by: Ashutosh Dixit Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20221110171913.670286-2-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/intel_uncore.h | 44 +++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 5449146a0624..e9e38490815d 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -382,20 +382,6 @@ __uncore_write(write_notrace, 32, l, false) */ __uncore_read(read64, 64, q, true) -static inline u64 -intel_uncore_read64_2x32(struct intel_uncore *uncore, - i915_reg_t lower_reg, i915_reg_t upper_reg) -{ - u32 upper, lower, old_upper, loop = 0; - upper = intel_uncore_read(uncore, upper_reg); - do { - old_upper = upper; - lower = intel_uncore_read(uncore, lower_reg); - upper = intel_uncore_read(uncore, upper_reg); - } while (upper != old_upper && loop++ < 2); - return (u64)upper << 32 | lower; -} - #define intel_uncore_posting_read(...) ((void)intel_uncore_read_notrace(__VA_ARGS__)) #define intel_uncore_posting_read16(...) ((void)intel_uncore_read16_notrace(__VA_ARGS__)) @@ -455,6 +441,36 @@ static inline void intel_uncore_rmw_fw(struct intel_uncore *uncore, intel_uncore_write_fw(uncore, reg, val); } +static inline u64 +intel_uncore_read64_2x32(struct intel_uncore *uncore, + i915_reg_t lower_reg, i915_reg_t upper_reg) +{ + u32 upper, lower, old_upper, loop = 0; + enum forcewake_domains fw_domains; + unsigned long flags; + + fw_domains = intel_uncore_forcewake_for_reg(uncore, lower_reg, + FW_REG_READ); + + fw_domains |= intel_uncore_forcewake_for_reg(uncore, upper_reg, + FW_REG_READ); + + spin_lock_irqsave(&uncore->lock, flags); + intel_uncore_forcewake_get__locked(uncore, fw_domains); + + upper = intel_uncore_read_fw(uncore, upper_reg); + do { + old_upper = upper; + lower = intel_uncore_read_fw(uncore, lower_reg); + upper = intel_uncore_read_fw(uncore, upper_reg); + } while (upper != old_upper && loop++ < 2); + + intel_uncore_forcewake_put__locked(uncore, fw_domains); + spin_unlock_irqrestore(&uncore->lock, flags); + + return (u64)upper << 32 | lower; +} + static inline int intel_uncore_write_and_verify(struct intel_uncore *uncore, i915_reg_t reg, u32 val, u32 mask, u32 expected_val) -- cgit v1.2.3 From 529d95a6067b74da9d4d5d9ab3009b35c98c5fce Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 10 Nov 2022 17:19:13 +0000 Subject: drm/i915/selftest: Bump up sample period for busy stats selftest Engine busyness samples around a 10ms period is failing with busyness ranging approx. from 87% to 115% as shown below. The expected range is +/- 5% of the sample period. Fail 10% of the time. rcs0: reported 11716042ns [91%] busyness while spinning [for 12805719ns] When determining busyness of active engine, the GuC based engine busyness implementation relies on a 64 bit timestamp register read. The latency incurred by this register read causes the failure. On DG1, when the test fails, the observed latencies range from 900us - 1.5ms. Optimizing the 2x32 read by acquiring the lock and forcewake prior to all reg reads reduces the rate of failure to around 2%, but does not eliminate it. In order to make the selftest more robust and always account for such latencies, increase the sample period to 100 ms. This eliminates the issue as seen in a 1000 runs. v2: (Ashutosh) - Add error to commit msg - Include gitlab bug - Update commit for inclusion of 2x32 optimized read Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/4418 Signed-off-by: Umesh Nerlige Ramappa Acked-by: Tvrtko Ursulin Reviewed-by: Ashutosh Dixit Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20221110171913.670286-3-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/gt/selftest_engine_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index 0dcb3ed44a73..87c94314cf67 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -317,7 +317,7 @@ static int live_engine_busy_stats(void *arg) ENGINE_TRACE(engine, "measuring busy time\n"); preempt_disable(); de = intel_engine_get_busy_time(engine, &t[0]); - mdelay(10); + mdelay(100); de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de); preempt_enable(); dt = ktime_sub(t[1], t[0]); -- cgit v1.2.3 From ba51925da4ef763d6a3aa03b15241a85cdb76865 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Mon, 21 Nov 2022 11:24:49 +0200 Subject: drm/i915/gsc: Only initialize GSC in tile 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For multi-tile setups the GSC operational only on the tile 0. Skip GSC auxiliary device creation for all other tiles in GSC device init code. Initialize basic GSC fields and use the same path as HECI1 (HECI_PXP) device disable. Cc: Tomas Winkler Cc: Vitaly Lubart Signed-off-by: José Roberto de Souza Signed-off-by: Alexander Usyskin Acked-by: Tomas Winkler Reviewed-by: Tomas Winkler Link: https://patchwork.freedesktop.org/patch/msgid/20221121092449.328674-1-alexander.usyskin@intel.com --- drivers/gpu/drm/i915/gt/intel_gsc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c index 976fdf27e790..bcc3605158db 100644 --- a/drivers/gpu/drm/i915/gt/intel_gsc.c +++ b/drivers/gpu/drm/i915/gt/intel_gsc.c @@ -174,6 +174,14 @@ static void gsc_init_one(struct drm_i915_private *i915, struct intel_gsc *gsc, intf->irq = -1; intf->id = intf_id; + /* + * On the multi-tile setups the GSC is functional on the first tile only + */ + if (gsc_to_gt(gsc)->info.id != 0) { + drm_dbg(&i915->drm, "Not initializing gsc for remote tiles\n"); + return; + } + if (intf_id == 0 && !HAS_HECI_PXP(i915)) return; -- cgit v1.2.3 From 8b7f7a9b10b704ba7d73199ff0f01354e0bad7a5 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 22 Nov 2022 16:16:16 +0200 Subject: drm/i915/guc: make default_lists const data The default_lists array should be in rodata. Fixes: dce2bd542337 ("drm/i915/guc: Add Gen9 registers for GuC error state capture.") Cc: Alan Previn Cc: Umesh Nerlige Ramappa Cc: Lucas De Marchi Signed-off-by: Jani Nikula Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20221122141616.3469214-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index 1d49a7ec0bd8..1c1b85073b4b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -170,7 +170,7 @@ static const struct __guc_mmio_reg_descr empty_regs_list[] = { } /* List of lists */ -static struct __guc_mmio_reg_descr_group default_lists[] = { +static const struct __guc_mmio_reg_descr_group default_lists[] = { MAKE_REGLIST(default_global_regs, PF, GLOBAL, 0), MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS), MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS), -- cgit v1.2.3 From 02224691cb0f367acb476911bddfa21e2d596ca5 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 10 Nov 2022 16:56:51 -0800 Subject: drm/i915/huc: fix leak of debug object in huc load fence on driver unload The fence is always initialized in huc_init_early, but the cleanup in huc_fini is only being run if HuC is enabled. This causes a leaking of the debug object when HuC is disabled/not supported, which can in turn trigger a warning if we try to register a new debug offset at the same address on driver reload. To fix the issue, make sure to always run the cleanup code. Reported-by: Tvrtko Ursulin Reported-by: Brian Norris Fixes: 27536e03271d ("drm/i915/huc: track delayed HuC load with a fence") Signed-off-by: Daniele Ceraolo Spurio Cc: Tvrtko Ursulin Cc: Brian Norris Cc: Alan Previn Cc: John Harrison Tested-by: Brian Norris Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20221111005651.4160369-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 12 +++++++----- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 1 + 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index be855811d85d..0976e9101346 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -329,13 +329,15 @@ out: void intel_huc_fini(struct intel_huc *huc) { - if (!intel_uc_fw_is_loadable(&huc->fw)) - return; - + /* + * the fence is initialized in init_early, so we need to clean it up + * even if HuC loading is off. + */ delayed_huc_load_complete(huc); - i915_sw_fence_fini(&huc->delayed_load.fence); - intel_uc_fw_fini(&huc->fw); + + if (intel_uc_fw_is_loadable(&huc->fw)) + intel_uc_fw_fini(&huc->fw); } void intel_huc_suspend(struct intel_huc *huc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 1d28286e6f06..2a508b137e90 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -722,6 +722,7 @@ int intel_uc_runtime_resume(struct intel_uc *uc) static const struct intel_uc_ops uc_ops_off = { .init_hw = __uc_check_hw, + .fini = __uc_fini, /* to clean-up the init_early initialization */ }; static const struct intel_uc_ops uc_ops_on = { -- cgit v1.2.3 From 9b23059b29238204b1769589d665f44bd9b31255 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 22 Nov 2022 15:33:28 -0800 Subject: drm/i915/uc: Fix table order verification to check all FW types It was noticed that the table order verification step was only being run once rather than once per firmware type. Fix that. Note that the long term plan is to convert this code to be a mock selftest. It is already only compiled in when selftests are enabled. And the work involved in the conversion was estimated to be non-trivial. So that conversion is currently low on the priority list. Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20221122233328.854217-1-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 0c80ba51a4bd..31613c7e0838 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -238,7 +238,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) [INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) }, [INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) }, }; - static bool verified; + static bool verified[INTEL_UC_FW_NUM_TYPES]; const struct uc_fw_platform_requirement *fw_blobs; enum intel_platform p = INTEL_INFO(i915)->platform; u32 fw_count; @@ -291,8 +291,8 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) } /* make sure the list is ordered as expected */ - if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST) && !verified) { - verified = true; + if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST) && !verified[uc_fw->type]) { + verified[uc_fw->type] = true; for (i = 1; i < fw_count; i++) { /* Next platform is good: */ @@ -343,7 +343,8 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) continue; bad: - drm_err(&i915->drm, "Invalid FW blob order: %s r%u %s%d.%d.%d comes before %s r%u %s%d.%d.%d\n", + drm_err(&i915->drm, "Invalid %s blob order: %s r%u %s%d.%d.%d comes before %s r%u %s%d.%d.%d\n", + intel_uc_fw_type_repr(uc_fw->type), intel_platform_name(fw_blobs[i - 1].p), fw_blobs[i - 1].rev, fw_blobs[i - 1].blob.legacy ? "L" : "v", fw_blobs[i - 1].blob.major, -- cgit v1.2.3 From f235dbd5b768e238d365fd05d92de5a32abc1c1f Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Mon, 21 Nov 2022 15:56:54 +0100 Subject: drm/i915: Fix negative value passed as remaining time Commit b97060a99b01 ("drm/i915/guc: Update intel_gt_wait_for_idle to work with GuC") extended the API of intel_gt_retire_requests_timeout() with an extra argument 'remaining_timeout', intended for passing back unconsumed portion of requested timeout when 0 (success) is returned. However, when request retirement happens to succeed despite an error returned by a call to dma_fence_wait_timeout(), that error code (a negative value) is passed back instead of remaining time. If we then pass that negative value forward as requested timeout to intel_uc_wait_for_idle(), an explicit BUG will be triggered. If request retirement succeeds but an error code is passed back via remaininig_timeout, we may have no clue on how much of the initial timeout might have been left for spending it on waiting for GuC to become idle. OTOH, since all pending requests have been successfully retired, that error code has been already ignored by intel_gt_retire_requests_timeout(), then we shouldn't fail. Assume no more time has been left on error and pass 0 timeout value to intel_uc_wait_for_idle() to give it a chance to return success if GuC is already idle. v3: Don't fail on any error passed back via remaining_timeout. v2: Fix the issue on the caller side, not the provider. Fixes: b97060a99b01 ("drm/i915/guc: Update intel_gt_wait_for_idle to work with GuC") Signed-off-by: Janusz Krzysztofik Cc: stable@vger.kernel.org # v5.15+ Reviewed-by: Andrzej Hajda Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20221121145655.75141-2-janusz.krzysztofik@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index ca3210dd7b22..600c79bb2ffc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -676,8 +676,13 @@ int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) return -EINTR; } - return timeout ? timeout : intel_uc_wait_for_idle(>->uc, - remaining_timeout); + if (timeout) + return timeout; + + if (remaining_timeout < 0) + remaining_timeout = 0; + + return intel_uc_wait_for_idle(>->uc, remaining_timeout); } int intel_gt_init(struct intel_gt *gt) -- cgit v1.2.3 From f301a29f143760ce8d3d6b6a8436d45d3448cde6 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Mon, 21 Nov 2022 15:56:55 +0100 Subject: drm/i915: Never return 0 if not all requests retired Users of intel_gt_retire_requests_timeout() expect 0 return value on success. However, we have no protection from passing back 0 potentially returned by a call to dma_fence_wait_timeout() when it succedes right after its timeout has expired. Replace 0 with -ETIME before potentially using the timeout value as return code, so -ETIME is returned if there are still some requests not retired after timeout, 0 otherwise. v3: Use conditional expression, more compact but also better reflecting intention standing behind the change. v2: Move the added lines down so flush_submission() is not affected. Fixes: f33a8a51602c ("drm/i915: Merge wait_for_timelines with retire_request") Signed-off-by: Janusz Krzysztofik Reviewed-by: Andrzej Hajda Cc: stable@vger.kernel.org # v5.5+ Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20221121145655.75141-3-janusz.krzysztofik@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gt_requests.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index edb881d75630..1dfd01668c79 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -199,7 +199,7 @@ out_active: spin_lock(&timelines->lock); if (remaining_timeout) *remaining_timeout = timeout; - return active_count ? timeout : 0; + return active_count ? timeout ?: -ETIME : 0; } static void retire_work_handler(struct work_struct *work) -- cgit v1.2.3 From 468a4e630c7da8cf586f85cc498d6097aed1ab4b Mon Sep 17 00:00:00 2001 From: Matt Atwood Date: Wed, 23 Nov 2022 10:36:47 -0800 Subject: drm/i915/dg2: Introduce Wa_18018764978 Wa_18018764978 applies to specific steppings of DG2 (G10 C0+, G11 and G12 A0+). Clean up style in function at the same time. Bspec: 66622 Signed-off-by: Matt Atwood Reviewed-by: Gustavo Sousa Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20221123183648.407058-1-matthew.s.atwood@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 +++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 3658e12db74b..bdeb31a40623 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -456,6 +456,9 @@ #define GEN8_L3CNTLREG _MMIO(0x7034) #define GEN8_ERRDETBCTRL (1 << 9) +#define PSS_MODE2 _MMIO(0x703c) +#define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5) + #define GEN7_SC_INSTDONE _MMIO(0x7100) #define GEN12_SC_INSTDONE_EXTRA _MMIO(0x7104) #define GEN12_SC_INSTDONE_EXTRA2 _MMIO(0x7108) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index e39ce0bf91b3..33e5120c1ad5 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -770,9 +770,14 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_14014947963:dg2 */ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) + IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); + /* Wa_18018764978:dg2 */ + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_C0, STEP_FOREVER) || + IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) + wa_masked_en(wal, PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL); + /* Wa_15010599737:dg2 */ wa_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); } -- cgit v1.2.3 From 900a80c5836587d95db32742f66e1f34f7b40fcb Mon Sep 17 00:00:00 2001 From: Matt Atwood Date: Wed, 23 Nov 2022 10:36:48 -0800 Subject: drm/i915/dg2: Introduce Wa_18019271663 Wa_18019271663 applies to all DG2 steppings and skus. Bspec: 66622 Signed-off-by: Matt Atwood Reviewed-by: Gustavo Sousa Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20221123183648.407058-2-matthew.s.atwood@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 7 ++++--- drivers/gpu/drm/i915/gt/intel_workarounds.c | 3 +++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index bdeb31a40623..6caf2d8deb98 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -428,9 +428,10 @@ #define RC_OP_FLUSH_ENABLE (1 << 0) #define HIZ_RAW_STALL_OPT_DISABLE (1 << 2) #define CACHE_MODE_1 _MMIO(0x7004) /* IVB+ */ -#define PIXEL_SUBSPAN_COLLECT_OPT_DISABLE (1 << 6) -#define GEN8_4x4_STC_OPTIMIZATION_DISABLE (1 << 6) -#define GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE (1 << 1) +#define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11) +#define PIXEL_SUBSPAN_COLLECT_OPT_DISABLE REG_BIT(6) +#define GEN8_4x4_STC_OPTIMIZATION_DISABLE REG_BIT(6) +#define GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE REG_BIT(1) #define GEN7_GT_MODE _MMIO(0x7008) #define GEN9_IZ_HASHING_MASK(slice) (0x3 << ((slice) * 2)) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 33e5120c1ad5..d5229039af87 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -780,6 +780,9 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_15010599737:dg2 */ wa_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); + + /* Wa_18019271663:dg2 */ + wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE); } static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, -- cgit v1.2.3 From 0f85715804e72cdcf489a89930ec64437e11ca8c Mon Sep 17 00:00:00 2001 From: Aravind Iddamsetty Date: Tue, 22 Nov 2022 12:31:26 +0530 Subject: drm/i915/mtl: Media GT and Render GT share common GGTT On XE_LPM+ platforms the media engines are carved out into a separate GT but have a common GGTMMADR address range which essentially makes the GGTT address space to be shared between media and render GT. As a result any updates in GGTT shall invalidate TLB of GTs sharing it and similarly any operation on GGTT requiring an action on a GT will have to involve all GTs sharing it. setup_private_pat was being done on a per GGTT based as that doesn't touch any GGTT structures moved it to per GT based. BSPEC: 63834 v2: 1. Add details to commit msg 2. includes fix for failure to add item to ggtt->gt_list, as suggested by Lucas 3. as ggtt_flush() is used only for ggtt drop i915_is_ggtt check within it. 4. setup_private_pat moved out of intel_gt_tiles_init v3: 1. Move out for_each_gt from i915_driver.c (Jani Nikula) v4: drop using RCU primitives on ggtt->gt_list as it is not an RCU list (Matt Roper) Cc: Matt Roper Signed-off-by: Aravind Iddamsetty Reviewed-by: Matt Roper Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20221122070126.4813-1-aravind.iddamsetty@intel.com --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 54 +++++++++++++++++++++++-------- drivers/gpu/drm/i915/gt/intel_gt.c | 13 ++++++-- drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 ++ drivers/gpu/drm/i915/gt/intel_gtt.h | 4 +++ drivers/gpu/drm/i915/i915_driver.c | 12 ++++--- drivers/gpu/drm/i915/i915_gem.c | 2 ++ drivers/gpu/drm/i915/i915_gem_evict.c | 51 +++++++++++++++++++++-------- drivers/gpu/drm/i915/i915_vma.c | 5 ++- drivers/gpu/drm/i915/selftests/i915_gem.c | 2 ++ 9 files changed, 111 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 8145851ad23d..7644738b9cdb 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -196,10 +197,13 @@ retry: void i915_ggtt_suspend(struct i915_ggtt *ggtt) { + struct intel_gt *gt; + i915_ggtt_suspend_vm(&ggtt->vm); ggtt->invalidate(ggtt); - intel_gt_check_and_clear_faults(ggtt->vm.gt); + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) + intel_gt_check_and_clear_faults(gt); } void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) @@ -225,16 +229,21 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) { - struct intel_uncore *uncore = ggtt->vm.gt->uncore; struct drm_i915_private *i915 = ggtt->vm.i915; gen8_ggtt_invalidate(ggtt); - if (GRAPHICS_VER(i915) >= 12) - intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR, - GEN12_GUC_TLB_INV_CR_INVALIDATE); - else - intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); + if (GRAPHICS_VER(i915) >= 12) { + struct intel_gt *gt; + + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) + intel_uncore_write_fw(gt->uncore, + GEN12_GUC_TLB_INV_CR, + GEN12_GUC_TLB_INV_CR_INVALIDATE); + } else { + intel_uncore_write_fw(ggtt->vm.gt->uncore, + GEN8_GTCR, GEN8_GTCR_INVALIDATE); + } } u64 gen8_ggtt_pte_encode(dma_addr_t addr, @@ -986,8 +995,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.pte_encode = gen8_ggtt_pte_encode; - setup_private_pat(ggtt->vm.gt); - return ggtt_probe_common(ggtt, size); } @@ -1196,7 +1203,14 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt) */ int i915_ggtt_probe_hw(struct drm_i915_private *i915) { - int ret; + struct intel_gt *gt; + int ret, i; + + for_each_gt(gt, i915, i) { + ret = intel_gt_assign_ggtt(gt); + if (ret) + return ret; + } ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915)); if (ret) @@ -1208,6 +1222,19 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915) return 0; } +struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt; + + ggtt = drmm_kzalloc(&i915->drm, sizeof(*ggtt), GFP_KERNEL); + if (!ggtt) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&ggtt->gt_list); + + return ggtt; +} + int i915_ggtt_enable_hw(struct drm_i915_private *i915) { if (GRAPHICS_VER(i915) < 6) @@ -1296,9 +1323,11 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm) void i915_ggtt_resume(struct i915_ggtt *ggtt) { + struct intel_gt *gt; bool flush; - intel_gt_check_and_clear_faults(ggtt->vm.gt); + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) + intel_gt_check_and_clear_faults(gt); flush = i915_ggtt_resume_vm(&ggtt->vm); @@ -1307,9 +1336,6 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) if (flush) wbinvd_on_all_cpus(); - if (GRAPHICS_VER(ggtt->vm.i915) >= 8) - setup_private_pat(ggtt->vm.gt); - intel_ggtt_restore_fences(ggtt); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 600c79bb2ffc..9c45afdc6e3d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -109,9 +109,18 @@ static int intel_gt_probe_lmem(struct intel_gt *gt) int intel_gt_assign_ggtt(struct intel_gt *gt) { - gt->ggtt = drmm_kzalloc(>->i915->drm, sizeof(*gt->ggtt), GFP_KERNEL); + /* Media GT shares primary GT's GGTT */ + if (gt->type == GT_MEDIA) { + gt->ggtt = to_gt(gt->i915)->ggtt; + } else { + gt->ggtt = i915_ggtt_create(gt->i915); + if (IS_ERR(gt->ggtt)) + return PTR_ERR(gt->ggtt); + } - return gt->ggtt ? 0 : -ENOMEM; + list_add_tail(>->ggtt_link, >->ggtt->gt_list); + + return 0; } int intel_gt_init_mmio(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index c1d9cd255e06..8d915640914b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -277,6 +277,9 @@ struct intel_gt { struct kobject *sysfs_defaults; struct i915_perf_gt perf; + + /** link: &ggtt.gt_list */ + struct list_head ggtt_link; }; struct intel_gt_definition { diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 4d75ba4bb41d..d1900fec6cd1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -390,6 +390,9 @@ struct i915_ggtt { struct mutex error_mutex; struct drm_mm_node error_capture; struct drm_mm_node uc_fw; + + /** List of GTs mapping this GGTT */ + struct list_head gt_list; }; struct i915_ppgtt { @@ -584,6 +587,7 @@ void i915_ggtt_disable_guc(struct i915_ggtt *ggtt); int i915_init_ggtt(struct drm_i915_private *i915); void i915_ggtt_driver_release(struct drm_i915_private *i915); void i915_ggtt_driver_late_release(struct drm_i915_private *i915); +struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915); static inline bool i915_ggtt_has_aperture(const struct i915_ggtt *ggtt) { diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 69103ae37779..4e1bb3c23c63 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -612,10 +612,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) i915_perf_init(dev_priv); - ret = intel_gt_assign_ggtt(to_gt(dev_priv)); - if (ret) - goto err_perf; - ret = i915_ggtt_probe_hw(dev_priv); if (ret) goto err_perf; @@ -1316,7 +1312,8 @@ int i915_driver_suspend_switcheroo(struct drm_i915_private *i915, static int i915_drm_resume(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - int ret; + struct intel_gt *gt; + int ret, i; disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); @@ -1331,6 +1328,11 @@ static int i915_drm_resume(struct drm_device *dev) drm_err(&dev_priv->drm, "failed to re-enable GGTT\n"); i915_ggtt_resume(to_gt(dev_priv)->ggtt); + + for_each_gt(gt, dev_priv, i) + if (GRAPHICS_VER(gt->i915) >= 8) + setup_private_pat(gt); + /* Must be called after GGTT is resumed. */ intel_dpt_resume(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8468ca9885fd..086c4702e1bf 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1143,6 +1143,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) for_each_gt(gt, dev_priv, i) { intel_uc_fetch_firmwares(>->uc); intel_wopcm_init(>->wopcm); + if (GRAPHICS_VER(dev_priv) >= 8) + setup_private_pat(gt); } ret = i915_init_ggtt(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index f025ee4fa526..4cfe36b0366b 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -43,16 +43,25 @@ static bool dying_vma(struct i915_vma *vma) return !kref_read(&vma->obj->base.refcount); } -static int ggtt_flush(struct intel_gt *gt) +static int ggtt_flush(struct i915_address_space *vm) { - /* - * Not everything in the GGTT is tracked via vma (otherwise we - * could evict as required with minimal stalling) so we are forced - * to idle the GPU and explicitly retire outstanding requests in - * the hopes that we can then remove contexts and the like only - * bound by their active reference. - */ - return intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + struct intel_gt *gt; + int ret = 0; + + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) { + /* + * Not everything in the GGTT is tracked via vma (otherwise we + * could evict as required with minimal stalling) so we are forced + * to idle the GPU and explicitly retire outstanding requests in + * the hopes that we can then remove contexts and the like only + * bound by their active reference. + */ + ret = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); + if (ret) + return ret; + } + return ret; } static bool grab_vma(struct i915_vma *vma, struct i915_gem_ww_ctx *ww) @@ -149,6 +158,7 @@ i915_gem_evict_something(struct i915_address_space *vm, struct drm_mm_node *node; enum drm_mm_insert_mode mode; struct i915_vma *active; + struct intel_gt *gt; int ret; lockdep_assert_held(&vm->mutex); @@ -174,7 +184,14 @@ i915_gem_evict_something(struct i915_address_space *vm, min_size, alignment, color, start, end, mode); - intel_gt_retire_requests(vm->gt); + if (i915_is_ggtt(vm)) { + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) + intel_gt_retire_requests(gt); + } else { + intel_gt_retire_requests(vm->gt); + } search_again: active = NULL; @@ -246,7 +263,7 @@ search_again: if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy)) return -EBUSY; - ret = ggtt_flush(vm->gt); + ret = ggtt_flush(vm); if (ret) return ret; @@ -332,7 +349,15 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * a stray pin (preventing eviction) that can only be resolved by * retiring. */ - intel_gt_retire_requests(vm->gt); + if (i915_is_ggtt(vm)) { + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + struct intel_gt *gt; + + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) + intel_gt_retire_requests(gt); + } else { + intel_gt_retire_requests(vm->gt); + } if (i915_vm_has_cache_coloring(vm)) { /* Expand search to cover neighbouring guard pages (or lack!) */ @@ -438,7 +463,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) * switch otherwise is ineffective. */ if (i915_is_ggtt(vm)) { - ret = ggtt_flush(vm->gt); + ret = ggtt_flush(vm); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 703fee6b5f75..726705b10637 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1544,6 +1544,8 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, u32 align, unsigned int flags) { struct i915_address_space *vm = vma->vm; + struct intel_gt *gt; + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); int err; do { @@ -1559,7 +1561,8 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, } /* Unlike i915_vma_pin, we don't take no for an answer! */ - flush_idle_contexts(vm->gt); + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) + flush_idle_contexts(gt); if (mutex_lock_interruptible(&vm->mutex) == 0) { /* * We pass NULL ww here, as we don't want to unbind diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index e5dd82e7e480..2535b9684bd1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -127,6 +127,8 @@ static void igt_pm_resume(struct drm_i915_private *i915) */ with_intel_runtime_pm(&i915->runtime_pm, wakeref) { i915_ggtt_resume(to_gt(i915)->ggtt); + if (GRAPHICS_VER(i915) >= 8) + setup_private_pat(to_gt(i915)); i915_gem_resume(i915); } } -- cgit v1.2.3 From 14347a9c889fbdbae81e500f6c6e313f5d8e5271 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 23 Nov 2022 15:54:17 -0800 Subject: drm/i915/huc: always init the delayed load fence The fence is only tracking if the HuC load is in progress or not and doesn't distinguish between already loaded, not supported or disabled, so we can always initialize it to completed, no matter the actual support. We already do that for most platforms, but we skip it on GTs that lack VCS engines (e.g. MTL root GT), so fix that. Note that the cleanup is already unconditional. While at it, move the init/fini to helper functions. Fixes: 02224691cb0f ("drm/i915/huc: fix leak of debug object in huc load fence on driver unload") Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: Alan Previn Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20221123235417.1475709-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 47 ++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 0976e9101346..410905da8e97 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -211,6 +211,30 @@ void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *b huc->delayed_load.nb.notifier_call = NULL; } +static void delayed_huc_load_init(struct intel_huc *huc) +{ + /* + * Initialize fence to be complete as this is expected to be complete + * unless there is a delayed HuC load in progress. + */ + i915_sw_fence_init(&huc->delayed_load.fence, + sw_fence_dummy_notify); + i915_sw_fence_commit(&huc->delayed_load.fence); + + hrtimer_init(&huc->delayed_load.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + huc->delayed_load.timer.function = huc_delayed_load_timer_callback; +} + +static void delayed_huc_load_fini(struct intel_huc *huc) +{ + /* + * the fence is initialized in init_early, so we need to clean it up + * even if HuC loading is off. + */ + delayed_huc_load_complete(huc); + i915_sw_fence_fini(&huc->delayed_load.fence); +} + static bool vcs_supported(struct intel_gt *gt) { intel_engine_mask_t mask = gt->info.engine_mask; @@ -241,6 +265,15 @@ void intel_huc_init_early(struct intel_huc *huc) intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC); + /* + * we always init the fence as already completed, even if HuC is not + * supported. This way we don't have to distinguish between HuC not + * supported/disabled or already loaded, and can focus on if the load + * is currently in progress (fence not complete) or not, which is what + * we care about for stalling userspace submissions. + */ + delayed_huc_load_init(huc); + if (!vcs_supported(gt)) { intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_NOT_SUPPORTED); return; @@ -255,17 +288,6 @@ void intel_huc_init_early(struct intel_huc *huc) huc->status.mask = HUC_FW_VERIFIED; huc->status.value = HUC_FW_VERIFIED; } - - /* - * Initialize fence to be complete as this is expected to be complete - * unless there is a delayed HuC reload in progress. - */ - i915_sw_fence_init(&huc->delayed_load.fence, - sw_fence_dummy_notify); - i915_sw_fence_commit(&huc->delayed_load.fence); - - hrtimer_init(&huc->delayed_load.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - huc->delayed_load.timer.function = huc_delayed_load_timer_callback; } #define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy") @@ -333,8 +355,7 @@ void intel_huc_fini(struct intel_huc *huc) * the fence is initialized in init_early, so we need to clean it up * even if HuC loading is off. */ - delayed_huc_load_complete(huc); - i915_sw_fence_fini(&huc->delayed_load.fence); + delayed_huc_load_fini(huc); if (intel_uc_fw_is_loadable(&huc->fw)) intel_uc_fw_fini(&huc->fw); -- cgit v1.2.3 From 03b713d029bd17a1ed426590609af79843db95e2 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 28 Nov 2022 15:30:10 -0800 Subject: drm/i915/gt: Correct kerneldoc for intel_gt_mcr_wait_for_reg() The kerneldoc function name was not updated when this function was converted to a non-fw form. Fixes: 192bb40f030a ("drm/i915/gt: Manage uncore->lock while waiting on MCR register") Reported-by: kernel test robot Signed-off-by: Matt Roper Reviewed-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20221128233014.4000136-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index d9a8ff9e5e57..ea86c1ab5dc5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -702,7 +702,7 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, } /** - * intel_gt_mcr_wait_for_reg_fw - wait until MCR register matches expected state + * intel_gt_mcr_wait_for_reg - wait until MCR register matches expected state * @gt: GT structure * @reg: the register to read * @mask: mask to apply to register value -- cgit v1.2.3 From 8d9f7d25d50ba55f6b3463d0b8085f62efc39ec4 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 28 Nov 2022 15:30:11 -0800 Subject: drm/i915/gt: Pass gt rather than uncore to lowest-level reads/writes Passing the GT rather than uncore to the lowest level MCR read and write functions will make it easier to introduce dedicated MCR locking in a following patch. Signed-off-by: Matt Roper Reviewed-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20221128233014.4000136-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index ea86c1ab5dc5..f4484bb18ec9 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -221,7 +221,7 @@ static i915_reg_t mcr_reg_cast(const i915_mcr_reg_t mcr) /* * rw_with_mcr_steering_fw - Access a register with specific MCR steering - * @uncore: pointer to struct intel_uncore + * @gt: GT to read register from * @reg: register being accessed * @rw_flag: FW_REG_READ for read access or FW_REG_WRITE for write access * @group: group number (documented as "sliceid" on older platforms) @@ -232,10 +232,11 @@ static i915_reg_t mcr_reg_cast(const i915_mcr_reg_t mcr) * * Caller needs to make sure the relevant forcewake wells are up. */ -static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore, +static u32 rw_with_mcr_steering_fw(struct intel_gt *gt, i915_mcr_reg_t reg, u8 rw_flag, int group, int instance, u32 value) { + struct intel_uncore *uncore = gt->uncore; u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0; lockdep_assert_held(&uncore->lock); @@ -308,11 +309,12 @@ static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore, return val; } -static u32 rw_with_mcr_steering(struct intel_uncore *uncore, +static u32 rw_with_mcr_steering(struct intel_gt *gt, i915_mcr_reg_t reg, u8 rw_flag, int group, int instance, u32 value) { + struct intel_uncore *uncore = gt->uncore; enum forcewake_domains fw_domains; u32 val; @@ -325,7 +327,7 @@ static u32 rw_with_mcr_steering(struct intel_uncore *uncore, spin_lock_irq(&uncore->lock); intel_uncore_forcewake_get__locked(uncore, fw_domains); - val = rw_with_mcr_steering_fw(uncore, reg, rw_flag, group, instance, value); + val = rw_with_mcr_steering_fw(gt, reg, rw_flag, group, instance, value); intel_uncore_forcewake_put__locked(uncore, fw_domains); spin_unlock_irq(&uncore->lock); @@ -347,7 +349,7 @@ u32 intel_gt_mcr_read(struct intel_gt *gt, i915_mcr_reg_t reg, int group, int instance) { - return rw_with_mcr_steering(gt->uncore, reg, FW_REG_READ, group, instance, 0); + return rw_with_mcr_steering(gt, reg, FW_REG_READ, group, instance, 0); } /** @@ -364,7 +366,7 @@ u32 intel_gt_mcr_read(struct intel_gt *gt, void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value, int group, int instance) { - rw_with_mcr_steering(gt->uncore, reg, FW_REG_WRITE, group, instance, value); + rw_with_mcr_steering(gt, reg, FW_REG_WRITE, group, instance, value); } /** @@ -588,7 +590,7 @@ u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_mcr_reg_t reg) for (type = 0; type < NUM_STEERING_TYPES; type++) { if (reg_needs_read_steering(gt, reg, type)) { get_nonterminated_steering(gt, type, &group, &instance); - return rw_with_mcr_steering_fw(gt->uncore, reg, + return rw_with_mcr_steering_fw(gt, reg, FW_REG_READ, group, instance, 0); } @@ -615,7 +617,7 @@ u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_mcr_reg_t reg) for (type = 0; type < NUM_STEERING_TYPES; type++) { if (reg_needs_read_steering(gt, reg, type)) { get_nonterminated_steering(gt, type, &group, &instance); - return rw_with_mcr_steering(gt->uncore, reg, + return rw_with_mcr_steering(gt, reg, FW_REG_READ, group, instance, 0); } -- cgit v1.2.3 From 4186e2185b4ffc9ce652566d4a4f249484841ff4 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 28 Nov 2022 15:30:12 -0800 Subject: drm/i915/gt: Add dedicated MCR lock We've been overloading uncore->lock to protect access to the MCR steering register. That's not really what uncore->lock is intended for, and it would be better if we didn't need to hold such a high-traffic spinlock for the whole sequence of (apply steering, access MCR register, restore steering). Let's create a dedicated MCR lock to protect the steering control register over this critical section and stop relying on the high-traffic uncore->lock. For now the new lock is a software lock. However some platforms (MTL and beyond) have a hardware-provided locking mechanism that can be used to serialize not only software accesses, but also hardware/firmware accesses as well; support for that hardware level lock will be added in a future patch. v2: - Use irqsave/irqrestore spinlock calls; platforms using execlist submission rather than GuC submission can perform MCR accesses in interrupt context because reset -> errordump happens in a tasklet. Cc: Chris Wilson Cc: Mika Kuoppala Cc: Balasubramani Vivekanandan Signed-off-by: Matt Roper Reviewed-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20221128233014.4000136-4-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 7 ++- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 79 +++++++++++++++++++++++++++-- drivers/gpu/drm/i915/gt/intel_gt_mcr.h | 2 + drivers/gpu/drm/i915/gt/intel_gt_types.h | 8 +++ drivers/gpu/drm/i915/gt/intel_mocs.c | 3 ++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 12 +++-- 6 files changed, 101 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 9c45afdc6e3d..42b9e2bc7477 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -1087,6 +1087,7 @@ static void mmio_invalidate_full(struct intel_gt *gt) enum intel_engine_id id; const i915_reg_t *regs; unsigned int num = 0; + unsigned long flags; if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { regs = NULL; @@ -1107,7 +1108,8 @@ static void mmio_invalidate_full(struct intel_gt *gt) intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); - spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */ + intel_gt_mcr_lock(gt, &flags); + spin_lock(&uncore->lock); /* serialise invalidate with GT reset */ awake = 0; for_each_engine(engine, gt, id) { @@ -1141,7 +1143,8 @@ static void mmio_invalidate_full(struct intel_gt *gt) IS_ALDERLAKE_P(i915))) intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1); - spin_unlock_irq(&uncore->lock); + spin_unlock(&uncore->lock); + intel_gt_mcr_unlock(gt, flags); for_each_engine_masked(engine, gt, awake, tmp) { struct reg_and_bit rb; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index f4484bb18ec9..aa070ae57f11 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -143,6 +143,8 @@ void intel_gt_mcr_init(struct intel_gt *gt) unsigned long fuse; int i; + spin_lock_init(>->mcr_lock); + /* * An mslice is unavailable only if both the meml3 for the slice is * disabled *and* all of the DSS in the slice (quadrant) are disabled. @@ -228,6 +230,7 @@ static i915_reg_t mcr_reg_cast(const i915_mcr_reg_t mcr) * @instance: instance number (documented as "subsliceid" on older platforms) * @value: register value to be written (ignored for read) * + * Context: The caller must hold the MCR lock * Return: 0 for write access. register value for read access. * * Caller needs to make sure the relevant forcewake wells are up. @@ -239,7 +242,7 @@ static u32 rw_with_mcr_steering_fw(struct intel_gt *gt, struct intel_uncore *uncore = gt->uncore; u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0; - lockdep_assert_held(&uncore->lock); + lockdep_assert_held(>->mcr_lock); if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 70)) { /* @@ -316,6 +319,7 @@ static u32 rw_with_mcr_steering(struct intel_gt *gt, { struct intel_uncore *uncore = gt->uncore; enum forcewake_domains fw_domains; + unsigned long flags; u32 val; fw_domains = intel_uncore_forcewake_for_reg(uncore, mcr_reg_cast(reg), @@ -324,17 +328,59 @@ static u32 rw_with_mcr_steering(struct intel_gt *gt, GEN8_MCR_SELECTOR, FW_REG_READ | FW_REG_WRITE); - spin_lock_irq(&uncore->lock); + intel_gt_mcr_lock(gt, &flags); + spin_lock(&uncore->lock); intel_uncore_forcewake_get__locked(uncore, fw_domains); val = rw_with_mcr_steering_fw(gt, reg, rw_flag, group, instance, value); intel_uncore_forcewake_put__locked(uncore, fw_domains); - spin_unlock_irq(&uncore->lock); + spin_unlock(&uncore->lock); + intel_gt_mcr_unlock(gt, flags); return val; } +/** + * intel_gt_mcr_lock - Acquire MCR steering lock + * @gt: GT structure + * @flags: storage to save IRQ flags to + * + * Performs locking to protect the steering for the duration of an MCR + * operation. Depending on the platform, this may be a software lock + * (gt->mcr_lock) or a hardware lock (i.e., a register that synchronizes + * access not only for the driver, but also for external hardware and + * firmware agents). + * + * Context: Takes gt->mcr_lock. uncore->lock should *not* be held when this + * function is called, although it may be acquired after this + * function call. + */ +void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags) +{ + unsigned long __flags; + + lockdep_assert_not_held(>->uncore->lock); + + spin_lock_irqsave(>->mcr_lock, __flags); + + *flags = __flags; +} + +/** + * intel_gt_mcr_unlock - Release MCR steering lock + * @gt: GT structure + * @flags: IRQ flags to restore + * + * Releases the lock acquired by intel_gt_mcr_lock(). + * + * Context: Releases gt->mcr_lock + */ +void intel_gt_mcr_unlock(struct intel_gt *gt, unsigned long flags) +{ + spin_unlock_irqrestore(>->mcr_lock, flags); +} + /** * intel_gt_mcr_read - read a specific instance of an MCR register * @gt: GT structure @@ -342,6 +388,8 @@ static u32 rw_with_mcr_steering(struct intel_gt *gt, * @group: the MCR group * @instance: the MCR instance * + * Context: Takes and releases gt->mcr_lock + * * Returns the value read from an MCR register after steering toward a specific * group/instance. */ @@ -362,6 +410,8 @@ u32 intel_gt_mcr_read(struct intel_gt *gt, * * Write an MCR register in unicast mode after steering toward a specific * group/instance. + * + * Context: Calls a function that takes and releases gt->mcr_lock */ void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value, int group, int instance) @@ -376,10 +426,16 @@ void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_mcr_reg_t reg, u32 val * @value: value to write * * Write an MCR register in multicast mode to update all instances. + * + * Context: Takes and releases gt->mcr_lock */ void intel_gt_mcr_multicast_write(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value) { + unsigned long flags; + + intel_gt_mcr_lock(gt, &flags); + /* * Ensure we have multicast behavior, just in case some non-i915 agent * left the hardware in unicast mode. @@ -388,6 +444,8 @@ void intel_gt_mcr_multicast_write(struct intel_gt *gt, intel_uncore_write_fw(gt->uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST); intel_uncore_write(gt->uncore, mcr_reg_cast(reg), value); + + intel_gt_mcr_unlock(gt, flags); } /** @@ -400,9 +458,13 @@ void intel_gt_mcr_multicast_write(struct intel_gt *gt, * function assumes the caller is already holding any necessary forcewake * domains; use intel_gt_mcr_multicast_write() in cases where forcewake should * be obtained automatically. + * + * Context: The caller must hold gt->mcr_lock. */ void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value) { + lockdep_assert_held(>->mcr_lock); + /* * Ensure we have multicast behavior, just in case some non-i915 agent * left the hardware in unicast mode. @@ -429,6 +491,8 @@ void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_mcr_reg_t reg, u3 * domains; use intel_gt_mcr_multicast_rmw() in cases where forcewake should * be obtained automatically. * + * Context: Calls functions that take and release gt->mcr_lock + * * Returns the old (unmodified) value read. */ u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_mcr_reg_t reg, @@ -580,6 +644,8 @@ void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt, * domains; use intel_gt_mcr_read_any() in cases where forcewake should be * obtained automatically. * + * Context: The caller must hold gt->mcr_lock. + * * Returns the value from a non-terminated instance of @reg. */ u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_mcr_reg_t reg) @@ -587,6 +653,8 @@ u32 intel_gt_mcr_read_any_fw