From 77c296966e866a795742a46fc52a218771894867 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 13 Nov 2020 13:25:10 +0000 Subject: drm/i915: Avoid memory leak with more than 16 workarounds on a list I forgot to free the old list when growing past 16 entries. Luckily, as much as I checked, none of the current platforms has more than 16 workarounds on a single list. Signed-off-by: Tvrtko Ursulin Fixes: 452420d22d5b ("drm/i915: Fuse per-context workaround handling with the common framework") Reported-by: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20201113132510.2298483-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index fed9503a7c4e..adc9a8ea410a 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -131,8 +131,10 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) return; } - if (wal->list) + if (wal->list) { memcpy(list, wal->list, sizeof(*wa) * wal->count); + kfree(wal->list); + } wal->list = list; } -- cgit v1.2.3 From 01d708840c26c9532579677eaca942363a009fd5 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Mon, 16 Nov 2020 09:41:12 -0500 Subject: drm/i915/selftests: Fix wrong return value of perf_series_engines() If intel context create failed, the perf_series_engines() will return 0 rather than error, because we doesn't initialize the return value. 
Fixes: cbfd3a0c5a55 ("drm/i915/selftests: Add request throughput measurement to perf") Reported-by: Hulk Robot Signed-off-by: Zhang Xiaoxu Reviewed-by: Mika Kuoppala Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20201116144112.3673011-1-zhangxiaoxu5@huawei.com --- drivers/gpu/drm/i915/selftests/i915_request.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 64bbb8288249..480b3da4d8a6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -2467,8 +2467,10 @@ static int perf_series_engines(void *arg) struct intel_context *ce; ce = intel_context_create(engine); - if (IS_ERR(ce)) + if (IS_ERR(ce)) { + err = PTR_ERR(ce); goto out; + } err = intel_context_pin(ce); if (err) { -- cgit v1.2.3 From 19384452052a1e0525e663bfbdd62ac1399bb647 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Mon, 16 Nov 2020 09:35:40 -0500 Subject: drm/i915/selftests: Fix wrong return value of perf_request_latency() If intel context create failed, the perf_request_latency() will return 0 rather than error, because we doesn't initialize the return value. 
Fixes: 25c26f18ea79 ("drm/i915/selftests: Measure dispatch latency") Reported-by: Hulk Robot Signed-off-by: Zhang Xiaoxu Reviewed-by: Mika Kuoppala Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20201116143540.3648870-1-zhangxiaoxu5@huawei.com --- drivers/gpu/drm/i915/selftests/i915_request.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 480b3da4d8a6..e424a6d1a68c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -2293,8 +2293,10 @@ static int perf_request_latency(void *arg) struct intel_context *ce; ce = intel_context_create(engine); - if (IS_ERR(ce)) + if (IS_ERR(ce)) { + err = PTR_ERR(ce); goto out; + } err = intel_context_pin(ce); if (err) { -- cgit v1.2.3 From ac54c826cdd6858dfe0246fc3f195f5705675601 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 26 Oct 2020 21:32:28 -0700 Subject: drm/i915/dg1: make Wa_22010271021 permanent Just like for rkl and tgl, this should be permanent as well for dg1 instead just for A0. The commit making it permanent for those platforms ended up "racing" with the commit adding the DG1 WAs, so now fix that up. 
v2: Add "tgl,dg1" to WA comment (Matt) Cc: Swathi Dhanavanthri Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20201027043228.696518-3-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index adc9a8ea410a..a82554baa6ac 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1770,6 +1770,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) */ wa_write_or(wal, GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE); + + /* + * Wa_1606700617:tgl,dg1 + * Wa_22010271021:tgl,rkl,dg1 + */ + wa_masked_en(wal, + GEN9_CS_DEBUG_MODE1, + FF_DOP_CLOCK_GATE_DISABLE); } if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) || @@ -1798,14 +1806,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN6_RC_SLEEP_PSMI_CONTROL, GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | GEN8_RC_SEMA_IDLE_MSG_DISABLE); - - /* - * Wa_1606700617:tgl - * Wa_22010271021:tgl,rkl - */ - wa_masked_en(wal, - GEN9_CS_DEBUG_MODE1, - FF_DOP_CLOCK_GATE_DISABLE); } if (IS_GEN(i915, 12)) { -- cgit v1.2.3 From d33fcd798cb71268a48f5a26a8ab7ab0ddd51955 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 17 Nov 2020 11:30:39 +0000 Subject: drm/i915/gt: Ignore dt==0 for reporting underflows The presumption was that some time would always elapse between recording the start and the finish of a context switch. This turns out to be a regular occurrence and emitting a debug statement superfluous. 
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201117113103.21480-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 8a51c1c3a091..52b84474f93a 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1307,7 +1307,7 @@ static void reset_active(struct i915_request *rq, static void st_update_runtime_underflow(struct intel_context *ce, s32 dt) { #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) - ce->runtime.num_underflow += dt < 0; + ce->runtime.num_underflow++; ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt); #endif } @@ -1324,7 +1324,7 @@ static void intel_context_update_runtime(struct intel_context *ce) ce->runtime.last = intel_context_get_runtime(ce); dt = ce->runtime.last - old; - if (unlikely(dt <= 0)) { + if (unlikely(dt < 0)) { CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n", old, ce->runtime.last, dt); st_update_runtime_underflow(ce, dt); -- cgit v1.2.3 From 45e50f48b7907e650cfbbc7879abfe3a0c419c73 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 18 Nov 2020 13:38:39 +0000 Subject: drm/i915/gt: Remember to free the virtual breadcrumbs Since we allocate some breadcrumbs for the virtual engine, and the virtual engine has a custom destructor, we also need to free the breadcrumbs after use. 
Fixes: b3786b29379c ("drm/i915/gt: Distinguish the virtual breadcrumbs from the irq breadcrumbs") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201118133839.1783-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 52b84474f93a..f7eca93f04bc 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -5512,6 +5512,7 @@ static void virtual_context_destroy(struct kref *kref) __execlists_context_fini(&ve->context); intel_context_fini(&ve->context); + intel_breadcrumbs_free(ve->base.breadcrumbs); intel_engine_free_request_pool(&ve->base); kfree(ve->bonds); -- cgit v1.2.3 From dac67c2d338c8ff2f5abb9a2210bfc0264c025e5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 3 Nov 2020 20:43:07 +0000 Subject: drm/i915/gvt: Remove incorrect kerneldoc marking Just a normal comment, not a kerneldoc function description. 
drivers/gpu/drm/i915/gvt/handlers.c:1666: warning: Function parameter or member 'vgpu' not described in 'bxt_ppat_low_write' drivers/gpu/drm/i915/gvt/handlers.c:1666: warning: Function parameter or member 'offset' not described in 'bxt_ppat_low_write' drivers/gpu/drm/i915/gvt/handlers.c:1666: warning: Function parameter or member 'p_data' not described in 'bxt_ppat_low_write' drivers/gpu/drm/i915/gvt/handlers.c:1666: warning: Function parameter or member 'bytes' not described in 'bxt_ppat_low_write' Signed-off-by: Chris Wilson Acked-by: Zhenyu Wang Link: https://patchwork.freedesktop.org/patch/msgid/20201103204307.15723-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index ce93079cf933..4ddc9c847470 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1651,7 +1651,7 @@ static int edp_psr_imr_iir_write(struct intel_vgpu *vgpu, return 0; } -/** +/* * FixMe: * If guest fills non-priv batch buffer on ApolloLake/Broxton as Mesa i965 did: * 717e7539124d (i965: Use a WC map and memcpy for the batch instead of pwrite.) -- cgit v1.2.3 From 14cb9a7763622460a904c50b8adbf69a83d6cab5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Nov 2020 16:56:11 +0000 Subject: drm/i915/gt: Include semaphore status in print_request() When pretty-printing the requests for debug, also show the status of any semaphore waits as part of its runnable status. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20201119165616.10834-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 0b31670343f5..1ed84ee8ce41 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1321,6 +1321,7 @@ static void print_request(struct drm_printer *m, rq->fence.context, rq->fence.seqno, i915_request_completed(rq) ? "!" : i915_request_started(rq) ? "*" : + !i915_sw_fence_signaled(&rq->semaphore) ? "&" : "", test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags) ? "+" : -- cgit v1.2.3 From 1f0e785a9cc09b430d0fbe4e9ac438d43c245815 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Nov 2020 16:56:12 +0000 Subject: drm/i915: Lift i915_request_show() Extract i915_request_show for reuse in other request chain pretty printers. For a bonus point, quietly change the seqno format from %llx to %lld to match everywhere else. 
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201119165616.10834-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 47 +++---------------------------- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +- drivers/gpu/drm/i915/gt/intel_lrc.h | 2 +- drivers/gpu/drm/i915/i915_request.c | 39 +++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_request.h | 5 ++++ 5 files changed, 50 insertions(+), 45 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 1ed84ee8ce41..c3bb2e9546e6 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1294,45 +1294,6 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine) } } -static int print_sched_attr(const struct i915_sched_attr *attr, - char *buf, int x, int len) -{ - if (attr->priority == I915_PRIORITY_INVALID) - return x; - - x += snprintf(buf + x, len - x, - " prio=%d", attr->priority); - - return x; -} - -static void print_request(struct drm_printer *m, - struct i915_request *rq, - const char *prefix) -{ - const char *name = rq->fence.ops->get_timeline_name(&rq->fence); - char buf[80] = ""; - int x = 0; - - x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf)); - - drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n", - prefix, - rq->fence.context, rq->fence.seqno, - i915_request_completed(rq) ? "!" : - i915_request_started(rq) ? "*" : - !i915_sw_fence_signaled(&rq->semaphore) ? "&" : - "", - test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &rq->fence.flags) ? "+" : - test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &rq->fence.flags) ? 
"-" : - "", - buf, - jiffies_to_msecs(jiffies - rq->emitted_jiffies), - name); -} - static struct intel_timeline *get_timeline(struct i915_request *rq) { struct intel_timeline *tl; @@ -1530,7 +1491,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, intel_context_is_banned(rq->context) ? "*" : ""); len += print_ring(hdr + len, sizeof(hdr) - len, rq); scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); - print_request(m, rq, hdr); + i915_request_show(m, rq, hdr); } for (port = execlists->pending; (rq = *port); port++) { char hdr[160]; @@ -1544,7 +1505,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, intel_context_is_banned(rq->context) ? "*" : ""); len += print_ring(hdr + len, sizeof(hdr) - len, rq); scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); - print_request(m, rq, hdr); + i915_request_show(m, rq, hdr); } rcu_read_unlock(); execlists_active_unlock_bh(execlists); @@ -1688,7 +1649,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (rq) { struct intel_timeline *tl = get_timeline(rq); - print_request(m, rq, "\t\tactive "); + i915_request_show(m, rq, "\t\tactive "); drm_printf(m, "\t\tring->start: 0x%08x\n", i915_ggtt_offset(rq->ring->vma)); @@ -1726,7 +1687,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tDevice is asleep; skipping register dump\n"); } - intel_execlists_show_requests(engine, m, print_request, 8); + intel_execlists_show_requests(engine, m, i915_request_show, 8); drm_printf(m, "HWSP:\n"); hexdump(m, engine->status_page.addr, PAGE_SIZE); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index f7eca93f04bc..b6ab1161942a 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -5981,7 +5981,7 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, void intel_execlists_show_requests(struct intel_engine_cs *engine, struct drm_printer *m, void (*show_request)(struct 
drm_printer *m, - struct i915_request *rq, + const struct i915_request *rq, const char *prefix), unsigned int max) { diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index c2d287f25497..32e6e204f544 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -106,7 +106,7 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, void intel_execlists_show_requests(struct intel_engine_cs *engine, struct drm_printer *m, void (*show_request)(struct drm_printer *m, - struct i915_request *rq, + const struct i915_request *rq, const char *prefix), unsigned int max); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 0e813819b041..673991718ae6 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1855,6 +1855,45 @@ out: return timeout; } +static int print_sched_attr(const struct i915_sched_attr *attr, + char *buf, int x, int len) +{ + if (attr->priority == I915_PRIORITY_INVALID) + return x; + + x += snprintf(buf + x, len - x, + " prio=%d", attr->priority); + + return x; +} + +void i915_request_show(struct drm_printer *m, + const struct i915_request *rq, + const char *prefix) +{ + const char *name = rq->fence.ops->get_timeline_name((struct dma_fence *)&rq->fence); + char buf[80] = ""; + int x = 0; + + x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf)); + + drm_printf(m, "%s %llx:%lld%s%s %s @ %dms: %s\n", + prefix, + rq->fence.context, rq->fence.seqno, + i915_request_completed(rq) ? "!" : + i915_request_started(rq) ? "*" : + !i915_sw_fence_signaled(&rq->semaphore) ? "&" : + "", + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &rq->fence.flags) ? "+" : + test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &rq->fence.flags) ? 
"-" : + "", + buf, + jiffies_to_msecs(jiffies - rq->emitted_jiffies), + name); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_request.c" #include "selftests/i915_request.c" diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 16b721080195..09609071b725 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -43,6 +43,7 @@ struct drm_file; struct drm_i915_gem_object; +struct drm_printer; struct i915_request; struct i915_capture_list { @@ -369,6 +370,10 @@ long i915_request_wait(struct i915_request *rq, #define I915_WAIT_PRIORITY BIT(1) /* small priority bump for the request */ #define I915_WAIT_ALL BIT(2) /* used by i915_gem_object_wait() */ +void i915_request_show(struct drm_printer *m, + const struct i915_request *rq, + const char *prefix); + static inline bool i915_request_signaled(const struct i915_request *rq) { /* The request may live longer than its HWSP, so check flags first! */ -- cgit v1.2.3 From 562675d09a351a81e0dfc9a9d7df0f5f4f2fb6a9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Nov 2020 16:56:13 +0000 Subject: drm/i915/gt: Update request status flags for debug pretty-printer We plan to expand upon the number of available statuses for when we pretty-print the requests along the timelines, and so need a new set of flags. 
We have settled upon: Unready [U] - initial status after being submitted, the request is not ready for execution as it is waiting for external fences Ready [R] - all fences the request was waiting on have been signaled, and the request is now ready for execution and will be in a backend queue - a ready request may still need to wait on semaphores [internal fences] Ready/virtual [V] - same as ready, but queued over multiple backends Executing [E] - the request has been transferred from the backend queue and submitted for execution on HW - a completed request may still be regarded as executing, its status may not be updated until it is retired and removed from the lists Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201119165616.10834-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 +-- drivers/gpu/drm/i915/gt/intel_lrc.c | 15 +++--- drivers/gpu/drm/i915/gt/intel_lrc.h | 3 +- drivers/gpu/drm/i915/i915_request.c | 85 ++++++++++++++++++++++++++----- drivers/gpu/drm/i915/i915_request.h | 3 +- 5 files changed, 88 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index c3bb2e9546e6..d4e988b2816a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1491,7 +1491,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, intel_context_is_banned(rq->context) ? "*" : ""); len += print_ring(hdr + len, sizeof(hdr) - len, rq); scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); - i915_request_show(m, rq, hdr); + i915_request_show(m, rq, hdr, 0); } for (port = execlists->pending; (rq = *port); port++) { char hdr[160]; @@ -1505,7 +1505,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, intel_context_is_banned(rq->context) ? 
"*" : ""); len += print_ring(hdr + len, sizeof(hdr) - len, rq); scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); - i915_request_show(m, rq, hdr); + i915_request_show(m, rq, hdr, 0); } rcu_read_unlock(); execlists_active_unlock_bh(execlists); @@ -1649,7 +1649,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (rq) { struct intel_timeline *tl = get_timeline(rq); - i915_request_show(m, rq, "\t\tactive "); + i915_request_show(m, rq, "\t\tactive ", 0); drm_printf(m, "\t\tring->start: 0x%08x\n", i915_ggtt_offset(rq->ring->vma)); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index b6ab1161942a..5257f3c71366 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -5982,7 +5982,8 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, struct drm_printer *m, void (*show_request)(struct drm_printer *m, const struct i915_request *rq, - const char *prefix), + const char *prefix, + int indent), unsigned int max) { const struct intel_engine_execlists *execlists = &engine->execlists; @@ -5997,7 +5998,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, count = 0; list_for_each_entry(rq, &engine->active.requests, sched.link) { if (count++ < max - 1) - show_request(m, rq, "\t\tE "); + show_request(m, rq, "\t\t", 0); else last = rq; } @@ -6007,7 +6008,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, "\t\t...skipping %d executing requests...\n", count - max); } - show_request(m, last, "\t\tE "); + show_request(m, last, "\t\t", 0); } if (execlists->switch_priority_hint != INT_MIN) @@ -6025,7 +6026,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, priolist_for_each_request(rq, p, i) { if (count++ < max - 1) - show_request(m, rq, "\t\tQ "); + show_request(m, rq, "\t\t", 0); else last = rq; } @@ -6036,7 +6037,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, "\t\t...skipping %d queued requests...\n", 
count - max); } - show_request(m, last, "\t\tQ "); + show_request(m, last, "\t\t", 0); } last = NULL; @@ -6048,7 +6049,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, if (rq) { if (count++ < max - 1) - show_request(m, rq, "\t\tV "); + show_request(m, rq, "\t\t", 0); else last = rq; } @@ -6059,7 +6060,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, "\t\t...skipping %d virtual requests...\n", count - max); } - show_request(m, last, "\t\tV "); + show_request(m, last, "\t\t", 0); } spin_unlock_irqrestore(&engine->active.lock, flags); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 32e6e204f544..802585a308e9 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -107,7 +107,8 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, struct drm_printer *m, void (*show_request)(struct drm_printer *m, const struct i915_request *rq, - const char *prefix), + const char *prefix, + int indent), unsigned int max); struct intel_context * diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 673991718ae6..8d7d29c9e375 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1867,28 +1867,89 @@ static int print_sched_attr(const struct i915_sched_attr *attr, return x; } +static char queue_status(const struct i915_request *rq) +{ + if (i915_request_is_active(rq)) + return 'E'; + + if (i915_request_is_ready(rq)) + return intel_engine_is_virtual(rq->engine) ? 
'V' : 'R'; + + return 'U'; +} + +static const char *run_status(const struct i915_request *rq) +{ + if (i915_request_completed(rq)) + return "!"; + + if (i915_request_started(rq)) + return "*"; + + if (!i915_sw_fence_signaled(&rq->semaphore)) + return "&"; + + return ""; +} + +static const char *fence_status(const struct i915_request *rq) +{ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) + return "+"; + + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) + return "-"; + + return ""; +} + void i915_request_show(struct drm_printer *m, const struct i915_request *rq, - const char *prefix) + const char *prefix, + int indent) { const char *name = rq->fence.ops->get_timeline_name((struct dma_fence *)&rq->fence); char buf[80] = ""; int x = 0; + /* + * The prefix is used to show the queue status, for which we use + * the following flags: + * + * U [Unready] + * - initial status upon being submitted by the user + * + * - the request is not ready for execution as it is waiting + * for external fences + * + * R [Ready] + * - all fences the request was waiting on have been signaled, + * and the request is now ready for execution and will be + * in a backend queue + * + * - a ready request may still need to wait on semaphores + * [internal fences] + * + * V [Ready/virtual] + * - same as ready, but queued over multiple backends + * + * E [Executing] + * - the request has been transferred from the backend queue and + * submitted for execution on HW + * + * - a completed request may still be regarded as executing, its + * status may not be updated until it is retired and removed + * from the lists + */ + x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf)); - drm_printf(m, "%s %llx:%lld%s%s %s @ %dms: %s\n", - prefix, + drm_printf(m, "%s%.*s%c %llx:%lld%s%s %s @ %dms: %s\n", + prefix, indent, " ", + queue_status(rq), rq->fence.context, rq->fence.seqno, - i915_request_completed(rq) ? "!" : - i915_request_started(rq) ? 
"*" : - !i915_sw_fence_signaled(&rq->semaphore) ? "&" : - "", - test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &rq->fence.flags) ? "+" : - test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &rq->fence.flags) ? "-" : - "", + run_status(rq), + fence_status(rq), buf, jiffies_to_msecs(jiffies - rq->emitted_jiffies), name); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 09609071b725..8f6173b1c3df 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -372,7 +372,8 @@ long i915_request_wait(struct i915_request *rq, void i915_request_show(struct drm_printer *m, const struct i915_request *rq, - const char *prefix); + const char *prefix, + int indent); static inline bool i915_request_signaled(const struct i915_request *rq) { -- cgit v1.2.3 From 0986317a45df7ff380f1512b53a2f94ab16922b8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Nov 2020 16:56:14 +0000 Subject: drm/i915/gt: Show all active timelines for debugging Include the active timelines for debugfs/i915_engine_info, so that we can see which have unready requests inflight which are not shown otherwise. 
Suggested-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201119165616.10834-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_timeline.c | 80 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_timeline.h | 9 ++++ drivers/gpu/drm/i915/i915_debugfs.c | 16 ++++--- 3 files changed, 98 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 7ea94d201fe6..512afacd2bdc 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -617,6 +617,86 @@ void intel_gt_fini_timelines(struct intel_gt *gt) GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list)); } +void intel_gt_show_timelines(struct intel_gt *gt, + struct drm_printer *m, + void (*show_request)(struct drm_printer *m, + const struct i915_request *rq, + const char *prefix, + int indent)) +{ + struct intel_gt_timelines *timelines = &gt->timelines; + struct intel_timeline *tl, *tn; + LIST_HEAD(free); + + spin_lock(&timelines->lock); + list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { + unsigned long count, ready, inflight; + struct i915_request *rq, *rn; + struct dma_fence *fence; + + if (!mutex_trylock(&tl->mutex)) { + drm_printf(m, "Timeline %llx: busy; skipping\n", + tl->fence_context); + continue; + } + + intel_timeline_get(tl); + GEM_BUG_ON(!atomic_read(&tl->active_count)); + atomic_inc(&tl->active_count); /* pin the list element */ + spin_unlock(&timelines->lock); + + count = 0; + ready = 0; + inflight = 0; + list_for_each_entry_safe(rq, rn, &tl->requests, link) { + if (i915_request_completed(rq)) + continue; + + count++; + if (i915_request_is_ready(rq)) + ready++; + if (i915_request_is_active(rq)) + inflight++; + } + + drm_printf(m, "Timeline %llx: { ", tl->fence_context); + drm_printf(m, "count: %lu, ready: %lu, inflight: %lu", + count, ready, 
inflight); + drm_printf(m, ", seqno: { current: %d, last: %d }", + *tl->hwsp_seqno, tl->seqno); + fence = i915_active_fence_get(&tl->last_request); + if (fence) { + drm_printf(m, ", engine: %s", + to_request(fence)->engine->name); + dma_fence_put(fence); + } + drm_printf(m, " }\n"); + + if (show_request) { + list_for_each_entry_safe(rq, rn, &tl->requests, link) + show_request(m, rq, "", 2); + } + + mutex_unlock(&tl->mutex); + spin_lock(&timelines->lock); + + /* Resume list iteration after reacquiring spinlock */ + list_safe_reset_next(tl, tn, link); + if (atomic_dec_and_test(&tl->active_count)) + list_del(&tl->link); + + /* Defer the final release to after the spinlock */ + if (refcount_dec_and_test(&tl->kref.refcount)) { + GEM_BUG_ON(atomic_read(&tl->active_count)); + list_add(&tl->link, &free); + } + } + spin_unlock(&timelines->lock); + + list_for_each_entry_safe(tl, tn, &free, link) + __intel_timeline_free(&tl->kref); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "gt/selftests/mock_timeline.c" #include "gt/selftest_timeline.c" diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h index 9882cd911d8e..634acebd0c4b 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -31,6 +31,8 @@ #include "i915_syncmap.h" #include "intel_timeline_types.h" +struct drm_printer; + struct intel_timeline * __intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp, @@ -106,4 +108,11 @@ int intel_timeline_read_hwsp(struct i915_request *from, void intel_gt_init_timelines(struct intel_gt *gt); void intel_gt_fini_timelines(struct intel_gt *gt); +void intel_gt_show_timelines(struct intel_gt *gt, + struct drm_printer *m, + void (*show_request)(struct drm_printer *m, + const struct i915_request *rq, + const char *prefix, + int indent)); + #endif diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 77e76b665098..354b95c438d0 100644 --- 
a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1306,24 +1306,26 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused) static int i915_engine_info(struct seq_file *m, void *unused) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_i915_private *i915 = node_to_i915(m->private); struct intel_engine_cs *engine; intel_wakeref_t wakeref; struct drm_printer p; - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); seq_printf(m, "GT awake? %s [%d]\n", - yesno(dev_priv->gt.awake), - atomic_read(&dev_priv->gt.wakeref.count)); + yesno(i915->gt.awake), + atomic_read(&i915->gt.wakeref.count)); seq_printf(m, "CS timestamp frequency: %u Hz\n", - RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_hz); + RUNTIME_INFO(i915)->cs_timestamp_frequency_hz); p = drm_seq_file_printer(m); - for_each_uabi_engine(engine, dev_priv) + for_each_uabi_engine(engine, i915) intel_engine_dump(engine, &p, "%s\n", engine->name); - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); + intel_gt_show_timelines(&i915->gt, &p, NULL); + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); return 0; } -- cgit v1.2.3 From b5b349b93b0ec9051a98a401a5e105476fd549dd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Nov 2020 16:56:15 +0000 Subject: drm/i915: Lift waiter/signaler iterators Lift the list iteration defines for traversing the signaler/waiter lists into i915_scheduler.h for reuse. 
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201119165616.10834-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 10 ---------- drivers/gpu/drm/i915/i915_scheduler_types.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 5257f3c71366..30759e95da0e 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1836,16 +1836,6 @@ static void virtual_xfer_context(struct virtual_engine *ve, } } -#define for_each_waiter(p__, rq__) \ - list_for_each_entry_lockless(p__, \ - &(rq__)->sched.waiters_list, \ - wait_link) - -#define for_each_signaler(p__, rq__) \ - list_for_each_entry_rcu(p__, \ - &(rq__)->sched.signalers_list, \ - signal_link) - static void defer_request(struct i915_request *rq, struct list_head * const pl) { LIST_HEAD(list); diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index f72e6c397b08..343ed44d5ed4 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -81,4 +81,14 @@ struct i915_dependency { #define I915_DEPENDENCY_WEAK BIT(2) }; +#define for_each_waiter(p__, rq__) \ + list_for_each_entry_lockless(p__, \ + &(rq__)->sched.waiters_list, \ + wait_link) + +#define for_each_signaler(p__, rq__) \ + list_for_each_entry_rcu(p__, \ + &(rq__)->sched.signalers_list, \ + signal_link) + #endif /* _I915_SCHEDULER_TYPES_H_ */ -- cgit v1.2.3 From da7ac715d339d53dfb4e6ce325de842e80897814 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 19 Nov 2020 16:56:16 +0000 Subject: drm/i915: Show timeline dependencies for debug Include the signalers each request in the timeline is waiting on, as a means to try and identify the cause of a stall. 
This can be quite verbose, even as for now we only show each request in the timeline and its immediate antecedents. This generates output like: Timeline 886: { count 1, ready: 0, inflight: 0, seqno: { current: 664, last: 666 }, engine: rcs0 } U 886:29a- prio=0 @ 134ms: gem_exec_parall<4621> U bc1:27a- prio=0 @ 134ms: gem_exec_parall[4917] Timeline 825: { count 1, ready: 0, inflight: 0, seqno: { current: 802, last: 804 }, engine: vcs0 } U 825:324 prio=0 @ 107ms: gem_exec_parall<4518> U b75:140- prio=0 @ 110ms: gem_exec_parall<5486> Timeline b46: { count 1, ready: 0, inflight: 0, seqno: { current: 782, last: 784 }, engine: vcs0 } U b46:310- prio=0 @ 70ms: gem_exec_parall<5428> U c11:170- prio=0 @ 70ms: gem_exec_parall[5501] Timeline 96b: { count 1, ready: 0, inflight: 0, seqno: { current: 632, last: 634 }, engine: vcs0 } U 96b:27a- prio=0 @ 67ms: gem_exec_parall<4878> U b75:19e- prio=0 @ 67ms: gem_exec_parall<5486> Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20201119165616.10834-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 3 ++- drivers/gpu/drm/i915/i915_scheduler.c | 28 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_scheduler.h | 7 +++++++ 3 files changed, 37 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 354b95c438d0..263074c2c097 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -45,6 +45,7 @@ #include "i915_debugfs.h" #include "i915_debugfs_params.h" #include "i915_irq.h" +#include "i915_scheduler.h" #include "i915_trace.h" #include "intel_pm.h" #include "intel_sideband.h" @@ -1323,7 +1324,7 @@ static int i915_engine_info(struct seq_file *m, void *unused) for_each_uabi_engine(engine, i915) intel_engine_dump(engine, &p, "%s\n", engine->name); - intel_gt_show_timelines(&i915->gt, &p, NULL); + 
intel_gt_show_timelines(&i915->gt, &p, i915_request_show_with_schedule); intel_runtime_pm_put(&i915->runtime_pm, wakeref); diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index cbb880b10c65..b9cf9931ebd7 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -504,6 +504,34 @@ void i915_sched_node_fini(struct i915_sched_node *node) spin_unlock_irq(&schedule_lock); } +void i915_request_show_with_schedule(struct drm_printer *m, + const struct i915_request *rq, + const char *prefix, + int indent) +{ + struct i915_dependency *dep; + + i915_request_show(m, rq, prefix, indent); + if (i915_request_completed(rq)) + return; + + rcu_read_lock(); + for_each_signaler(dep, rq) { + const struct i915_request *signaler = + node_to_request(dep->signaler); + + /* Dependencies along the same timeline are expected. */ + if (signaler->timeline == rq->timeline) + continue; + + if (i915_request_completed(signaler)) + continue; + + i915_request_show(m, signaler, prefix, indent + 2); + } + rcu_read_unlock(); +} + static void i915_global_scheduler_shrink(void) { kmem_cache_shrink(global.slab_dependencies); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 6f0bf00fc569..4501e5ac2637 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -13,6 +13,8 @@ #include "i915_scheduler_types.h" +struct drm_printer; + #define priolist_for_each_request(it, plist, idx) \ for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \ list_for_each_entry(it, &(plist)->requests[idx], sched.link) @@ -54,4 +56,9 @@ static inline void i915_priolist_free(struct i915_priolist *p) __i915_priolist_free(p); } +void i915_request_show_with_schedule(struct drm_printer *m, + const struct i915_request *rq, + const char *prefix, + int indent); + #endif /* _I915_SCHEDULER_H_ */ -- cgit v1.2.3 From 67dd0b9677e895a60099fc0a4cc275f88cfee5a9 Mon Sep 17 
00:00:00 2001 From: Chris Wilson Date: Fri, 20 Nov 2020 14:03:12 +0000 Subject: drm/i915/gem: Remove incorrect early dbg print We print out the "logical" context support before we discover whether or not the engines have logical contexts. No one, except Tvrtko, seems to have noticed the error, so the debug message must not be useful to anyone. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20201120140314.24749-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 4fd38101bb56..a6299da64de4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -904,9 +904,6 @@ static void init_contexts(struct i915_gem_contexts *gc) void i915_gem_init__contexts(struct drm_i915_private *i915) { init_contexts(&i915->gem.contexts); - drm_dbg(&i915->drm, "%s context support initialized\n", - DRIVER_CAPS(i915)->has_logical_contexts ? - "logical" : "fake"); } void i915_gem_driver_release__contexts(struct drm_i915_private *i915) -- cgit v1.2.3 From 8005f37ca941d59eda940910185015ed793491d3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 20 Nov 2020 14:03:13 +0000 Subject: drm/i915/selftests: Improve granularity for mocs reset checks Allow us to validate mocs configurations after reset if we have either engine or global reset. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20201120140314.24749-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_mocs.c | 40 +++++++++++++++++---------------- 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index b25eba50c88e..21dcd91cbd62 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -361,29 +361,34 @@ static int active_engine_reset(struct intel_context *ce, static int __live_mocs_reset(struct live_mocs *mocs, struct intel_context *ce) { + struct intel_gt *gt = ce->engine->gt; int err; - err = intel_engine_reset(ce->engine, "mocs"); - if (err) - return err; + if (intel_has_reset_engine(gt)) { + err = intel_engine_reset(ce->engine, "mocs"); + if (err) + return err; - err = check_mocs_engine(mocs, ce); - if (err) - return err; + err = check_mocs_engine(mocs, ce); + if (err) + return err; - err = active_engine_reset(ce, "mocs"); - if (err) - return err; + err = active_engine_reset(ce, "mocs"); + if (err) + return err; - err = check_mocs_engine(mocs, ce); - if (err) - return err; + err = check_mocs_engine(mocs, ce); + if (err) + return err; + } - intel_gt_reset(ce->engine->gt, ce->engine->mask, "mocs"); + if (intel_has_gpu_reset(gt)) { + intel_gt_reset(gt, ce->engine->mask, "mocs"); - err = check_mocs_engine(mocs, ce); - if (err) - return err; + err = check_mocs_engine(mocs, ce); + if (err) + return err; + } return 0; } @@ -398,9 +403,6 @@ static int live_mocs_reset(void *arg) /* Check the mocs setup is retained over per-engine and global resets */ - if (!intel_has_reset_engine(gt)) - return 0; - err = live_mocs_init(&mocs, gt); if (err) return err; -- cgit v1.2.3 From 16cfcb0f3c4bd20b03739a11f5292a3bb413bb24 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 20 Nov 2020 14:03:14 +0000 Subject: 
drm/i915/selftests: Small tweak to put the termination conditions together If we run out of ring space, or exceed the desired runtime, we wish to stop the subtest. Put these checks together, so that we always keep the requests flushed on completion. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20201120140314.24749-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_timeline.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 2edf2b15885f..e4285d5a0360 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -1090,12 +1090,6 @@ static int live_hwsp_read(void *arg) } count++; - if (8 * watcher[1].rq->ring->emit > - 3 * watcher[1].rq->ring->size) { - i915_request_put(rq); - break; - } - /* Flush the timeline before manually wrapping again */ if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, @@ -1104,9 +1098,14 @@ static int live_hwsp_read(void *arg) i915_request_put(rq); goto out; } - retire_requests(tl); i915_request_put(rq); + + /* Single requests are limited to half a ring at most */ + if (8 * watcher[1].rq->ring->emit > + 3 * watcher[1].rq->ring->size) + break; + } while (!__igt_timeout(end_time, NULL)); WRITE_ONCE(*(u32 *)tl->hwsp_seqno, 0xdeadbeef); -- cgit v1.2.3 From 4ee73792574643b836b0503f06c27c0904c89f0e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 21 Nov 2020 19:03:52 +0000 Subject: drm/i915/gt: Plug IPS into intel_rps_set The old IPS interface did not match the RPS interface that we tried to plug it into (bool vs int return). Once repaired, our minimal selftesting is finally happy! 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20201121190352.15996-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_rps.c | 34 ++++++++++++++++++++++------------ drivers/gpu/drm/i915/gt/selftest_rps.c | 5 ++++- 2 files changed, 26 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 0d88f17799ff..b13e7845d483 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -400,7 +400,7 @@ static unsigned int gen5_invert_freq(struct intel_rps *rps, return val; } -static bool gen5_rps_set(struct intel_rps *rps, u8 val) +static int __gen5_rps_set(struct intel_rps *rps, u8 val) { struct intel_uncore *uncore = rps_to_uncore(rps); u16 rgvswctl; @@ -410,7 +410,7 @@ static bool gen5_rps_set(struct intel_rps *rps, u8 val) rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); if (rgvswctl & MEMCTL_CMD_STS) { DRM_DEBUG("gpu busy, RCS change rejected\n"); - return false; /* still busy with another command */ + return -EBUSY; /* still busy with another command */ } /* Invert the frequency bin into an ips delay */ @@ -426,7 +426,18 @@ static bool gen5_rps_set(struct intel_rps *rps, u8 val) rgvswctl |= MEMCTL_CMD_STS; intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); - return true; + return 0; +} + +static int gen5_rps_set(struct intel_rps *rps, u8 val) +{ + int err; + + spin_lock_irq(&mchdev_lock); + err = __gen5_rps_set(rps, val); + spin_unlock_irq(&mchdev_lock); + + return err; } static unsigned long intel_pxfreq(u32 vidfreq) @@ -557,7 +568,7 @@ static bool gen5_rps_enable(struct intel_rps *rps) "stuck trying to change perf mode\n"); mdelay(1); - gen5_rps_set(rps, rps->cur_freq); + __gen5_rps_set(rps, rps->cur_freq); rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC); rps->ips.last_count1 += intel_uncore_read(uncore, DDREC); @@ -599,7 +610,7 @@ static void 
gen5_rps_disable(struct intel_rps *rps) intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); /* Go back to the starting frequency */ - gen5_rps_set(rps, rps->idle_freq); + __gen5_rps_set(rps, rps->idle_freq); mdelay(1); rgvswctl |= MEMCTL_CMD_STS; intel_uncore_write(uncore, MEMSWCTL, rgvswctl); @@ -797,20 +808,19 @@ static int rps_set(struct intel_rps *rps, u8 val, bool update) struct drm_i915_private *i915 = rps_to_i915(rps); int err; - if (INTEL_GEN(i915) < 6) - return 0; - if (val == rps->last_freq) return 0; if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) err = vlv_rps_set(rps, val); - else + else if (INTEL_GEN(i915) >= 6) err = gen6_rps_set(rps, val); + else + err = gen5_rps_set(rps, val); if (err) return err; - if (update) + if (update && INTEL_GEN(i915) >= 6) gen6_rps_set_thresholds(rps, val); rps->last_freq = val; @@ -1794,7 +1804,7 @@ void gen5_rps_irq_handler(struct intel_rps *rps) rps->min_freq_softlimit, rps->max_freq_softlimit); - if (new_freq != rps->cur_freq && gen5_rps_set(rps, new_freq)) + if (new_freq != rps->cur_freq && !__gen5_rps_set(rps, new_freq)) rps->cur_freq = new_freq; spin_unlock(&mchdev_lock); @@ -2105,7 +2115,7 @@ bool i915_gpu_turbo_disable(void) spin_lock_irq(&mchdev_lock); rps->max_freq_softlimit = rps->min_freq; - ret = gen5_rps_set(&i915->gt.rps, rps->min_freq); + ret = !__gen5_rps_set(&i915->gt.rps, rps->min_freq); spin_unlock_irq(&mchdev_lock); drm_dev_put(&i915->drm); diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index aa5675ecb5cc..967641fee42a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -185,7 +185,10 @@ static u8 rps_set_check(struct intel_rps *rps, u8 freq) { mutex_lock(&rps->lock); GEM_BUG_ON(!intel_rps_is_active(rps)); - intel_rps_set(rps, freq); + if (wait_for(!intel_rps_set(rps, freq), 50)) { + mutex_unlock(&rps->lock); + return 0; + } GEM_BUG_ON(rps->last_freq != freq); mutex_unlock(&rps->lock); -- cgit v1.2.3 
From 9d5612ca165a58aacc160465532e7998b9aab270 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Nov 2020 11:37:14 +0000 Subject: drm/i915/gt: Defer enabling the breadcrumb interrupt to after submission Move the slow register write and readback out of the critical path for execlists submission and delay it until the following worker, shaving off around 200us. Note that the same signal_irq_work() is allowed to run concurrently on each CPU (but it will only be queued once, once running though it can be requeued and reexecuted) so we have to remember to lock the global interactions as we cannot rely on the signal_irq_work() itself providing the serialisation (in contrast to a tasklet). By pushing the arm/disarm into the central signaling worker we can close the race for disarming the interrupt (and dropping its associated GT wakeref) on parking the engine. If we lose the race, that GT wakeref may be held indefinitely, preventing the machine from sleeping while the GPU is ostensibly idle. v2: Move the self-arming parking of the signal_irq_work to a flush of the irq-work from intel_breadcrumbs_park(). 
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2271 Fixes: e23005604b2f ("drm/i915/gt: Hold context/request reference while breadcrumbs are active") Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201123113717.20500-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 109 ++++++++++++++++++---------- 1 file changed, 70 insertions(+), 39 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index d8b206e53660..8d85683314e1 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -30,18 +30,21 @@ #include "i915_trace.h" #include "intel_breadcrumbs.h" #include "intel_context.h" +#include "intel_engine_pm.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" -static void irq_enable(struct intel_engine_cs *engine) +static bool irq_enable(struct intel_engine_cs *engine) { if (!engine->irq_enable) - return; + return false; /* Caller disables interrupts */ spin_lock(&engine->gt->irq_lock); engine->irq_enable(engine); spin_unlock(&engine->gt->irq_lock); + + return true; } static void irq_disable(struct intel_engine_cs *engine) @@ -57,12 +60,11 @@ static void irq_disable(struct intel_engine_cs *engine) static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) { - lockdep_assert_held(&b->irq_lock); - - if (!b->irq_engine || b->irq_armed) - return; - - if (!intel_gt_pm_get_if_awake(b->irq_engine->gt)) + /* + * Since we are waiting on a request, the GPU should be busy + * and should have its own rpm reference. + */ + if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt))) return; /* @@ -73,25 +75,24 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) */ WRITE_ONCE(b->irq_armed, true); - /* - * Since we are waiting on a request, the GPU should be busy - * and should have its own rpm reference. 
This is tracked - * by i915->gt.awake, we can forgo holding our own wakref - * for the interrupt as before i915->gt.awake is released (when - * the driver is idle) we disarm the breadcrumbs. - */ - - if (!b->irq_enabled++) - irq_enable(b->irq_engine); + /* Requests may have completed before we could enable the interrupt. */ + if (!b->irq_enabled++ && irq_enable(b->irq_engine)) + irq_work_queue(&b->irq_work); } -static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) +static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) { - lockdep_assert_held(&b->irq_lock); - - if (!b->irq_engine || !b->irq_armed) + if (!b->irq_engine) return; + spin_lock(&b->irq_lock); + if (!b->irq_armed) + __intel_breadcrumbs_arm_irq(b); + spin_unlock(&b->irq_lock); +} + +static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) +{ GEM_BUG_ON(!b->irq_enabled); if (!--b->irq_enabled) irq_disable(b->irq_engine); @@ -105,8 +106,6 @@ static void add_signaling_context(struct intel_breadcrumbs *b, { intel_context_get(ce); list_add_tail(&ce->signal_link, &b->signalers); - if (list_is_first(&ce->signal_link, &b->signalers)) - __intel_breadcrumbs_arm_irq(b); } static void remove_signaling_context(struct intel_breadcrumbs *b, @@ -197,7 +196,32 @@ static void signal_irq_work(struct irq_work *work) spin_lock(&b->irq_lock); - if (list_empty(&b->signalers)) + /* + * Keep the irq armed until the interrupt after all listeners are gone. + * + * Enabling/disabling the interrupt is rather costly, roughly a couple + * of hundred microseconds. If we are proactive and enable/disable + * the interrupt around every request that wants a breadcrumb, we + * quickly drown in the extra orders of magnitude of latency imposed + * on request submission. + * + * So we try to be lazy, and keep the interrupts enabled until no + * more listeners appear within a breadcrumb interrupt interval (that + * is until a request completes that no one cares about). 
The + * observation is that listeners come in batches, and will often + * listen to a bunch of requests in succession. Though note on icl+, + * interrupts are always enabled due to concerns with rc6 being + * dysfunctional with per-engine interrupt masking. + * + * We also try to avoid raising too many interrupts, as they may + * be generated by userspace batches and it is unfortunately rather + * too easy to drown the CPU under a flood of GPU interrupts. Thus + * whenever no one appears to be listening, we turn off the interrupts. + * Fewer interrupts should conserve power -- at the very least, fewer + * interrupt draw less ire from other users of the system and tools + * like powertop. + */ + if (b->irq_armed && list_empty(&b->signalers)) __intel_breadcrumbs_disarm_irq(b); list_splice_init(&b->signaled_requests, &signal); @@ -251,6 +275,9 @@ static void signal_irq_work(struct irq_work *work) i915_request_put(rq); } + + if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers)) + intel_breadcrumbs_arm_irq(b); } struct intel_breadcrumbs * @@ -292,21 +319,22 @@ void intel_breadcrumbs_reset(struct intel_breadcrumbs *b) void intel_breadcrumbs_park(struct intel_breadcrumbs *b) { - unsigned long flags; - - if (!READ_ONCE(b->irq_armed)) - return; - - spin_lock_irqsave(&b->irq_lock, flags); - __intel_breadcrumbs_disarm_irq(b); - spin_unlock_irqrestore(&b->irq_lock, flags); - - if (!list_empty(&b->signalers)) - irq_work_queue(&b->irq_work); + /* Kick the work once more to drain the signalers */ + irq_work_sync(&b->irq_work); + while (unlikely(READ_ONCE(b->irq_armed))) { + local_irq_disable(); + signal_irq_work(&b->irq_work); + local_irq_enable(); + cond_resched(); + } + GEM_BUG_ON(!list_empty(&b->signalers)); } void intel_breadcrumbs_free(struct intel_breadcrumbs *b) { + irq_work_sync(&b->irq_work); + GEM_BUG_ON(!list_empty(&b->signalers)); + GEM_BUG_ON(b->irq_armed); kfree(b); } @@ -362,9 +390,12 @@ static void insert_breadcrumb(struct i915_request *rq, 
GEM_BUG_ON(!check_signal_order(ce, rq)); set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); - /* Check after attaching to irq, interrupt may have already fired. */ - if (__request_completed(rq)) - irq_work_queue(&b->irq_work); + /* + * Defer enabling the interrupt to after HW submission and recheck + * the request as it may have completed and raised the interrupt as + * we were attaching it into the lists. + */ + irq_work_queue(&b->irq_work); } bool i915_request_enable_breadcrumb(struct i915_request *rq) -- cgit v1.2.3 From 6cfe66eb71b638968350b5f0fff051fd25eb75fb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Nov 2020 11:37:15 +0000 Subject: drm/i915/gt: Track signaled breadcrumbs outside of the breadcrumb spinlock Make b->signaled_requests a lockless-list so that we can manipulate it outside of the b->irq_lock. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201123113717.20500-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 34 +++++++++++++++-------- drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h | 2 +- drivers/gpu/drm/i915/i915_request.h | 6 +++- 3 files changed, 28 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 8d85683314e1..43cfabb102ea 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -173,26 +173,34 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl) intel_engine_add_retire(b->irq_engine, tl); } -static bool __signal_request(struct i915_request *rq, struct list_head *signals) +static bool __signal_request(struct i915_request *rq) { - clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); - if (!__dma_fence_signal(&rq->fence)) { i915_request_put(rq); return false; } - list_add_tail(&rq->signal_link, signals); return true; } +static struct 
llist_node * +slist_add(struct llist_node *node, struct llist_node *head) +{ + node->next = head; + return node; +} + static void signal_irq_work(struct irq_work *work) { struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work); const ktime_t timestamp = ktime_get(); + struct llist_node *signal, *sn; struct intel_context *ce, *cn; struct list_head *pos, *next; - LIST_HEAD(signal); + + signal = NULL; + if (unlikely(!llist_empty(&b->signaled_requests))) + signal = llist_del_all(&b->signaled_requests); spin_lock(&b->irq_lock); @@ -224,8 +232,6 @@ static void signal_irq_work(struct irq_work *work) if (b->irq_armed && list_empty(&b->signalers)) __intel_breadcrumbs_disarm_irq(b); - list_splice_init(&b->signaled_requests, &signal); - list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) { GEM_BUG_ON(list_empty(&ce->signals)); @@ -242,7 +248,10 @@ static void signal_irq_work(struct irq_work *work) * spinlock as the callback chain may end up adding * more signalers to the same context or engine. */ - __signal_request(rq, &signal); + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + if (__signal_request(rq)) + /* We own signal_node now, xfer to local list */ + signal = slist_add(&rq->signal_node, signal); } /* @@ -262,9 +271,9 @@ static void signal_irq_work(struct irq_work *work) spin_unlock(&b->irq_lock); - list_for_each_safe(pos, next, &signal) { + llist_for_each_safe(signal, sn, signal) { struct i915_request *rq = - list_entry(pos, typeof(*rq), signal_link); + llist_entry(signal, typeof(*rq), signal_node); struct list_head cb_list; spin_lock(&rq->lock); @@ -291,7 +300,7 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine) spin_lock_init(&b->irq_lock); INIT_LIST_HEAD(&b->signalers); - INIT_LIST_HEAD(&b->signaled_requests); + init_llist_head(&b->signaled_requests); init_irq_work(&b->irq_work, signal_irq_work); @@ -355,7 +364,8 @@ static void insert_breadcrumb(struct i915_request *rq, * its signal completion. 
*/ if (__request_completed(rq)) { - if (__signal_request(rq, &b->signaled_requests)) + if (__signal_request(rq) && + llist_add(&rq->signal_node, &b->signaled_requests)) irq_work_queue(&b->irq_work); return; } diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h index 8e53b9942695..3fa19820b37a 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h @@ -35,7 +35,7 @@ struct intel_breadcrumbs { struct intel_engine_cs *irq_engine; struct list_head signalers; - struct list_head signaled_requests; + struct llist_head signaled_requests; struct irq_work irq_work; /* for use from inside irq_lock */ diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 8f6173b1c3df..b222f7b46e9c 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -177,7 +177,11 @@ struct i915_request { struct intel_context *context; struct intel_ring *ring; struct intel_timeline __rcu *timeline; - struct list_head signal_link; + + union { + struct list_head signal_link; + struct llist_node signal_node; + }; /* * The rcu epoch of when this request was allocated. Used to judiciously -- cgit v1.2.3 From 3aef910d26ef48b8a79d48b006dc04383b86dd31 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Nov 2020 11:37:16 +0000 Subject: drm/i915/gt: Don't cancel the interrupt shadow too early We currently want to keep the interrupt enabled until the interrupt after which we have no more work to do. This heuristic was broken by us kicking the irq-work on adding a completed request without attaching a signaler -- hence it appearing to the irq-worker that an interrupt had fired when we were idle. Fixes: 2854d866327a ("drm/i915/gt: Replace intel_engine_transfer_stale