aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2026-03-07 05:29:50 -1000
committerTejun Heo <tj@kernel.org>2026-03-07 05:29:50 -1000
commit84b1a0ea0b7c23dec240783a592e480780efe459 (patch)
treefb54f1eda2215bcc252843ba7e2b940b0a0bfa49 /kernel
parent35250720d6ed1e83e0d1e12b7e8bf7b8316d7d58 (diff)
sched_ext: Implement scx_bpf_dsq_reenq() for user DSQs
scx_bpf_dsq_reenq() currently only supports local DSQs. Extend it to support user-defined DSQs by adding a deferred re-enqueue mechanism similar to the local DSQ handling. Add per-cpu deferred_reenq_user_node/flags to scx_dsq_pcpu and deferred_reenq_users list to scx_rq. When scx_bpf_dsq_reenq() is called on a user DSQ, the DSQ's per-cpu node is added to the current rq's deferred list. process_deferred_reenq_users() then iterates the DSQ using the cursor helpers and re-enqueues each task. Signed-off-by: Tejun Heo <tj@kernel.org> Reviewed-by: Andrea Righi <arighi@nvidia.com>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/sched/ext.c128
-rw-r--r--kernel/sched/sched.h1
2 files changed, 129 insertions, 0 deletions
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index f51e4c20cd95..805c6689c99a 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -1181,6 +1181,18 @@ static void schedule_dsq_reenq(struct scx_sched *sch, struct scx_dispatch_q *dsq
}
schedule_deferred(rq);
+ } else if (!(dsq->id & SCX_DSQ_FLAG_BUILTIN)) {
+ struct rq *rq = this_rq();
+ struct scx_dsq_pcpu *dsq_pcpu = per_cpu_ptr(dsq->pcpu, cpu_of(rq));
+ struct scx_deferred_reenq_user *dru = &dsq_pcpu->deferred_reenq_user;
+
+ scoped_guard (raw_spinlock_irqsave, &rq->scx.deferred_reenq_lock) {
+ if (list_empty(&dru->node))
+ list_move_tail(&dru->node, &rq->scx.deferred_reenq_users);
+ dru->flags |= reenq_flags;
+ }
+
+ schedule_deferred(rq);
} else {
scx_error(sch, "DSQ 0x%llx not allowed for reenq", dsq->id);
}
@@ -3784,12 +3796,108 @@ static void process_deferred_reenq_locals(struct rq *rq)
}
}
+static void reenq_user(struct rq *rq, struct scx_dispatch_q *dsq, u64 reenq_flags)
+{
+ struct rq *locked_rq = rq;
+ struct scx_sched *sch = dsq->sched;
+ struct scx_dsq_list_node cursor = INIT_DSQ_LIST_CURSOR(cursor, dsq, 0);
+ struct task_struct *p;
+ s32 nr_enqueued = 0;
+
+ lockdep_assert_rq_held(rq);
+
+ raw_spin_lock(&dsq->lock);
+
+ while (likely(!READ_ONCE(sch->bypass_depth))) {
+ struct rq *task_rq;
+
+ p = nldsq_cursor_next_task(&cursor, dsq);
+ if (!p)
+ break;
+
+ if (!task_should_reenq(p, reenq_flags))
+ continue;
+
+ task_rq = task_rq(p);
+
+ if (locked_rq != task_rq) {
+ if (locked_rq)
+ raw_spin_rq_unlock(locked_rq);
+ if (unlikely(!raw_spin_rq_trylock(task_rq))) {
+ raw_spin_unlock(&dsq->lock);
+ raw_spin_rq_lock(task_rq);
+ raw_spin_lock(&dsq->lock);
+ }
+ locked_rq = task_rq;
+
+ /* did we lose @p while switching locks? */
+ if (nldsq_cursor_lost_task(&cursor, task_rq, dsq, p))
+ continue;
+ }
+
+ /* @p is on @dsq, its rq and @dsq are locked */
+ dispatch_dequeue_locked(p, dsq);
+ raw_spin_unlock(&dsq->lock);
+ do_enqueue_task(task_rq, p, SCX_ENQ_REENQ, -1);
+
+ if (!(++nr_enqueued % SCX_TASK_ITER_BATCH)) {
+ raw_spin_rq_unlock(locked_rq);
+ locked_rq = NULL;
+ cpu_relax();
+ }
+
+ raw_spin_lock(&dsq->lock);
+ }
+
+ list_del_init(&cursor.node);
+ raw_spin_unlock(&dsq->lock);
+
+ if (locked_rq != rq) {
+ if (locked_rq)
+ raw_spin_rq_unlock(locked_rq);
+ raw_spin_rq_lock(rq);
+ }
+}
+
+static void process_deferred_reenq_users(struct rq *rq)
+{
+ lockdep_assert_rq_held(rq);
+
+ while (true) {
+ struct scx_dispatch_q *dsq;
+ u64 reenq_flags = 0;
+
+ scoped_guard (raw_spinlock, &rq->scx.deferred_reenq_lock) {
+ struct scx_deferred_reenq_user *dru =
+ list_first_entry_or_null(&rq->scx.deferred_reenq_users,
+ struct scx_deferred_reenq_user,
+ node);
+ struct scx_dsq_pcpu *dsq_pcpu;
+
+ if (!dru)
+ return;
+
+ dsq_pcpu = container_of(dru, struct scx_dsq_pcpu,
+ deferred_reenq_user);
+ dsq = dsq_pcpu->dsq;
+ swap(dru->flags, reenq_flags);
+ list_del_init(&dru->node);
+ }
+
+ BUG_ON(dsq->id & SCX_DSQ_FLAG_BUILTIN);
+ reenq_user(rq, dsq, reenq_flags);
+ }
+}
+
static void run_deferred(struct rq *rq)
{
process_ddsp_deferred_locals(rq);
if (!list_empty(&rq->scx.deferred_reenq_locals))
process_deferred_reenq_locals(rq);
+
+ if (!list_empty(&rq->scx.deferred_reenq_users))
+ process_deferred_reenq_users(rq);
}
#ifdef CONFIG_NO_HZ_FULL
@@ -4119,6 +4227,7 @@ static s32 init_dsq(struct scx_dispatch_q *dsq, u64 dsq_id,
struct scx_dsq_pcpu *pcpu = per_cpu_ptr(dsq->pcpu, cpu);
pcpu->dsq = dsq;
+ INIT_LIST_HEAD(&pcpu->deferred_reenq_user.node);
}
return 0;
@@ -4126,6 +4235,23 @@ static s32 init_dsq(struct scx_dispatch_q *dsq, u64 dsq_id,
static void exit_dsq(struct scx_dispatch_q *dsq)
{
+ s32 cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct scx_dsq_pcpu *pcpu = per_cpu_ptr(dsq->pcpu, cpu);
+ struct scx_deferred_reenq_user *dru = &pcpu->deferred_reenq_user;
+ struct rq *rq = cpu_rq(cpu);
+
+ /*
+ * There must have been a RCU grace period since the last
+ * insertion and @dsq should be off the deferred list by now.
+ */
+ if (WARN_ON_ONCE(!list_empty(&dru->node))) {
+ guard(raw_spinlock_irqsave)(&rq->scx.deferred_reenq_lock);
+ list_del_init(&dru->node);
+ }
+ }
+
free_percpu(dsq->pcpu);
}
@@ -7308,6 +7434,7 @@ void __init init_sched_ext_class(void)
BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_wait, GFP_KERNEL, n));
raw_spin_lock_init(&rq->scx.deferred_reenq_lock);
INIT_LIST_HEAD(&rq->scx.deferred_reenq_locals);
+ INIT_LIST_HEAD(&rq->scx.deferred_reenq_users);
rq->scx.deferred_irq_work = IRQ_WORK_INIT_HARD(deferred_irq_workfn);
rq->scx.kick_cpus_irq_work = IRQ_WORK_INIT_HARD(kick_cpus_irq_workfn);
@@ -8354,6 +8481,7 @@ __bpf_kfunc struct task_struct *scx_bpf_dsq_peek(u64 dsq_id,
* supported:
*
* - Local DSQs (%SCX_DSQ_LOCAL or %SCX_DSQ_LOCAL_ON | $cpu)
+ * - User DSQs
*
* Re-enqueues are performed asynchronously. Can be called from anywhere.
*/
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0794852524e7..893f89ce2a77 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -810,6 +810,7 @@ struct scx_rq {
raw_spinlock_t deferred_reenq_lock;
struct list_head deferred_reenq_locals; /* scheds requesting reenq of local DSQ */
+ struct list_head deferred_reenq_users; /* user DSQs requesting reenq */
struct balance_callback deferred_bal_cb;
struct irq_work deferred_irq_work;
struct irq_work kick_cpus_irq_work;