author     Linus Torvalds <torvalds@linux-foundation.org>    2025-07-29 17:42:52 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>    2025-07-29 17:42:52 -0700
commit     bf76f23aa1c178e9115eba17f699fa726aed669b (patch)
tree       a88270238eae8a0e6d98225c7de4b06bc2d2fb37 /kernel
parent     14bed9bc81bae64db98349319f367bfc7dab0afd (diff)
parent     1b5f1454091e9e9fb5c944b3161acf4ec0894d0d (diff)
Merge tag 'sched-core-2025-07-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
 "Core scheduler changes:

   - Better tracking of maximum lag of tasks in presence of different
     slices duration, for better handling of lag in the fair scheduler
     (Vincent Guittot)

   - Clean up and standardize #if/#else/#endif markers throughout the
     entire scheduler code base (Ingo Molnar)

   - Make SMP unconditional: build the SMP scheduler's data structures
     and logic on UP kernels too, even though they are not used there,
     to simplify the scheduler and remove around 200 #ifdef/[#else]/#endif
     blocks from the scheduler (Ingo Molnar)

   - Reorganize cgroup bandwidth control interface handling for better
     interfacing with sched_ext (Tejun Heo)

  Balancing:

   - Bump sd->max_newidle_lb_cost when newidle balance fails (Chris Mason)

   - Remove sched_domain_topology_level::flags to simplify the code
     (Prateek Nayak)

   - Simplify and clean up build_sched_topology() (Li Chen)

   - Optimize build_sched_topology() on large machines (Li Chen)

  Real-time scheduling:

   - Add initial version of proxy execution: a mechanism for mutex-owning
     tasks to inherit the scheduling context of higher-priority waiters.
     Currently limited to a single runqueue, conditional on CONFIG_EXPERT,
     and subject to other limitations (John Stultz, Peter Zijlstra,
     Valentin Schneider)

   - Deadline scheduler (Juri Lelli):

      - Fix dl_servers initialization order (Juri Lelli)
      - Fix DL scheduler's root domain reinitialization logic (Juri Lelli)
      - Fix accounting bugs after global limits change (Juri Lelli)
      - Fix scalability regression by implementing less aggressive
        dl_server handling (Peter Zijlstra)

  PSI:

   - Improve scalability by optimizing psi_group_change() cpu_clock()
     usage (Peter Zijlstra)

  Rust changes:

   - Make Task, CondVar and PollCondVar methods inline to avoid
     unnecessary function calls (Kunwu Chan, Panagiotis Foliadis)

   - Add might_sleep() support for Rust code: Rust's "#[track_caller]"
     mechanism is used so that Rust's might_sleep() doesn't need to be
     defined as a macro (Fujita Tomonori)

   - Introduce file_from_location() (Boqun Feng)

  Debugging & instrumentation:

   - Make clangd usable with scheduler source code files again
     (Peter Zijlstra)

   - tools: Add root_domains_dump.py which dumps root domains info
     (Juri Lelli)

   - tools: Add dl_bw_dump.py for printing bandwidth accounting info
     (Juri Lelli)

  Misc cleanups & fixes:

   - Remove play_idle() (Feng Lee)

   - Fix check_preemption_disabled() (Sebastian Andrzej Siewior)

   - Do not call __put_task_struct() on RT if pi_blocked_on is set
     (Luis Claudio R. Goncalves)

   - Correct the comment in place_entity() (wang wei)"

* tag 'sched-core-2025-07-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (84 commits)
  sched/idle: Remove play_idle()
  sched: Do not call __put_task_struct() on rt if pi_blocked_on is set
  sched: Start blocked_on chain processing in find_proxy_task()
  sched: Fix proxy/current (push,pull)ability
  sched: Add an initial sketch of the find_proxy_task() function
  sched: Fix runtime accounting w/ split exec & sched contexts
  sched: Move update_curr_task logic into update_curr_se
  locking/mutex: Add p->blocked_on wrappers for correctness checks
  locking/mutex: Rework task_struct::blocked_on
  sched: Add CONFIG_SCHED_PROXY_EXEC & boot argument to enable/disable
  sched/topology: Remove sched_domain_topology_level::flags
  x86/smpboot: avoid SMT domain attach/destroy if SMT is not enabled
  x86/smpboot: moves x86_topology to static initialize and truncate
  x86/smpboot: remove redundant CONFIG_SCHED_SMT
  smpboot: introduce SDTL_INIT() helper to tidy sched topology setup
  tools/sched: Add dl_bw_dump.py for printing bandwidth accounting info
  tools/sched: Add root_domains_dump.py which dumps root domains info
  sched/deadline: Fix accounting after global limits change
  sched/deadline: Reset extra_bw to max_bw when clearing root domains
  sched/deadline: Initialize dl_servers after SMP
  ...
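
The proxy-execution feature described above is gated by CONFIG_SCHED_PROXY_EXEC plus a boot argument; the kernel/sched/core.c hunk below adds the __sched_proxy_exec static key and its setup_proxy_exec() parser. As a minimal, illustrative sketch only, here is how code elsewhere in the scheduler would typically query such a gate; the helper name sched_proxy_exec() and its exact form are assumptions, not something shown in this kernel/ diff:

  #ifdef CONFIG_SCHED_PROXY_EXEC
  DECLARE_STATIC_KEY_TRUE(__sched_proxy_exec);
  static inline bool sched_proxy_exec(void)
  {
          /* Cheap, patched-in test of the key toggled by setup_proxy_exec(). */
          return static_branch_likely(&__sched_proxy_exec);
  }
  #else /* !CONFIG_SCHED_PROXY_EXEC */
  static inline bool sched_proxy_exec(void)
  {
          return false;
  }
  #endif /* !CONFIG_SCHED_PROXY_EXEC */

Going by the parsing in setup_proxy_exec() in the core.c hunk, booting with sched_proxy_exec=0 disables the key at boot, while sched_proxy_exec or sched_proxy_exec=1 enables it.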
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/fork.c                        3
-rw-r--r--  kernel/locking/mutex-debug.c         9
-rw-r--r--  kernel/locking/mutex.c              18
-rw-r--r--  kernel/locking/mutex.h               3
-rw-r--r--  kernel/locking/ww_mutex.h           16
-rw-r--r--  kernel/sched/autogroup.c             9
-rw-r--r--  kernel/sched/autogroup.h             6
-rw-r--r--  kernel/sched/build_policy.c          6
-rw-r--r--  kernel/sched/build_utility.c         9
-rw-r--r--  kernel/sched/clock.c                 7
-rw-r--r--  kernel/sched/completion.c            5
-rw-r--r--  kernel/sched/core.c                869
-rw-r--r--  kernel/sched/core_sched.c            2
-rw-r--r--  kernel/sched/cpuacct.c               2
-rw-r--r--  kernel/sched/cpudeadline.c           1
-rw-r--r--  kernel/sched/cpudeadline.h           4
-rw-r--r--  kernel/sched/cpufreq.c               1
-rw-r--r--  kernel/sched/cpufreq_schedutil.c     6
-rw-r--r--  kernel/sched/cpupri.c                1
-rw-r--r--  kernel/sched/cpupri.h                5
-rw-r--r--  kernel/sched/cputime.c              17
-rw-r--r--  kernel/sched/deadline.c            208
-rw-r--r--  kernel/sched/debug.c                47
-rw-r--r--  kernel/sched/fair.c                408
-rw-r--r--  kernel/sched/idle.c                 15
-rw-r--r--  kernel/sched/isolation.c             2
-rw-r--r--  kernel/sched/loadavg.c               6
-rw-r--r--  kernel/sched/membarrier.c            2
-rw-r--r--  kernel/sched/pelt.c                  5
-rw-r--r--  kernel/sched/pelt.h                 67
-rw-r--r--  kernel/sched/psi.c                 129
-rw-r--r--  kernel/sched/rt.c                  112
-rw-r--r--  kernel/sched/sched-pelt.h            1
-rw-r--r--  kernel/sched/sched.h               243
-rw-r--r--  kernel/sched/smp.h                   7
-rw-r--r--  kernel/sched/stats.c                 5
-rw-r--r--  kernel/sched/stats.h                10
-rw-r--r--  kernel/sched/stop_task.c             5
-rw-r--r--  kernel/sched/swait.c                 1
-rw-r--r--  kernel/sched/syscalls.c             15
-rw-r--r--  kernel/sched/topology.c             57
-rw-r--r--  kernel/sched/wait.c                  1
-rw-r--r--  kernel/sched/wait_bit.c              3
-rw-r--r--  kernel/smpboot.c                     4
44 files changed, 1094 insertions, 1258 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 321b8dd74eac..f82b77eef7fe 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2127,9 +2127,8 @@ __latent_entropy struct task_struct *copy_process(
lockdep_init_task(p);
#endif
-#ifdef CONFIG_DEBUG_MUTEXES
p->blocked_on = NULL; /* not blocked yet */
-#endif
+
#ifdef CONFIG_BCACHE
p->sequential_io = 0;
p->sequential_io_avg = 0;
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index 6e6f6071cfa2..949103fd8e9b 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -53,17 +53,18 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
{
lockdep_assert_held(&lock->wait_lock);
- /* Mark the current thread as blocked on the lock: */
- task->blocked_on = waiter;
+ /* Current thread can't be already blocked (since it's executing!) */
+ DEBUG_LOCKS_WARN_ON(__get_task_blocked_on(task));
}
void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
struct task_struct *task)
{
+ struct mutex *blocked_on = __get_task_blocked_on(task);
+
DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
DEBUG_LOCKS_WARN_ON(waiter->task != task);
- DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter);
- task->blocked_on = NULL;
+ DEBUG_LOCKS_WARN_ON(blocked_on && blocked_on != lock);
INIT_LIST_HEAD(&waiter->list);
waiter->task = NULL;
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index a39ecccbd106..80d778fedd60 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -644,6 +644,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
goto err_early_kill;
}
+ __set_task_blocked_on(current, lock);
set_current_state(state);
trace_contention_begin(lock, LCB_F_MUTEX);
for (;;) {
@@ -680,6 +681,12 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
first = __mutex_waiter_is_first(lock, &waiter);
+ /*
+ * As we likely have been woken up by task
+ * that has cleared our blocked_on state, re-set
+ * it to the lock we are trying to acquire.
+ */
+ set_task_blocked_on(current, lock);
set_current_state(state);
/*
* Here we order against unlock; we must either see it change
@@ -691,8 +698,15 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
if (first) {
trace_contention_begin(lock, LCB_F_MUTEX | LCB_F_SPIN);
+ /*
+ * mutex_optimistic_spin() can call schedule(), so
+ * clear blocked on so we don't become unselectable
+ * to run.
+ */
+ clear_task_blocked_on(current, lock);
if (mutex_optimistic_spin(lock, ww_ctx, &waiter))
break;
+ set_task_blocked_on(current, lock);
trace_contention_begin(lock, LCB_F_MUTEX);
}
@@ -700,6 +714,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
}
raw_spin_lock_irqsave(&lock->wait_lock, flags);
acquired:
+ __clear_task_blocked_on(current, lock);
__set_current_state(TASK_RUNNING);
if (ww_ctx) {
@@ -729,9 +744,11 @@ skip_wait:
return 0;
err:
+ __clear_task_blocked_on(current, lock);
__set_current_state(TASK_RUNNING);
__mutex_remove_waiter(lock, &waiter);
err_early_kill:
+ WARN_ON(__get_task_blocked_on(current));
trace_contention_end(lock, ret);
raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q);
debug_mutex_free_waiter(&waiter);
@@ -942,6 +959,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
next = waiter->task;
debug_mutex_wake_waiter(lock, waiter);
+ __clear_task_blocked_on(next, lock);
wake_q_add(&wake_q, next);
}
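
The mutex.c hunks above rely on the blocked_on accessor wrappers added by "locking/mutex: Add p->blocked_on wrappers for correctness checks"; their definitions live outside this kernel/ diffstat. The following is a minimal sketch of what such wrappers plausibly look like, under the assumption that p->blocked_on simply records the mutex a task is waiting on and that the double-underscore variants are called with lock->wait_lock already held:

  static inline struct mutex *__get_task_blocked_on(struct task_struct *p)
  {
          return p->blocked_on;
  }

  static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m)
  {
          /* A task should not be marked blocked on two different mutexes. */
          WARN_ON_ONCE(p->blocked_on && p->blocked_on != m);
          p->blocked_on = m;
  }

  static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m)
  {
          /* Only the mutex we are actually blocked on should clear the field. */
          WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m);
          p->blocked_on = NULL;
  }

The plain set_task_blocked_on()/clear_task_blocked_on() calls in the wait loop run without wait_lock held, so the real variants presumably add whatever serialization and debug checks are needed; that detail is not visible in this diff.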
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
index cbff35b9b7ae..2e8080a9bee3 100644
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -6,7 +6,7 @@
*
* Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*/
-
+#ifndef CONFIG_PREEMPT_RT
/*
* This is the control structure for tasks blocked on mutex, which resides
* on the blocked task's kernel stack:
@@ -70,3 +70,4 @@ extern void debug_mutex_init(struct mutex *lock, const char *name,
# define debug_mutex_unlock(lock) do { } while (0)
# define debug_mutex_init(lock, name, key) do { } while (0)
#endif /* !CONFIG_DEBUG_MUTEXES */
+#endif /* CONFIG_PREEMPT_RT */
diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h
index 37f025a096c9..086fd5487ca7 100644
--- a/kernel/locking/ww_mutex.h
+++ b/kernel/locking/ww_mutex.h
@@ -284,6 +284,12 @@ __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
#ifndef WW_RT
debug_mutex_wake_waiter(lock, waiter);
#endif
+ /*
+ * When waking up the task to die, be sure to clear the
+ * blocked_on pointer. Otherwise we can see circular
+ * blocked_on relationships that can't resolve.
+ */
+ __clear_task_blocked_on(waiter->task, lock);
wake_q_add(wake_q, waiter->task);
}
@@ -331,9 +337,15 @@ static bool __ww_mutex_wound(struct MUTEX *lock,
* it's wounded in __ww_mutex_check_kill() or has a
* wakeup pending to re-read the wounded state.
*/
- if (owner != current)
+ if (owner != current) {
+ /*
+ * When waking up the task to wound, be sure to clear the
+ * blocked_on pointer. Otherwise we can see circular
+ * blocked_on relationships that can't resolve.
+ */
+ __clear_task_blocked_on(owner, lock);
wake_q_add(wake_q, owner);
-
+ }
return true;
}
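
The comments in the two ww_mutex hunks above give the reason for the clearing: the proxy-execution side walks blocked_on chains (see "sched: Start blocked_on chain processing in find_proxy_task()" in the shortlog), and a waiter that was woken to die or to be wounded but still advertises a blocked_on pointer can close a cycle that such a walk can never resolve. The sketch below is a deliberately simplified illustration of that walk, not the series' actual find_proxy_task(); get_task_blocked_on() is a hypothetical accessor and the owner lookup ignores flag bits and races:

  static struct task_struct *proxy_chain_walk(struct task_struct *donor)
  {
          struct task_struct *p = donor;

          for (;;) {
                  struct mutex *m = get_task_blocked_on(p); /* hypothetical accessor */
                  struct task_struct *owner;

                  if (!m)
                          return p;       /* p can run on its own behalf */

                  owner = __mutex_owner(m);       /* simplified owner lookup */
                  if (!owner || owner == p)
                          return p;

                  p = owner;      /* follow the chain toward the lock holder */
          }
  }

If a stale blocked_on pointer survived the die/wound wakeup, the owner of that mutex could itself be transitively blocked on the woken task, and a walk like this would never terminate; clearing the pointer before wake_q_add() breaks the cycle.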
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index 2b331822c7e7..cdea931aae30 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -4,6 +4,9 @@
* Auto-group scheduling implementation:
*/
+#include "autogroup.h"
+#include "sched.h"
+
unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
static struct autogroup autogroup_default;
static atomic_t autogroup_seq_nr;
@@ -25,9 +28,9 @@ static void __init sched_autogroup_sysctl_init(void)
{
register_sysctl_init("kernel", sched_autogroup_sysctls);
}
-#else
+#else /* !CONFIG_SYSCTL: */
#define sched_autogroup_sysctl_init() do { } while (0)
-#endif
+#endif /* !CONFIG_SYSCTL */
void __init autogroup_init(struct task_struct *init_task)
{
@@ -108,7 +111,7 @@ static inline struct autogroup *autogroup_create(void)
free_rt_sched_group(tg);
tg->rt_se = root_task_group.rt_se;
tg->rt_rq = root_task_group.rt_rq;
-#endif
+#endif /* CONFIG_RT_GROUP_SCHED */
tg->autogroup = ag;
sched_online_group(tg, &root_task_group);
diff --git a/kernel/sched/autogroup.h b/kernel/sched/autogroup.h
index 90d69f2c5eaf..06c82b2bdfb5 100644
--- a/kernel/sched/autogroup.h
+++ b/kernel/sched/autogroup.h
@@ -2,6 +2,8 @@
#ifndef _KERNEL_SCHED_AUTOGROUP_H
#define _KERNEL_SCHED_AUTOGROUP_H
+#include "sched.h"
+
#ifdef CONFIG_SCHED_AUTOGROUP
struct autogroup {
@@ -41,7 +43,7 @@ autogroup_task_group(struct task_struct *p, struct task_group *tg)
extern int autogroup_path(struct task_group *tg, char *buf, int buflen);
-#else /* !CONFIG_SCHED_AUTOGROUP */
+#else /* !CONFIG_SCHED_AUTOGROUP: */
static inline void autogroup_init(struct task_struct *init_task) { }
static inline void autogroup_free(struct task_group *tg) { }
@@ -61,6 +63,6 @@ static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
return 0;
}
-#endif /* CONFIG_SCHED_AUTOGROUP */
+#endif /* !CONFIG_SCHED_AUTOGROUP */
#endif /* _KERNEL_SCHED_AUTOGROUP_H */
diff --git a/kernel/sched/build_policy.c b/kernel/sched/build_policy.c
index 72d97aa8b726..c4a488e67aa7 100644
--- a/kernel/sched/build_policy.c
+++ b/kernel/sched/build_policy.c
@@ -50,11 +50,9 @@
#include "idle.c"
#include "rt.c"
+#include "cpudeadline.c"
-#ifdef CONFIG_SMP
-# include "cpudeadline.c"
-# include "pelt.c"
-#endif
+#include "pelt.c"
#include "cputime.c"
#include "deadline.c"
diff --git a/kernel/sched/build_utility.c b/kernel/sched/build_utility.c
index bf9d8db94b70..e2cf3b08d4e9 100644
--- a/kernel/sched/build_utility.c
+++ b/kernel/sched/build_utility.c
@@ -80,11 +80,10 @@
#include "wait_bit.c"
#include "wait.c"
-#ifdef CONFIG_SMP
-# include "cpupri.c"
-# include "stop_task.c"
-# include "topology.c"
-#endif
+#include "cpupri.c"
+#include "stop_task.c"
+
+#include "topology.c"
#ifdef CONFIG_SCHED_CORE
# include "core_sched.c"
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index a09655b48140..f5e6dd6a6b3a 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -54,6 +54,9 @@
*
*/
+#include <linux/sched/clock.h>
+#include "sched.h"
+
/*
* Scheduler clock - returns current time in nanosec units.
* This is default implementation.
@@ -471,7 +474,7 @@ notrace void sched_clock_idle_wakeup_event(void)
}
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
-#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
+#else /* !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK: */
void __init sched_clock_init(void)
{
@@ -489,7 +492,7 @@ notrace u64 sched_clock_cpu(int cpu)
return sched_clock();
}
-#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
+#endif /* !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
/*
* Running clock - returns the time that has elapsed while a guest has been
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 3561ab533dd4..19ee702273c0 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -13,6 +13,11 @@
* Waiting for completion is a typically sync point, but not an exclusion point.
*/
+#include <linux/linkage.h>
+#include <linux/sched/debug.h>
+#include <linux/completion.h>
+#include "sched.h"
+
static void complete_with_flags(struct completion *x, int wake_flags)
{
unsigned long flags;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index dc44bd8ef2ea..3ec00d08d46a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -96,6 +96,7 @@
#include "../workqueue_internal.h"
#include "../../io_uring/io-wq.h"
#include "../smpboot.h"
+#include "../locking/mutex.h"
EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu);
EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask);
@@ -119,6 +120,35 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+#ifdef CONFIG_SCHED_PROXY_EXEC
+DEFINE_STATIC_KEY_TRUE(__sched_proxy_exec);
+static int __init setup_proxy_exec(char *str)
+{
+ bool proxy_enable = true;
+
+ if (*str && kstrtobool(str + 1, &proxy_enable)) {
+ pr_warn("Unable to parse sched_proxy_exec=\n");
+ return 0;
+ }
+
+ if (proxy_enable) {
+ pr_info("sched_proxy_exec enabled via boot arg\n");
+ static_branch_enable(&__sched_proxy_exec);
+ } else {
+ pr_info("sched_proxy_exec disabled via boot arg\n");
+ static_branch_disable(&__sched_proxy_exec);
+ }
+ return 1;
+}
+#else
+static int __init setup_proxy_exec(char *str)
+{
+ pr_warn("CONFIG_SCHED_PROXY_EXEC=n, so it cannot be enabled or disabled at boot time\n");
+ return 0;
+}
+#endif
+__setup("sched_proxy_exec", setup_proxy_exec);
+
/*
* Debugging: various feature bits
*
@@ -481,13 +511,13 @@ void sched_core_put(void)
schedule_work(&_work);
}
-#else /* !CONFIG_SCHED_CORE */
+#else /* !CONFIG_SCHED_CORE: */
static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { }
static inline void
sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { }
-#endif /* CONFIG_SCHED_CORE */
+#endif /* !CONFIG_SCHED_CORE */
/* need a wrapper since we may need to trace from modules */
EXPORT_TRACEPOINT_SYMBOL(sched_set_state_tp);
@@ -650,7 +680,6 @@ void raw_spin_rq_unlock(struct rq *rq)
raw_spin_unlock(rq_lockp(rq));
}
-#ifdef CONFIG_SMP
/*
* double_rq_lock - safely lock two runqueues
*/
@@ -667,7 +696,6 @@ void double_rq_lock(struct rq *rq1, struct rq *rq2)
double_rq_clock_clear_update(rq1, rq2);
}
-#endif
/*
* __task_rq_lock - lock the rq @p resides on.
@@ -853,8 +881,6 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
-#ifdef CONFIG_SMP
-
static void __hrtick_restart(struct rq *rq)
{
struct hrtimer *timer = &rq->hrtick_timer;
@@ -899,33 +925,12 @@ void hrtick_start(struct rq *rq, u64 delay)
smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd);
}
-#else
-/*
- * Called to set the hrtick timer state.
- *
- * called with rq->lock held and IRQs disabled
- */
-void hrtick_start(struct rq *rq, u64 delay)
-{
- /*
- * Don't schedule slices shorter than 10000ns, that just
- * doesn't make sense. Rely on vruntime for fairness.
- */
- delay = max_t(u64, delay, 10000LL);
- hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay),
- HRTIMER_MODE_REL_PINNED_HARD);
-}
-
-#endif /* CONFIG_SMP */
-
static void hrtick_rq_init(struct rq *rq)
{
-#ifdef CONFIG_SMP
INIT_CSD(&rq->hrtick_csd, __hrtick_start, rq);
-#endif
hrtimer_setup(&rq->hrtick_timer, hrtick, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
}
-#else /* CONFIG_SCHED_HRTICK */
+#else /* !CONFIG_SCHED_HRTICK: */
static inline void hrtick_clear(struct rq *rq)
{
}
@@ -933,7 +938,7 @@ static inline void hrtick_clear(struct rq *rq)
static inline void hrtick_rq_init(struct rq *rq)
{
}
-#endif /* CONFIG_SCHED_HRTICK */
+#endif /* !CONFIG_SCHED_HRTICK */
/*
* try_cmpxchg based fetch_or() macro so it works for different integer types:
@@ -949,7 +954,7 @@ static inline void hrtick_rq_init(struct rq *rq)
_val; \
})
-#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
+#ifdef TIF_POLLING_NRFLAG
/*
* Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
* this avoids any races wrt polling state changes and thereby avoids
@@ -988,13 +993,11 @@ static inline bool set_nr_and_not_polling(struct thread_info *ti, int tif)
return true;
}
-#ifdef CONFIG_SMP
static inline bool set_nr_if_polling(struct task_struct *p)
{
return false;
}
#endif
-#endif
static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task)
{
@@ -1167,7 +1170,6 @@ void resched_cpu(int cpu)
raw_spin_rq_unlock_irqrestore(rq, flags);
}
-#ifdef CONFIG_SMP
#ifdef CONFIG_NO_HZ_COMMON
/*
* In the semi idle case, use the nearest busy CPU for migrating timers
@@ -1374,10 +1376,8 @@ bool sched_can_stop_tick(struct rq *rq)
return true;
}
#endif /* CONFIG_NO_HZ_FULL */
-#endif /* CONFIG_SMP */
-#if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
- (defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH)))
+#if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_FAIR_GROUP_SCHED)
/*
* Iterate task_group tree rooted at *from, calling @down when first entering a
* node and @up when leaving it for the final time.
@@ -1971,7 +1971,7 @@ undo:
sysctl_sched_uclamp_util_min_rt_default = old_min_rt;
return result;
}
-#endif
+#endif /* CONFIG_SYSCTL */
static void uclamp_fork(struct task_struct *p)
{
@@ -2037,13 +2037,13 @@ static void __init init_uclamp(void)
}
}
-#else /* !CONFIG_UCLAMP_TASK */
+#else /* !CONFIG_UCLAMP_TASK: */
static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags) { }
static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
static inline void uclamp_fork(struct task_struct *p) { }
static inline void uclamp_post_fork(struct task_struct *p) { }
static inline void init_uclamp(void) { }
-#endif /* CONFIG_UCLAMP_TASK */
+#endif /* !CONFIG_UCLAMP_TASK */
bool sched_task_on_rq(struct task_struct *p)
{
@@ -2353,8 +2353,6 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
return ncsw;
}
-#ifdef CONFIG_SMP
-
static void
__do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx);
@@ -2936,8 +2934,15 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
struct set_affinity_pending my_pending = { }, *pending = NULL;
bool stop_pending, complete = false;
- /* Can the task run on the task's current CPU? If so, we're done */
- if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
+ /*
+ * Can the task run on the task's current CPU? If so, we're done
+ *
+ * We are also done if the task is the current donor, boosting a lock-
+ * holding proxy, (and potentially has been migrated outside its
+ * current or previous affinity mask)
+ */
+ if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask) ||
+ (task_current_donor(rq, p) && !task_current(rq, p))) {
struct task_struct *push_task = NULL;
if ((flags & SCA_MIGRATE_ENABLE) &&
@@ -3305,6 +3310,8 @@ void relax_compatible_cpus_allowed_ptr(struct task_struct *p)
WARN_ON_ONCE(ret);
}
+#ifdef CONFIG_SMP
+
void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
{
unsigned int state = READ_ONCE(p->__state);
@@ -3358,6 +3365,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
__set_task_cpu(p, new_cpu);
}
+#endif /* CONFIG_SMP */
#ifdef CONFIG_NUMA_BALANCING
static void __migrate_swap_task(struct task_struct *p, int cpu)
@@ -3657,17 +3665,6 @@ void sched_set_stop_task(int cpu, struct task_struct *stop)
}
}
-#else /* CONFIG_SMP */
-
-static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { }
-
-static inline bool rq_has_pinned_tasks(struct rq *rq)
-{
- return false;
-}
-
-#endif /* !CONFIG_SMP */
-
static void
ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
{
@@ -3678,7 +3675,6 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
rq = this_rq();
-#ifdef CONFIG_SMP
if (cpu == rq->cpu) {
__schedstat_inc(rq->ttwu_local);
__schedstat_inc(p->stats.nr_wakeups_local);
@@ -3698,7 +3694,6 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
if (wake_flags & WF_MIGRATED)
__schedstat_inc(p->stats.nr_wakeups_migrate);
-#endif /* CONFIG_SMP */
__schedstat_inc(rq->ttwu_count);
__schedstat_inc(p->stats.nr_wakeups);
@@ -3727,13 +3722,11 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
if (p->sched_contributes_to_load)
rq->nr_uninterruptible--;
-#ifdef CONFIG_SMP
if (wake_flags & WF_RQ_SELECTED)
en_flags |= ENQUEUE_RQ_SELECTED;
if (wake_flags & WF_MIGRATED)
en_flags |= ENQUEUE_MIGRATED;
else
-#endif
if (p->in_iowait) {
delayacct_blkio_end(p);
atomic_dec(&task_rq(p)->nr_iowait);
@@ -3744,7 +3737,6 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
ttwu_do_wakeup(p);
-#ifdef CONFIG_SMP
if (p->sched_class->task_woken) {
/*
* Our task @p is fully woken up and running; so it's safe to
@@ -3766,7 +3758,6 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
rq->idle_stamp = 0;
}
-#endif
}
/*
@@ -3820,7 +3811,6 @@ static int ttwu_runnable(struct task_struct *p, int wake_flags)
return ret;
}
-#ifdef CONFIG_SMP
void sched_ttwu_pending(void *arg)
{
struct llist_node *llist = arg;
@@ -3887,7 +3877,9 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags
p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED);
WRITE_ONCE(rq->ttwu_pending, 1);
+#ifdef CONFIG_SMP
__smp_call_single_queue(cpu, &p->wake_entry.llist);
+#endif
}
void wake_up_if_idle(int cpu)
@@ -3993,15 +3985,6 @@ static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
return false;
}
-#else /* !CONFIG_SMP */
-
-static inline bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
-{
- return false;
-}
-
-#endif /* CONFIG_SMP */
-
static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
{
struct rq *rq = cpu_rq(cpu);
@@ -4257,7 +4240,6 @@ int try_to_wake_up(struct task_struct *p, unsigned int sta