aboutsummaryrefslogtreecommitdiff
path: root/kernel/sched
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-12-03 13:04:07 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-12-03 13:04:07 -0800
commit8449d3252c2603a51ffc7c36cb5bd94874378b7d (patch)
treee834b0c0569532e33e622a6966ae67632d2cab66 /kernel/sched
parent2b60145734a0e5a4b73952a540928d2c4f4fed64 (diff)
parentb1bcaed1e39a9e0dfbe324a15d2ca4253deda316 (diff)
Merge tag 'cgroup-for-6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo: - Defer task cgroup unlink until after the dying task's final context switch so that controllers see the cgroup properly populated until the task is truly gone - cpuset cleanups and simplifications. Enforce that domain isolated CPUs stay in root or isolated partitions and fail if isolated+nohz_full would leave no housekeeping CPU. Fix sched/deadline root domain handling during CPU hot-unplug and race for tasks in attaching cpusets - Misc fixes including memory reclaim protection documentation and selftest KTAP conformance * tag 'cgroup-for-6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (21 commits) cpuset: Treat cpusets in attaching as populated sched/deadline: Walk up cpuset hierarchy to decide root domain when hot-unplug cgroup/cpuset: Introduce cpuset_cpus_allowed_locked() docs: cgroup: No special handling of unpopulated memcgs docs: cgroup: Note about sibling relative reclaim protection docs: cgroup: Explain reclaim protection target selftests/cgroup: conform test to KTAP format output cpuset: remove need_rebuild_sched_domains cpuset: remove global remote_children list cpuset: simplify node setting on error cgroup: include missing header for struct irq_work cgroup: Fix sleeping from invalid context warning on PREEMPT_RT cgroup/cpuset: Globally track isolated_cpus update cgroup/cpuset: Ensure domain isolated CPUs stay in root or isolated partition cgroup/cpuset: Move up prstate_housekeeping_conflict() helper cgroup/cpuset: Fail if isolated and nohz_full don't leave any housekeeping cgroup/cpuset: Rename update_unbound_workqueue_cpumask() to update_isolation_cpumasks() cgroup: Defer task cgroup unlink until after the task is done switching out cgroup: Move dying_tasks cleanup from cgroup_task_release() to cgroup_task_free() cgroup: Rename cgroup lifecycle hooks to cgroup_task_*() ...
Diffstat (limited to 'kernel/sched')
-rw-r--r--kernel/sched/autogroup.c4
-rw-r--r--kernel/sched/core.c2
-rw-r--r--kernel/sched/deadline.c54
3 files changed, 52 insertions, 8 deletions
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index cdea931aae30..954137775f38 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -178,8 +178,8 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
* this process can already run with task_group() == prev->tg or we can
* race with cgroup code which can read autogroup = prev under rq->lock.
* In the latter case for_each_thread() can not miss a migrating thread,
- * cpu_cgroup_attach() must not be possible after cgroup_exit() and it
- * can't be removed from thread list, we hold ->siglock.
+ * cpu_cgroup_attach() must not be possible after cgroup_task_exit()
+ * and it can't be removed from thread list, we hold ->siglock.
*
* If an exiting thread was already removed from thread list we rely on
* sched_autogroup_exit_task().
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fc358c1b6ca9..dba4e58d5d1a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5143,6 +5143,8 @@ static struct rq *finish_task_switch(struct task_struct *prev)
if (prev->sched_class->task_dead)
prev->sched_class->task_dead(prev);
+ cgroup_task_dead(prev);
+
/* Task is done with its stack. */
put_task_stack(prev);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 67f540c23717..319439fe1870 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2675,6 +2675,7 @@ static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu
return NULL;
}
+/* Access rule: must be called on local CPU with preemption disabled */
static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
static int find_later_rq(struct task_struct *task)
@@ -3117,11 +3118,43 @@ void __init init_sched_dl_class(void)
GFP_KERNEL, cpu_to_node(i));
}
+/*
+ * This function always returns a non-empty bitmap in @cpus. This is because
+ * if a root domain has reserved bandwidth for DL tasks, the DL bandwidth
+ * check will prevent CPU hotplug from deactivating all CPUs in that domain.
+ */
+static void dl_get_task_effective_cpus(struct task_struct *p, struct cpumask *cpus)
+{
+ const struct cpumask *hk_msk;
+
+ hk_msk = housekeeping_cpumask(HK_TYPE_DOMAIN);
+ if (housekeeping_enabled(HK_TYPE_DOMAIN)) {
+ if (!cpumask_intersects(p->cpus_ptr, hk_msk)) {
+ /*
+ * CPUs isolated by isolcpu="domain" always belong to
+ * def_root_domain.
+ */
+ cpumask_andnot(cpus, cpu_active_mask, hk_msk);
+ return;
+ }
+ }
+
+ /*
+ * If a root domain holds a DL task, it must have active CPUs. So
+ * active CPUs can always be found by walking up the task's cpuset
+ * hierarchy up to the partition root.
+ */
+ cpuset_cpus_allowed_locked(p, cpus);
+}
+
+/* The caller should hold cpuset_mutex */
void dl_add_task_root_domain(struct task_struct *p)
{
struct rq_flags rf;
struct rq *rq;
struct dl_bw *dl_b;
+ unsigned int cpu;
+ struct cpumask *msk = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
if (!dl_task(p) || dl_entity_is_special(&p->dl)) {
@@ -3129,16 +3162,25 @@ void dl_add_task_root_domain(struct task_struct *p)
return;
}
- rq = __task_rq_lock(p, &rf);
-
+ /*
+ * Get an active rq, whose rq->rd traces the correct root
+ * domain.
+ * Ideally this would be under cpuset reader lock until rq->rd is
+ * fetched. However, sleepable locks cannot nest inside pi_lock, so we
+ * rely on the caller of dl_add_task_root_domain() holds 'cpuset_mutex'
+ * to guarantee the CPU stays in the cpuset.
+ */
+ dl_get_task_effective_cpus(p, msk);
+ cpu = cpumask_first_and(cpu_active_mask, msk);
+ BUG_ON(cpu >= nr_cpu_ids);
+ rq = cpu_rq(cpu);
dl_b = &rq->rd->dl_bw;
- raw_spin_lock(&dl_b->lock);
+ /* End of fetching rd */
+ raw_spin_lock(&dl_b->lock);
__dl_add(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
-
raw_spin_unlock(&dl_b->lock);
-
- task_rq_unlock(rq, p, &rf);
+ raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);
}
void dl_clear_root_domain(struct root_domain *rd)