From c6f4e552e1eae4a5726230254108213b085e1ae3 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Sat, 15 Nov 2025 19:07:41 -0800
Subject: rcutorture: Add a textbook-style trivial preemptible RCU

This commit adds a trivial textbook implementation of preemptible RCU
to rcutorture ("torture_type=trivial-preempt"), similar in spirit to
the existing "torture_type=trivial" textbook implementation of
non-preemptible RCU.  Neither trivial RCU implementation has any value
for production use; both are intended only to keep Paul honest in his
introductory writings and presentations.

[ paulmck: Apply kernel test robot feedback. ]

Signed-off-by: Joel Fernandes
---
 kernel/rcu/Kconfig.debug | 11 ++++++++++
 kernel/rcu/rcu.h         |  4 ++++
 kernel/rcu/rcutorture.c  | 57 +++++++++++++++++++++++++++++++++++++++++++++++-
 kernel/rcu/update.c      | 22 +++++++++++++++++++
 4 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 625d75392647..e078e988773d 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -228,4 +228,15 @@ config RCU_DYNTICKS_TORTURE

	  This has no value for production and is only for testing.

+config TRIVIAL_PREEMPT_RCU
+	bool "Textbook trivial preemptible RCU in rcutorture"
+	depends on RCU_EXPERT && RCU_TORTURE_TEST
+	default n
+	help
+	  This option enables a textbook preemptible RCU that is
+	  implemented in rcutorture.  Its sole purpose is to validate
+	  code used in books, papers, and presentations.
+
+	  This has no value for production and is only for testing.
+
 endmenu # "RCU Debugging"
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 9b10b57b79ad..fa6d30ce73d1 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -691,4 +691,8 @@ int rcu_stall_notifier_call_chain(unsigned long val, void *v);
 static inline int rcu_stall_notifier_call_chain(unsigned long val, void *v) { return NOTIFY_DONE; }
 #endif // #else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER)

+#ifdef CONFIG_TRIVIAL_PREEMPT_RCU
+void synchronize_rcu_trivial_preempt(void);
+#endif // #ifdef CONFIG_TRIVIAL_PREEMPT_RCU
+
 #endif /* __LINUX_RCU_H */
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 8a9282a0245c..3c272413666b 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1061,6 +1061,61 @@ static struct rcu_torture_ops trivial_ops = {
	.name		= "trivial"
 };

+#ifdef CONFIG_TRIVIAL_PREEMPT_RCU
+
+/*
+ * Definitions for trivial CONFIG_PREEMPT=y torture testing.  This
+ * implementation does not work well with large numbers of tasks or with
+ * long-term preemption.  Either or both get you RCU CPU stall warnings.
+ */
+
+static void rcu_sync_torture_init_trivial_preempt(void)
+{
+	rcu_sync_torture_init();
+	if (WARN_ONCE(onoff_interval || shuffle_interval, "%s: Non-zero onoff_interval (%d) or shuffle_interval (%d) breaks trivial RCU, resetting to zero", __func__, onoff_interval, shuffle_interval)) {
+		onoff_interval = 0;
+		shuffle_interval = 0;
+	}
+}
+
+static int rcu_torture_read_lock_trivial_preempt(void)
+{
+	struct task_struct *t = current;
+
+	WRITE_ONCE(t->rcu_trivial_preempt_nesting, t->rcu_trivial_preempt_nesting + 1);
+	smp_mb();
+	return 0;
+}
+
+static void rcu_torture_read_unlock_trivial_preempt(int idx)
+{
+	struct task_struct *t = current;
+
+	smp_store_release(&t->rcu_trivial_preempt_nesting, t->rcu_trivial_preempt_nesting - 1);
+}
+
+static struct rcu_torture_ops trivial_preempt_ops = {
+	.ttype		= RCU_TRIVIAL_FLAVOR,
+	.init		= rcu_sync_torture_init_trivial_preempt,
+	.readlock	= rcu_torture_read_lock_trivial_preempt,
+	.read_delay	= rcu_read_delay,  // just reuse rcu's version.
+	.readunlock	= rcu_torture_read_unlock_trivial_preempt,
+	.readlock_held	= torture_readlock_not_held,
+	.get_gp_seq	= rcu_no_completed,
+	.sync		= synchronize_rcu_trivial_preempt,
+	.exp_sync	= synchronize_rcu_trivial_preempt,
+	.irq_capable	= 0, // In theory it should be, but let's keep it trivial.
+	.name		= "trivial-preempt"
+};
+
+#define TRIVIAL_PREEMPT_OPS &trivial_preempt_ops,
+
+#else // #ifdef CONFIG_TRIVIAL_PREEMPT_RCU
+
+#define TRIVIAL_PREEMPT_OPS
+
+#endif // #else // #ifdef CONFIG_TRIVIAL_PREEMPT_RCU
+
 #ifdef CONFIG_TASKS_RCU

 /*
@@ -4449,7 +4504,7 @@ rcu_torture_init(void)
	static struct rcu_torture_ops *torture_ops[] = {
		&rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, &busted_srcud_ops,
		TASKS_OPS TASKS_RUDE_OPS TASKS_TRACING_OPS
-		&trivial_ops,
+		&trivial_ops, TRIVIAL_PREEMPT_OPS
	};

	if (!torture_init_begin(torture_type, verbose))
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index d98a5c38e19c..b62735a67884 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -538,6 +538,28 @@ long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask, bool do
 EXPORT_SYMBOL_GPL(torture_sched_setaffinity);
 #endif

+#if IS_ENABLED(CONFIG_TRIVIAL_PREEMPT_RCU)
+// Trivial and stupid grace-period wait.  Defined here so that lockdep
+// kernels can find tasklist_lock.
+void synchronize_rcu_trivial_preempt(void)
+{
+	struct task_struct *g;
+	struct task_struct *t;
+
+	smp_mb();  // Order prior accesses before grace-period start.
+	rcu_read_lock(); // Protect task list.
+	for_each_process_thread(g, t) {
+		if (t == current)
+			continue;  // Don't deadlock on ourselves!
+		// Order later rcu_read_lock() on other tasks after QS.
+		while (smp_load_acquire(&t->rcu_trivial_preempt_nesting))
+			continue;
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_trivial_preempt);
+#endif // #if IS_ENABLED(CONFIG_TRIVIAL_PREEMPT_RCU)
+
 int rcu_cpu_stall_notifiers __read_mostly; // !0 = provide stall notifiers (rarely useful)
 EXPORT_SYMBOL_GPL(rcu_cpu_stall_notifiers);
-- 
cgit v1.2.3
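For readers who want to experiment outside a kernel tree, the same algorithm
condenses to a user-space sketch.  The code below is mine, not part of the
patch: pthreads and C11 atomics stand in for tasks, WRITE_ONCE()+smp_mb(),
smp_store_release(), and smp_load_acquire().  Each thread publishes a per-task
nesting count, and the grace-period wait spins until every other thread's
count reaches zero.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NTHREADS 4

static _Atomic int nesting[NTHREADS];	/* Per-"task" reader nesting count. */
static _Atomic int gp_done;
static _Atomic int shared = 1;		/* Stands in for RCU-protected data. */

static void trivial_read_lock(int me)
{
	/* A seq_cst RMW gives the increment-then-full-barrier that the
	 * kernel version gets from WRITE_ONCE() followed by smp_mb(). */
	atomic_fetch_add(&nesting[me], 1);
}

static void trivial_read_unlock(int me)
{
	/* Release: critical-section accesses happen before the counter
	 * is seen to reach zero, as with smp_store_release(). */
	atomic_fetch_sub_explicit(&nesting[me], 1, memory_order_release);
}

static void trivial_synchronize(int me)
{
	/* Acquire pairs with the release in trivial_read_unlock(). */
	for (int i = 0; i < NTHREADS; i++) {
		if (i == me)
			continue;	/* Don't deadlock on ourselves! */
		while (atomic_load_explicit(&nesting[i], memory_order_acquire))
			;		/* Wait for thread i's reader to exit. */
	}
}

static void *reader(void *arg)
{
	int me = (int)(long)arg;

	while (!atomic_load(&gp_done)) {
		trivial_read_lock(me);
		(void)atomic_load_explicit(&shared, memory_order_relaxed);
		trivial_read_unlock(me);
	}
	return NULL;
}

int main(void)
{
	pthread_t tid[NTHREADS];

	for (long i = 1; i < NTHREADS; i++)
		pthread_create(&tid[i], NULL, reader, (void *)i);
	atomic_store_explicit(&shared, 2, memory_order_relaxed); /* Update. */
	trivial_synchronize(0);		/* Pre-existing readers are now done. */
	atomic_store(&gp_done, 1);
	for (int i = 1; i < NTHREADS; i++)
		pthread_join(tid[i], NULL);
	printf("trivial grace period elapsed\n");
	return 0;
}

As the commit message warns, this scheme scans every task on every grace
period, which is exactly why it stalls with many tasks or long preemption.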
From b0c8dd5097aaa7bfc70c8933de6be0dcdc995592 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Thu, 5 Feb 2026 13:43:32 -0800
Subject: refscale: Ditch ref_scale_shutdown in favor of torture_shutdown_init()

The torture_shutdown_init() function spawns a shutdown kthread in a
manner very similar to that implemented by ref_scale_shutdown().  This
commit therefore re-implements ref_scale_shutdown() in terms of
torture_shutdown_init().

The initial draft of this patch was generated by version 2.1.16 of the
Claude AI/LLM, but trained and configured for use by my employer, and
prompted to refer to Linux-kernel source code.  This initial draft
failed to provide a forward reference to ref_scale_cleanup(), passed
zero to torture_shutdown_init() for an unwelcome insta-shutdown, and
failed to pass the kvm.sh --duration argument in as a refscale module
parameter.  On the other hand, it did catch the need to NULL main_task
on the post-test self-shutdown code path, which I might well have
forgotten to do.  This version of the patch fixes those problems, and
in fact very little of the initial draft remains.

Signed-off-by: Paul E. McKenney
Signed-off-by: Joel Fernandes
---
 kernel/rcu/refscale.c | 51 ++++++++++++++------------------------------------
 1 file changed, 14 insertions(+), 37 deletions(-)

diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
index c158b6a947cd..a2d9d75d88a1 100644
--- a/kernel/rcu/refscale.c
+++ b/kernel/rcu/refscale.c
@@ -92,15 +92,9 @@ torture_param(int, nreaders, -1, "Number of readers, -1 for 75% of CPUs.");
 torture_param(int, nruns, 30, "Number of experiments to run.");
 // Reader delay in nanoseconds, 0 for no delay.
 torture_param(int, readdelay, 0, "Read-side delay in nanoseconds.");
-
-#ifdef MODULE
-# define REFSCALE_SHUTDOWN 0
-#else
-# define REFSCALE_SHUTDOWN 1
-#endif
-
-torture_param(bool, shutdown, REFSCALE_SHUTDOWN,
-	      "Shutdown at end of scalability tests.");
+// Maximum shutdown delay in seconds, or zero for no shutdown.
+torture_param(int, shutdown_secs, !IS_MODULE(CONFIG_RCU_REF_SCALE_TEST) * 300,
+	      "Shutdown at end of scalability tests or at specified timeout (s).");

 struct reader_task {
	struct task_struct *task;
@@ -109,12 +103,8 @@ struct reader_task {
	u64 last_duration_ns;
 };

-static struct task_struct *shutdown_task;
-static wait_queue_head_t shutdown_wq;
-
 static struct task_struct *main_task;
 static wait_queue_head_t main_wq;
-static int shutdown_start;

 static struct reader_task *reader_tasks;

@@ -1357,6 +1347,8 @@ static u64 process_durations(int n)
	return sum;
 }

+static void ref_scale_cleanup(void);
+
 // The main_func is the main orchestrator, it performs a bunch of
 // experiments.  For every experiment, it orders all the readers
 // involved to start and waits for them to finish the experiment.  It
@@ -1443,9 +1435,10 @@ static int main_func(void *arg)

 oom_exit:
	// This will shutdown everything including us.
-	if (shutdown) {
-		shutdown_start = 1;
-		wake_up(&shutdown_wq);
+	if (shutdown_secs) {
+		main_task = NULL; // Avoid self-kill deadlock.
+		ref_scale_cleanup();
+		kernel_power_off();
	}

	// Wait for torture to stop us
@@ -1463,8 +1456,8 @@ static void
 ref_scale_print_module_parms(const struct ref_scale_ops *cur_ops, const char *tag)
 {
	pr_alert("%s" SCALE_FLAG
-		 "--- %s: verbose=%d verbose_batched=%d shutdown=%d holdoff=%d lookup_instances=%ld loops=%d nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag,
-		 verbose, verbose_batched, shutdown, holdoff, lookup_instances, loops, nreaders, nruns, readdelay);
+		 "--- %s: verbose=%d verbose_batched=%d shutdown_secs=%d holdoff=%d lookup_instances=%ld loops=%d nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag,
+		 verbose, verbose_batched, shutdown_secs, holdoff, lookup_instances, loops, nreaders, nruns, readdelay);
 }

 static void
@@ -1497,19 +1490,6 @@ ref_scale_cleanup(void)
	torture_cleanup_end();
 }

-// Shutdown kthread.  Just waits to be awakened, then shuts down system.
-static int
-ref_scale_shutdown(void *arg)
-{
-	wait_event_idle(shutdown_wq, shutdown_start);
-
-	smp_mb(); // Wake before output.
-	ref_scale_cleanup();
-	kernel_power_off();
-
-	return -EINVAL;
-}
-
 static int __init
 ref_scale_init(void)
 {
@@ -1553,13 +1533,10 @@ ref_scale_init(void)
	ref_scale_print_module_parms(cur_ops, "Start of test");

	// Shutdown task
-	if (shutdown) {
-		init_waitqueue_head(&shutdown_wq);
-		firsterr = torture_create_kthread(ref_scale_shutdown, NULL,
-						  shutdown_task);
+	if (shutdown_secs) {
+		firsterr = torture_shutdown_init(shutdown_secs, ref_scale_cleanup);
		if (torture_init_error(firsterr))
			goto unwind;
-		schedule_timeout_uninterruptible(1);
	}

	// Reader tasks (default to ~75% of online CPUs).
@@ -1604,7 +1581,7 @@ ref_scale_init(void)
 unwind:
	torture_init_end();
	ref_scale_cleanup();
-	if (shutdown) {
+	if (shutdown_secs) {
		WARN_ON(!IS_MODULE(CONFIG_RCU_REF_SCALE_TEST));
		kernel_power_off();
	}
-- 
cgit v1.2.3
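For reference, the shared helper being adopted here behaves roughly as
follows.  This is an illustrative user-space analogue of my own, not the
kernel's torture_shutdown_init() (which also supports early wakeup and
integrates with the torture framework's cleanup sequencing): spawn one
watchdog thread, wait out the shutdown delay, run the caller's cleanup hook,
then power off.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static int shutdown_secs;		/* Zero means "no shutdown". */
static void (*shutdown_cleanup)(void);

static void *shutdown_thread(void *unused)
{
	(void)unused;
	sleep(shutdown_secs);		/* Stand-in for the timed/awakened wait. */
	if (shutdown_cleanup)
		shutdown_cleanup();	/* Test-specific teardown hook. */
	printf("powering off\n");
	exit(0);			/* Stand-in for kernel_power_off(). */
}

static int torture_shutdown_init_sketch(int ssecs, void (*cleanup)(void))
{
	pthread_t tid;

	if (!ssecs)
		return 0;		/* Shutdown not requested. */
	shutdown_secs = ssecs;
	shutdown_cleanup = cleanup;
	return pthread_create(&tid, NULL, shutdown_thread, NULL);
}

static void my_cleanup(void)
{
	printf("cleanup ran\n");
}

int main(void)
{
	if (torture_shutdown_init_sketch(2, my_cleanup))
		return 1;
	for (;;)
		pause();		/* The "test" runs until shutdown fires. */
}

Note also the !IS_MODULE(CONFIG_...) * 300 idiom in the new torture_param()
defaults above: built-in test runs default to a 300-second shutdown, while
module builds default to zero (no shutdown).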
From 359cf5c942b8fce9cf2b7f3c1eb5b8186f0d9b30 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Sun, 8 Feb 2026 19:03:30 -0800
Subject: rcuscale: Ditch rcu_scale_shutdown in favor of torture_shutdown_init()

The torture_shutdown_init() function spawns a shutdown kthread in a
manner very similar to that implemented by rcu_scale_shutdown().  This
commit therefore re-implements rcu_scale_shutdown() in terms of
torture_shutdown_init().

This patch was generated by Claude, given as input the patch making the
same transformation to ref_scale_shutdown().

Signed-off-by: Paul E. McKenney
Signed-off-by: Joel Fernandes
---
 kernel/rcu/rcuscale.c | 78 ++++++++++++++------------------------------------
 1 file changed, 21 insertions(+), 57 deletions(-)

diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 4ac2b134a983..ac0b1c6b7dae 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -79,12 +79,6 @@ MODULE_AUTHOR("Paul E. McKenney ");
  * test-end checks, and the pair of calls through pointers.
  */

-#ifdef MODULE
-# define RCUSCALE_SHUTDOWN 0
-#else
-# define RCUSCALE_SHUTDOWN 1
-#endif
-
 torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives");
 torture_param(int, gp_async_max, 1000, "Max # outstanding waits per writer");
 torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
@@ -92,8 +86,8 @@ torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
 torture_param(int, minruntime, 0, "Minimum run time (s)");
 torture_param(int, nreaders, -1, "Number of RCU reader threads");
 torture_param(int, nwriters, -1, "Number of RCU updater threads");
-torture_param(bool, shutdown, RCUSCALE_SHUTDOWN,
-	      "Shutdown at end of scalability tests.");
+torture_param(int, shutdown_secs, !IS_MODULE(CONFIG_RCU_SCALE_TEST) * 300,
+	      "Shutdown at end of scalability tests or at specified timeout (s).");
 torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
 torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
 torture_param(int, writer_holdoff_jiffies, 0, "Holdoff (jiffies) between GPs, zero to disable");
@@ -123,7 +117,6 @@ static int nrealreaders;
 static int nrealwriters;
 static struct task_struct **writer_tasks;
 static struct task_struct **reader_tasks;
-static struct task_struct *shutdown_task;

 static u64 **writer_durations;
 static bool *writer_done;
@@ -132,7 +125,6 @@ static int *writer_n_durations;
 static atomic_t n_rcu_scale_reader_started;
 static atomic_t n_rcu_scale_writer_started;
 static atomic_t n_rcu_scale_writer_finished;
-static wait_queue_head_t shutdown_wq;
 static u64 t_rcu_scale_writer_started;
 static u64 t_rcu_scale_writer_finished;
 static unsigned long b_rcu_gp_test_started;
@@ -519,6 +511,8 @@ static void rcu_scale_async_cb(struct rcu_head *rhp)
	rcu_scale_free(wmbp);
 }

+static void rcu_scale_cleanup(void);
+
 /*
  * RCU scale writer kthread.  Repeatedly does a grace period.
  */
@@ -622,9 +616,11 @@ rcu_scale_writer(void *arg)
			b_rcu_gp_test_finished = cur_ops->get_gp_seq();
		}

-		if (shutdown) {
+		if (shutdown_secs) {
+			writer_tasks[me] = NULL;
			smp_mb(); /* Assign before wake. */
-			wake_up(&shutdown_wq);
+			rcu_scale_cleanup();
+			kernel_power_off();
		}
	}
@@ -668,8 +664,8 @@ static void
 rcu_scale_print_module_parms(struct rcu_scale_ops *cur_ops, const char *tag)
 {
	pr_alert("%s" SCALE_FLAG
-		 "--- %s: gp_async=%d gp_async_max=%d gp_exp=%d holdoff=%d minruntime=%d nreaders=%d nwriters=%d writer_holdoff=%d writer_holdoff_jiffies=%d verbose=%d shutdown=%d\n",
-		 scale_type, tag, gp_async, gp_async_max, gp_exp, holdoff, minruntime, nrealreaders, nrealwriters, writer_holdoff, writer_holdoff_jiffies, verbose, shutdown);
+		 "--- %s: gp_async=%d gp_async_max=%d gp_exp=%d holdoff=%d minruntime=%d nreaders=%d nwriters=%d writer_holdoff=%d writer_holdoff_jiffies=%d verbose=%d shutdown_secs=%d\n",
+		 scale_type, tag, gp_async, gp_async_max, gp_exp, holdoff, minruntime, nrealreaders, nrealwriters, writer_holdoff, writer_holdoff_jiffies, verbose, shutdown_secs);
 }

 /*
@@ -722,6 +718,8 @@ static void kfree_call_rcu(struct rcu_head *rh)
	kfree(obj);
 }

+static void kfree_scale_cleanup(void);
+
 static int
 kfree_scale_thread(void *arg)
 {
@@ -791,9 +789,11 @@ kfree_scale_thread(void *arg)
			rcuscale_seq_diff(b_rcu_gp_test_finished, b_rcu_gp_test_started),
			PAGES_TO_MB(mem_begin - mem_during));

-		if (shutdown) {
+		if (shutdown_secs) {
+			kfree_reader_tasks[me] = NULL;
			smp_mb(); /* Assign before wake. */
-			wake_up(&shutdown_wq);
+			kfree_scale_cleanup();
+			kernel_power_off();
		}
	}

@@ -820,22 +820,6 @@ kfree_scale_cleanup(void)
	torture_cleanup_end();
 }

-/*
- * shutdown kthread.  Just waits to be awakened, then shuts down system.
- */
-static int
-kfree_scale_shutdown(void *arg)
-{
-	wait_event_idle(shutdown_wq,
-			atomic_read(&n_kfree_scale_thread_ended) >= kfree_nrealthreads);
-
-	smp_mb(); /* Wake before output. */
-
-	kfree_scale_cleanup();
-	kernel_power_off();
-	return -EINVAL;
-}
-
 // Used if doing RCU-kfree'ing via call_rcu().
 static unsigned long jiffies_at_lazy_cb;
 static struct rcu_head lazy_test1_rh;
@@ -895,13 +879,10 @@ kfree_scale_init(void)
	kfree_nrealthreads = compute_real(kfree_nthreads);

	/* Start up the kthreads. */
-	if (shutdown) {
-		init_waitqueue_head(&shutdown_wq);
-		firsterr = torture_create_kthread(kfree_scale_shutdown, NULL,
-						  shutdown_task);
+	if (shutdown_secs) {
+		firsterr = torture_shutdown_init(shutdown_secs, kfree_scale_cleanup);
		if (torture_init_error(firsterr))
			goto unwind;
-		schedule_timeout_uninterruptible(1);
	}

	pr_alert("kfree object size=%zu, kfree_by_call_rcu=%d\n",
@@ -1058,20 +1039,6 @@ rcu_scale_cleanup(void)
	torture_cleanup_end();
 }

-/*
- * RCU scalability shutdown kthread.  Just waits to be awakened, then shuts
- * down system.
- */
-static int
-rcu_scale_shutdown(void *arg)
-{
-	wait_event_idle(shutdown_wq, atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters);
-	smp_mb(); /* Wake before output. */
-	rcu_scale_cleanup();
-	kernel_power_off();
-	return -EINVAL;
-}
-
 static int __init
 rcu_scale_init(void)
 {
@@ -1121,13 +1088,10 @@ rcu_scale_init(void)

	/* Start up the kthreads. */
-	if (shutdown) {
-		init_waitqueue_head(&shutdown_wq);
-		firsterr = torture_create_kthread(rcu_scale_shutdown, NULL,
-						  shutdown_task);
+	if (shutdown_secs) {
+		firsterr = torture_shutdown_init(shutdown_secs, rcu_scale_cleanup);
		if (torture_init_error(firsterr))
			goto unwind;
-		schedule_timeout_uninterruptible(1);
	}
	reader_tasks = kzalloc_objs(reader_tasks[0], nrealreaders);
	if (reader_tasks == NULL) {
@@ -1201,7 +1165,7 @@ rcu_scale_init(void)
 unwind:
	torture_init_end();
	rcu_scale_cleanup();
-	if (shutdown) {
+	if (shutdown_secs) {
		WARN_ON(!IS_MODULE(CONFIG_RCU_SCALE_TEST));
		kernel_power_off();
	}
-- 
cgit v1.2.3
From 3e3d7d8f3ad35deaf3f8150f66555ef54cf1754e Mon Sep 17 00:00:00 2001
From: Zqiang
Date: Mon, 5 Jan 2026 09:19:51 +0800
Subject: rcu-tasks: Remove unnecessary smp_store_release() in cblist_init_generic()

The cblist_init_generic() function is executed during the CPU early
boot phase due to commit 30ef09635b9e ("rcu-tasks: Initialize callback
lists at rcu_init() time").  At this time, only the boot CPU is online
and interrupts are disabled, so no concurrent readers of these fields
are possible.  This commit therefore uses plain assignment in place of
smp_store_release() and WRITE_ONCE() in cblist_init_generic().

Signed-off-by: Zqiang
Reviewed-by: Paul E. McKenney
Reviewed-by: Frederic Weisbecker
Signed-off-by: Joel Fernandes
---
 kernel/rcu/tasks.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 2b55e6acf3c1..48f0d803c8e2 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -291,9 +291,9 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
		shift = ilog2(rcu_task_cpu_ids / lim);
	if (((rcu_task_cpu_ids - 1) >> shift) >= lim)
		shift++;
-	WRITE_ONCE(rtp->percpu_enqueue_shift, shift);
-	WRITE_ONCE(rtp->percpu_dequeue_lim, lim);
-	smp_store_release(&rtp->percpu_enqueue_lim, lim);
+	rtp->percpu_enqueue_shift = shift;
+	rtp->percpu_dequeue_lim = lim;
+	rtp->percpu_enqueue_lim = lim;
	pr_info("%s: Setting shift to %d and lim to %d rcu_task_cb_adjust=%d rcu_task_cpu_ids=%d.\n",
		rtp->name, data_race(rtp->percpu_enqueue_shift),
		data_race(rtp->percpu_enqueue_lim),
-- 
cgit v1.2.3
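The reasoning behind this patch generalizes: release stores exist to pair
with acquire loads in concurrent readers, so when no reader can possibly run,
plain stores are equivalent.  The sketch below is my own illustration using
C11 atomics, not kernel code.

#include <stdatomic.h>
#include <stdio.h>

static int data;
static _Atomic int ready;

/* With concurrent readers, the writer's release store must pair with the
 * readers' acquire load so that observing "ready" implies seeing "data". */
void publish_concurrent(void)
{
	data = 42;
	atomic_store_explicit(&ready, 1, memory_order_release);
}

int read_concurrent(void)
{
	if (atomic_load_explicit(&ready, memory_order_acquire))
		return data;	/* Acquire guarantees data == 42 here. */
	return -1;
}

/* With no possible concurrent reader (the cblist_init_generic() case:
 * one CPU online, interrupts disabled), ordering buys nothing, so a
 * plain (here: relaxed) store suffices. */
void publish_single_threaded(void)
{
	data = 42;
	atomic_store_explicit(&ready, 1, memory_order_relaxed);
}

int main(void)
{
	publish_single_threaded();
	printf("data=%d ready=%d\n", data, atomic_load(&ready));
	return 0;
}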
"" : "de", rdp->cpu); + ret = -EINVAL; + goto out_unlock; } + + if (offload) { + ret = rcu_nocb_rdp_offload(rdp); + if (!ret) + cpumask_set_cpu(cpu, rcu_nocb_mask); + } else { + ret = rcu_nocb_rdp_deoffload(rdp); + if (!ret) + cpumask_clear_cpu(cpu, rcu_nocb_mask); + } + +out_unlock: mutex_unlock(&rcu_state.nocb_mutex); cpus_read_unlock(); - return ret; } + +int rcu_nocb_cpu_deoffload(int cpu) +{ + return rcu_nocb_cpu_toggle_offload(cpu, false /* de-offload */); +} +EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload); + +int rcu_nocb_cpu_offload(int cpu) +{ + return rcu_nocb_cpu_toggle_offload(cpu, true /* offload */); +} EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload); #ifdef CONFIG_RCU_LAZY -- cgit v1.2.3 From 2243517a5440caa635b945deb7915397ef39b29b Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Sat, 3 Jan 2026 15:54:37 -0500 Subject: rcu/nocb: Extract nocb_bypass_needs_flush() to reduce duplication The bypass flush decision logic is duplicated in rcu_nocb_try_bypass() and nocb_gp_wait() with similar conditions. This commit therefore extracts the functionality into a common helper function nocb_bypass_needs_flush() improving the code readability. A flush_faster parameter is added to controlling the flushing thresholds and timeouts. This design was in the original commit d1b222c6be1f ("rcu/nocb: Add bypass callback queueing") to avoid having the GP kthread aggressively flush the bypass queue. Reviewed-by: Frederic Weisbecker Signed-off-by: Joel Fernandes --- kernel/rcu/tree_nocb.h | 51 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index d5e4d23090e8..1047b30cd46b 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -378,6 +378,38 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j) WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j, false)); } +/* + * Determine if the bypass queue needs to be flushed based on time and size. + * For lazy-only bypass queues, use the lazy flush timeout; otherwise flush + * based on jiffy advancement. The flush_faster controls flush aggressiveness. + */ +static bool nocb_bypass_needs_flush(struct rcu_data *rdp, long bypass_ncbs, + long lazy_ncbs, unsigned long j, + bool flush_faster) +{ + bool bypass_is_lazy; + unsigned long bypass_first; + unsigned long flush_timeout; + long qhimark_thresh; + + if (!bypass_ncbs) + return false; + + qhimark_thresh = flush_faster ? qhimark : 2 * qhimark; + if (bypass_ncbs >= qhimark_thresh) + return true; + + bypass_first = READ_ONCE(rdp->nocb_bypass_first); + bypass_is_lazy = (bypass_ncbs == lazy_ncbs); + + if (bypass_is_lazy) + flush_timeout = rcu_get_jiffies_lazy_flush(); + else + flush_timeout = flush_faster ? 0 : 1; + + return time_after(j, bypass_first + flush_timeout); +} + /* * See whether it is appropriate to use the ->nocb_bypass list in order * to control contention on ->nocb_lock. 
From 2243517a5440caa635b945deb7915397ef39b29b Mon Sep 17 00:00:00 2001
From: Joel Fernandes
Date: Sat, 3 Jan 2026 15:54:37 -0500
Subject: rcu/nocb: Extract nocb_bypass_needs_flush() to reduce duplication

The bypass flush decision logic is duplicated in rcu_nocb_try_bypass()
and nocb_gp_wait() with similar conditions.  This commit therefore
extracts that logic into a common helper function,
nocb_bypass_needs_flush(), improving code readability.  A flush_faster
parameter is added to control the flushing thresholds and timeouts.
This distinction dates back to the original commit d1b222c6be1f
("rcu/nocb: Add bypass callback queueing"), which avoided having the GP
kthread aggressively flush the bypass queue.

Reviewed-by: Frederic Weisbecker
Signed-off-by: Joel Fernandes
---
 kernel/rcu/tree_nocb.h | 51 ++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 37 insertions(+), 14 deletions(-)

diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index d5e4d23090e8..1047b30cd46b 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -378,6 +378,38 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j)
	WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j, false));
 }

+/*
+ * Determine if the bypass queue needs to be flushed based on time and size.
+ * For lazy-only bypass queues, use the lazy flush timeout; otherwise flush
+ * based on jiffy advancement.  The flush_faster parameter controls flush
+ * aggressiveness.
+ */
+static bool nocb_bypass_needs_flush(struct rcu_data *rdp, long bypass_ncbs,
+				    long lazy_ncbs, unsigned long j,
+				    bool flush_faster)
+{
+	bool bypass_is_lazy;
+	unsigned long bypass_first;
+	unsigned long flush_timeout;
+	long qhimark_thresh;
+
+	if (!bypass_ncbs)
+		return false;
+
+	qhimark_thresh = flush_faster ? qhimark : 2 * qhimark;
+	if (bypass_ncbs >= qhimark_thresh)
+		return true;
+
+	bypass_first = READ_ONCE(rdp->nocb_bypass_first);
+	bypass_is_lazy = (bypass_ncbs == lazy_ncbs);
+
+	if (bypass_is_lazy)
+		flush_timeout = rcu_get_jiffies_lazy_flush();
+	else
+		flush_timeout = flush_faster ? 0 : 1;
+
+	return time_after(j, bypass_first + flush_timeout);
+}
+
 /*
  * See whether it is appropriate to use the ->nocb_bypass list in order
  * to control contention on ->nocb_lock.  A limited number of direct
@@ -404,7 +436,8 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
	unsigned long cur_gp_seq;
	unsigned long j = jiffies;
	long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
-	bool bypass_is_lazy = (ncbs == READ_ONCE(rdp->lazy_len));
+	long lazy_len = READ_ONCE(rdp->lazy_len);
+	bool bypass_is_lazy = (ncbs == lazy_len);

	lockdep_assert_irqs_disabled();

@@ -456,10 +489,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,

	// If ->nocb_bypass has been used too long or is too full,
	// flush ->nocb_bypass to ->cblist.
-	if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) ||
-	    (ncbs && bypass_is_lazy &&
-	     (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()))) ||
-	    ncbs >= qhimark) {
+	if (nocb_bypass_needs_flush(rdp, ncbs, lazy_len, j, true)) {
		rcu_nocb_lock(rdp);
		*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);

@@ -673,15 +703,8 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
		bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
		lazy_ncbs = READ_ONCE(rdp->lazy_len);

-		if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) &&
-		    (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()) ||
-		     bypass_ncbs > 2 * qhimark)) {
-			flush_bypass = true;
-		} else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) &&
-			   (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) ||
-			    bypass_ncbs > 2 * qhimark)) {
-			flush_bypass = true;
-		} else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) {
+		flush_bypass = nocb_bypass_needs_flush(rdp, bypass_ncbs, lazy_ncbs, j, false);
+		if (!flush_bypass && !bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) {
			rcu_nocb_unlock_irqrestore(rdp, flags);
			continue; /* No callbacks here, try next. */
		}
-- 
cgit v1.2.3
From a18396219ba52b524d8b86bf9e2515b01c068614 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Wed, 4 Mar 2026 15:40:39 -0800
Subject: torture: Avoid modulo-zero error in torture_hrtimeout_ns()

Currently, all calls to torture_hrtimeout_ns() either provide a
non-zero fuzzt_ns or a NULL trsp, either of which avoids taking the
modulus of a zero-valued fuzzt_ns.  But this code should do a better
job of defending itself, so this commit explicitly checks fuzzt_ns and
avoids the modulus when its value is zero.

Reviewed-by: Joel Fernandes
Signed-off-by: Paul E. McKenney
Signed-off-by: Joel Fernandes
---
 kernel/torture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/torture.c b/kernel/torture.c
index ec3370986976..62c1ac777694 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -93,7 +93,7 @@ int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, const enum hrtimer_mode
 {
	ktime_t hto = baset_ns;

-	if (trsp)
+	if (trsp && fuzzt_ns)
		hto += torture_random(trsp) % fuzzt_ns;
	set_current_state(TASK_IDLE);
	return schedule_hrtimeout(&hto, mode);
-- 
cgit v1.2.3
From ab875b3e179ff7ca2a982bc14f7fe810862c7594 Mon Sep 17 00:00:00 2001
From: Gustavo Luiz Duarte
Date: Tue, 17 Mar 2026 17:41:17 -0400
Subject: rcu: Add BOOTPARAM_RCU_STALL_PANIC Kconfig option

Add a Kconfig option to set the default value of the
kernel.panic_on_rcu_stall sysctl, allowing the kernel to be built with
panic-on-RCU-stall enabled by default.  This is useful for
high-availability systems that require automatic recovery (via
panic_timeout) when an RCU CPU stall is detected, without needing
userspace to configure the sysctl at boot.
This follows the pattern established by BOOTPARAM_SOFTLOCKUP_PANIC and
BOOTPARAM_HUNG_TASK_PANIC.  The runtime sysctl can still override the
Kconfig default.

Reviewed-by: Paul E. McKenney
Signed-off-by: Gustavo Luiz Duarte
Signed-off-by: Joel Fernandes
---
 kernel/rcu/Kconfig.debug | 24 ++++++++++++++++++++++++
 kernel/rcu/tree_stall.h  |  2 +-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index e078e988773d..35218ba74eb5 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -175,6 +175,30 @@ config RCU_CPU_STALL_NOTIFIER
	  Say Y here if you want RCU CPU stall notifiers (you don't want them)
	  Say N if you are unsure.

+config BOOTPARAM_RCU_STALL_PANIC
+	bool "Panic (reboot) on RCU CPU stall"
+	depends on RCU_STALL_COMMON
+	default n
+	help
+	  Say Y here to enable the kernel to panic when an RCU CPU stall
+	  is detected.
+
+	  The panic can be used in combination with panic_timeout,
+	  to cause the system to reboot automatically after an
+	  RCU CPU stall has been detected.  This feature is useful for
+	  high-availability systems that have uptime guarantees and
+	  where a CPU stall must be resolved ASAP.
+
+	  The kernel.max_rcu_stall_to_panic sysctl can be used to set
+	  a minimum number of stalls before panicking, allowing the
+	  system to tolerate a given number of RCU CPU stalls before
+	  triggering a panic.
+
+	  This setting can be overridden at runtime via the
+	  kernel.panic_on_rcu_stall sysctl parameter.
+
+	  Say N if unsure.
+
 config RCU_TRACE
	bool "Enable tracing for RCU"
	depends on DEBUG_KERNEL
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index b67532cb8770..43ddabf46b5e 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -17,7 +17,7 @@
 // Controlling CPU stall warnings, including delay calculation.

 /* panic() on RCU Stall sysctl. */
-static int sysctl_panic_on_rcu_stall __read_mostly;
+static int sysctl_panic_on_rcu_stall __read_mostly = IS_ENABLED(CONFIG_BOOTPARAM_RCU_STALL_PANIC);
 static int sysctl_max_rcu_stall_to_panic __read_mostly;

 static const struct ctl_table rcu_stall_sysctl_table[] = {
-- 
cgit v1.2.3
From 95c7d025cc8c3c6c41206e2a18332eb04878b7ef Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Sat, 14 Mar 2026 06:18:48 -0700
Subject: rcutorture: Test call_srcu() with preemption disabled and not

This commit tests invoking call_srcu() with preemption both enabled and
disabled, the latter by acquiring the current task's pi_lock across the
call.

[ Joel: reword commit message. ]

Signed-off-by: Paul E. McKenney
Signed-off-by: Joel Fernandes
---
 kernel/rcu/rcutorture.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 3c272413666b..5f2848b828dc 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -842,7 +842,14 @@ static unsigned long srcu_torture_completed(void)

 static void srcu_torture_deferred_free(struct rcu_torture *rp)
 {
+	unsigned long flags;
+	bool lockit = jiffies & 0x1;
+
+	if (lockit)
+		raw_spin_lock_irqsave(&current->pi_lock, flags);
	call_srcu(srcu_ctlp, &rp->rtort_rcu, rcu_torture_cb);
+	if (lockit)
+		raw_spin_unlock_irqrestore(&current->pi_lock, flags);
 }

 static void srcu_torture_synchronize(void)
-- 
cgit v1.2.3
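The jiffies & 0x1 trick above is a reusable testing idiom: alternate between
two code paths off a free-running counter instead of adding a configuration
knob for each variant.  A user-space illustration of my own (names mine, the
low bit of a coarse clock playing the role of jiffies):

#include <stdio.h>
#include <time.h>

static void api_under_test(int ctx_restricted)
{
	/* Stand-in for call_srcu() invoked with and without a context
	 * restriction, such as a raw spinlock being held. */
	printf("invoked %s restriction\n", ctx_restricted ? "with" : "without");
}

int main(void)
{
	for (int i = 0; i < 8; i++) {
		struct timespec ts;

		clock_gettime(CLOCK_MONOTONIC, &ts);
		/* Alternate paths off the clock's low-order bit. */
		api_under_test(ts.tv_nsec & 0x1);
	}
	return 0;
}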