Merge tag 'rcu-next-v6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux

Pull RCU updates from Boqun Feng: "Documentation: - Add broken-timing possibility to stallwarn.rst - Improve discussion of this_cpu_ptr(), add raw_cpu_ptr() - Document self-propagating callbacks - Point call_srcu() to call_rcu() for detailed memory ordering - Add CONFIG_RCU_LAZY delays to call_rcu() kernel-doc header - Clarify RCU_LAZY and RCU_LAZY_DEFAULT_OFF help text - Remove references to old grace-period-wait primitives srcu: - Introduce srcu_read_{un,}lock_fast(), which is similar to srcu_read_{un,}lock_lite(): avoid smp_mb()s in lock and unlock at the cost of calling synchronize_rcu() in synchronize_srcu() Moreover, by returning the percpu offset of the counter at srcu_read_lock_fast() time, srcu_read_unlock_fast() can avoid extra pointer dereferencing, which makes it faster than srcu_read_{un,}lock_lite() srcu_read_{un,}lock_fast() are intended to replace rcu_read_{un,}lock_trace() if possible RCU torture: - Add get_torture_init_jiffies() to return the start time of the test - Add a test_boost_holdoff module parameter to allow delaying boosting tests when building rcutorture as built-in - Add grace period sequence number logging at the beginning and end of failure/close-call results - Switch to hexadecimal for the expedited grace period sequence number in the rcu_exp_grace_period trace point - Make cur_ops->format_gp_seqs take buffer length - Move RCU_TORTURE_TEST_{CHK_RDR_STATE,LOG_CPU} to bool - Complain when invalid SRCU reader_flavor is specified - Add FORCE_NEED_SRCU_NMI_SAFE Kconfig for testing, which forces SRCU uses atomics even when percpu ops are NMI safe, and use the Kconfig for SRCU lockdep testing Misc: - Split rcu_report_exp_cpu_mult() mask parameter and use for tracing - Remove READ_ONCE() for rdp->gpwrap access in __note_gp_changes() - Fix get_state_synchronize_rcu_full() GP-start detection - Move RCU Tasks self-tests to core_initcall() - Print segment lengths in show_rcu_nocb_gp_state() - Make RCU watch ct_kernel_exit_state() warning - Flush console log from kernel_power_off() - rcutorture: Allow a negative value for nfakewriters - rcu: Update TREE05.boot to test normal synchronize_rcu() - rcu: Use _full() API to debug synchronize_rcu() Make RCU handle PREEMPT_LAZY better: - Fix header guard for rcu_all_qs() - rcu: Rename PREEMPT_AUTO to PREEMPT_LAZY - Update __cond_resched comment about RCU quiescent states - Handle unstable rdp in rcu_read_unlock_strict() - Handle quiescent states for PREEMPT_RCU=n, PREEMPT_COUNT=y - osnoise: Provide quiescent states - Adjust rcutorture with possible PREEMPT_RCU=n && PREEMPT_COUNT=y combination - Limit PREEMPT_RCU configurations - Make rcutorture senario TREE07 and senario TREE10 use PREEMPT_LAZY=y" * tag 'rcu-next-v6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux: (59 commits) rcutorture: Make scenario TREE07 build CONFIG_PREEMPT_LAZY=y rcutorture: Make scenario TREE10 build CONFIG_PREEMPT_LAZY=y rcu: limit PREEMPT_RCU configurations rcutorture: Update ->extendables check for lazy preemption rcutorture: Update rcutorture_one_extend_check() for lazy preemption osnoise: provide quiescent states rcu: Use _full() API to debug synchronize_rcu() rcu: Update TREE05.boot to test normal synchronize_rcu() rcutorture: Allow a negative value for nfakewriters Flush console log from kernel_power_off() context_tracking: Make RCU watch ct_kernel_exit_state() warning rcu/nocb: Print segment lengths in show_rcu_nocb_gp_state() rcu-tasks: Move RCU Tasks self-tests to core_initcall() rcu: Fix get_state_synchronize_rcu_full() GP-start detection torture: Make SRCU lockdep testing use srcu_read_lock_nmisafe() srcu: Add FORCE_NEED_SRCU_NMI_SAFE Kconfig for testing rcutorture: Complain when invalid SRCU reader_flavor is specified rcutorture: Move RCU_TORTURE_TEST_{CHK_RDR_STATE,LOG_CPU} to bool rcutorture: Make cur_ops->format_gp_seqs take buffer length rcutorture: Add ftrace-compatible timestamp to GP# failure/close-call output ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2025-03-24 19:41:37 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2025-03-24 19:41:37 -0700
commit: 3ba7dfb8da62c43ea02bc278863367c2b0427cc1 (patch)
tree: 57ea0b9f16027db228f77b64fb1fd60eb167c289 /kernel/rcu/tree.c
parent: 2f2d529458523f6d889a926623f4ddc7de4df063 (diff)
parent: 467c890f2d1ad6de9fd1dbd196fdc8f3ee63190a (diff)
1 files changed, 51 insertions, 13 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 5dbc4189037c..659f83e71048 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -538,6 +538,26 @@ void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq)
 }
 EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
 
+/* Gather grace-period sequence numbers for rcutorture diagnostics. */
+unsigned long long rcutorture_gather_gp_seqs(void)
+{
+	return ((READ_ONCE(rcu_state.gp_seq) & 0xffffULL) << 40) |
+	       ((READ_ONCE(rcu_state.expedited_sequence) & 0xffffffULL) << 16) |
+	       (READ_ONCE(rcu_state.gp_seq_polled) & 0xffffULL);
+}
+EXPORT_SYMBOL_GPL(rcutorture_gather_gp_seqs);
+
+/* Format grace-period sequence numbers for rcutorture diagnostics. */
+void rcutorture_format_gp_seqs(unsigned long long seqs, char *cp, size_t len)
+{
+	unsigned int egp = (seqs >> 16) & 0xffffffULL;
+	unsigned int ggp = (seqs >> 40) & 0xffffULL;
+	unsigned int pgp = seqs & 0xffffULL;
+
+	snprintf(cp, len, "g%04x:e%06x:p%04x", ggp, egp, pgp);
+}
+EXPORT_SYMBOL_GPL(rcutorture_format_gp_seqs);
+
 #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK))
 /*
  * An empty function that will trigger a reschedule on
@@ -1254,7 +1274,7 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
 
 	/* Handle the ends of any preceding grace periods first. */
 	if (rcu_seq_completed_gp(rdp->gp_seq, rnp->gp_seq) ||
-	    unlikely(READ_ONCE(rdp->gpwrap))) {
+	    unlikely(rdp->gpwrap)) {
 		if (!offloaded)
 			ret = rcu_advance_cbs(rnp, rdp); /* Advance CBs. */
 		rdp->core_needs_qs = false;
@@ -1268,7 +1288,7 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
 
 	/* Now handle the beginnings of any new-to-this-CPU grace periods. */
 	if (rcu_seq_new_gp(rdp->gp_seq, rnp->gp_seq) ||
-	    unlikely(READ_ONCE(rdp->gpwrap))) {
+	    unlikely(rdp->gpwrap)) {
 		/*
 		 * If the current grace period is waiting for this CPU,
 		 * set up to detect a quiescent state, otherwise don't
@@ -1283,7 +1303,7 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
 	rdp->gp_seq = rnp->gp_seq;  /* Remember new grace-period state. */
 	if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap)
 		WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed);
-	if (IS_ENABLED(CONFIG_PROVE_RCU) && READ_ONCE(rdp->gpwrap))
+	if (IS_ENABLED(CONFIG_PROVE_RCU) && rdp->gpwrap)
 		WRITE_ONCE(rdp->last_sched_clock, jiffies);
 	WRITE_ONCE(rdp->gpwrap, false);
 	rcu_gpnum_ovf(rnp, rdp);
@@ -1612,12 +1632,10 @@ static void rcu_sr_normal_complete(struct llist_node *node)
 {
 	struct rcu_synchronize *rs = container_of(
 		(struct rcu_head *) node, struct rcu_synchronize, head);
-	unsigned long oldstate = (unsigned long) rs->head.func;
 
 	WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) &&
-		!poll_state_synchronize_rcu(oldstate),
-		"A full grace period is not passed yet: %lu",
-		rcu_seq_diff(get_state_synchronize_rcu(), oldstate));
+		!poll_state_synchronize_rcu_full(&rs->oldstate),
+		"A full grace period is not passed yet!\n");
 
 	/* Finally. */
 	complete(&rs->completion);
@@ -1801,10 +1819,14 @@ static noinline_for_stack bool rcu_gp_init(void)
 
 	/* Advance to a new grace period and initialize state. */
 	record_gp_stall_check_time();
+	/*
+	 * A new wait segment must be started before gp_seq advanced, so
+	 * that previous gp waiters won't observe the new gp_seq.
+	 */
+	start_new_poll = rcu_sr_normal_gp_init();
 	/* Record GP times before starting GP, hence rcu_seq_start(). */
 	rcu_seq_start(&rcu_state.gp_seq);
 	ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
-	start_new_poll = rcu_sr_normal_gp_init();
 	trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start"));
 	rcu_poll_gp_seq_start(&rcu_state.gp_seq_polled_snap);
 	raw_spin_unlock_irq_rcu_node(rnp);
@@ -3102,7 +3124,7 @@ module_param(enable_rcu_lazy, bool, 0444);
  * critical sections have completed.
  *
  * Use this API instead of call_rcu() if you don't want the callback to be
- * invoked after very long periods of time, which can happen on systems without
+ * delayed for very long periods of time, which can happen on systems without
  * memory pressure and on systems which are lightly loaded or mostly idle.
  * This function will cause callbacks to be invoked sooner than later at the
  * expense of extra power. Other than that, this function is identical to, and
@@ -3133,6 +3155,12 @@ EXPORT_SYMBOL_GPL(call_rcu_hurry);
  * might well execute concurrently with RCU read-side critical sections
  * that started after call_rcu() was invoked.
  *
+ * It is perfectly legal to repost an RCU callback, potentially with
+ * a different callback function, from within its callback function.
+ * The specified function will be invoked after another full grace period
+ * has elapsed.  This use case is similar in form to the common practice
+ * of reposting a timer from within its own handler.
+ *
  * RCU read-side critical sections are delimited by rcu_read_lock()
  * and rcu_read_unlock(), and may be nested.  In addition, but only in
  * v5.0 and later, regions of code across which interrupts, preemption,
@@ -3161,6 +3189,13 @@ EXPORT_SYMBOL_GPL(call_rcu_hurry);
  *
  * Implementation of these memory-ordering guarantees is described here:
  * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
+ *
+ * Specific to call_rcu() (as opposed to the other call_rcu*() functions),
+ * in kernels built with CONFIG_RCU_LAZY=y, call_rcu() might delay for many
+ * seconds before starting the grace period needed by the corresponding
+ * callback.  This delay can significantly improve energy-efficiency
+ * on low-utilization battery-powered devices.  To avoid this delay,
+ * in latency-sensitive kernel code, use call_rcu_hurry().
  */
 void call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
@@ -3209,7 +3244,7 @@ static void synchronize_rcu_normal(void)
 	 * snapshot before adding a request.
 	 */
 	if (IS_ENABLED(CONFIG_PROVE_RCU))
-		rs.head.func = (void *) get_state_synchronize_rcu();
+		get_state_synchronize_rcu_full(&rs.oldstate);
 
 	rcu_sr_normal_add_req(&rs);
 
@@ -3352,14 +3387,17 @@ EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);
  */
 void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
 {
-	struct rcu_node *rnp = rcu_get_root();
-
 	/*
 	 * Any prior manipulation of RCU-protected data must happen
 	 * before the loads from ->gp_seq and ->expedited_sequence.
 	 */
 	smp_mb();  /* ^^^ */
-	rgosp->rgos_norm = rcu_seq_snap(&rnp->gp_seq);
+
+	// Yes, rcu_state.gp_seq, not rnp_root->gp_seq, the latter's use
+	// in poll_state_synchronize_rcu_full() notwithstanding.  Use of
+	// the latter here would result in too-short grace periods due to
+	// interactions with newly onlined CPUs.
+	rgosp->rgos_norm = rcu_seq_snap(&rcu_state.gp_seq);
 	rgosp->rgos_exp = rcu_seq_snap(&rcu_state.expedited_sequence);
 }
 EXPORT_SYMBOL_GPL(get_state_synchronize_rcu_full);
author	Linus Torvalds <torvalds@linux-foundation.org>	2025-03-24 19:41:37 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2025-03-24 19:41:37 -0700
commit	3ba7dfb8da62c43ea02bc278863367c2b0427cc1 (patch)
tree	57ea0b9f16027db228f77b64fb1fd60eb167c289 /kernel/rcu/tree.c
parent	2f2d529458523f6d889a926623f4ddc7de4df063 (diff)
parent	467c890f2d1ad6de9fd1dbd196fdc8f3ee63190a (diff)