From 15c7c972cd26d89a26788e609c53b5a465324a6c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 7 Oct 2019 18:53:18 -0700 Subject: rcu: Use *_ONCE() to protect lockless ->expmask accesses The rcu_node structure's ->expmask field is accessed locklessly when starting a new expedited grace period and when reporting an expedited RCU CPU stall warning. This commit therefore handles the former by taking a snapshot of ->expmask while the lock is held and the latter by applying READ_ONCE() to lockless reads and WRITE_ONCE() to the corresponding updates. Link: https://lore.kernel.org/lkml/CANpmjNNmSOagbTpffHr4=Yedckx9Rm2NuGqC9UqE+AOz5f1-ZQ@mail.gmail.com Reported-by: syzbot+134336b86f728d6e55a0@syzkaller.appspotmail.com Signed-off-by: Paul E. McKenney Acked-by: Marco Elver --- kernel/rcu/tree_exp.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index d632cd019597..69c5aa64fcfd 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -134,7 +134,7 @@ static void __maybe_unused sync_exp_reset_tree(void) rcu_for_each_node_breadth_first(rnp) { raw_spin_lock_irqsave_rcu_node(rnp, flags); WARN_ON_ONCE(rnp->expmask); - rnp->expmask = rnp->expmaskinit; + WRITE_ONCE(rnp->expmask, rnp->expmaskinit); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } } @@ -211,7 +211,7 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp, rnp = rnp->parent; raw_spin_lock_rcu_node(rnp); /* irqs already disabled */ WARN_ON_ONCE(!(rnp->expmask & mask)); - rnp->expmask &= ~mask; + WRITE_ONCE(rnp->expmask, rnp->expmask & ~mask); } } @@ -241,7 +241,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_node *rnp, raw_spin_unlock_irqrestore_rcu_node(rnp, flags); return; } - rnp->expmask &= ~mask; + WRITE_ONCE(rnp->expmask, rnp->expmask & ~mask); __rcu_report_exp_rnp(rnp, wake, flags); /* Releases rnp->lock. 
*/ } @@ -372,12 +372,10 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); /* IPI the remaining CPUs for expedited quiescent state. */ - for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) { + for_each_leaf_node_cpu_mask(rnp, cpu, mask_ofl_ipi) { unsigned long mask = leaf_node_cpu_bit(rnp, cpu); struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); - if (!(mask_ofl_ipi & mask)) - continue; retry_ipi: if (rcu_dynticks_in_eqs_since(rdp, rdp->exp_dynticks_snap)) { mask_ofl_test |= mask; @@ -491,7 +489,7 @@ static void synchronize_sched_expedited_wait(void) struct rcu_data *rdp; mask = leaf_node_cpu_bit(rnp, cpu); - if (!(rnp->expmask & mask)) + if (!(READ_ONCE(rnp->expmask) & mask)) continue; ndetected++; rdp = per_cpu_ptr(&rcu_data, cpu); @@ -503,7 +501,8 @@ static void synchronize_sched_expedited_wait(void) } pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n", jiffies - jiffies_start, rcu_state.expedited_sequence, - rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]); + READ_ONCE(rnp_root->expmask), + ".T"[!!rnp_root->exp_tasks]); if (ndetected) { pr_err("blocking rcu_node structures:"); rcu_for_each_node_breadth_first(rnp) { @@ -513,7 +512,7 @@ static void synchronize_sched_expedited_wait(void) continue; pr_cont(" l=%u:%d-%d:%#lx/%c", rnp->level, rnp->grplo, rnp->grphi, - rnp->expmask, + READ_ONCE(rnp->expmask), ".T"[!!rnp->exp_tasks]); } pr_cont("\n"); @@ -521,7 +520,7 @@ static void synchronize_sched_expedited_wait(void) rcu_for_each_leaf_node(rnp) { for_each_leaf_node_possible_cpu(rnp, cpu) { mask = leaf_node_cpu_bit(rnp, cpu); - if (!(rnp->expmask & mask)) + if (!(READ_ONCE(rnp->expmask) & mask)) continue; dump_cpu_task(cpu); } -- cgit v1.2.3 From 9f08cf088676c12a5b53bd5a29cf04f00c787b5d Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 8 Oct 2019 13:01:40 +0800 Subject: rcu: Avoid modifying mask_ofl_ipi in sync_rcu_exp_select_node_cpus() The "mask_ofl_ipi" is used to track which CPUs get IPIed, 
however in the IPI sending loop, "mask_ofl_ipi" along with another variable "mask_ofl_test" might also get modified to record which CPUs' quiescent states must be reported by sync_rcu_exp_select_node_cpus() at the end of sync_rcu_exp_select_node_cpus(). This overlap of roles can be confusing, so this patch cleans things up a little by using "mask_ofl_ipi" solely for determining which CPUs must be IPIed and "mask_ofl_test" solely for determining on behalf of which CPUs sync_rcu_exp_select_node_cpus() must report a quiescent state. Signed-off-by: Boqun Feng Signed-off-by: Paul E. McKenney Reviewed-by: Joel Fernandes (Google) Acked-by: Marco Elver --- kernel/rcu/tree_exp.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 69c5aa64fcfd..6a6f328a5f52 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -387,10 +387,10 @@ retry_ipi: } ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0); put_cpu(); - if (!ret) { - mask_ofl_ipi &= ~mask; + /* The CPU will report the QS in response to the IPI. */ + if (!ret) continue; - } + /* Failed, raced with CPU hotplug operation. */ raw_spin_lock_irqsave_rcu_node(rnp, flags); if ((rnp->qsmaskinitnext & mask) && @@ -401,13 +401,12 @@ retry_ipi: schedule_timeout_uninterruptible(1); goto retry_ipi; } - /* CPU really is offline, so we can ignore it. */ - if (!(rnp->expmask & mask)) - mask_ofl_ipi &= ~mask; + /* CPU really is offline, so we must report its QS. */ + if (rnp->expmask & mask) + mask_ofl_test |= mask; raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } /* Report quiescent states for those that went offline. 
*/ - mask_ofl_test |= mask_ofl_ipi; if (mask_ofl_test) rcu_report_exp_cpu_mult(rnp, mask_ofl_test, false); } -- cgit v1.2.3 From 6cf539a87a61a4fbc43f625267dbcbcf283872ed Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 9 Oct 2019 17:57:43 +0200 Subject: rcu: Fix data-race due to atomic_t copy-by-value This fixes a data-race where `atomic_t dynticks` is copied by value. The copy is performed non-atomically, resulting in a data-race if `dynticks` is updated concurrently. This data-race was found with KCSAN: ================================================================== BUG: KCSAN: data-race in dyntick_save_progress_counter / rcu_irq_enter write to 0xffff989dbdbe98e0 of 4 bytes by task 10 on cpu 3: atomic_add_return include/asm-generic/atomic-instrumented.h:78 [inline] rcu_dynticks_snap kernel/rcu/tree.c:310 [inline] dyntick_save_progress_counter+0x43/0x1b0 kernel/rcu/tree.c:984 force_qs_rnp+0x183/0x200 kernel/rcu/tree.c:2286 rcu_gp_fqs kernel/rcu/tree.c:1601 [inline] rcu_gp_fqs_loop+0x71/0x880 kernel/rcu/tree.c:1653 rcu_gp_kthread+0x22c/0x3b0 kernel/rcu/tree.c:1799 kthread+0x1b5/0x200 kernel/kthread.c:255 read to 0xffff989dbdbe98e0 of 4 bytes by task 154 on cpu 7: rcu_nmi_enter_common kernel/rcu/tree.c:828 [inline] rcu_irq_enter+0xda/0x240 kernel/rcu/tree.c:870 irq_enter+0x5/0x50 kernel/softirq.c:347 Reported by Kernel Concurrency Sanitizer on: CPU: 7 PID: 154 Comm: kworker/7:1H Not tainted 5.3.0+ #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Workqueue: kblockd blk_mq_run_work_fn ================================================================== Signed-off-by: Marco Elver Cc: Paul E. McKenney Cc: Josh Triplett Cc: Steven Rostedt Cc: Mathieu Desnoyers Cc: Joel Fernandes Cc: Ingo Molnar Cc: Dmitry Vyukov Cc: rcu@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Joel Fernandes (Google) Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1694a6b57ad8..6145e08a1407 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -577,7 +577,7 @@ static void rcu_eqs_enter(bool user) } lockdep_assert_irqs_disabled(); - trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, rdp->dynticks); + trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks)); WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); rdp = this_cpu_ptr(&rcu_data); do_nocb_deferred_wakeup(rdp); @@ -650,14 +650,15 @@ static __always_inline void rcu_nmi_exit_common(bool irq) * leave it in non-RCU-idle state. */ if (rdp->dynticks_nmi_nesting != 1) { - trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2, rdp->dynticks); + trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2, + atomic_read(&rdp->dynticks)); WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */ rdp->dynticks_nmi_nesting - 2); return; } /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ - trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, rdp->dynticks); + trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks)); WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. 
*/ if (irq) @@ -744,7 +745,7 @@ static void rcu_eqs_exit(bool user) rcu_dynticks_task_exit(); rcu_dynticks_eqs_exit(); rcu_cleanup_after_idle(); - trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, rdp->dynticks); + trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks)); WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); WRITE_ONCE(rdp->dynticks_nesting, 1); WARN_ON_ONCE(rdp->dynticks_nmi_nesting); @@ -833,7 +834,7 @@ static __always_inline void rcu_nmi_enter_common(bool irq) } trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="), rdp->dynticks_nmi_nesting, - rdp->dynticks_nmi_nesting + incby, rdp->dynticks); + rdp->dynticks_nmi_nesting + incby, atomic_read(&rdp->dynticks)); WRITE_ONCE(rdp->dynticks_nmi_nesting, /* Prevent store tearing. */ rdp->dynticks_nmi_nesting + incby); barrier(); -- cgit v1.2.3 From aca2991a25da03ca96127b1d21e1f4aba41f81a6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 30 Oct 2019 06:51:57 -0700 Subject: rcu: Substitute lookup for bit-twiddling in sync_rcu_exp_select_node_cpus() The code in sync_rcu_exp_select_node_cpus() calculates the current CPU's mask within its rcu_node structure's bitmasks, but this has already been computed in the ->grpmask field of that CPU's rcu_data structure. This commit therefore just uses this ->grpmask field. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_exp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 6a6f328a5f52..3b59c3ee42e5 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -345,8 +345,8 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp) /* Each pass checks a CPU for identity, offline, and idle. 
*/ mask_ofl_test = 0; for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) { - unsigned long mask = leaf_node_cpu_bit(rnp, cpu); struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + unsigned long mask = rdp->grpmask; int snap; if (raw_smp_processor_id() == cpu || @@ -373,8 +373,8 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp) /* IPI the remaining CPUs for expedited quiescent state. */ for_each_leaf_node_cpu_mask(rnp, cpu, mask_ofl_ipi) { - unsigned long mask = leaf_node_cpu_bit(rnp, cpu); struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + unsigned long mask = rdp->grpmask; retry_ipi: if (rcu_dynticks_in_eqs_since(rdp, rdp->exp_dynticks_snap)) { -- cgit v1.2.3 From fd6bc19d7676a060a171d1cf3dcbf6fd797eb05f Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Tue, 19 Nov 2019 03:17:07 +0000 Subject: rcu: Fix missed wakeup of exp_wq waiters Tasks waiting within exp_funnel_lock() for an expedited grace period to elapse can be starved due to the following sequence of events: 1. Tasks A and B both attempt to start an expedited grace period at about the same time. This grace period will have completed when the lower four bits of the rcu_state structure's ->expedited_sequence field are 0b'0100', for example, when the initial value of this counter is zero. Task A wins, and thus does the actual work of starting the grace period, including acquiring the rcu_state structure's .exp_mutex and sets the counter to 0b'0001'. 2. Because task B lost the race to start the grace period, it waits on ->expedited_sequence to reach 0b'0100' inside of exp_funnel_lock(). This task therefore blocks on the rcu_node structure's ->exp_wq[1] field, keeping in mind that the end-of-grace-period value of ->expedited_sequence (0b'0100') is shifted down two bits before indexing the ->exp_wq[] field. 3. Task C attempts to start another expedited grace period, but blocks on ->exp_mutex, which is still held by Task A. 4. 
The aforementioned expedited grace period completes, so that ->expedited_sequence now has the value 0b'0100'. A kworker task therefore acquires the rcu_state structure's ->exp_wake_mutex and starts awakening any tasks waiting for this grace period. 5. One of the first tasks awakened happens to be Task A. Task A therefore releases the rcu_state structure's ->exp_mutex, which allows Task C to start the next expedited grace period, which causes the lower four bits of the rcu_state structure's ->expedited_sequence field to become 0b'0101'. 6. Task C's expedited grace period completes, so that the lower four bits of the rcu_state structure's ->expedited_sequence field now become 0b'1000'. 7. The kworker task from step 4 above continues its wakeups. Unfortunately, the wake_up_all() refetches the rcu_state structure's .expedited_sequence field: wake_up_all(&rnp->exp_wq[rcu_seq_ctr(rcu_state.expedited_sequence) & 0x3]); This results in the wakeup being applied to the rcu_node structure's ->exp_wq[2] field, which is unfortunate given that Task B is instead waiting on ->exp_wq[1]. On a busy system, no harm is done (or at least no permanent harm is done). Some later expedited grace period will redo the wakeup. But on a quiet system, such as many embedded systems, it might be a good long time before there was another expedited grace period. On such embedded systems, this situation could therefore result in a system hang. This issue manifested as DPM device timeout during suspend (which usually qualifies as a quiet time) due to a SCSI device being stuck in _synchronize_rcu_expedited(), with the following stack trace: schedule() synchronize_rcu_expedited() synchronize_rcu() scsi_device_quiesce() scsi_bus_suspend() dpm_run_callback() __device_suspend() This commit therefore prevents such delays, timeouts, and hangs by making rcu_exp_wait_wake() use its "s" argument consistently instead of refetching from rcu_state.expedited_sequence. 
Fixes: 3b5f668e715b ("rcu: Overlap wakeups with next expedited grace period") Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_exp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 3b59c3ee42e5..fa143e40cd93 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -557,7 +557,7 @@ static void rcu_exp_wait_wake(unsigned long s) spin_unlock(&rnp->exp_lock); } smp_mb(); /* All above changes before wakeup. */ - wake_up_all(&rnp->exp_wq[rcu_seq_ctr(rcu_state.expedited_sequence) & 0x3]); + wake_up_all(&rnp->exp_wq[rcu_seq_ctr(s) & 0x3]); } trace_rcu_exp_grace_period(rcu_state.name, s, TPS("endwake")); mutex_unlock(&rcu_state.exp_wake_mutex); -- cgit v1.2.3 From 4bc6b745e5cbefed92c48071e28a5f41246d0470 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Tue, 19 Nov 2019 11:50:52 -0800 Subject: rcu: Allow only one expedited GP to run concurrently with wakeups The current expedited RCU grace-period code expects that a task requesting an expedited grace period cannot awaken until that grace period has reached the wakeup phase. However, it is possible for a long preemption to result in the waiting task never sleeping. For example, consider the following sequence of events: 1. Task A starts an expedited grace period by invoking synchronize_rcu_expedited(). It proceeds normally up to the wait_event() near the end of that function, and is then preempted (or interrupted or whatever). 2. The expedited grace period completes, and a kworker task starts the awaken phase, having incremented the counter and acquired the rcu_state structure's .exp_wake_mutex. This kworker task is then preempted or interrupted or whatever. 3. Task A resumes and enters wait_event(), which notes that the expedited grace period has completed, and thus doesn't sleep. 4. 
Task B starts an expedited grace period exactly as did Task A, complete with the preemption (or whatever delay) just before the call to wait_event(). 5. The expedited grace period completes, and another kworker task starts the awaken phase, having incremented the counter. However, it blocks when attempting to acquire the rcu_state structure's .exp_wake_mutex because step 2's kworker task has not yet released it. 6. Steps 4 and 5 repeat, resulting in overflow of the rcu_node structure's ->exp_wq[] array. In theory, this is harmless. Tasks waiting on the various ->exp_wq[] array elements will just be spuriously awakened, but they will just sleep again on noting that the rcu_state structure's ->expedited_sequence value has not advanced far enough. In practice, this wastes CPU time and is an accident waiting to happen. This commit therefore moves the rcu_exp_gp_seq_end() call that officially ends the expedited grace period (along with associated tracing) until after the ->exp_wake_mutex has been acquired. This prevents Task A from awakening prematurely, thus preventing more than one expedited grace period from being in flight during a previous expedited grace period's wakeup phase. Fixes: 3b5f668e715b ("rcu: Overlap wakeups with next expedited grace period") Signed-off-by: Neeraj Upadhyay [ paulmck: Added updated comment. ] Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_exp.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index fa143e40cd93..7a1f09376e62 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -539,14 +539,13 @@ static void rcu_exp_wait_wake(unsigned long s) struct rcu_node *rnp; synchronize_sched_expedited_wait(); - rcu_exp_gp_seq_end(); - trace_rcu_exp_grace_period(rcu_state.name, s, TPS("end")); - /* - * Switch over to wakeup mode, allowing the next GP, but -only- the - * next GP, to proceed. 
- */ + // Switch over to wakeup mode, allowing the next GP to proceed. + // End the previous grace period only after acquiring the mutex + // to ensure that only one GP runs concurrently with wakeups. mutex_lock(&rcu_state.exp_wake_mutex); + rcu_exp_gp_seq_end(); + trace_rcu_exp_grace_period(rcu_state.name, s, TPS("end")); rcu_for_each_node_breadth_first(rnp) { if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) { -- cgit v1.2.3 From 6c7d7dbf5b7f965eda0d39fbbb8fee005b08f340 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 27 Nov 2019 13:59:37 -0800 Subject: rcu: Rename sync_rcu_preempt_exp_done() to sync_rcu_exp_done() Now that the RCU flavors have been consolidated, there is one common function for checking to see if an expedited RCU grace period has completed, namely sync_rcu_preempt_exp_done(). Because this function is no longer specific to RCU-preempt, this commit removes the "_preempt" from its name. This commit also changes sync_rcu_preempt_exp_done_unlocked() to sync_rcu_exp_done_unlocked() for the same reason. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree_exp.h | 19 +++++++++---------- kernel/rcu/tree_plugin.h | 4 ++-- 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 7a1f09376e62..3923c0743c3e 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -148,7 +148,7 @@ static void __maybe_unused sync_exp_reset_tree(void) * * Caller must hold the specificed rcu_node structure's ->lock */ -static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp) +static bool sync_rcu_exp_done(struct rcu_node *rnp) { raw_lockdep_assert_held_rcu_node(rnp); @@ -157,17 +157,16 @@ static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp) } /* - * Like sync_rcu_preempt_exp_done(), but this function assumes the caller - * doesn't hold the rcu_node's ->lock, and will acquire and release the lock - * itself + * Like sync_rcu_exp_done(), but this function assumes the caller doesn't + * hold the rcu_node's ->lock, and will acquire and release the lock itself */ -static bool sync_rcu_preempt_exp_done_unlocked(struct rcu_node *rnp) +static bool sync_rcu_exp_done_unlocked(struct rcu_node *rnp) { unsigned long flags; bool ret; raw_spin_lock_irqsave_rcu_node(rnp, flags); - ret = sync_rcu_preempt_exp_done(rnp); + ret = sync_rcu_exp_done(rnp); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); return ret; @@ -191,7 +190,7 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp, unsigned long mask; for (;;) { - if (!sync_rcu_preempt_exp_done(rnp)) { + if (!sync_rcu_exp_done(rnp)) { if (!rnp->expmask) rcu_initiate_boost(rnp, flags); else @@ -471,9 +470,9 @@ static void synchronize_sched_expedited_wait(void) for (;;) { ret = swait_event_timeout_exclusive( rcu_state.expedited_wq, - sync_rcu_preempt_exp_done_unlocked(rnp_root), + sync_rcu_exp_done_unlocked(rnp_root), jiffies_stall); - if (ret > 0 || sync_rcu_preempt_exp_done_unlocked(rnp_root)) + if (ret > 0 || sync_rcu_exp_done_unlocked(rnp_root)) return; WARN_ON(ret < 0); /* 
workqueues should not be signaled. */ if (rcu_cpu_stall_suppress) @@ -507,7 +506,7 @@ static void synchronize_sched_expedited_wait(void) rcu_for_each_node_breadth_first(rnp) { if (rnp == rnp_root) continue; /* printed unconditionally */ - if (sync_rcu_preempt_exp_done_unlocked(rnp)) + if (sync_rcu_exp_done_unlocked(rnp)) continue; pr_cont(" l=%u:%d-%d:%#lx/%c", rnp->level, rnp->grplo, rnp->grphi, diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index fa08d55f7040..6dbea4bcf065 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -485,7 +485,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq && (!empty_norm || rnp->qsmask)); - empty_exp = sync_rcu_preempt_exp_done(rnp); + empty_exp = sync_rcu_exp_done(rnp); smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ np = rcu_next_node_entry(t, rnp); list_del_init(&t->rcu_node_entry); @@ -509,7 +509,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, * so we must take a snapshot of the expedited state. */ - empty_exp_now = sync_rcu_preempt_exp_done(rnp); + empty_exp_now = sync_rcu_exp_done(rnp); if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) { trace_rcu_quiescent_state_report(TPS("preempt_rcu"), rnp->gp_seq, -- cgit v1.2.3 From de8cd0a533bfb57ff4ec6c85e3bdca013a5adcb7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 27 Nov 2019 14:20:41 -0800 Subject: rcu: Update tree_exp.h function-header comments The function-header comments in kernel/rcu/tree_exp.h have gotten a bit out of date, so this commit updates a number of them. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree_exp.h | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 3923c0743c3e..1eafbcd56679 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -21,7 +21,7 @@ static void rcu_exp_gp_seq_start(void) } /* - * Return then value that expedited-grace-period counter will have + * Return the value that the expedited-grace-period counter will have * at the end of the current grace period. */ static __maybe_unused unsigned long rcu_exp_gp_seq_endval(void) @@ -39,7 +39,9 @@ static void rcu_exp_gp_seq_end(void) } /* - * Take a snapshot of the expedited-grace-period counter. + * Take a snapshot of the expedited-grace-period counter, which is the + * earliest value that will indicate that a full grace period has + * elapsed since the current time. */ static unsigned long rcu_exp_gp_seq_snap(void) { @@ -143,22 +145,18 @@ static void __maybe_unused sync_exp_reset_tree(void) * Return non-zero if there is no RCU expedited grace period in progress * for the specified rcu_node structure, in other words, if all CPUs and * tasks covered by the specified rcu_node structure have done their bit - * for the current expedited grace period. Works only for preemptible - * RCU -- other RCU implementation use other means. - * - * Caller must hold the specificed rcu_node structure's ->lock + * for the current expedited grace period. */ static bool sync_rcu_exp_done(struct rcu_node *rnp) { raw_lockdep_assert_held_rcu_node(rnp); - return rnp->exp_tasks == NULL && READ_ONCE(rnp->expmask) == 0; } /* - * Like sync_rcu_exp_done(), but this function assumes the caller doesn't - * hold the rcu_node's ->lock, and will acquire and release the lock itself + * Like sync_rcu_exp_done(), but where the caller does not hold the + * rcu_node's ->lock. 
*/ static bool sync_rcu_exp_done_unlocked(struct rcu_node *rnp) { @@ -180,8 +178,6 @@ static bool sync_rcu_exp_done_unlocked(struct rcu_node *rnp) * which the task was queued or to one of that rcu_node structure's ancestors, * recursively up the tree. (Calm down, calm down, we do the recursion * iteratively!) - * - * Caller must hold the specified rcu_node structure's ->lock. */ static void __rcu_report_exp_rnp(struct rcu_node *rnp, bool wake, unsigned long flags) @@ -189,6 +185,7 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp, { unsigned long mask; + raw_lockdep_assert_held_rcu_node(rnp); for (;;) { if (!sync_rcu_exp_done(rnp)) { if (!rnp->expmask) @@ -452,6 +449,10 @@ static void sync_rcu_exp_select_cpus(void) flush_work(&rnp->rew.rew_work); } +/* + * Wait for the expedited grace period to elapse, issuing any needed + * RCU CPU stall warnings along the way. + */ static void synchronize_sched_expedited_wait(void) { int cpu; @@ -781,7 +782,7 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp) * implementations, it is still unfriendly to real-time workloads, so is * thus not recommended for any sort of common-case code. In fact, if * you are using synchronize_rcu_expedited() in a loop, please restructure - * your code to batch your updates, and then Use a single synchronize_rcu() + * your code to batch your updates, and then use a single synchronize_rcu() * instead. * * This has the same semantics as (but is more brutal than) synchronize_rcu(). -- cgit v1.2.3 From 28f0361fdfab267a392cd6a6401446c9ea64de95 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 27 Nov 2019 14:24:58 -0800 Subject: rcu: Replace synchronize_sched_expedited_wait() "_sched" with "_rcu" After RCU flavor consolidation, synchronize_sched_expedited_wait() does both RCU-preempt and RCU-sched, whichever happens to have been built into the running kernel. 
This commit therefore changes this function's name to synchronize_rcu_expedited_wait() to reflect its new generic nature. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_exp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 1eafbcd56679..081a17942e57 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -453,7 +453,7 @@ static void sync_rcu_exp_select_cpus(void) * Wait for the expedited grace period to elapse, issuing any needed * RCU CPU stall warnings along the way. */ -static void synchronize_sched_expedited_wait(void) +static void synchronize_rcu_expedited_wait(void) { int cpu; unsigned long jiffies_stall; @@ -538,7 +538,7 @@ static void rcu_exp_wait_wake(unsigned long s) { struct rcu_node *rnp; - synchronize_sched_expedited_wait(); + synchronize_rcu_expedited_wait(); // Switch over to wakeup mode, allowing the next GP to proceed. // End the previous grace period only after acquiring the mutex -- cgit v1.2.3 From df1e849ae4559544ff00ff5052eefe2479750539 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 27 Nov 2019 16:36:45 -0800 Subject: rcu: Enable tick for nohz_full CPUs slow to provide expedited QS An expedited grace period can be stalled by a nohz_full CPU looping in kernel context. This possibility is currently handled by some carefully crafted checks in rcu_read_unlock_special() that enlist help from ksoftirqd when permitted by the scheduler. However, it is exactly these checks that require the scheduler avoid holding any of its rq or pi locks across rcu_read_unlock() without also having held them across the entire RCU read-side critical section. It would therefore be very nice if expedited grace periods could handle nohz_full CPUs looping in kernel context without such checks. 
This commit therefore adds code to the expedited grace period's wait and cleanup code that forces the scheduler-clock interrupt on for CPUs that fail to quickly supply a quiescent state. "Quickly" is currently a hard-coded single-jiffy delay. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 1 + kernel/rcu/tree_exp.h | 52 ++++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 46 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 055c31781d3a..f9253ed406ba 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -182,6 +182,7 @@ struct rcu_data { bool rcu_need_heavy_qs; /* GP old, so heavy quiescent state! */ bool rcu_urgent_qs; /* GP old need light quiescent state. */ bool rcu_forced_tick; /* Forced tick to provide QS. */ + bool rcu_forced_tick_exp; /* ... provide QS to expedited GP. */ #ifdef CONFIG_RCU_FAST_NO_HZ bool all_lazy; /* All CPU's CBs lazy at idle start? */ unsigned long last_accelerate; /* Last jiffy CBs were accelerated. */ diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 081a17942e57..30b2a02aef39 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -230,7 +230,9 @@ static void __maybe_unused rcu_report_exp_rnp(struct rcu_node *rnp, bool wake) static void rcu_report_exp_cpu_mult(struct rcu_node *rnp, unsigned long mask, bool wake) { + int cpu; unsigned long flags; + struct rcu_data *rdp; raw_spin_lock_irqsave_rcu_node(rnp, flags); if (!(rnp->expmask & mask)) { @@ -238,6 +240,13 @@ static void rcu_report_exp_cpu_mult(struct rcu_node *rnp, return; } WRITE_ONCE(rnp->expmask, rnp->expmask & ~mask); + for_each_leaf_node_cpu_mask(rnp, cpu, mask) { + rdp = per_cpu_ptr(&rcu_data, cpu); + if (!IS_ENABLED(CONFIG_NO_HZ_FULL) || !rdp->rcu_forced_tick_exp) + continue; + rdp->rcu_forced_tick_exp = false; + tick_dep_clear_cpu(cpu, TICK_DEP_BIT_RCU_EXP); + } __rcu_report_exp_rnp(rnp, wake, flags); /* Releases rnp->lock. 
*/ } @@ -449,6 +458,26 @@ static void sync_rcu_exp_select_cpus(void) flush_work(&rnp->rew.rew_work); } +/* + * Wait for the expedited grace period to elapse, within time limit. + * If the time limit is exceeded without the grace period elapsing, + * return false, otherwise return true. + */ +static bool synchronize_rcu_expedited_wait_once(long tlimit) +{ + int t; + struct rcu_node *rnp_root = rcu_get_root(); + + t = swait_event_timeout_exclusive(rcu_state.expedited_wq, + sync_rcu_exp_done_unlocked(rnp_root), + tlimit); + // Workqueues should not be signaled. + if (t > 0 || sync_rcu_exp_done_unlocked(rnp_root)) + return true; + WARN_ON(t < 0); /* workqueues should not be signaled. */ + return false; +} + /* * Wait for the expedited grace period to elapse, issuing any needed * RCU CPU stall warnings along the way. @@ -460,22 +489,31 @@ static void synchronize_rcu_expedited_wait(void) unsigned long jiffies_start; unsigned long mask; int ndetected; + struct rcu_data *rdp; struct rcu_node *rnp; struct rcu_node *rnp_root = rcu_get_root(); - int ret; trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait")); jiffies_stall = rcu_jiffies_till_stall_check(); jiffies_start = jiffies; + if (IS_ENABLED(CONFIG_NO_HZ_FULL)) { + if (synchronize_rcu_expedited_wait_once(1)) + return; + rcu_for_each_leaf_node(rnp) { + for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) { + rdp = per_cpu_ptr(&rcu_data, cpu); + if (rdp->rcu_forced_tick_exp) + continue; + rdp->rcu_forced_tick_exp = true; + tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP); + } + } + WARN_ON_ONCE(1); + } for (;;) { - ret = swait_event_timeout_exclusive( - rcu_state.expedited_wq, - sync_rcu_exp_done_unlocked(rnp_root), - jiffies_stall); - if (ret > 0 || sync_rcu_exp_done_unlocked(rnp_root)) + if (synchronize_rcu_expedited_wait_once(jiffies_stall)) return; - WARN_ON(ret < 0); /* workqueues should not be signaled. 
*/ if (rcu_cpu_stall_suppress) continue; panic_on_rcu_stall(); -- cgit v1.2.3 From 610dea36d3083a977e4f156206cbe1eaa2a532f0 Mon Sep 17 00:00:00 2001 From: Stefan Reiter Date: Fri, 4 Oct 2019 19:49:10 +0000 Subject: rcu/nocb: Fix dump_tree hierarchy print always active Commit 18cd8c93e69e ("rcu/nocb: Print gp/cb kthread hierarchy if dump_tree") added print statements to rcu_organize_nocb_kthreads for debugging, but incorrectly guarded them, causing the function to always spew out its message. This patch fixes it by guarding both pr_alert statements with dump_tree, while also changing the second pr_alert to a pr_cont, to print the hierarchy in a single line (assuming that's how it was supposed to work). Fixes: 18cd8c93e69e ("rcu/nocb: Print gp/cb kthread hierarchy if dump_tree") Signed-off-by: Stefan Reiter [ paulmck: Make single-nocbs-CPU GP kthreads look less erroneous. ] Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index fa08d55f7040..758bfe1de536 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -2321,6 +2321,8 @@ static void __init rcu_organize_nocb_kthreads(void) { int cpu; bool firsttime = true; + bool gotnocbs = false; + bool gotnocbscbs = true; int ls = rcu_nocb_gp_stride; int nl = 0; /* Next GP kthread. */ struct rcu_data *rdp; @@ -2343,21 +2345,31 @@ static void __init rcu_organize_nocb_kthreads(void) rdp = per_cpu_ptr(&rcu_data, cpu); if (rdp->cpu >= nl) { /* New GP kthread, set up for CBs & next GP. */ + gotnocbs = true; nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls; rdp->nocb_gp_rdp = rdp; rdp_gp = rdp; - if (!firsttime && dump_tree) - pr_cont("\n"); - firsttime = false; - pr_alert("%s: No-CB GP kthread CPU %d:", __func__, cpu); + if (dump_tree) { + if (!firsttime) + pr_cont("%s\n", gotnocbscbs + ? 
"" : " (self only)"); + gotnocbscbs = false; + firsttime = false; + pr_alert("%s: No-CB GP kthread CPU %d:", + __func__, cpu); + } } else { /* Another CB kthread, link to previous GP kthread. */ + gotnocbscbs = true; rdp->nocb_gp_rdp = rdp_gp; rdp_prev->nocb_next_cb_rdp = rdp; - pr_alert(" %d", cpu); + if (dump_tree) + pr_cont(" %d", cpu); } rdp_prev = rdp; } + if (gotnocbs && dump_tree) + pr_cont("%s\n", gotnocbscbs ? "" : " (self only)"); } /* -- cgit v1.2.3 From 6935c3983b246d5fbfebd3b891c825e65c118f2d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2019 14:21:54 -0700 Subject: rcu: Avoid data-race in rcu_gp_fqs_check_wake() The rcu_gp_fqs_check_wake() function uses rcu_preempt_blocked_readers_cgp() to read ->gp_tasks while other cpus might overwrite this field. We need READ_ONCE()/WRITE_ONCE() pairs to avoid compiler tricks and KCSAN splats like the following : BUG: KCSAN: data-race in rcu_gp_fqs_check_wake / rcu_preempt_deferred_qs_irqrestore write to 0xffffffff85a7f190 of 8 bytes by task 7317 on cpu 0: rcu_preempt_deferred_qs_irqrestore+0x43d/0x580 kernel/rcu/tree_plugin.h:507 rcu_read_unlock_special+0xec/0x370 kernel/rcu/tree_plugin.h:659 __rcu_read_unlock+0xcf/0xe0 kernel/rcu/tree_plugin.h:394 rcu_read_unlock include/linux/rcupdate.h:645 [inline] __ip_queue_xmit+0x3b0/0xa40 net/ipv4/ip_output.c:533 ip_queue_xmit+0x45/0x60 include/net/ip.h:236 __tcp_transmit_skb+0xdeb/0x1cd0 net/ipv4/tcp_output.c:1158 __tcp_send_ack+0x246/0x300 net/ipv4/tcp_output.c:3685 tcp_send_ack+0x34/0x40 net/ipv4/tcp_output.c:3691 tcp_cleanup_rbuf+0x130/0x360 net/ipv4/tcp.c:1575 tcp_recvmsg+0x633/0x1a30 net/ipv4/tcp.c:2179 inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838 sock_recvmsg_nosec net/socket.c:871 [inline] sock_recvmsg net/socket.c:889 [inline] sock_recvmsg+0x92/0xb0 net/socket.c:885 sock_read_iter+0x15f/0x1e0 net/socket.c:967 call_read_iter include/linux/fs.h:1864 [inline] new_sync_read+0x389/0x4f0 fs/read_write.c:414 read to 0xffffffff85a7f190 of 8 bytes by task 
10 on cpu 1: rcu_gp_fqs_check_wake kernel/rcu/tree.c:1556 [inline] rcu_gp_fqs_check_wake+0x93/0xd0 kernel/rcu/tree.c:1546 rcu_gp_fqs_loop+0x36c/0x580 kernel/rcu/tree.c:1611 rcu_gp_kthread+0x143/0x220 kernel/rcu/tree.c:1768 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 10 Comm: rcu_preempt Not tainted 5.3.0+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Signed-off-by: Eric Dumazet Reported-by: syzbot [ paulmck: Added another READ_ONCE() for RCU CPU stall warnings. ] Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 758bfe1de536..fe5f44811761 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -220,7 +220,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) * blocked tasks. */ if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) { - rnp->gp_tasks = &t->rcu_node_entry; + WRITE_ONCE(rnp->gp_tasks, &t->rcu_node_entry); WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq); } if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD)) @@ -340,7 +340,7 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch); */ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) { - return rnp->gp_tasks != NULL; + return READ_ONCE(rnp->gp_tasks) != NULL; } /* Bias and limit values for ->rcu_read_lock_nesting. 
*/ @@ -493,7 +493,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) trace_rcu_unlock_preempted_task(TPS("rcu_preempt"), rnp->gp_seq, t->pid); if (&t->rcu_node_entry == rnp->gp_tasks) - rnp->gp_tasks = np; + WRITE_ONCE(rnp->gp_tasks, np); if (&t->rcu_node_entry == rnp->exp_tasks) rnp->exp_tasks = np; if (IS_ENABLED(CONFIG_RCU_BOOST)) { @@ -663,7 +663,7 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) dump_blkd_tasks(rnp, 10); if (rcu_preempt_has_tasks(rnp) && (rnp->qsmaskinit || rnp->wait_blkd_tasks)) { - rnp->gp_tasks = rnp->blkd_tasks.next; + WRITE_ONCE(rnp->gp_tasks, rnp->blkd_tasks.next); t = container_of(rnp->gp_tasks, struct task_struct, rcu_node_entry); trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"), @@ -757,7 +757,8 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck) pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n", __func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext); pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n", - __func__, rnp->gp_tasks, rnp->boost_tasks, rnp->exp_tasks); + __func__, READ_ONCE(rnp->gp_tasks), rnp->boost_tasks, + rnp->exp_tasks); pr_info("%s: ->blkd_tasks", __func__); i = 0; list_for_each(lhp, &rnp->blkd_tasks) { -- cgit v1.2.3 From 03bd2983d7a9f898fd89f8f7215c3e56732d8ecd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 10 Oct 2019 09:05:27 -0700 Subject: rcu: Use lockdep rather than comment to enforce lock held The rcu_preempt_check_blocked_tasks() function has a comment that states that the rcu_node structure's ->lock must be held, which might be informative, but which carries little weight if not read. This commit therefore removes this comment in favor of raw_lockdep_assert_held_rcu_node(), which will complain quite visibly if the required lock is not held. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree_plugin.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index fe5f44811761..ed54d36465e2 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -648,8 +648,7 @@ static void rcu_read_unlock_special(struct task_struct *t) * Check that the list of blocked tasks for the newly completed grace * period is in fact empty. It is a serious bug to complete a grace * period that still has RCU readers blocked! This function must be - * invoked -before- updating this rnp's ->gp_seq, and the rnp's ->lock - * must be held by the caller. + * invoked -before- updating this rnp's ->gp_seq. * * Also, if there are blocked tasks on the list, they automatically * block the newly created grace period, so set up ->gp_tasks accordingly. @@ -659,6 +658,7 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) struct task_struct *t; RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n"); + raw_lockdep_assert_held_rcu_node(rnp); if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp))) dump_blkd_tasks(rnp, 10); if (rcu_preempt_has_tasks(rnp) && -- cgit v1.2.3 From b3e627d3d5092a87fc9b9e37e341610cfecfbfdc Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Oct 2019 02:55:57 +0000 Subject: rcu: Make PREEMPT_RCU be a modifier to TREE_RCU Currently PREEMPT_RCU and TREE_RCU are mutually exclusive Kconfig options. But PREEMPT_RCU actually specifies a kind of TREE_RCU, namely a preemptible TREE_RCU. This commit therefore makes PREEMPT_RCU be a modifier to the TREE_RCU Kconfig option. This has the benefit of simplifying several of the #if expressions that formerly needed to check both, but now need only check one or the other. Signed-off-by: Lai Jiangshan Signed-off-by: Lai Jiangshan Reviewed-by: Joel Fernandes (Google) Signed-off-by: Paul E. 
McKenney --- kernel/rcu/Kconfig | 13 +++++++------ kernel/rcu/Makefile | 1 - kernel/rcu/rcu.h | 2 +- kernel/rcu/update.c | 2 +- kernel/sysctl.c | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index 7644eda17d62..0303934e6ef0 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -7,7 +7,7 @@ menu "RCU Subsystem" config TREE_RCU bool - default y if !PREEMPTION && SMP + default y if SMP help This option selects the RCU implementation that is designed for very large SMP system with hundreds or @@ -17,6 +17,7 @@ config TREE_RCU config PREEMPT_RCU bool default y if PREEMPTION + select TREE_RCU help This option selects the RCU implementation that is designed for very large SMP systems with hundreds or @@ -78,7 +79,7 @@ config TASKS_RCU user-mode execution as quiescent states. config RCU_STALL_COMMON - def_bool ( TREE_RCU || PREEMPT_RCU ) + def_bool TREE_RCU help This option enables RCU CPU stall code that is common between the TINY and TREE variants of RCU. The purpose is to allow @@ -86,13 +87,13 @@ config RCU_STALL_COMMON making these warnings mandatory for the tree variants. 
config RCU_NEED_SEGCBLIST - def_bool ( TREE_RCU || PREEMPT_RCU || TREE_SRCU ) + def_bool ( TREE_RCU || TREE_SRCU ) config RCU_FANOUT int "Tree-based hierarchical RCU fanout value" range 2 64 if 64BIT range 2 32 if !64BIT - depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT + depends on TREE_RCU && RCU_EXPERT default 64 if 64BIT default 32 if !64BIT help @@ -112,7 +113,7 @@ config RCU_FANOUT_LEAF int "Tree-based hierarchical RCU leaf-level fanout value" range 2 64 if 64BIT range 2 32 if !64BIT - depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT + depends on TREE_RCU && RCU_EXPERT default 16 help This option controls the leaf-level fanout of hierarchical @@ -187,7 +188,7 @@ config RCU_BOOST_DELAY config RCU_NOCB_CPU bool "Offload RCU callback processing from boot-selected CPUs" - depends on TREE_RCU || PREEMPT_RCU + depends on TREE_RCU depends on RCU_EXPERT || NO_HZ_FULL default n help diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile index 020e8b6a644b..82d5fba48b2f 100644 --- a/kernel/rcu/Makefile +++ b/kernel/rcu/Makefile @@ -9,6 +9,5 @@ obj-$(CONFIG_TINY_SRCU) += srcutiny.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_RCU_PERF_TEST) += rcuperf.o obj-$(CONFIG_TREE_RCU) += tree.o -obj-$(CONFIG_PREEMPT_RCU) += tree.o obj-$(CONFIG_TINY_RCU) += tiny.o obj-$(CONFIG_RCU_NEED_SEGCBLIST) += rcu_segcblist.o diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index ab504fbc76ca..eabafde2349e 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -454,7 +454,7 @@ enum rcutorture_type { INVALID_RCU_FLAVOR }; -#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) +#if defined(CONFIG_TREE_RCU) void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, unsigned long *gp_seq); void do_trace_rcu_torture_read(const char *rcutorturename, diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 1861103662db..34a7452b25fd 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -435,7 +435,7 @@ struct debug_obj_descr rcuhead_debug_descr = 
{ EXPORT_SYMBOL_GPL(rcuhead_debug_descr); #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ -#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE) +#if defined(CONFIG_TREE_RCU) || defined(CONFIG_RCU_TRACE) void do_trace_rcu_torture_read(const char *rcutorturename, struct rcu_head *rhp, unsigned long secs, unsigned long c_old, unsigned long c) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 70665934d53e..d396aaaf19a3 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1268,7 +1268,7 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_do_static_key, }, #endif -#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) +#if defined(CONFIG_TREE_RCU) { .procname = "panic_on_rcu_stall", .data = &sysctl_panic_on_rcu_stall, -- cgit v1.2.3 From 90326f0521a88004194f88f1b597b54347482b5c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 15 Oct 2019 21:18:14 +0200 Subject: rcu: Use CONFIG_PREEMPTION where appropriate The config option `CONFIG_PREEMPT' is used for the preemption model "Low-Latency Desktop". The config option `CONFIG_PREEMPTION' is enabled when kernel preemption is enabled which is true for the preemption model `CONFIG_PREEMPT' and `CONFIG_PREEMPT_RT'. Use `CONFIG_PREEMPTION' if it applies to both preemption models and not just to `CONFIG_PREEMPT'. Cc: "Paul E. McKenney" Cc: Josh Triplett Cc: Steven Rostedt Cc: Mathieu Desnoyers Cc: Lai Jiangshan Cc: Joel Fernandes Cc: Davidlohr Bueso Cc: rcu@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Paul E. 
McKenney --- kernel/rcu/Kconfig | 4 ++-- kernel/rcu/rcutorture.c | 2 +- kernel/rcu/srcutiny.c | 2 +- kernel/rcu/tree.c | 4 ++-- kernel/rcu/tree_exp.h | 2 +- kernel/rcu/tree_plugin.h | 4 ++-- 6 files changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index 0303934e6ef0..1cc940fef17c 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -201,8 +201,8 @@ config RCU_NOCB_CPU specified at boot time by the rcu_nocbs parameter. For each such CPU, a kthread ("rcuox/N") will be created to invoke callbacks, where the "N" is the CPU being offloaded, and where - the "p" for RCU-preempt (PREEMPT kernels) and "s" for RCU-sched - (!PREEMPT kernels). Nothing prevents this kthread from running + the "p" for RCU-preempt (PREEMPTION kernels) and "s" for RCU-sched + (!PREEMPTION kernels). Nothing prevents this kthread from running on the specified CPUs, but (1) the kthreads may be preempted between each callback, and (2) affinity or cgroups can be used to force the kthreads to run on whatever set of CPUs is desired. diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index dee043feb71f..121a0507a7ce 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1730,7 +1730,7 @@ static void rcu_torture_fwd_cb_cr(struct rcu_head *rhp) // Give the scheduler a chance, even on nohz_full CPUs. static void rcu_torture_fwd_prog_cond_resched(unsigned long iter) { - if (IS_ENABLED(CONFIG_PREEMPT) && IS_ENABLED(CONFIG_NO_HZ_FULL)) { + if (IS_ENABLED(CONFIG_PREEMPTION) && IS_ENABLED(CONFIG_NO_HZ_FULL)) { // Real call_rcu() floods hit userspace, so emulate that. 
if (need_resched() || (iter & 0xfff)) schedule(); diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 44d6606b8325..6208c1dae5c9 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -103,7 +103,7 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock); /* * Workqueue handler to drive one grace period and invoke any callbacks - * that become ready as a result. Single-CPU and !PREEMPT operation + * that become ready as a result. Single-CPU and !PREEMPTION operation * means that we get away with murder on synchronization. ;-) */ void srcu_drive_gp(struct work_struct *wp) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1694a6b57ad8..c9dbb05e4c13 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2698,9 +2698,9 @@ EXPORT_SYMBOL_GPL(kfree_call_rcu); /* * During early boot, any blocking grace-period wait automatically - * implies a grace period. Later on, this is never the case for PREEMPT. + * implies a grace period. Later on, this is never the case for PREEMPTION. * - * Howevr, because a context switch is a grace period for !PREEMPT, any + * Howevr, because a context switch is a grace period for !PREEMPTION, any * blocking grace-period wait automatically implies a grace period if * there is only one CPU online at any point time during execution of * either synchronize_rcu() or synchronize_rcu_expedited(). It is OK to diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index d632cd019597..98d078cafa5a 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -670,7 +670,7 @@ static void rcu_exp_handler(void *unused) } } -/* PREEMPT=y, so no PREEMPT=n expedited grace period to clean up after. */ +/* PREEMPTION=y, so no PREEMPTION=n expedited grace period to clean up after. 
*/ static void sync_sched_exp_online_cleanup(int cpu) { } diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index ed54d36465e2..8cdce111ea73 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -789,7 +789,7 @@ static void __init rcu_bootup_announce(void) } /* - * Note a quiescent state for PREEMPT=n. Because we do not need to know + * Note a quiescent state for PREEMPTION=n. Because we do not need to know * how many quiescent states passed, just if there was at least one since * the start of the grace period, this just sets a flag. The caller must * have disabled preemption. @@ -839,7 +839,7 @@ void rcu_all_qs(void) EXPORT_SYMBOL_GPL(rcu_all_qs); /* - * Note a PREEMPT=n context switch. The caller must have disabled interrupts. + * Note a PREEMPTION=n context switch. The caller must have disabled interrupts. */ void rcu_note_context_switch(bool preempt) { -- cgit v1.2.3 From a289e608b3e740c15f623148c26cdec2d6698ce0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 5 Nov 2019 08:31:56 -0800 Subject: rcutorture: Pull callback forward-progress data into rcu_fwd struct Now that RCU behaves reasonably well with the current single-kthread call_rcu() forward-progress testing, it is time to add more kthreads. This commit takes a first step towards that goal by wrapping what will be the per-kthread data into a new rcu_fwd structure. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/rcutorture.c | 103 +++++++++++++++++++++++++++--------------------- 1 file changed, 58 insertions(+), 45 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index dee043feb71f..22a75a4b6b40 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1663,23 +1663,34 @@ struct rcu_fwd_cb { struct rcu_fwd_cb *rfc_next; int rfc_gps; }; -static DEFINE_SPINLOCK(rcu_fwd_lock); -static struct rcu_fwd_cb *rcu_fwd_cb_head; -static struct rcu_fwd_cb **rcu_fwd_cb_tail = &rcu_fwd_cb_head; -static long n_launders_cb; -static unsigned long rcu_fwd_startat; -static bool rcu_fwd_emergency_stop; + #define MAX_FWD_CB_JIFFIES (8 * HZ) /* Maximum CB test duration. */ #define MIN_FWD_CB_LAUNDERS 3 /* This many CB invocations to count. */ #define MIN_FWD_CBS_LAUNDERED 100 /* Number of counted CBs. */ #define FWD_CBS_HIST_DIV 10 /* Histogram buckets/second. */ +#define N_LAUNDERS_HIST (2 * MAX_FWD_CB_JIFFIES / (HZ / FWD_CBS_HIST_DIV)) + struct rcu_launder_hist { long n_launders; unsigned long launder_gp_seq; }; -#define N_LAUNDERS_HIST (2 * MAX_FWD_CB_JIFFIES / (HZ / FWD_CBS_HIST_DIV)) -static struct rcu_launder_hist n_launders_hist[N_LAUNDERS_HIST]; -static unsigned long rcu_launder_gp_seq_start; + +struct rcu_fwd { + spinlock_t rcu_fwd_lock; + struct rcu_fwd_cb *rcu_fwd_cb_head; + struct rcu_fwd_cb **rcu_fwd_cb_tail; + long n_launders_cb; + unsigned long rcu_fwd_startat; + struct rcu_launder_hist n_launders_hist[N_LAUNDERS_HIST]; + unsigned long rcu_launder_gp_seq_start; +}; + +struct rcu_fwd rcu_fwds = { + .rcu_fwd_lock = __SPIN_LOCK_UNLOCKED(rcu_fwds.rcu_fwd_lock), + .rcu_fwd_cb_tail = &rcu_fwds.rcu_fwd_cb_head, +}; + +bool rcu_fwd_emergency_stop; static void rcu_torture_fwd_cb_hist(void) { @@ -1688,16 +1699,17 @@ static void rcu_torture_fwd_cb_hist(void) int i; int j; - for (i = ARRAY_SIZE(n_launders_hist) - 1; i > 0; i--) - if (n_launders_hist[i].n_launders > 0) + for (i = 
ARRAY_SIZE(rcu_fwds.n_launders_hist) - 1; i > 0; i--) + if (rcu_fwds.n_launders_hist[i].n_launders > 0) break; pr_alert("%s: Callback-invocation histogram (duration %lu jiffies):", - __func__, jiffies - rcu_fwd_startat); - gps_old = rcu_launder_gp_seq_start; + __func__, jiffies - rcu_fwds.rcu_fwd_startat); + gps_old = rcu_fwds.rcu_launder_gp_seq_start; for (j = 0; j <= i; j++) { - gps = n_launders_hist[j].launder_gp_seq; + gps = rcu_fwds.n_launders_hist[j].launder_gp_seq; pr_cont(" %ds/%d: %ld:%ld", - j + 1, FWD_CBS_HIST_DIV, n_launders_hist[j].n_launders, + j + 1, FWD_CBS_HIST_DIV, + rcu_fwds.n_launders_hist[j].n_launders, rcutorture_seq_diff(gps, gps_old)); gps_old = gps; } @@ -1714,17 +1726,17 @@ static void rcu_torture_fwd_cb_cr(struct rcu_head *rhp) rfcp->rfc_next = NULL; rfcp->rfc_gps++; - spin_lock_irqsave(&rcu_fwd_lock, flags); - rfcpp = rcu_fwd_cb_tail; - rcu_fwd_cb_tail = &rfcp->rfc_next; + spin_lock_irqsave(&rcu_fwds.rcu_fwd_lock, flags); + rfcpp = rcu_fwds.rcu_fwd_cb_tail; + rcu_fwds.rcu_fwd_cb_tail = &rfcp->rfc_next; WRITE_ONCE(*rfcpp, rfcp); - WRITE_ONCE(n_launders_cb, n_launders_cb + 1); - i = ((jiffies - rcu_fwd_startat) / (HZ / FWD_CBS_HIST_DIV)); - if (i >= ARRAY_SIZE(n_launders_hist)) - i = ARRAY_SIZE(n_launders_hist) - 1; - n_launders_hist[i].n_launders++; - n_launders_hist[i].launder_gp_seq = cur_ops->get_gp_seq(); - spin_unlock_irqrestore(&rcu_fwd_lock, flags); + WRITE_ONCE(rcu_fwds.n_launders_cb, rcu_fwds.n_launders_cb + 1); + i = ((jiffies - rcu_fwds.rcu_fwd_startat) / (HZ / FWD_CBS_HIST_DIV)); + if (i >= ARRAY_SIZE(rcu_fwds.n_launders_hist)) + i = ARRAY_SIZE(rcu_fwds.n_launders_hist) - 1; + rcu_fwds.n_launders_hist[i].n_launders++; + rcu_fwds.n_launders_hist[i].launder_gp_seq = cur_ops->get_gp_seq(); + spin_unlock_irqrestore(&rcu_fwds.rcu_fwd_lock, flags); } // Give the scheduler a chance, even on nohz_full CPUs. 
@@ -1751,16 +1763,16 @@ static unsigned long rcu_torture_fwd_prog_cbfree(void) struct rcu_fwd_cb *rfcp; for (;;) { - spin_lock_irqsave(&rcu_fwd_lock, flags); - rfcp = rcu_fwd_cb_head; + spin_lock_irqsave(&rcu_fwds.rcu_fwd_lock, flags); + rfcp = rcu_fwds.rcu_fwd_cb_head; if (!rfcp) { - spin_unlock_irqrestore(&rcu_fwd_lock, flags); + spin_unlock_irqrestore(&rcu_fwds.rcu_fwd_lock, flags); break; } - rcu_fwd_cb_head = rfcp->rfc_next; - if (!rcu_fwd_cb_head) - rcu_fwd_cb_tail = &rcu_fwd_cb_head; - spin_unlock_irqrestore(&rcu_fwd_lock, flags); + rcu_fwds.rcu_fwd_cb_head = rfcp->rfc_next; + if (!rcu_fwds.rcu_fwd_cb_head) + rcu_fwds.rcu_fwd_cb_tail = &rcu_fwds.rcu_fwd_cb_head; + spin_unlock_irqrestore(&rcu_fwds.rcu_fwd_lock, flags); kfree(rfcp); freed++; rcu_torture_fwd_prog_cond_resched(freed); @@ -1804,8 +1816,8 @@ static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries) sd = cur_ops->stall_dur() + 1; sd4 = (sd + fwd_progress_div - 1) / fwd_progress_div; dur = sd4 + torture_random(&trs) % (sd - sd4); - WRITE_ONCE(rcu_fwd_startat, jiffies); - stopat = rcu_fwd_startat + dur; + WRITE_ONCE(rcu_fwds.rcu_fwd_startat, jiffies); + stopat = rcu_fwds.rcu_fwd_startat + dur; while (time_before(jiffies, stopat) && !shutdown_time_arrived() && !READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) { @@ -1864,23 +1876,23 @@ static void rcu_torture_fwd_prog_cr(void) /* Loop continuously posting RCU callbacks. */ WRITE_ONCE(rcu_fwd_cb_nodelay, true); cur_ops->sync(); /* Later readers see above write. 
*/ - WRITE_ONCE(rcu_fwd_startat, jiffies); - stopat = rcu_fwd_startat + MAX_FWD_CB_JIFFIES; + WRITE_ONCE(rcu_fwds.rcu_fwd_startat, jiffies); + stopat = rcu_fwds.rcu_fwd_startat + MAX_FWD_CB_JIFFIES; n_launders = 0; - n_launders_cb = 0; + rcu_fwds.n_launders_cb = 0; // Hoist initialization for multi-kthread n_launders_sa = 0; n_max_cbs = 0; n_max_gps = 0; - for (i = 0; i < ARRAY_SIZE(n_launders_hist); i++) - n_launders_hist[i].n_launders = 0; + for (i = 0; i < ARRAY_SIZE(rcu_fwds.n_launders_hist); i++) + rcu_fwds.n_launders_hist[i].n_launders = 0; cver = READ_ONCE(rcu_torture_current_version); gps = cur_ops->get_gp_seq(); - rcu_launder_gp_seq_start = gps; + rcu_fwds.rcu_launder_gp_seq_start = gps; tick_dep_set_task(current, TICK_DEP_BIT_RCU); while (time_before(jiffies, stopat) && !shutdown_time_arrived() && !READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) { - rfcp = READ_ONCE(rcu_fwd_cb_head); + rfcp = READ_ONCE(rcu_fwds.rcu_fwd_cb_head); rfcpn = NULL; if (rfcp) rfcpn = READ_ONCE(rfcp->rfc_next); @@ -1888,7 +1900,7 @@ static void rcu_torture_fwd_prog_cr(void) if (rfcp->rfc_gps >= MIN_FWD_CB_LAUNDERS && ++n_max_gps >= MIN_FWD_CBS_LAUNDERED) break; - rcu_fwd_cb_head = rfcpn; + rcu_fwds.rcu_fwd_cb_head = rfcpn; n_launders++; n_launders_sa++; } else { @@ -1910,7 +1922,7 @@ static void rcu_torture_fwd_prog_cr(void) } } stoppedat = jiffies; - n_launders_cb_snap = READ_ONCE(n_launders_cb); + n_launders_cb_snap = READ_ONCE(rcu_fwds.n_launders_cb); cver = READ_ONCE(rcu_torture_current_version) - cver; gps = rcutorture_seq_diff(cur_ops->get_gp_seq(), gps); cur_ops->cb_barrier(); /* Wait for callbacks to be invoked. 
*/ @@ -1921,7 +1933,8 @@ static void rcu_torture_fwd_prog_cr(void) WARN_ON(n_max_gps < MIN_FWD_CBS_LAUNDERED); pr_alert("%s Duration %lu barrier: %lu pending %ld n_launders: %ld n_launders_sa: %ld n_max_gps: %ld n_max_cbs: %ld cver %ld gps %ld\n", __func__, - stoppedat - rcu_fwd_startat, jiffies - stoppedat, + stoppedat - rcu_fwds.rcu_fwd_startat, + jiffies - stoppedat, n_launders + n_max_cbs - n_launders_cb_snap, n_launders, n_launders_sa, n_max_gps, n_max_cbs, cver, gps); @@ -1943,7 +1956,7 @@ static int rcutorture_oom_notify(struct notifier_block *self, WARN(1, "%s invoked upon OOM during forward-progress testing.\n", __func__); rcu_torture_fwd_cb_hist(); - rcu_fwd_progress_check(1 + (jiffies - READ_ONCE(rcu_fwd_startat)) / 2); + rcu_fwd_progress_check(1 + (jiffies - READ_ONCE(rcu_fwds.rcu_fwd_startat)) / 2); WRITE_ONCE(rcu_fwd_emergency_stop, true); smp_mb(); /* Emergency stop before free and wait to avoid hangs. */ pr_info("%s: Freed %lu RCU callbacks.\n", -- cgit v1.2.3 From 6b1b832546067caac8c5833abf88fa082d253b2f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 5 Nov 2019 09:08:58 -0800 Subject: rcutorture: Thread rcu_fwd pointer through forward-progress functions In order to add multiple kthreads, it will be necessary to allow the various functions to operate on a pointer to their kthread's rcu_fwd structure. This commit therefore starts the process of adding the needed "struct rcu_fwd" parameters and arguments to the various callback forward-progress functions. Note that rcutorture_oom_notify() and rcu_torture_fwd_cb_hist() will eventually need to iterate over all kthreads' rcu_fwd structures. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/rcutorture.c | 78 ++++++++++++++++++++++++++----------------------- 1 file changed, 41 insertions(+), 37 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 22a75a4b6b40..cc88ce910a6d 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1661,6 +1661,7 @@ static void rcu_torture_fwd_prog_cb(struct rcu_head *rhp) struct rcu_fwd_cb { struct rcu_head rh; struct rcu_fwd_cb *rfc_next; + struct rcu_fwd *rfc_rfp; int rfc_gps; }; @@ -1692,24 +1693,24 @@ struct rcu_fwd rcu_fwds = { bool rcu_fwd_emergency_stop; -static void rcu_torture_fwd_cb_hist(void) +static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp) { unsigned long gps; unsigned long gps_old; int i; int j; - for (i = ARRAY_SIZE(rcu_fwds.n_launders_hist) - 1; i > 0; i--) - if (rcu_fwds.n_launders_hist[i].n_launders > 0) + for (i = ARRAY_SIZE(rfp->n_launders_hist) - 1; i > 0; i--) + if (rfp->n_launders_hist[i].n_launders > 0) break; pr_alert("%s: Callback-invocation histogram (duration %lu jiffies):", - __func__, jiffies - rcu_fwds.rcu_fwd_startat); - gps_old = rcu_fwds.rcu_launder_gp_seq_start; + __func__, jiffies - rfp->rcu_fwd_startat); + gps_old = rfp->rcu_launder_gp_seq_start; for (j = 0; j <= i; j++) { - gps = rcu_fwds.n_launders_hist[j].