diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/Kconfig.preempt | 25 | ||||
| -rw-r--r-- | kernel/Makefile | 6 | ||||
| -rw-r--r-- | kernel/irq/manage.c | 12 | ||||
| -rw-r--r-- | kernel/lockdep.c | 27 | ||||
| -rw-r--r-- | kernel/printk.c | 2 | ||||
| -rw-r--r-- | kernel/rcupreempt.c | 10 | ||||
| -rw-r--r-- | kernel/rcupreempt_trace.c | 10 | ||||
| -rw-r--r-- | kernel/rcutorture.c | 66 | ||||
| -rw-r--r-- | kernel/rcutree.c | 1535 | ||||
| -rw-r--r-- | kernel/rcutree_trace.c | 271 | ||||
| -rw-r--r-- | kernel/resource.c | 9 | ||||
| -rw-r--r-- | kernel/softirq.c | 19 | ||||
| -rw-r--r-- | kernel/stacktrace.c | 11 |
13 files changed, 1927 insertions, 76 deletions
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index 9fdba03dc1fc..bf987b95b356 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -52,28 +52,3 @@ config PREEMPT endchoice -config PREEMPT_RCU - bool "Preemptible RCU" - depends on PREEMPT - default n - help - This option reduces the latency of the kernel by making certain - RCU sections preemptible. Normally RCU code is non-preemptible, if - this option is selected then read-only RCU sections become - preemptible. This helps latency, but may expose bugs due to - now-naive assumptions about each RCU read-side critical section - remaining on a given CPU through its execution. - - Say N if you are unsure. - -config RCU_TRACE - bool "Enable tracing for RCU - currently stats in debugfs" - depends on PREEMPT_RCU - select DEBUG_FS - default y - help - This option provides tracing in RCU which presents stats - in debugfs for debugging RCU implementation. - - Say Y here if you want to enable RCU tracing - Say N if you are unsure. diff --git a/kernel/Makefile b/kernel/Makefile index 19fad003b19d..b4fdbbff5ec0 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -74,10 +74,10 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o +obj-$(CONFIG_TREE_RCU) += rcutree.o obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o -ifeq ($(CONFIG_PREEMPT_RCU),y) -obj-$(CONFIG_RCU_TRACE) += rcupreempt_trace.o -endif +obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o +obj-$(CONFIG_PREEMPT_RCU_TRACE) += rcupreempt_trace.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 801addda3c43..e9d1c8205a3b 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -673,6 +673,18 @@ int request_irq(unsigned int irq, irq_handler_t handler, struct irq_desc *desc; int retval; + /* + * handle_IRQ_event() always ignores IRQF_DISABLED except for + * the _first_ irqaction (sigh). That can cause oopsing, but + * the behavior is classified as "will not fix" so we need to + * start nudging drivers away from using that idiom. + */ + if ((irqflags & (IRQF_SHARED|IRQF_DISABLED)) + == (IRQF_SHARED|IRQF_DISABLED)) + pr_warning("IRQ %d/%s: IRQF_DISABLED is not " + "guaranteed on shared IRQs\n", + irq, devname); + #ifdef CONFIG_LOCKDEP /* * Lockdep wants atomic interrupt handlers: diff --git a/kernel/lockdep.c b/kernel/lockdep.c index e4bdda8dcf04..4fa6eeb4e8a7 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -291,14 +291,12 @@ void lockdep_off(void) { current->lockdep_recursion++; } - EXPORT_SYMBOL(lockdep_off); void lockdep_on(void) { current->lockdep_recursion--; } - EXPORT_SYMBOL(lockdep_on); /* @@ -580,7 +578,8 @@ static void print_lock_class_header(struct lock_class *class, int depth) /* * printk all lock dependencies starting at <entry>: */ -static void print_lock_dependencies(struct lock_class *class, int depth) +static void __used +print_lock_dependencies(struct lock_class *class, int depth) { struct lock_list *entry; @@ -2512,7 +2511,6 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, if (subclass) register_lock_class(lock, subclass, 1); } - EXPORT_SYMBOL_GPL(lockdep_init_map); /* @@ -2693,8 +2691,9 @@ static int check_unlock(struct task_struct *curr, struct lockdep_map *lock, } static int -__lock_set_subclass(struct lockdep_map *lock, - unsigned int subclass, unsigned long ip) +__lock_set_class(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, unsigned int subclass, + unsigned long ip) { struct task_struct *curr = current; struct held_lock *hlock, *prev_hlock; @@ -2721,6 +2720,7 @@ __lock_set_subclass(struct lockdep_map *lock, return print_unlock_inbalance_bug(curr, lock, ip); found_it: + lockdep_init_map(lock, name, key, 0); class = register_lock_class(lock, subclass, 0); hlock->class_idx = class - lock_classes + 1; @@ -2905,9 +2905,9 @@ static void check_flags(unsigned long flags) #endif } -void -lock_set_subclass(struct lockdep_map *lock, - unsigned int subclass, unsigned long ip) +void lock_set_class(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, unsigned int subclass, + unsigned long ip) { unsigned long flags; @@ -2917,13 +2917,12 @@ lock_set_subclass(struct lockdep_map *lock, raw_local_irq_save(flags); current->lockdep_recursion = 1; check_flags(flags); - if (__lock_set_subclass(lock, subclass, ip)) + if (__lock_set_class(lock, name, key, subclass, ip)) check_chain_key(current); current->lockdep_recursion = 0; raw_local_irq_restore(flags); } - -EXPORT_SYMBOL_GPL(lock_set_subclass); +EXPORT_SYMBOL_GPL(lock_set_class); /* * We are not always called with irqs disabled - do that here, @@ -2947,7 +2946,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, current->lockdep_recursion = 0; raw_local_irq_restore(flags); } - EXPORT_SYMBOL_GPL(lock_acquire); void lock_release(struct lockdep_map *lock, int nested, @@ -2965,7 +2963,6 @@ void lock_release(struct lockdep_map *lock, int nested, current->lockdep_recursion = 0; raw_local_irq_restore(flags); } - EXPORT_SYMBOL_GPL(lock_release); #ifdef CONFIG_LOCK_STAT @@ -3450,7 +3447,6 @@ retry: if (unlock) read_unlock(&tasklist_lock); } - EXPORT_SYMBOL_GPL(debug_show_all_locks); /* @@ -3471,7 +3467,6 @@ void debug_show_held_locks(struct task_struct *task) { __debug_show_held_locks(task); } - EXPORT_SYMBOL_GPL(debug_show_held_locks); void lockdep_sys_exit(void) diff --git a/kernel/printk.c b/kernel/printk.c index f492f1583d77..e651ab05655f 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -662,7 +662,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) if (recursion_bug) { recursion_bug = 0; strcpy(printk_buf, recursion_bug_msg); - printed_len = sizeof(recursion_bug_msg); + printed_len = strlen(recursion_bug_msg); } /* Emit the output into the temporary buffer */ printed_len += vscnprintf(printk_buf + printed_len, diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 59236e8b9daa..04982659875a 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -551,6 +551,16 @@ void rcu_irq_exit(void) } } +void rcu_nmi_enter(void) +{ + rcu_irq_enter(); +} + +void rcu_nmi_exit(void) +{ + rcu_irq_exit(); +} + static void dyntick_save_progress_counter(int cpu) { struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c index 35c2d3360ecf..7c2665cac172 100644 --- a/kernel/rcupreempt_trace.c +++ b/kernel/rcupreempt_trace.c @@ -149,12 +149,12 @@ static void rcupreempt_trace_sum(struct rcupreempt_trace *sp) sp->done_length += cp->done_length; sp->done_add += cp->done_add; sp->done_remove += cp->done_remove; - atomic_set(&sp->done_invoked, atomic_read(&cp->done_invoked)); + atomic_add(atomic_read(&cp->done_invoked), &sp->done_invoked); sp->rcu_check_callbacks += cp->rcu_check_callbacks; - atomic_set(&sp->rcu_try_flip_1, - atomic_read(&cp->rcu_try_flip_1)); - atomic_set(&sp->rcu_try_flip_e1, - atomic_read(&cp->rcu_try_flip_e1)); + atomic_add(atomic_read(&cp->rcu_try_flip_1), + &sp->rcu_try_flip_1); + atomic_add(atomic_read(&cp->rcu_try_flip_e1), + &sp->rcu_try_flip_e1); sp->rcu_try_flip_i1 += cp->rcu_try_flip_i1; sp->rcu_try_flip_ie1 += cp->rcu_try_flip_ie1; sp->rcu_try_flip_g1 += cp->rcu_try_flip_g1; diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 85cb90588a55..b31065522104 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -39,6 +39,7 @@ #include <linux/moduleparam.h> #include <linux/percpu.h> #include <linux/notifier.h> +#include <linux/reboot.h> #include <linux/freezer.h> #include <linux/cpu.h> #include <linux/delay.h> @@ -108,7 +109,6 @@ struct rcu_torture { int rtort_mbtest; }; -static int fullstop = 0; /* stop generating callbacks at test end. */ static LIST_HEAD(rcu_torture_freelist); static struct rcu_torture *rcu_torture_current = NULL; static long rcu_torture_current_version = 0; @@ -136,6 +136,30 @@ static int stutter_pause_test = 0; #endif int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; +#define FULLSTOP_SIGNALED 1 /* Bail due to signal. */ +#define FULLSTOP_CLEANUP 2 /* Orderly shutdown. */ +static int fullstop; /* stop generating callbacks at test end. */ +DEFINE_MUTEX(fullstop_mutex); /* protect fullstop transitions and */ + /* spawning of kthreads. */ + +/* + * Detect and respond to a signal-based shutdown. + */ +static int +rcutorture_shutdown_notify(struct notifier_block *unused1, + unsigned long unused2, void *unused3) +{ + if (fullstop) + return NOTIFY_DONE; + if (signal_pending(current)) { + mutex_lock(&fullstop_mutex); + if (!ACCESS_ONCE(fullstop)) + fullstop = FULLSTOP_SIGNALED; + mutex_unlock(&fullstop_mutex); + } + return NOTIFY_DONE; +} + /* * Allocate an element from the rcu_tortures pool. */ @@ -199,11 +223,12 @@ rcu_random(struct rcu_random_state *rrsp) static void rcu_stutter_wait(void) { - while (stutter_pause_test || !rcutorture_runnable) + while ((stutter_pause_test || !rcutorture_runnable) && !fullstop) { if (rcutorture_runnable) schedule_timeout_interruptible(1); else schedule_timeout_interruptible(round_jiffies_relative(HZ)); + } } /* @@ -599,7 +624,7 @@ rcu_torture_writer(void *arg) rcu_stutter_wait(); } while (!kthread_should_stop() && !fullstop); VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping"); - while (!kthread_should_stop()) + while (!kthread_should_stop() && fullstop != FULLSTOP_SIGNALED) schedule_timeout_uninterruptible(1); return 0; } @@ -624,7 +649,7 @@ rcu_torture_fakewriter(void *arg) } while (!kthread_should_stop() && !fullstop); VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping"); - while (!kthread_should_stop()) + while (!kthread_should_stop() && fullstop != FULLSTOP_SIGNALED) schedule_timeout_uninterruptible(1); return 0; } @@ -734,7 +759,7 @@ rcu_torture_reader(void *arg) VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping"); if (irqreader && cur_ops->irqcapable) del_timer_sync(&t); - while (!kthread_should_stop()) + while (!kthread_should_stop() && fullstop != FULLSTOP_SIGNALED) schedule_timeout_uninterruptible(1); return 0; } @@ -831,7 +856,7 @@ rcu_torture_stats(void *arg) do { schedule_timeout_interruptible(stat_interval * HZ); rcu_torture_stats_print(); - } while (!kthread_should_stop()); + } while (!kthread_should_stop() && !fullstop); VERBOSE_PRINTK_STRING("rcu_torture_stats task stopping"); return 0; } @@ -899,7 +924,7 @@ rcu_torture_shuffle(void *arg) do { schedule_timeout_interruptible(shuffle_interval * HZ); rcu_torture_shuffle_tasks(); - } while (!kthread_should_stop()); + } while (!kthread_should_stop() && !fullstop); VERBOSE_PRINTK_STRING("rcu_torture_shuffle task stopping"); return 0; } @@ -914,10 +939,10 @@ rcu_torture_stutter(void *arg) do { schedule_timeout_interruptible(stutter * HZ); stutter_pause_test = 1; - if (!kthread_should_stop()) + if (!kthread_should_stop() && !fullstop) schedule_timeout_interruptible(stutter * HZ); stutter_pause_test = 0; - } while (!kthread_should_stop()); + } while (!kthread_should_stop() && !fullstop); VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping"); return 0; } @@ -934,12 +959,27 @@ rcu_torture_print_module_parms(char *tag) stutter, irqreader); } +static struct notifier_block rcutorture_nb = { + .notifier_call = rcutorture_shutdown_notify, +}; + static void rcu_torture_cleanup(void) { int i; - fullstop = 1; + mutex_lock(&fullstop_mutex); + if (!fullstop) { + /* If being signaled, let it happen, then exit. */ + mutex_unlock(&fullstop_mutex); + schedule_timeout_interruptible(10 * HZ); + if (cur_ops->cb_barrier != NULL) + cur_ops->cb_barrier(); + return; + } + fullstop = FULLSTOP_CLEANUP; + mutex_unlock(&fullstop_mutex); + unregister_reboot_notifier(&rcutorture_nb); if (stutter_task) { VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task"); kthread_stop(stutter_task); @@ -1015,6 +1055,8 @@ rcu_torture_init(void) { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops, &srcu_ops, &sched_ops, &sched_ops_sync, }; + mutex_lock(&fullstop_mutex); + /* Process args and tell the world that the torturer is on the job. */ for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { cur_ops = torture_ops[i]; @@ -1024,6 +1066,7 @@ rcu_torture_init(void) if (i == ARRAY_SIZE(torture_ops)) { printk(KERN_ALERT "rcutorture: invalid torture type: \"%s\"\n", torture_type); + mutex_unlock(&fullstop_mutex); return (-EINVAL); } if (cur_ops->init) @@ -1146,9 +1189,12 @@ rcu_torture_init(void) goto unwind; } } + register_reboot_notifier(&rcutorture_nb); + mutex_unlock(&fullstop_mutex); return 0; unwind: + mutex_unlock(&fullstop_mutex); rcu_torture_cleanup(); return firsterr; } diff --git a/kernel/rcutree.c b/kernel/rcutree.c new file mode 100644 index 000000000000..a342b032112c --- /dev/null +++ b/kernel/rcutree.c @@ -0,0 +1,1535 @@ +/* + * Read-Copy Update mechanism for mutual exclusion + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2008 + * + * Authors: Dipankar Sarma <dipankar@in.ibm.com> + * Manfred Spraul <manfred@colorfullife.com> + * Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version + * + * Based on the original work by Paul McKenney <paulmck@us.ibm.com> + * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU + */ +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/smp.h> +#include <linux/rcupdate.h> +#include <linux/interrupt.h> +#include <linux/sched.h> +#include <asm/atomic.h> +#include <linux/bitops.h> +#include <linux/module.h> +#include <linux/completion.h> +#include <linux/moduleparam.h> +#include <linux/percpu.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/mutex.h> +#include <linux/time.h> + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static struct lock_class_key rcu_lock_key; +struct lockdep_map rcu_lock_map = + STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key); +EXPORT_SYMBOL_GPL(rcu_lock_map); +#endif + +/* Data structures. */ + +#define RCU_STATE_INITIALIZER(name) { \ + .level = { &name.node[0] }, \ + .levelcnt = { \ + NUM_RCU_LVL_0, /* root of hierarchy. */ \ + NUM_RCU_LVL_1, \ + NUM_RCU_LVL_2, \ + NUM_RCU_LVL_3, /* == MAX_RCU_LVLS */ \ + }, \ + .signaled = RCU_SIGNAL_INIT, \ + .gpnum = -300, \ + .completed = -300, \ + .onofflock = __SPIN_LOCK_UNLOCKED(&name.onofflock), \ + .fqslock = __SPIN_LOCK_UNLOCKED(&name.fqslock), \ + .n_force_qs = 0, \ + .n_force_qs_ngp = 0, \ +} + +struct rcu_state rcu_state = RCU_STATE_INITIALIZER(rcu_state); +DEFINE_PER_CPU(struct rcu_data, rcu_data); + +struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); +DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); + +#ifdef CONFIG_NO_HZ +DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks); +#endif /* #ifdef CONFIG_NO_HZ */ + +static int blimit = 10; /* Maximum callbacks per softirq. */ +static int qhimark = 10000; /* If this many pending, ignore blimit. */ +static int qlowmark = 100; /* Once only this many pending, use blimit. */ + +static void force_quiescent_state(struct rcu_state *rsp, int relaxed); + +/* + * Return the number of RCU batches processed thus far for debug & stats. + */ +long rcu_batches_completed(void) +{ + return rcu_state.completed; +} +EXPORT_SYMBOL_GPL(rcu_batches_completed); + +/* + * Return the number of RCU BH batches processed thus far for debug & stats. + */ +long rcu_batches_completed_bh(void) +{ + return rcu_bh_state.completed; +} +EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); + +/* + * Does the CPU have callbacks ready to be invoked? + */ +static int +cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp) +{ + return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]; +} + +/* + * Does the current CPU require a yet-as-unscheduled grace period? + */ +static int +cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) +{ + /* ACCESS_ONCE() because we are accessing outside of lock. */ + return *rdp->nxttail[RCU_DONE_TAIL] && + ACCESS_ONCE(rsp->completed) == ACCESS_ONCE(rsp->gpnum); +} + +/* + * Return the root node of the specified rcu_state structure. + */ +static struct rcu_node *rcu_get_root(struct rcu_state *rsp) +{ + return &rsp->node[0]; +} + +#ifdef CONFIG_SMP + +/* + * If the specified CPU is offline, tell the caller that it is in + * a quiescent state. Otherwise, whack it with a reschedule IPI. + * Grace periods can end up waiting on an offline CPU when that + * CPU is in the process of coming online -- it will be added to the + * rcu_node bitmasks before it actually makes it online. The same thing + * can happen while a CPU is in the process of coming online. Because this + * race is quite rare, we check for it after detecting that the grace + * period has been delayed rather than checking each and every CPU + * each and every time we start a new grace period. + */ +static int rcu_implicit_offline_qs(struct rcu_data *rdp) +{ + /* + * If the CPU is offline, it is in a quiescent state. We can + * trust its state not to change because interrupts are disabled. + */ + if (cpu_is_offline(rdp->cpu)) { + rdp->offline_fqs++; + return 1; + } + + /* The CPU is online, so send it a reschedule IPI. */ + if (rdp->cpu != smp_processor_id()) + smp_send_reschedule(rdp->cpu); + else + set_need_resched(); + rdp->resched_ipi++; + return 0; +} + +#endif /* #ifdef CONFIG_SMP */ + +#ifdef CONFIG_NO_HZ +static DEFINE_RATELIMIT_STATE(rcu_rs, 10 * HZ, 5); + +/** + * rcu_enter_nohz - inform RCU that current CPU is entering nohz + * + * Enter nohz mode, in other words, -leave- the mode in which RCU + * read-side critical sections can occur. (Though RCU read-side + * critical sections can occur in irq handlers in nohz mode, a possibility + * handled by rcu_irq_enter() and rcu_irq_exit()). + */ +void rcu_enter_nohz(void) +{ + unsigned long flags; + struct rcu_dynticks *rdtp; + + smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ + local_irq_save(flags); + rdtp = &__get_cpu_var(rcu_dynticks); + rdtp->dynticks++; + rdtp->dynticks_nesting--; + WARN_ON_RATELIMIT(rdtp->dynticks & 0x1, &rcu_rs); + local_irq_restore(flags); +} + +/* + * rcu_exit_nohz - inform RCU that current CPU is leaving nohz + * + * Exit nohz mode, in other words, -enter- the mode in which RCU + * read-side critical sections normally occur. + */ +void rcu_exit_nohz(void) +{ + unsigned long flags; + struct rcu_dynticks *rdtp; + + local_irq_save(flags); + rdtp = &__get_cpu_var(rcu_dynticks); + rdtp->dynticks++; + rdtp->dynticks_nesting++; + WARN_ON_RATELIMIT(!(rdtp->dynticks & 0x1), &rcu_rs); + local_irq_restore(flags); + smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ +} + +/** + * rcu_nmi_enter - inform RCU of entry to NMI context + * + * If the CPU was idle with dynamic ticks active, and there is no + * irq handler running, this updates rdtp->dynticks_nmi to let the + * RCU grace-period handling know that the CPU is active. + */ +void rcu_nmi_enter(void) +{ + struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); + + if (rdtp->dynticks & 0x1) + return; + rdtp->dynticks_nmi++; + WARN_ON_RATELIMIT(!(rdtp->dynticks_nmi & 0x1), &rcu_rs); + smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ +} + +/** + * rcu_nmi_exit - inform RCU of exit from NMI context + * + * If the CPU was idle with dynamic ticks active, and there is no + * irq handler running, this updates rdtp->dynticks_nmi to let the + * RCU grace-period handling know that the CPU is no longer active. + */ +void rcu_nmi_exit(void) +{ + struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); + + if (rdtp->dynticks & 0x1) + return; + smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ + rdtp->dynticks_nmi++; + WARN_ON_RATELIMIT(rdtp->dynticks_nmi & 0x1, &rcu_rs); +} + +/** + * rcu_irq_enter - inform RCU of entry to hard irq context + * + * If the CPU was idle with dynamic ticks active, this updates the + * rdtp->dynticks to let the RCU handling know that the CPU is active. + */ +void rcu_irq_enter(void) +{ + struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); + + if (rdtp->dynticks_nesting++) + return; + rdtp->dynticks++; + WARN_ON_RATELIMIT(!(rdtp->dynticks & 0x1), &rcu_rs); + smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ +} + +/** + * rcu_irq_exit - inform RCU of exit from hard irq context + * + * If the CPU was idle with dynamic ticks active, update the rdp->dynticks + * to put let the RCU handling be aware that the CPU is going back to idle + * with no ticks. + */ +void rcu_irq_exit(void) +{ + struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); + + if (--rdtp->dynticks_nesting) + return; + smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ + rdtp->dynticks++; + WARN_ON_RATELIMIT(rdtp->dynticks & 0x1, &rcu_rs); + + /* If the interrupt queued a callback, get out of dyntick mode. */ + if (__get_cpu_var(rcu_data).nxtlist || + __get_cpu_var(rcu_bh_data).nxtlist) + set_need_resched(); +} + +/* + * Record the specified "completed" value, which is later used to validate + * dynticks counter manipulations. Specify "rsp->completed - 1" to + * unconditionally invalidate any future dynticks manipulations (which is + * useful at the beginning of a grace period). + */ +static void dyntick_record_completed(struct rcu_state *rsp, long comp) +{ + rsp->dynticks_completed = comp; +} + +#ifdef CONFIG_SMP + +/* + * Recall the previously recorded value of the completion for dynticks. + */ +static long dyntick_recall_completed(struct rcu_state *rsp) +{ + return rsp->dynticks_completed; +} + +/* + * Snapshot the specified CPU's dynticks counter so that we can later + * credit them with an implicit quiescent state. Return 1 if this CPU + * is already in a quiescent state courtesy of dynticks idle mode. + */ +static int dyntick_save_progress_counter(struct rcu_data *rdp) +{ + int ret; + int snap; + int snap_nmi; + + snap = rdp->dynticks->dynticks; + snap_nmi = rdp->dynticks->dynticks_nmi; + smp_mb(); /* Order sampling of snap with end of grace period. */ + rdp->dynticks_snap = snap; + rdp->dynticks_nmi_snap = snap_nmi; + ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0); + if (ret) + rdp->dynticks_fqs++; + return ret; +} + +/* + * Return true if the specified CPU has passed through a quiescent + * state by virtue of being in or having passed through an dynticks + * idle state since the last call to dyntick_save_progress_counter() + * for this same CPU. + */ +static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) +{ + long curr; + long curr_nmi; + long snap; + long snap_nmi; + + curr = rdp->dynticks->dynticks; + snap = rdp->dynticks_snap; + curr_nmi = rdp->dynticks->dynticks_nmi; + snap_nmi = rdp->dynticks_nmi_snap; + smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ + + /* + * If the CPU passed through or entered a dynticks idle phase with + * no active irq/NMI handlers, then we can safely pretend that the CPU + * already acknowledged the request to pass through a quiescent + * state. Either way, that CPU cannot possibly be in an RCU + * read-side critical section that started before the beginning + * of the current RCU grace period. + */ + if ((curr != snap || (curr & 0x1) == 0) && + (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) { + rdp->dynticks_fqs++; + return 1; + } + + /* Go check for the CPU being offline. */ + return rcu_implicit_offline_qs(rdp); +} + +#endif /* #ifdef CONFIG_SMP */ + +#else /* #ifdef CONFIG_NO_HZ */ + +static void dyntick_record_completed(struct rcu_state *rsp, long comp) +{ +} + +#ifdef CONFIG_SMP + +/* + * If there are no dynticks, then the only way that a CPU can passively + * be in a quiescent state is to be offline. Unlike dynticks idle, which + * is a point in time during the prior (already finished) grace period, + * an offline CPU is always in a quiescent state, and thus can be + * unconditionally applied. So just return the current value of completed. + */ +static long dyntick_recall_completed(struct rcu_state *rsp) +{ + return rsp->completed; +} + +static int dyntick_save_progress_counter(struct rcu_data *rdp) +{ + return 0; +} + +static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) +{ + return rcu_implicit_offline_qs(rdp); +} + +#endif /* #ifdef CONFIG_SMP */ + +#endif /* #else #ifdef CONFIG_NO_HZ */ + +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR + +static void record_gp_stall_check_time(struct rcu_state *rsp) +{ + rsp->gp_start = jiffies; + rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK; +} + +static void print_other_cpu_stall(struct rcu_state *rsp) +{ + int cpu; + long delta; + unsigned long flags; + struct rcu_node *rnp = rcu_get_root(rsp); + struct rcu_node *rnp_cur = rsp->level[NUM_RCU_LVLS - 1]; + struct rcu_node *rnp_end = &rsp->node[NUM_RCU_NODES]; + + /* Only let one CPU complain about others per time interval. */ + + spin_lock_irqsave(&rnp->lock, flags); + delta = jiffies - rsp->jiffies_stall; + if (delta < RCU_STALL_RAT_DELAY || rsp->gpnum == rsp->completed) { + spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; + spin_unlock_irqrestore(&rnp->lock, flags); + + /* OK, time to rat on our buddy... */ + + printk(KERN_ERR "INFO: RCU detected CPU stalls:"); + for (; rnp_cur < rnp_end; rnp_cur++) { + if (rnp_cur->qsmask == 0) + continue; + for (cpu = 0; cpu <= rnp_cur->grphi - rnp_cur->grplo; cpu++) + if (rnp_cur->qsmask & (1UL << cpu)) + printk(" %d", rnp_cur->grplo + cpu); + } + printk(" (detected by %d, t=%ld jiffies)\n", + smp_processor_id(), (long)(jiffies - rsp->gp_start)); + force_quiescent_state(rsp, 0); /* Kick them all. */ +} + +static void print_cpu_stall(struct rcu_state *rsp) +{ + unsigned long flags; + struct rcu_node *rnp = rcu_get_root(rsp); + + printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu jiffies)\n", + smp_processor_id(), jiffies - rsp->gp_start); + dump_stack(); + spin_lock_irqsave(&rnp->lock, flags); + if ((long)(jiffies - rsp->jiffies_stall) >= 0) + rsp->jiffies_stall = + jiffies + RCU_SECONDS_TILL_STALL_RECHECK; + spin_unlock_irqrestore(&rnp->lock, flags); + set_need_resched(); /* kick ourselves to get things going. */ +} + +static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) +{ + long delta; + struct rcu_node *rnp; + + delta = jiffies - rsp->jiffies_stall; + rnp = rdp->mynode; + if ((rnp->qsmask & rdp->grpmask) && delta >= 0) { + + /* We haven't checked in, so go dump stack. */ + print_cpu_stall(rsp); + + } else if (rsp->gpnum != rsp->completed && + delta >= RCU_STALL_RAT_DELAY) { + + /* They had two time units to dump stack, so complain. */ + print_other_cpu_stall(rsp); + } +} + +#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + +static void record_gp_stall_check_time(struct rcu_state *rsp) +{ +} + +static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) +{ +} + +#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + +/* + * Update CPU-local rcu_data state to record the newly noticed grace period. + * This is used both when we started the grace period and when we notice + * that someone else started the grace period. + */ +static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp) +{ + rdp->qs_pending = 1; + rdp->passed_quiesc = 0; + rdp->gpnum = rsp->gpnum; + rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending + + RCU_JIFFIES_TILL_FORCE_QS; +} + +/* + * Did someone else start a new RCU grace period start since we last + * checked? Update local state appropriately if so. Must be called + * on the CPU corresponding to rdp. + */ +static int +check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp) +{ + unsigned long flags; + int ret = 0; + + local_irq_save(flags); + if (rdp->gpnum != rsp->gpnum) { + note_new_gpnum(rsp, rdp); + ret = 1; + } + local_irq_restore(flags); + return ret; +} + +/* + * Start a new RCU grace period if warranted, re-initializing the hierarchy + * in preparation for detecting the next grace period. The caller must hold + * the root node's ->lock, which is released before return. Hard irqs must + * be disabled. + */ +static void +rcu_start_gp(struct rcu_state *rsp, unsigned long flags) + __releases(rcu_get_root(rsp)->lock) +{ + struct rcu_data *rdp = rsp->rda[smp_processor_id()]; + struct rcu_node *rnp = rcu_get_root(rsp); + struct rcu_node *rnp_cur; + struct rcu_node *rnp_end; + + if (!cpu_needs_another_gp(rsp, rdp)) { + spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + + /* Advance to a new grace period and initialize state. */ + rsp->gpnum++; + rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ + rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; + rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending + + RCU_JIFFIES_TILL_FORCE_QS; + record_gp_stall_check_time(rsp); + dyntick_record_completed(rsp, rsp->completed - 1); + note_new_gpnum(rsp, rdp); + + /* + * Because we are first, we know that all our callbacks will + * be cove |
