diff options
| author | Thomas Gleixner <tglx@linutronix.de> | 2014-06-22 12:06:40 +0200 |
|---|---|---|
| committer | Thomas Gleixner <tglx@linutronix.de> | 2014-06-23 11:22:35 +0200 |
| commit | 5cee964597260237dd2cabb3ec22bba0da24b25d (patch) | |
| tree | f548efb4181a4cffb026adf43178e65330533e87 /kernel/time/timer.c | |
| parent | 58394271c610e9c65dd0165a1c1f6dec75dc5f3e (diff) | |
time/timers: Move all time(r) related files into kernel/time
Except for Kconfig.HZ. That needs a separate treatment.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel/time/timer.c')
| -rw-r--r-- | kernel/time/timer.c | 1734 |
1 files changed, 1734 insertions, 0 deletions
diff --git a/kernel/time/timer.c b/kernel/time/timer.c new file mode 100644 index 000000000000..3bb01a323b2a --- /dev/null +++ b/kernel/time/timer.c @@ -0,0 +1,1734 @@ +/* + * linux/kernel/timer.c + * + * Kernel internal timers + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better. + * + * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 + * "A Kernel Model for Precision Timekeeping" by Dave Mills + * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to + * serialize accesses to xtime/lost_ticks). + * Copyright (C) 1998 Andrea Arcangeli + * 1999-03-10 Improved NTP compatibility by Ulrich Windl + * 2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love + * 2000-10-05 Implemented scalable SMP per-CPU timer handling. + * Copyright (C) 2000, 2001, 2002 Ingo Molnar + * Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar + */ + +#include <linux/kernel_stat.h> +#include <linux/export.h> +#include <linux/interrupt.h> +#include <linux/percpu.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/pid_namespace.h> +#include <linux/notifier.h> +#include <linux/thread_info.h> +#include <linux/time.h> +#include <linux/jiffies.h> +#include <linux/posix-timers.h> +#include <linux/cpu.h> +#include <linux/syscalls.h> +#include <linux/delay.h> +#include <linux/tick.h> +#include <linux/kallsyms.h> +#include <linux/irq_work.h> +#include <linux/sched.h> +#include <linux/sched/sysctl.h> +#include <linux/slab.h> +#include <linux/compat.h> + +#include <asm/uaccess.h> +#include <asm/unistd.h> +#include <asm/div64.h> +#include <asm/timex.h> +#include <asm/io.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/timer.h> + +__visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; + +EXPORT_SYMBOL(jiffies_64); + +/* + * per-CPU timer vector definitions: + */ +#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6) +#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8) +#define TVN_SIZE (1 << TVN_BITS) +#define TVR_SIZE (1 << TVR_BITS) +#define TVN_MASK (TVN_SIZE - 1) +#define TVR_MASK (TVR_SIZE - 1) +#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1)) + +struct tvec { + struct list_head vec[TVN_SIZE]; +}; + +struct tvec_root { + struct list_head vec[TVR_SIZE]; +}; + +struct tvec_base { + spinlock_t lock; + struct timer_list *running_timer; + unsigned long timer_jiffies; + unsigned long next_timer; + unsigned long active_timers; + unsigned long all_timers; + struct tvec_root tv1; + struct tvec tv2; + struct tvec tv3; + struct tvec tv4; + struct tvec tv5; +} ____cacheline_aligned; + +struct tvec_base boot_tvec_bases; +EXPORT_SYMBOL(boot_tvec_bases); +static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; + +/* Functions below help us manage 'deferrable' flag */ +static inline unsigned int tbase_get_deferrable(struct tvec_base *base) +{ + return ((unsigned int)(unsigned long)base & TIMER_DEFERRABLE); +} + +static inline unsigned int tbase_get_irqsafe(struct tvec_base *base) +{ + return ((unsigned int)(unsigned long)base & TIMER_IRQSAFE); +} + +static inline struct tvec_base *tbase_get_base(struct tvec_base *base) +{ + return ((struct tvec_base *)((unsigned long)base & ~TIMER_FLAG_MASK)); +} + +static inline void +timer_set_base(struct timer_list *timer, struct tvec_base *new_base) +{ + unsigned long flags = (unsigned long)timer->base & TIMER_FLAG_MASK; + + timer->base = (struct tvec_base *)((unsigned long)(new_base) | flags); +} + +static unsigned long round_jiffies_common(unsigned long j, int cpu, + bool force_up) +{ + int rem; + unsigned long original = j; + + /* + * We don't want all cpus firing their timers at once hitting the + * same lock or cachelines, so we skew each extra cpu with an extra + * 3 jiffies. This 3 jiffies came originally from the mm/ code which + * already did this. + * The skew is done by adding 3*cpunr, then round, then subtract this + * extra offset again. + */ + j += cpu * 3; + + rem = j % HZ; + + /* + * If the target jiffie is just after a whole second (which can happen + * due to delays of the timer irq, long irq off times etc etc) then + * we should round down to the whole second, not up. Use 1/4th second + * as cutoff for this rounding as an extreme upper bound for this. + * But never round down if @force_up is set. + */ + if (rem < HZ/4 && !force_up) /* round down */ + j = j - rem; + else /* round up */ + j = j - rem + HZ; + + /* now that we have rounded, subtract the extra skew again */ + j -= cpu * 3; + + /* + * Make sure j is still in the future. Otherwise return the + * unmodified value. + */ + return time_is_after_jiffies(j) ? j : original; +} + +/** + * __round_jiffies - function to round jiffies to a full second + * @j: the time in (absolute) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * __round_jiffies() rounds an absolute time in the future (in jiffies) + * up or down to (approximately) full seconds. This is useful for timers + * for which the exact time they fire does not matter too much, as long as + * they fire approximately every X seconds. + * + * By rounding these timers to whole seconds, all such timers will fire + * at the same time, rather than at various times spread out. The goal + * of this is to have the CPU wake up less, which saves power. + * + * The exact rounding is skewed for each processor to avoid all + * processors firing at the exact same time, which could lead + * to lock contention or spurious cache line bouncing. + * + * The return value is the rounded version of the @j parameter. + */ +unsigned long __round_jiffies(unsigned long j, int cpu) +{ + return round_jiffies_common(j, cpu, false); +} +EXPORT_SYMBOL_GPL(__round_jiffies); + +/** + * __round_jiffies_relative - function to round jiffies to a full second + * @j: the time in (relative) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * __round_jiffies_relative() rounds a time delta in the future (in jiffies) + * up or down to (approximately) full seconds. This is useful for timers + * for which the exact time they fire does not matter too much, as long as + * they fire approximately every X seconds. + * + * By rounding these timers to whole seconds, all such timers will fire + * at the same time, rather than at various times spread out. The goal + * of this is to have the CPU wake up less, which saves power. + * + * The exact rounding is skewed for each processor to avoid all + * processors firing at the exact same time, which could lead + * to lock contention or spurious cache line bouncing. + * + * The return value is the rounded version of the @j parameter. + */ +unsigned long __round_jiffies_relative(unsigned long j, int cpu) +{ + unsigned long j0 = jiffies; + + /* Use j0 because jiffies might change while we run */ + return round_jiffies_common(j + j0, cpu, false) - j0; +} +EXPORT_SYMBOL_GPL(__round_jiffies_relative); + +/** + * round_jiffies - function to round jiffies to a full second + * @j: the time in (absolute) jiffies that should be rounded + * + * round_jiffies() rounds an absolute time in the future (in jiffies) + * up or down to (approximately) full seconds. This is useful for timers + * for which the exact time they fire does not matter too much, as long as + * they fire approximately every X seconds. + * + * By rounding these timers to whole seconds, all such timers will fire + * at the same time, rather than at various times spread out. The goal + * of this is to have the CPU wake up less, which saves power. + * + * The return value is the rounded version of the @j parameter. + */ +unsigned long round_jiffies(unsigned long j) +{ + return round_jiffies_common(j, raw_smp_processor_id(), false); +} +EXPORT_SYMBOL_GPL(round_jiffies); + +/** + * round_jiffies_relative - function to round jiffies to a full second + * @j: the time in (relative) jiffies that should be rounded + * + * round_jiffies_relative() rounds a time delta in the future (in jiffies) + * up or down to (approximately) full seconds. This is useful for timers + * for which the exact time they fire does not matter too much, as long as + * they fire approximately every X seconds. + * + * By rounding these timers to whole seconds, all such timers will fire + * at the same time, rather than at various times spread out. The goal + * of this is to have the CPU wake up less, which saves power. + * + * The return value is the rounded version of the @j parameter. + */ +unsigned long round_jiffies_relative(unsigned long j) +{ + return __round_jiffies_relative(j, raw_smp_processor_id()); +} +EXPORT_SYMBOL_GPL(round_jiffies_relative); + +/** + * __round_jiffies_up - function to round jiffies up to a full second + * @j: the time in (absolute) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * This is the same as __round_jiffies() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long __round_jiffies_up(unsigned long j, int cpu) +{ + return round_jiffies_common(j, cpu, true); +} +EXPORT_SYMBOL_GPL(__round_jiffies_up); + +/** + * __round_jiffies_up_relative - function to round jiffies up to a full second + * @j: the time in (relative) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * This is the same as __round_jiffies_relative() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long __round_jiffies_up_relative(unsigned long j, int cpu) +{ + unsigned long j0 = jiffies; + + /* Use j0 because jiffies might change while we run */ + return round_jiffies_common(j + j0, cpu, true) - j0; +} +EXPORT_SYMBOL_GPL(__round_jiffies_up_relative); + +/** + * round_jiffies_up - function to round jiffies up to a full second + * @j: the time in (absolute) jiffies that should be rounded + * + * This is the same as round_jiffies() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long round_jiffies_up(unsigned long j) +{ + return round_jiffies_common(j, raw_smp_processor_id(), true); +} +EXPORT_SYMBOL_GPL(round_jiffies_up); + +/** + * round_jiffies_up_relative - function to round jiffies up to a full second + * @j: the time in (relative) jiffies that should be rounded + * + * This is the same as round_jiffies_relative() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long round_jiffies_up_relative(unsigned long j) +{ + return __round_jiffies_up_relative(j, raw_smp_processor_id()); +} +EXPORT_SYMBOL_GPL(round_jiffies_up_relative); + +/** + * set_timer_slack - set the allowed slack for a timer + * @timer: the timer to be modified + * @slack_hz: the amount of time (in jiffies) allowed for rounding + * + * Set the amount of time, in jiffies, that a certain timer has + * in terms of slack. By setting this value, the timer subsystem + * will schedule the actual timer somewhere between + * the time mod_timer() asks for, and that time plus the slack. + * + * By setting the slack to -1, a percentage of the delay is used + * instead. + */ +void set_timer_slack(struct timer_list *timer, int slack_hz) +{ + timer->slack = slack_hz; +} +EXPORT_SYMBOL_GPL(set_timer_slack); + +/* + * If the list is empty, catch up ->timer_jiffies to the current time. + * The caller must hold the tvec_base lock. Returns true if the list + * was empty and therefore ->timer_jiffies was updated. + */ +static bool catchup_timer_jiffies(struct tvec_base *base) +{ + if (!base->all_timers) { + base->timer_jiffies = jiffies; + return true; + } + return false; +} + +static void +__internal_add_timer(struct tvec_base *base, struct timer_list *timer) +{ + unsigned long expires = timer->expires; + unsigned long idx = expires - base->timer_jiffies; + struct list_head *vec; + + if (idx < TVR_SIZE) { + int i = expires & TVR_MASK; + vec = base->tv1.vec + i; + } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { + int i = (expires >> TVR_BITS) & TVN_MASK; + vec = base->tv2.vec + i; + } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { + int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; + vec = base->tv3.vec + i; + } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { + int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; + vec = base->tv4.vec + i; + } else if ((signed long) idx < 0) { + /* + * Can happen if you add a timer with expires == jiffies, + * or you set a timer to go off in the past + */ + vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK); + } else { + int i; + /* If the timeout is larger than MAX_TVAL (on 64-bit + * architectures or with CONFIG_BASE_SMALL=1) then we + * use the maximum timeout. + */ + if (idx > MAX_TVAL) { + idx = MAX_TVAL; + expires = idx + base->timer_jiffies; + } + i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; + vec = base->tv5.vec + i; + } + /* + * Timers are FIFO: + */ + list_add_tail(&timer->entry, vec); +} + +static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) +{ + (void)catchup_timer_jiffies(base); + __internal_add_timer(base, timer); + /* + * Update base->active_timers and base->next_timer + */ + if (!tbase_get_deferrable(timer->base)) { + if (!base->active_timers++ || + time_before(timer->expires, base->next_timer)) + base->next_timer = timer->expires; + } + base->all_timers++; +} + +#ifdef CONFIG_TIMER_STATS +void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) +{ + if (timer->start_site) + return; + + timer->start_site = addr; + memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); + timer->start_pid = current->pid; +} + +static void timer_stats_account_timer(struct timer_list *timer) +{ + unsigned int flag = 0; + + if (likely(!timer->start_site)) + return; + if (unlikely(tbase_get_deferrable(timer->base))) + flag |= TIMER_STATS_FLAG_DEFERRABLE; + + timer_stats_update_stats(timer, timer->start_pid, timer->start_site, + timer->function, timer->start_comm, flag); +} + +#else +static void timer_stats_account_timer(struct timer_list *timer) {} +#endif + +#ifdef CONFIG_DEBUG_OBJECTS_TIMERS + +static struct debug_obj_descr timer_debug_descr; + +static void *timer_debug_hint(void *addr) +{ + return ((struct timer_list *) addr)->function; +} + +/* + * fixup_init is called when: + * - an active object is initialized + */ +static int timer_fixup_init(void *addr, enum debug_obj_state state) +{ + struct timer_list *timer = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + del_timer_sync(timer); + debug_object_init(timer, &timer_debug_descr); + return 1; + default: + return 0; + } +} + +/* Stub timer callback for improperly used timers. */ +static void stub_timer(unsigned long data) +{ + WARN_ON(1); +} + +/* + * fixup_activate is called when: + * - an active object is activated + * - an unknown object is activated (might be a statically initialized object) + */ +static int timer_fixup_activate(void *addr, enum debug_obj_state state) +{ + struct timer_list *timer = addr; + + switch (state) { + + case ODEBUG_STATE_NOTAVAILABLE: + /* + * This is not really a fixup. The timer was + * statically initialized. We just make sure that it + * is tracked in the object tracker. + */ + if (timer->entry.next == NULL && + timer->entry.prev == TIMER_ENTRY_STATIC) { + debug_object_init(timer, &timer_debug_descr); + debug_object_activate(timer, &timer_debug_descr); + return 0; + } else { + setup_timer(timer, stub_timer, 0); + return 1; + } + return 0; + + case ODEBUG_STATE_ACTIVE: + WARN_ON(1); + + default: + return 0; + } +} + +/* + * fixup_free is called when: + * - an active object is freed + */ +static int timer_fixup_free(void *addr, enum debug_obj_state state) +{ + struct timer_list *timer = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + del_timer_sync(timer); + debug_object_free(timer, &timer_debug_descr); + return 1; + default: + return 0; + } +} + +/* + * fixup_assert_init is called when: + * - an untracked/uninit-ed object is found + */ +static int timer_fixup_assert_init(void *addr, enum debug_obj_state state) +{ + struct timer_list *timer = addr; + + switch (state) { + case ODEBUG_STATE_NOTAVAILABLE: + if (timer->entry.prev == TIMER_ENTRY_STATIC) { + /* + * This is not really a fixup. The timer was + * statically initialized. We just make sure that it + * is tracked in the object tracker. + */ + debug_object_init(timer, &timer_debug_descr); + return 0; + } else { + setup_timer(timer, stub_timer, 0); + return 1; + } + default: + return 0; + } +} + +static struct debug_obj_descr timer_debug_descr = { + .name = "timer_list", + .debug_hint = timer_debug_hint, + .fixup_init = timer_fixup_init, + .fixup_activate = timer_fixup_activate, + .fixup_free = timer_fixup_free, + .fixup_assert_init = timer_fixup_assert_init, +}; + +static inline void debug_timer_init(struct timer_list *timer) +{ + debug_object_init(timer, &timer_debug_descr); +} + +static inline void debug_timer_activate(struct timer_list *timer) +{ + debug_object_activate(timer, &timer_debug_descr); +} + +static inline void debug_timer_deactivate(struct timer_list *timer) +{ + debug_object_deactivate(timer, &timer_debug_descr); +} + +static inline void debug_timer_free(struct timer_list *timer) +{ + debug_object_free(timer, &timer_debug_descr); +} + +static inline void debug_timer_assert_init(struct timer_list *timer) +{ + debug_object_assert_init(timer, &timer_debug_descr); +} + +static void do_init_timer(struct timer_list *timer, unsigned int flags, + const char *name, struct lock_class_key *key); + +void init_timer_on_stack_key(struct timer_list *timer, unsigned int flags, + const char *name, struct lock_class_key *key) +{ + debug_object_init_on_stack(timer, &timer_debug_descr); + do_init_timer(timer, flags, name, key); +} +EXPORT_SYMBOL_GPL(init_timer_on_stack_key); + +void destroy_timer_on_stack(struct timer_list *timer) +{ + debug_object_free(timer, &timer_debug_descr); +} +EXPORT_SYMBOL_GPL(destroy_timer_on_stack); + +#else +static inline void debug_timer_init(struct timer_list *timer) { } +static inline void debug_timer_activate(struct timer_list *timer) { } +static inline void debug_timer_deactivate(struct timer_list *timer) { } +static inline void debug_timer_assert_init(struct timer_list *timer) { } +#endif + +static inline void debug_init(struct timer_list *timer) +{ + debug_timer_init(timer); + trace_timer_init(timer); +} + +static inline void +debug_activate(struct timer_list *timer, unsigned long expires) +{ + debug_timer_activate(timer); + trace_timer_start(timer, expires); +} + +static inline void debug_deactivate(struct timer_list *timer) +{ + debug_timer_deactivate(timer); + trace_timer_cancel(timer); +} + +static inline void debug_assert_init(struct timer_list *timer) +{ + debug_timer_assert_init(timer); +} + +static void do_init_timer(struct timer_list *timer, unsigned int flags, + const char *name, struct lock_class_key *key) +{ + struct tvec_base *base = __raw_get_cpu_var(tvec_bases); + + timer->entry.next = NULL; + timer->base = (void *)((unsigned long)base | flags); + timer->slack = -1; +#ifdef CONFIG_TIMER_STATS + timer->start_site = NULL; + timer->start_pid = -1; + memset(timer->start_comm, 0, TASK_COMM_LEN); +#endif + lockdep_init_map(&timer->lockdep_map, name, key, 0); +} + +/** + * init_timer_key - initialize a timer + * @timer: the timer to be initialized + * @flags: timer flags + * @name: name of the timer + * @key: lockdep class key of the fake lock used for tracking timer + * sync lock dependencies + * + * init_timer_key() must be done to a timer prior calling *any* of the + * other timer functions. + */ +void init_timer_key(struct timer_list *timer, unsigned int flags, + const char *name, struct lock_class_key *key) +{ + debug_init(timer); + do_init_timer(timer, flags, name, key); +} +EXPORT_SYMBOL(init_timer_key); + +static inline void detach_timer(struct timer_list *timer, bool clear_pending) +{ + struct list_head *entry = &timer->entry; + + debug_deactivate(timer); + + __list_del(entry->prev, entry->next); + if (clear_pending) + entry->next = NULL; + entry->prev = LIST_POISON2; +} + +static inline void +detach_expired_timer(struct timer_list *timer, struct tvec_base *base) +{ + detach_timer(timer, true); + if (!tbase_get_deferrable(timer->base)) + base->active_timers--; + base->all_timers--; + (void)catchup_timer_jiffies(base); +} + +static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, + bool clear_pending) +{ + if (!timer_pending(timer)) + return 0; + + detach_timer(timer, clear_pending); + if (!tbase_get_deferrable(timer->base)) { + base->active_timers--; + if (timer->expires == base->next_timer) + base->next_timer = base->timer_jiffies; + } + base->all_timers--; + (void)catchup_timer_jiffies(base); + return 1; +} + +/* + * We are using hashed locking: holding per_cpu(tvec_bases).lock + * means that all timers which are tied to this base via timer->base are + * locked, and the base itself is locked too. + * + * So __run_timers/migrate_timers can safely modify all timers which could + * be found on ->tvX lists. + * + * When the timer's base is locked, and the timer removed from list, it is + * possible to set timer->base = NULL and drop the lock: the timer remains + * locked. + */ +static struct tvec_base *lock_timer_base(struct timer_list *timer, + unsigned long *flags) + __acquires(timer->base->lock) +{ + struct tvec_base *base; + + for (;;) { + struct tvec_base *prelock_base = timer->base; + base = tbase_get_base(prelock_base); + if (likely(base != NULL)) { + spin_lock_irqsave(&base->lock, *flags); + if (likely(prelock_base == timer->base)) + return base; + /* The timer has migrated to another CPU */ + spin_unlock_irqrestore(&base->lock, *flags); + } + cpu_relax(); + } +} + +static inline int +__mod_timer(struct timer_list *timer, unsigned long expires, + bool pending_only, int pinned) +{ + struct tvec_base *base, *new_base; + unsigned long flags; + int ret = 0 , cpu; + + timer_stats_timer_set_start_info(timer); + BUG_ON(!timer->function); + + base = lock_timer_base(timer, &flags); + + ret = detach_if_pending(timer, base, false); + if (!ret && pending_only) + goto out_unlock; + + debug_activate(timer, expires); + + cpu = get_nohz_timer_target(pinned); + new_base = per_cpu(tvec_bases, cpu); + + if (base != new_base) { + /* + * We are trying to schedule the timer on the local CPU. + * However we can't change timer's base while it is running, + * otherwise del_timer_sync() can't detect that the timer's + * handler yet has not finished. This also guarantees that + * the timer is serialized wrt itself. + */ + if (likely(base->running_timer != timer)) { + /* See the comment in lock_timer_base() */ + timer_set_base(timer, NULL); + spin_unlock(&base->lock); + base = new_base; + spin_lock(&base->lock); + timer_set_base(timer, base); + } + } + + timer->expires = expires; + internal_add_timer(base, timer); + +out_unlock: + spin_unlock_irqrestore(&base->lock, flags); + + return ret; +} + +/** + * mod_timer_pending - modify a pending timer's timeout + * @timer: the pending timer to be modified + * @expires: new timeout in jiffies + * + * mod_timer_pending() is the same for pending timers as mod_timer(), + * but will not re-activate and modify already deleted timers. + * + * It is useful for unserialized use of timers. + */ +int mod_timer_pending(struct timer_list *timer, unsigned long expires) +{ + return __mod_timer(timer, expires, true, TIMER_NOT_PINNED); +} +EXPORT_SYMBOL(mod_timer_pending); + +/* + * Decide where to put the timer while taking the slack into account + * + * Algorithm: + * 1) calculate the maximum (absolute) time + * 2) calculate the highest bit where the expires and new max are different + * 3) use this bit to make a mask + * 4) use the bitmask to round down the maximum time, so that all last + * bits are zeros + */ +static inline +unsigned long apply_slack(struct timer_list *timer, unsigned long expires) +{ + unsigned long expires_limit, mask; + int bit; + + if (timer->slack >= 0) { + expires_limit = expires + timer->slack; + } else { + long delta = expires - jiffies; + + if (delta < 256) + return expires; + + expires_limit = expires + delta / 256; + } + mask = expires ^ expires_limit; + if (mask == 0) + return expires; + + bit = find_last_bit(&mask, BITS_PER_LONG); + + mask = (1UL << bit) - 1; + + expires_limit = expires_limit & ~(mask); + + return expires_limit; +} + +/** + * mod_timer - modify a timer's timeout + * @timer: the timer to be modified + * @expires: new timeout in jiffies + * + * mod_timer() is a more efficient way to update the expire field of an + * active timer (if the timer is inactive it will be activated) + * + * mod_timer(timer, expires) is equivalent to: + * + * del_timer(timer); timer->expires = expires; add_timer(timer); + * + * Note that if there are multiple unserialized concurrent users of the + * same timer, then mod_timer() is the only safe way to modify the timeout, + * since add_timer() cannot modify an already running timer. + * + * The function returns whether it has modified a pending timer or not. + * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an + * active timer returns 1.) + */ +int mod_timer(struct timer_list *timer, unsigned long expires) +{ + expires = apply_slack(timer, expires); + + /* + * This is a common optimization triggered by the + * networking code - if the timer is re-modified + * to be the same thing then just return: + */ + if (timer_pending(timer) && timer->expires == expires) + return 1; + + return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); +} +EXPORT_SYMBOL(mod_timer); + +/** + * mod_timer_pinned - modify a timer's timeout + * @timer: the timer to be modified + * @expires: new timeout in jiffies + * + * mod_timer_pinned() is a way to update the expire field of an + * active timer (if the timer is inactive it will be activated) + * and to ensure that the timer is scheduled on the current CPU. + * + * Note that this does not prevent the timer from being migrated + * when the current CPU goes offline. If this is a problem for + * you, use CPU-hotplug notifiers to handle it correctly, for + * example, cancelling the timer when the corresponding CPU goes + * offline. + * + * mod_timer_pinned(timer, expires) is equivalent to: + * + * del_timer(timer); timer->expires = expires; add_timer(timer); + */ +int mod_timer_pinned(struct timer_list *timer, unsigned long expires) +{ + if (timer->expires == expires && timer_pending(timer)) + return 1; + + return __mod_timer(timer, expires, false, TIMER_PINNED); +} +EXPORT_SYMBOL(mod_timer_pinned); + +/** + * add_timer - start a timer + * @timer: the timer to be added + * + * The kernel will do a ->function(->data) callback from the + * timer interrupt at the ->expires point in the future. The + * current time is 'jiffies'. + * + * The timer's ->expires, ->function (and if the handler uses it, ->data) + * fields must be set prior calling this function. + * + * Timers with an ->expires field in the past will be executed in the next + * timer tick. + */ +void add_timer(struct timer_list *timer) +{ + BUG_ON(timer_pending(timer)); + mod_timer(timer, timer->expires); +} +EXPORT_SYMBOL(add_timer); + +/** + * add_timer_on - start a timer on a particular CPU + * @timer: the timer to be added + * @cpu: the CPU to start it on + * + * This is not very scalable on SMP. Double adds are not possible. + */ +void add_timer_on(struct timer_list *timer, int cpu) +{ + struct tvec_base *base = per_cpu(tvec_bases, cpu); + unsigned long flags; + + timer_stats_timer_set_start_info(timer); + BUG_ON(timer_pending(timer) || !timer->function); + spin_lock_irqsave(&base->lock, flags); + timer_set_base(timer, base); + debug_activate(timer, timer->expires); + internal_add_timer(base, timer); + /* + * Check whether the other CPU is in dynticks mode and needs + * to be triggered to reevaluate the timer wheel. + * We are protected against the other CPU fiddling + * with the timer by holding the timer base lock. This also + * makes sure that a CPU on the way to stop its tick can not + * evaluate the timer wheel. + * + * Spare the IPI for deferrable timers on idle targets though. + * The next busy ticks will take care of it. Except full dynticks + * require special care against races with idle_cpu(), lets deal + * with that later. + */ + if (!tbase_get_deferrable(timer->base) || tick_nohz_full_cpu(cpu)) + wake_up_nohz_cpu(cpu); + + spin_unlock_irqrestore(&base->lock, flags); +} +EXPORT_SYMBOL_GPL(add_timer_on); + +/** + * del_timer - deactive a timer. + * @timer: the timer to be deactivated + * + * del_timer() deactivates a timer - this works on both active and inactive + * timers. + * + * The function returns whether it has deactivated a pending timer or not. + * (ie. del_timer() of an inactive timer returns 0, del_timer() of an + * active timer returns 1.) + */ +int del_timer(struct timer_list *timer) +{ + struct tvec_base *base; + unsigned long flags; + int ret = 0; + + debug_assert_init(timer); + + timer_stats_timer_clear_start_info(timer); + if (timer_pending(timer)) { + base = lock_timer_base(timer, &flags); + ret = detach_if_pending(timer, base, true); + spin_unlock_irqrestore(&base->lock, flags); + } + + return ret; +} +EXPORT_SYMBOL(del_timer); + +/** + * try_to_del_timer_sync - Try to deactivate a timer + * @timer: timer do del + * + * This function tries to deactivate a timer. Upon successful (ret >= 0) + * exit the timer is not queued and the handler is not running on any CPU. + */ +int try_to_del_timer_sync(struct timer_list *timer) +{ + struct tvec_base *base; + unsigned long flags; + int ret = -1; + + debug_assert_init(timer); + + base = lock_timer_base(timer, &flags); + + if (base->running_timer != timer) { + timer_stats_timer_clear_start_info(timer); + ret = detach_if_pending(timer, base, true); + } + spin_unlock_irqrestore(&base->lock, flags); + + return ret; +} +EXPORT_SYMBOL(try_to_del_timer_sync); + +#ifdef CONFIG_SMP +/** + * del_timer_sync - deactivate a timer and wait for the handler to finish. + * @timer: the timer to be deactivated + * + * This function only differs from del_timer() on SMP: besides deactivating + * the timer it also makes sure the handler has finished executing on other + * CPUs. + * + * Synchronization rules: Callers must prevent restarting of the timer, + * otherwise this function is meaningless. It must not be called from + * interrupt contexts unless the timer is an irqsafe one. The caller must + * not hold locks which would prevent completion of the timer's + * handler. The timer's handler must not call add_timer_on(). Upon exit the + * timer is not queued and the handler is not running on any CPU. + * + * Note: For !irqsafe timers, you must not hold locks that are held in + * interrupt context while calling this function. Even if the lock has + * nothing to do with the timer in question. Here's why: + * + * CPU0 CPU1 + * ---- ---- + * <SOFTIRQ> + * call_timer_fn(); + * base->running_timer = mytimer; + * spin_lock_irq(somelock); + * <IRQ> + * spin_lock(somelock); + * del_timer_sync(mytimer); + * while (base->running_timer == mytimer); + * + * Now del_timer_sync() will never return and never release somelock. + * The interrupt on the other CPU is waiting to grab somelock but + * it has interrupted the softirq that CPU0 is waiting to finish. + * + * The function returns whether it has deactivated a pending timer or not. + */ +int del_timer_sync(struct timer_list *timer) +{ +#ifdef CONFIG_LOCKDEP + unsigned long flags; + + /* + * If lockdep gives a backtrace here, please reference + * the synchronization rules above. + */ + local_irq_save(flags); + lock_map_acquire(&timer->lockdep_map); + lock_map_release(&timer->lockdep_map); + local_irq_rest |
